· 7 years ago · Aug 30, 2018, 09:36 AM
1import pandas as pd
2import Levenshtein as lev
3import numpy as np
4
# Known-good mail domains; candidate values are measured against each of these.
white_domains = [
    'gmail.com', 'yahoo.com', 'icloud.com',
    'mail.ru', 'yandex.ru',
]
12
# Sample input: two values that match known domains exactly and two
# near-misses. Each entry is a bare domain, not a full address.
df = pd.DataFrame({
    'email': ['yandex.ru', 'yandax.ru', 'mail.ru', 'maik.ru'],
})
15
def _is_probable_domain_typo(domain, known_domains=None, max_distance=2):
    """Return True when *domain* looks like a misspelling of a known domain.

    Args:
        domain: Value to check; it is compared whole against each known
            domain, so it should be a bare domain such as ``'mail.ru'``.
        known_domains: Domains treated as correct. Defaults to the
            module-level ``white_domains``.
        max_distance: Largest Levenshtein distance still counted as a typo.

    Returns:
        False when *domain* matches a known domain exactly or is further
        than ``max_distance`` edits from all of them; True otherwise.
    """
    if known_domains is None:
        known_domains = white_domains
    # An exact match is correct by definition, even when it also happens to
    # sit within ``max_distance`` edits of a different known domain.
    if domain in known_domains:
        return False
    # With no known domains there is nothing to be a typo of: any() over an
    # empty iterable is False, so this never raises on an empty whitelist.
    return any(
        lev.distance(domain, known) <= max_distance
        for known in known_domains
    )


# Flag rows whose value is close to, but not exactly, a whitelisted domain.
df['typo_in_email_domain_flag'] = df['email'].apply(_is_probable_domain_typo)