Index de l'article

Work with phones

phonenumbers library

If you have a phone field and a country code field:

...
import phonenumbers
...
 
df['Phone'] = df['Phone'].fillna('')
df['CountryCode'] = df['CountryCode'].fillna('')
 
df['TestPhone'] = df.apply(
    lambda x: phonenumbers.parse(str(x['Phone']), str(x['CountryCode']))
    if str(x['Phone']) != ''
    and str(x['CountryCode']) != ''
    else '',
    axis='columns'
)

Full phone fix

# FIX PHONE
df['Phone'] = df['Phone'].str.strip()
df['Phone'] = df['Phone'].replace({'-': ' '}, regex=True)
df['Phone'] = df['Phone'].replace({'_': ' '}, regex=True)
df['Phone'] = df['Phone'].replace({'\.': ' '}, regex=True)
df['Phone'] = df['Phone'].replace({',': ' '}, regex=True)
df['Phone'] = df['Phone'].replace({' ': ' '}, regex=True)
df['Phone'] = df['Phone'].replace({'[a-zA-Z]': ''}, regex=True)
df['Phone'] = df['Phone'].replace({' ': ' '}, regex=True)
df['Phone'] = df['Phone'].replace({'^ ': ''}, regex=True)
df['Phone'] = df['Phone'].replace({'\*': ''}, regex=True)
df['Phone'] = df['Phone'].replace({'\(': ''}, regex=True)
df['Phone'] = df['Phone'].replace({'\)': ''}, regex=True)
df['Phone'] = df['Phone'].replace({'\$': ''}, regex=True)
df['Phone'] = df['Phone'].str.strip()
 
df['Phone'] = df['Phone'].fillna('')
 
df.loc[df['Phone'].str.replace(' ', '').str.len() < 4, 'Phone'] = ''
df['CountryCode'] = df['CountryCode'].fillna('')
 
# CREATE TESTPHONE FIELD
df['TestPhone'] = ''
df = df.reset_index(drop=True)
for i, row in df.iterrows():
    myPhone = row['Phone']
    myCountryCode = row['CountryCode']
    MyTest = ''
 
    try:
        # IF PHONE EXISTS AND START WITH "+" AND COUNTRY EXISTS
        if myPhone != '' and myPhone.startswith('+') and myCountryCode != '':
            MyTest = phonenumbers.parse(myPhone, myCountryCode)
 
        # IF PHONE EXISTS AND NOT START WITH "+" AND COUNTRY EXISTS
        if myPhone != '' and not myPhone.startswith('+') and myCountryCode != '':
            MyTest = phonenumbers.parse(myPhone, myCountryCode)
 
        # IF PHONE EXISTS AND START WITH "+" AND COUNTRY NOT EXISTS
        if myPhone != '' and myPhone.startswith('+') and myCountryCode == '':
            MyTest = phonenumbers.parse(myPhone, myCountryCode)
 
        # IF PHONE EXISTS AND NOT START WITH "+" AND COUNTRY NOT EXISTS
            if myPhone != '' and not myPhone.startswith('+') and myCountryCode == '':
            MyTest = phonenumbers.parse('+' + myPhone, myCountryCode)
 
    except:
        pass
 
    df.at[i,'TestPhone'] = MyTest
 
df['AutoCode'] = ''
df.loc[df['TestPhone'] != '', 'AutoCode'] = '+' + df['TestPhone'].astype(str).replace({'Country Code: ': ''}, regex=True).replace({'( National Number: ).*': ''}, regex=True)
 
df['AutoNationalPhone'] = ''
df.loc[df['TestPhone'] != '', 'AutoNationalPhone'] = df['TestPhone'].astype(str).replace({'^.*? National Number: ': ''}, regex=True).astype(str).replace({'( Leading Zero).*': ''}, regex=True)
 
# CONCATENATION
df['Formatted Phone'] = df[['AutoCode', 'AutoNationalPhone']].apply(lambda x: ' '.join(x.dropna()), axis=1).fillna('')
 
df.loc[~df['Formatted Phone'].str.startswith(('+')), 'Formatted Phone'] = df['Phone']
 
# AUTOCOUNTRYCODE
df['AutoCountryCode'] = ''
df = df.reset_index(drop=True)
 
for i, row in df.iterrows():
    myCountryCode = row['CountryCode']
    MyTest = ''
 
    try:
        MyTest = phonenumbers.country_code_for_region(myCountryCode)
    except:
        pass
 
    df.at[i,'AutoCountryCode'] = MyTest
 
# LAST
df.loc[(~df['Formatted Phone'].str.startswith(('+'))) &amp; (df['AutoCountryCode'] != 0) &amp; (df['Phone'] != ''), 'Formatted Phone'] = '+' + df['AutoCountryCode'].astype(str) + ' ' + df['Phone'].replace({'^0': ''}, regex=True)
 
# IS_POSSIBLE_NUMBER
df['is_possible_number'] = ''
for i, row in df.iterrows():
    myPhone = row['Formatted Phone']
    MyTest = ''
    try:
        MyTest = phonenumbers.is_possible_number(phonenumbers.parse(myPhone, None))
    except:
        pass
 
    df.at[i, 'is_possible_number'] = MyTest
 
# IS_VALID_NUMBER
df['is_valid_number'] = ''
for i, row in df.iterrows():
    myPhone = row['Formatted Phone']
    MyTest = ''
    try:
        MyTest = phonenumbers.is_valid_number(phonenumbers.parse(myPhone, None))
    except:
        pass
 
    df.at[i, 'is_valid_number'] = MyTest
 
# DELETE WRONG NUMBER
df.loc[(df['is_valid_number'] == False) &amp; (df['is_possible_number'] == False), 'Formatted Phone'] = ''
 
# RE-ORDER
df = df[['Phone', 'CountryCode', 'Formatted Phone']]
 
print(tabulate(df.head(30), headers='keys', tablefmt='psql', showindex=False))

 

Liens ou pièces jointes
Télécharger ce fichier (France-Departements-Deformation.zip)France-Departements-Deformation.zip[France-Departements-Deformation]335 Ko
Télécharger ce fichier (simple_countries.zip)simple_countries.zip[simple_countries]1880 Ko