From 5b97d6013aaed9b6975a594786ce0bbbe0a8b258 Mon Sep 17 00:00:00 2001 From: KnugiHK <24708955+KnugiHK@users.noreply.github.com> Date: Sat, 13 Jul 2024 13:33:47 +0800 Subject: [PATCH] Refactor core code of mapping vcards --- Whatsapp_Chat_Exporter/__main__.py | 8 +- .../contacts_names_from_vcards.py | 104 +++++++++--------- .../contacts_names_from_vcards_test.py | 8 +- 3 files changed, 58 insertions(+), 62 deletions(-) diff --git a/Whatsapp_Chat_Exporter/__main__.py b/Whatsapp_Chat_Exporter/__main__.py index bc61eb8..3cb0256 100644 --- a/Whatsapp_Chat_Exporter/__main__.py +++ b/Whatsapp_Chat_Exporter/__main__.py @@ -15,7 +15,7 @@ else: vcards_deps_installed = True from Whatsapp_Chat_Exporter import exported_handler, android_handler from Whatsapp_Chat_Exporter import ios_handler, ios_media_handler -from Whatsapp_Chat_Exporter.contacts_names_from_vcards import ContactsNamesFromVCards, readVCardsFile +from Whatsapp_Chat_Exporter.contacts_names_from_vcards import ContactsFromVCards, read_vcards_file from Whatsapp_Chat_Exporter.data_model import ChatStore from Whatsapp_Chat_Exporter.utility import APPLE_TIME, Crypt, DbType, is_chat_empty from Whatsapp_Chat_Exporter.utility import check_update, import_from_json @@ -364,7 +364,7 @@ def main(): data = {} - contacts_names_from_vcards_enricher = ContactsNamesFromVCards() + contacts_names_from_vcards_enricher = ContactsFromVCards() if args.enrich_names_from_vcards is not None: if not vcards_deps_installed: @@ -482,7 +482,7 @@ def main(): android_handler.calls(db, data, args.timezone_offset, filter_chat) if not args.no_html: if contacts_names_from_vcards_enricher.should_enrich_names_from_vCards(): - contacts_names_from_vcards_enricher.enrich_names_from_vCards(data) + contacts_names_from_vcards_enricher.enrich_from_vcards(data) if (args.filter_empty): data = {k: v for k, v in data.items() if not is_chat_empty(v)} @@ -548,7 +548,7 @@ def main(): if (args.filter_empty): data = {k: v for k, v in data.items() if not 
is_chat_empty(v)} - contacts_names_from_vcards_enricher.enrich_names_from_vCards(data) + contacts_names_from_vcards_enricher.enrich_from_vcards(data) if contact_store.should_enrich_from_vcards(): if isinstance(data[next(iter(data))], ChatStore): diff --git a/Whatsapp_Chat_Exporter/contacts_names_from_vcards.py b/Whatsapp_Chat_Exporter/contacts_names_from_vcards.py index e3cb30f..58d5eaa 100644 --- a/Whatsapp_Chat_Exporter/contacts_names_from_vcards.py +++ b/Whatsapp_Chat_Exporter/contacts_names_from_vcards.py @@ -2,79 +2,77 @@ import itertools from typing import List, TypedDict import vobject -class ContactsNamesFromVCards: + +class ExportedContactNumbers(TypedDict): + full_name: str + numbers: List[str] + + +class ContactsFromVCards: def __init__(self) -> None: - self.l = [] - + self.contact_mapping = [] + def should_enrich_from_vcards(self): - return len(self.l) > 0 - - def load_vcf_file(self, vcfFilePath: str, default_country_calling_code: str): - self.l = readVCardsFile(vcfFilePath, default_country_calling_code) - - def enrich_names_from_vCards(self, chats): - for number, name in self.l: + return len(self.contact_mapping) > 0 + + def load_vcf_file(self, vcf_file_path: str, default_country_code: str): + self.contact_mapping = read_vcards_file(vcf_file_path, default_country_code) + + def enrich_from_vcards(self, chats): + for number, name in self.contact_mapping: # short number must be a bad contact, lets skip it if len(number) <= 5: continue - for chat in filter_dict_by_prefix(chats, number).values(): + for chat in filter_chats_by_prefix(chats, number).values(): if not hasattr(chat, 'name') or (hasattr(chat, 'name') and chat.name is None): setattr(chat, 'name', name) -def readVCardsFile(vcfFilePath, default_country_calling_code: str): +def read_vcards_file(vcf_file_path, default_country_code: str): contacts = [] - with open(vcfFilePath, mode="r") as f: + with open(vcf_file_path, mode="r", encoding="utf-8") as f: reader = vobject.readComponents(f) for row in 
reader: - if not hasattr(row, 'fn'): + if not hasattr(row, 'fn') or not hasattr(row, 'tel'): continue - - if not hasattr(row, 'tel'): - continue - - contact: ExportedGoogleContactVCARDRawNumbers = { - "full_name": row.fn.value, - "numbers": list(map(lambda tel:tel.value, row.tel_list)), - } + contact: ExportedContactNumbers = { + "full_name": row.fn.value, + "numbers": list(map(lambda tel: tel.value, row.tel_list)), + } contacts.append(contact) - step2 = createNumberToNameDicts(contacts, default_country_calling_code) + return map_number_to_name(contacts, default_country_code) - return step2 - -def filter_dict_by_prefix(d, prefix: str): - return {k: v for k, v in d.items() if k.startswith(prefix)} +def filter_chats_by_prefix(chats, prefix: str): + return {k: v for k, v in chats.items() if k.startswith(prefix)} -def createNumberToNameDicts(inContacts, default_country_calling_code: str): - outContacts = list(itertools.chain.from_iterable( - [[normalize_number(num, default_country_calling_code), f"{contact['full_name']} ({i+1})" if len(contact['numbers']) > 1 else contact['full_name']] - for i, num in enumerate(contact['numbers'])] - for contact in inContacts - )) - return outContacts - -class ExportedGoogleContactVCARDRawNumbers(TypedDict): - full_name: str - numbers: List[str] - -def normalize_number(number: str, default_country_calling_code: str): - afterSomeCleaning = number.replace('(', '').replace(')', '').replace(' ', '').replace('-', '') +def map_number_to_name(contacts, default_country_code: str): + mapping = [] + for contact in contacts: + for index, num in enumerate(contact['numbers']): + normalized = normalize_number(num, default_country_code) + if len(contact['numbers']) > 1: + name = f"{contact['full_name']} ({index+1})" + else: + name = contact['full_name'] + mapping.append((normalized, name)) + return mapping - # A number that starts with a + or 00 means it already have country_calling_code - if afterSomeCleaning.startswith('+'): - afterSomeCleaning = 
afterSomeCleaning.replace('+', '') - elif afterSomeCleaning.startswith('00'): - afterSomeCleaning = afterSomeCleaning[2:] - else: - # Remove leading zero - if afterSomeCleaning.startswith('0'): - afterSomeCleaning = afterSomeCleaning[1:] - afterSomeCleaning = default_country_calling_code + afterSomeCleaning - - return afterSomeCleaning \ No newline at end of file +def normalize_number(number: str, country_code: str): + # Clean the number + number = ''.join(c for c in number if c.isdigit() or c == "+") + + # A number that starts with a + or 00 means it already has a country code + for starting_char in ('+', "00"): + if number.startswith(starting_char): + return number[len(starting_char):] + + # no country code present: drop a single leading zero before prefixing one + if number.startswith('0'): + number = number[1:] + return country_code + number # fall back diff --git a/Whatsapp_Chat_Exporter/contacts_names_from_vcards_test.py b/Whatsapp_Chat_Exporter/contacts_names_from_vcards_test.py index dbe8d9f..6f18507 100644 --- a/Whatsapp_Chat_Exporter/contacts_names_from_vcards_test.py +++ b/Whatsapp_Chat_Exporter/contacts_names_from_vcards_test.py @@ -1,14 +1,12 @@ # from contacts_names_from_vcards import readVCardsFile -from Whatsapp_Chat_Exporter.contacts_names_from_vcards import normalize_number, readVCardsFile +from Whatsapp_Chat_Exporter.contacts_names_from_vcards import normalize_number, read_vcards_file def test_readVCardsFile(): - l = readVCardsFile("contacts.vcf", "973") - - assert len(l) > 0 + assert len(read_vcards_file("contacts.vcf", "973")) > 0 -def test_createNumberToNameDicts(): +def test_create_number_to_name_dicts(): pass def test_fuzzy_match_numbers():