Refactor core code of mapping vcards

This commit is contained in:
KnugiHK
2024-07-13 13:33:47 +08:00
parent 8f304f1c48
commit 5b97d6013a
3 changed files with 58 additions and 62 deletions

View File

@@ -15,7 +15,7 @@ else:
vcards_deps_installed = True
from Whatsapp_Chat_Exporter import exported_handler, android_handler
from Whatsapp_Chat_Exporter import ios_handler, ios_media_handler
from Whatsapp_Chat_Exporter.contacts_names_from_vcards import ContactsNamesFromVCards, readVCardsFile
from Whatsapp_Chat_Exporter.contacts_names_from_vcards import ContactsFromVCards, read_vcards_file
from Whatsapp_Chat_Exporter.data_model import ChatStore
from Whatsapp_Chat_Exporter.utility import APPLE_TIME, Crypt, DbType, is_chat_empty
from Whatsapp_Chat_Exporter.utility import check_update, import_from_json
@@ -364,7 +364,7 @@ def main():
data = {}
contacts_names_from_vcards_enricher = ContactsNamesFromVCards()
contacts_names_from_vcards_enricher = ContactsFromVCards()
if args.enrich_names_from_vcards is not None:
if not vcards_deps_installed:
@@ -482,7 +482,7 @@ def main():
android_handler.calls(db, data, args.timezone_offset, filter_chat)
if not args.no_html:
if contacts_names_from_vcards_enricher.should_enrich_names_from_vCards():
contacts_names_from_vcards_enricher.enrich_names_from_vCards(data)
contacts_names_from_vcards_enricher.enrich_from_vcards(data)
if (args.filter_empty):
data = {k: v for k, v in data.items() if not is_chat_empty(v)}
@@ -548,7 +548,7 @@ def main():
if (args.filter_empty):
data = {k: v for k, v in data.items() if not is_chat_empty(v)}
contacts_names_from_vcards_enricher.enrich_names_from_vCards(data)
contacts_names_from_vcards_enricher.enrich_from_vcards(data)
if contact_store.should_enrich_from_vcards():
if isinstance(data[next(iter(data))], ChatStore):

View File

@@ -2,79 +2,77 @@ import itertools
from typing import List, TypedDict
import vobject
class ContactsNamesFromVCards:
class ExportedContactNumbers(TypedDict):
    """Name and phone numbers collected from a single vCard entry."""
    # Value of the vCard's FN field.
    full_name: str
    # Values of the vCard's TEL fields, as written in the file.
    numbers: List[str]
class ContactsFromVCards:
def __init__(self) -> None:
self.l = []
self.contact_mapping = []
def should_enrich_from_vcards(self):
return len(self.l) > 0
def load_vcf_file(self, vcfFilePath: str, default_country_calling_code: str):
self.l = readVCardsFile(vcfFilePath, default_country_calling_code)
def enrich_names_from_vCards(self, chats):
for number, name in self.l:
return len(self.contact_mapping) > 0
def load_vcf_file(self, vcf_file_path: str, default_country_code: str):
self.contact_mapping = read_vcards_file(vcf_file_path, default_country_code)
def enrich_from_vcards(self, chats):
for number, name in self.contact_mapping:
# A very short number is most likely a bad contact, so let's skip it
if len(number) <= 5:
continue
for chat in filter_dict_by_prefix(chats, number).values():
for chat in filter_chats_by_prefix(chats, number).values():
if not hasattr(chat, 'name') or (hasattr(chat, 'name') and chat.name is None):
setattr(chat, 'name', name)
def readVCardsFile(vcfFilePath, default_country_calling_code: str):
def read_vcards_file(vcf_file_path, default_country_code: str):
contacts = []
with open(vcfFilePath, mode="r") as f:
with open(vcf_file_path, mode="r", encoding="utf-8") as f:
reader = vobject.readComponents(f)
for row in reader:
if not hasattr(row, 'fn'):
if not hasattr(row, 'fn') or not hasattr(row, 'tel'):
continue
if not hasattr(row, 'tel'):
continue
contact: ExportedGoogleContactVCARDRawNumbers = {
"full_name": row.fn.value,
"numbers": list(map(lambda tel:tel.value, row.tel_list)),
}
contact: ExportedContactNumbers = {
"full_name": row.fn.value,
"numbers": list(map(lambda tel: tel.value, row.tel_list)),
}
contacts.append(contact)
step2 = createNumberToNameDicts(contacts, default_country_calling_code)
return map_number_to_name(contacts, default_country_code)
return step2
def filter_dict_by_prefix(d, prefix: str):
return {k: v for k, v in d.items() if k.startswith(prefix)}
def filter_chats_by_prefix(chats, prefix: str):
    """Return a new dict holding the entries of ``chats`` whose key starts with ``prefix``."""
    matching = {}
    for chat_id, chat in chats.items():
        if chat_id.startswith(prefix):
            matching[chat_id] = chat
    return matching
def createNumberToNameDicts(inContacts, default_country_calling_code: str):
outContacts = list(itertools.chain.from_iterable(
[[normalize_number(num, default_country_calling_code), f"{contact['full_name']} ({i+1})" if len(contact['numbers']) > 1 else contact['full_name']]
for i, num in enumerate(contact['numbers'])]
for contact in inContacts
))
return outContacts
class ExportedGoogleContactVCARDRawNumbers(TypedDict):
full_name: str
numbers: List[str]
def normalize_number(number: str, default_country_calling_code: str):
afterSomeCleaning = number.replace('(', '').replace(')', '').replace(' ', '').replace('-', '')
def map_number_to_name(contacts, default_country_code: str):
    """Build a list of ``(normalized_number, display_name)`` tuples.

    Each number of each contact yields one tuple. When a contact has more
    than one number, the display name gets a 1-based ``(n)`` suffix so the
    entries can be told apart; otherwise the full name is used unchanged.
    """
    pairs = []
    for entry in contacts:
        for position, raw_number in enumerate(entry['numbers'], start=1):
            cleaned = normalize_number(raw_number, default_country_code)
            has_several = len(entry['numbers']) > 1
            label = (
                f"{entry['full_name']} ({position})"
                if has_several
                else entry['full_name']
            )
            pairs.append((cleaned, label))
    return pairs
# A number that starts with a + or 00 already has a country calling code
if afterSomeCleaning.startswith('+'):
afterSomeCleaning = afterSomeCleaning.replace('+', '')
elif afterSomeCleaning.startswith('00'):
afterSomeCleaning = afterSomeCleaning[2:]
else:
# Remove leading zero
if afterSomeCleaning.startswith('0'):
afterSomeCleaning = afterSomeCleaning[1:]
afterSomeCleaning = default_country_calling_code + afterSomeCleaning
return afterSomeCleaning
def normalize_number(number: str, country_code: str) -> str:
    """Normalize a phone number to a string that starts with a country code.

    Every character other than a digit or ``+`` is dropped first. A number
    that then starts with ``+`` or ``00`` is treated as already carrying a
    country code, and only that prefix is removed. Any other number has one
    leading ``0`` removed (if present) and ``country_code`` prepended.

    Args:
        number: Phone number as written in the contact entry.
        country_code: Country code to prepend when the number has none.

    Returns:
        The normalized number.
    """
    # Clean the number
    number = ''.join(c for c in number if c.isdigit() or c == "+")

    # A number that starts with a + or 00 already has a country code
    for prefix in ('+', "00"):
        if number.startswith(prefix):
            return number[len(prefix):]

    # Check the number itself for a leading zero; the prefix loop variable
    # says nothing about how this number begins.
    if number.startswith('0'):
        number = number[1:]
    return country_code + number  # fall back

View File

@@ -1,14 +1,12 @@
# from contacts_names_from_vcards import readVCardsFile
from Whatsapp_Chat_Exporter.contacts_names_from_vcards import normalize_number, readVCardsFile
from Whatsapp_Chat_Exporter.contacts_names_from_vcards import normalize_number, read_vcards_file
def test_readVCardsFile():
l = readVCardsFile("contacts.vcf", "973")
assert len(l) > 0
assert len(read_vcards_file("contacts.vcf", "973")) > 0
def test_createNumberToNameDicts():
def test_create_number_to_name_dicts():
pass
def test_fuzzy_match_numbers():