Fix vCard decoding errors

This commit is contained in:
tomballgithub
2025-11-29 19:34:27 -06:00
parent beaf272a63
commit 029700359e

View File

@@ -1,4 +1,6 @@
import vobject import vobject
import re
import quopri
from typing import List, TypedDict from typing import List, TypedDict
from Whatsapp_Chat_Exporter.data_model import ChatStore from Whatsapp_Chat_Exporter.data_model import ChatStore
from Whatsapp_Chat_Exporter.utility import Device from Whatsapp_Chat_Exporter.utility import Device
@@ -33,24 +35,47 @@ class ContactsFromVCards:
chats.add_chat(number + "@s.whatsapp.net", ChatStore(Device.ANDROID, name)) chats.add_chat(number + "@s.whatsapp.net", ChatStore(Device.ANDROID, name))
def decode_vcard_value(value: str) -> str:
    """Decode a vCard value that may be quoted-printable UTF-8.

    The value is decoded unconditionally: text without ``=XX`` escapes passes
    through unchanged, so plain names are safe to feed in as well.

    Args:
        value: Raw property value as read from the vCard (already a ``str``).

    Returns:
        The decoded text. Byte sequences that are not valid UTF-8 after
        decoding are replaced with U+FFFD. If decoding is impossible the
        value is returned with only its soft line breaks removed.
    """
    # A trailing "=" before a newline is a quoted-printable soft line break.
    unfolded = value.replace("=\r\n", "").replace("=\n", "")
    try:
        # Decode from bytes: handing a str to quopri fails as soon as the
        # value holds a raw non-ASCII character, which used to leave values
        # that mix raw UTF-8 text with "=XX" escapes undecoded.
        decoded = quopri.decodestring(unfolded.encode("utf-8"))
    except ValueError:
        # Covers UnicodeEncodeError (lone surrogates) and binascii.Error.
        return unfolded
    return decoded.decode("utf-8", errors="replace")
# One compiled pattern per name-bearing property, in lookup priority order.
# Each accepts an optional group prefix ("item1.FN") and optional parameters
# (";CHARSET=UTF-8;ENCODING=QUOTED-PRINTABLE") before the value.
_NAME_PATTERNS = tuple(
    (
        field,
        re.compile(
            rf'^(?:[\w-]+\.)?{field}(?P<params>;[^:\r\n]*)?:(?P<value>.*)',
            re.IGNORECASE | re.MULTILINE,
        ),
    )
    for field in ("FN", "N", "ORG")
)

# TEL property line; captures the whole value so formatted numbers survive.
_TEL_PATTERN = re.compile(
    r'^[ \t]*(?:[\w-]+\.)?TEL(?:;[^:\r\n]*)?:(.*)',
    re.IGNORECASE | re.MULTILINE,
)

# Leading run of characters that can belong to a written phone number.
_NUMBER_RUN = re.compile(r'[+\d\s().\-]+')

# Component separator of structured values; "\;" is an escaped semicolon.
_COMPONENT_SEPARATOR = re.compile(r'(?<!\\);')

# N is stored as family;given;additional;prefix;suffix. Display order used
# here is prefix, given, additional, family, suffix.
# NOTE(review): given-name-first ordering is a presentation choice.
_N_DISPLAY_ORDER = (3, 1, 2, 0, 4)


def _render_name(field: str, raw: str) -> str:
    """Turn the raw value of an FN, N or ORG property into display text."""
    if field == "FN":
        return decode_vcard_value(raw.strip()).strip()
    # N and ORG are structured: decode each component on its own so the
    # ";" separators never end up in the displayed name.
    parts = [
        decode_vcard_value(part.strip()).replace("\\;", ";").strip()
        for part in _COMPONENT_SEPARATOR.split(raw)
    ]
    if field == "N":
        parts = [parts[i] for i in _N_DISPLAY_ORDER if i < len(parts)]
    return " ".join(part for part in parts if part)


def _extract_name(vcard: str) -> str:
    """Return the first non-empty name from FN, N, ORG (in that order), or ""."""
    for field, pattern in _NAME_PATTERNS:
        match = pattern.search(vcard)
        if match is None:
            continue
        raw = match.group("value").rstrip()
        params = (match.group("params") or "").upper()
        if "QUOTED-PRINTABLE" in params:
            # A quoted-printable value continues on the next physical line
            # for as long as the current one ends with a soft break ("=").
            for continuation in vcard[match.end():].split("\n")[1:]:
                if not raw.endswith("="):
                    break
                raw = raw[:-1] + continuation.rstrip()
        name = _render_name(field, raw)
        if name:
            return name
    return ""


def _clean_number(raw: str) -> str:
    """Reduce a TEL value to an optional leading "+" followed by digits."""
    text = raw.strip()
    if text[:4].lower() == "tel:":  # vCard 4.0 style URI value
        text = text[4:].lstrip()
    run = _NUMBER_RUN.match(text)
    if run is None:
        return ""
    digits = re.sub(r'\D', '', run.group())
    if not digits:
        return ""
    plus = "+" if run.group().lstrip(" \t(").startswith("+") else ""
    return plus + digits


def _extract_numbers(vcard: str) -> List[str]:
    """Return every usable phone number found in the card's TEL properties."""
    cleaned = (_clean_number(raw) for raw in _TEL_PATTERN.findall(vcard))
    return [number for number in cleaned if number]


def read_vcards_file(vcf_file_path, default_country_code: str):
    """Read contacts from a .vcf file and map their numbers to names.

    The file is read as UTF-8 (undecodable bytes are ignored) and split on
    ``BEGIN:VCARD``; chunks without ``END:VCARD`` are discarded. A card is
    kept only if it yields a non-empty name (FN, then N, then ORG) and at
    least one phone number.

    Args:
        vcf_file_path: Path of the vCard file to read.
        default_country_code: Passed through unchanged to
            ``map_number_to_name``.

    Returns:
        Whatever ``map_number_to_name(contacts, default_country_code)``
        returns for the collected contacts.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    with open(vcf_file_path, "r", encoding="utf-8", errors="ignore") as f:
        content = f.read()

    contacts: List[ExportedContactNumbers] = []
    for vcard in content.split("BEGIN:VCARD"):
        if "END:VCARD" not in vcard:
            continue

        name = _extract_name(vcard)
        if not name:
            continue

        numbers = _extract_numbers(vcard)
        if not numbers:
            continue

        contact: ExportedContactNumbers = {
            "full_name": name,
            "numbers": numbers,
        }
        contacts.append(contact)

    return map_number_to_name(contacts, default_country_code)