Merge pull request #175 from tomballgithub/vcard_fix

Fix vcard decoding errors
This commit is contained in:
Knugi
2025-12-15 23:00:07 +08:00
committed by GitHub
6 changed files with 213 additions and 42 deletions

View File

@@ -20,7 +20,7 @@ jobs:
- name: Install dependencies - name: Install dependencies
run: | run: |
python -m pip install --upgrade pip python -m pip install --upgrade pip
pip install pycryptodome vobject javaobj-py3 ordered-set zstandard nuitka==2.6.7 pip install pycryptodome javaobj-py3 ordered-set zstandard nuitka==2.6.7
pip install . pip install .
- name: Build binary with Nuitka - name: Build binary with Nuitka
run: | run: |
@@ -45,7 +45,7 @@ jobs:
- name: Install dependencies - name: Install dependencies
run: | run: |
python -m pip install --upgrade pip python -m pip install --upgrade pip
pip install pycryptodome vobject javaobj-py3 ordered-set zstandard nuitka==2.6.7 pip install pycryptodome javaobj-py3 ordered-set zstandard nuitka==2.6.7
pip install . pip install .
- name: Build binary with Nuitka - name: Build binary with Nuitka
run: | run: |
@@ -69,7 +69,7 @@ jobs:
- name: Install dependencies - name: Install dependencies
run: | run: |
python -m pip install --upgrade pip python -m pip install --upgrade pip
pip install pycryptodome vobject javaobj-py3 ordered-set zstandard nuitka==2.6.7 pip install pycryptodome javaobj-py3 ordered-set zstandard nuitka==2.6.7
pip install . pip install .
- name: Build binary with Nuitka - name: Build binary with Nuitka
run: | run: |

View File

@@ -20,16 +20,8 @@ from argparse import ArgumentParser, SUPPRESS
from datetime import datetime from datetime import datetime
from getpass import getpass from getpass import getpass
from sys import exit from sys import exit
from typing import Tuple, Optional, List, Dict, Any, Union from typing import Optional, List, Dict
from Whatsapp_Chat_Exporter.vcards_contacts import ContactsFromVCards
# Try to import vobject for contacts processing
try:
import vobject
except ModuleNotFoundError:
vcards_deps_installed = False
else:
from Whatsapp_Chat_Exporter.vcards_contacts import ContactsFromVCards
vcards_deps_installed = True
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@@ -432,13 +424,6 @@ def process_single_date_filter(parser: ArgumentParser, args) -> None:
def setup_contact_store(args) -> Optional['ContactsFromVCards']: def setup_contact_store(args) -> Optional['ContactsFromVCards']:
"""Set up and return a contact store if needed.""" """Set up and return a contact store if needed."""
if args.enrich_from_vcards is not None: if args.enrich_from_vcards is not None:
if not vcards_deps_installed:
logger.error(
"You don't have the dependency to enrich contacts with vCard.\n"
"Read more on how to deal with enriching contacts:\n"
"https://github.com/KnugiHK/Whatsapp-Chat-Exporter/blob/main/README.md#usage\n"
)
exit(1)
contact_store = ContactsFromVCards() contact_store = ContactsFromVCards()
contact_store.load_vcf_file( contact_store.load_vcf_file(
args.enrich_from_vcards, args.default_country_code) args.enrich_from_vcards, args.default_country_code)

View File

@@ -1,7 +1,12 @@
import vobject import logging
import re
import quopri
from typing import List, TypedDict from typing import List, TypedDict
from Whatsapp_Chat_Exporter.data_model import ChatStore from Whatsapp_Chat_Exporter.data_model import ChatStore
from Whatsapp_Chat_Exporter.utility import Device from Whatsapp_Chat_Exporter.utility import CLEAR_LINE, Device
logger = logging.getLogger(__name__)
class ExportedContactNumbers(TypedDict): class ExportedContactNumbers(TypedDict):
@@ -33,25 +38,145 @@ class ContactsFromVCards:
chats.add_chat(number + "@s.whatsapp.net", ChatStore(Device.ANDROID, name)) chats.add_chat(number + "@s.whatsapp.net", ChatStore(Device.ANDROID, name))
def decode_quoted_printable(value: str, charset: str) -> str:
    """Decode a quoted-printable vCard value into text.

    Args:
        value: The raw property value, possibly containing ``=XX`` escapes.
        charset: Name of the character set the escaped bytes are encoded in.

    Returns:
        The decoded text, with undecodable byte sequences replaced by
        U+FFFD. If the value cannot be decoded at all (for example because
        ``charset`` is not a known codec), a warning is logged and ``value``
        is returned unchanged.
    """
    try:
        try:
            raw = value.encode("ascii")
        except UnicodeEncodeError:
            # Some exporters leave literal non-ASCII characters next to the
            # =XX escapes. Passing such a str straight to quopri raises, so
            # turn those characters into bytes of the declared charset first.
            raw = value.encode(charset, errors="replace")
        return quopri.decodestring(raw).decode(charset, errors="replace")
    except (LookupError, ValueError):
        # LookupError: unknown charset name; ValueError: malformed input.
        # Fallback: return the original value if decoding fails
        logger.warning(
            f"Failed to decode quoted-printable value: {value}, "
            f"charset: {charset}. Please report this issue.{CLEAR_LINE}"
        )
        return value
def _parse_vcard_line(line: str) -> tuple[str, dict[str, str], str] | None:
"""
Parses a single vCard property line into its components:
Property Name, Parameters (as a dict), and Value.
Example: 'FN;CHARSET=UTF-8:John Doe' -> ('FN', {'CHARSET': 'UTF-8'}, 'John Doe')
"""
# Find the first colon, which separates the property/parameters from the value.
colon_index = line.find(':')
if colon_index == -1:
return None # Invalid vCard line format
prop_and_params = line[:colon_index].strip()
value = line[colon_index + 1:].strip()
# Split property name from parameters
parts = prop_and_params.split(';')
property_name = parts[0].upper()
parameters = {}
for part in parts[1:]:
if '=' in part:
key, val = part.split('=', 1)
parameters[key.upper()] = val.strip('"') # Remove potential quotes from value
return property_name, parameters, value
def get_vcard_value(entry: str, field_name: str) -> list[str]:
    """
    Scans the vCard entry for lines whose property name is field_name
    and returns a list of its decoded values, handling parameters like
    ENCODING and CHARSET.

    A group prefix on the property name (the 'item1.' in 'item1.TEL') is
    ignored when matching. Quoted-printable values that continue over
    several physical lines via a trailing '=' soft line break are joined
    before decoding.

    Args:
        entry: The text of a single vCard block.
        field_name: Property name to look for, matched case-insensitively.

    Returns:
        The values found, in the order they appear in the entry.

    Raises:
        NotImplementedError: If a matching line declares an ENCODING other
            than QUOTED-PRINTABLE, 7BIT or 8BIT.
    """
    target_name = field_name.upper()
    charset = "utf-8"
    values = []
    # None means "not inside a quoted-printable value". A string (possibly
    # empty) is the text collected so far while soft line breaks continue.
    # Testing the string's truthiness instead would lose a value whose first
    # physical line carries nothing but the soft break.
    pending = None

    for line in entry.splitlines():
        line = line.strip()

        if pending is not None:
            if line.endswith('='):
                pending += line[:-1]
                continue  # Wait for the next line to complete the value
            values.append(decode_quoted_printable(pending + line, charset))
            pending = None
            continue

        if not line:
            continue

        # Cheap pre-filter on the property name so unrelated lines are never
        # parsed; the part before the last '.' is an optional group label.
        name_token = line.partition(':')[0].partition(';')[0]
        if name_token.rpartition('.')[2].strip().upper() != target_name:
            continue

        parsed = _parse_vcard_line(line)
        if parsed is None:
            continue
        _, params, raw_value = parsed

        encoding = params.get('ENCODING') or ''
        # Encoding names are compared case-insensitively.
        normalized_encoding = encoding.upper()

        if normalized_encoding == 'QUOTED-PRINTABLE':
            charset = params.get('CHARSET', 'utf-8')
            if raw_value.endswith('='):
                # Handle soft line breaks in quoted-printable and cache the line
                pending = raw_value[:-1]
                continue  # Wait for the next line to complete the value
            values.append(decode_quoted_printable(raw_value, charset))
        elif normalized_encoding in ('', '7BIT', '8BIT'):
            # 7BIT and 8BIT mean the value is stored as-is.
            values.append(raw_value)
        else:
            raise NotImplementedError(f"Encoding '{encoding}' not supported yet.")

    if pending is not None:
        # The entry ended in the middle of a soft-wrapped value; keep what
        # was collected rather than dropping it silently.
        values.append(decode_quoted_printable(pending, charset))

    return values
def process_vcard_entry(entry: str) -> dict | bool:
    """
    Process a vCard entry using pure string manipulation

    Args:
        entry: A string containing a single vCard block.

    Returns:
        A dictionary with the keys "full_name" (str) and "numbers" (list of
        str, duplicates removed, in order of first appearance), or False if
        the entry has no usable name or no phone number.
    """
    name = None

    # Extract name in priority: FN -> N -> ORG
    for field in ("FN", "N", "ORG"):
        for candidate in get_vcard_value(entry, field):
            # Structured values separate their components with ';'. Turn
            # those into spaces and collapse the runs left by empty parts.
            cleaned = " ".join(candidate.replace(';', ' ').split())
            if cleaned:
                name = cleaned
                break
        if name:
            # An empty value (e.g. a bare "FN:") must not stop the search,
            # so only a non-empty name ends the fallback chain.
            break

    if not name:
        return False

    # Remove duplications while keeping file order. A set would make the
    # order of a contact's numbers vary between runs.
    numbers = [number for number in dict.fromkeys(get_vcard_value(entry, "TEL")) if number]
    if not numbers:
        return False

    return {
        "full_name": name,
        "numbers": numbers,
    }
def read_vcards_file(vcf_file_path, default_country_code: str):
    """Load contacts from a VCF file and map their numbers to names.

    Args:
        vcf_file_path: Path of the .vcf file to read.
        default_country_code: Passed through to map_number_to_name.

    Returns:
        Whatever map_number_to_name returns for the parsed contacts.
    """
    # Bytes that are not valid UTF-8 are dropped instead of raising.
    with open(vcf_file_path, "r", encoding="utf-8", errors="ignore") as handle:
        text = handle.read()

    # Every card starts with BEGIN:VCARD; chunks without a matching
    # END:VCARD (such as whatever precedes the first card) are not cards.
    parsed_cards = (
        process_vcard_entry(chunk)
        for chunk in text.split("BEGIN:VCARD")
        if "END:VCARD" in chunk
    )
    # Entries rejected by process_vcard_entry come back falsy.
    contacts = [card for card in parsed_cards if card]

    logger.info(f"Imported {len(contacts)} contacts/vcards{CLEAR_LINE}")
    return map_number_to_name(contacts, default_country_code)

View File

@@ -44,10 +44,9 @@ android_backup = ["pycryptodome", "javaobj-py3"]
crypt12 = ["pycryptodome"] crypt12 = ["pycryptodome"]
crypt14 = ["pycryptodome"] crypt14 = ["pycryptodome"]
crypt15 = ["pycryptodome", "javaobj-py3"] crypt15 = ["pycryptodome", "javaobj-py3"]
all = ["pycryptodome", "javaobj-py3", "vobject"] all = ["pycryptodome", "javaobj-py3"]
everything = ["pycryptodome", "javaobj-py3", "vobject"] everything = ["pycryptodome", "javaobj-py3"]
backup = ["pycryptodome", "javaobj-py3"] backup = ["pycryptodome", "javaobj-py3"]
vcards = ["vobject", "pycryptodome", "javaobj-py3"]
[project.scripts] [project.scripts]
wtsexporter = "Whatsapp_Chat_Exporter.__main__:main" wtsexporter = "Whatsapp_Chat_Exporter.__main__:main"

View File

@@ -2,4 +2,43 @@ BEGIN:VCARD
VERSION:3.0 VERSION:3.0
FN:Sample Contact FN:Sample Contact
TEL;TYPE=CELL:+85288888888 TEL;TYPE=CELL:+85288888888
END:VCARD END:VCARD
BEGIN:VCARD
VERSION:2.1
N:Lopez;Yard Lawn Guy;Jose;;
FN:Yard Lawn Guy, Jose Lopez
TEL;HOME:5673334444
END:VCARD
BEGIN:VCARD
VERSION:2.1
N;CHARSET=UTF-8;ENCODING=QUOTED-PRINTABLE:;=4A=6F=68=6E=20=42=75=74=6C=65=72=20=F0=9F=8C=9F=
=F0=9F=92=AB=F0=9F=8C=9F;;;
FN;CHARSET=UTF-8;ENCODING=QUOTED-PRINTABLE:=4A=6F=68=6E=20=42=75=74=6C=65=72=20=F0=9F=8C=9F=
=F0=9F=92=AB=F0=9F=8C=9F
TEL;PREF:5556667777
END:VCARD
BEGIN:VCARD
VERSION:2.1
TEL;WORK;PREF:1234567890
ORG:Airline Contact #'s
NOTE;ENCODING=QUOTED-PRINTABLE:=53=70=69=72=69=74=20=41=69=72=6C=69=
=6E=65=73=20=38=30=30=2D=37=37=32=2D=37=31=31=37=55=6E=69=74=65=64=
=20=41=69=72=6C=69=6E=65=73=20=38=30=30=2D=32=34=31=2D=36=35=32=32
END:VCARD
BEGIN:VCARD
VERSION:2.1
TEL;WORK;PREF:3451112222
X-SAMSUNGADR;ENCODING=QUOTED-PRINTABLE:;;=31=31=31=31=32=20=4E=6F=72=74=68=20=45=6C=64=72=
=69=64=67=65=20=50=61=72=6B=77=61=79;=44=61=6C=6C=61=73;=54=58;=32=32=32=32=32
ORG:James Peacock Elementary
END:VCARD
BEGIN:VCARD
VERSION:2.1
TEL;CELL:8889990001
ORG:AAA Car Service
END:VCARD

View File

@@ -6,7 +6,30 @@ from Whatsapp_Chat_Exporter.vcards_contacts import normalize_number, read_vcards
def test_readVCardsFile():
    """Check that each card in the sample VCF yields the expected name."""
    vcf_path = os.path.join(os.path.dirname(__file__), "data", "contacts.vcf")
    data = read_vcards_file(vcf_path, "852")

    if data:
        print("Found Names")
        print("-----------------------")
        # The name is the second element of each entry (index 1).
        for position, entry in enumerate(data, start=1):
            print(f"{position}. {entry[1]}")
    print(data)

    assert len(data) == 6

    expected_names = (
        "Sample Contact",             # simple contact name
        "Yard Lawn Guy, Jose Lopez",  # complex name
        "John Butler 🌟💫🌟",           # name with emoji
        "Airline Contact #'s",        # card with a multi-line encoded note
        "James Peacock Elementary",   # card with a multi-line encoded address
        "AAA Car Service",            # business entry using ORG but not N/FN
    )
    for index, expected in enumerate(expected_names):
        assert data[index][1] == expected
def test_create_number_to_name_dicts(): def test_create_number_to_name_dicts():