Merge pull request #106 from Bnaya/bnaya-iteration

[Feat] Import contact names from exported google vcards and more
This commit is contained in:
Knugi
2024-07-20 13:45:20 +08:00
committed by GitHub
9 changed files with 221 additions and 19 deletions

View File

@@ -16,11 +16,11 @@ jobs:
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.10'
python-version: '3.12'
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install pycryptodome javaobj-py3 ordered-set zstandard nuitka
pip install pycryptodome javaobj-py3 ordered-set zstandard nuitka==2.3
pip install .
- name: Build binary with Nuitka
run: |
@@ -40,11 +40,11 @@ jobs:
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.10'
python-version: '3.12'
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install pycryptodome javaobj-py3 ordered-set zstandard nuitka
pip install pycryptodome javaobj-py3 ordered-set zstandard nuitka==2.3
pip install .
- name: Build binary with Nuitka
run: |
@@ -64,11 +64,11 @@ jobs:
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.10'
python-version: '3.12'
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install pycryptodome javaobj-py3 ordered-set zstandard nuitka
pip install pycryptodome javaobj-py3 ordered-set zstandard nuitka==2.3
pip install .
- name: Build binary with Nuitka
run: |

11
.gitignore vendored
View File

@@ -134,3 +134,14 @@ dmypy.json
*.onefile-build/
*.exe
__main__
# Dev time intermediates & temp files
result/
WhatsApp/
/*.db
/*.db-*
/myout
/msgstore.db
/myout-json
.vscode/

View File

@@ -35,6 +35,13 @@ Simply invoke the following command from shell.
```sh
wtsexporter -a
```
#### Enriching Contact from vCard
Usually, the default WhatsApp contact database extracted from your phone will contain the contact names, and the exporter will use it to map your chats. However, some reported cases showed that the database may never have been populated.
In this case, you can export your contacts to a vCard file from your phone or a cloud provider like Google Contacts. Then, install the necessary dependency and run the following command from the shell:
```sh
pip install whatsapp-chat-exporter["vcards"]
wtsexporter -a --enrich-from-vcards contacts.vcf --default-country-code 852
```
### Encrypted Android WhatsApp Backup
In order to support the decryption, install pycryptodome if it is not installed

View File

@@ -7,10 +7,17 @@ import shutil
import json
import string
import glob
try:
import vobject
except ModuleNotFoundError:
vcards_deps_installed = False
else:
vcards_deps_installed = True
from Whatsapp_Chat_Exporter import exported_handler, android_handler
from Whatsapp_Chat_Exporter import ios_handler, ios_media_handler
from Whatsapp_Chat_Exporter.vcards_contacts import ContactsFromVCards
from Whatsapp_Chat_Exporter.data_model import ChatStore
from Whatsapp_Chat_Exporter.utility import APPLE_TIME, Crypt, DbType
from Whatsapp_Chat_Exporter.utility import APPLE_TIME, Crypt, DbType, chat_is_empty
from Whatsapp_Chat_Exporter.utility import check_update, import_from_json
from argparse import ArgumentParser, SUPPRESS
from datetime import datetime
@@ -85,6 +92,20 @@ def main():
type=str,
const="result.json",
help="Save the result to a single JSON file (default if present: result.json)")
parser.add_argument(
'--avoid-encoding-json',
dest='avoid_encoding_json',
default=False,
action='store_true',
help="Don't encode non-ascii characters in the output JSON files")
parser.add_argument(
'--pretty-print-json',
dest='pretty_print_json',
default=None,
nargs='?',
const=2,
type=int,
help="Pretty print the output JSON.")
parser.add_argument(
'-d',
'--db',
@@ -232,6 +253,13 @@ def main():
metavar="phone number",
help="Exclude chats that match the supplied phone number"
)
parser.add_argument(
"--dont-filter-empty",
dest="filter_empty",
default=True,
action='store_false',
help="By default, the exporter will not render chats with no valid message. Setting this flag will cause the exporter to render those."
)
parser.add_argument(
"--per-chat",
dest="json_per_chat",
@@ -253,6 +281,19 @@ def main():
type=int,
help="Specify the chunk size for decrypting iOS backup, which may affect the decryption speed."
)
parser.add_argument(
"--enrich-from-vcards",
dest="enrich_from_vcards",
default=None,
help="Path to an exported vcf file from Google contacts export. Add names missing from WhatsApp's default database"
)
parser.add_argument(
"--default-country-code",
dest="default_contry_code",
default=None,
help="Use with --enrich-from-vcards. When numbers in the vcf file does not have a country code, this will be used. 1 is for US, 66 for Thailand etc. Most likely use the number of your own country"
)
args = parser.parse_args()
# Check for updates
@@ -277,6 +318,8 @@ def main():
(args.json[-5:] == ".json" and os.path.isfile(args.json[:-5]))
):
parser.error("When --per-chat is enabled, the destination of --json must be a directory.")
if args.enrich_from_vcards is not None and args.default_contry_code is None:
parser.error("When --enrich-from-vcards is provided, you must also set --default-country-code")
if args.filter_date is not None:
if " - " in args.filter_date:
start, end = args.filter_date.split(" - ")
@@ -320,6 +363,17 @@ def main():
data = {}
contact_store = ContactsFromVCards()
if args.enrich_from_vcards is not None:
if not vcards_deps_installed:
parser.error(
"You don't have the dependency to enrich contacts with vCard.\n"
"Read more on how to deal with enriching contacts:\n"
"https://github.com/KnugiHK/Whatsapp-Chat-Exporter/blob/main/README.md#usage"
)
contact_store.load_vcf_file(args.enrich_from_vcards, args.default_contry_code)
if args.android:
contacts = android_handler.contacts
messages = android_handler.messages
@@ -429,6 +483,9 @@ def main():
if args.android:
android_handler.calls(db, data, args.timezone_offset, filter_chat)
if not args.no_html:
if not contact_store.is_empty():
contact_store.enrich_from_vcards(data)
create_html(
data,
args.output,
@@ -436,7 +493,8 @@ def main():
args.embedded,
args.offline,
args.size,
args.no_avatar
args.no_avatar,
args.filter_empty
)
else:
print(
@@ -471,7 +529,9 @@ def main():
args.template,
args.embedded,
args.offline,
args.size
args.size,
args.no_avatar,
args.filter_empty
)
for file in glob.glob(r'*.*'):
shutil.copy(file, args.output)
@@ -483,15 +543,28 @@ def main():
args.template,
args.embedded,
args.offline,
args.size
args.size,
args.no_avatar,
args.filter_empty
)
if args.json and not args.import_json:
if args.filter_empty:
data = {k: v for k, v in data.items() if not chat_is_empty(v)}
if not contact_store.is_empty():
contact_store.enrich_from_vcards(data)
if isinstance(data[next(iter(data))], ChatStore):
data = {jik: chat.to_json() for jik, chat in data.items()}
if not args.json_per_chat:
with open(args.json, "w") as f:
data = json.dumps(data)
data = json.dumps(
data,
ensure_ascii=not args.avoid_encoding_json,
indent=args.pretty_print_json
)
print(f"\nWriting JSON file...({int(len(data)/1024/1024)}MB)")
f.write(data)
else:
@@ -506,7 +579,8 @@ def main():
else:
contact = jik.replace('+', '')
with open(f"{args.json}/{contact}.json", "w") as f:
f.write(json.dumps(data[jik]))
file_content_to_write = json.dumps(data[jik], ensure_ascii=not args.avoid_encoding_json, indent=2 if args.pretty_print_json else None)
f.write(file_content_to_write)
print(f"Writing JSON file...({index + 1}/{total})", end="\r")
print()
else:

View File

@@ -11,7 +11,7 @@ from markupsafe import escape as htmle
from hashlib import sha256
from base64 import b64decode, b64encode
from Whatsapp_Chat_Exporter.data_model import ChatStore, Message
from Whatsapp_Chat_Exporter.utility import MAX_SIZE, ROW_SIZE, DbType, determine_metadata, JidType
from Whatsapp_Chat_Exporter.utility import MAX_SIZE, ROW_SIZE, DbType, determine_metadata, JidType, chat_is_empty
from Whatsapp_Chat_Exporter.utility import rendering, Crypt, Device, get_file_name, setup_template
from Whatsapp_Chat_Exporter.utility import brute_force_offset, CRYPT14_OFFSETS, get_status_location
from Whatsapp_Chat_Exporter.utility import get_chat_condition, slugify
@@ -157,7 +157,11 @@ def contacts(db, data):
c = db.cursor()
c.execute("""SELECT count() FROM wa_contacts""")
total_row_number = c.fetchone()[0]
print(f"Processing contacts...({total_row_number})")
if total_row_number == 0:
print("No contacts profiles found in the default database, consider using --enrich-from-vcards for adopting names from exported contacts from Google")
return False
else:
print(f"Processing contacts...({total_row_number})")
c.execute("""SELECT jid, COALESCE(display_name, wa_name) as display_name, status FROM wa_contacts; """)
row = c.fetchone()
@@ -745,7 +749,8 @@ def create_html(
embedded=False,
offline_static=False,
maximum_size=None,
no_avatar=False
no_avatar=False,
filter_empty=True
):
template = setup_template(template, no_avatar)
@@ -759,7 +764,7 @@ def create_html(
for current, contact in enumerate(data):
chat = data[contact]
if len(chat.messages) == 0:
if filter_empty and chat_is_empty(chat):
continue
safe_file_name, name = get_file_name(contact, chat)

View File

@@ -168,6 +168,12 @@ def get_chat_condition(filter, include, column):
else:
return ""
def _is_message_empty(message):
    """Tell whether a message carries neither text nor media.

    A message is empty when its ``data`` is ``None`` or the empty string
    and its ``media`` attribute is falsy.
    """
    if message.data is not None and message.data != "":
        return False
    return not message.media


def chat_is_empty(chat: ChatStore):
    """Tell whether a chat has nothing worth rendering.

    A chat without any message is empty, and so is a chat in which every
    message is empty according to ``_is_message_empty``.
    """
    for message in chat.messages.values():
        if not _is_message_empty(message):
            return False
    return True
# Android Specific
CRYPT14_OFFSETS = (

View File

@@ -0,0 +1,78 @@
import itertools
from typing import List, TypedDict
import vobject
class ExportedContactNumbers(TypedDict):
    """One contact as read from a vCard export, before number normalization."""

    # Display name of the contact (filled from the card's ``fn`` value).
    full_name: str
    # Every telephone value found on the card, exactly as written there.
    numbers: List[str]
class ContactsFromVCards:
    """Keeps (number, name) pairs loaded from a vCard file and applies them to chats."""

    def __init__(self) -> None:
        # List of (normalized number, display name) tuples; stays empty
        # until load_vcf_file() is called.
        self.contact_mapping = []

    def is_empty(self):
        """Return True when no contact has been loaded."""
        return not self.contact_mapping

    def load_vcf_file(self, vcf_file_path: str, default_country_code: str):
        """Replace the current mapping with the contacts read from ``vcf_file_path``.

        ``default_country_code`` is forwarded to ``read_vcards_file`` for
        numbers that carry no country code of their own.
        """
        self.contact_mapping = read_vcards_file(vcf_file_path, default_country_code)

    def enrich_from_vcards(self, chats):
        """Name every still-unnamed chat whose key starts with a loaded number.

        ``chats`` maps chat keys to chat objects and is modified in place.
        A chat that already has a non-None ``name`` is left untouched.
        """
        for number, name in self.contact_mapping:
            # short number must be a bad contact, lets skip it
            if len(number) <= 5:
                continue

            matching_chats = filter_chats_by_prefix(chats, number)
            for chat in matching_chats.values():
                if getattr(chat, 'name', None) is None:
                    chat.name = name
def read_vcards_file(vcf_file_path, default_country_code: str):
    """Read a vcf file and return its contacts as (number, name) tuples.

    The file is opened as UTF-8 text and parsed with ``vobject``. Cards
    lacking either an ``fn`` or a ``tel`` attribute are ignored. The
    collected contacts are then handed to ``map_number_to_name`` together
    with ``default_country_code``.
    """
    with open(vcf_file_path, mode="r", encoding="utf-8") as vcf:
        # The comprehension drains the vobject generator while the file
        # is still open.
        contacts = [
            {
                "full_name": card.fn.value,
                "numbers": [tel.value for tel in card.tel_list],
            }
            for card in vobject.readComponents(vcf)
            if hasattr(card, 'fn') and hasattr(card, 'tel')
        ]

    return map_number_to_name(contacts, default_country_code)
def filter_chats_by_prefix(chats, prefix: str):
    """Return a new dict holding only the entries of ``chats`` whose key starts with ``prefix``."""
    return {
        chat_key: chat
        for chat_key, chat in chats.items()
        if chat_key.startswith(prefix)
    }
def map_number_to_name(contacts, default_country_code: str):
    """Flatten contacts into a list of (normalized number, display name) tuples.

    Each number of a contact becomes one tuple. When a contact has more
    than one number, its name gets the 1-based position of the number
    appended, e.g. ``"Ann (2)"``; otherwise the plain full name is used.
    Numbers are passed through ``normalize_number`` with
    ``default_country_code``.
    """
    pairs = []
    for contact in contacts:
        numbers = contact['numbers']
        needs_suffix = len(numbers) > 1
        for position, raw_number in enumerate(numbers, start=1):
            normalized = normalize_number(raw_number, default_country_code)
            label = contact['full_name']
            if needs_suffix:
                label = f"{label} ({position})"
            pairs.append((normalized, label))
    return pairs
def normalize_number(number: str, country_code: str):
    """Return ``number`` stripped of formatting and carrying a country code.

    Args:
        number: Phone number as written in the contact, possibly with
            spaces, dashes, brackets, a leading ``+`` or a leading ``00``.
        country_code: Country code (digits only, no ``+``) prepended when
            the number does not already carry one.

    Returns:
        The number without its ``+``/``00`` marker when it already had a
        country code; otherwise ``country_code`` followed by the number
        with one leading zero removed.
    """
    # Clean the number: keep digits and '+', drop every other character.
    number = ''.join(c for c in number if c.isdigit() or c == "+")

    # A number that starts with a + or 00 already has a country code.
    for international_prefix in ('+', "00"):
        if number.startswith(international_prefix):
            return number[len(international_prefix):]

    # A leading zero must be removed before prepending the country code.
    # The check has to look at the number itself; comparing the loop
    # variable above could never match, so the zero was always kept.
    if number.startswith('0'):
        number = number[1:]
    return country_code + number  # fall back

View File

@@ -0,0 +1,20 @@
# from contacts_names_from_vcards import readVCardsFile
from Whatsapp_Chat_Exporter.vcards_contacts import normalize_number, read_vcards_file
def test_readVCardsFile():
    """Reading ``contacts.vcf`` with country code 973 yields at least one entry."""
    mapping = read_vcards_file("contacts.vcf", "973")
    assert len(mapping) > 0
def test_create_number_to_name_dicts():
    """Placeholder: no assertions yet."""
    return None
def test_fuzzy_match_numbers():
    """Placeholder: no assertions yet."""
    return None
def test_normalize_number():
    """Check normalize_number against numbers with and without a country code."""
    # (raw number, default country code, expected result), checked in order.
    cases = (
        ('0531234567', '1', '1531234567'),
        ('001531234567', '2', '1531234567'),
        ('+1531234567', '34', '1531234567'),
        ('053(123)4567', '34', '34531234567'),
        ('0531-234-567', '58', '58531234567'),
    )
    for raw_number, country_code, expected in cases:
        assert normalize_number(raw_number, country_code) == expected

View File

@@ -55,9 +55,10 @@ setuptools.setup(
'crypt12': ["pycryptodome"],
'crypt14': ["pycryptodome"],
'crypt15': ["pycryptodome", "javaobj-py3"],
'all': ["pycryptodome", "javaobj-py3"],
'everything': ["pycryptodome", "javaobj-py3"],
'backup': ["pycryptodome", "javaobj-py3"]
'all': ["pycryptodome", "javaobj-py3", "vobject"],
'everything': ["pycryptodome", "javaobj-py3", "vobject"],
'backup': ["pycryptodome", "javaobj-py3"],
'vcards': ["vobject", "pycryptodome", "javaobj-py3"],
},
entry_points={
"console_scripts": [