mirror of
https://github.com/KnugiHK/WhatsApp-Chat-Exporter.git
synced 2026-02-11 03:32:25 +00:00
Merge pull request #106 from Bnaya/bnaya-iteration
[Feat] Import contact names from exported google vcards and more
This commit is contained in:
12
.github/workflows/compile-binary.yml
vendored
12
.github/workflows/compile-binary.yml
vendored
@@ -16,11 +16,11 @@ jobs:
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.10'
|
||||
python-version: '3.12'
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install pycryptodome javaobj-py3 ordered-set zstandard nuitka
|
||||
pip install pycryptodome javaobj-py3 ordered-set zstandard nuitka==2.3
|
||||
pip install .
|
||||
- name: Build binary with Nuitka
|
||||
run: |
|
||||
@@ -40,11 +40,11 @@ jobs:
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.10'
|
||||
python-version: '3.12'
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install pycryptodome javaobj-py3 ordered-set zstandard nuitka
|
||||
pip install pycryptodome javaobj-py3 ordered-set zstandard nuitka==2.3
|
||||
pip install .
|
||||
- name: Build binary with Nuitka
|
||||
run: |
|
||||
@@ -64,11 +64,11 @@ jobs:
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.10'
|
||||
python-version: '3.12'
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install pycryptodome javaobj-py3 ordered-set zstandard nuitka
|
||||
pip install pycryptodome javaobj-py3 ordered-set zstandard nuitka==2.3
|
||||
pip install .
|
||||
- name: Build binary with Nuitka
|
||||
run: |
|
||||
|
||||
11
.gitignore
vendored
11
.gitignore
vendored
@@ -134,3 +134,14 @@ dmypy.json
|
||||
*.onefile-build/
|
||||
*.exe
|
||||
__main__
|
||||
|
||||
|
||||
# Dev-time intermediates & temp files
|
||||
result/
|
||||
WhatsApp/
|
||||
/*.db
|
||||
/*.db-*
|
||||
/myout
|
||||
/msgstore.db
|
||||
/myout-json
|
||||
.vscode/
|
||||
@@ -35,6 +35,13 @@ Simply invoke the following command from shell.
|
||||
```sh
|
||||
wtsexporter -a
|
||||
```
|
||||
#### Enriching Contact from vCard
|
||||
Usually, the default WhatsApp contact database extracted from your phone will contain the contact names, and the exporter will use it to map your chats. However, in some reported cases the database was never populated.
|
||||
In this case, you can export your contacts to a vCard file from your phone or a cloud provider like Google Contacts. Then, install the necessary dependency and run the following command from the shell:
|
||||
```sh
|
||||
pip install whatsapp-chat-exporter["vcards"]
|
||||
wtsexporter -a --enrich-from-vcards contacts.vcf --default-country-code 852
|
||||
```
|
||||
|
||||
### Encrypted Android WhatsApp Backup
|
||||
In order to support the decryption, install pycryptodome if it is not installed
|
||||
|
||||
@@ -7,10 +7,17 @@ import shutil
|
||||
import json
|
||||
import string
|
||||
import glob
|
||||
try:
|
||||
import vobject
|
||||
except ModuleNotFoundError:
|
||||
vcards_deps_installed = False
|
||||
else:
|
||||
vcards_deps_installed = True
|
||||
from Whatsapp_Chat_Exporter import exported_handler, android_handler
|
||||
from Whatsapp_Chat_Exporter import ios_handler, ios_media_handler
|
||||
from Whatsapp_Chat_Exporter.vcards_contacts import ContactsFromVCards
|
||||
from Whatsapp_Chat_Exporter.data_model import ChatStore
|
||||
from Whatsapp_Chat_Exporter.utility import APPLE_TIME, Crypt, DbType
|
||||
from Whatsapp_Chat_Exporter.utility import APPLE_TIME, Crypt, DbType, chat_is_empty
|
||||
from Whatsapp_Chat_Exporter.utility import check_update, import_from_json
|
||||
from argparse import ArgumentParser, SUPPRESS
|
||||
from datetime import datetime
|
||||
@@ -85,6 +92,20 @@ def main():
|
||||
type=str,
|
||||
const="result.json",
|
||||
help="Save the result to a single JSON file (default if present: result.json)")
|
||||
parser.add_argument(
|
||||
'--avoid-encoding-json',
|
||||
dest='avoid_encoding_json',
|
||||
default=False,
|
||||
action='store_true',
|
||||
help="Don't encode non-ascii characters in the output JSON files")
|
||||
parser.add_argument(
|
||||
'--pretty-print-json',
|
||||
dest='pretty_print_json',
|
||||
default=None,
|
||||
nargs='?',
|
||||
const=2,
|
||||
type=int,
|
||||
help="Pretty print the output JSON.")
|
||||
parser.add_argument(
|
||||
'-d',
|
||||
'--db',
|
||||
@@ -232,6 +253,13 @@ def main():
|
||||
metavar="phone number",
|
||||
help="Exclude chats that match the supplied phone number"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--dont-filter-empty",
|
||||
dest="filter_empty",
|
||||
default=True,
|
||||
action='store_false',
|
||||
help="By default, the exporter will not render chats with no valid message. Setting this flag will cause the exporter to render those."
|
||||
)
|
||||
parser.add_argument(
|
||||
"--per-chat",
|
||||
dest="json_per_chat",
|
||||
@@ -253,6 +281,19 @@ def main():
|
||||
type=int,
|
||||
help="Specify the chunk size for decrypting iOS backup, which may affect the decryption speed."
|
||||
)
|
||||
parser.add_argument(
|
||||
"--enrich-from-vcards",
|
||||
dest="enrich_from_vcards",
|
||||
default=None,
|
||||
help="Path to an exported vcf file from Google contacts export. Add names missing from WhatsApp's default database"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--default-country-code",
|
||||
dest="default_contry_code",
|
||||
default=None,
|
||||
help="Use with --enrich-from-vcards. When numbers in the vcf file does not have a country code, this will be used. 1 is for US, 66 for Thailand etc. Most likely use the number of your own country"
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
# Check for updates
|
||||
@@ -277,6 +318,8 @@ def main():
|
||||
(args.json[-5:] == ".json" and os.path.isfile(args.json[:-5]))
|
||||
):
|
||||
parser.error("When --per-chat is enabled, the destination of --json must be a directory.")
|
||||
if args.enrich_from_vcards is not None and args.default_contry_code is None:
|
||||
parser.error("When --enrich-from-vcards is provided, you must also set --default-country-code")
|
||||
if args.filter_date is not None:
|
||||
if " - " in args.filter_date:
|
||||
start, end = args.filter_date.split(" - ")
|
||||
@@ -320,6 +363,17 @@ def main():
|
||||
|
||||
data = {}
|
||||
|
||||
contact_store = ContactsFromVCards()
|
||||
|
||||
if args.enrich_from_vcards is not None:
|
||||
if not vcards_deps_installed:
|
||||
parser.error(
|
||||
"You don't have the dependency to enrich contacts with vCard.\n"
|
||||
"Read more on how to deal with enriching contacts:\n"
|
||||
"https://github.com/KnugiHK/Whatsapp-Chat-Exporter/blob/main/README.md#usage"
|
||||
)
|
||||
contact_store.load_vcf_file(args.enrich_from_vcards, args.default_contry_code)
|
||||
|
||||
if args.android:
|
||||
contacts = android_handler.contacts
|
||||
messages = android_handler.messages
|
||||
@@ -429,6 +483,9 @@ def main():
|
||||
if args.android:
|
||||
android_handler.calls(db, data, args.timezone_offset, filter_chat)
|
||||
if not args.no_html:
|
||||
if not contact_store.is_empty():
|
||||
contact_store.enrich_from_vcards(data)
|
||||
|
||||
create_html(
|
||||
data,
|
||||
args.output,
|
||||
@@ -436,7 +493,8 @@ def main():
|
||||
args.embedded,
|
||||
args.offline,
|
||||
args.size,
|
||||
args.no_avatar
|
||||
args.no_avatar,
|
||||
args.filter_empty
|
||||
)
|
||||
else:
|
||||
print(
|
||||
@@ -471,7 +529,9 @@ def main():
|
||||
args.template,
|
||||
args.embedded,
|
||||
args.offline,
|
||||
args.size
|
||||
args.size,
|
||||
args.no_avatar,
|
||||
args.filter_empty
|
||||
)
|
||||
for file in glob.glob(r'*.*'):
|
||||
shutil.copy(file, args.output)
|
||||
@@ -483,15 +543,28 @@ def main():
|
||||
args.template,
|
||||
args.embedded,
|
||||
args.offline,
|
||||
args.size
|
||||
args.size,
|
||||
args.no_avatar,
|
||||
args.filter_empty
|
||||
)
|
||||
|
||||
if args.json and not args.import_json:
|
||||
if args.filter_empty:
|
||||
data = {k: v for k, v in data.items() if not chat_is_empty(v)}
|
||||
|
||||
if not contact_store.is_empty():
|
||||
contact_store.enrich_from_vcards(data)
|
||||
|
||||
if isinstance(data[next(iter(data))], ChatStore):
|
||||
data = {jik: chat.to_json() for jik, chat in data.items()}
|
||||
|
||||
if not args.json_per_chat:
|
||||
with open(args.json, "w") as f:
|
||||
data = json.dumps(data)
|
||||
data = json.dumps(
|
||||
data,
|
||||
ensure_ascii=not args.avoid_encoding_json,
|
||||
indent=args.pretty_print_json
|
||||
)
|
||||
print(f"\nWriting JSON file...({int(len(data)/1024/1024)}MB)")
|
||||
f.write(data)
|
||||
else:
|
||||
@@ -506,7 +579,8 @@ def main():
|
||||
else:
|
||||
contact = jik.replace('+', '')
|
||||
with open(f"{args.json}/{contact}.json", "w") as f:
|
||||
f.write(json.dumps(data[jik]))
|
||||
file_content_to_write = json.dumps(data[jik], ensure_ascii=not args.avoid_encoding_json, indent=2 if args.pretty_print_json else None)
|
||||
f.write(file_content_to_write)
|
||||
print(f"Writing JSON file...({index + 1}/{total})", end="\r")
|
||||
print()
|
||||
else:
|
||||
|
||||
@@ -11,7 +11,7 @@ from markupsafe import escape as htmle
|
||||
from hashlib import sha256
|
||||
from base64 import b64decode, b64encode
|
||||
from Whatsapp_Chat_Exporter.data_model import ChatStore, Message
|
||||
from Whatsapp_Chat_Exporter.utility import MAX_SIZE, ROW_SIZE, DbType, determine_metadata, JidType
|
||||
from Whatsapp_Chat_Exporter.utility import MAX_SIZE, ROW_SIZE, DbType, determine_metadata, JidType, chat_is_empty
|
||||
from Whatsapp_Chat_Exporter.utility import rendering, Crypt, Device, get_file_name, setup_template
|
||||
from Whatsapp_Chat_Exporter.utility import brute_force_offset, CRYPT14_OFFSETS, get_status_location
|
||||
from Whatsapp_Chat_Exporter.utility import get_chat_condition, slugify
|
||||
@@ -157,7 +157,11 @@ def contacts(db, data):
|
||||
c = db.cursor()
|
||||
c.execute("""SELECT count() FROM wa_contacts""")
|
||||
total_row_number = c.fetchone()[0]
|
||||
print(f"Processing contacts...({total_row_number})")
|
||||
if total_row_number == 0:
|
||||
print("No contacts profiles found in the default database, consider using --enrich-from-vcards for adopting names from exported contacts from Google")
|
||||
return False
|
||||
else:
|
||||
print(f"Processing contacts...({total_row_number})")
|
||||
|
||||
c.execute("""SELECT jid, COALESCE(display_name, wa_name) as display_name, status FROM wa_contacts; """)
|
||||
row = c.fetchone()
|
||||
@@ -745,7 +749,8 @@ def create_html(
|
||||
embedded=False,
|
||||
offline_static=False,
|
||||
maximum_size=None,
|
||||
no_avatar=False
|
||||
no_avatar=False,
|
||||
filter_empty=True
|
||||
):
|
||||
template = setup_template(template, no_avatar)
|
||||
|
||||
@@ -759,7 +764,7 @@ def create_html(
|
||||
|
||||
for current, contact in enumerate(data):
|
||||
chat = data[contact]
|
||||
if len(chat.messages) == 0:
|
||||
if filter_empty and chat_is_empty(chat):
|
||||
continue
|
||||
safe_file_name, name = get_file_name(contact, chat)
|
||||
|
||||
|
||||
@@ -168,6 +168,12 @@ def get_chat_condition(filter, include, column):
|
||||
else:
|
||||
return ""
|
||||
|
||||
def _is_message_empty(message):
|
||||
return (message.data is None or message.data == "") and not message.media
|
||||
|
||||
def chat_is_empty(chat: ChatStore):
    """Return True when *chat* has no messages or only empty ones.

    "Empty" for a single message is decided by ``_is_message_empty``.
    """
    messages = chat.messages
    if len(messages) == 0:
        return True
    return all(_is_message_empty(entry) for entry in messages.values())
|
||||
|
||||
|
||||
# Android Specific
|
||||
CRYPT14_OFFSETS = (
|
||||
|
||||
78
Whatsapp_Chat_Exporter/vcards_contacts.py
Normal file
78
Whatsapp_Chat_Exporter/vcards_contacts.py
Normal file
@@ -0,0 +1,78 @@
|
||||
import itertools
|
||||
from typing import List, TypedDict
|
||||
import vobject
|
||||
|
||||
|
||||
class ExportedContactNumbers(TypedDict):
    """One contact read from a vCard file: its name and its phone numbers."""

    # Value of the card's ``fn`` property.
    full_name: str
    # Values of every ``tel`` entry on the card, as written in the file.
    numbers: List[str]
|
||||
|
||||
|
||||
class ContactsFromVCards:
    """Contact names loaded from a vCard file, used to name unnamed chats."""

    def __init__(self) -> None:
        # List of (normalized_number, display_name) tuples; empty until
        # load_vcf_file() has been called.
        self.contact_mapping = []

    def is_empty(self):
        """Return True while no contact mapping has been loaded."""
        return self.contact_mapping == []

    def load_vcf_file(self, vcf_file_path: str, default_country_code: str):
        """Replace the current mapping with the contacts read from a vCard file."""
        mapping = read_vcards_file(vcf_file_path, default_country_code)
        self.contact_mapping = mapping

    def enrich_from_vcards(self, chats):
        """Set ``name`` on chats whose key starts with a known number.

        Only chats without a ``name`` attribute, or whose ``name`` is
        ``None``, are modified; existing names are left untouched.
        """
        for number, name in self.contact_mapping:
            # short number must be a bad contact, lets skip it
            if len(number) > 5:
                matching_chats = filter_chats_by_prefix(chats, number)
                for chat in matching_chats.values():
                    if getattr(chat, 'name', None) is None:
                        chat.name = name
|
||||
|
||||
|
||||
def read_vcards_file(vcf_file_path, default_country_code: str):
    """Read a vCard file and return its contacts as (number, name) pairs.

    Cards without an ``fn`` or a ``tel`` property are ignored. The
    collected contacts are passed to ``map_number_to_name`` together with
    *default_country_code*, and its result is returned.
    """
    contacts = []
    with open(vcf_file_path, mode="r", encoding="utf-8") as vcf:
        for card in vobject.readComponents(vcf):
            if hasattr(card, 'fn') and hasattr(card, 'tel'):
                contacts.append({
                    "full_name": card.fn.value,
                    "numbers": [tel.value for tel in card.tel_list],
                })

    return map_number_to_name(contacts, default_country_code)
|
||||
|
||||
|
||||
def filter_chats_by_prefix(chats, prefix: str):
    """Return a new dict holding the entries of *chats* whose key starts with *prefix*."""
    matched = {}
    for chat_id, chat in chats.items():
        if chat_id.startswith(prefix):
            matched[chat_id] = chat
    return matched
|
||||
|
||||
|
||||
def map_number_to_name(contacts, default_country_code: str):
    """Flatten contacts into a list of ``(normalized_number, name)`` tuples.

    Each number is passed through ``normalize_number``. When a contact has
    more than one number, the name gets a 1-based ``" (n)"`` suffix so the
    entries stay distinguishable.
    """
    mapping = []
    for contact in contacts:
        numbers = contact['numbers']
        needs_suffix = len(numbers) > 1
        for position, raw_number in enumerate(numbers, start=1):
            normalized = normalize_number(raw_number, default_country_code)
            if needs_suffix:
                label = f"{contact['full_name']} ({position})"
            else:
                label = contact['full_name']
            mapping.append((normalized, label))
    return mapping
|
||||
|
||||
|
||||
def normalize_number(number: str, country_code: str):
    """Normalize a phone number to digits that start with a country code.

    Args:
        number: Phone number as written in the contact; may contain spaces,
            dashes, parentheses and similar separators.
        country_code: Country code (digits only) to prepend when *number*
            does not carry one itself.

    Returns:
        The number without separators. If it started with ``+`` or ``00``
        that prefix is removed and the rest is returned as is; otherwise a
        single leading ``0`` is removed and *country_code* is prepended.
    """
    # Clean the number: keep only digits and '+'.
    number = ''.join(c for c in number if c.isdigit() or c == "+")

    # A number that starts with '+' or '00' already has a country code.
    for international_prefix in ('+', "00"):
        if number.startswith(international_prefix):
            return number[len(international_prefix):]

    # National format: drop one leading zero before adding the country code.
    # This must test the number itself; the previous code compared the
    # leftover loop variable, so the zero was never removed.
    if number.startswith('0'):
        number = number[1:]
    return country_code + number  # fall back
|
||||
20
Whatsapp_Chat_Exporter/vcards_contacts_test.py
Normal file
20
Whatsapp_Chat_Exporter/vcards_contacts_test.py
Normal file
@@ -0,0 +1,20 @@
|
||||
# from contacts_names_from_vcards import readVCardsFile
|
||||
|
||||
from Whatsapp_Chat_Exporter.vcards_contacts import normalize_number, read_vcards_file
|
||||
|
||||
|
||||
def test_readVCardsFile():
    """Reading ``contacts.vcf`` with country code 973 yields at least one entry."""
    mapping = read_vcards_file("contacts.vcf", "973")
    assert len(mapping) > 0
|
||||
|
||||
def test_create_number_to_name_dicts():
    """Placeholder test: contains no assertions yet."""
|
||||
|
||||
def test_fuzzy_match_numbers():
    """Placeholder test: contains no assertions yet."""
|
||||
|
||||
def test_normalize_number():
    """Check ``normalize_number`` against (raw number, country code, expected) cases."""
    cases = (
        ('0531234567', '1', '1531234567'),
        ('001531234567', '2', '1531234567'),
        ('+1531234567', '34', '1531234567'),
        ('053(123)4567', '34', '34531234567'),
        ('0531-234-567', '58', '58531234567'),
    )
    for raw_number, country_code, expected in cases:
        assert normalize_number(raw_number, country_code) == expected
|
||||
7
setup.py
7
setup.py
@@ -55,9 +55,10 @@ setuptools.setup(
|
||||
'crypt12': ["pycryptodome"],
|
||||
'crypt14': ["pycryptodome"],
|
||||
'crypt15': ["pycryptodome", "javaobj-py3"],
|
||||
'all': ["pycryptodome", "javaobj-py3"],
|
||||
'everything': ["pycryptodome", "javaobj-py3"],
|
||||
'backup': ["pycryptodome", "javaobj-py3"]
|
||||
'all': ["pycryptodome", "javaobj-py3", "vobject"],
|
||||
'everything': ["pycryptodome", "javaobj-py3", "vobject"],
|
||||
'backup': ["pycryptodome", "javaobj-py3"],
|
||||
'vcards': ["vobject", "pycryptodome", "javaobj-py3"],
|
||||
},
|
||||
entry_points={
|
||||
"console_scripts": [
|
||||
|
||||
Reference in New Issue
Block a user