From 042f6f9024f6ae6d7c7962743404f4df5933a4ce Mon Sep 17 00:00:00 2001 From: KnugiHK <24708955+KnugiHK@users.noreply.github.com> Date: Mon, 23 May 2022 20:12:50 +0800 Subject: [PATCH 01/58] Remove unused dependencies --- Whatsapp_Chat_Exporter/extract.py | 2 -- Whatsapp_Chat_Exporter/extract_iphone.py | 2 -- 2 files changed, 4 deletions(-) diff --git a/Whatsapp_Chat_Exporter/extract.py b/Whatsapp_Chat_Exporter/extract.py index 6a4aab3..f2aebf1 100644 --- a/Whatsapp_Chat_Exporter/extract.py +++ b/Whatsapp_Chat_Exporter/extract.py @@ -4,10 +4,8 @@ import sqlite3 import json import jinja2 import os -import requests import shutil import re -import pkgutil import io import hmac from pathlib import Path diff --git a/Whatsapp_Chat_Exporter/extract_iphone.py b/Whatsapp_Chat_Exporter/extract_iphone.py index 546be23..7400688 100644 --- a/Whatsapp_Chat_Exporter/extract_iphone.py +++ b/Whatsapp_Chat_Exporter/extract_iphone.py @@ -4,9 +4,7 @@ import sqlite3 import json import jinja2 import os -import requests import shutil -import pkgutil from pathlib import Path from bleach import clean as sanitize from markupsafe import Markup From 996ee655258baede7bc2fca3754cd3c33e2b25c3 Mon Sep 17 00:00:00 2001 From: KnugiHK <24708955+KnugiHK@users.noreply.github.com> Date: Wed, 25 May 2022 18:28:07 +0800 Subject: [PATCH 02/58] Bug fix Duplicated folder creation https://github.com/KnugiHK/Whatsapp-Chat-Exporter/issues/14 --- Whatsapp_Chat_Exporter/extract_iphone_media.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Whatsapp_Chat_Exporter/extract_iphone_media.py b/Whatsapp_Chat_Exporter/extract_iphone_media.py index 83b1576..9b6f82e 100644 --- a/Whatsapp_Chat_Exporter/extract_iphone_media.py +++ b/Whatsapp_Chat_Exporter/extract_iphone_media.py @@ -115,7 +115,10 @@ def extract_media(base_dir): folder = hashes[:2] flags = row[2] if flags == 2: - os.mkdir(destination) + try: + os.mkdir(destination) + except FileExistsError: + pass elif flags == 1: shutil.copyfile(f"{base_dir}/{folder}/{hashes}", destination) i += 1 From 422ab2f784451ff4f439d6919d9d73c39fd751d5 Mon Sep 17 00:00:00 2001 From: KnugiHK <24708955+knugi0123@users.noreply.github.com> Date: Sun, 3 Jul 2022 12:30:01 +0800 Subject: [PATCH 03/58] Link to the file intead of showing the path directly Not tested Ref: https://github.com/KnugiHK/Whatsapp-Chat-Exporter/issues/15 --- Whatsapp_Chat_Exporter/whatsapp.html | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Whatsapp_Chat_Exporter/whatsapp.html b/Whatsapp_Chat_Exporter/whatsapp.html index d653ce5..d467a87 100644 --- a/Whatsapp_Chat_Exporter/whatsapp.html +++ b/Whatsapp_Chat_Exporter/whatsapp.html @@ -92,7 +92,7 @@ {% elif "/" in msg.mime %}
-

The file cannot be displayed here, however it should be located at {{ msg.data }}

+

The file cannot be displayed here, however it should be located at here

{% else %} {% filter escape %}{{ msg.data }}{% endfilter %} @@ -148,7 +148,7 @@ {% elif "/" in msg.mime %}
-

The file cannot be displayed here, however it should be located at {{ msg.data }}

+

The file cannot be displayed here, however it should be located at here

{% else %} {% filter escape %}{{ msg.data }}{% endfilter %} From 48f667d02b96e64a9432adda94315b2731472435 Mon Sep 17 00:00:00 2001 From: KnugiHK <24708955+KnugiHK@users.noreply.github.com> Date: Mon, 5 Sep 2022 12:16:07 +0800 Subject: [PATCH 04/58] Implement exporting 64-digit crypt15 encryption key https://github.com/KnugiHK/Whatsapp-Chat-Exporter/issues/20 --- Whatsapp_Chat_Exporter/__main__.py | 9 ++++++++- Whatsapp_Chat_Exporter/extract.py | 11 +++++++---- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/Whatsapp_Chat_Exporter/__main__.py b/Whatsapp_Chat_Exporter/__main__.py index d449a91..2ccb4a8 100644 --- a/Whatsapp_Chat_Exporter/__main__.py +++ b/Whatsapp_Chat_Exporter/__main__.py @@ -85,6 +85,13 @@ def main(): default=False, action='store_true', help="Embed media into HTML file") + parser.add_option( + "-s", + "--showkey", + dest="showkey", + default=False, + action='store_true', + help="Show the HEX key used to decrypt the database") (options, args) = parser.parse_args() if options.android and options.iphone: @@ -121,7 +128,7 @@ def main(): elif all(char in string.hexdigits for char in options.key): key = bytes.fromhex(options.key) db = open(options.backup, "rb").read() - error = extract.decrypt_backup(db, key, msg_db, crypt) + error = extract.decrypt_backup(db, key, msg_db, crypt, options.showkey) if error != 0: if error == 1: print("Dependencies of decrypt_backup and/or extract_encrypted_key" diff --git a/Whatsapp_Chat_Exporter/extract.py b/Whatsapp_Chat_Exporter/extract.py index f2aebf1..f13eecf 100644 --- a/Whatsapp_Chat_Exporter/extract.py +++ b/Whatsapp_Chat_Exporter/extract.py @@ -71,7 +71,7 @@ def _generate_hmac_of_hmac(key_stream): b"backup encryption\x01", sha256 ) - return key.digest() + return key.digest(), key_stream def _extract_encrypted_key(keyfile): @@ -82,7 +82,7 @@ def _extract_encrypted_key(keyfile): return _generate_hmac_of_hmac(key_stream) -def decrypt_backup(database, key, output, crypt=Crypt.CRYPT14): +def decrypt_backup(database, key, output, crypt=Crypt.CRYPT14, show_crypt15=False): if not support_backup: return 1 if isinstance(key, io.IOBase): @@ -119,9 +119,12 @@ def decrypt_backup(database, key, output, crypt=Crypt.CRYPT14): if crypt == Crypt.CRYPT15: if len(key) == 32: - main_key = _generate_hmac_of_hmac(key) + main_key, hex_key = _generate_hmac_of_hmac(key) else: - main_key = _extract_encrypted_key(key) + main_key, hex_key = _extract_encrypted_key(key) + if show_crypt15: + hex_key = [hex_key.hex()[c:c+4] for c in range(0, len(hex_key.hex()), 4)] + print("The HEX key of the crypt15 backup is: " + ' '.join(hex_key)) else: main_key = key[126:] decompressed = False From 876729eb81284447067215ac8e179fa44f7addb7 Mon Sep 17 00:00:00 2001 From: KnugiHK <24708955+KnugiHK@users.noreply.github.com> Date: Mon, 5 Sep 2022 12:48:36 +0800 Subject: [PATCH 05/58] A more concrete way to determine database offset --- Whatsapp_Chat_Exporter/extract.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Whatsapp_Chat_Exporter/extract.py b/Whatsapp_Chat_Exporter/extract.py index f13eecf..2b0b6c9 100644 --- a/Whatsapp_Chat_Exporter/extract.py +++ b/Whatsapp_Chat_Exporter/extract.py @@ -112,7 +112,8 @@ def decrypt_backup(database, key, output, crypt=Crypt.CRYPT14, show_crypt15=Fals raise ValueError("The crypt15 file must be at least 131 bytes") t1 = t2 = None iv = database[8:24] - db_ciphertext = database[131:] + db_offset = database[0] + 2 # Skip protobuf + protobuf size and backup type + db_ciphertext = database[proto_len:] if t1 != t2: raise ValueError("The signature of key file and backup file mismatch") From d9a77e0eec9e41918f5dc316b5f6ec722e2b3f81 Mon Sep 17 00:00:00 2001 From: KnugiHK <24708955+KnugiHK@users.noreply.github.com> Date: Mon, 5 Sep 2022 12:49:12 +0800 Subject: [PATCH 06/58] Forgot to change the variable lol --- Whatsapp_Chat_Exporter/extract.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Whatsapp_Chat_Exporter/extract.py b/Whatsapp_Chat_Exporter/extract.py index 2b0b6c9..4c119a1 100644 --- a/Whatsapp_Chat_Exporter/extract.py +++ b/Whatsapp_Chat_Exporter/extract.py @@ -113,7 +113,7 @@ def decrypt_backup(database, key, output, crypt=Crypt.CRYPT14, show_crypt15=Fals t1 = t2 = None iv = database[8:24] db_offset = database[0] + 2 # Skip protobuf + protobuf size and backup type - db_ciphertext = database[proto_len:] + db_ciphertext = database[db_offset:] if t1 != t2: raise ValueError("The signature of key file and backup file mismatch") From 41d365926946a89b1ba73435a42948f666d942ff Mon Sep 17 00:00:00 2001 From: KnugiHK <24708955+KnugiHK@users.noreply.github.com> Date: Wed, 21 Dec 2022 20:16:37 +0800 Subject: [PATCH 07/58] Prepare for porting --- Whatsapp_Chat_Exporter/__main__.py | 4 +- Whatsapp_Chat_Exporter/extract_new.py | 540 ++++++++++++++++++++++++++ 2 files changed, 543 insertions(+), 1 deletion(-) create mode 100644 Whatsapp_Chat_Exporter/extract_new.py diff --git a/Whatsapp_Chat_Exporter/__main__.py b/Whatsapp_Chat_Exporter/__main__.py index 2ccb4a8..7712ce0 100644 --- a/Whatsapp_Chat_Exporter/__main__.py +++ b/Whatsapp_Chat_Exporter/__main__.py @@ -139,7 +139,7 @@ def main(): "Possibly incorrect offsets used in decryption.") exit(4) else: - print("Unknown error occurred.") + print("Unknown error occurred.", error) exit(5) if options.wa is None: contact_db = "wa.db" @@ -153,6 +153,7 @@ def main(): if os.path.isfile(contact_db): with sqlite3.connect(contact_db) as db: + db.row_factory = sqlite3.Row contacts(db, data) elif options.iphone: @@ -178,6 +179,7 @@ def main(): if os.path.isfile(msg_db): with sqlite3.connect(msg_db) as db: + db.row_factory = sqlite3.Row messages(db, data) media(db, data, options.media) vcard(db, data) diff --git a/Whatsapp_Chat_Exporter/extract_new.py b/Whatsapp_Chat_Exporter/extract_new.py new file mode 100644 index 0000000..4c119a1 --- /dev/null +++ b/Whatsapp_Chat_Exporter/extract_new.py @@ -0,0 +1,540 @@ +#!/usr/bin/python3 + +import sqlite3 +import json +import jinja2 +import os +import shutil +import re +import io +import hmac +from pathlib import Path +from bleach import clean as sanitize +from markupsafe import Markup +from datetime import datetime +from enum import Enum +from mimetypes import MimeTypes +from hashlib import sha256 + +try: + import zlib + from Crypto.Cipher import AES +except ModuleNotFoundError: + support_backup = False +else: + support_backup = True +try: + import javaobj +except ModuleNotFoundError: + support_crypt15 = False +else: + support_crypt15 = True + +def sanitize_except(html): + return Markup(sanitize(html, tags=["br"])) + + +def determine_day(last, current): + last = datetime.fromtimestamp(last).date() + current = datetime.fromtimestamp(current).date() + if last == current: + return None + else: + return current + +CRYPT14_OFFSETS = [ + {"iv": 67, "db": 191}, + {"iv": 67, "db": 190}, + {"iv": 66, "db": 99} +] + + +class Crypt(Enum): + CRYPT15 = 15 + CRYPT14 = 14 + CRYPT12 = 12 + + +def brute_force_offset(): + for iv in range(0, 200): + for db in range(0, 200): + yield iv, iv + 16, db + + +def _generate_hmac_of_hmac(key_stream): + key = hmac.new( + hmac.new( + b'\x00' * 32, + key_stream, + sha256 + ).digest(), + b"backup encryption\x01", + sha256 + ) + return key.digest(), key_stream + + +def _extract_encrypted_key(keyfile): + key_stream = b"" + for byte in javaobj.loads(keyfile): + key_stream += byte.to_bytes(1, "big", signed=True) + + return _generate_hmac_of_hmac(key_stream) + + +def decrypt_backup(database, key, output, crypt=Crypt.CRYPT14, show_crypt15=False): + if not support_backup: + return 1 + if isinstance(key, io.IOBase): + key = key.read() + if crypt is not Crypt.CRYPT15: + t1 = key[30:62] + if crypt is not Crypt.CRYPT15 and len(key) != 158: + raise ValueError("The key file must be 158 bytes") + if crypt == Crypt.CRYPT14: + if len(database) < 191: + raise ValueError("The crypt14 file must be at least 191 bytes") + current_try = 0 + offsets = CRYPT14_OFFSETS[current_try] + t2 = database[15:47] + iv = database[offsets["iv"]:offsets["iv"] + 16] + db_ciphertext = database[offsets["db"]:] + elif crypt == Crypt.CRYPT12: + if len(database) < 67: + raise ValueError("The crypt12 file must be at least 67 bytes") + t2 = database[3:35] + iv = database[51:67] + db_ciphertext = database[67:-20] + elif crypt == Crypt.CRYPT15: + if not support_crypt15: + return 1 + if len(database) < 131: + raise ValueError("The crypt15 file must be at least 131 bytes") + t1 = t2 = None + iv = database[8:24] + db_offset = database[0] + 2 # Skip protobuf + protobuf size and backup type + db_ciphertext = database[db_offset:] + + if t1 != t2: + raise ValueError("The signature of key file and backup file mismatch") + + if crypt == Crypt.CRYPT15: + if len(key) == 32: + main_key, hex_key = _generate_hmac_of_hmac(key) + else: + main_key, hex_key = _extract_encrypted_key(key) + if show_crypt15: + hex_key = [hex_key.hex()[c:c+4] for c in range(0, len(hex_key.hex()), 4)] + print("The HEX key of the crypt15 backup is: " + ' '.join(hex_key)) + else: + main_key = key[126:] + decompressed = False + while not decompressed: + cipher = AES.new(main_key, AES.MODE_GCM, iv) + db_compressed = cipher.decrypt(db_ciphertext) + try: + db = zlib.decompress(db_compressed) + except zlib.error: + if crypt == Crypt.CRYPT14: + current_try += 1 + if current_try < len(CRYPT14_OFFSETS): + offsets = CRYPT14_OFFSETS[current_try] + iv = database[offsets["iv"]:offsets["iv"] + 16] + db_ciphertext = database[offsets["db"]:] + continue + else: + print("Common offsets are not applicable to " + "your backup. Trying to brute force it...") + for start_iv, end_iv, start_db in brute_force_offset(): + iv = database[start_iv:end_iv] + db_ciphertext = database[start_db:] + cipher = AES.new(main_key, AES.MODE_GCM, iv) + db_compressed = cipher.decrypt(db_ciphertext) + try: + db = zlib.decompress(db_compressed) + except zlib.error: + continue + else: + decompressed = True + print( + f"The offsets of your IV and database are {start_iv} and " + f"{start_db}, respectively. To include your offsets in the " + "program, please report it by creating an issue on GitHub: " + "https://github.com/KnugiHK/Whatsapp-Chat-Exporter/issues/new" + ) + break + if not decompressed: + return 2 + else: + return 3 + else: + decompressed = True + if db[0:6].upper() == b"SQLITE": + with open(output, "wb") as f: + f.write(db) + return 0 + else: + raise ValueError("The plaintext is not a SQLite database. Did you use the key to encrypt something...") + + +def contacts(db, data): + # Get contacts + c = db.cursor() + c.execute("""SELECT count() FROM wa_contacts""") + total_row_number = c.fetchone()[0] + print(f"Gathering contacts...({total_row_number})") + + c.execute("""SELECT jid, display_name FROM wa_contacts; """) + row = c.fetchone() + while row is not None: + data[row[0]] = {"name": row[1], "messages": {}} + row = c.fetchone() + + +def messages(db, data): + # Get message history + c = db.cursor() + c.execute("""SELECT count() FROM messages""") + total_row_number = c.fetchone()[0] + print(f"Gathering messages...(0/{total_row_number})", end="\r") + + phone_number_re = re.compile(r"[0-9]+@s.whatsapp.net") + c.execute("""SELECT messages.key_remote_jid, + messages._id, + messages.key_from_me, + messages.timestamp, + messages.data, + messages.status, + messages.edit_version, + messages.thumb_image, + messages.remote_resource, + messages.media_wa_type, + messages.latitude, + messages.longitude, + messages_quotes.key_id as quoted, + messages.key_id, + messages_quotes.data, + messages.media_caption + FROM messages + LEFT JOIN messages_quotes + ON messages.quoted_row_id = messages_quotes._id + WHERE messages.key_remote_jid <> '-1';""") + i = 0 + content = c.fetchone() + while content is not None: + if content[0] not in data: + data[content[0]] = {"name": None, "messages": {}} + data[content[0]]["messages"][content[1]] = { + "from_me": bool(content[2]), + "timestamp": content[3]/1000, + "time": datetime.fromtimestamp(content[3]/1000).strftime("%H:%M"), + "media": False, + "key_id": content[13], + "meta": False, + "data": None + } + if "-" in content[0] and content[2] == 0: + name = None + if content[8] in data: + name = data[content[8]]["name"] + if "@" in content[8]: + fallback = content[8].split('@')[0] + else: + fallback = None + else: + fallback = None + + data[content[0]]["messages"][content[1]]["sender"] = name or fallback + else: + data[content[0]]["messages"][content[1]]["sender"] = None + + if content[12] is not None: + data[content[0]]["messages"][content[1]]["reply"] = content[12] + data[content[0]]["messages"][content[1]]["quoted_data"] = content[14] + else: + data[content[0]]["messages"][content[1]]["reply"] = None + + if content[15] is not None: + data[content[0]]["messages"][content[1]]["caption"] = content[15] + else: + data[content[0]]["messages"][content[1]]["caption"] = None + + if content[5] == 6: + if "-" in content[0]: + # Is Group + if content[4] is not None: + try: + int(content[4]) + except ValueError: + msg = f"The group name changed to {content[4]}" + data[content[0]]["messages"][content[1]]["data"] = msg + data[content[0]]["messages"][content[1]]["meta"] = True + else: + del data[content[0]]["messages"][content[1]] + else: + thumb_image = content[7] + if thumb_image is not None: + if b"\x00\x00\x01\x74\x00\x1A" in thumb_image: + # Add user + added = phone_number_re.search( + thumb_image.decode("unicode_escape"))[0] + if added in data: + name_right = data[added]["name"] + else: + name_right = added.split('@')[0] + if content[8] is not None: + if content[8] in data: + name_left = data[content[8]]["name"] + else: + name_left = content[8].split('@')[0] + msg = f"{name_left} added {name_right or 'You'}" + else: + msg = f"Added {name_right or 'You'}" + elif b"\xac\xed\x00\x05\x74\x00" in thumb_image: + # Changed number + original = content[8].split('@')[0] + changed = thumb_image[7:].decode().split('@')[0] + msg = f"{original} changed to {changed}" + data[content[0]]["messages"][content[1]]["data"] = msg + data[content[0]]["messages"][content[1]]["meta"] = True + else: + if content[4] is None: + del data[content[0]]["messages"][content[1]] + else: + # Private chat + if content[4] is None and content[7] is None: + del data[content[0]]["messages"][content[1]] + + else: + if content[2] == 1: + if content[5] == 5 and content[6] == 7: + msg = "Message deleted" + data[content[0]]["messages"][content[1]]["meta"] = True + else: + if content[9] == "5": + msg = f"Location shared: {content[10], content[11]}" + data[content[0]]["messages"][content[1]]["meta"] = True + else: + msg = content[4] + if msg is not None: + if "\r\n" in msg: + msg = msg.replace("\r\n", "
") + if "\n" in msg: + msg = msg.replace("\n", "
") + else: + if content[5] == 0 and content[6] == 7: + msg = "Message deleted" + data[content[0]]["messages"][content[1]]["meta"] = True + else: + if content[9] == "5": + msg = f"Location shared: {content[10], content[11]}" + data[content[0]]["messages"][content[1]]["meta"] = True + else: + msg = content[4] + if msg is not None: + if "\r\n" in msg: + msg = msg.replace("\r\n", "
") + if "\n" in msg: + msg = msg.replace("\n", "
") + + data[content[0]]["messages"][content[1]]["data"] = msg + + i += 1 + if i % 1000 == 0: + print(f"Gathering messages...({i}/{total_row_number})", end="\r") + content = c.fetchone() + print(f"Gathering messages...({total_row_number}/{total_row_number})", end="\r") + + +def media(db, data, media_folder): + # Get media + c = db.cursor() + c.execute("""SELECT count() FROM message_media""") + total_row_number = c.fetchone()[0] + print(f"\nGathering media...(0/{total_row_number})", end="\r") + i = 0 + c.execute("""SELECT messages.key_remote_jid, + message_row_id, + file_path, + message_url, + mime_type, + media_key + FROM message_media + INNER JOIN messages + ON message_media.message_row_id = messages._id + ORDER BY messages.key_remote_jid ASC""") + content = c.fetchone() + mime = MimeTypes() + while content is not None: + file_path = f"{media_folder}/{content[2]}" + data[content[0]]["messages"][content[1]]["media"] = True + if os.path.isfile(file_path): + data[content[0]]["messages"][content[1]]["data"] = file_path + if content[4] is None: + guess = mime.guess_type(file_path)[0] + if guess is not None: + data[content[0]]["messages"][content[1]]["mime"] = guess + else: + data[content[0]]["messages"][content[1]]["mime"] = "data/data" + else: + data[content[0]]["messages"][content[1]]["mime"] = content[4] + else: + # if "https://mmg" in content[4]: + # try: + # r = requests.get(content[3]) + # if r.status_code != 200: + # raise RuntimeError() + # except: + # data[content[0]]["messages"][content[1]]["data"] = "{The media is missing}" + # data[content[0]]["messages"][content[1]]["media"] = True + # data[content[0]]["messages"][content[1]]["mime"] = "media" + # else: + data[content[0]]["messages"][content[1]]["data"] = "The media is missing" + data[content[0]]["messages"][content[1]]["mime"] = "media" + data[content[0]]["messages"][content[1]]["meta"] = True + i += 1 + if i % 100 == 0: + print(f"Gathering media...({i}/{total_row_number})", end="\r") + content = c.fetchone() + print( + f"Gathering media...({total_row_number}/{total_row_number})", end="\r") + + +def vcard(db, data): + c = db.cursor() + c.execute("""SELECT message_row_id, + messages.key_remote_jid, + vcard, + messages.media_name + FROM messages_vcards + INNER JOIN messages + ON messages_vcards.message_row_id = messages._id + ORDER BY messages.key_remote_jid ASC;""") + rows = c.fetchall() + total_row_number = len(rows) + print(f"\nGathering vCards...(0/{total_row_number})", end="\r") + base = "WhatsApp/vCards" + if not os.path.isdir(base): + Path(base).mkdir(parents=True, exist_ok=True) + for index, row in enumerate(rows): + media_name = row[3] if row[3] else "" + file_name = "".join(x for x in media_name if x.isalnum()) + file_path = f"{base}/{file_name}.vcf" + if not os.path.isfile(file_path): + with open(file_path, "w", encoding="utf-8") as f: + f.write(row[2]) + data[row[1]]["messages"][row[0]]["data"] = media_name + \ + "The vCard file cannot be displayed here, " \ + f"however it should be located at {file_path}" + data[row[1]]["messages"][row[0]]["mime"] = "text/x-vcard" + data[row[1]]["messages"][row[0]]["meta"] = True + print(f"Gathering vCards...({index + 1}/{total_row_number})", end="\r") + + +def create_html(data, output_folder, template=None, embedded=False): + if template is None: + template_dir = os.path.dirname(__file__) + template_file = "whatsapp.html" + else: + template_dir = os.path.dirname(template) + template_file = os.path.basename(template) + templateLoader = jinja2.FileSystemLoader(searchpath=template_dir) + templateEnv = jinja2.Environment(loader=templateLoader) + templateEnv.globals.update(determine_day=determine_day) + templateEnv.filters['sanitize_except'] = sanitize_except + template = templateEnv.get_template(template_file) + + total_row_number = len(data) + print(f"\nCreating HTML...(0/{total_row_number})", end="\r") + + if not os.path.isdir(output_folder): + os.mkdir(output_folder) + + for current, contact in enumerate(data): + if len(data[contact]["messages"]) == 0: + continue + phone_number = contact.split('@')[0] + if "-" in contact: + file_name = "" + else: + file_name = phone_number + + if data[contact]["name"] is not None: + if file_name != "": + file_name += "-" + file_name += data[contact]["name"].replace("/", "-") + name = data[contact]["name"] + else: + name = phone_number + safe_file_name = '' + safe_file_name = "".join(x for x in file_name if x.isalnum() or x in "- ") + with open(f"{output_folder}/{safe_file_name}.html", "w", encoding="utf-8") as f: + f.write( + template.render( + name=name, + msgs=data[contact]["messages"].values(), + my_avatar=None, + their_avatar=f"WhatsApp/Avatars/{contact}.j" + ) + ) + if current % 10 == 0: + print(f"Creating HTML...({current}/{total_row_number})", end="\r") + + print(f"Creating HTML...({total_row_number}/{total_row_number})", end="\r") + + +if __name__ == "__main__": + from optparse import OptionParser + parser = OptionParser() + parser.add_option( + "-w", + "--wa", + dest="wa", + default="wa.db", + help="Path to contact database") + parser.add_option( + "-m", + "--media", + dest="media", + default="WhatsApp", + help="Path to WhatsApp media folder" + ) + # parser.add_option( + # "-t", + # "--template", + # dest="html", + # default="wa.db", + # help="Path to HTML template") + (options, args) = parser.parse_args() + msg_db = "msgstore.db" + output_folder = "temp" + contact_db = options.wa + media_folder = options.media + + if len(args) == 1: + msg_db = args[0] + elif len(args) == 2: + msg_db = args[0] + output_folder = args[1] + + data = {} + + if os.path.isfile(contact_db): + with sqlite3.connect(contact_db) as db: + contacts(db, data) + if os.path.isfile(msg_db): + with sqlite3.connect(msg_db) as db: + messages(db, data) + media(db, data, media_folder) + vcard(db, data) + create_html(data, output_folder) + + if not os.path.isdir(f"{output_folder}/WhatsApp"): + shutil.move(media_folder, f"{output_folder}/") + + with open("result.json", "w") as f: + data = json.dumps(data) + print(f"\nWriting JSON file...({int(len(data)/1024/1024)}MB)") + f.write(data) + + print("Everything is done!") From 6ca7e81484950c1da0d5e9a58c5ca9322463a6c6 Mon Sep 17 00:00:00 2001 From: KnugiHK <24708955+KnugiHK@users.noreply.github.com> Date: Wed, 21 Dec 2022 21:28:54 +0800 Subject: [PATCH 08/58] Support new WhatsApp database schema https://github.com/KnugiHK/Whatsapp-Chat-Exporter/issues/9 --- Whatsapp_Chat_Exporter/data_model.py | 35 ++++ Whatsapp_Chat_Exporter/extract_new.py | 258 ++++++++++++++------------ 2 files changed, 177 insertions(+), 116 deletions(-) create mode 100644 Whatsapp_Chat_Exporter/data_model.py diff --git a/Whatsapp_Chat_Exporter/data_model.py b/Whatsapp_Chat_Exporter/data_model.py new file mode 100644 index 0000000..2e03c06 --- /dev/null +++ b/Whatsapp_Chat_Exporter/data_model.py @@ -0,0 +1,35 @@ +from datetime import datetime +from typing import Union + + +class ChatStore(): + def __init__(self, name=None): + if name is not None and not isinstance(name, str): + raise TypeError("Name must be a string or None") + self.name = name + self.messages = {} + + def add_message(self, id, message): + if not isinstance(message, Message): + raise TypeError("Chat must be a Chat object") + self.messages[id] = message + + def delete_message(self, id): + if id in self.messages: + del self.messages[id] + +class Message(): + def __init__(self, from_me: Union[bool,int], timestamp: int, time: str, key_id: int): + self.from_me = bool(from_me) + self.timestamp = timestamp / 1000 if timestamp > 9999999999 else timestamp + self.time = datetime.fromtimestamp(time/1000).strftime("%H:%M") + self.media = False + self.key_id = key_id + self.meta = False + self.data = None + self.sender = None + # Extra + self.reply = None + self.quoted_data = None + self.caption = None + diff --git a/Whatsapp_Chat_Exporter/extract_new.py b/Whatsapp_Chat_Exporter/extract_new.py index 4c119a1..25498eb 100644 --- a/Whatsapp_Chat_Exporter/extract_new.py +++ b/Whatsapp_Chat_Exporter/extract_new.py @@ -16,6 +16,8 @@ from enum import Enum from mimetypes import MimeTypes from hashlib import sha256 +from Whatsapp_Chat_Exporter.data_model import ChatStore, Message + try: import zlib from Crypto.Cipher import AES @@ -30,6 +32,7 @@ except ModuleNotFoundError: else: support_crypt15 = True + def sanitize_except(html): return Markup(sanitize(html, tags=["br"])) @@ -42,6 +45,7 @@ def determine_day(last, current): else: return current + CRYPT14_OFFSETS = [ {"iv": 67, "db": 191}, {"iv": 67, "db": 190}, @@ -80,7 +84,7 @@ def _extract_encrypted_key(keyfile): key_stream += byte.to_bytes(1, "big", signed=True) return _generate_hmac_of_hmac(key_stream) - + def decrypt_backup(database, key, output, crypt=Crypt.CRYPT14, show_crypt15=False): if not support_backup: @@ -187,92 +191,104 @@ def contacts(db, data): c.execute("""SELECT jid, display_name FROM wa_contacts; """) row = c.fetchone() while row is not None: - data[row[0]] = {"name": row[1], "messages": {}} + data[row["jid"]] = ChatStore(row["display_name"]) row = c.fetchone() def messages(db, data): # Get message history c = db.cursor() - c.execute("""SELECT count() FROM messages""") + c.execute("""SELECT count() FROM message""") total_row_number = c.fetchone()[0] print(f"Gathering messages...(0/{total_row_number})", end="\r") phone_number_re = re.compile(r"[0-9]+@s.whatsapp.net") - c.execute("""SELECT messages.key_remote_jid, - messages._id, - messages.key_from_me, - messages.timestamp, - messages.data, - messages.status, - messages.edit_version, - messages.thumb_image, - messages.remote_resource, - messages.media_wa_type, - messages.latitude, - messages.longitude, - messages_quotes.key_id as quoted, - messages.key_id, - messages_quotes.data, - messages.media_caption - FROM messages - LEFT JOIN messages_quotes - ON messages.quoted_row_id = messages_quotes._id - WHERE messages.key_remote_jid <> '-1';""") + c.execute("""SELECT jid.raw_string as key_remote_jid, + message._id, + message.from_me as key_from_me, + message.timestamp, + message.text_data as data, + message.status, + message_future.version as edit_version, + message_thumbnail.thumbnail as thumb_image, + message_media.file_path as remote_resource, + message_media.mime_type as media_wa_type, + message_location.latitude, + message_location.longitude, + message_quoted.key_id as quoted, + message.key_id, + message_quoted.text_data as quoted_data, + message_media.media_caption, + chat.subject as chat_subject + FROM message + LEFT JOIN message_quoted + ON message_quoted.message_row_id = message._id + LEFT JOIN message_location + ON message_location.message_row_id = message._id + LEFT JOIN message_media + ON message_media.message_row_id = message._id + LEFT JOIN message_thumbnail + ON message_thumbnail.message_row_id = message._id + LEFT JOIN message_future + ON message_future.message_row_id = message._id + LEFT JOIN chat + ON chat._id = message.chat_row_id + INNER JOIN jid + ON jid._id = chat.jid_row_id + WHERE key_remote_jid <> '-1';""") i = 0 content = c.fetchone() while content is not None: - if content[0] not in data: - data[content[0]] = {"name": None, "messages": {}} - data[content[0]]["messages"][content[1]] = { - "from_me": bool(content[2]), - "timestamp": content[3]/1000, - "time": datetime.fromtimestamp(content[3]/1000).strftime("%H:%M"), - "media": False, - "key_id": content[13], - "meta": False, - "data": None - } - if "-" in content[0] and content[2] == 0: + if content["key_remote_jid"] not in data: + data[content["key_remote_jid"]] = ChatStore() + if content["key_remote_jid"] is None: + continue + data[content["key_remote_jid"]].add_message(content["_id"], Message( + from_me=content["key_from_me"], + timestamp=content["timestamp"], + time=content["timestamp"], + key_id=content["key_id"], + )) + if "-" in content["key_remote_jid"] and content["key_from_me"] == 0: name = None - if content[8] in data: - name = data[content[8]]["name"] - if "@" in content[8]: - fallback = content[8].split('@')[0] + if content["remote_resource"] in data: + name = data[content["remote_resource"]]["name"] + if "@" in content["remote_resource"]: + fallback = content["remote_resource"].split('@')["key_remote_jid"] else: fallback = None else: fallback = None - data[content[0]]["messages"][content[1]]["sender"] = name or fallback + data[content["key_remote_jid"]].messages[content["_id"]].sender = name or fallback else: - data[content[0]]["messages"][content[1]]["sender"] = None + data[content["key_remote_jid"]].messages[content["_id"]].sender = None - if content[12] is not None: - data[content[0]]["messages"][content[1]]["reply"] = content[12] - data[content[0]]["messages"][content[1]]["quoted_data"] = content[14] + if content["quoted"] is not None: + data[content["key_remote_jid"]].messages[content["_id"]].reply = content["quoted"] + data[content["key_remote_jid"]].messages[content["_id"]].quoted_data = content["quoted_data"] else: - data[content[0]]["messages"][content[1]]["reply"] = None + data[content["key_remote_jid"]].messages[content["_id"]].reply = None - if content[15] is not None: - data[content[0]]["messages"][content[1]]["caption"] = content[15] + if content["key_id"] is not None: + data[content["key_remote_jid"]].messages[content["_id"]].caption = content["key_id"] else: - data[content[0]]["messages"][content[1]]["caption"] = None + data[content["key_remote_jid"]].messages[content["_id"]].caption = None - if content[5] == 6: - if "-" in content[0]: + if content["status"] == 6: + if content["chat_subject"] is not None: # Is Group - if content[4] is not None: + if content["data"] is not None: try: - int(content[4]) + int(content["data"]) except ValueError: - msg = f"The group name changed to {content[4]}" - data[content[0]]["messages"][content[1]]["data"] = msg - data[content[0]]["messages"][content[1]]["meta"] = True + msg = f"The group name changed to {content['data']}" + data[content["key_remote_jid"]].messages[content["_id"]].data = msg + data[content["key_remote_jid"]].messages[content["_id"]].meta = True else: - del data[content[0]]["messages"][content[1]] + data[content["key_remote_jid"]].delete_message(content["_id"]) else: - thumb_image = content[7] + thumb_image = content["thumb_image"] if thumb_image is not None: if b"\x00\x00\x01\x74\x00\x1A" in thumb_image: # Add user @@ -282,62 +298,62 @@ def messages(db, data): name_right = data[added]["name"] else: name_right = added.split('@')[0] - if content[8] is not None: - if content[8] in data: - name_left = data[content[8]]["name"] + if content["remote_resource"] is not None: + if content["remote_resource"] in data: + name_left = data[content["remote_resource"]]["name"] else: - name_left = content[8].split('@')[0] + name_left = content["remote_resource"].split('@')[0] msg = f"{name_left} added {name_right or 'You'}" else: msg = f"Added {name_right or 'You'}" elif b"\xac\xed\x00\x05\x74\x00" in thumb_image: # Changed number - original = content[8].split('@')[0] + original = content["remote_resource"].split('@')[0] changed = thumb_image[7:].decode().split('@')[0] msg = f"{original} changed to {changed}" - data[content[0]]["messages"][content[1]]["data"] = msg - data[content[0]]["messages"][content[1]]["meta"] = True + data[content["key_remote_jid"]].messages[content["_id"]].data = msg + data[content["key_remote_jid"]].messages[content["_id"]].meta = True else: - if content[4] is None: - del data[content[0]]["messages"][content[1]] + if content["data"] is None: + del data[content["key_remote_jid"]].messages[content["_id"]] else: # Private chat - if content[4] is None and content[7] is None: - del data[content[0]]["messages"][content[1]] + if content["data"] is None and content["thumb_image"] is None: + data[content["key_remote_jid"]].delete_message(content["_id"]) else: - if content[2] == 1: - if content[5] == 5 and content[6] == 7: + if content["key_from_me"] == 1: + if content["status"] == 5 and content["edit_version"] == 7: msg = "Message deleted" - data[content[0]]["messages"][content[1]]["meta"] = True + data[content["key_remote_jid"]].messages[content["_id"]].meta = True else: - if content[9] == "5": + if content["media_wa_type"] == "5": msg = f"Location shared: {content[10], content[11]}" - data[content[0]]["messages"][content[1]]["meta"] = True + data[content["key_remote_jid"]].messages[content["_id"]].meta = True else: - msg = content[4] + msg = content["data"] if msg is not None: if "\r\n" in msg: msg = msg.replace("\r\n", "
") if "\n" in msg: msg = msg.replace("\n", "
") else: - if content[5] == 0 and content[6] == 7: + if content["status"] == 0 and content["edit_version"] == 7: msg = "Message deleted" - data[content[0]]["messages"][content[1]]["meta"] = True + data[content["key_remote_jid"]].messages[content["_id"]].meta = True else: - if content[9] == "5": + if content["media_wa_type"] == "5": msg = f"Location shared: {content[10], content[11]}" - data[content[0]]["messages"][content[1]]["meta"] = True + data[content["key_remote_jid"]].messages[content["_id"]].meta = True else: - msg = content[4] + msg = content["data"] if msg is not None: if "\r\n" in msg: msg = msg.replace("\r\n", "
") if "\n" in msg: msg = msg.replace("\n", "
") - data[content[0]]["messages"][content[1]]["data"] = msg + data[content["key_remote_jid"]].messages[content["_id"]].data = msg i += 1 if i % 1000 == 0: @@ -353,45 +369,50 @@ def media(db, data, media_folder): total_row_number = c.fetchone()[0] print(f"\nGathering media...(0/{total_row_number})", end="\r") i = 0 - c.execute("""SELECT messages.key_remote_jid, + c.execute("""SELECT jid.raw_string, message_row_id, file_path, message_url, mime_type, media_key FROM message_media - INNER JOIN messages - ON message_media.message_row_id = messages._id - ORDER BY messages.key_remote_jid ASC""") + INNER JOIN message + ON message_media.message_row_id = message._id + LEFT JOIN chat + ON chat._id = message.chat_row_id + INNER JOIN jid + ON jid._id = chat.jid_row_id + + ORDER BY jid.raw_string ASC""") content = c.fetchone() mime = MimeTypes() while content is not None: - file_path = f"{media_folder}/{content[2]}" - data[content[0]]["messages"][content[1]]["media"] = True + file_path = f"{media_folder}/{content['file_path']}" + data[content["raw_string"]].messages[content["message_row_id"]].media = True if os.path.isfile(file_path): - data[content[0]]["messages"][content[1]]["data"] = file_path - if content[4] is None: + data[content["raw_string"]].messages[content["message_row_id"]].data = file_path + if content["mime_type"] is None: guess = mime.guess_type(file_path)[0] if guess is not None: - data[content[0]]["messages"][content[1]]["mime"] = guess + data[content["raw_string"]].messages[content["message_row_id"]].mime = guess else: - data[content[0]]["messages"][content[1]]["mime"] = "data/data" + data[content["raw_string"]].messages[content["message_row_id"]].mime = "data/data" else: - data[content[0]]["messages"][content[1]]["mime"] = content[4] + data[content["raw_string"]].messages[content["message_row_id"]].mime = content["mime_type"] else: - # if "https://mmg" in content[4]: + # if "https://mmg" in content["mime_type"]: # try: - # r = requests.get(content[3]) + # r = requests.get(content["message_url"]) # if r.status_code != 200: # raise RuntimeError() # except: - # data[content[0]]["messages"][content[1]]["data"] = "{The media is missing}" - # data[content[0]]["messages"][content[1]]["media"] = True - # data[content[0]]["messages"][content[1]]["mime"] = "media" + # data[content["raw_string"]].messages[content["message_row_id"]].data = "{The media is missing}" + # data[content["raw_string"]].messages[content["message_row_id"]].media = True + # data[content["raw_string"]].messages[content["message_row_id"]].mime = "media" # else: - data[content[0]]["messages"][content[1]]["data"] = "The media is missing" - data[content[0]]["messages"][content[1]]["mime"] = "media" - data[content[0]]["messages"][content[1]]["meta"] = True + data[content["raw_string"]].messages[content["message_row_id"]].data = "The media is missing" + data[content["raw_string"]].messages[content["message_row_id"]].mime = "media" + data[content["raw_string"]].messages[content["message_row_id"]].meta = True i += 1 if i % 100 == 0: print(f"Gathering media...({i}/{total_row_number})", end="\r") @@ -403,13 +424,17 @@ def media(db, data, media_folder): def vcard(db, data): c = db.cursor() c.execute("""SELECT message_row_id, - messages.key_remote_jid, + jid.raw_string, vcard, - messages.media_name - FROM messages_vcards - INNER JOIN messages - ON messages_vcards.message_row_id = messages._id - ORDER BY messages.key_remote_jid ASC;""") + message.text_data + FROM message_vcard + INNER JOIN message + ON message_vcard.message_row_id = message._id + LEFT JOIN chat + ON chat._id = message.chat_row_id + INNER JOIN jid + ON jid._id = chat.jid_row_id + ORDER BY message.chat_row_id ASC;""") rows = c.fetchall() total_row_number = len(rows) print(f"\nGathering vCards...(0/{total_row_number})", end="\r") @@ -417,17 +442,17 @@ def vcard(db, data): if not os.path.isdir(base): Path(base).mkdir(parents=True, exist_ok=True) for index, row in enumerate(rows): - media_name = row[3] if row[3] else "" + media_name = row["text_data"] if row["text_data"] else "" file_name = "".join(x for x in media_name if x.isalnum()) file_path = f"{base}/{file_name}.vcf" if not os.path.isfile(file_path): with open(file_path, "w", encoding="utf-8") as f: - f.write(row[2]) - data[row[1]]["messages"][row[0]]["data"] = media_name + \ + f.write(row["vcard"]) + data[row["raw_string"]].messages[row["message_row_id"]].data = media_name + \ "The vCard file cannot be displayed here, " \ f"however it should be located at {file_path}" - data[row[1]]["messages"][row[0]]["mime"] = "text/x-vcard" - data[row[1]]["messages"][row[0]]["meta"] = True + data[row["raw_string"]].messages[row["message_row_id"]].mime = "text/x-vcard" + data[row["raw_string"]].messages[row["message_row_id"]].meta = True print(f"Gathering vCards...({index + 1}/{total_row_number})", end="\r") @@ -451,28 +476,29 @@ def create_html(data, output_folder, template=None, embedded=False): os.mkdir(output_folder) for current, contact in enumerate(data): - if len(data[contact]["messages"]) == 0: + if len(data[contact].messages) == 0: continue phone_number = contact.split('@')[0] + file_name = phone_number if "-" in contact: file_name = "" else: file_name = phone_number - if data[contact]["name"] is not None: + if data[contact].name is not None: if file_name != "": file_name += "-" - file_name += data[contact]["name"].replace("/", "-") - name = data[contact]["name"] + file_name += data[contact].name.replace("/", "-") + name = data[contact].name else: name = phone_number - safe_file_name = '' + safe_file_name = "".join(x for x in file_name if x.isalnum() or x in "- ") with open(f"{output_folder}/{safe_file_name}.html", "w", encoding="utf-8") as f: f.write( template.render( name=name, - msgs=data[contact]["messages"].values(), + msgs=data[contact].messages.values(), my_avatar=None, their_avatar=f"WhatsApp/Avatars/{contact}.j" ) From f87108dadc0024531f09249cbbc6ffdc1b18b86b Mon Sep 17 00:00:00 2001 From: KnugiHK <24708955+KnugiHK@users.noreply.github.com> Date: Wed, 21 Dec 2022 21:42:54 +0800 Subject: [PATCH 09/58] Some left-over --- Whatsapp_Chat_Exporter/extract_new.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/Whatsapp_Chat_Exporter/extract_new.py b/Whatsapp_Chat_Exporter/extract_new.py index 25498eb..4d7d0a9 100644 --- a/Whatsapp_Chat_Exporter/extract_new.py +++ b/Whatsapp_Chat_Exporter/extract_new.py @@ -315,7 +315,7 @@ def messages(db, data): data[content["key_remote_jid"]].messages[content["_id"]].meta = True else: if content["data"] is None: - del data[content["key_remote_jid"]].messages[content["_id"]] + data[content["key_remote_jid"]].delete_message(content["_id"]) else: # Private chat if content["data"] is None and content["thumb_image"] is None: @@ -382,8 +382,7 @@ def media(db, data, media_folder): ON chat._id = message.chat_row_id INNER JOIN jid ON jid._id = chat.jid_row_id - - ORDER BY jid.raw_string ASC""") + ORDER BY jid.raw_string ASC""") content = c.fetchone() mime = MimeTypes() while content is not None: @@ -479,7 +478,6 @@ def create_html(data, output_folder, template=None, embedded=False): if len(data[contact].messages) == 0: continue phone_number = contact.split('@')[0] - file_name = phone_number if "-" in contact: file_name = "" else: From 6407ba213650f99bc6597a1f81a77e585cf5b4cd Mon Sep 17 00:00:00 2001 From: KnugiHK <24708955+KnugiHK@users.noreply.github.com> Date: Wed, 21 Dec 2022 21:45:20 +0800 Subject: [PATCH 10/58] Adopt the latest version --- Whatsapp_Chat_Exporter/__main__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Whatsapp_Chat_Exporter/__main__.py b/Whatsapp_Chat_Exporter/__main__.py index 7712ce0..1adffe1 100644 --- a/Whatsapp_Chat_Exporter/__main__.py +++ b/Whatsapp_Chat_Exporter/__main__.py @@ -1,5 +1,6 @@ from .__init__ import __version__ -from Whatsapp_Chat_Exporter import extract, extract_iphone +from Whatsapp_Chat_Exporter import extract_new as extract +from Whatsapp_Chat_Exporter import extract_iphone from Whatsapp_Chat_Exporter import extract_iphone_media from Whatsapp_Chat_Exporter.extract import Crypt from optparse import OptionParser From 4b357d5ea9ddc4aa4516018face182f0f93cb1c6 Mon Sep 17 00:00:00 2001 From: KnugiHK <24708955+KnugiHK@users.noreply.github.com> Date: Fri, 23 Dec 2022 16:49:28 +0800 Subject: [PATCH 11/58] Update the import of Crypt to latest one for message --- Whatsapp_Chat_Exporter/__main__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Whatsapp_Chat_Exporter/__main__.py b/Whatsapp_Chat_Exporter/__main__.py index 7712ce0..34f0ca0 100644 --- a/Whatsapp_Chat_Exporter/__main__.py +++ b/Whatsapp_Chat_Exporter/__main__.py @@ -1,7 +1,7 @@ from .__init__ import __version__ from Whatsapp_Chat_Exporter import extract, extract_iphone from Whatsapp_Chat_Exporter import extract_iphone_media -from Whatsapp_Chat_Exporter.extract import Crypt +from Whatsapp_Chat_Exporter.extract_new import Crypt from optparse import OptionParser import os import sqlite3 From d3892a4e4f4f0b533107f871a23de22153cc879a Mon Sep 17 00:00:00 2001 From: KnugiHK <24708955+KnugiHK@users.noreply.github.com> Date: Fri, 23 Dec 2022 17:28:23 +0800 Subject: [PATCH 12/58] Fix caption part --- Whatsapp_Chat_Exporter/extract_new.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Whatsapp_Chat_Exporter/extract_new.py b/Whatsapp_Chat_Exporter/extract_new.py index 4d7d0a9..8100c5f 100644 --- a/Whatsapp_Chat_Exporter/extract_new.py +++ b/Whatsapp_Chat_Exporter/extract_new.py @@ -218,7 +218,7 @@ def messages(db, data): message_quoted.key_id as quoted, message.key_id, message_quoted.text_data as quoted_data, - message_media.media_caption, + message.message_type, chat.subject as chat_subject FROM message LEFT JOIN message_quoted @@ -270,8 +270,8 @@ def messages(db, data): else: data[content["key_remote_jid"]].messages[content["_id"]].reply = None - if content["key_id"] is not None: - data[content["key_remote_jid"]].messages[content["_id"]].caption = content["key_id"] + if content["message_type"] == 1: + data[content["key_remote_jid"]].messages[content["_id"]].caption = content["data"] else: data[content["key_remote_jid"]].messages[content["_id"]].caption = None From 92b8903521aeaf3ab900095cab1e6cd44378fffa Mon Sep 17 00:00:00 2001 From: GoComputing Date: Sat, 28 Jan 2023 20:51:40 +0100 Subject: [PATCH 13/58] Fixed JSON export Added serialization to the classes 'ChatStore' and 'Messages' so that they can be JSON serialized. --- Whatsapp_Chat_Exporter/__main__.py | 3 ++- Whatsapp_Chat_Exporter/data_model.py | 20 +++++++++++++++++++- 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/Whatsapp_Chat_Exporter/__main__.py b/Whatsapp_Chat_Exporter/__main__.py index b35d79a..69a6e46 100644 --- a/Whatsapp_Chat_Exporter/__main__.py +++ b/Whatsapp_Chat_Exporter/__main__.py @@ -184,7 +184,7 @@ def main(): messages(db, data) media(db, data, options.media) vcard(db, data) - create_html(data, options.output, options.template, options.embedded) + # create_html(data, options.output, options.template, options.embedded) else: print( "The message database does not exist. You may specify the path " @@ -202,6 +202,7 @@ def main(): "Perhaps the directory is opened?") if options.json: + data = {jik : chat.to_json() for jik,chat in data.items()} with open("result.json", "w") as f: data = json.dumps(data) print(f"\nWriting JSON file...({int(len(data)/1024/1024)}MB)") diff --git a/Whatsapp_Chat_Exporter/data_model.py b/Whatsapp_Chat_Exporter/data_model.py index 2e03c06..bd41394 100644 --- a/Whatsapp_Chat_Exporter/data_model.py +++ b/Whatsapp_Chat_Exporter/data_model.py @@ -18,6 +18,10 @@ class ChatStore(): if id in self.messages: del self.messages[id] + def to_json(self): + serialized_msgs = {id : msg.to_json() for id,msg in self.messages.items()} + return {'name' : self.name, 'messages' : serialized_msgs} + class Message(): def __init__(self, from_me: Union[bool,int], timestamp: int, time: str, key_id: int): self.from_me = bool(from_me) @@ -32,4 +36,18 @@ class Message(): self.reply = None self.quoted_data = None self.caption = None - + + def to_json(self): + return { + 'from_me' : self.from_me, + 'timestamp' : self.timestamp, + 'time' : self.time, + 'media' : self.media, + 'key_id' : self.key_id, + 'meta' : self.meta, + 'data' : self.data, + 'sender' : self.sender, + 'reply' : self.reply, + 'quoted_data' : self.quoted_data, + 'caption' : self.caption + } From 14b1cb7fdecd4eb4f1a6c501f56cf756e48423a3 Mon Sep 17 00:00:00 2001 From: KnugiHK <24708955+KnugiHK@users.noreply.github.com> Date: Mon, 30 Jan 2023 18:53:01 +0800 Subject: [PATCH 14/58] Minor fix --- setup.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/setup.py b/setup.py index b5cb7d6..8ea2068 100644 --- a/setup.py +++ b/setup.py @@ -37,13 +37,13 @@ setuptools.setup( ], python_requires='>=3.7', install_requires=[ - 'jinja2', - 'bleach' + 'jinja2', + 'bleach' ], extras_require={ 'android_backup': ["pycryptodome", "javaobj-py3"], 'crypt12': ["pycryptodome"], - 'crypt12': ["pycryptodome"], + 'crypt14': ["pycryptodome"], 'crypt15': ["pycryptodome", "javaobj-py3"] }, entry_points={ From 60575c798920729bc4c2a1afb4488b9b3205207c Mon Sep 17 00:00:00 2001 From: KnugiHK <24708955+KnugiHK@users.noreply.github.com> Date: Tue, 31 Jan 2023 16:34:34 +0800 Subject: [PATCH 15/58] Implement #25 Copying media folder to the output directory will be the default starting from this commit. --- Whatsapp_Chat_Exporter/__main__.py | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/Whatsapp_Chat_Exporter/__main__.py b/Whatsapp_Chat_Exporter/__main__.py index b35d79a..7ae3c01 100644 --- a/Whatsapp_Chat_Exporter/__main__.py +++ b/Whatsapp_Chat_Exporter/__main__.py @@ -93,6 +93,13 @@ def main(): default=False, action='store_true', help="Show the HEX key used to decrypt the database") + parser.add_option( + "-c", + "--copy-media", + dest="copy_media", + default=True, + action='store_true', + help="Copy media directory to output directory, otherwise move the media directory to output directory") (options, args) = parser.parse_args() if options.android and options.iphone: @@ -193,13 +200,18 @@ def main(): ) exit(2) - if os.path.isdir(options.media) and \ - not os.path.isdir(f"{options.output}/{options.media}"): - try: - shutil.move(options.media, f"{options.output}/") - except PermissionError: - print("Cannot remove original WhatsApp directory. " - "Perhaps the directory is opened?") + if os.path.isdir(options.media): + if os.path.isdir(f"{options.output}/{options.media}"): + print("Media directory already exists in output directory. Skipping...") + else: + if options.copy_media: + shutil.copytree(options.media, f"{options.output}/WhatsApp") + else: + try: + shutil.move(options.media, f"{options.output}/") + except PermissionError: + print("Cannot remove original WhatsApp directory. " + "Perhaps the directory is opened?") if options.json: with open("result.json", "w") as f: From 26b58843fb1a64ecbff3ef7e5f2d85c0e5b5d009 Mon Sep 17 00:00:00 2001 From: KnugiHK <24708955+KnugiHK@users.noreply.github.com> Date: Tue, 31 Jan 2023 16:46:22 +0800 Subject: [PATCH 16/58] Add message --- Whatsapp_Chat_Exporter/__main__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/Whatsapp_Chat_Exporter/__main__.py b/Whatsapp_Chat_Exporter/__main__.py index 7ae3c01..49e1fa1 100644 --- a/Whatsapp_Chat_Exporter/__main__.py +++ b/Whatsapp_Chat_Exporter/__main__.py @@ -205,6 +205,7 @@ def main(): print("Media directory already exists in output directory. Skipping...") else: if options.copy_media: + print("Copying media directory...") shutil.copytree(options.media, f"{options.output}/WhatsApp") else: try: From 9600da59aea83b28f661a05079a5aea3a16546b3 Mon Sep 17 00:00:00 2001 From: KnugiHK <24708955+KnugiHK@users.noreply.github.com> Date: Tue, 31 Jan 2023 17:25:59 +0800 Subject: [PATCH 17/58] Correct the default in #25 --- Whatsapp_Chat_Exporter/__main__.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Whatsapp_Chat_Exporter/__main__.py b/Whatsapp_Chat_Exporter/__main__.py index 49e1fa1..8f95171 100644 --- a/Whatsapp_Chat_Exporter/__main__.py +++ b/Whatsapp_Chat_Exporter/__main__.py @@ -95,11 +95,11 @@ def main(): help="Show the HEX key used to decrypt the database") parser.add_option( "-c", - "--copy-media", - dest="copy_media", - default=True, + "--move-media", + dest="move_media", + default=False, action='store_true', - help="Copy media directory to output directory, otherwise move the media directory to output directory") + help="Move the media directory to output directory if the flag is set, otherwise copy it") (options, args) = parser.parse_args() if options.android and options.iphone: @@ -204,7 +204,7 @@ def main(): if os.path.isdir(f"{options.output}/{options.media}"): print("Media directory already exists in output directory. Skipping...") else: - if options.copy_media: + if not options.move_media: print("Copying media directory...") shutil.copytree(options.media, f"{options.output}/WhatsApp") else: From 726812a5f76774012c7399d6c3fde5e57f561b3d Mon Sep 17 00:00:00 2001 From: Knugi <24708955+KnugiHK@users.noreply.github.com> Date: Sun, 12 Feb 2023 08:15:40 +0000 Subject: [PATCH 18/58] Prepare for standalone binary https://github.com/KnugiHK/Whatsapp-Chat-Exporter/issues/29 --- Whatsapp_Chat_Exporter/__main__.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Whatsapp_Chat_Exporter/__main__.py b/Whatsapp_Chat_Exporter/__main__.py index 8f95171..b438fe2 100644 --- a/Whatsapp_Chat_Exporter/__main__.py +++ b/Whatsapp_Chat_Exporter/__main__.py @@ -1,4 +1,7 @@ -from .__init__ import __version__ +try: + from .__init__ import __version__ +except ImportError: + from Whatsapp_Chat_Exporter.__init__ import __version__ from Whatsapp_Chat_Exporter import extract_new as extract from Whatsapp_Chat_Exporter import extract_iphone from Whatsapp_Chat_Exporter import extract_iphone_media From 7cd259143adffb66bca6a06ac858fc5299615356 Mon Sep 17 00:00:00 2001 From: KnugiHK <24708955+KnugiHK@users.noreply.github.com> Date: Sun, 12 Feb 2023 17:54:32 +0800 Subject: [PATCH 19/58] Implement standalone binary compilation --- .github/workflows/compile-binary.yml | 77 ++++++++++++++++++++++++++++ Whatsapp_Chat_Exporter/__main__.py | 4 ++ 2 files changed, 81 insertions(+) create mode 100644 .github/workflows/compile-binary.yml diff --git a/.github/workflows/compile-binary.yml b/.github/workflows/compile-binary.yml new file mode 100644 index 0000000..434b4cb --- /dev/null +++ b/.github/workflows/compile-binary.yml @@ -0,0 +1,77 @@ +name: Compile the program into binary + +on: + workflow_dispatch: + +permissions: + contents: read + +jobs: + linux: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Set up Python + uses: actions/setup-python@v3 + with: + python-version: '3.10' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install pycryptodome javaobj-py3 ordered-set zstandard nuitka + pip install . + - name: Build binary with Nuitka + run: | + python -m nuitka --onefile --follow-imports Whatsapp_Chat_Exporter/__main__.py + cp __main__.bin wtsexporter_linux_x64 + - uses: actions/upload-artifact@v3 + with: + name: binary-linux + path: | + ./wtsexporter_linux_x64 + + windows: + runs-on: windows-latest + steps: + - uses: actions/checkout@v3 + - name: Set up Python + uses: actions/setup-python@v3 + with: + python-version: '3.10' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install pycryptodome javaobj-py3 ordered-set zstandard nuitka + pip install . + - name: Build binary with Nuitka + run: | + python -m nuitka --onefile --assume-yes-for-downloads --follow-imports Whatsapp_Chat_Exporter\__main__.py + copy __main__.exe wtsexporter_x64.exe + - uses: actions/upload-artifact@v3 + with: + name: binary-windows + path: | + .\wtsexporter_x64.exe + + macos: + runs-on: macos-latest + steps: + - uses: actions/checkout@v3 + - name: Set up Python + uses: actions/setup-python@v3 + with: + python-version: '3.10' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install pycryptodome javaobj-py3 ordered-set zstandard nuitka + pip install . + - name: Build binary with Nuitka + run: | + python -m nuitka --onefile --follow-imports Whatsapp_Chat_Exporter/__main__.py + cp __main__.bin wtsexporter_macos_x64 + - uses: actions/upload-artifact@v3 + with: + name: binary-macos + path: | + ./wtsexporter_macos_x64 diff --git a/Whatsapp_Chat_Exporter/__main__.py b/Whatsapp_Chat_Exporter/__main__.py index b438fe2..7e51333 100644 --- a/Whatsapp_Chat_Exporter/__main__.py +++ b/Whatsapp_Chat_Exporter/__main__.py @@ -226,3 +226,7 @@ def main(): print() print("Everything is done!") + + +if __name__ == "__main__": + main() From d46a42a0978f63393c2b4559f1804341bee1ce98 Mon Sep 17 00:00:00 2001 From: Knugi <24708955+KnugiHK@users.noreply.github.com> Date: Sun, 12 Feb 2023 09:56:55 +0000 Subject: [PATCH 20/58] Update compile-binary.yml --- .github/workflows/compile-binary.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/compile-binary.yml b/.github/workflows/compile-binary.yml index 434b4cb..8256832 100644 --- a/.github/workflows/compile-binary.yml +++ b/.github/workflows/compile-binary.yml @@ -1,4 +1,4 @@ -name: Compile the program into binary +name: Compile standalone binary on: workflow_dispatch: From 4cb4ac3e7b96595248a71d0232284b86be0cde96 Mon Sep 17 00:00:00 2001 From: KnugiHK <24708955+KnugiHK@users.noreply.github.com> Date: Mon, 13 Feb 2023 00:25:31 +0800 Subject: [PATCH 21/58] Bug fix for sender name in group chat #9 --- Whatsapp_Chat_Exporter/extract_new.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/Whatsapp_Chat_Exporter/extract_new.py b/Whatsapp_Chat_Exporter/extract_new.py index 8100c5f..3302ed4 100644 --- a/Whatsapp_Chat_Exporter/extract_new.py +++ b/Whatsapp_Chat_Exporter/extract_new.py @@ -203,7 +203,7 @@ def messages(db, data): print(f"Gathering messages...(0/{total_row_number})", end="\r") phone_number_re = re.compile(r"[0-9]+@s.whatsapp.net") - c.execute("""SELECT jid.raw_string as key_remote_jid, + c.execute("""SELECT jid_global.raw_string as key_remote_jid, message._id, message.from_me as key_from_me, message.timestamp, @@ -219,6 +219,7 @@ def messages(db, data): message.key_id, message_quoted.text_data as quoted_data, message.message_type, + jid_group.raw_string as group_sender_jid, chat.subject as chat_subject FROM message LEFT JOIN message_quoted @@ -233,8 +234,10 @@ def messages(db, data): ON message_future.message_row_id = message._id LEFT JOIN chat ON chat._id = message.chat_row_id - INNER JOIN jid - ON jid._id = chat.jid_row_id + INNER JOIN jid jid_global + ON jid_global._id = chat.jid_row_id + LEFT JOIN jid jid_group + ON jid_group._id = message.sender_jid_row_id WHERE key_remote_jid <> '-1';""") i = 0 content = c.fetchone() @@ -252,14 +255,15 @@ def messages(db, data): if "-" in content["key_remote_jid"] and content["key_from_me"] == 0: name = None if content["remote_resource"] in data: - name = data[content["remote_resource"]]["name"] - if "@" in content["remote_resource"]: - fallback = content["remote_resource"].split('@')["key_remote_jid"] - else: - fallback = None + if content["chat_subject"] is not None: + _jid = content["group_sender_jid"] + else: + _jid = content["key_remote_jid"] + if _jid in data: + name = data[_jid].name + fallback = _jid.split('@')[0] if "@" in _jid else None else: fallback = None - data[content["key_remote_jid"]].messages[content["_id"]].sender = name or fallback else: data[content["key_remote_jid"]].messages[content["_id"]].sender = None From a275a0f40c8966362da4643b0c721ff620aef402 Mon Sep 17 00:00:00 2001 From: KnugiHK <24708955+KnugiHK@users.noreply.github.com> Date: Mon, 13 Feb 2023 00:27:55 +0800 Subject: [PATCH 22/58] Why is this line not in last commit... --- Whatsapp_Chat_Exporter/extract_new.py | 1 - 1 file changed, 1 deletion(-) diff --git a/Whatsapp_Chat_Exporter/extract_new.py b/Whatsapp_Chat_Exporter/extract_new.py index 3302ed4..e5f4358 100644 --- a/Whatsapp_Chat_Exporter/extract_new.py +++ b/Whatsapp_Chat_Exporter/extract_new.py @@ -254,7 +254,6 @@ def messages(db, data): )) if "-" in content["key_remote_jid"] and content["key_from_me"] == 0: name = None - if content["remote_resource"] in data: if content["chat_subject"] is not None: _jid = content["group_sender_jid"] else: From 26320413e8e358bdbfc21bfa12d75eeb1229a60d Mon Sep 17 00:00:00 2001 From: KnugiHK <24708955+KnugiHK@users.noreply.github.com> Date: Mon, 13 Feb 2023 12:23:43 +0800 Subject: [PATCH 23/58] Add offline availability of w3css --- Whatsapp_Chat_Exporter/__main__.py | 7 ++++++- Whatsapp_Chat_Exporter/extract.py | 17 +++++++++++++++-- Whatsapp_Chat_Exporter/extract_iphone.py | 17 +++++++++++++++-- Whatsapp_Chat_Exporter/extract_new.py | 17 +++++++++++++++-- Whatsapp_Chat_Exporter/whatsapp.html | 7 +++---- 5 files changed, 54 insertions(+), 11 deletions(-) diff --git a/Whatsapp_Chat_Exporter/__main__.py b/Whatsapp_Chat_Exporter/__main__.py index 7e51333..bfaf2d1 100644 --- a/Whatsapp_Chat_Exporter/__main__.py +++ b/Whatsapp_Chat_Exporter/__main__.py @@ -103,6 +103,11 @@ def main(): default=False, action='store_true', help="Move the media directory to output directory if the flag is set, otherwise copy it") + parser.add_option( + "--offline", + dest="offline", + default=None, + help="Relative path to offline static files") (options, args) = parser.parse_args() if options.android and options.iphone: @@ -194,7 +199,7 @@ def main(): messages(db, data) media(db, data, options.media) vcard(db, data) - create_html(data, options.output, options.template, options.embedded) + create_html(data, options.output, options.template, options.embedded, options.offline) else: print( "The message database does not exist. You may specify the path " diff --git a/Whatsapp_Chat_Exporter/extract.py b/Whatsapp_Chat_Exporter/extract.py index 4c119a1..1c407a1 100644 --- a/Whatsapp_Chat_Exporter/extract.py +++ b/Whatsapp_Chat_Exporter/extract.py @@ -431,7 +431,7 @@ def vcard(db, data): print(f"Gathering vCards...({index + 1}/{total_row_number})", end="\r") -def create_html(data, output_folder, template=None, embedded=False): +def create_html(data, output_folder, template=None, embedded=False, offline_static=False): if template is None: template_dir = os.path.dirname(__file__) template_file = "whatsapp.html" @@ -450,6 +450,18 @@ def create_html(data, output_folder, template=None, embedded=False): if not os.path.isdir(output_folder): os.mkdir(output_folder) + w3css = "https://www.w3schools.com/w3css/4/w3.css" + if offline_static: + import urllib.request + static_folder = os.path.join(output_folder, offline_static) + if not os.path.isdir(static_folder): + os.mkdir(static_folder) + w3css_path = os.path.join(static_folder, "w3.css") + if not os.path.isfile(w3css_path): + with urllib.request.urlopen(w3css) as resp: + with open(w3css_path, "wb") as f: f.write(resp.read()) + w3css = os.path.join(offline_static, "w3.css") + for current, contact in enumerate(data): if len(data[contact]["messages"]) == 0: continue @@ -474,7 +486,8 @@ def create_html(data, output_folder, template=None, embedded=False): name=name, msgs=data[contact]["messages"].values(), my_avatar=None, - their_avatar=f"WhatsApp/Avatars/{contact}.j" + their_avatar=f"WhatsApp/Avatars/{contact}.j", + w3css=w3css ) ) if current % 10 == 0: diff --git a/Whatsapp_Chat_Exporter/extract_iphone.py b/Whatsapp_Chat_Exporter/extract_iphone.py index 7400688..ce1fbd6 100644 --- a/Whatsapp_Chat_Exporter/extract_iphone.py +++ b/Whatsapp_Chat_Exporter/extract_iphone.py @@ -228,7 +228,7 @@ def vcard(db, data): print(f"Gathering vCards...({index + 1}/{total_row_number})", end="\r") -def create_html(data, output_folder, template=None, embedded=False): +def create_html(data, output_folder, template=None, embedded=False, offline_static=False): if template is None: template_dir = os.path.dirname(__file__) template_file = "whatsapp.html" @@ -247,6 +247,18 @@ def create_html(data, output_folder, template=None, embedded=False): if not os.path.isdir(output_folder): os.mkdir(output_folder) + w3css = "https://www.w3schools.com/w3css/4/w3.css" + if offline_static: + import urllib.request + static_folder = os.path.join(output_folder, offline_static) + if not os.path.isdir(static_folder): + os.mkdir(static_folder) + w3css_path = os.path.join(static_folder, "w3.css") + if not os.path.isfile(w3css_path): + with urllib.request.urlopen(w3css) as resp: + with open(w3css_path, "wb") as f: f.write(resp.read()) + w3css = os.path.join(offline_static, "w3.css") + for current, contact in enumerate(data): if len(data[contact]["messages"]) == 0: continue @@ -272,7 +284,8 @@ def create_html(data, output_folder, template=None, embedded=False): name=name, msgs=data[contact]["messages"].values(), my_avatar=None, - their_avatar=f"WhatsApp/Avatars/{contact}.j" + their_avatar=f"WhatsApp/Avatars/{contact}.j", + w3css=w3css ) ) if current % 10 == 0: diff --git a/Whatsapp_Chat_Exporter/extract_new.py b/Whatsapp_Chat_Exporter/extract_new.py index e5f4358..f8a58cf 100644 --- a/Whatsapp_Chat_Exporter/extract_new.py +++ b/Whatsapp_Chat_Exporter/extract_new.py @@ -458,7 +458,7 @@ def vcard(db, data): print(f"Gathering vCards...({index + 1}/{total_row_number})", end="\r") -def create_html(data, output_folder, template=None, embedded=False): +def create_html(data, output_folder, template=None, embedded=False, offline_static=False): if template is None: template_dir = os.path.dirname(__file__) template_file = "whatsapp.html" @@ -477,6 +477,18 @@ def create_html(data, output_folder, template=None, embedded=False): if not os.path.isdir(output_folder): os.mkdir(output_folder) + w3css = "https://www.w3schools.com/w3css/4/w3.css" + if offline_static: + import urllib.request + static_folder = os.path.join(output_folder, offline_static) + if not os.path.isdir(static_folder): + os.mkdir(static_folder) + w3css_path = os.path.join(static_folder, "w3.css") + if not os.path.isfile(w3css_path): + with urllib.request.urlopen(w3css) as resp: + with open(w3css_path, "wb") as f: f.write(resp.read()) + w3css = os.path.join(offline_static, "w3.css") + for current, contact in enumerate(data): if len(data[contact].messages) == 0: continue @@ -501,7 +513,8 @@ def create_html(data, output_folder, template=None, embedded=False): name=name, msgs=data[contact].messages.values(), my_avatar=None, - their_avatar=f"WhatsApp/Avatars/{contact}.j" + their_avatar=f"WhatsApp/Avatars/{contact}.j", + w3css=w3css ) ) if current % 10 == 0: diff --git a/Whatsapp_Chat_Exporter/whatsapp.html b/Whatsapp_Chat_Exporter/whatsapp.html index d467a87..ef5e9df 100644 --- a/Whatsapp_Chat_Exporter/whatsapp.html +++ b/Whatsapp_Chat_Exporter/whatsapp.html @@ -2,11 +2,10 @@ Whatsapp - {{ name }} - + +