diff --git a/Whatsapp_Chat_Exporter/__main__.py b/Whatsapp_Chat_Exporter/__main__.py index 9d7df9c..b04f114 100644 --- a/Whatsapp_Chat_Exporter/__main__.py +++ b/Whatsapp_Chat_Exporter/__main__.py @@ -125,9 +125,13 @@ def main(): parser.add_argument( "--size", "--output-size", + "--split", dest="size", + nargs='?', + type=int, + const=0, default=None, - help="Maximum size of a single output file in bytes, 0 for auto (not yet implemented)" + help="Maximum (Rough) size of a single output file in bytes, 0 for auto" ) parser.add_argument( "--no-html", @@ -216,7 +220,10 @@ def main(): elif args.iphone: import sys if "--iphone" in sys.argv: - print("WARNING: The --iphone flag is deprecated and will be removed in the future. Use --ios instead.") + print( + "WARNING: The --iphone flag is deprecated and will" + "be removed in the future. Use --ios instead." + ) messages = extract_iphone.messages media = extract_iphone.media vcard = extract_iphone.vcard @@ -271,7 +278,7 @@ def main(): shutil.move(args.media, f"{args.output}/") except PermissionError: print("Cannot remove original WhatsApp directory. " - "Perhaps the directory is opened?") + "Perhaps the directory is opened?") if args.json: if isinstance(data[next(iter(data))], ChatStore): diff --git a/Whatsapp_Chat_Exporter/data_model.py b/Whatsapp_Chat_Exporter/data_model.py index bd41394..a445d26 100644 --- a/Whatsapp_Chat_Exporter/data_model.py +++ b/Whatsapp_Chat_Exporter/data_model.py @@ -19,9 +19,16 @@ class ChatStore(): del self.messages[id] def to_json(self): - serialized_msgs = {id : msg.to_json() for id,msg in self.messages.items()} + serialized_msgs = {id: msg.to_json() for id, msg in self.messages.items()} return {'name' : self.name, 'messages' : serialized_msgs} + def get_last_message(self): + return tuple(self.messages.values())[-1] + + def get_messages(self): + return self.messages.values() + + class Message(): def __init__(self, from_me: Union[bool,int], timestamp: int, time: str, key_id: int): self.from_me = bool(from_me) diff --git a/Whatsapp_Chat_Exporter/extract.py b/Whatsapp_Chat_Exporter/extract.py index 8489454..77a76a3 100644 --- a/Whatsapp_Chat_Exporter/extract.py +++ b/Whatsapp_Chat_Exporter/extract.py @@ -12,7 +12,7 @@ from pathlib import Path from mimetypes import MimeTypes from hashlib import sha256 from Whatsapp_Chat_Exporter.data_model import ChatStore, Message -from Whatsapp_Chat_Exporter.utility import sanitize_except, determine_day, Crypt +from Whatsapp_Chat_Exporter.utility import MAX_SIZE, ROW_SIZE, rendering, sanitize_except, determine_day, Crypt from Whatsapp_Chat_Exporter.utility import brute_force_offset, CRYPT14_OFFSETS try: @@ -49,7 +49,7 @@ def _extract_encrypted_key(keyfile): key_stream += byte.to_bytes(1, "big", signed=True) return _generate_hmac_of_hmac(key_stream) - + def decrypt_backup(database, key, output, crypt=Crypt.CRYPT14, show_crypt15=False): if not support_backup: @@ -82,7 +82,7 @@ def decrypt_backup(database, key, output, crypt=Crypt.CRYPT14, show_crypt15=Fals raise ValueError("The crypt15 file must be at least 131 bytes") t1 = t2 = None iv = database[8:24] - db_offset = database[0] + 2 # Skip protobuf + protobuf size and backup type + db_offset = database[0] + 2 # Skip protobuf + protobuf size and backup type db_ciphertext = database[db_offset:] if t1 != t2: @@ -253,13 +253,13 @@ def messages(db, data): if content["key_remote_jid"] not in data: data[content["key_remote_jid"]] = ChatStore() if content["key_remote_jid"] is None: - continue # Not sure + continue # Not sure data[content["key_remote_jid"]].add_message(content["_id"], Message( from_me=content["key_from_me"], timestamp=content["timestamp"], time=content["timestamp"], key_id=content["key_id"], - )) + )) if "-" in content["key_remote_jid"] and content["key_from_me"] == 0: name = None if table_message: @@ -495,7 +495,7 @@ def vcard(db, data): ON jid._id = chat.jid_row_id ORDER BY message.chat_row_id ASC;""" ) - + rows = c.fetchall() total_row_number = len(rows) print(f"\nGathering vCards...(0/{total_row_number})", end="\r") @@ -558,7 +558,8 @@ def create_html( w3css = os.path.join(offline_static, "w3.css") for current, contact in enumerate(data): - if len(data[contact].messages) == 0: + chat = data[contact] + if len(chat.messages) == 0: continue phone_number = contact.split('@')[0] if "-" in contact: @@ -566,25 +567,62 @@ def create_html( else: file_name = phone_number - if data[contact].name is not None: + if chat.name is not None: if file_name != "": file_name += "-" - file_name += data[contact].name.replace("/", "-") - name = data[contact].name + file_name += chat.name.replace("/", "-") + name = chat.name else: name = phone_number - safe_file_name = '' + safe_file_name = "".join(x for x in file_name if x.isalnum() or x in "- ") - with open(f"{output_folder}/{safe_file_name}.html", "w", encoding="utf-8") as f: - f.write( - template.render( - name=name, - msgs=data[contact].messages.values(), - my_avatar=None, - their_avatar=f"WhatsApp/Avatars/{contact}.j", - w3css=w3css - ) - ) + + if maximum_size is not None: + current_size = 0 + current_page = 1 + render_box = [] + if maximum_size == 0: + maximum_size = MAX_SIZE + last_msg = chat.get_last_message().key_id + for message in chat.get_messages(): + if message.data is not None and not message.meta and not message.media: + current_size += len(message.data) + ROW_SIZE + else: + current_size += ROW_SIZE + 100 # Assume media and meta HTML are 100 bytes + if current_size > maximum_size: + output_file_name = f"{output_folder}/{safe_file_name}-{current_page}.html" + rendering( + output_file_name, + template, + name, + render_box, + contact, + w3css, + f"{safe_file_name}-{current_page + 1}.html" + ) + render_box = [message] + current_size = 0 + current_page += 1 + else: + if message.key_id == last_msg: + if current_page == 1: + output_file_name = f"{output_folder}/{safe_file_name}.html" + else: + output_file_name = f"{output_folder}/{safe_file_name}-{current_page}.html" + rendering( + output_file_name, + template, + name, + render_box, + contact, + w3css, + False + ) + else: + render_box.append(message) + else: + output_file_name = f"{output_folder}/{safe_file_name}.html" + rendering(output_file_name, template, name, chat.get_messages(), contact, w3css, False) if current % 10 == 0: print(f"Creating HTML...({current}/{total_row_number})", end="\r") diff --git a/Whatsapp_Chat_Exporter/extract_iphone.py b/Whatsapp_Chat_Exporter/extract_iphone.py index 16a4a35..7750146 100644 --- a/Whatsapp_Chat_Exporter/extract_iphone.py +++ b/Whatsapp_Chat_Exporter/extract_iphone.py @@ -2,15 +2,13 @@ import sqlite3 import json -import string import jinja2 import os import shutil from pathlib import Path -from datetime import datetime from mimetypes import MimeTypes from Whatsapp_Chat_Exporter.data_model import ChatStore, Message -from Whatsapp_Chat_Exporter.utility import sanitize_except, determine_day, APPLE_TIME +from Whatsapp_Chat_Exporter.utility import MAX_SIZE, ROW_SIZE, rendering, sanitize_except, determine_day, APPLE_TIME def messages(db, data): @@ -56,7 +54,7 @@ def messages(db, data): data[_id].add_message(Z_PK, Message( from_me=content["ZISFROMME"], timestamp=ts, - time=ts, # Could be bug + time=ts, # TODO: Could be bug key_id=content["ZSTANZAID"][:17], )) if "-" in _id and content["ZISFROMME"] == 0: @@ -226,7 +224,14 @@ def vcard(db, data): print(f"Gathering vCards...({index + 1}/{total_row_number})", end="\r") -def create_html(data, output_folder, template=None, embedded=False, offline_static=False, maximum_size=None): +def create_html( + data, + output_folder, + template=None, + embedded=False, + offline_static=False, + maximum_size=None + ): if template is None: template_dir = os.path.dirname(__file__) template_file = "whatsapp.html" @@ -258,7 +263,8 @@ def create_html(data, output_folder, template=None, embedded=False, offline_stat w3css = os.path.join(offline_static, "w3.css") for current, contact in enumerate(data): - if len(data[contact].messages) == 0: + chat = data[contact] + if len(chat.messages) == 0: continue phone_number = contact.split('@')[0] if "-" in contact: @@ -266,26 +272,62 @@ def create_html(data, output_folder, template=None, embedded=False, offline_stat else: file_name = phone_number - if data[contact].name is not None: + if chat.name is not None: if file_name != "": file_name += "-" - file_name += data[contact].name.replace("/", "-") - name = data[contact].name + file_name += chat.name.replace("/", "-") + name = chat.name else: name = phone_number - safe_file_name = '' safe_file_name = "".join(x for x in file_name if x.isalnum() or x in "- ") - with open(f"{output_folder}/{safe_file_name}.html", "w", encoding="utf-8") as f: - f.write( - template.render( - name=name, - msgs=data[contact].messages.values(), - my_avatar=None, - their_avatar=f"WhatsApp/Avatars/{contact}.j", - w3css=w3css - ) - ) + + if maximum_size is not None: + current_size = 0 + current_page = 1 + render_box = [] + if maximum_size == 0: + maximum_size = MAX_SIZE + last_msg = chat.get_last_message().key_id + for message in chat.get_messages(): + if message.data is not None and not message.meta and not message.media: + current_size += len(message.data) + ROW_SIZE + else: + current_size += ROW_SIZE + 100 # Assume media and meta HTML are 100 bytes + if current_size > maximum_size: + output_file_name = f"{output_folder}/{safe_file_name}-{current_page}.html" + rendering( + output_file_name, + template, + name, + render_box, + contact, + w3css, + f"{safe_file_name}-{current_page + 1}.html" + ) + render_box = [message] + current_size = 0 + current_page += 1 + else: + if message.key_id == last_msg: + if current_page == 1: + output_file_name = f"{output_folder}/{safe_file_name}.html" + else: + output_file_name = f"{output_folder}/{safe_file_name}-{current_page}.html" + rendering( + output_file_name, + template, + name, + render_box, + contact, + w3css, + False + ) + else: + render_box.append(message) + else: + output_file_name = f"{output_folder}/{safe_file_name}.html" + rendering(output_file_name, template, name, chat.get_messages(), contact, w3css, False) if current % 10 == 0: print(f"Creating HTML...({current}/{total_row_number})", end="\r") diff --git a/Whatsapp_Chat_Exporter/utility.py b/Whatsapp_Chat_Exporter/utility.py index d6175a8..c2860cd 100644 --- a/Whatsapp_Chat_Exporter/utility.py +++ b/Whatsapp_Chat_Exporter/utility.py @@ -4,6 +4,10 @@ from datetime import datetime from enum import Enum +MAX_SIZE = 4 * 1024 * 1024 # Default 4MB +ROW_SIZE = 0x300 + + def sanitize_except(html): return Markup(sanitize(html, tags=["br"])) @@ -17,28 +21,6 @@ def determine_day(last, current): return current -# Android Specific - -CRYPT14_OFFSETS = ( - {"iv": 67, "db": 191}, - {"iv": 67, "db": 190}, - {"iv": 66, "db": 99}, - {"iv": 67, "db": 193}, - {"iv": 67, "db": 194}, -) - - -class Crypt(Enum): - CRYPT15 = 15 - CRYPT14 = 14 - CRYPT12 = 12 - - -def brute_force_offset(max_iv=200, max_db=200): - for iv in range(0, max_iv): - for db in range(0, max_db): - yield iv, iv + 16, db - def check_update(): import urllib.request import json @@ -70,6 +52,42 @@ def check_update(): print("You are using the latest version of WhatsApp Chat Exporter.") return 0 -# iOS Specific +def rendering(output_file_name, template, name, msgs, contact, w3css, next): + with open(output_file_name, "w", encoding="utf-8") as f: + f.write( + template.render( + name=name, + msgs=msgs, + my_avatar=None, + their_avatar=f"WhatsApp/Avatars/{contact}.j", + w3css=w3css, + next=next + ) + ) + + +# Android Specific +CRYPT14_OFFSETS = ( + {"iv": 67, "db": 191}, + {"iv": 67, "db": 190}, + {"iv": 66, "db": 99}, + {"iv": 67, "db": 193}, + {"iv": 67, "db": 194}, +) + + +class Crypt(Enum): + CRYPT15 = 15 + CRYPT14 = 14 + CRYPT12 = 12 + + +def brute_force_offset(max_iv=200, max_db=200): + for iv in range(0, max_iv): + for db in range(0, max_db): + yield iv, iv + 16, db + + +# iOS Specific APPLE_TIME = datetime.timestamp(datetime(2001, 1, 1)) diff --git a/Whatsapp_Chat_Exporter/whatsapp.html b/Whatsapp_Chat_Exporter/whatsapp.html index 48c4ae6..8862f0e 100644 --- a/Whatsapp_Chat_Exporter/whatsapp.html +++ b/Whatsapp_Chat_Exporter/whatsapp.html @@ -193,7 +193,11 @@ \ No newline at end of file