Implement split outputs #23

This commit is contained in:
KnugiHK
2023-06-08 18:16:47 +08:00
parent dbdfdaedcf
commit f63b180500
6 changed files with 184 additions and 68 deletions

View File

@@ -125,9 +125,13 @@ def main():
parser.add_argument( parser.add_argument(
"--size", "--size",
"--output-size", "--output-size",
"--split",
dest="size", dest="size",
nargs='?',
type=int,
const=0,
default=None, default=None,
help="Maximum size of a single output file in bytes, 0 for auto (not yet implemented)" help="Maximum (Rough) size of a single output file in bytes, 0 for auto"
) )
parser.add_argument( parser.add_argument(
"--no-html", "--no-html",
@@ -216,7 +220,10 @@ def main():
elif args.iphone: elif args.iphone:
import sys import sys
if "--iphone" in sys.argv: if "--iphone" in sys.argv:
print("WARNING: The --iphone flag is deprecated and will be removed in the future. Use --ios instead.") print(
"WARNING: The --iphone flag is deprecated and will "
"be removed in the future. Use --ios instead."
)
messages = extract_iphone.messages messages = extract_iphone.messages
media = extract_iphone.media media = extract_iphone.media
vcard = extract_iphone.vcard vcard = extract_iphone.vcard
@@ -271,7 +278,7 @@ def main():
shutil.move(args.media, f"{args.output}/") shutil.move(args.media, f"{args.output}/")
except PermissionError: except PermissionError:
print("Cannot remove original WhatsApp directory. " print("Cannot remove original WhatsApp directory. "
"Perhaps the directory is opened?") "Perhaps the directory is opened?")
if args.json: if args.json:
if isinstance(data[next(iter(data))], ChatStore): if isinstance(data[next(iter(data))], ChatStore):

View File

@@ -19,9 +19,16 @@ class ChatStore():
del self.messages[id] del self.messages[id]
def to_json(self): def to_json(self):
serialized_msgs = {id : msg.to_json() for id,msg in self.messages.items()} serialized_msgs = {id: msg.to_json() for id, msg in self.messages.items()}
return {'name' : self.name, 'messages' : serialized_msgs} return {'name' : self.name, 'messages' : serialized_msgs}
def get_last_message(self):
    """Return the final entry of ``self.messages.values()``.

    Raises:
        IndexError: If the chat holds no messages.
    """
    ordered = list(self.messages.values())
    return ordered[-1]
def get_messages(self):
    """Return the values collection of ``self.messages``.

    The result is whatever ``self.messages.values()`` yields; it is not
    copied here.
    """
    stored = self.messages
    return stored.values()
class Message(): class Message():
def __init__(self, from_me: Union[bool,int], timestamp: int, time: str, key_id: int): def __init__(self, from_me: Union[bool,int], timestamp: int, time: str, key_id: int):
self.from_me = bool(from_me) self.from_me = bool(from_me)

View File

@@ -12,7 +12,7 @@ from pathlib import Path
from mimetypes import MimeTypes from mimetypes import MimeTypes
from hashlib import sha256 from hashlib import sha256
from Whatsapp_Chat_Exporter.data_model import ChatStore, Message from Whatsapp_Chat_Exporter.data_model import ChatStore, Message
from Whatsapp_Chat_Exporter.utility import sanitize_except, determine_day, Crypt from Whatsapp_Chat_Exporter.utility import MAX_SIZE, ROW_SIZE, rendering, sanitize_except, determine_day, Crypt
from Whatsapp_Chat_Exporter.utility import brute_force_offset, CRYPT14_OFFSETS from Whatsapp_Chat_Exporter.utility import brute_force_offset, CRYPT14_OFFSETS
try: try:
@@ -49,7 +49,7 @@ def _extract_encrypted_key(keyfile):
key_stream += byte.to_bytes(1, "big", signed=True) key_stream += byte.to_bytes(1, "big", signed=True)
return _generate_hmac_of_hmac(key_stream) return _generate_hmac_of_hmac(key_stream)
def decrypt_backup(database, key, output, crypt=Crypt.CRYPT14, show_crypt15=False): def decrypt_backup(database, key, output, crypt=Crypt.CRYPT14, show_crypt15=False):
if not support_backup: if not support_backup:
@@ -82,7 +82,7 @@ def decrypt_backup(database, key, output, crypt=Crypt.CRYPT14, show_crypt15=Fals
raise ValueError("The crypt15 file must be at least 131 bytes") raise ValueError("The crypt15 file must be at least 131 bytes")
t1 = t2 = None t1 = t2 = None
iv = database[8:24] iv = database[8:24]
db_offset = database[0] + 2 # Skip protobuf + protobuf size and backup type db_offset = database[0] + 2 # Skip protobuf + protobuf size and backup type
db_ciphertext = database[db_offset:] db_ciphertext = database[db_offset:]
if t1 != t2: if t1 != t2:
@@ -253,13 +253,13 @@ def messages(db, data):
if content["key_remote_jid"] not in data: if content["key_remote_jid"] not in data:
data[content["key_remote_jid"]] = ChatStore() data[content["key_remote_jid"]] = ChatStore()
if content["key_remote_jid"] is None: if content["key_remote_jid"] is None:
continue # Not sure continue # Not sure
data[content["key_remote_jid"]].add_message(content["_id"], Message( data[content["key_remote_jid"]].add_message(content["_id"], Message(
from_me=content["key_from_me"], from_me=content["key_from_me"],
timestamp=content["timestamp"], timestamp=content["timestamp"],
time=content["timestamp"], time=content["timestamp"],
key_id=content["key_id"], key_id=content["key_id"],
)) ))
if "-" in content["key_remote_jid"] and content["key_from_me"] == 0: if "-" in content["key_remote_jid"] and content["key_from_me"] == 0:
name = None name = None
if table_message: if table_message:
@@ -495,7 +495,7 @@ def vcard(db, data):
ON jid._id = chat.jid_row_id ON jid._id = chat.jid_row_id
ORDER BY message.chat_row_id ASC;""" ORDER BY message.chat_row_id ASC;"""
) )
rows = c.fetchall() rows = c.fetchall()
total_row_number = len(rows) total_row_number = len(rows)
print(f"\nGathering vCards...(0/{total_row_number})", end="\r") print(f"\nGathering vCards...(0/{total_row_number})", end="\r")
@@ -558,7 +558,8 @@ def create_html(
w3css = os.path.join(offline_static, "w3.css") w3css = os.path.join(offline_static, "w3.css")
for current, contact in enumerate(data): for current, contact in enumerate(data):
if len(data[contact].messages) == 0: chat = data[contact]
if len(chat.messages) == 0:
continue continue
phone_number = contact.split('@')[0] phone_number = contact.split('@')[0]
if "-" in contact: if "-" in contact:
@@ -566,25 +567,62 @@ def create_html(
else: else:
file_name = phone_number file_name = phone_number
if data[contact].name is not None: if chat.name is not None:
if file_name != "": if file_name != "":
file_name += "-" file_name += "-"
file_name += data[contact].name.replace("/", "-") file_name += chat.name.replace("/", "-")
name = data[contact].name name = chat.name
else: else:
name = phone_number name = phone_number
safe_file_name = ''
safe_file_name = "".join(x for x in file_name if x.isalnum() or x in "- ") safe_file_name = "".join(x for x in file_name if x.isalnum() or x in "- ")
with open(f"{output_folder}/{safe_file_name}.html", "w", encoding="utf-8") as f:
f.write( if maximum_size is not None:
template.render( current_size = 0
name=name, current_page = 1
msgs=data[contact].messages.values(), render_box = []
my_avatar=None, if maximum_size == 0:
their_avatar=f"WhatsApp/Avatars/{contact}.j", maximum_size = MAX_SIZE
w3css=w3css last_msg = chat.get_last_message().key_id
) for message in chat.get_messages():
) if message.data is not None and not message.meta and not message.media:
current_size += len(message.data) + ROW_SIZE
else:
current_size += ROW_SIZE + 100 # Assume media and meta HTML are 100 bytes
if current_size > maximum_size:
output_file_name = f"{output_folder}/{safe_file_name}-{current_page}.html"
rendering(
output_file_name,
template,
name,
render_box,
contact,
w3css,
f"{safe_file_name}-{current_page + 1}.html"
)
render_box = [message]
current_size = 0
current_page += 1
else:
if message.key_id == last_msg:
if current_page == 1:
output_file_name = f"{output_folder}/{safe_file_name}.html"
else:
output_file_name = f"{output_folder}/{safe_file_name}-{current_page}.html"
rendering(
output_file_name,
template,
name,
render_box,
contact,
w3css,
False
)
else:
render_box.append(message)
else:
output_file_name = f"{output_folder}/{safe_file_name}.html"
rendering(output_file_name, template, name, chat.get_messages(), contact, w3css, False)
if current % 10 == 0: if current % 10 == 0:
print(f"Creating HTML...({current}/{total_row_number})", end="\r") print(f"Creating HTML...({current}/{total_row_number})", end="\r")

View File

@@ -2,15 +2,13 @@
import sqlite3 import sqlite3
import json import json
import string
import jinja2 import jinja2
import os import os
import shutil import shutil
from pathlib import Path from pathlib import Path
from datetime import datetime
from mimetypes import MimeTypes from mimetypes import MimeTypes
from Whatsapp_Chat_Exporter.data_model import ChatStore, Message from Whatsapp_Chat_Exporter.data_model import ChatStore, Message
from Whatsapp_Chat_Exporter.utility import sanitize_except, determine_day, APPLE_TIME from Whatsapp_Chat_Exporter.utility import MAX_SIZE, ROW_SIZE, rendering, sanitize_except, determine_day, APPLE_TIME
def messages(db, data): def messages(db, data):
@@ -56,7 +54,7 @@ def messages(db, data):
data[_id].add_message(Z_PK, Message( data[_id].add_message(Z_PK, Message(
from_me=content["ZISFROMME"], from_me=content["ZISFROMME"],
timestamp=ts, timestamp=ts,
time=ts, # Could be bug time=ts, # TODO: Could be bug
key_id=content["ZSTANZAID"][:17], key_id=content["ZSTANZAID"][:17],
)) ))
if "-" in _id and content["ZISFROMME"] == 0: if "-" in _id and content["ZISFROMME"] == 0:
@@ -226,7 +224,14 @@ def vcard(db, data):
print(f"Gathering vCards...({index + 1}/{total_row_number})", end="\r") print(f"Gathering vCards...({index + 1}/{total_row_number})", end="\r")
def create_html(data, output_folder, template=None, embedded=False, offline_static=False, maximum_size=None): def create_html(
data,
output_folder,
template=None,
embedded=False,
offline_static=False,
maximum_size=None
):
if template is None: if template is None:
template_dir = os.path.dirname(__file__) template_dir = os.path.dirname(__file__)
template_file = "whatsapp.html" template_file = "whatsapp.html"
@@ -258,7 +263,8 @@ def create_html(data, output_folder, template=None, embedded=False, offline_stat
w3css = os.path.join(offline_static, "w3.css") w3css = os.path.join(offline_static, "w3.css")
for current, contact in enumerate(data): for current, contact in enumerate(data):
if len(data[contact].messages) == 0: chat = data[contact]
if len(chat.messages) == 0:
continue continue
phone_number = contact.split('@')[0] phone_number = contact.split('@')[0]
if "-" in contact: if "-" in contact:
@@ -266,26 +272,62 @@ def create_html(data, output_folder, template=None, embedded=False, offline_stat
else: else:
file_name = phone_number file_name = phone_number
if data[contact].name is not None: if chat.name is not None:
if file_name != "": if file_name != "":
file_name += "-" file_name += "-"
file_name += data[contact].name.replace("/", "-") file_name += chat.name.replace("/", "-")
name = data[contact].name name = chat.name
else: else:
name = phone_number name = phone_number
safe_file_name = ''
safe_file_name = "".join(x for x in file_name if x.isalnum() or x in "- ") safe_file_name = "".join(x for x in file_name if x.isalnum() or x in "- ")
with open(f"{output_folder}/{safe_file_name}.html", "w", encoding="utf-8") as f:
f.write( if maximum_size is not None:
template.render( current_size = 0
name=name, current_page = 1
msgs=data[contact].messages.values(), render_box = []
my_avatar=None, if maximum_size == 0:
their_avatar=f"WhatsApp/Avatars/{contact}.j", maximum_size = MAX_SIZE
w3css=w3css last_msg = chat.get_last_message().key_id
) for message in chat.get_messages():
) if message.data is not None and not message.meta and not message.media:
current_size += len(message.data) + ROW_SIZE
else:
current_size += ROW_SIZE + 100 # Assume media and meta HTML are 100 bytes
if current_size > maximum_size:
output_file_name = f"{output_folder}/{safe_file_name}-{current_page}.html"
rendering(
output_file_name,
template,
name,
render_box,
contact,
w3css,
f"{safe_file_name}-{current_page + 1}.html"
)
render_box = [message]
current_size = 0
current_page += 1
else:
if message.key_id == last_msg:
if current_page == 1:
output_file_name = f"{output_folder}/{safe_file_name}.html"
else:
output_file_name = f"{output_folder}/{safe_file_name}-{current_page}.html"
rendering(
output_file_name,
template,
name,
render_box,
contact,
w3css,
False
)
else:
render_box.append(message)
else:
output_file_name = f"{output_folder}/{safe_file_name}.html"
rendering(output_file_name, template, name, chat.get_messages(), contact, w3css, False)
if current % 10 == 0: if current % 10 == 0:
print(f"Creating HTML...({current}/{total_row_number})", end="\r") print(f"Creating HTML...({current}/{total_row_number})", end="\r")

View File

@@ -4,6 +4,10 @@ from datetime import datetime
from enum import Enum from enum import Enum
MAX_SIZE = 4 * 1024 * 1024 # Default 4MB
ROW_SIZE = 0x300
def sanitize_except(html): def sanitize_except(html):
return Markup(sanitize(html, tags=["br"])) return Markup(sanitize(html, tags=["br"]))
@@ -17,28 +21,6 @@ def determine_day(last, current):
return current return current
# Android Specific
CRYPT14_OFFSETS = (
{"iv": 67, "db": 191},
{"iv": 67, "db": 190},
{"iv": 66, "db": 99},
{"iv": 67, "db": 193},
{"iv": 67, "db": 194},
)
class Crypt(Enum):
CRYPT15 = 15
CRYPT14 = 14
CRYPT12 = 12
def brute_force_offset(max_iv=200, max_db=200):
for iv in range(0, max_iv):
for db in range(0, max_db):
yield iv, iv + 16, db
def check_update(): def check_update():
import urllib.request import urllib.request
import json import json
@@ -70,6 +52,42 @@ def check_update():
print("You are using the latest version of WhatsApp Chat Exporter.") print("You are using the latest version of WhatsApp Chat Exporter.")
return 0 return 0
# iOS Specific
def rendering(output_file_name, template, name, msgs, contact, w3css, next):
    """Render one chat page with *template* and write it to *output_file_name*.

    Args:
        output_file_name: Path of the HTML file to create or overwrite.
        template: Object exposing ``render(**context)`` that returns the page
            text (presumably a Jinja2 template -- TODO confirm with callers).
        name: Display name of the chat.
        msgs: Iterable of messages to show on this page.
        contact: Chat identifier, used to build the avatar path.
        w3css: Location of the w3.css stylesheet.
        next: File name of the following page, or a falsy value when this is
            the last page. The name shadows the builtin but is kept because
            it is part of the existing signature.
    """
    # Render before opening the file: mode "w" truncates on open, so a template
    # error raised after that point would leave an empty page on disk.
    page = template.render(
        name=name,
        msgs=msgs,
        my_avatar=None,
        their_avatar=f"WhatsApp/Avatars/{contact}.j",
        w3css=w3css,
        next=next,
    )
    with open(output_file_name, "w", encoding="utf-8") as f:
        f.write(page)
# Android Specific
# Candidate offset pairs, each stored as {"iv": ..., "db": ...}.
# NOTE(review): presumably the IV and database start positions tried for
# crypt14 backups -- confirm against decrypt_backup.
CRYPT14_OFFSETS = tuple(
    {"iv": iv_offset, "db": db_offset}
    for iv_offset, db_offset in (
        (67, 191),
        (67, 190),
        (66, 99),
        (67, 193),
        (67, 194),
    )
)
class Crypt(Enum):
    """Backup crypt variants; each member's value is its numeric suffix."""

    # Declaration order (newest first) is also the iteration order.
    CRYPT15 = 15
    CRYPT14 = 14
    CRYPT12 = 12
def brute_force_offset(max_iv=200, max_db=200):
    """Yield every ``(iv_start, iv_start + 16, db_start)`` candidate.

    ``iv_start`` runs over ``range(max_iv)`` in the outer loop and
    ``db_start`` over ``range(max_db)`` in the inner loop, so all database
    offsets are produced for one IV start before moving to the next.
    """
    for iv_start in range(max_iv):
        iv_end = iv_start + 16
        yield from ((iv_start, iv_end, db_start) for db_start in range(max_db))
# iOS Specific
APPLE_TIME = datetime.timestamp(datetime(2001, 1, 1)) APPLE_TIME = datetime.timestamp(datetime(2001, 1, 1))

View File

@@ -193,7 +193,11 @@
</div> </div>
</article> </article>
<footer class="w3-center"> <footer class="w3-center">
{% if next %}
<a href="./{{ next }}">Next</a>
{% else %}
End of history End of history
{% endif %}
</footer> </footer>
</body> </body>
</html> </html>