Implement splitted outputs #23

This commit is contained in:
KnugiHK
2023-06-08 18:16:47 +08:00
parent dbdfdaedcf
commit f63b180500
6 changed files with 184 additions and 68 deletions

View File

@@ -125,9 +125,13 @@ def main():
parser.add_argument(
"--size",
"--output-size",
"--split",
dest="size",
nargs='?',
type=int,
const=0,
default=None,
help="Maximum size of a single output file in bytes, 0 for auto (not yet implemented)"
help="Maximum (Rough) size of a single output file in bytes, 0 for auto"
)
parser.add_argument(
"--no-html",
@@ -216,7 +220,10 @@ def main():
elif args.iphone:
import sys
if "--iphone" in sys.argv:
print("WARNING: The --iphone flag is deprecated and will be removed in the future. Use --ios instead.")
print(
"WARNING: The --iphone flag is deprecated and will"
"be removed in the future. Use --ios instead."
)
messages = extract_iphone.messages
media = extract_iphone.media
vcard = extract_iphone.vcard
@@ -271,7 +278,7 @@ def main():
shutil.move(args.media, f"{args.output}/")
except PermissionError:
print("Cannot remove original WhatsApp directory. "
"Perhaps the directory is opened?")
"Perhaps the directory is opened?")
if args.json:
if isinstance(data[next(iter(data))], ChatStore):

View File

@@ -19,9 +19,16 @@ class ChatStore():
del self.messages[id]
def to_json(self):
serialized_msgs = {id : msg.to_json() for id,msg in self.messages.items()}
serialized_msgs = {id: msg.to_json() for id, msg in self.messages.items()}
return {'name' : self.name, 'messages' : serialized_msgs}
def get_last_message(self):
return tuple(self.messages.values())[-1]
def get_messages(self):
return self.messages.values()
class Message():
def __init__(self, from_me: Union[bool,int], timestamp: int, time: str, key_id: int):
self.from_me = bool(from_me)

View File

@@ -12,7 +12,7 @@ from pathlib import Path
from mimetypes import MimeTypes
from hashlib import sha256
from Whatsapp_Chat_Exporter.data_model import ChatStore, Message
from Whatsapp_Chat_Exporter.utility import sanitize_except, determine_day, Crypt
from Whatsapp_Chat_Exporter.utility import MAX_SIZE, ROW_SIZE, rendering, sanitize_except, determine_day, Crypt
from Whatsapp_Chat_Exporter.utility import brute_force_offset, CRYPT14_OFFSETS
try:
@@ -49,7 +49,7 @@ def _extract_encrypted_key(keyfile):
key_stream += byte.to_bytes(1, "big", signed=True)
return _generate_hmac_of_hmac(key_stream)
def decrypt_backup(database, key, output, crypt=Crypt.CRYPT14, show_crypt15=False):
if not support_backup:
@@ -82,7 +82,7 @@ def decrypt_backup(database, key, output, crypt=Crypt.CRYPT14, show_crypt15=Fals
raise ValueError("The crypt15 file must be at least 131 bytes")
t1 = t2 = None
iv = database[8:24]
db_offset = database[0] + 2 # Skip protobuf + protobuf size and backup type
db_offset = database[0] + 2 # Skip protobuf + protobuf size and backup type
db_ciphertext = database[db_offset:]
if t1 != t2:
@@ -253,13 +253,13 @@ def messages(db, data):
if content["key_remote_jid"] not in data:
data[content["key_remote_jid"]] = ChatStore()
if content["key_remote_jid"] is None:
continue # Not sure
continue # Not sure
data[content["key_remote_jid"]].add_message(content["_id"], Message(
from_me=content["key_from_me"],
timestamp=content["timestamp"],
time=content["timestamp"],
key_id=content["key_id"],
))
))
if "-" in content["key_remote_jid"] and content["key_from_me"] == 0:
name = None
if table_message:
@@ -495,7 +495,7 @@ def vcard(db, data):
ON jid._id = chat.jid_row_id
ORDER BY message.chat_row_id ASC;"""
)
rows = c.fetchall()
total_row_number = len(rows)
print(f"\nGathering vCards...(0/{total_row_number})", end="\r")
@@ -558,7 +558,8 @@ def create_html(
w3css = os.path.join(offline_static, "w3.css")
for current, contact in enumerate(data):
if len(data[contact].messages) == 0:
chat = data[contact]
if len(chat.messages) == 0:
continue
phone_number = contact.split('@')[0]
if "-" in contact:
@@ -566,25 +567,62 @@ def create_html(
else:
file_name = phone_number
if data[contact].name is not None:
if chat.name is not None:
if file_name != "":
file_name += "-"
file_name += data[contact].name.replace("/", "-")
name = data[contact].name
file_name += chat.name.replace("/", "-")
name = chat.name
else:
name = phone_number
safe_file_name = ''
safe_file_name = "".join(x for x in file_name if x.isalnum() or x in "- ")
with open(f"{output_folder}/{safe_file_name}.html", "w", encoding="utf-8") as f:
f.write(
template.render(
name=name,
msgs=data[contact].messages.values(),
my_avatar=None,
their_avatar=f"WhatsApp/Avatars/{contact}.j",
w3css=w3css
)
)
if maximum_size is not None:
current_size = 0
current_page = 1
render_box = []
if maximum_size == 0:
maximum_size = MAX_SIZE
last_msg = chat.get_last_message().key_id
for message in chat.get_messages():
if message.data is not None and not message.meta and not message.media:
current_size += len(message.data) + ROW_SIZE
else:
current_size += ROW_SIZE + 100 # Assume media and meta HTML are 100 bytes
if current_size > maximum_size:
output_file_name = f"{output_folder}/{safe_file_name}-{current_page}.html"
rendering(
output_file_name,
template,
name,
render_box,
contact,
w3css,
f"{safe_file_name}-{current_page + 1}.html"
)
render_box = [message]
current_size = 0
current_page += 1
else:
if message.key_id == last_msg:
if current_page == 1:
output_file_name = f"{output_folder}/{safe_file_name}.html"
else:
output_file_name = f"{output_folder}/{safe_file_name}-{current_page}.html"
rendering(
output_file_name,
template,
name,
render_box,
contact,
w3css,
False
)
else:
render_box.append(message)
else:
output_file_name = f"{output_folder}/{safe_file_name}.html"
rendering(output_file_name, template, name, chat.get_messages(), contact, w3css, False)
if current % 10 == 0:
print(f"Creating HTML...({current}/{total_row_number})", end="\r")

View File

@@ -2,15 +2,13 @@
import sqlite3
import json
import string
import jinja2
import os
import shutil
from pathlib import Path
from datetime import datetime
from mimetypes import MimeTypes
from Whatsapp_Chat_Exporter.data_model import ChatStore, Message
from Whatsapp_Chat_Exporter.utility import sanitize_except, determine_day, APPLE_TIME
from Whatsapp_Chat_Exporter.utility import MAX_SIZE, ROW_SIZE, rendering, sanitize_except, determine_day, APPLE_TIME
def messages(db, data):
@@ -56,7 +54,7 @@ def messages(db, data):
data[_id].add_message(Z_PK, Message(
from_me=content["ZISFROMME"],
timestamp=ts,
time=ts, # Could be bug
time=ts, # TODO: Could be bug
key_id=content["ZSTANZAID"][:17],
))
if "-" in _id and content["ZISFROMME"] == 0:
@@ -226,7 +224,14 @@ def vcard(db, data):
print(f"Gathering vCards...({index + 1}/{total_row_number})", end="\r")
def create_html(data, output_folder, template=None, embedded=False, offline_static=False, maximum_size=None):
def create_html(
data,
output_folder,
template=None,
embedded=False,
offline_static=False,
maximum_size=None
):
if template is None:
template_dir = os.path.dirname(__file__)
template_file = "whatsapp.html"
@@ -258,7 +263,8 @@ def create_html(data, output_folder, template=None, embedded=False, offline_stat
w3css = os.path.join(offline_static, "w3.css")
for current, contact in enumerate(data):
if len(data[contact].messages) == 0:
chat = data[contact]
if len(chat.messages) == 0:
continue
phone_number = contact.split('@')[0]
if "-" in contact:
@@ -266,26 +272,62 @@ def create_html(data, output_folder, template=None, embedded=False, offline_stat
else:
file_name = phone_number
if data[contact].name is not None:
if chat.name is not None:
if file_name != "":
file_name += "-"
file_name += data[contact].name.replace("/", "-")
name = data[contact].name
file_name += chat.name.replace("/", "-")
name = chat.name
else:
name = phone_number
safe_file_name = ''
safe_file_name = "".join(x for x in file_name if x.isalnum() or x in "- ")
with open(f"{output_folder}/{safe_file_name}.html", "w", encoding="utf-8") as f:
f.write(
template.render(
name=name,
msgs=data[contact].messages.values(),
my_avatar=None,
their_avatar=f"WhatsApp/Avatars/{contact}.j",
w3css=w3css
)
)
if maximum_size is not None:
current_size = 0
current_page = 1
render_box = []
if maximum_size == 0:
maximum_size = MAX_SIZE
last_msg = chat.get_last_message().key_id
for message in chat.get_messages():
if message.data is not None and not message.meta and not message.media:
current_size += len(message.data) + ROW_SIZE
else:
current_size += ROW_SIZE + 100 # Assume media and meta HTML are 100 bytes
if current_size > maximum_size:
output_file_name = f"{output_folder}/{safe_file_name}-{current_page}.html"
rendering(
output_file_name,
template,
name,
render_box,
contact,
w3css,
f"{safe_file_name}-{current_page + 1}.html"
)
render_box = [message]
current_size = 0
current_page += 1
else:
if message.key_id == last_msg:
if current_page == 1:
output_file_name = f"{output_folder}/{safe_file_name}.html"
else:
output_file_name = f"{output_folder}/{safe_file_name}-{current_page}.html"
rendering(
output_file_name,
template,
name,
render_box,
contact,
w3css,
False
)
else:
render_box.append(message)
else:
output_file_name = f"{output_folder}/{safe_file_name}.html"
rendering(output_file_name, template, name, chat.get_messages(), contact, w3css, False)
if current % 10 == 0:
print(f"Creating HTML...({current}/{total_row_number})", end="\r")

View File

@@ -4,6 +4,10 @@ from datetime import datetime
from enum import Enum
MAX_SIZE = 4 * 1024 * 1024 # Default 4MB
ROW_SIZE = 0x300
def sanitize_except(html):
return Markup(sanitize(html, tags=["br"]))
@@ -17,28 +21,6 @@ def determine_day(last, current):
return current
# Android Specific
CRYPT14_OFFSETS = (
{"iv": 67, "db": 191},
{"iv": 67, "db": 190},
{"iv": 66, "db": 99},
{"iv": 67, "db": 193},
{"iv": 67, "db": 194},
)
class Crypt(Enum):
CRYPT15 = 15
CRYPT14 = 14
CRYPT12 = 12
def brute_force_offset(max_iv=200, max_db=200):
for iv in range(0, max_iv):
for db in range(0, max_db):
yield iv, iv + 16, db
def check_update():
import urllib.request
import json
@@ -70,6 +52,42 @@ def check_update():
print("You are using the latest version of WhatsApp Chat Exporter.")
return 0
# iOS Specific
def rendering(output_file_name, template, name, msgs, contact, w3css, next):
with open(output_file_name, "w", encoding="utf-8") as f:
f.write(
template.render(
name=name,
msgs=msgs,
my_avatar=None,
their_avatar=f"WhatsApp/Avatars/{contact}.j",
w3css=w3css,
next=next
)
)
# Android Specific
CRYPT14_OFFSETS = (
{"iv": 67, "db": 191},
{"iv": 67, "db": 190},
{"iv": 66, "db": 99},
{"iv": 67, "db": 193},
{"iv": 67, "db": 194},
)
class Crypt(Enum):
CRYPT15 = 15
CRYPT14 = 14
CRYPT12 = 12
def brute_force_offset(max_iv=200, max_db=200):
for iv in range(0, max_iv):
for db in range(0, max_db):
yield iv, iv + 16, db
# iOS Specific
APPLE_TIME = datetime.timestamp(datetime(2001, 1, 1))

View File

@@ -193,7 +193,11 @@
</div>
</article>
<footer class="w3-center">
{% if next %}
<a href="./{{ next }}">Next</a>
{% else %}
End of history
{% endif %}
</footer>
</body>
</html>