mirror of
https://github.com/KnugiHK/WhatsApp-Chat-Exporter.git
synced 2026-02-10 19:22:24 +00:00
Implement split outputs #23
This commit is contained in:
@@ -125,9 +125,13 @@ def main():
|
||||
parser.add_argument(
|
||||
"--size",
|
||||
"--output-size",
|
||||
"--split",
|
||||
dest="size",
|
||||
nargs='?',
|
||||
type=int,
|
||||
const=0,
|
||||
default=None,
|
||||
help="Maximum size of a single output file in bytes, 0 for auto (not yet implemented)"
|
||||
help="Maximum (Rough) size of a single output file in bytes, 0 for auto"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--no-html",
|
||||
@@ -216,7 +220,10 @@ def main():
|
||||
elif args.iphone:
|
||||
import sys
|
||||
if "--iphone" in sys.argv:
|
||||
print("WARNING: The --iphone flag is deprecated and will be removed in the future. Use --ios instead.")
|
||||
# Adjacent string literals are concatenated with no separator, so the
# first fragment must end with a space to avoid printing "willbe".
print(
    "WARNING: The --iphone flag is deprecated and will "
    "be removed in the future. Use --ios instead."
)
|
||||
messages = extract_iphone.messages
|
||||
media = extract_iphone.media
|
||||
vcard = extract_iphone.vcard
|
||||
@@ -271,7 +278,7 @@ def main():
|
||||
shutil.move(args.media, f"{args.output}/")
|
||||
except PermissionError:
|
||||
print("Cannot remove original WhatsApp directory. "
|
||||
"Perhaps the directory is opened?")
|
||||
"Perhaps the directory is opened?")
|
||||
|
||||
if args.json:
|
||||
if isinstance(data[next(iter(data))], ChatStore):
|
||||
|
||||
@@ -19,9 +19,16 @@ class ChatStore():
|
||||
del self.messages[id]
|
||||
|
||||
def to_json(self):
    """Return a JSON-serializable dict describing this chat.

    Returns:
        A dict with two keys: ``'name'`` (the value of ``self.name``) and
        ``'messages'`` (a dict mapping each key of ``self.messages`` to the
        result of that message's own ``to_json()``).
    """
    # ``msg_id`` rather than ``id`` so the builtin is not shadowed.
    serialized_msgs = {
        msg_id: msg.to_json() for msg_id, msg in self.messages.items()
    }
    return {'name': self.name, 'messages': serialized_msgs}
|
||||
|
||||
def get_last_message(self):
    """Return the last value stored in ``self.messages``.

    "Last" means last in the iteration order of ``self.messages.values()``.

    Raises:
        IndexError: If the chat holds no messages.
    """
    return tuple(self.messages.values())[-1]
|
||||
|
||||
def get_messages(self):
    """Return a view over the stored messages (``self.messages.values()``)."""
    return self.messages.values()
|
||||
|
||||
|
||||
class Message():
|
||||
def __init__(self, from_me: Union[bool,int], timestamp: int, time: str, key_id: int):
|
||||
self.from_me = bool(from_me)
|
||||
|
||||
@@ -12,7 +12,7 @@ from pathlib import Path
|
||||
from mimetypes import MimeTypes
|
||||
from hashlib import sha256
|
||||
from Whatsapp_Chat_Exporter.data_model import ChatStore, Message
|
||||
from Whatsapp_Chat_Exporter.utility import sanitize_except, determine_day, Crypt
|
||||
from Whatsapp_Chat_Exporter.utility import MAX_SIZE, ROW_SIZE, rendering, sanitize_except, determine_day, Crypt
|
||||
from Whatsapp_Chat_Exporter.utility import brute_force_offset, CRYPT14_OFFSETS
|
||||
|
||||
try:
|
||||
@@ -49,7 +49,7 @@ def _extract_encrypted_key(keyfile):
|
||||
key_stream += byte.to_bytes(1, "big", signed=True)
|
||||
|
||||
return _generate_hmac_of_hmac(key_stream)
|
||||
|
||||
|
||||
|
||||
def decrypt_backup(database, key, output, crypt=Crypt.CRYPT14, show_crypt15=False):
|
||||
if not support_backup:
|
||||
@@ -82,7 +82,7 @@ def decrypt_backup(database, key, output, crypt=Crypt.CRYPT14, show_crypt15=Fals
|
||||
raise ValueError("The crypt15 file must be at least 131 bytes")
|
||||
t1 = t2 = None
|
||||
iv = database[8:24]
|
||||
db_offset = database[0] + 2 # Skip protobuf + protobuf size and backup type
|
||||
db_offset = database[0] + 2 # Skip protobuf + protobuf size and backup type
|
||||
db_ciphertext = database[db_offset:]
|
||||
|
||||
if t1 != t2:
|
||||
@@ -253,13 +253,13 @@ def messages(db, data):
|
||||
if content["key_remote_jid"] not in data:
|
||||
data[content["key_remote_jid"]] = ChatStore()
|
||||
if content["key_remote_jid"] is None:
|
||||
continue # Not sure
|
||||
continue # Not sure
|
||||
data[content["key_remote_jid"]].add_message(content["_id"], Message(
|
||||
from_me=content["key_from_me"],
|
||||
timestamp=content["timestamp"],
|
||||
time=content["timestamp"],
|
||||
key_id=content["key_id"],
|
||||
))
|
||||
))
|
||||
if "-" in content["key_remote_jid"] and content["key_from_me"] == 0:
|
||||
name = None
|
||||
if table_message:
|
||||
@@ -495,7 +495,7 @@ def vcard(db, data):
|
||||
ON jid._id = chat.jid_row_id
|
||||
ORDER BY message.chat_row_id ASC;"""
|
||||
)
|
||||
|
||||
|
||||
rows = c.fetchall()
|
||||
total_row_number = len(rows)
|
||||
print(f"\nGathering vCards...(0/{total_row_number})", end="\r")
|
||||
@@ -558,7 +558,8 @@ def create_html(
|
||||
w3css = os.path.join(offline_static, "w3.css")
|
||||
|
||||
for current, contact in enumerate(data):
|
||||
if len(data[contact].messages) == 0:
|
||||
chat = data[contact]
|
||||
if len(chat.messages) == 0:
|
||||
continue
|
||||
phone_number = contact.split('@')[0]
|
||||
if "-" in contact:
|
||||
@@ -566,25 +567,62 @@ def create_html(
|
||||
else:
|
||||
file_name = phone_number
|
||||
|
||||
if data[contact].name is not None:
|
||||
if chat.name is not None:
|
||||
if file_name != "":
|
||||
file_name += "-"
|
||||
file_name += data[contact].name.replace("/", "-")
|
||||
name = data[contact].name
|
||||
file_name += chat.name.replace("/", "-")
|
||||
name = chat.name
|
||||
else:
|
||||
name = phone_number
|
||||
safe_file_name = ''
|
||||
|
||||
safe_file_name = "".join(x for x in file_name if x.isalnum() or x in "- ")
|
||||
with open(f"{output_folder}/{safe_file_name}.html", "w", encoding="utf-8") as f:
|
||||
f.write(
|
||||
template.render(
|
||||
name=name,
|
||||
msgs=data[contact].messages.values(),
|
||||
my_avatar=None,
|
||||
their_avatar=f"WhatsApp/Avatars/{contact}.j",
|
||||
w3css=w3css
|
||||
)
|
||||
)
|
||||
|
||||
if maximum_size is not None:
|
||||
current_size = 0
|
||||
current_page = 1
|
||||
render_box = []
|
||||
if maximum_size == 0:
|
||||
maximum_size = MAX_SIZE
|
||||
last_msg = chat.get_last_message().key_id
|
||||
for message in chat.get_messages():
|
||||
if message.data is not None and not message.meta and not message.media:
|
||||
current_size += len(message.data) + ROW_SIZE
|
||||
else:
|
||||
current_size += ROW_SIZE + 100 # Assume media and meta HTML are 100 bytes
|
||||
if current_size > maximum_size:
|
||||
output_file_name = f"{output_folder}/{safe_file_name}-{current_page}.html"
|
||||
rendering(
|
||||
output_file_name,
|
||||
template,
|
||||
name,
|
||||
render_box,
|
||||
contact,
|
||||
w3css,
|
||||
f"{safe_file_name}-{current_page + 1}.html"
|
||||
)
|
||||
render_box = [message]
|
||||
current_size = 0
|
||||
current_page += 1
|
||||
else:
|
||||
if message.key_id == last_msg:
|
||||
if current_page == 1:
|
||||
output_file_name = f"{output_folder}/{safe_file_name}.html"
|
||||
else:
|
||||
output_file_name = f"{output_folder}/{safe_file_name}-{current_page}.html"
|
||||
rendering(
|
||||
output_file_name,
|
||||
template,
|
||||
name,
|
||||
render_box,
|
||||
contact,
|
||||
w3css,
|
||||
False
|
||||
)
|
||||
else:
|
||||
render_box.append(message)
|
||||
else:
|
||||
output_file_name = f"{output_folder}/{safe_file_name}.html"
|
||||
rendering(output_file_name, template, name, chat.get_messages(), contact, w3css, False)
|
||||
if current % 10 == 0:
|
||||
print(f"Creating HTML...({current}/{total_row_number})", end="\r")
|
||||
|
||||
|
||||
@@ -2,15 +2,13 @@
|
||||
|
||||
import sqlite3
|
||||
import json
|
||||
import string
|
||||
import jinja2
|
||||
import os
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
from mimetypes import MimeTypes
|
||||
from Whatsapp_Chat_Exporter.data_model import ChatStore, Message
|
||||
from Whatsapp_Chat_Exporter.utility import sanitize_except, determine_day, APPLE_TIME
|
||||
from Whatsapp_Chat_Exporter.utility import MAX_SIZE, ROW_SIZE, rendering, sanitize_except, determine_day, APPLE_TIME
|
||||
|
||||
|
||||
def messages(db, data):
|
||||
@@ -56,7 +54,7 @@ def messages(db, data):
|
||||
data[_id].add_message(Z_PK, Message(
|
||||
from_me=content["ZISFROMME"],
|
||||
timestamp=ts,
|
||||
time=ts, # Could be bug
|
||||
time=ts, # TODO: Could be bug
|
||||
key_id=content["ZSTANZAID"][:17],
|
||||
))
|
||||
if "-" in _id and content["ZISFROMME"] == 0:
|
||||
@@ -226,7 +224,14 @@ def vcard(db, data):
|
||||
print(f"Gathering vCards...({index + 1}/{total_row_number})", end="\r")
|
||||
|
||||
|
||||
def create_html(data, output_folder, template=None, embedded=False, offline_static=False, maximum_size=None):
|
||||
def create_html(
|
||||
data,
|
||||
output_folder,
|
||||
template=None,
|
||||
embedded=False,
|
||||
offline_static=False,
|
||||
maximum_size=None
|
||||
):
|
||||
if template is None:
|
||||
template_dir = os.path.dirname(__file__)
|
||||
template_file = "whatsapp.html"
|
||||
@@ -258,7 +263,8 @@ def create_html(data, output_folder, template=None, embedded=False, offline_stat
|
||||
w3css = os.path.join(offline_static, "w3.css")
|
||||
|
||||
for current, contact in enumerate(data):
|
||||
if len(data[contact].messages) == 0:
|
||||
chat = data[contact]
|
||||
if len(chat.messages) == 0:
|
||||
continue
|
||||
phone_number = contact.split('@')[0]
|
||||
if "-" in contact:
|
||||
@@ -266,26 +272,62 @@ def create_html(data, output_folder, template=None, embedded=False, offline_stat
|
||||
else:
|
||||
file_name = phone_number
|
||||
|
||||
if data[contact].name is not None:
|
||||
if chat.name is not None:
|
||||
if file_name != "":
|
||||
file_name += "-"
|
||||
file_name += data[contact].name.replace("/", "-")
|
||||
name = data[contact].name
|
||||
file_name += chat.name.replace("/", "-")
|
||||
name = chat.name
|
||||
else:
|
||||
name = phone_number
|
||||
|
||||
safe_file_name = ''
|
||||
safe_file_name = "".join(x for x in file_name if x.isalnum() or x in "- ")
|
||||
with open(f"{output_folder}/{safe_file_name}.html", "w", encoding="utf-8") as f:
|
||||
f.write(
|
||||
template.render(
|
||||
name=name,
|
||||
msgs=data[contact].messages.values(),
|
||||
my_avatar=None,
|
||||
their_avatar=f"WhatsApp/Avatars/{contact}.j",
|
||||
w3css=w3css
|
||||
)
|
||||
)
|
||||
|
||||
if maximum_size is not None:
|
||||
current_size = 0
|
||||
current_page = 1
|
||||
render_box = []
|
||||
if maximum_size == 0:
|
||||
maximum_size = MAX_SIZE
|
||||
last_msg = chat.get_last_message().key_id
|
||||
for message in chat.get_messages():
|
||||
if message.data is not None and not message.meta and not message.media:
|
||||
current_size += len(message.data) + ROW_SIZE
|
||||
else:
|
||||
current_size += ROW_SIZE + 100 # Assume media and meta HTML are 100 bytes
|
||||
if current_size > maximum_size:
|
||||
output_file_name = f"{output_folder}/{safe_file_name}-{current_page}.html"
|
||||
rendering(
|
||||
output_file_name,
|
||||
template,
|
||||
name,
|
||||
render_box,
|
||||
contact,
|
||||
w3css,
|
||||
f"{safe_file_name}-{current_page + 1}.html"
|
||||
)
|
||||
render_box = [message]
|
||||
current_size = 0
|
||||
current_page += 1
|
||||
else:
|
||||
if message.key_id == last_msg:
|
||||
if current_page == 1:
|
||||
output_file_name = f"{output_folder}/{safe_file_name}.html"
|
||||
else:
|
||||
output_file_name = f"{output_folder}/{safe_file_name}-{current_page}.html"
|
||||
rendering(
|
||||
output_file_name,
|
||||
template,
|
||||
name,
|
||||
render_box,
|
||||
contact,
|
||||
w3css,
|
||||
False
|
||||
)
|
||||
else:
|
||||
render_box.append(message)
|
||||
else:
|
||||
output_file_name = f"{output_folder}/{safe_file_name}.html"
|
||||
rendering(output_file_name, template, name, chat.get_messages(), contact, w3css, False)
|
||||
if current % 10 == 0:
|
||||
print(f"Creating HTML...({current}/{total_row_number})", end="\r")
|
||||
|
||||
|
||||
@@ -4,6 +4,10 @@ from datetime import datetime
|
||||
from enum import Enum
|
||||
|
||||
|
||||
MAX_SIZE = 4 * 1024 * 1024  # Default per-file size cap in bytes (4 MiB); substituted when the requested maximum size is 0
|
||||
ROW_SIZE = 0x300  # Fixed amount (0x300 == 768) added per message when estimating output size; presumably the HTML overhead of one row in bytes
|
||||
|
||||
|
||||
def sanitize_except(html):
    """Sanitize *html* and return the result wrapped in ``Markup``.

    ``sanitize`` is called with ``tags=["br"]``; presumably this is the
    allow-list, so only ``<br>`` survives sanitization (confirm against the
    imported ``sanitize`` implementation).
    """
    return Markup(sanitize(html, tags=["br"]))
|
||||
|
||||
@@ -17,28 +21,6 @@ def determine_day(last, current):
|
||||
return current
|
||||
|
||||
|
||||
# Android Specific
|
||||
|
||||
CRYPT14_OFFSETS = (
|
||||
{"iv": 67, "db": 191},
|
||||
{"iv": 67, "db": 190},
|
||||
{"iv": 66, "db": 99},
|
||||
{"iv": 67, "db": 193},
|
||||
{"iv": 67, "db": 194},
|
||||
)
|
||||
|
||||
|
||||
class Crypt(Enum):
|
||||
CRYPT15 = 15
|
||||
CRYPT14 = 14
|
||||
CRYPT12 = 12
|
||||
|
||||
|
||||
def brute_force_offset(max_iv=200, max_db=200):
|
||||
for iv in range(0, max_iv):
|
||||
for db in range(0, max_db):
|
||||
yield iv, iv + 16, db
|
||||
|
||||
def check_update():
|
||||
import urllib.request
|
||||
import json
|
||||
@@ -70,6 +52,42 @@ def check_update():
|
||||
print("You are using the latest version of WhatsApp Chat Exporter.")
|
||||
return 0
|
||||
|
||||
# iOS Specific
|
||||
|
||||
def rendering(output_file_name, template, name, msgs, contact, w3css, next):
|
||||
with open(output_file_name, "w", encoding="utf-8") as f:
|
||||
f.write(
|
||||
template.render(
|
||||
name=name,
|
||||
msgs=msgs,
|
||||
my_avatar=None,
|
||||
their_avatar=f"WhatsApp/Avatars/{contact}.j",
|
||||
w3css=w3css,
|
||||
next=next
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
# Android Specific
|
||||
# Candidate {"iv", "db"} offset pairs; presumably the byte offsets of the IV
# and of the database payload tried when decrypting crypt14 backups
# (confirm against decrypt_backup).
CRYPT14_OFFSETS = (
    {"iv": 67, "db": 191},
    {"iv": 67, "db": 190},
    {"iv": 66, "db": 99},
    {"iv": 67, "db": 193},
    {"iv": 67, "db": 194},
)
|
||||
|
||||
|
||||
class Crypt(Enum):
    """Backup encryption variants; each member's value is its numeric suffix."""

    CRYPT15 = 15
    CRYPT14 = 14
    CRYPT12 = 12
|
||||
|
||||
|
||||
def brute_force_offset(max_iv=200, max_db=200):
    """Yield every candidate ``(iv, iv + 16, db)`` offset triple.

    Args:
        max_iv: Exclusive upper bound for the first offset.
        max_db: Exclusive upper bound for the third offset.

    Yields:
        Tuples ``(iv, iv + 16, db)`` for each ``iv`` in ``range(max_iv)`` and
        each ``db`` in ``range(max_db)``, with ``db`` varying fastest. The
        fixed span of 16 is presumably the IV length in bytes (confirm
        against the caller).
    """
    for iv in range(0, max_iv):
        for db in range(0, max_db):
            yield iv, iv + 16, db
|
||||
|
||||
|
||||
# iOS Specific
|
||||
# POSIX timestamp of 2001-01-01 00:00; presumably the epoch that iOS message dates are offset from.
# NOTE(review): datetime(2001, 1, 1) is naive, so timestamp() interprets it in the local timezone — confirm this is intended.
APPLE_TIME = datetime.timestamp(datetime(2001, 1, 1))
|
||||
|
||||
@@ -193,7 +193,11 @@
|
||||
</div>
|
||||
</article>
|
||||
<footer class="w3-center">
|
||||
{% if next %}
|
||||
<a href="./{{ next }}">Next</a>
|
||||
{% else %}
|
||||
End of history
|
||||
{% endif %}
|
||||
</footer>
|
||||
</body>
|
||||
</html>
|
||||
Reference in New Issue
Block a user