mirror of
https://github.com/KnugiHK/WhatsApp-Chat-Exporter.git
synced 2026-04-27 16:24:53 +00:00
Implement splitted outputs #23
This commit is contained in:
@@ -125,9 +125,13 @@ def main():
|
|||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--size",
|
"--size",
|
||||||
"--output-size",
|
"--output-size",
|
||||||
|
"--split",
|
||||||
dest="size",
|
dest="size",
|
||||||
|
nargs='?',
|
||||||
|
type=int,
|
||||||
|
const=0,
|
||||||
default=None,
|
default=None,
|
||||||
help="Maximum size of a single output file in bytes, 0 for auto (not yet implemented)"
|
help="Maximum (Rough) size of a single output file in bytes, 0 for auto"
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--no-html",
|
"--no-html",
|
||||||
@@ -216,7 +220,10 @@ def main():
|
|||||||
elif args.iphone:
|
elif args.iphone:
|
||||||
import sys
|
import sys
|
||||||
if "--iphone" in sys.argv:
|
if "--iphone" in sys.argv:
|
||||||
print("WARNING: The --iphone flag is deprecated and will be removed in the future. Use --ios instead.")
|
print(
|
||||||
|
"WARNING: The --iphone flag is deprecated and will"
|
||||||
|
"be removed in the future. Use --ios instead."
|
||||||
|
)
|
||||||
messages = extract_iphone.messages
|
messages = extract_iphone.messages
|
||||||
media = extract_iphone.media
|
media = extract_iphone.media
|
||||||
vcard = extract_iphone.vcard
|
vcard = extract_iphone.vcard
|
||||||
@@ -271,7 +278,7 @@ def main():
|
|||||||
shutil.move(args.media, f"{args.output}/")
|
shutil.move(args.media, f"{args.output}/")
|
||||||
except PermissionError:
|
except PermissionError:
|
||||||
print("Cannot remove original WhatsApp directory. "
|
print("Cannot remove original WhatsApp directory. "
|
||||||
"Perhaps the directory is opened?")
|
"Perhaps the directory is opened?")
|
||||||
|
|
||||||
if args.json:
|
if args.json:
|
||||||
if isinstance(data[next(iter(data))], ChatStore):
|
if isinstance(data[next(iter(data))], ChatStore):
|
||||||
|
|||||||
@@ -19,9 +19,16 @@ class ChatStore():
|
|||||||
del self.messages[id]
|
del self.messages[id]
|
||||||
|
|
||||||
def to_json(self):
|
def to_json(self):
|
||||||
serialized_msgs = {id : msg.to_json() for id,msg in self.messages.items()}
|
serialized_msgs = {id: msg.to_json() for id, msg in self.messages.items()}
|
||||||
return {'name' : self.name, 'messages' : serialized_msgs}
|
return {'name' : self.name, 'messages' : serialized_msgs}
|
||||||
|
|
||||||
|
def get_last_message(self):
|
||||||
|
return tuple(self.messages.values())[-1]
|
||||||
|
|
||||||
|
def get_messages(self):
|
||||||
|
return self.messages.values()
|
||||||
|
|
||||||
|
|
||||||
class Message():
|
class Message():
|
||||||
def __init__(self, from_me: Union[bool,int], timestamp: int, time: str, key_id: int):
|
def __init__(self, from_me: Union[bool,int], timestamp: int, time: str, key_id: int):
|
||||||
self.from_me = bool(from_me)
|
self.from_me = bool(from_me)
|
||||||
|
|||||||
@@ -12,7 +12,7 @@ from pathlib import Path
|
|||||||
from mimetypes import MimeTypes
|
from mimetypes import MimeTypes
|
||||||
from hashlib import sha256
|
from hashlib import sha256
|
||||||
from Whatsapp_Chat_Exporter.data_model import ChatStore, Message
|
from Whatsapp_Chat_Exporter.data_model import ChatStore, Message
|
||||||
from Whatsapp_Chat_Exporter.utility import sanitize_except, determine_day, Crypt
|
from Whatsapp_Chat_Exporter.utility import MAX_SIZE, ROW_SIZE, rendering, sanitize_except, determine_day, Crypt
|
||||||
from Whatsapp_Chat_Exporter.utility import brute_force_offset, CRYPT14_OFFSETS
|
from Whatsapp_Chat_Exporter.utility import brute_force_offset, CRYPT14_OFFSETS
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@@ -49,7 +49,7 @@ def _extract_encrypted_key(keyfile):
|
|||||||
key_stream += byte.to_bytes(1, "big", signed=True)
|
key_stream += byte.to_bytes(1, "big", signed=True)
|
||||||
|
|
||||||
return _generate_hmac_of_hmac(key_stream)
|
return _generate_hmac_of_hmac(key_stream)
|
||||||
|
|
||||||
|
|
||||||
def decrypt_backup(database, key, output, crypt=Crypt.CRYPT14, show_crypt15=False):
|
def decrypt_backup(database, key, output, crypt=Crypt.CRYPT14, show_crypt15=False):
|
||||||
if not support_backup:
|
if not support_backup:
|
||||||
@@ -82,7 +82,7 @@ def decrypt_backup(database, key, output, crypt=Crypt.CRYPT14, show_crypt15=Fals
|
|||||||
raise ValueError("The crypt15 file must be at least 131 bytes")
|
raise ValueError("The crypt15 file must be at least 131 bytes")
|
||||||
t1 = t2 = None
|
t1 = t2 = None
|
||||||
iv = database[8:24]
|
iv = database[8:24]
|
||||||
db_offset = database[0] + 2 # Skip protobuf + protobuf size and backup type
|
db_offset = database[0] + 2 # Skip protobuf + protobuf size and backup type
|
||||||
db_ciphertext = database[db_offset:]
|
db_ciphertext = database[db_offset:]
|
||||||
|
|
||||||
if t1 != t2:
|
if t1 != t2:
|
||||||
@@ -253,13 +253,13 @@ def messages(db, data):
|
|||||||
if content["key_remote_jid"] not in data:
|
if content["key_remote_jid"] not in data:
|
||||||
data[content["key_remote_jid"]] = ChatStore()
|
data[content["key_remote_jid"]] = ChatStore()
|
||||||
if content["key_remote_jid"] is None:
|
if content["key_remote_jid"] is None:
|
||||||
continue # Not sure
|
continue # Not sure
|
||||||
data[content["key_remote_jid"]].add_message(content["_id"], Message(
|
data[content["key_remote_jid"]].add_message(content["_id"], Message(
|
||||||
from_me=content["key_from_me"],
|
from_me=content["key_from_me"],
|
||||||
timestamp=content["timestamp"],
|
timestamp=content["timestamp"],
|
||||||
time=content["timestamp"],
|
time=content["timestamp"],
|
||||||
key_id=content["key_id"],
|
key_id=content["key_id"],
|
||||||
))
|
))
|
||||||
if "-" in content["key_remote_jid"] and content["key_from_me"] == 0:
|
if "-" in content["key_remote_jid"] and content["key_from_me"] == 0:
|
||||||
name = None
|
name = None
|
||||||
if table_message:
|
if table_message:
|
||||||
@@ -495,7 +495,7 @@ def vcard(db, data):
|
|||||||
ON jid._id = chat.jid_row_id
|
ON jid._id = chat.jid_row_id
|
||||||
ORDER BY message.chat_row_id ASC;"""
|
ORDER BY message.chat_row_id ASC;"""
|
||||||
)
|
)
|
||||||
|
|
||||||
rows = c.fetchall()
|
rows = c.fetchall()
|
||||||
total_row_number = len(rows)
|
total_row_number = len(rows)
|
||||||
print(f"\nGathering vCards...(0/{total_row_number})", end="\r")
|
print(f"\nGathering vCards...(0/{total_row_number})", end="\r")
|
||||||
@@ -558,7 +558,8 @@ def create_html(
|
|||||||
w3css = os.path.join(offline_static, "w3.css")
|
w3css = os.path.join(offline_static, "w3.css")
|
||||||
|
|
||||||
for current, contact in enumerate(data):
|
for current, contact in enumerate(data):
|
||||||
if len(data[contact].messages) == 0:
|
chat = data[contact]
|
||||||
|
if len(chat.messages) == 0:
|
||||||
continue
|
continue
|
||||||
phone_number = contact.split('@')[0]
|
phone_number = contact.split('@')[0]
|
||||||
if "-" in contact:
|
if "-" in contact:
|
||||||
@@ -566,25 +567,62 @@ def create_html(
|
|||||||
else:
|
else:
|
||||||
file_name = phone_number
|
file_name = phone_number
|
||||||
|
|
||||||
if data[contact].name is not None:
|
if chat.name is not None:
|
||||||
if file_name != "":
|
if file_name != "":
|
||||||
file_name += "-"
|
file_name += "-"
|
||||||
file_name += data[contact].name.replace("/", "-")
|
file_name += chat.name.replace("/", "-")
|
||||||
name = data[contact].name
|
name = chat.name
|
||||||
else:
|
else:
|
||||||
name = phone_number
|
name = phone_number
|
||||||
safe_file_name = ''
|
|
||||||
safe_file_name = "".join(x for x in file_name if x.isalnum() or x in "- ")
|
safe_file_name = "".join(x for x in file_name if x.isalnum() or x in "- ")
|
||||||
with open(f"{output_folder}/{safe_file_name}.html", "w", encoding="utf-8") as f:
|
|
||||||
f.write(
|
if maximum_size is not None:
|
||||||
template.render(
|
current_size = 0
|
||||||
name=name,
|
current_page = 1
|
||||||
msgs=data[contact].messages.values(),
|
render_box = []
|
||||||
my_avatar=None,
|
if maximum_size == 0:
|
||||||
their_avatar=f"WhatsApp/Avatars/{contact}.j",
|
maximum_size = MAX_SIZE
|
||||||
w3css=w3css
|
last_msg = chat.get_last_message().key_id
|
||||||
)
|
for message in chat.get_messages():
|
||||||
)
|
if message.data is not None and not message.meta and not message.media:
|
||||||
|
current_size += len(message.data) + ROW_SIZE
|
||||||
|
else:
|
||||||
|
current_size += ROW_SIZE + 100 # Assume media and meta HTML are 100 bytes
|
||||||
|
if current_size > maximum_size:
|
||||||
|
output_file_name = f"{output_folder}/{safe_file_name}-{current_page}.html"
|
||||||
|
rendering(
|
||||||
|
output_file_name,
|
||||||
|
template,
|
||||||
|
name,
|
||||||
|
render_box,
|
||||||
|
contact,
|
||||||
|
w3css,
|
||||||
|
f"{safe_file_name}-{current_page + 1}.html"
|
||||||
|
)
|
||||||
|
render_box = [message]
|
||||||
|
current_size = 0
|
||||||
|
current_page += 1
|
||||||
|
else:
|
||||||
|
if message.key_id == last_msg:
|
||||||
|
if current_page == 1:
|
||||||
|
output_file_name = f"{output_folder}/{safe_file_name}.html"
|
||||||
|
else:
|
||||||
|
output_file_name = f"{output_folder}/{safe_file_name}-{current_page}.html"
|
||||||
|
rendering(
|
||||||
|
output_file_name,
|
||||||
|
template,
|
||||||
|
name,
|
||||||
|
render_box,
|
||||||
|
contact,
|
||||||
|
w3css,
|
||||||
|
False
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
render_box.append(message)
|
||||||
|
else:
|
||||||
|
output_file_name = f"{output_folder}/{safe_file_name}.html"
|
||||||
|
rendering(output_file_name, template, name, chat.get_messages(), contact, w3css, False)
|
||||||
if current % 10 == 0:
|
if current % 10 == 0:
|
||||||
print(f"Creating HTML...({current}/{total_row_number})", end="\r")
|
print(f"Creating HTML...({current}/{total_row_number})", end="\r")
|
||||||
|
|
||||||
|
|||||||
@@ -2,15 +2,13 @@
|
|||||||
|
|
||||||
import sqlite3
|
import sqlite3
|
||||||
import json
|
import json
|
||||||
import string
|
|
||||||
import jinja2
|
import jinja2
|
||||||
import os
|
import os
|
||||||
import shutil
|
import shutil
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from datetime import datetime
|
|
||||||
from mimetypes import MimeTypes
|
from mimetypes import MimeTypes
|
||||||
from Whatsapp_Chat_Exporter.data_model import ChatStore, Message
|
from Whatsapp_Chat_Exporter.data_model import ChatStore, Message
|
||||||
from Whatsapp_Chat_Exporter.utility import sanitize_except, determine_day, APPLE_TIME
|
from Whatsapp_Chat_Exporter.utility import MAX_SIZE, ROW_SIZE, rendering, sanitize_except, determine_day, APPLE_TIME
|
||||||
|
|
||||||
|
|
||||||
def messages(db, data):
|
def messages(db, data):
|
||||||
@@ -56,7 +54,7 @@ def messages(db, data):
|
|||||||
data[_id].add_message(Z_PK, Message(
|
data[_id].add_message(Z_PK, Message(
|
||||||
from_me=content["ZISFROMME"],
|
from_me=content["ZISFROMME"],
|
||||||
timestamp=ts,
|
timestamp=ts,
|
||||||
time=ts, # Could be bug
|
time=ts, # TODO: Could be bug
|
||||||
key_id=content["ZSTANZAID"][:17],
|
key_id=content["ZSTANZAID"][:17],
|
||||||
))
|
))
|
||||||
if "-" in _id and content["ZISFROMME"] == 0:
|
if "-" in _id and content["ZISFROMME"] == 0:
|
||||||
@@ -226,7 +224,14 @@ def vcard(db, data):
|
|||||||
print(f"Gathering vCards...({index + 1}/{total_row_number})", end="\r")
|
print(f"Gathering vCards...({index + 1}/{total_row_number})", end="\r")
|
||||||
|
|
||||||
|
|
||||||
def create_html(data, output_folder, template=None, embedded=False, offline_static=False, maximum_size=None):
|
def create_html(
|
||||||
|
data,
|
||||||
|
output_folder,
|
||||||
|
template=None,
|
||||||
|
embedded=False,
|
||||||
|
offline_static=False,
|
||||||
|
maximum_size=None
|
||||||
|
):
|
||||||
if template is None:
|
if template is None:
|
||||||
template_dir = os.path.dirname(__file__)
|
template_dir = os.path.dirname(__file__)
|
||||||
template_file = "whatsapp.html"
|
template_file = "whatsapp.html"
|
||||||
@@ -258,7 +263,8 @@ def create_html(data, output_folder, template=None, embedded=False, offline_stat
|
|||||||
w3css = os.path.join(offline_static, "w3.css")
|
w3css = os.path.join(offline_static, "w3.css")
|
||||||
|
|
||||||
for current, contact in enumerate(data):
|
for current, contact in enumerate(data):
|
||||||
if len(data[contact].messages) == 0:
|
chat = data[contact]
|
||||||
|
if len(chat.messages) == 0:
|
||||||
continue
|
continue
|
||||||
phone_number = contact.split('@')[0]
|
phone_number = contact.split('@')[0]
|
||||||
if "-" in contact:
|
if "-" in contact:
|
||||||
@@ -266,26 +272,62 @@ def create_html(data, output_folder, template=None, embedded=False, offline_stat
|
|||||||
else:
|
else:
|
||||||
file_name = phone_number
|
file_name = phone_number
|
||||||
|
|
||||||
if data[contact].name is not None:
|
if chat.name is not None:
|
||||||
if file_name != "":
|
if file_name != "":
|
||||||
file_name += "-"
|
file_name += "-"
|
||||||
file_name += data[contact].name.replace("/", "-")
|
file_name += chat.name.replace("/", "-")
|
||||||
name = data[contact].name
|
name = chat.name
|
||||||
else:
|
else:
|
||||||
name = phone_number
|
name = phone_number
|
||||||
|
|
||||||
safe_file_name = ''
|
|
||||||
safe_file_name = "".join(x for x in file_name if x.isalnum() or x in "- ")
|
safe_file_name = "".join(x for x in file_name if x.isalnum() or x in "- ")
|
||||||
with open(f"{output_folder}/{safe_file_name}.html", "w", encoding="utf-8") as f:
|
|
||||||
f.write(
|
if maximum_size is not None:
|
||||||
template.render(
|
current_size = 0
|
||||||
name=name,
|
current_page = 1
|
||||||
msgs=data[contact].messages.values(),
|
render_box = []
|
||||||
my_avatar=None,
|
if maximum_size == 0:
|
||||||
their_avatar=f"WhatsApp/Avatars/{contact}.j",
|
maximum_size = MAX_SIZE
|
||||||
w3css=w3css
|
last_msg = chat.get_last_message().key_id
|
||||||
)
|
for message in chat.get_messages():
|
||||||
)
|
if message.data is not None and not message.meta and not message.media:
|
||||||
|
current_size += len(message.data) + ROW_SIZE
|
||||||
|
else:
|
||||||
|
current_size += ROW_SIZE + 100 # Assume media and meta HTML are 100 bytes
|
||||||
|
if current_size > maximum_size:
|
||||||
|
output_file_name = f"{output_folder}/{safe_file_name}-{current_page}.html"
|
||||||
|
rendering(
|
||||||
|
output_file_name,
|
||||||
|
template,
|
||||||
|
name,
|
||||||
|
render_box,
|
||||||
|
contact,
|
||||||
|
w3css,
|
||||||
|
f"{safe_file_name}-{current_page + 1}.html"
|
||||||
|
)
|
||||||
|
render_box = [message]
|
||||||
|
current_size = 0
|
||||||
|
current_page += 1
|
||||||
|
else:
|
||||||
|
if message.key_id == last_msg:
|
||||||
|
if current_page == 1:
|
||||||
|
output_file_name = f"{output_folder}/{safe_file_name}.html"
|
||||||
|
else:
|
||||||
|
output_file_name = f"{output_folder}/{safe_file_name}-{current_page}.html"
|
||||||
|
rendering(
|
||||||
|
output_file_name,
|
||||||
|
template,
|
||||||
|
name,
|
||||||
|
render_box,
|
||||||
|
contact,
|
||||||
|
w3css,
|
||||||
|
False
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
render_box.append(message)
|
||||||
|
else:
|
||||||
|
output_file_name = f"{output_folder}/{safe_file_name}.html"
|
||||||
|
rendering(output_file_name, template, name, chat.get_messages(), contact, w3css, False)
|
||||||
if current % 10 == 0:
|
if current % 10 == 0:
|
||||||
print(f"Creating HTML...({current}/{total_row_number})", end="\r")
|
print(f"Creating HTML...({current}/{total_row_number})", end="\r")
|
||||||
|
|
||||||
|
|||||||
@@ -4,6 +4,10 @@ from datetime import datetime
|
|||||||
from enum import Enum
|
from enum import Enum
|
||||||
|
|
||||||
|
|
||||||
|
MAX_SIZE = 4 * 1024 * 1024 # Default 4MB
|
||||||
|
ROW_SIZE = 0x300
|
||||||
|
|
||||||
|
|
||||||
def sanitize_except(html):
|
def sanitize_except(html):
|
||||||
return Markup(sanitize(html, tags=["br"]))
|
return Markup(sanitize(html, tags=["br"]))
|
||||||
|
|
||||||
@@ -17,28 +21,6 @@ def determine_day(last, current):
|
|||||||
return current
|
return current
|
||||||
|
|
||||||
|
|
||||||
# Android Specific
|
|
||||||
|
|
||||||
CRYPT14_OFFSETS = (
|
|
||||||
{"iv": 67, "db": 191},
|
|
||||||
{"iv": 67, "db": 190},
|
|
||||||
{"iv": 66, "db": 99},
|
|
||||||
{"iv": 67, "db": 193},
|
|
||||||
{"iv": 67, "db": 194},
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class Crypt(Enum):
|
|
||||||
CRYPT15 = 15
|
|
||||||
CRYPT14 = 14
|
|
||||||
CRYPT12 = 12
|
|
||||||
|
|
||||||
|
|
||||||
def brute_force_offset(max_iv=200, max_db=200):
|
|
||||||
for iv in range(0, max_iv):
|
|
||||||
for db in range(0, max_db):
|
|
||||||
yield iv, iv + 16, db
|
|
||||||
|
|
||||||
def check_update():
|
def check_update():
|
||||||
import urllib.request
|
import urllib.request
|
||||||
import json
|
import json
|
||||||
@@ -70,6 +52,42 @@ def check_update():
|
|||||||
print("You are using the latest version of WhatsApp Chat Exporter.")
|
print("You are using the latest version of WhatsApp Chat Exporter.")
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
# iOS Specific
|
|
||||||
|
|
||||||
|
def rendering(output_file_name, template, name, msgs, contact, w3css, next):
|
||||||
|
with open(output_file_name, "w", encoding="utf-8") as f:
|
||||||
|
f.write(
|
||||||
|
template.render(
|
||||||
|
name=name,
|
||||||
|
msgs=msgs,
|
||||||
|
my_avatar=None,
|
||||||
|
their_avatar=f"WhatsApp/Avatars/{contact}.j",
|
||||||
|
w3css=w3css,
|
||||||
|
next=next
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# Android Specific
|
||||||
|
CRYPT14_OFFSETS = (
|
||||||
|
{"iv": 67, "db": 191},
|
||||||
|
{"iv": 67, "db": 190},
|
||||||
|
{"iv": 66, "db": 99},
|
||||||
|
{"iv": 67, "db": 193},
|
||||||
|
{"iv": 67, "db": 194},
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class Crypt(Enum):
|
||||||
|
CRYPT15 = 15
|
||||||
|
CRYPT14 = 14
|
||||||
|
CRYPT12 = 12
|
||||||
|
|
||||||
|
|
||||||
|
def brute_force_offset(max_iv=200, max_db=200):
|
||||||
|
for iv in range(0, max_iv):
|
||||||
|
for db in range(0, max_db):
|
||||||
|
yield iv, iv + 16, db
|
||||||
|
|
||||||
|
|
||||||
|
# iOS Specific
|
||||||
APPLE_TIME = datetime.timestamp(datetime(2001, 1, 1))
|
APPLE_TIME = datetime.timestamp(datetime(2001, 1, 1))
|
||||||
|
|||||||
@@ -193,7 +193,11 @@
|
|||||||
</div>
|
</div>
|
||||||
</article>
|
</article>
|
||||||
<footer class="w3-center">
|
<footer class="w3-center">
|
||||||
|
{% if next %}
|
||||||
|
<a href="./{{ next }}">Next</a>
|
||||||
|
{% else %}
|
||||||
End of history
|
End of history
|
||||||
|
{% endif %}
|
||||||
</footer>
|
</footer>
|
||||||
</body>
|
</body>
|
||||||
</html>
|
</html>
|
||||||
Reference in New Issue
Block a user