mirror of
https://github.com/KnugiHK/WhatsApp-Chat-Exporter.git
synced 2026-02-23 09:04:04 +00:00
Merge old and new schema processing logic
This commit is contained in:
@@ -2,11 +2,11 @@ try:
|
||||
from .__init__ import __version__
|
||||
except ImportError:
|
||||
from Whatsapp_Chat_Exporter.__init__ import __version__
|
||||
from Whatsapp_Chat_Exporter import extract_new as extract
|
||||
from Whatsapp_Chat_Exporter import extract
|
||||
from Whatsapp_Chat_Exporter import extract_iphone
|
||||
from Whatsapp_Chat_Exporter import extract_iphone_media
|
||||
from Whatsapp_Chat_Exporter.data_model import ChatStore
|
||||
from Whatsapp_Chat_Exporter.extract_new import Crypt
|
||||
from Whatsapp_Chat_Exporter.extract import Crypt
|
||||
from argparse import ArgumentParser
|
||||
import os
|
||||
import sqlite3
|
||||
|
||||
@@ -15,6 +15,7 @@ from datetime import datetime
|
||||
from enum import Enum
|
||||
from mimetypes import MimeTypes
|
||||
from hashlib import sha256
|
||||
from Whatsapp_Chat_Exporter.data_model import ChatStore, Message
|
||||
|
||||
try:
|
||||
import zlib
|
||||
@@ -92,6 +93,7 @@ def decrypt_backup(database, key, output, crypt=Crypt.CRYPT14, show_crypt15=Fals
|
||||
t1 = key[30:62]
|
||||
if crypt is not Crypt.CRYPT15 and len(key) != 158:
|
||||
raise ValueError("The key file must be 158 bytes")
|
||||
# Determine the IV and database offsets
|
||||
if crypt == Crypt.CRYPT14:
|
||||
if len(database) < 191:
|
||||
raise ValueError("The crypt14 file must be at least 191 bytes")
|
||||
@@ -195,150 +197,215 @@ def contacts(db, data):
|
||||
def messages(db, data):
|
||||
# Get message history
|
||||
c = db.cursor()
|
||||
c.execute("""SELECT count() FROM messages""")
|
||||
try:
|
||||
c.execute("""SELECT count() FROM messages""")
|
||||
except sqlite3.OperationalError:
|
||||
c.execute("""SELECT count() FROM message""")
|
||||
total_row_number = c.fetchone()[0]
|
||||
print(f"Gathering messages...(0/{total_row_number})", end="\r")
|
||||
|
||||
phone_number_re = re.compile(r"[0-9]+@s.whatsapp.net")
|
||||
c.execute("""SELECT messages.key_remote_jid,
|
||||
messages._id,
|
||||
messages.key_from_me,
|
||||
messages.timestamp,
|
||||
messages.data,
|
||||
messages.status,
|
||||
messages.edit_version,
|
||||
messages.thumb_image,
|
||||
messages.remote_resource,
|
||||
messages.media_wa_type,
|
||||
messages.latitude,
|
||||
messages.longitude,
|
||||
messages_quotes.key_id as quoted,
|
||||
messages.key_id,
|
||||
messages_quotes.data,
|
||||
messages.media_caption
|
||||
FROM messages
|
||||
LEFT JOIN messages_quotes
|
||||
ON messages.quoted_row_id = messages_quotes._id
|
||||
WHERE messages.key_remote_jid <> '-1';""")
|
||||
try:
|
||||
c.execute("""SELECT messages.key_remote_jid,
|
||||
messages._id,
|
||||
messages.key_from_me,
|
||||
messages.timestamp,
|
||||
messages.data,
|
||||
messages.status,
|
||||
messages.edit_version,
|
||||
messages.thumb_image,
|
||||
messages.remote_resource,
|
||||
messages.media_wa_type,
|
||||
messages.latitude,
|
||||
messages.longitude,
|
||||
messages_quotes.key_id as quoted,
|
||||
messages.key_id,
|
||||
messages_quotes.data as quoted_data,
|
||||
messages.media_caption
|
||||
FROM messages
|
||||
LEFT JOIN messages_quotes
|
||||
ON messages.quoted_row_id = messages_quotes._id
|
||||
WHERE messages.key_remote_jid <> '-1';"""
|
||||
)
|
||||
except sqlite3.OperationalError:
|
||||
try:
|
||||
c.execute("""SELECT jid_global.raw_string as key_remote_jid,
|
||||
message._id,
|
||||
message.from_me as key_from_me,
|
||||
message.timestamp,
|
||||
message.text_data as data,
|
||||
message.status,
|
||||
message_future.version as edit_version,
|
||||
message_thumbnail.thumbnail as thumb_image,
|
||||
message_media.file_path as remote_resource,
|
||||
message_media.mime_type as media_wa_type,
|
||||
message_location.latitude,
|
||||
message_location.longitude,
|
||||
message_quoted.key_id as quoted,
|
||||
message.key_id,
|
||||
message_quoted.text_data as quoted_data,
|
||||
message.message_type,
|
||||
jid_group.raw_string as group_sender_jid,
|
||||
chat.subject as chat_subject
|
||||
FROM message
|
||||
LEFT JOIN message_quoted
|
||||
ON message_quoted.message_row_id = message._id
|
||||
LEFT JOIN message_location
|
||||
ON message_location.message_row_id = message._id
|
||||
LEFT JOIN message_media
|
||||
ON message_media.message_row_id = message._id
|
||||
LEFT JOIN message_thumbnail
|
||||
ON message_thumbnail.message_row_id = message._id
|
||||
LEFT JOIN message_future
|
||||
ON message_future.message_row_id = message._id
|
||||
LEFT JOIN chat
|
||||
ON chat._id = message.chat_row_id
|
||||
INNER JOIN jid jid_global
|
||||
ON jid_global._id = chat.jid_row_id
|
||||
LEFT JOIN jid jid_group
|
||||
ON jid_group._id = message.sender_jid_row_id
|
||||
WHERE key_remote_jid <> '-1';"""
|
||||
)
|
||||
except:
|
||||
...
|
||||
else:
|
||||
table_message = True
|
||||
else:
|
||||
table_message = False
|
||||
i = 0
|
||||
content = c.fetchone()
|
||||
while content is not None:
|
||||
if content[0] not in data:
|
||||
data[content[0]] = {"name": None, "messages": {}}
|
||||
data[content[0]]["messages"][content[1]] = {
|
||||
"from_me": bool(content[2]),
|
||||
"timestamp": content[3]/1000,
|
||||
"time": datetime.fromtimestamp(content[3]/1000).strftime("%H:%M"),
|
||||
"media": False,
|
||||
"key_id": content[13],
|
||||
"meta": False,
|
||||
"data": None
|
||||
}
|
||||
if "-" in content[0] and content[2] == 0:
|
||||
if content["key_remote_jid"] not in data:
|
||||
data[content["key_remote_jid"]] = ChatStore()
|
||||
if content["key_remote_jid"] is None:
|
||||
continue # Not sure
|
||||
data[content["key_remote_jid"]].add_message(content["_id"], Message(
|
||||
from_me=content["key_from_me"],
|
||||
timestamp=content["timestamp"],
|
||||
time=content["timestamp"],
|
||||
key_id=content["key_id"],
|
||||
))
|
||||
if "-" in content["key_remote_jid"] and content["key_from_me"] == 0:
|
||||
name = None
|
||||
if content[8] in data:
|
||||
name = data[content[8]]["name"]
|
||||
if "@" in content[8]:
|
||||
fallback = content[8].split('@')[0]
|
||||
if table_message:
|
||||
if content["chat_subject"] is not None:
|
||||
_jid = content["group_sender_jid"]
|
||||
else:
|
||||
_jid = content["key_remote_jid"]
|
||||
if _jid in data:
|
||||
name = data[_jid].name
|
||||
fallback = _jid.split('@')[0] if "@" in _jid else None
|
||||
else:
|
||||
fallback = None
|
||||
else:
|
||||
fallback = None
|
||||
|
||||
data[content[0]]["messages"][content[1]]["sender"] = name or fallback
|
||||
else:
|
||||
data[content[0]]["messages"][content[1]]["sender"] = None
|
||||
|
||||
if content[12] is not None:
|
||||
data[content[0]]["messages"][content[1]]["reply"] = content[12]
|
||||
data[content[0]]["messages"][content[1]]["quoted_data"] = content[14]
|
||||
else:
|
||||
data[content[0]]["messages"][content[1]]["reply"] = None
|
||||
|
||||
if content[15] is not None:
|
||||
data[content[0]]["messages"][content[1]]["caption"] = content[15]
|
||||
else:
|
||||
data[content[0]]["messages"][content[1]]["caption"] = None
|
||||
|
||||
if content[5] == 6:
|
||||
if "-" in content[0]:
|
||||
# Is Group
|
||||
if content[4] is not None:
|
||||
try:
|
||||
int(content[4])
|
||||
except ValueError:
|
||||
msg = f"The group name changed to {content[4]}"
|
||||
data[content[0]]["messages"][content[1]]["data"] = msg
|
||||
data[content[0]]["messages"][content[1]]["meta"] = True
|
||||
if content["remote_resource"] in data:
|
||||
name = data[content["remote_resource"]].name
|
||||
if "@" in content["remote_resource"]:
|
||||
fallback = content["remote_resource"].split('@')[0]
|
||||
else:
|
||||
del data[content[0]]["messages"][content[1]]
|
||||
fallback = None
|
||||
else:
|
||||
thumb_image = content[7]
|
||||
fallback = None
|
||||
|
||||
data[content["key_remote_jid"]].messages[content["_id"]].sender = name or fallback
|
||||
else:
|
||||
data[content["key_remote_jid"]].messages[content["_id"]].sender = None
|
||||
|
||||
if content["quoted"] is not None:
|
||||
data[content["key_remote_jid"]].messages[content["_id"]].reply = content["quoted"]
|
||||
data[content["key_remote_jid"]].messages[content["_id"]].quoted_data = content["quoted_data"]
|
||||
else:
|
||||
data[content["key_remote_jid"]].messages[content["_id"]].reply = None
|
||||
|
||||
if not table_message and content["media_caption"] is not None:
|
||||
# Old schema
|
||||
data[content["key_remote_jid"]].messages[content["_id"]].caption = content["media_caption"]
|
||||
elif table_message and content["message_type"] == 1 and content["data"] is not None:
|
||||
# New schema
|
||||
data[content["key_remote_jid"]].messages[content["_id"]].caption = content["data"]
|
||||
else:
|
||||
data[content["key_remote_jid"]].messages[content["_id"]].caption = None
|
||||
|
||||
if content["status"] == 6:
|
||||
if (not table_message and "-" in content["key_remote_jid"]) or \
|
||||
(table_message and content["chat_subject"] is not None):
|
||||
# Is Group
|
||||
if content["data"] is not None:
|
||||
try:
|
||||
int(content["data"])
|
||||
except ValueError:
|
||||
msg = f"The group name changed to {content['data']}"
|
||||
data[content["key_remote_jid"]].messages[content["_id"]].data = msg
|
||||
data[content["key_remote_jid"]].messages[content["_id"]].meta = True
|
||||
else:
|
||||
data[content["key_remote_jid"]].delete_message(content["_id"])
|
||||
else:
|
||||
thumb_image = content["thumb_image"]
|
||||
if thumb_image is not None:
|
||||
if b"\x00\x00\x01\x74\x00\x1A" in thumb_image:
|
||||
# Add user
|
||||
added = phone_number_re.search(
|
||||
thumb_image.decode("unicode_escape"))[0]
|
||||
if added in data:
|
||||
name_right = data[added]["name"]
|
||||
name_right = data[added].name
|
||||
else:
|
||||
name_right = added.split('@')[0]
|
||||
if content[8] is not None:
|
||||
if content[8] in data:
|
||||
name_left = data[content[8]]["name"]
|
||||
if content["remote_resource"] is not None:
|
||||
if content["remote_resource"] in data:
|
||||
name_left = data[content["remote_resource"]].name
|
||||
else:
|
||||
name_left = content[8].split('@')[0]
|
||||
name_left = content["remote_resource"].split('@')[0]
|
||||
msg = f"{name_left} added {name_right or 'You'}"
|
||||
else:
|
||||
msg = f"Added {name_right or 'You'}"
|
||||
elif b"\xac\xed\x00\x05\x74\x00" in thumb_image:
|
||||
# Changed number
|
||||
original = content[8].split('@')[0]
|
||||
original = content["remote_resource"].split('@')[0]
|
||||
changed = thumb_image[7:].decode().split('@')[0]
|
||||
msg = f"{original} changed to {changed}"
|
||||
data[content[0]]["messages"][content[1]]["data"] = msg
|
||||
data[content[0]]["messages"][content[1]]["meta"] = True
|
||||
data[content["key_remote_jid"]].messages[content["_id"]].data = msg
|
||||
data[content["key_remote_jid"]].messages[content["_id"]].meta = True
|
||||
else:
|
||||
if content[4] is None:
|
||||
del data[content[0]]["messages"][content[1]]
|
||||
if content["data"] is None:
|
||||
data[content["key_remote_jid"]].delete_message(content["_id"])
|
||||
else:
|
||||
# Private chat
|
||||
if content[4] is None and content[7] is None:
|
||||
del data[content[0]]["messages"][content[1]]
|
||||
if content["data"] is None and content["thumb_image"] is None:
|
||||
data[content["key_remote_jid"]].delete_message(content["_id"])
|
||||
|
||||
else:
|
||||
if content[2] == 1:
|
||||
if content[5] == 5 and content[6] == 7:
|
||||
if content["key_from_me"] == 1:
|
||||
if content["status"] == 5 and content["edit_version"] == 7:
|
||||
msg = "Message deleted"
|
||||
data[content[0]]["messages"][content[1]]["meta"] = True
|
||||
data[content["key_remote_jid"]].messages[content["_id"]].meta = True
|
||||
else:
|
||||
if content[9] == "5":
|
||||
msg = f"Location shared: {content[10], content[11]}"
|
||||
data[content[0]]["messages"][content[1]]["meta"] = True
|
||||
if content["media_wa_type"] == "5":
|
||||
msg = f"Location shared: {content['latitude'], content['longitude']}"
|
||||
data[content["key_remote_jid"]].messages[content["_id"]].meta = True
|
||||
else:
|
||||
msg = content[4]
|
||||
msg = content["data"]
|
||||
if msg is not None:
|
||||
if "\r\n" in msg:
|
||||
msg = msg.replace("\r\n", "<br>")
|
||||
if "\n" in msg:
|
||||
msg = msg.replace("\n", "<br>")
|
||||
else:
|
||||
if content[5] == 0 and content[6] == 7:
|
||||
if content["status"] == 0 and content["edit_version"] == 7:
|
||||
msg = "Message deleted"
|
||||
data[content[0]]["messages"][content[1]]["meta"] = True
|
||||
data[content["key_remote_jid"]].messages[content["_id"]].meta = True
|
||||
else:
|
||||
if content[9] == "5":
|
||||
msg = f"Location shared: {content[10], content[11]}"
|
||||
data[content[0]]["messages"][content[1]]["meta"] = True
|
||||
if content["media_wa_type"] == "5":
|
||||
msg = f"Location shared: {content['latitude'], content['longitude']}"
|
||||
data[content["key_remote_jid"]].messages[content["_id"]].meta = True
|
||||
else:
|
||||
msg = content[4]
|
||||
msg = content["data"]
|
||||
if msg is not None:
|
||||
if "\r\n" in msg:
|
||||
msg = msg.replace("\r\n", "<br>")
|
||||
if "\n" in msg:
|
||||
msg = msg.replace("\n", "<br>")
|
||||
|
||||
data[content[0]]["messages"][content[1]]["data"] = msg
|
||||
data[content["key_remote_jid"]].messages[content["_id"]].data = msg
|
||||
|
||||
i += 1
|
||||
if i % 1000 == 0:
|
||||
@@ -354,7 +421,8 @@ def media(db, data, media_folder):
|
||||
total_row_number = c.fetchone()[0]
|
||||
print(f"\nGathering media...(0/{total_row_number})", end="\r")
|
||||
i = 0
|
||||
c.execute("""SELECT messages.key_remote_jid,
|
||||
try:
|
||||
c.execute("""SELECT messages.key_remote_jid,
|
||||
message_row_id,
|
||||
file_path,
|
||||
message_url,
|
||||
@@ -363,22 +431,39 @@ def media(db, data, media_folder):
|
||||
FROM message_media
|
||||
INNER JOIN messages
|
||||
ON message_media.message_row_id = messages._id
|
||||
ORDER BY messages.key_remote_jid ASC""")
|
||||
ORDER BY messages.key_remote_jid ASC"""
|
||||
)
|
||||
except sqlite3.OperationalError:
|
||||
c.execute("""SELECT jid.raw_string as key_remote_jid,
|
||||
message_row_id,
|
||||
file_path,
|
||||
message_url,
|
||||
mime_type,
|
||||
media_key
|
||||
FROM message_media
|
||||
INNER JOIN message
|
||||
ON message_media.message_row_id = message._id
|
||||
LEFT JOIN chat
|
||||
ON chat._id = message.chat_row_id
|
||||
INNER JOIN jid
|
||||
ON jid._id = chat.jid_row_id
|
||||
ORDER BY jid.raw_string ASC"""
|
||||
)
|
||||
content = c.fetchone()
|
||||
mime = MimeTypes()
|
||||
while content is not None:
|
||||
file_path = f"{media_folder}/{content[2]}"
|
||||
data[content[0]]["messages"][content[1]]["media"] = True
|
||||
file_path = f"{media_folder}/{content['file_path']}"
|
||||
data[content["key_remote_jid"]].messages[content["message_row_id"]].media = True
|
||||
if os.path.isfile(file_path):
|
||||
data[content[0]]["messages"][content[1]]["data"] = file_path
|
||||
if content[4] is None:
|
||||
data[content["key_remote_jid"]].messages[content["message_row_id"]].data = file_path
|
||||
if content["mime_type"] is None:
|
||||
guess = mime.guess_type(file_path)[0]
|
||||
if guess is not None:
|
||||
data[content[0]]["messages"][content[1]]["mime"] = guess
|
||||
data[content["key_remote_jid"]].messages[content["message_row_id"]].mime = guess
|
||||
else:
|
||||
data[content[0]]["messages"][content[1]]["mime"] = "data/data"
|
||||
data[content["key_remote_jid"]].messages[content["message_row_id"]].mime = "data/data"
|
||||
else:
|
||||
data[content[0]]["messages"][content[1]]["mime"] = content[4]
|
||||
data[content["key_remote_jid"]].messages[content["message_row_id"]].mime = content["mime_type"]
|
||||
else:
|
||||
# if "https://mmg" in content[4]:
|
||||
# try:
|
||||
@@ -390,9 +475,9 @@ def media(db, data, media_folder):
|
||||
# data[content[0]]["messages"][content[1]]["media"] = True
|
||||
# data[content[0]]["messages"][content[1]]["mime"] = "media"
|
||||
# else:
|
||||
data[content[0]]["messages"][content[1]]["data"] = "The media is missing"
|
||||
data[content[0]]["messages"][content[1]]["mime"] = "media"
|
||||
data[content[0]]["messages"][content[1]]["meta"] = True
|
||||
data[content["key_remote_jid"]].messages[content["message_row_id"]].data = "The media is missing"
|
||||
data[content["key_remote_jid"]].messages[content["message_row_id"]].mime = "media"
|
||||
data[content["key_remote_jid"]].messages[content["message_row_id"]].meta = True
|
||||
i += 1
|
||||
if i % 100 == 0:
|
||||
print(f"Gathering media...({i}/{total_row_number})", end="\r")
|
||||
@@ -403,14 +488,31 @@ def media(db, data, media_folder):
|
||||
|
||||
def vcard(db, data):
|
||||
c = db.cursor()
|
||||
c.execute("""SELECT message_row_id,
|
||||
try:
|
||||
c.execute("""SELECT message_row_id,
|
||||
messages.key_remote_jid,
|
||||
vcard,
|
||||
messages.media_name
|
||||
FROM messages_vcards
|
||||
INNER JOIN messages
|
||||
ON messages_vcards.message_row_id = messages._id
|
||||
ORDER BY messages.key_remote_jid ASC;""")
|
||||
ORDER BY messages.key_remote_jid ASC;"""
|
||||
)
|
||||
except sqlite3.OperationalError:
|
||||
c.execute("""SELECT message_row_id,
|
||||
jid.raw_string as key_remote_jid,
|
||||
vcard,
|
||||
message.text_data as media_name
|
||||
FROM message_vcard
|
||||
INNER JOIN message
|
||||
ON message_vcard.message_row_id = message._id
|
||||
LEFT JOIN chat
|
||||
ON chat._id = message.chat_row_id
|
||||
INNER JOIN jid
|
||||
ON jid._id = chat.jid_row_id
|
||||
ORDER BY message.chat_row_id ASC;"""
|
||||
)
|
||||
|
||||
rows = c.fetchall()
|
||||
total_row_number = len(rows)
|
||||
print(f"\nGathering vCards...(0/{total_row_number})", end="\r")
|
||||
@@ -418,21 +520,28 @@ def vcard(db, data):
|
||||
if not os.path.isdir(base):
|
||||
Path(base).mkdir(parents=True, exist_ok=True)
|
||||
for index, row in enumerate(rows):
|
||||
media_name = row[3] if row[3] else ""
|
||||
media_name = row["media_name"]
|
||||
file_name = "".join(x for x in media_name if x.isalnum())
|
||||
file_path = f"{base}/{file_name}.vcf"
|
||||
if not os.path.isfile(file_path):
|
||||
with open(file_path, "w", encoding="utf-8") as f:
|
||||
f.write(row[2])
|
||||
data[row[1]]["messages"][row[0]]["data"] = media_name + \
|
||||
data[row["key_remote_jid"]].messages[row["message_row_id"]].data = media_name + \
|
||||
"The vCard file cannot be displayed here, " \
|
||||
f"however it should be located at {file_path}"
|
||||
data[row[1]]["messages"][row[0]]["mime"] = "text/x-vcard"
|
||||
data[row[1]]["messages"][row[0]]["meta"] = True
|
||||
data[row["key_remote_jid"]].messages[row["message_row_id"]].mime = "text/x-vcard"
|
||||
data[row["key_remote_jid"]].messages[row["message_row_id"]].meta = True
|
||||
print(f"Gathering vCards...({index + 1}/{total_row_number})", end="\r")
|
||||
|
||||
|
||||
def create_html(data, output_folder, template=None, embedded=False, offline_static=False):
|
||||
def create_html(
|
||||
data,
|
||||
output_folder,
|
||||
template=None,
|
||||
embedded=False,
|
||||
offline_static=False,
|
||||
maximum_size=None
|
||||
):
|
||||
if template is None:
|
||||
template_dir = os.path.dirname(__file__)
|
||||
template_file = "whatsapp.html"
|
||||
@@ -464,7 +573,7 @@ def create_html(data, output_folder, template=None, embedded=False, offline_stat
|
||||
w3css = os.path.join(offline_static, "w3.css")
|
||||
|
||||
for current, contact in enumerate(data):
|
||||
if len(data[contact]["messages"]) == 0:
|
||||
if len(data[contact].messages) == 0:
|
||||
continue
|
||||
phone_number = contact.split('@')[0]
|
||||
if "-" in contact:
|
||||
@@ -472,11 +581,11 @@ def create_html(data, output_folder, template=None, embedded=False, offline_stat
|
||||
else:
|
||||
file_name = phone_number
|
||||
|
||||
if data[contact]["name"] is not None:
|
||||
if data[contact].name is not None:
|
||||
if file_name != "":
|
||||
file_name += "-"
|
||||
file_name += data[contact]["name"].replace("/", "-")
|
||||
name = data[contact]["name"]
|
||||
file_name += data[contact].name.replace("/", "-")
|
||||
name = data[contact].name
|
||||
else:
|
||||
name = phone_number
|
||||
safe_file_name = ''
|
||||
@@ -485,7 +594,7 @@ def create_html(data, output_folder, template=None, embedded=False, offline_stat
|
||||
f.write(
|
||||
template.render(
|
||||
name=name,
|
||||
msgs=data[contact]["messages"].values(),
|
||||
msgs=data[contact].messages.values(),
|
||||
my_avatar=None,
|
||||
their_avatar=f"WhatsApp/Avatars/{contact}.j",
|
||||
w3css=w3css
|
||||
|
||||
Reference in New Issue
Block a user