This commit is contained in:
KnugiHK
2025-05-11 18:07:51 +08:00
parent cc410b8503
commit 33149075d3
14 changed files with 372 additions and 311 deletions

View File

@@ -452,7 +452,8 @@ def decrypt_android_backup(args) -> int:
elif "crypt15" in args.backup:
crypt = Crypt.CRYPT15
else:
logger.error(f"Unknown backup format. The backup file must be crypt12, crypt14 or crypt15.{CLEAR_LINE}")
logger.error(
f"Unknown backup format. The backup file must be crypt12, crypt14 or crypt15.{CLEAR_LINE}")
return 1
# Get key
@@ -505,11 +506,11 @@ def handle_decrypt_error(error: int) -> None:
"""Handle decryption errors with appropriate messages."""
if error == 1:
logger.error("Dependencies of decrypt_backup and/or extract_encrypted_key"
" are not present. For details, see README.md.\n")
" are not present. For details, see README.md.\n")
exit(3)
elif error == 2:
logger.error("Failed when decompressing the decrypted backup. "
"Possibly incorrect offsets used in decryption.\n")
"Possibly incorrect offsets used in decryption.\n")
exit(4)
else:
logger.error("Unknown error occurred.\n")
@@ -598,7 +599,7 @@ def handle_media_directory(args) -> None:
logger.info(f"Media directory has been moved to the output directory{CLEAR_LINE}")
except PermissionError:
logger.warning("Cannot remove original WhatsApp directory. "
"Perhaps the directory is opened?\n")
"Perhaps the directory is opened?\n")
else:
logger.info(f"Copying media directory...\r")
shutil.copytree(args.media, media_path)

View File

@@ -121,6 +121,7 @@ def _decrypt_database(db_ciphertext: bytes, main_key: bytes, iv: bytes) -> bytes
)
return db
def _decrypt_crypt14(database: bytes, main_key: bytes, max_worker: int = 10) -> bytes:
"""Decrypt a crypt14 database using multithreading for brute-force offset detection.
@@ -194,7 +195,8 @@ def _decrypt_crypt14(database: bytes, main_key: bytes, max_worker: int = 10) ->
return db
with concurrent.futures.ThreadPoolExecutor(max_worker) as executor:
future_to_offset = {executor.submit(attempt_decrypt, offset): offset for offset in offset_combinations}
future_to_offset = {executor.submit(attempt_decrypt, offset)
: offset for offset in offset_combinations}
try:
for future in concurrent.futures.as_completed(future_to_offset):
@@ -217,7 +219,6 @@ def _decrypt_crypt14(database: bytes, main_key: bytes, max_worker: int = 10) ->
raise OffsetNotFoundError("Could not find the correct offsets for decryption.")
def _decrypt_crypt12(database: bytes, main_key: bytes) -> bytes:
"""Decrypt a crypt12 database.
@@ -319,7 +320,7 @@ def decrypt_backup(
if crypt is not Crypt.CRYPT15 and len(key) != 158:
raise InvalidKeyError("The key file must be 158 bytes")
#signature check, this is check is used in crypt 12 and 14
# signature check, this check is used in crypt 12 and 14
if crypt != Crypt.CRYPT15:
t1 = key[30:62]
@@ -329,7 +330,6 @@ def decrypt_backup(
if t1 != database[3:35] and crypt == Crypt.CRYPT12:
raise ValueError("The signature of key file and backup file mismatch")
if crypt == Crypt.CRYPT15:
if keyfile_stream:
main_key, hex_key = _extract_enc_key(key)
@@ -353,7 +353,6 @@ def decrypt_backup(
except (InvalidFileFormatError, OffsetNotFoundError, ValueError) as e:
raise DecryptionError(f"Decryption failed: {e}") from e
if not dry_run:
with open(output, "wb") as f:
f.write(db)

View File

@@ -22,24 +22,26 @@ logger = logging.getLogger(__name__)
def contacts(db, data, enrich_from_vcards):
"""
Process WhatsApp contacts from the database.
Args:
db: Database connection
data: Data store object
enrich_from_vcards: Path to vCard file for contact enrichment
Returns:
bool: False if no contacts found, True otherwise
"""
c = db.cursor()
c.execute("SELECT count() FROM wa_contacts")
total_row_number = c.fetchone()[0]
if total_row_number == 0:
if enrich_from_vcards is not None:
logger.info("No contacts profiles found in the default database, contacts will be imported from the specified vCard file.")
logger.info(
"No contacts profiles found in the default database, contacts will be imported from the specified vCard file.")
else:
logger.warning("No contacts profiles found in the default database, consider using --enrich-from-vcards for adopting names from exported contacts from Google")
logger.warning(
"No contacts profiles found in the default database, consider using --enrich-from-vcards for adopting names from exported contacts from Google")
return False
else:
logger.info(f"Processed {total_row_number} contacts\n")
@@ -51,14 +53,14 @@ def contacts(db, data, enrich_from_vcards):
if row["status"] is not None:
current_chat.status = row["status"]
row = c.fetchone()
return True
def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat, filter_empty):
"""
Process WhatsApp messages from the database.
Args:
db: Database connection
data: Data store object
@@ -85,17 +87,17 @@ def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat,
i = 0
# Fetch the first row safely
content = _fetch_row_safely(content_cursor)
while content is not None:
_process_single_message(data, content, table_message, timezone_offset)
i += 1
if i % 1000 == 0:
logger.info(f"Processing messages...({i}/{total_row_number})\r")
# Fetch the next row safely
content = _fetch_row_safely(content_cursor)
logger.info(f"Processed {total_row_number} messages{CLEAR_LINE}")
@@ -106,8 +108,10 @@ def _get_message_count(cursor, filter_empty, filter_date, filter_chat):
try:
empty_filter = get_cond_for_empty(filter_empty, "messages.key_remote_jid", "messages.needs_push")
date_filter = f'AND timestamp {filter_date}' if filter_date is not None else ''
include_filter = get_chat_condition(filter_chat[0], True, ["messages.key_remote_jid", "messages.remote_resource"], "jid", "android")
exclude_filter = get_chat_condition(filter_chat[1], False, ["messages.key_remote_jid", "messages.remote_resource"], "jid", "android")
include_filter = get_chat_condition(
filter_chat[0], True, ["messages.key_remote_jid", "messages.remote_resource"], "jid", "android")
exclude_filter = get_chat_condition(
filter_chat[1], False, ["messages.key_remote_jid", "messages.remote_resource"], "jid", "android")
cursor.execute(f"""SELECT count()
FROM messages
@@ -123,8 +127,10 @@ def _get_message_count(cursor, filter_empty, filter_date, filter_chat):
except sqlite3.OperationalError:
empty_filter = get_cond_for_empty(filter_empty, "jid.raw_string", "broadcast")
date_filter = f'AND timestamp {filter_date}' if filter_date is not None else ''
include_filter = get_chat_condition(filter_chat[0], True, ["jid.raw_string", "jid_group.raw_string"], "jid", "android")
exclude_filter = get_chat_condition(filter_chat[1], False, ["jid.raw_string", "jid_group.raw_string"], "jid", "android")
include_filter = get_chat_condition(
filter_chat[0], True, ["jid.raw_string", "jid_group.raw_string"], "jid", "android")
exclude_filter = get_chat_condition(
filter_chat[1], False, ["jid.raw_string", "jid_group.raw_string"], "jid", "android")
cursor.execute(f"""SELECT count()
FROM message
@@ -146,8 +152,10 @@ def _get_messages_cursor_legacy(cursor, filter_empty, filter_date, filter_chat):
"""Get cursor for legacy database schema."""
empty_filter = get_cond_for_empty(filter_empty, "messages.key_remote_jid", "messages.needs_push")
date_filter = f'AND messages.timestamp {filter_date}' if filter_date is not None else ''
include_filter = get_chat_condition(filter_chat[0], True, ["messages.key_remote_jid", "messages.remote_resource"], "jid_global", "android")
exclude_filter = get_chat_condition(filter_chat[1], False, ["messages.key_remote_jid", "messages.remote_resource"], "jid_global", "android")
include_filter = get_chat_condition(
filter_chat[0], True, ["messages.key_remote_jid", "messages.remote_resource"], "jid_global", "android")
exclude_filter = get_chat_condition(
filter_chat[1], False, ["messages.key_remote_jid", "messages.remote_resource"], "jid_global", "android")
cursor.execute(f"""SELECT messages.key_remote_jid,
messages._id,
@@ -209,8 +217,10 @@ def _get_messages_cursor_new(cursor, filter_empty, filter_date, filter_chat):
"""Get cursor for new database schema."""
empty_filter = get_cond_for_empty(filter_empty, "key_remote_jid", "broadcast")
date_filter = f'AND message.timestamp {filter_date}' if filter_date is not None else ''
include_filter = get_chat_condition(filter_chat[0], True, ["key_remote_jid", "jid_group.raw_string"], "jid_global", "android")
exclude_filter = get_chat_condition(filter_chat[1], False, ["key_remote_jid", "jid_group.raw_string"], "jid_global", "android")
include_filter = get_chat_condition(
filter_chat[0], True, ["key_remote_jid", "jid_group.raw_string"], "jid_global", "android")
exclude_filter = get_chat_condition(
filter_chat[1], False, ["key_remote_jid", "jid_group.raw_string"], "jid_global", "android")
cursor.execute(f"""SELECT jid_global.raw_string as key_remote_jid,
message._id,
@@ -292,19 +302,20 @@ def _process_single_message(data, content, table_message, timezone_offset):
"""Process a single message row."""
if content["key_remote_jid"] is None:
return
# Get or create the chat
if not data.get_chat(content["key_remote_jid"]):
current_chat = data.add_chat(content["key_remote_jid"], ChatStore(Device.ANDROID, content["chat_subject"]))
current_chat = data.add_chat(content["key_remote_jid"], ChatStore(
Device.ANDROID, content["chat_subject"]))
else:
current_chat = data.get_chat(content["key_remote_jid"])
# Determine sender_jid_row_id
if "sender_jid_row_id" in content:
sender_jid_row_id = content["sender_jid_row_id"]
else:
sender_jid_row_id = None
# Create message object
message = Message(
from_me=not sender_jid_row_id and content["key_from_me"],
@@ -316,19 +327,19 @@ def _process_single_message(data, content, table_message, timezone_offset):
received_timestamp=content["received_timestamp"],
read_timestamp=content["read_timestamp"]
)
# Handle binary data
if isinstance(content["data"], bytes):
_process_binary_message(message, content)
current_chat.add_message(content["_id"], message)
return
# Set sender for group chats
if content["jid_type"] == JidType.GROUP and content["key_from_me"] == 0:
_set_group_sender(message, content, data, table_message)
else:
message.sender = None
# Handle quoted messages
if content["quoted"] is not None:
message.reply = content["quoted"]
@@ -338,7 +349,7 @@ def _process_single_message(data, content, table_message, timezone_offset):
message.quoted_data = content["quoted_data"]
else:
message.reply = None
# Handle message caption
if not table_message and content["media_caption"] is not None:
# Old schema
@@ -348,14 +359,14 @@ def _process_single_message(data, content, table_message, timezone_offset):
message.caption = content["data"]
else:
message.caption = None
# Handle message content based on status
if content["status"] == 6: # 6 = Metadata
_process_metadata_message(message, content, data, table_message)
else:
# Real message
_process_regular_message(message, content, table_message)
current_chat.add_message(content["_id"], message)
@@ -385,7 +396,7 @@ def _set_group_sender(message, content, data, table_message):
name = data.get_chat(content["remote_resource"]).name
if "@" in content["remote_resource"]:
fallback = content["remote_resource"].split('@')[0]
message.sender = name or fallback
@@ -393,7 +404,7 @@ def _process_metadata_message(message, content, data, table_message):
"""Process metadata message."""
message.meta = True
name = fallback = None
if table_message:
if content["sender_jid_row_id"] > 0:
_jid = content["group_sender_jid"]
@@ -412,12 +423,12 @@ def _process_metadata_message(message, content, data, table_message):
fallback = _jid.split('@')[0]
else:
name = "You"
message.data = determine_metadata(content, name or fallback)
if isinstance(message.data, str) and "<br>" in message.data:
message.safe = True
if message.data is None:
if content["video_call"] is not None: # Missed call
message.meta = True
@@ -433,7 +444,7 @@ def _process_metadata_message(message, content, data, table_message):
def _process_regular_message(message, content, table_message):
"""Process regular (non-metadata) message."""
message.sticker = content["media_wa_type"] == 20 # Sticker is a message
if content["key_from_me"] == 1:
if content["status"] == 5 and content["edit_version"] == 7 or table_message and content["media_wa_type"] == 15:
msg = "Message deleted"
@@ -458,7 +469,7 @@ def _process_regular_message(message, content, table_message):
msg = content["data"]
if msg is not None:
msg = _format_message_text(msg)
message.data = msg
@@ -474,7 +485,7 @@ def _format_message_text(text):
def media(db, data, media_folder, filter_date, filter_chat, filter_empty, separate_media=True):
"""
Process WhatsApp media files from the database.
Args:
db: Database connection
data: Data store object
@@ -487,28 +498,28 @@ def media(db, data, media_folder, filter_date, filter_chat, filter_empty, separa
c = db.cursor()
total_row_number = _get_media_count(c, filter_empty, filter_date, filter_chat)
logger.info(f"Processing media...(0/{total_row_number})\r")
try:
content_cursor = _get_media_cursor_legacy(c, filter_empty, filter_date, filter_chat)
except sqlite3.OperationalError:
content_cursor = _get_media_cursor_new(c, filter_empty, filter_date, filter_chat)
content = content_cursor.fetchone()
mime = MimeTypes()
# Ensure thumbnails directory exists
Path(f"{media_folder}/thumbnails").mkdir(parents=True, exist_ok=True)
i = 0
while content is not None:
_process_single_media(data, content, media_folder, mime, separate_media)
i += 1
if i % 100 == 0:
logger.info(f"Processing media...({i}/{total_row_number})\r")
content = content_cursor.fetchone()
logger.info(f"Processed {total_row_number} media{CLEAR_LINE}")
@@ -519,8 +530,10 @@ def _get_media_count(cursor, filter_empty, filter_date, filter_chat):
try:
empty_filter = get_cond_for_empty(filter_empty, "key_remote_jid", "messages.needs_push")
date_filter = f'AND messages.timestamp {filter_date}' if filter_date is not None else ''
include_filter = get_chat_condition(filter_chat[0], True, ["messages.key_remote_jid", "remote_resource"], "jid", "android")
exclude_filter = get_chat_condition(filter_chat[1], False, ["messages.key_remote_jid", "remote_resource"], "jid", "android")
include_filter = get_chat_condition(
filter_chat[0], True, ["messages.key_remote_jid", "remote_resource"], "jid", "android")
exclude_filter = get_chat_condition(
filter_chat[1], False, ["messages.key_remote_jid", "remote_resource"], "jid", "android")
cursor.execute(f"""SELECT count()
FROM message_media
@@ -538,8 +551,10 @@ def _get_media_count(cursor, filter_empty, filter_date, filter_chat):
except sqlite3.OperationalError:
empty_filter = get_cond_for_empty(filter_empty, "jid.raw_string", "broadcast")
date_filter = f'AND message.timestamp {filter_date}' if filter_date is not None else ''
include_filter = get_chat_condition(filter_chat[0], True, ["jid.raw_string", "jid_group.raw_string"], "jid", "android")
exclude_filter = get_chat_condition(filter_chat[1], False, ["jid.raw_string", "jid_group.raw_string"], "jid", "android")
include_filter = get_chat_condition(
filter_chat[0], True, ["jid.raw_string", "jid_group.raw_string"], "jid", "android")
exclude_filter = get_chat_condition(
filter_chat[1], False, ["jid.raw_string", "jid_group.raw_string"], "jid", "android")
cursor.execute(f"""SELECT count()
FROM message_media
@@ -563,8 +578,10 @@ def _get_media_cursor_legacy(cursor, filter_empty, filter_date, filter_chat):
"""Get cursor for legacy media database schema."""
empty_filter = get_cond_for_empty(filter_empty, "key_remote_jid", "broadcast")
date_filter = f'AND messages.timestamp {filter_date}' if filter_date is not None else ''
include_filter = get_chat_condition(filter_chat[0], True, ["messages.key_remote_jid", "remote_resource"], "jid", "android")
exclude_filter = get_chat_condition(filter_chat[1], False, ["messages.key_remote_jid", "remote_resource"], "jid", "android")
include_filter = get_chat_condition(
filter_chat[0], True, ["messages.key_remote_jid", "remote_resource"], "jid", "android")
exclude_filter = get_chat_condition(
filter_chat[1], False, ["messages.key_remote_jid", "remote_resource"], "jid", "android")
cursor.execute(f"""SELECT messages.key_remote_jid,
message_row_id,
@@ -596,8 +613,10 @@ def _get_media_cursor_new(cursor, filter_empty, filter_date, filter_chat):
"""Get cursor for new media database schema."""
empty_filter = get_cond_for_empty(filter_empty, "key_remote_jid", "broadcast")
date_filter = f'AND message.timestamp {filter_date}' if filter_date is not None else ''
include_filter = get_chat_condition(filter_chat[0], True, ["key_remote_jid", "jid_group.raw_string"], "jid", "android")
exclude_filter = get_chat_condition(filter_chat[1], False, ["key_remote_jid", "jid_group.raw_string"], "jid", "android")
include_filter = get_chat_condition(
filter_chat[0], True, ["key_remote_jid", "jid_group.raw_string"], "jid", "android")
exclude_filter = get_chat_condition(
filter_chat[1], False, ["key_remote_jid", "jid_group.raw_string"], "jid", "android")
cursor.execute(f"""SELECT jid.raw_string as key_remote_jid,
message_row_id,
@@ -633,10 +652,10 @@ def _process_single_media(data, content, media_folder, mime, separate_media):
current_chat = data.get_chat(content["key_remote_jid"])
message = current_chat.get_message(content["message_row_id"])
message.media = True
if os.path.isfile(file_path):
message.data = file_path
# Set mime type
if content["mime_type"] is None:
guess = mime.guess_type(file_path)[0]
@@ -646,11 +665,11 @@ def _process_single_media(data, content, media_folder, mime, separate_media):
message.mime = "application/octet-stream"
else:
message.mime = content["mime_type"]
# Copy media to separate folder if needed
if separate_media:
chat_display_name = slugify(current_chat.name or message.sender
or content["key_remote_jid"].split('@')[0], True)
chat_display_name = slugify(current_chat.name or message.sender
or content["key_remote_jid"].split('@')[0], True)
current_filename = file_path.split("/")[-1]
new_folder = os.path.join(media_folder, "separated", chat_display_name)
Path(new_folder).mkdir(parents=True, exist_ok=True)
@@ -661,7 +680,7 @@ def _process_single_media(data, content, media_folder, mime, separate_media):
message.data = "The media is missing"
message.mime = "media"
message.meta = True
# Handle thumbnail
if content["thumbnail"] is not None:
thumb_path = f"{media_folder}/thumbnails/{b64decode(content['file_hash']).hex()}.png"
@@ -681,11 +700,11 @@ def vcard(db, data, media_folder, filter_date, filter_chat, filter_empty):
total_row_number = len(rows)
logger.info(f"Processing vCards...(0/{total_row_number})\r")
# Create vCards directory if it doesn't exist
path = os.path.join(media_folder, "vCards")
Path(path).mkdir(parents=True, exist_ok=True)
for index, row in enumerate(rows):
_process_vcard_row(row, path, data)
logger.info(f"Processing vCards...({index + 1}/{total_row_number})\r")
@@ -696,8 +715,10 @@ def _execute_vcard_query_modern(c, filter_date, filter_chat, filter_empty):
"""Execute vCard query for modern WhatsApp database schema."""
# Build the filter conditions
chat_filter_include = get_chat_condition(filter_chat[0], True, ["messages.key_remote_jid", "remote_resource"], "jid", "android")
chat_filter_exclude = get_chat_condition(filter_chat[1], False, ["messages.key_remote_jid", "remote_resource"], "jid", "android")
chat_filter_include = get_chat_condition(
filter_chat[0], True, ["messages.key_remote_jid", "remote_resource"], "jid", "android")
chat_filter_exclude = get_chat_condition(
filter_chat[1], False, ["messages.key_remote_jid", "remote_resource"], "jid", "android")
date_filter = f'AND messages.timestamp {filter_date}' if filter_date is not None else ''
empty_filter = get_cond_for_empty(filter_empty, "key_remote_jid", "messages.needs_push")
@@ -726,8 +747,10 @@ def _execute_vcard_query_legacy(c, filter_date, filter_chat, filter_empty):
"""Execute vCard query for legacy WhatsApp database schema."""
# Build the filter conditions
chat_filter_include = get_chat_condition(filter_chat[0], True, ["key_remote_jid", "jid_group.raw_string"], "jid", "android")
chat_filter_exclude = get_chat_condition(filter_chat[1], False, ["key_remote_jid", "jid_group.raw_string"], "jid", "android")
chat_filter_include = get_chat_condition(
filter_chat[0], True, ["key_remote_jid", "jid_group.raw_string"], "jid", "android")
chat_filter_exclude = get_chat_condition(
filter_chat[1], False, ["key_remote_jid", "jid_group.raw_string"], "jid", "android")
date_filter = f'AND message.timestamp {filter_date}' if filter_date is not None else ''
empty_filter = get_cond_for_empty(filter_empty, "key_remote_jid", "broadcast")
@@ -760,11 +783,11 @@ def _process_vcard_row(row, path, data):
file_name = "".join(x for x in media_name if x.isalnum())
file_name = file_name.encode('utf-8')[:230].decode('utf-8', 'ignore')
file_path = os.path.join(path, f"{file_name}.vcf")
if not os.path.isfile(file_path):
with open(file_path, "w", encoding="utf-8") as f:
f.write(row["vcard"])
message = data.get_chat(row["key_remote_jid"]).get_message(row["message_row_id"])
message.data = "This media include the following vCard file(s):<br>" \
f'<a href="{htmle(file_path)}">{htmle(media_name)}</a>'
@@ -776,26 +799,26 @@ def _process_vcard_row(row, path, data):
def calls(db, data, timezone_offset, filter_chat):
"""Process call logs from WhatsApp database."""
c = db.cursor()
# Check if there are any calls that match the filter
total_row_number = _get_calls_count(c, filter_chat)
if total_row_number == 0:
return
logger.info(f"Processing calls...({total_row_number})\r")
# Fetch call data
calls_data = _fetch_calls_data(c, filter_chat)
# Create a chat store for all calls
chat = ChatStore(Device.ANDROID, "WhatsApp Calls")
# Process each call
content = calls_data.fetchone()
while content is not None:
_process_call_record(content, chat, data, timezone_offset)
content = calls_data.fetchone()
# Add the calls chat to the data
data.add_chat("000000000000000", chat)
logger.info(f"Processed {total_row_number} calls{CLEAR_LINE}")
@@ -861,7 +884,7 @@ def _process_call_record(content, chat, data, timezone_offset):
received_timestamp=None, # TODO: Add timestamp
read_timestamp=None # TODO: Add timestamp
)
# Get caller/callee name
_jid = content["raw_string"]
name = data.get_chat(_jid).name if _jid in data else content["chat_subject"] or None
@@ -870,13 +893,13 @@ def _process_call_record(content, chat, data, timezone_offset):
else:
fallback = None
call.sender = name or fallback
# Set metadata
call.meta = True
# Construct call description based on call type and result
call.data = _construct_call_description(content, call)
# Add call to chat
chat.add_message(content["_id"], call)
@@ -888,7 +911,7 @@ def _construct_call_description(content, call):
f"call {'to' if call.from_me else 'from'} "
f"{call.sender} was "
)
if content['call_result'] in (0, 4, 7):
description += "cancelled." if call.from_me else "missed."
elif content['call_result'] == 2:
@@ -904,21 +927,21 @@ def _construct_call_description(content, call):
)
else:
description += "in an unknown state."
return description
def create_html(
data,
output_folder,
template=None,
embedded=False,
offline_static=False,
maximum_size=None,
no_avatar=False,
experimental=False,
headline=None
):
data,
output_folder,
template=None,
embedded=False,
offline_static=False,
maximum_size=None,
no_avatar=False,
experimental=False,
headline=None
):
"""Generate HTML chat files from data."""
template = setup_template(template, no_avatar, experimental)
@@ -936,33 +959,33 @@ def create_html(
if len(current_chat) == 0:
# Skip empty chats
continue
safe_file_name, name = get_file_name(contact, current_chat)
if maximum_size is not None:
_generate_paginated_chat(
current_chat,
safe_file_name,
name,
contact,
output_folder,
template,
w3css,
maximum_size,
current_chat,
safe_file_name,
name,
contact,
output_folder,
template,
w3css,
maximum_size,
headline
)
else:
_generate_single_chat(
current_chat,
safe_file_name,
name,
contact,
output_folder,
template,
w3css,
current_chat,
safe_file_name,
name,
contact,
output_folder,
template,
w3css,
headline
)
if current % 10 == 0:
logger.info(f"Generating chats...({current}/{total_row_number})\r")
@@ -990,20 +1013,20 @@ def _generate_paginated_chat(current_chat, safe_file_name, name, contact, output
current_size = 0
current_page = 1
render_box = []
# Use default maximum size if set to 0
if maximum_size == 0:
maximum_size = MAX_SIZE
last_msg = current_chat.get_last_message().key_id
for message in current_chat.values():
# Calculate message size
if message.data is not None and not message.meta and not message.media:
current_size += len(message.data) + ROW_SIZE
else:
current_size += ROW_SIZE + 100 # Assume media and meta HTML are 100 bytes
if current_size > maximum_size:
# Create a new page
output_file_name = f"{output_folder}/{safe_file_name}-{current_page}.html"
@@ -1047,25 +1070,25 @@ def _generate_paginated_chat(current_chat, safe_file_name, name, contact, output
def create_txt(data, output):
"""Generate text files from chat data."""
os.makedirs(output, exist_ok=True)
for jik, chat in data.items():
if len(chat) == 0:
continue
# Determine file name
if chat.name is not None:
contact = chat.name.replace('/', '')
else:
contact = jik.replace('+', '')
output_file = os.path.join(output, f"{contact}.txt")
with open(output_file, "w", encoding="utf8") as f:
for message in chat.values():
# Skip metadata in text format
if message.meta and message.mime != "media":
continue
# Format the message
formatted_message = _format_message_for_txt(message, contact)
f.write(f"{formatted_message}\n")
@@ -1074,16 +1097,16 @@ def create_txt(data, output):
def _format_message_for_txt(message, contact):
"""Format a message for text output."""
date = datetime.fromtimestamp(message.timestamp).date()
# Determine the sender name
if message.from_me:
name = "You"
else:
name = message.sender if message.sender else contact
prefix = f"[{date} {message.time}] {name}: "
prefix_length = len(prefix)
# Handle different message types
if message.media and ("/" in message.mime or message.mime == "media"):
if message.data == "The media is missing":
@@ -1095,9 +1118,9 @@ def _format_message_for_txt(message, contact):
message_text = ""
else:
message_text = message.data.replace('<br>', f'\n{" " * prefix_length}')
# Add caption if present
if message.caption is not None:
message_text += "\n" + ' ' * len(prefix) + message.caption.replace('<br>', f'\n{" " * prefix_length}')
return f"{prefix}{message_text}"

View File

@@ -24,31 +24,32 @@ import struct
import codecs
from datetime import datetime, timedelta
class BPListWriter(object):
def __init__(self, objects):
self.bplist = ""
self.objects = objects
def binary(self):
'''binary -> string
Generates bplist
'''
self.data = 'bplist00'
# TODO: flatten objects and count max length size
# TODO: write objects and save offsets
# TODO: write offsets
# TODO: write metadata
return self.data
def write(self, filename):
'''
Writes bplist to file
'''
if self.bplist != "":
@@ -57,18 +58,19 @@ class BPListWriter(object):
else:
raise Exception('BPlist not yet generated')
class BPListReader(object):
def __init__(self, s):
self.data = s
self.objects = []
self.resolved = {}
def __unpackIntStruct(self, sz, s):
'''__unpackIntStruct(size, string) -> int
Unpacks the integer of given size (1, 2 or 4 bytes) from string
'''
if sz == 1:
if sz == 1:
ot = '!B'
elif sz == 2:
ot = '!H'
@@ -79,17 +81,17 @@ class BPListReader(object):
else:
raise Exception('int unpack size '+str(sz)+' unsupported')
return struct.unpack(ot, s)[0]
def __unpackInt(self, offset):
'''__unpackInt(offset) -> int
Unpacks int field from plist at given offset
'''
return self.__unpackIntMeta(offset)[1]
def __unpackIntMeta(self, offset):
'''__unpackIntMeta(offset) -> (size, int)
Unpacks int field from plist at given offset and returns its size and value
'''
obj_header = self.data[offset]
@@ -99,7 +101,7 @@ class BPListReader(object):
def __resolveIntSize(self, obj_info, offset):
'''__resolveIntSize(obj_info, offset) -> (count, offset)
Calculates count of objref* array entries and returns count and offset to first element
'''
if obj_info == 0x0F:
@@ -112,10 +114,10 @@ class BPListReader(object):
def __unpackFloatStruct(self, sz, s):
'''__unpackFloatStruct(size, string) -> float
Unpacks the float of given size (4 or 8 bytes) from string
'''
if sz == 4:
if sz == 4:
ot = '!f'
elif sz == 8:
ot = '!d'
@@ -125,7 +127,7 @@ class BPListReader(object):
def __unpackFloat(self, offset):
'''__unpackFloat(offset) -> float
Unpacks float field from plist at given offset
'''
obj_header = self.data[offset]
@@ -135,70 +137,79 @@ class BPListReader(object):
def __unpackDate(self, offset):
td = int(struct.unpack(">d", self.data[offset+1:offset+9])[0])
return datetime(year=2001,month=1,day=1) + timedelta(seconds=td)
return datetime(year=2001, month=1, day=1) + timedelta(seconds=td)
def __unpackItem(self, offset):
'''__unpackItem(offset)
Unpacks and returns an item from plist
'''
obj_header = self.data[offset]
obj_type, obj_info = (obj_header & 0xF0), (obj_header & 0x0F)
if obj_type == 0x00:
if obj_info == 0x00: # null 0000 0000
if obj_type == 0x00:
if obj_info == 0x00: # null 0000 0000
return None
elif obj_info == 0x08: # bool 0000 1000 // false
elif obj_info == 0x08: # bool 0000 1000 // false
return False
elif obj_info == 0x09: # bool 0000 1001 // true
elif obj_info == 0x09: # bool 0000 1001 // true
return True
elif obj_info == 0x0F: # fill 0000 1111 // fill byte
raise Exception("0x0F Not Implemented") # this is really pad byte, FIXME
elif obj_info == 0x0F: # fill 0000 1111 // fill byte
raise Exception("0x0F Not Implemented") # this is really pad byte, FIXME
else:
raise Exception('unpack item type '+str(obj_header)+' at '+str(offset)+ 'failed')
elif obj_type == 0x10: # int 0001 nnnn ... // # of bytes is 2^nnnn, big-endian bytes
raise Exception('unpack item type '+str(obj_header)+' at '+str(offset) + 'failed')
elif obj_type == 0x10: # int 0001 nnnn ... // # of bytes is 2^nnnn, big-endian bytes
return self.__unpackInt(offset)
elif obj_type == 0x20: # real 0010 nnnn ... // # of bytes is 2^nnnn, big-endian bytes
elif obj_type == 0x20: # real 0010 nnnn ... // # of bytes is 2^nnnn, big-endian bytes
return self.__unpackFloat(offset)
elif obj_type == 0x30: # date 0011 0011 ... // 8 byte float follows, big-endian bytes
elif obj_type == 0x30: # date 0011 0011 ... // 8 byte float follows, big-endian bytes
return self.__unpackDate(offset)
elif obj_type == 0x40: # data 0100 nnnn [int] ... // nnnn is number of bytes unless 1111 then int count follows, followed by bytes
# data 0100 nnnn [int] ... // nnnn is number of bytes unless 1111 then int count follows, followed by bytes
elif obj_type == 0x40:
obj_count, objref = self.__resolveIntSize(obj_info, offset)
return self.data[objref:objref+obj_count] # XXX: we return data as str
elif obj_type == 0x50: # string 0101 nnnn [int] ... // ASCII string, nnnn is # of chars, else 1111 then int count, then bytes
return self.data[objref:objref+obj_count] # XXX: we return data as str
# string 0101 nnnn [int] ... // ASCII string, nnnn is # of chars, else 1111 then int count, then bytes
elif obj_type == 0x50:
obj_count, objref = self.__resolveIntSize(obj_info, offset)
return self.data[objref:objref+obj_count]
elif obj_type == 0x60: # string 0110 nnnn [int] ... // Unicode string, nnnn is # of chars, else 1111 then int count, then big-endian 2-byte uint16_t
# string 0110 nnnn [int] ... // Unicode string, nnnn is # of chars, else 1111 then int count, then big-endian 2-byte uint16_t
elif obj_type == 0x60:
obj_count, objref = self.__resolveIntSize(obj_info, offset)
return self.data[objref:objref+obj_count*2].decode('utf-16be')
elif obj_type == 0x80: # uid 1000 nnnn ... // nnnn+1 is # of bytes
elif obj_type == 0x80: # uid 1000 nnnn ... // nnnn+1 is # of bytes
# FIXME: Accept as a string for now
obj_count, objref = self.__resolveIntSize(obj_info, offset)
return self.data[objref:objref+obj_count]
elif obj_type == 0xA0: # array 1010 nnnn [int] objref* // nnnn is count, unless '1111', then int count follows
# array 1010 nnnn [int] objref* // nnnn is count, unless '1111', then int count follows
elif obj_type == 0xA0:
obj_count, objref = self.__resolveIntSize(obj_info, offset)
arr = []
for i in range(obj_count):
arr.append(self.__unpackIntStruct(self.object_ref_size, self.data[objref+i*self.object_ref_size:objref+i*self.object_ref_size+self.object_ref_size]))
arr.append(self.__unpackIntStruct(
self.object_ref_size, self.data[objref+i*self.object_ref_size:objref+i*self.object_ref_size+self.object_ref_size]))
return arr
elif obj_type == 0xC0: # set 1100 nnnn [int] objref* // nnnn is count, unless '1111', then int count follows
# set 1100 nnnn [int] objref* // nnnn is count, unless '1111', then int count follows
elif obj_type == 0xC0:
# XXX: not serializable via apple implementation
raise Exception("0xC0 Not Implemented") # FIXME: implement
elif obj_type == 0xD0: # dict 1101 nnnn [int] keyref* objref* // nnnn is count, unless '1111', then int count follows
raise Exception("0xC0 Not Implemented") # FIXME: implement
# dict 1101 nnnn [int] keyref* objref* // nnnn is count, unless '1111', then int count follows
elif obj_type == 0xD0:
obj_count, objref = self.__resolveIntSize(obj_info, offset)
keys = []
for i in range(obj_count):
keys.append(self.__unpackIntStruct(self.object_ref_size, self.data[objref+i*self.object_ref_size:objref+i*self.object_ref_size+self.object_ref_size]))
keys.append(self.__unpackIntStruct(
self.object_ref_size, self.data[objref+i*self.object_ref_size:objref+i*self.object_ref_size+self.object_ref_size]))
values = []
objref += obj_count*self.object_ref_size
for i in range(obj_count):
values.append(self.__unpackIntStruct(self.object_ref_size, self.data[objref+i*self.object_ref_size:objref+i*self.object_ref_size+self.object_ref_size]))
values.append(self.__unpackIntStruct(
self.object_ref_size, self.data[objref+i*self.object_ref_size:objref+i*self.object_ref_size+self.object_ref_size]))
dic = {}
for i in range(obj_count):
dic[keys[i]] = values[i]
return dic
else:
raise Exception('don\'t know how to unpack obj type '+hex(obj_type)+' at '+str(offset))
def __resolveObject(self, idx):
try:
return self.resolved[idx]
@@ -212,7 +223,7 @@ class BPListReader(object):
return newArr
if type(obj) == dict:
newDic = {}
for k,v in obj.items():
for k, v in obj.items():
key_resolved = self.__resolveObject(k)
if isinstance(key_resolved, str):
rk = key_resolved
@@ -225,15 +236,16 @@ class BPListReader(object):
else:
self.resolved[idx] = obj
return obj
def parse(self):
# read header
if self.data[:8] != b'bplist00':
raise Exception('Bad magic')
# read trailer
self.offset_size, self.object_ref_size, self.number_of_objects, self.top_object, self.table_offset = struct.unpack('!6xBB4xI4xI4xI', self.data[-32:])
#print "** plist offset_size:",self.offset_size,"objref_size:",self.object_ref_size,"num_objs:",self.number_of_objects,"top:",self.top_object,"table_ofs:",self.table_offset
self.offset_size, self.object_ref_size, self.number_of_objects, self.top_object, self.table_offset = struct.unpack(
'!6xBB4xI4xI4xI', self.data[-32:])
# print "** plist offset_size:",self.offset_size,"objref_size:",self.object_ref_size,"num_objs:",self.number_of_objects,"top:",self.top_object,"table_ofs:",self.table_offset
# read offset table
self.offset_table = self.data[self.table_offset:-32]
@@ -243,40 +255,45 @@ class BPListReader(object):
offset_entry = ot[:self.offset_size]
ot = ot[self.offset_size:]
self.offsets.append(self.__unpackIntStruct(self.offset_size, offset_entry))
#print "** plist offsets:",self.offsets
# print "** plist offsets:",self.offsets
# read object table
self.objects = []
k = 0
for i in self.offsets:
obj = self.__unpackItem(i)
#print "** plist unpacked",k,type(obj),obj,"at",i
# print "** plist unpacked",k,type(obj),obj,"at",i
k += 1
self.objects.append(obj)
# rebuild object tree
#for i in range(len(self.objects)):
# for i in range(len(self.objects)):
# self.__resolveObject(i)
# return root object
return self.__resolveObject(self.top_object)
@classmethod
def plistWithString(cls, s):
    """Parse ``s`` in one step and return the parsed result.

    Convenience constructor: instantiates ``cls`` with ``s`` and immediately
    calls ``parse()`` on the new instance.

    Args:
        s: Value handed unchanged to the ``cls`` constructor.

    Returns:
        Whatever ``parse()`` returns for the freshly created instance.
    """
    return cls(s).parse()
# helpers for testing
def plist(obj):
    """Serialize ``obj`` to a binary property list via Apple's Foundation framework.

    Testing helper. ``Foundation`` is imported inside the function so that the
    module can still be imported where that package is not installed.

    Args:
        obj: Object passed to ``NSPropertyListSerialization`` for serialization.

    Returns:
        ``str()`` of the ``bytes()`` of the serialized data object.
    """
    from Foundation import NSPropertyListSerialization, NSPropertyListBinaryFormat_v1_0

    # Serialize exactly once, in binary (v1.0) format with options=0.
    b = NSPropertyListSerialization.dataWithPropertyList_format_options_error_(
        obj, NSPropertyListBinaryFormat_v1_0, 0, None)
    return str(b.bytes())
def unplist(s):
    """Deserialize property-list data ``s`` via Apple's Foundation framework.

    Testing helper. ``Foundation`` is imported inside the function so that the
    module can still be imported where that package is not installed.

    Args:
        s: Sized buffer holding the serialized property list; its ``len()`` is
            passed along as the byte count.

    Returns:
        The result of
        ``NSPropertyListSerialization.propertyListWithData_options_format_error_``
        called with options=0 and no format/error out-arguments.
    """
    from Foundation import NSData, NSPropertyListSerialization

    # Wrap the raw buffer in an NSData object before handing it to the parser.
    d = NSData.dataWithBytes_length_(s, len(s))
    return NSPropertyListSerialization.propertyListWithData_options_format_error_(d, 0, None, None)
if __name__ == "__main__":
import os
import sys

View File

@@ -14,12 +14,12 @@ logger = logging.getLogger(__name__)
def messages(path, data, assume_first_as_me=False):
"""
Extracts messages from an exported WhatsApp chat file.
Args:
path: Path to the exported chat file
data: Data container object to store the parsed chat
assume_first_as_me: If True, assumes the first message is sent from the user without asking
Returns:
Updated data container with extracted messages
"""
@@ -27,16 +27,16 @@ def messages(path, data, assume_first_as_me=False):
chat = data.add_chat("ExportedChat", ChatStore(Device.EXPORTED))
you = "" # Will store the username of the current user
user_identification_done = False # Flag to track if user identification has been done
# First pass: count total lines for progress reporting
with open(path, "r", encoding="utf8") as file:
total_row_number = sum(1 for _ in file)
# Second pass: process the messages
with open(path, "r", encoding="utf8") as file:
for index, line in enumerate(file):
you, user_identification_done = process_line(
line, index, chat, path, you,
line, index, chat, path, you,
assume_first_as_me, user_identification_done
)
@@ -51,31 +51,31 @@ def messages(path, data, assume_first_as_me=False):
def process_line(line, index, chat, file_path, you, assume_first_as_me, user_identification_done):
    """
    Process a single line from the chat file

    A line containing " - " is split at the first occurrence: the text before
    the separator is passed to process_new_message as the time and the text
    after it as the content. Any other line is handed to
    process_message_continuation as a continuation of an earlier message.

    Args:
        line: Raw line read from the exported chat file
        index: Index of the line within the file
        chat: Chat object that receives the parsed message
        file_path: Path of the exported chat file (forwarded unchanged)
        you: Current value of the user's own name ("" while still unknown)
        assume_first_as_me: Forwarded unchanged to process_new_message
        user_identification_done: Forwarded unchanged to process_new_message

    Returns:
        Tuple of (updated_you_value, updated_user_identification_done_flag)
    """
    timestamp, separator, content = line.partition(" - ")

    if not separator:
        # No " - " separator: this is a continuation of the previous message
        process_message_continuation(line, index, chat)
        return you, user_identification_done

    # The line starts a new message (has timestamp format)
    return process_new_message(
        timestamp, content, index, chat, you, file_path,
        assume_first_as_me, user_identification_done
    )
def process_new_message(time, content, index, chat, you, file_path,
def process_new_message(time, content, index, chat, you, file_path,
assume_first_as_me, user_identification_done):
"""
Process a line that contains a new message
Returns:
Tuple of (updated_you_value, updated_user_identification_done_flag)
"""
@@ -88,7 +88,7 @@ def process_new_message(time, content, index, chat, you, file_path,
received_timestamp=None,
read_timestamp=None
)
# Check if this is a system message (no name:message format)
if ":" not in content:
msg.data = content
@@ -96,7 +96,7 @@ def process_new_message(time, content, index, chat, you, file_path,
else:
# Process user message
name, message = content.strip().split(":", 1)
# Handle user identification
if you == "":
if chat.name is None:
@@ -113,17 +113,17 @@ def process_new_message(time, content, index, chat, you, file_path,
# If we know the chat name, anyone else must be "you"
if name != chat.name:
you = name
# Set the chat name if needed
if chat.name is None and name != you:
chat.name = name
# Determine if this message is from the current user
msg.from_me = (name == you)
# Process message content
process_message_content(msg, message, file_path)
chat.add_message(index, msg)
return you, user_identification_done
@@ -144,11 +144,11 @@ def process_attached_file(msg, message, file_path):
"""Process an attached file in a message"""
mime = MimeTypes()
msg.media = True
# Extract file path and check if it exists
file_name = message.split("(file attached)")[0].strip()
attached_file_path = os.path.join(os.path.dirname(file_path), file_name)
if os.path.isfile(attached_file_path):
msg.data = attached_file_path
guess = mime.guess_type(attached_file_path)[0]
@@ -165,9 +165,9 @@ def process_message_continuation(line, index, chat):
lookback = index - 1
while lookback not in chat.keys():
lookback -= 1
msg = chat.get_message(lookback)
# Add the continuation line to the message
if msg.media:
msg.caption = line.strip()
@@ -182,4 +182,4 @@ def prompt_for_user_identification(name):
if ans == "y":
return name
elif ans == "n":
return ""
return ""

View File

@@ -21,14 +21,14 @@ def contacts(db, data):
c.execute("""SELECT count() FROM ZWAADDRESSBOOKCONTACT WHERE ZABOUTTEXT IS NOT NULL""")
total_row_number = c.fetchone()[0]
logger.info(f"Pre-processing contacts...({total_row_number})\r")
c.execute("""SELECT ZWHATSAPPID, ZABOUTTEXT FROM ZWAADDRESSBOOKCONTACT WHERE ZABOUTTEXT IS NOT NULL""")
content = c.fetchone()
while content is not None:
zwhatsapp_id = content["ZWHATSAPPID"]
if not zwhatsapp_id.endswith("@s.whatsapp.net"):
zwhatsapp_id += "@s.whatsapp.net"
current_chat = ChatStore(Device.IOS)
current_chat.status = content["ZABOUTTEXT"]
data.add_chat(zwhatsapp_id, current_chat)
@@ -40,7 +40,7 @@ def process_contact_avatars(current_chat, media_folder, contact_id):
"""Process and assign avatar images for a contact."""
path = f'{media_folder}/Media/Profile/{contact_id.split("@")[0]}'
avatars = glob(f"{path}*")
if 0 < len(avatars) <= 1:
current_chat.their_avatar = avatars[0]
else:
@@ -64,12 +64,14 @@ def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat,
"""Process WhatsApp messages and contacts from the database."""
c = db.cursor()
cursor2 = db.cursor()
# Build the chat filter conditions
chat_filter_include = get_chat_condition(filter_chat[0], True, ["ZWACHATSESSION.ZCONTACTJID", "ZMEMBERJID"], "ZGROUPINFO", "ios")
chat_filter_exclude = get_chat_condition(filter_chat[1], False, ["ZWACHATSESSION.ZCONTACTJID", "ZMEMBERJID"], "ZGROUPINFO", "ios")
chat_filter_include = get_chat_condition(
filter_chat[0], True, ["ZWACHATSESSION.ZCONTACTJID", "ZMEMBERJID"], "ZGROUPINFO", "ios")
chat_filter_exclude = get_chat_condition(
filter_chat[1], False, ["ZWACHATSESSION.ZCONTACTJID", "ZMEMBERJID"], "ZGROUPINFO", "ios")
date_filter = f'AND ZMESSAGEDATE {filter_date}' if filter_date is not None else ''
# Process contacts first
contact_query = f"""
SELECT count()
@@ -110,13 +112,13 @@ def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat,
GROUP BY ZCONTACTJID;
"""
c.execute(contacts_query)
# Process each contact
content = c.fetchone()
while content is not None:
contact_name = get_contact_name(content)
contact_id = content["ZCONTACTJID"]
# Add or update chat
if contact_id not in data:
current_chat = data.add_chat(contact_id, ChatStore(Device.IOS, contact_name, media_folder))
@@ -124,11 +126,11 @@ def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat,
current_chat = data.get_chat(contact_id)
current_chat.name = contact_name
current_chat.my_avatar = os.path.join(media_folder, "Media/Profile/Photo.jpg")
# Process avatar images
process_contact_avatars(current_chat, media_folder, contact_id)
content = c.fetchone()
logger.info(f"Processed {total_row_number} contacts{CLEAR_LINE}")
# Get message count
@@ -147,7 +149,7 @@ def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat,
c.execute(message_count_query)
total_row_number = c.fetchone()[0]
logger.info(f"Processing messages...(0/{total_row_number})\r")
# Fetch messages
messages_query = f"""
SELECT ZCONTACTJID,
@@ -175,7 +177,7 @@ def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat,
ORDER BY ZMESSAGEDATE ASC;
"""
c.execute(messages_query)
# Process each message
i = 0
content = c.fetchone()
@@ -183,14 +185,14 @@ def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat,
contact_id = content["ZCONTACTJID"]
message_pk = content["Z_PK"]
is_group_message = content["ZGROUPINFO"] is not None
# Ensure chat exists
if contact_id not in data:
current_chat = data.add_chat(contact_id, ChatStore(Device.IOS))
process_contact_avatars(current_chat, media_folder, contact_id)
else:
current_chat = data.get_chat(contact_id)
# Create message object
ts = APPLE_TIME + content["ZMESSAGEDATE"]
message = Message(
@@ -203,14 +205,14 @@ def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat,
received_timestamp=APPLE_TIME + content["ZSENTDATE"] if content["ZSENTDATE"] else None,
read_timestamp=None # TODO: Add timestamp
)
# Process message data
invalid = process_message_data(message, content, is_group_message, data, cursor2)
# Add valid messages to chat
if not invalid:
current_chat.add_message(message_pk, message)
# Update progress
i += 1
if i % 1000 == 0:
@@ -236,11 +238,11 @@ def process_message_data(message, content, is_group_message, data, cursor2):
message.sender = name or fallback
else:
message.sender = None
# Handle metadata messages
if content["ZMESSAGETYPE"] == 6:
return process_metadata_message(message, content, is_group_message)
# Handle quoted replies
if content["ZMETADATA"] is not None and content["ZMETADATA"].startswith(b"\x2a\x14") and False:
quoted = content["ZMETADATA"][2:19]
@@ -250,17 +252,17 @@ def process_message_data(message, content, is_group_message, data, cursor2):
WHERE ZSTANZAID LIKE '{message.reply}%'""")
quoted_content = cursor2.fetchone()
if quoted_content and "ZTEXT" in quoted_content:
message.quoted_data = quoted_content["ZTEXT"]
message.quoted_data = quoted_content["ZTEXT"]
else:
message.quoted_data = None
# Handle stickers
if content["ZMESSAGETYPE"] == 15:
message.sticker = True
# Process message text
process_message_text(message, content)
return False # Message is valid
@@ -305,19 +307,21 @@ def process_message_text(message, content):
msg = content["ZTEXT"]
if msg is not None:
msg = msg.replace("\r\n", "<br>").replace("\n", "<br>")
message.data = msg
def media(db, data, media_folder, filter_date, filter_chat, filter_empty, separate_media=False):
"""Process media files from WhatsApp messages."""
c = db.cursor()
# Build filter conditions
chat_filter_include = get_chat_condition(filter_chat[0], True, ["ZWACHATSESSION.ZCONTACTJID","ZMEMBERJID"], "ZGROUPINFO", "ios")
chat_filter_exclude = get_chat_condition(filter_chat[1], False, ["ZWACHATSESSION.ZCONTACTJID", "ZMEMBERJID"], "ZGROUPINFO", "ios")
chat_filter_include = get_chat_condition(
filter_chat[0], True, ["ZWACHATSESSION.ZCONTACTJID", "ZMEMBERJID"], "ZGROUPINFO", "ios")
chat_filter_exclude = get_chat_condition(
filter_chat[1], False, ["ZWACHATSESSION.ZCONTACTJID", "ZMEMBERJID"], "ZGROUPINFO", "ios")
date_filter = f'AND ZMESSAGEDATE {filter_date}' if filter_date is not None else ''
# Get media count
media_count_query = f"""
SELECT count()
@@ -336,7 +340,7 @@ def media(db, data, media_folder, filter_date, filter_chat, filter_empty, separa
c.execute(media_count_query)
total_row_number = c.fetchone()[0]
logger.info(f"Processing media...(0/{total_row_number})\r")
# Fetch media items
media_query = f"""
SELECT ZCONTACTJID,
@@ -360,14 +364,14 @@ def media(db, data, media_folder, filter_date, filter_chat, filter_empty, separa
ORDER BY ZCONTACTJID ASC
"""
c.execute(media_query)
# Process each media item
mime = MimeTypes()
i = 0
content = c.fetchone()
while content is not None:
process_media_item(content, data, media_folder, mime, separate_media)
# Update progress
i += 1
if i % 100 == 0:
@@ -382,23 +386,24 @@ def process_media_item(content, data, media_folder, mime, separate_media):
current_chat = data.get_chat(content["ZCONTACTJID"])
message = current_chat.get_message(content["ZMESSAGE"])
message.media = True
if current_chat.media_base == "":
current_chat.media_base = media_folder + "/"
if os.path.isfile(file_path):
message.data = '/'.join(file_path.split("/")[1:])
# Set MIME type
if content["ZVCARDSTRING"] is None:
guess = mime.guess_type(file_path)[0]
message.mime = guess if guess is not None else "application/octet-stream"
else:
message.mime = content["ZVCARDSTRING"]
# Handle separate media option
if separate_media:
chat_display_name = slugify(current_chat.name or message.sender or content["ZCONTACTJID"].split('@')[0], True)
chat_display_name = slugify(
current_chat.name or message.sender or content["ZCONTACTJID"].split('@')[0], True)
current_filename = file_path.split("/")[-1]
new_folder = os.path.join(media_folder, "separated", chat_display_name)
Path(new_folder).mkdir(parents=True, exist_ok=True)
@@ -410,7 +415,7 @@ def process_media_item(content, data, media_folder, mime, separate_media):
message.data = "The media is missing"
message.mime = "media"
message.meta = True
# Add caption if available
if content["ZTITLE"] is not None:
message.caption = content["ZTITLE"]
@@ -419,12 +424,14 @@ def process_media_item(content, data, media_folder, mime, separate_media):
def vcard(db, data, media_folder, filter_date, filter_chat, filter_empty):
"""Process vCard contacts from WhatsApp messages."""
c = db.cursor()
# Build filter conditions
chat_filter_include = get_chat_condition(filter_chat[0], True, ["ZCONTACTJID", "ZMEMBERJID"], "ZGROUPINFO", "ios")
chat_filter_exclude = get_chat_condition(filter_chat[1], False, ["ZCONTACTJID", "ZMEMBERJID"], "ZGROUPINFO", "ios")
chat_filter_include = get_chat_condition(
filter_chat[0], True, ["ZCONTACTJID", "ZMEMBERJID"], "ZGROUPINFO", "ios")
chat_filter_exclude = get_chat_condition(
filter_chat[1], False, ["ZCONTACTJID", "ZMEMBERJID"], "ZGROUPINFO", "ios")
date_filter = f'AND ZWAMESSAGE.ZMESSAGEDATE {filter_date}' if filter_date is not None else ''
# Fetch vCard mentions
vcard_query = f"""
SELECT DISTINCT ZWAVCARDMENTION.ZMEDIAITEM,
@@ -450,7 +457,7 @@ def vcard(db, data, media_folder, filter_date, filter_chat, filter_empty):
contents = c.fetchall()
total_row_number = len(contents)
logger.info(f"Processing vCards...(0/{total_row_number})\r")
# Create vCards directory
path = f'{media_folder}/Message/vCards'
Path(path).mkdir(parents=True, exist_ok=True)
@@ -484,9 +491,10 @@ def process_vcard_item(content, path, data):
f.write(vcard_string)
# Create vCard summary and update message
vcard_summary = "This media include the following vCard file(s):<br>"
vcard_summary += " | ".join([f'<a href="{htmle(fp)}">{htmle(name)}</a>' for name, fp in zip(vcard_names, file_paths)])
vcard_summary = "This media include the following vCard file(s):<br>"
vcard_summary += " | ".join([f'<a href="{htmle(fp)}">{htmle(name)}</a>' for name,
fp in zip(vcard_names, file_paths)])
message = data.get_chat(content["ZCONTACTJID"]).get_message(content["ZMESSAGE"])
message.data = vcard_summary
message.mime = "text/x-vcard"
@@ -498,11 +506,13 @@ def process_vcard_item(content, path, data):
def calls(db, data, timezone_offset, filter_chat):
"""Process WhatsApp call records."""
c = db.cursor()
# Build filter conditions
chat_filter_include = get_chat_condition(filter_chat[0], True, ["ZGROUPCALLCREATORUSERJIDSTRING"], None, "ios")
chat_filter_exclude = get_chat_condition(filter_chat[1], False, ["ZGROUPCALLCREATORUSERJIDSTRING"], None, "ios")
chat_filter_include = get_chat_condition(
filter_chat[0], True, ["ZGROUPCALLCREATORUSERJIDSTRING"], None, "ios")
chat_filter_exclude = get_chat_condition(
filter_chat[1], False, ["ZGROUPCALLCREATORUSERJIDSTRING"], None, "ios")
# Get call count
call_count_query = f"""
SELECT count()
@@ -515,9 +525,9 @@ def calls(db, data, timezone_offset, filter_chat):
total_row_number = c.fetchone()[0]
if total_row_number == 0:
return
logger.info(f"Processed {total_row_number} calls{CLEAR_LINE}\n")
# Fetch call records
calls_query = f"""
SELECT ZCALLIDSTRING,
@@ -538,16 +548,16 @@ def calls(db, data, timezone_offset, filter_chat):
{chat_filter_exclude}
"""
c.execute(calls_query)
# Create calls chat
chat = ChatStore(Device.ANDROID, "WhatsApp Calls")
# Process each call
content = c.fetchone()
while content is not None:
process_call_record(content, chat, data, timezone_offset)
content = c.fetchone()
# Add calls chat to data
data.add_chat("000000000000000", chat)
@@ -562,7 +572,7 @@ def process_call_record(content, chat, data, timezone_offset):
key_id=content["ZCALLIDSTRING"],
timezone_offset=timezone_offset if timezone_offset else CURRENT_TZ_OFFSET
)
# Set sender info
_jid = content["ZGROUPCALLCREATORUSERJIDSTRING"]
name = data.get_chat(_jid).name if _jid in data else None
@@ -571,11 +581,11 @@ def process_call_record(content, chat, data, timezone_offset):
else:
fallback = None
call.sender = name or fallback
# Set call metadata
call.meta = True
call.data = format_call_data(call, content)
# Add call to chat
chat.add_message(call.key_id, call)
@@ -589,7 +599,7 @@ def format_call_data(call, content):
f"call {'to' if call.from_me else 'from'} "
f"{call.sender} was "
)
# Call outcome
if content['ZOUTCOME'] in (1, 4):
call_data += "not answered." if call.from_me else "missed."
@@ -604,5 +614,5 @@ def format_call_data(call, content):
)
else:
call_data += "in an unknown state."
return call_data
return call_data

View File

@@ -18,6 +18,7 @@ else:
logger = logging.getLogger(__name__)
class BackupExtractor:
"""
A class to handle the extraction of WhatsApp data from iOS backups,
@@ -61,9 +62,9 @@ class BackupExtractor:
"""
if not support_encrypted:
logger.error("You don't have the dependencies to handle encrypted backup."
"Read more on how to deal with encrypted backup:"
"https://github.com/KnugiHK/Whatsapp-Chat-Exporter/blob/main/README.md#usage"
)
"Read more on how to deal with encrypted backup:"
"https://github.com/KnugiHK/Whatsapp-Chat-Exporter/blob/main/README.md#usage"
)
return
logger.info(f"Encryption detected on the backup!{CLEAR_LINE}")
@@ -116,12 +117,12 @@ class BackupExtractor:
exit(6)
else:
logger.info(f"Done{CLEAR_LINE}")
def _extract_decrypted_files(self):
"""Extract all WhatsApp files after decryption"""
def extract_progress_handler(file_id, domain, relative_path, n, total_files):
if n % 100 == 0:
logger.info(f"Decrypting and extracting files...({n}/{total_files})\r")
logger.info(f"Decrypting and extracting files...({n}/{total_files})\r")
return True
self.backup.extract_files(
@@ -234,4 +235,3 @@ def extract_media(base_dir, identifiers, decrypt_chunk_size):
"""
extractor = BackupExtractor(base_dir, identifiers, decrypt_chunk_size)
extractor.extract()

View File

@@ -33,6 +33,7 @@ CLEAR_LINE = "\x1b[K\n"
logger = logging.getLogger(__name__)
def convert_time_unit(time_second: int) -> str:
"""Converts a time duration in seconds to a human-readable string.
@@ -168,7 +169,7 @@ def check_update():
"===============Update===============\n"
"A newer version of WhatsApp Chat Exporter is available.\n"
f"Current version: {__version__}\n"
f"Latest version: {package_info['info']['version']}\n"
f"Latest version: {package_info['info']['version']}\n"
)
if platform == "win32":
logger.info("Update with: pip install --upgrade whatsapp-chat-exporter\n")