Merge branch 'dev' into feature/export-reactions

This commit is contained in:
Knugi
2026-01-19 20:38:20 +08:00
committed by GitHub
12 changed files with 541 additions and 422 deletions

View File

@@ -115,7 +115,7 @@ Do an iPhone/iPad Backup with iTunes/Finder first.
If you want to work on an encrypted iOS/iPadOS Backup, you should install iphone_backup_decrypt from [KnugiHK/iphone_backup_decrypt](https://github.com/KnugiHK/iphone_backup_decrypt) before you run the extract_iphone_media.py. If you want to work on an encrypted iOS/iPadOS Backup, you should install iphone_backup_decrypt from [KnugiHK/iphone_backup_decrypt](https://github.com/KnugiHK/iphone_backup_decrypt) before you run the extract_iphone_media.py.
```sh ```sh
pip install git+https://github.com/KnugiHK/iphone_backup_decrypt pip install whatsapp-chat-exporter["ios_backup"]
``` ```
> [!NOTE] > [!NOTE]
> You will need to disable the built-in end-to-end encryption for WhatsApp backups. See [WhatsApp's FAQ](https://faq.whatsapp.com/490592613091019#turn-off-end-to-end-encrypted-backup) for how to do it. > You will need to disable the built-in end-to-end encryption for WhatsApp backups. See [WhatsApp's FAQ](https://faq.whatsapp.com/490592613091019#turn-off-end-to-end-encrypted-backup) for how to do it.

View File

@@ -11,14 +11,15 @@ import logging
import importlib.metadata import importlib.metadata
from Whatsapp_Chat_Exporter import android_crypt, exported_handler, android_handler from Whatsapp_Chat_Exporter import android_crypt, exported_handler, android_handler
from Whatsapp_Chat_Exporter import ios_handler, ios_media_handler from Whatsapp_Chat_Exporter import ios_handler, ios_media_handler
from Whatsapp_Chat_Exporter.data_model import ChatCollection, ChatStore from Whatsapp_Chat_Exporter.data_model import ChatCollection, ChatStore, Timing
from Whatsapp_Chat_Exporter.utility import APPLE_TIME, CLEAR_LINE, Crypt, check_update from Whatsapp_Chat_Exporter.utility import APPLE_TIME, CLEAR_LINE, CURRENT_TZ_OFFSET, Crypt
from Whatsapp_Chat_Exporter.utility import readable_to_bytes, safe_name, bytes_to_readable from Whatsapp_Chat_Exporter.utility import readable_to_bytes, safe_name, bytes_to_readable
from Whatsapp_Chat_Exporter.utility import import_from_json, incremental_merge, DbType from Whatsapp_Chat_Exporter.utility import import_from_json, incremental_merge, check_update
from Whatsapp_Chat_Exporter.utility import telegram_json_format from Whatsapp_Chat_Exporter.utility import telegram_json_format, convert_time_unit, DbType
from argparse import ArgumentParser, SUPPRESS from argparse import ArgumentParser, SUPPRESS
from datetime import datetime from datetime import datetime
from getpass import getpass from getpass import getpass
from tqdm import tqdm
from sys import exit from sys import exit
from typing import Optional, List, Dict from typing import Optional, List, Dict
from Whatsapp_Chat_Exporter.vcards_contacts import ContactsFromVCards from Whatsapp_Chat_Exporter.vcards_contacts import ContactsFromVCards
@@ -286,13 +287,17 @@ def setup_argument_parser() -> ArgumentParser:
help="Specify the chunk size for decrypting iOS backup, which may affect the decryption speed." help="Specify the chunk size for decrypting iOS backup, which may affect the decryption speed."
) )
misc_group.add_argument( misc_group.add_argument(
"--max-bruteforce-worker", dest="max_bruteforce_worker", default=10, type=int, "--max-bruteforce-worker", dest="max_bruteforce_worker", default=4, type=int,
help="Specify the maximum number of worker for bruteforce decryption." help="Specify the maximum number of worker for bruteforce decryption."
) )
misc_group.add_argument( misc_group.add_argument(
"--no-banner", dest="no_banner", default=False, action='store_true', "--no-banner", dest="no_banner", default=False, action='store_true',
help="Do not show the banner" help="Do not show the banner"
) )
misc_group.add_argument(
"--fix-dot-files", dest="fix_dot_files", default=False, action='store_true',
help="Fix files with a dot at the end of their name (allowing the outputs be stored in FAT filesystems)"
)
return parser return parser
@@ -537,6 +542,7 @@ def process_messages(args, data: ChatCollection) -> None:
exit(6) exit(6)
filter_chat = (args.filter_chat_include, args.filter_chat_exclude) filter_chat = (args.filter_chat_include, args.filter_chat_exclude)
timing = Timing(args.timezone_offset if args.timezone_offset else CURRENT_TZ_OFFSET)
with sqlite3.connect(msg_db) as db: with sqlite3.connect(msg_db) as db:
db.row_factory = sqlite3.Row db.row_factory = sqlite3.Row
@@ -548,14 +554,14 @@ def process_messages(args, data: ChatCollection) -> None:
message_handler = ios_handler message_handler = ios_handler
message_handler.messages( message_handler.messages(
db, data, args.media, args.timezone_offset, args.filter_date, db, data, args.media, timing, args.filter_date,
filter_chat, args.filter_empty, args.no_reply_ios filter_chat, args.filter_empty, args.no_reply_ios
) )
# Process media # Process media
message_handler.media( message_handler.media(
db, data, args.media, args.filter_date, db, data, args.media, args.filter_date,
filter_chat, args.filter_empty, args.separate_media filter_chat, args.filter_empty, args.separate_media, args.fix_dot_files
) )
# Process vcards # Process vcards
@@ -565,17 +571,17 @@ def process_messages(args, data: ChatCollection) -> None:
) )
# Process calls # Process calls
process_calls(args, db, data, filter_chat) process_calls(args, db, data, filter_chat, timing)
def process_calls(args, db, data: ChatCollection, filter_chat) -> None: def process_calls(args, db, data: ChatCollection, filter_chat, timing) -> None:
"""Process call history if available.""" """Process call history if available."""
if args.android: if args.android:
android_handler.calls(db, data, args.timezone_offset, filter_chat) android_handler.calls(db, data, timing, filter_chat)
elif args.ios and args.call_db_ios is not None: elif args.ios and args.call_db_ios is not None:
with sqlite3.connect(args.call_db_ios) as cdb: with sqlite3.connect(args.call_db_ios) as cdb:
cdb.row_factory = sqlite3.Row cdb.row_factory = sqlite3.Row
ios_handler.calls(cdb, data, args.timezone_offset, filter_chat) ios_handler.calls(cdb, data, timing, filter_chat)
def handle_media_directory(args) -> None: def handle_media_directory(args) -> None:
@@ -665,7 +671,8 @@ def export_multiple_json(args, data: Dict) -> None:
# Export each chat # Export each chat
total = len(data.keys()) total = len(data.keys())
for index, jik in enumerate(data.keys()): with tqdm(total=total, desc="Generating JSON files", unit="file", leave=False) as pbar:
for jik in data.keys():
if data[jik]["name"] is not None: if data[jik]["name"] is not None:
contact = data[jik]["name"].replace('/', '') contact = data[jik]["name"].replace('/', '')
else: else:
@@ -682,7 +689,9 @@ def export_multiple_json(args, data: Dict) -> None:
indent=args.pretty_print_json indent=args.pretty_print_json
) )
f.write(file_content) f.write(file_content)
logger.info(f"Writing JSON file...({index + 1}/{total})\r") pbar.update(1)
total_time = pbar.format_dict['elapsed']
logger.info(f"Generated {total} JSON files in {convert_time_unit(total_time)}{CLEAR_LINE}")
def process_exported_chat(args, data: ChatCollection) -> None: def process_exported_chat(args, data: ChatCollection) -> None:

View File

@@ -1,13 +1,12 @@
import time
import hmac import hmac
import io import io
import logging import logging
import threading
import zlib import zlib
import concurrent.futures import concurrent.futures
from tqdm import tqdm
from typing import Tuple, Union from typing import Tuple, Union
from hashlib import sha256 from hashlib import sha256
from sys import exit from functools import partial
from Whatsapp_Chat_Exporter.utility import CLEAR_LINE, CRYPT14_OFFSETS, Crypt, DbType from Whatsapp_Chat_Exporter.utility import CLEAR_LINE, CRYPT14_OFFSETS, Crypt, DbType
try: try:
@@ -112,13 +111,36 @@ def _decrypt_database(db_ciphertext: bytes, main_key: bytes, iv: bytes) -> bytes
zlib.error: If decompression fails. zlib.error: If decompression fails.
ValueError: if the plaintext is not a SQLite database. ValueError: if the plaintext is not a SQLite database.
""" """
FOOTER_SIZE = 32
if len(db_ciphertext) <= FOOTER_SIZE:
raise ValueError("Input data too short to contain a valid GCM tag.")
actual_ciphertext = db_ciphertext[:-FOOTER_SIZE]
tag = db_ciphertext[-FOOTER_SIZE: -FOOTER_SIZE + 16]
cipher = AES.new(main_key, AES.MODE_GCM, iv) cipher = AES.new(main_key, AES.MODE_GCM, iv)
db_compressed = cipher.decrypt(db_ciphertext) try:
db = zlib.decompress(db_compressed) db_compressed = cipher.decrypt_and_verify(actual_ciphertext, tag)
if db[0:6].upper() != b"SQLITE": except ValueError:
# This could be key, IV, or tag is wrong, but likely the key is wrong.
raise ValueError("Decryption/Authentication failed. Ensure you are using the correct key.")
if len(db_compressed) < 2 or db_compressed[0] != 0x78:
logger.debug(f"Data passes GCM but is not Zlib. Header: {db_compressed[:2].hex()}")
raise ValueError( raise ValueError(
"The plaintext is not a SQLite database. Ensure you are using the correct key." "Key is correct, but decrypted data is not a valid compressed stream. "
"Is this even a valid WhatsApp database backup?"
) )
try:
db = zlib.decompress(db_compressed)
except zlib.error as e:
raise zlib.error(f"Decompression failed (The backup file likely corrupted at source): {e}")
if not db.startswith(b"SQLite"):
raise ValueError(
"Data is valid and decompressed, but it is not a SQLite database. "
"Is this even a valid WhatsApp database backup?")
return db return db
@@ -142,81 +164,68 @@ def _decrypt_crypt14(database: bytes, main_key: bytes, max_worker: int = 10) ->
# Attempt known offsets first # Attempt known offsets first
for offsets in CRYPT14_OFFSETS: for offsets in CRYPT14_OFFSETS:
iv = database[offsets["iv"]:offsets["iv"] + 16] iv = offsets["iv"]
db_ciphertext = database[offsets["db"]:] db = offsets["db"]
try: try:
decrypted_db = _decrypt_database(db_ciphertext, main_key, iv) decrypted_db = _attempt_decrypt_task((iv, iv + 16, db), database, main_key)
except (zlib.error, ValueError): except (zlib.error, ValueError):
pass # Try next offset continue
else: else:
logger.debug( logger.debug(
f"Decryption successful with known offsets: IV {offsets['iv']}, DB {offsets['db']}{CLEAR_LINE}" f"Decryption successful with known offsets: IV {iv}, DB {db}{CLEAR_LINE}"
) )
return decrypted_db # Successful decryption return decrypted_db # Successful decryption
def animate_message(stop_event): logger.info(f"Common offsets failed. Will attempt to brute-force{CLEAR_LINE}")
base_msg = "Common offsets failed. Initiating brute-force with multithreading" offset_max = 200
dots = ["", ".", "..", "..."] workers = max_worker
i = 0 check_offset = partial(_attempt_decrypt_task, database=database, main_key=main_key)
while not stop_event.is_set(): all_offsets = list(brute_force_offset(offset_max, offset_max))
logger.info(f"{base_msg}{dots[i % len(dots)]}\x1b[K\r") executor = concurrent.futures.ProcessPoolExecutor(max_workers=workers)
time.sleep(0.3) try:
i += 1 with tqdm(total=len(all_offsets), desc="Brute-forcing offsets", unit="trial", leave=False) as pbar:
logger.info(f"Common offsets failed but brute-forcing the offset works!{CLEAR_LINE}") results = executor.map(check_offset, all_offsets, chunksize=8)
found = False
for offset_info, result in zip(all_offsets, results):
pbar.update(1)
if result:
start_iv, _, start_db = offset_info
# Clean shutdown on success
executor.shutdown(wait=False, cancel_futures=True)
found = True
break
if found:
logger.info(
f"The offsets of your IV and database are {start_iv} and {start_db}, respectively.{CLEAR_LINE}"
)
logger.info(
f"To include your offsets in the expoter, please report it in the discussion thread on GitHub:{CLEAR_LINE}"
)
logger.info(f"https://github.com/KnugiHK/Whatsapp-Chat-Exporter/discussions/47{CLEAR_LINE}")
return result
stop_event = threading.Event() except KeyboardInterrupt:
anim_thread = threading.Thread(target=animate_message, args=(stop_event,)) executor.shutdown(wait=False, cancel_futures=True)
anim_thread.start() print("\n")
raise KeyboardInterrupt(
f"Brute force interrupted by user (Ctrl+C). Shutting down gracefully...{CLEAR_LINE}"
)
# Convert brute force generator into a list for parallel processing finally:
offset_combinations = list(brute_force_offset()) executor.shutdown(wait=False)
def attempt_decrypt(offset_tuple): raise OffsetNotFoundError("Could not find the correct offsets for decryption.")
def _attempt_decrypt_task(offset_tuple, database, main_key):
"""Attempt decryption with the given offsets.""" """Attempt decryption with the given offsets."""
start_iv, end_iv, start_db = offset_tuple start_iv, end_iv, start_db = offset_tuple
iv = database[start_iv:end_iv] iv = database[start_iv:end_iv]
db_ciphertext = database[start_db:] db_ciphertext = database[start_db:]
logger.debug(""f"Trying offsets: IV {start_iv}-{end_iv}, DB {start_db}{CLEAR_LINE}")
try: try:
db = _decrypt_database(db_ciphertext, main_key, iv) return _decrypt_database(db_ciphertext, main_key, iv)
except (zlib.error, ValueError): except (zlib.error, ValueError):
return None # Decryption failed, move to next return None
else:
stop_event.set()
anim_thread.join()
logger.info(
f"The offsets of your IV and database are {start_iv} and "
f"{start_db}, respectively. To include your offsets in the "
"program, please report it by creating an issue on GitHub: "
"https://github.com/KnugiHK/Whatsapp-Chat-Exporter/discussions/47"
f"\nShutting down other threads...{CLEAR_LINE}"
)
return db
with concurrent.futures.ThreadPoolExecutor(max_worker) as executor:
future_to_offset = {executor.submit(attempt_decrypt, offset)
: offset for offset in offset_combinations}
try:
for future in concurrent.futures.as_completed(future_to_offset):
result = future.result()
if result is not None:
# Shutdown remaining threads
executor.shutdown(wait=False, cancel_futures=True)
return result
except KeyboardInterrupt:
stop_event.set()
anim_thread.join()
logger.info(f"Brute force interrupted by user (Ctrl+C). Shutting down gracefully...{CLEAR_LINE}")
executor.shutdown(wait=False, cancel_futures=True)
exit(1)
finally:
stop_event.set()
anim_thread.join()
raise OffsetNotFoundError("Could not find the correct offsets for decryption.")
def _decrypt_crypt12(database: bytes, main_key: bytes) -> bytes: def _decrypt_crypt12(database: bytes, main_key: bytes) -> bytes:

View File

@@ -4,13 +4,14 @@ import logging
import sqlite3 import sqlite3
import os import os
import shutil import shutil
from tqdm import tqdm
from pathlib import Path from pathlib import Path
from mimetypes import MimeTypes from mimetypes import MimeTypes
from markupsafe import escape as htmle from markupsafe import escape as htmle
from base64 import b64decode, b64encode from base64 import b64decode, b64encode
from datetime import datetime from datetime import datetime
from Whatsapp_Chat_Exporter.data_model import ChatStore, Message from Whatsapp_Chat_Exporter.data_model import ChatStore, Message, Timing
from Whatsapp_Chat_Exporter.utility import CLEAR_LINE, CURRENT_TZ_OFFSET, MAX_SIZE, ROW_SIZE, JidType, Device from Whatsapp_Chat_Exporter.utility import CLEAR_LINE, MAX_SIZE, ROW_SIZE, JidType, Device
from Whatsapp_Chat_Exporter.utility import rendering, get_file_name, setup_template, get_cond_for_empty from Whatsapp_Chat_Exporter.utility import rendering, get_file_name, setup_template, get_cond_for_empty
from Whatsapp_Chat_Exporter.utility import get_status_location, convert_time_unit, determine_metadata from Whatsapp_Chat_Exporter.utility import get_status_location, convert_time_unit, determine_metadata
from Whatsapp_Chat_Exporter.utility import get_chat_condition, safe_name, bytes_to_readable from Whatsapp_Chat_Exporter.utility import get_chat_condition, safe_name, bytes_to_readable
@@ -47,12 +48,15 @@ def contacts(db, data, enrich_from_vcards):
logger.info(f"Processed {total_row_number} contacts\n") logger.info(f"Processed {total_row_number} contacts\n")
c.execute("SELECT jid, COALESCE(display_name, wa_name) as display_name, status FROM wa_contacts;") c.execute("SELECT jid, COALESCE(display_name, wa_name) as display_name, status FROM wa_contacts;")
row = c.fetchone()
while row is not None: with tqdm(total=total_row_number, desc="Processing contacts", unit="contact", leave=False) as pbar:
while (row := _fetch_row_safely(c)) is not None:
current_chat = data.add_chat(row["jid"], ChatStore(Device.ANDROID, row["display_name"])) current_chat = data.add_chat(row["jid"], ChatStore(Device.ANDROID, row["display_name"]))
if row["status"] is not None: if row["status"] is not None:
current_chat.status = row["status"] current_chat.status = row["status"]
row = c.fetchone() pbar.update(1)
total_time = pbar.format_dict['elapsed']
logger.info(f"Processed {total_row_number} contacts in {convert_time_unit(total_time)}{CLEAR_LINE}")
return True return True
@@ -72,7 +76,6 @@ def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat,
""" """
c = db.cursor() c = db.cursor()
total_row_number = _get_message_count(c, filter_empty, filter_date, filter_chat) total_row_number = _get_message_count(c, filter_empty, filter_date, filter_chat)
logger.info(f"Processing messages...(0/{total_row_number})\r")
try: try:
content_cursor = _get_messages_cursor_legacy(c, filter_empty, filter_date, filter_chat) content_cursor = _get_messages_cursor_legacy(c, filter_empty, filter_date, filter_chat)
@@ -84,23 +87,12 @@ def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat,
except Exception as e: except Exception as e:
raise e raise e
i = 0 with tqdm(total=total_row_number, desc="Processing messages", unit="msg", leave=False) as pbar:
# Fetch the first row safely while (content := _fetch_row_safely(content_cursor)) is not None:
content = _fetch_row_safely(content_cursor)
while content is not None:
_process_single_message(data, content, table_message, timezone_offset) _process_single_message(data, content, table_message, timezone_offset)
pbar.update(1)
i += 1 total_time = pbar.format_dict['elapsed']
if i % 1000 == 0: logger.info(f"Processed {total_row_number} messages in {convert_time_unit(total_time)}{CLEAR_LINE}")
logger.info(f"Processing messages...({i}/{total_row_number})\r")
# Fetch the next row safely
content = _fetch_row_safely(content_cursor)
_get_reactions(db, data)
logger.info(f"Processed {total_row_number} messages{CLEAR_LINE}")
# Helper functions for message processing # Helper functions for message processing
@@ -126,14 +118,16 @@ def _get_message_count(cursor, filter_empty, filter_date, filter_chat):
{include_filter} {include_filter}
{exclude_filter}""") {exclude_filter}""")
except sqlite3.OperationalError: except sqlite3.OperationalError:
empty_filter = get_cond_for_empty(filter_empty, "jid.raw_string", "broadcast") empty_filter = get_cond_for_empty(filter_empty, "key_remote_jid", "broadcast")
date_filter = f'AND timestamp {filter_date}' if filter_date is not None else '' date_filter = f'AND timestamp {filter_date}' if filter_date is not None else ''
include_filter = get_chat_condition( include_filter = get_chat_condition(
filter_chat[0], True, ["jid.raw_string", "jid_group.raw_string"], "jid", "android") filter_chat[0], True, ["key_remote_jid", "group_sender_jid"], "jid", "android")
exclude_filter = get_chat_condition( exclude_filter = get_chat_condition(
filter_chat[1], False, ["jid.raw_string", "jid_group.raw_string"], "jid", "android") filter_chat[1], False, ["key_remote_jid", "group_sender_jid"], "jid", "android")
cursor.execute(f"""SELECT count() cursor.execute(f"""SELECT count(),
COALESCE(lid_global.raw_string, jid.raw_string) as key_remote_jid,
COALESCE(lid_group.raw_string, jid_group.raw_string) as group_sender_jid
FROM message FROM message
LEFT JOIN chat LEFT JOIN chat
ON chat._id = message.chat_row_id ON chat._id = message.chat_row_id
@@ -141,6 +135,14 @@ def _get_message_count(cursor, filter_empty, filter_date, filter_chat):
ON jid._id = chat.jid_row_id ON jid._id = chat.jid_row_id
LEFT JOIN jid jid_group LEFT JOIN jid jid_group
ON jid_group._id = message.sender_jid_row_id ON jid_group._id = message.sender_jid_row_id
LEFT JOIN jid_map as jid_map_global
ON chat.jid_row_id = jid_map_global.lid_row_id
LEFT JOIN jid lid_global
ON jid_map_global.jid_row_id = lid_global._id
LEFT JOIN jid_map as jid_map_group
ON message.sender_jid_row_id = jid_map_group.lid_row_id
LEFT JOIN jid lid_group
ON jid_map_group.jid_row_id = lid_group._id
WHERE 1=1 WHERE 1=1
{empty_filter} {empty_filter}
{date_filter} {date_filter}
@@ -219,11 +221,11 @@ def _get_messages_cursor_new(cursor, filter_empty, filter_date, filter_chat):
empty_filter = get_cond_for_empty(filter_empty, "key_remote_jid", "broadcast") empty_filter = get_cond_for_empty(filter_empty, "key_remote_jid", "broadcast")
date_filter = f'AND message.timestamp {filter_date}' if filter_date is not None else '' date_filter = f'AND message.timestamp {filter_date}' if filter_date is not None else ''
include_filter = get_chat_condition( include_filter = get_chat_condition(
filter_chat[0], True, ["key_remote_jid", "jid_group.raw_string"], "jid_global", "android") filter_chat[0], True, ["key_remote_jid", "lid_group.raw_string"], "jid_global", "android")
exclude_filter = get_chat_condition( exclude_filter = get_chat_condition(
filter_chat[1], False, ["key_remote_jid", "jid_group.raw_string"], "jid_global", "android") filter_chat[1], False, ["key_remote_jid", "lid_group.raw_string"], "jid_global", "android")
cursor.execute(f"""SELECT jid_global.raw_string as key_remote_jid, cursor.execute(f"""SELECT COALESCE(lid_global.raw_string, jid_global.raw_string) as key_remote_jid,
message._id, message._id,
message.from_me as key_from_me, message.from_me as key_from_me,
message.timestamp, message.timestamp,
@@ -238,7 +240,7 @@ def _get_messages_cursor_new(cursor, filter_empty, filter_date, filter_chat):
message.key_id, message.key_id,
message_quoted.text_data as quoted_data, message_quoted.text_data as quoted_data,
message.message_type as media_wa_type, message.message_type as media_wa_type,
jid_group.raw_string as group_sender_jid, COALESCE(lid_group.raw_string, jid_group.raw_string) as group_sender_jid,
chat.subject as chat_subject, chat.subject as chat_subject,
missed_call_logs.video_call, missed_call_logs.video_call,
message.sender_jid_row_id, message.sender_jid_row_id,
@@ -248,7 +250,8 @@ def _get_messages_cursor_new(cursor, filter_empty, filter_date, filter_chat):
jid_new.raw_string as new_jid, jid_new.raw_string as new_jid,
jid_global.type as jid_type, jid_global.type as jid_type,
COALESCE(receipt_user.receipt_timestamp, message.received_timestamp) as received_timestamp, COALESCE(receipt_user.receipt_timestamp, message.received_timestamp) as received_timestamp,
COALESCE(receipt_user.read_timestamp, receipt_user.played_timestamp) as read_timestamp COALESCE(receipt_user.read_timestamp, receipt_user.played_timestamp) as read_timestamp,
message_media.raw_transcription_text as transcription_text
FROM message FROM message
LEFT JOIN message_quoted LEFT JOIN message_quoted
ON message_quoted.message_row_id = message._id ON message_quoted.message_row_id = message._id
@@ -280,6 +283,14 @@ def _get_messages_cursor_new(cursor, filter_empty, filter_date, filter_chat):
ON jid_new._id = message_system_number_change.new_jid_row_id ON jid_new._id = message_system_number_change.new_jid_row_id
LEFT JOIN receipt_user LEFT JOIN receipt_user
ON receipt_user.message_row_id = message._id ON receipt_user.message_row_id = message._id
LEFT JOIN jid_map as jid_map_global
ON chat.jid_row_id = jid_map_global.lid_row_id
LEFT JOIN jid lid_global
ON jid_map_global.jid_row_id = lid_global._id
LEFT JOIN jid_map as jid_map_group
ON message.sender_jid_row_id = jid_map_group.lid_row_id
LEFT JOIN jid lid_group
ON jid_map_group.jid_row_id = lid_group._id
WHERE key_remote_jid <> '-1' WHERE key_remote_jid <> '-1'
{empty_filter} {empty_filter}
{date_filter} {date_filter}
@@ -321,7 +332,7 @@ def _process_single_message(data, content, table_message, timezone_offset):
timestamp=content["timestamp"], timestamp=content["timestamp"],
time=content["timestamp"], time=content["timestamp"],
key_id=content["key_id"], key_id=content["key_id"],
timezone_offset=timezone_offset if timezone_offset else CURRENT_TZ_OFFSET, timezone_offset=timezone_offset,
message_type=content["media_wa_type"], message_type=content["media_wa_type"],
received_timestamp=content["received_timestamp"], received_timestamp=content["received_timestamp"],
read_timestamp=content["read_timestamp"] read_timestamp=content["read_timestamp"]
@@ -353,9 +364,12 @@ def _process_single_message(data, content, table_message, timezone_offset):
if not table_message and content["media_caption"] is not None: if not table_message and content["media_caption"] is not None:
# Old schema # Old schema
message.caption = content["media_caption"] message.caption = content["media_caption"]
elif table_message and content["media_wa_type"] == 1 and content["data"] is not None: elif table_message:
# New schema # New schema
if content["media_wa_type"] == 1 and content["data"] is not None:
message.caption = content["data"] message.caption = content["data"]
elif content["media_wa_type"] == 2 and content["transcription_text"] is not None:
message.caption = f'"{content["transcription_text"]}"'
else: else:
message.caption = None message.caption = None
@@ -547,7 +561,7 @@ def _get_reactions(db, data):
logger.info(f"Processed reactions{CLEAR_LINE}") logger.info(f"Processed reactions{CLEAR_LINE}")
def media(db, data, media_folder, filter_date, filter_chat, filter_empty, separate_media=True): def media(db, data, media_folder, filter_date, filter_chat, filter_empty, separate_media=True, fix_dot_files=False):
""" """
Process WhatsApp media files from the database. Process WhatsApp media files from the database.
@@ -562,8 +576,6 @@ def media(db, data, media_folder, filter_date, filter_chat, filter_empty, separa
""" """
c = db.cursor() c = db.cursor()
total_row_number = _get_media_count(c, filter_empty, filter_date, filter_chat) total_row_number = _get_media_count(c, filter_empty, filter_date, filter_chat)
logger.info(f"Processing media...(0/{total_row_number})\r")
try: try:
content_cursor = _get_media_cursor_legacy(c, filter_empty, filter_date, filter_chat) content_cursor = _get_media_cursor_legacy(c, filter_empty, filter_date, filter_chat)
except sqlite3.OperationalError: except sqlite3.OperationalError:
@@ -575,18 +587,12 @@ def media(db, data, media_folder, filter_date, filter_chat, filter_empty, separa
# Ensure thumbnails directory exists # Ensure thumbnails directory exists
Path(f"{media_folder}/thumbnails").mkdir(parents=True, exist_ok=True) Path(f"{media_folder}/thumbnails").mkdir(parents=True, exist_ok=True)
i = 0 with tqdm(total=total_row_number, desc="Processing media", unit="media", leave=False) as pbar:
while content is not None: while (content := _fetch_row_safely(content_cursor)) is not None:
_process_single_media(data, content, media_folder, mime, separate_media) _process_single_media(data, content, media_folder, mime, separate_media, fix_dot_files)
pbar.update(1)
i += 1 total_time = pbar.format_dict['elapsed']
if i % 100 == 0: logger.info(f"Processed {total_row_number} media in {convert_time_unit(total_time)}{CLEAR_LINE}")
logger.info(f"Processing media...({i}/{total_row_number})\r")
content = content_cursor.fetchone()
logger.info(f"Processed {total_row_number} media{CLEAR_LINE}")
# Helper functions for media processing # Helper functions for media processing
@@ -617,11 +623,13 @@ def _get_media_count(cursor, filter_empty, filter_date, filter_chat):
empty_filter = get_cond_for_empty(filter_empty, "jid.raw_string", "broadcast") empty_filter = get_cond_for_empty(filter_empty, "jid.raw_string", "broadcast")
date_filter = f'AND message.timestamp {filter_date}' if filter_date is not None else '' date_filter = f'AND message.timestamp {filter_date}' if filter_date is not None else ''
include_filter = get_chat_condition( include_filter = get_chat_condition(
filter_chat[0], True, ["jid.raw_string", "jid_group.raw_string"], "jid", "android") filter_chat[0], True, ["key_remote_jid", "group_sender_jid"], "jid", "android")
exclude_filter = get_chat_condition( exclude_filter = get_chat_condition(
filter_chat[1], False, ["jid.raw_string", "jid_group.raw_string"], "jid", "android") filter_chat[1], False, ["key_remote_jid", "group_sender_jid"], "jid", "android")
cursor.execute(f"""SELECT count() cursor.execute(f"""SELECT count(),
COALESCE(lid_global.raw_string, jid.raw_string) as key_remote_jid,
COALESCE(lid_group.raw_string, jid_group.raw_string) as group_sender_jid
FROM message_media FROM message_media
INNER JOIN message INNER JOIN message
ON message_media.message_row_id = message._id ON message_media.message_row_id = message._id
@@ -631,6 +639,14 @@ def _get_media_count(cursor, filter_empty, filter_date, filter_chat):
ON jid._id = chat.jid_row_id ON jid._id = chat.jid_row_id
LEFT JOIN jid jid_group LEFT JOIN jid jid_group
ON jid_group._id = message.sender_jid_row_id ON jid_group._id = message.sender_jid_row_id
LEFT JOIN jid_map as jid_map_global
ON chat.jid_row_id = jid_map_global.lid_row_id
LEFT JOIN jid lid_global
ON jid_map_global.jid_row_id = lid_global._id
LEFT JOIN jid_map as jid_map_group
ON message.sender_jid_row_id = jid_map_group.lid_row_id
LEFT JOIN jid lid_group
ON jid_map_group.jid_row_id = lid_group._id
WHERE 1=1 WHERE 1=1
{empty_filter} {empty_filter}
{date_filter} {date_filter}
@@ -679,18 +695,19 @@ def _get_media_cursor_new(cursor, filter_empty, filter_date, filter_chat):
empty_filter = get_cond_for_empty(filter_empty, "key_remote_jid", "broadcast") empty_filter = get_cond_for_empty(filter_empty, "key_remote_jid", "broadcast")
date_filter = f'AND message.timestamp {filter_date}' if filter_date is not None else '' date_filter = f'AND message.timestamp {filter_date}' if filter_date is not None else ''
include_filter = get_chat_condition( include_filter = get_chat_condition(
filter_chat[0], True, ["key_remote_jid", "jid_group.raw_string"], "jid", "android") filter_chat[0], True, ["key_remote_jid", "group_sender_jid"], "jid", "android")
exclude_filter = get_chat_condition( exclude_filter = get_chat_condition(
filter_chat[1], False, ["key_remote_jid", "jid_group.raw_string"], "jid", "android") filter_chat[1], False, ["key_remote_jid", "group_sender_jid"], "jid", "android")
cursor.execute(f"""SELECT jid.raw_string as key_remote_jid, cursor.execute(f"""SELECT COALESCE(lid_global.raw_string, jid.raw_string) as key_remote_jid,
message_row_id, message_row_id,
file_path, file_path,
message_url, message_url,
mime_type, mime_type,
media_key, media_key,
file_hash, file_hash,
thumbnail thumbnail,
COALESCE(lid_group.raw_string, jid_group.raw_string) as group_sender_jid
FROM message_media FROM message_media
INNER JOIN message INNER JOIN message
ON message_media.message_row_id = message._id ON message_media.message_row_id = message._id
@@ -702,6 +719,14 @@ def _get_media_cursor_new(cursor, filter_empty, filter_date, filter_chat):
ON message_media.file_hash = media_hash_thumbnail.media_hash ON message_media.file_hash = media_hash_thumbnail.media_hash
LEFT JOIN jid jid_group LEFT JOIN jid jid_group
ON jid_group._id = message.sender_jid_row_id ON jid_group._id = message.sender_jid_row_id
LEFT JOIN jid_map as jid_map_global
ON chat.jid_row_id = jid_map_global.lid_row_id
LEFT JOIN jid lid_global
ON jid_map_global.jid_row_id = lid_global._id
LEFT JOIN jid_map as jid_map_group
ON message.sender_jid_row_id = jid_map_group.lid_row_id
LEFT JOIN jid lid_group
ON jid_map_group.jid_row_id = lid_group._id
WHERE jid.type <> 7 WHERE jid.type <> 7
{empty_filter} {empty_filter}
{date_filter} {date_filter}
@@ -711,7 +736,7 @@ def _get_media_cursor_new(cursor, filter_empty, filter_date, filter_chat):
return cursor return cursor
def _process_single_media(data, content, media_folder, mime, separate_media): def _process_single_media(data, content, media_folder, mime, separate_media, fix_dot_files=False):
"""Process a single media file.""" """Process a single media file."""
file_path = f"{media_folder}/{content['file_path']}" file_path = f"{media_folder}/{content['file_path']}"
current_chat = data.get_chat(content["key_remote_jid"]) current_chat = data.get_chat(content["key_remote_jid"])
@@ -719,8 +744,6 @@ def _process_single_media(data, content, media_folder, mime, separate_media):
message.media = True message.media = True
if os.path.isfile(file_path): if os.path.isfile(file_path):
message.data = file_path
# Set mime type # Set mime type
if content["mime_type"] is None: if content["mime_type"] is None:
guess = mime.guess_type(file_path)[0] guess = mime.guess_type(file_path)[0]
@@ -731,6 +754,16 @@ def _process_single_media(data, content, media_folder, mime, separate_media):
else: else:
message.mime = content["mime_type"] message.mime = content["mime_type"]
if fix_dot_files and file_path.endswith("."):
extension = mime.guess_extension(message.mime)
if message.mime == "application/octet-stream" or not extension:
new_file_path = file_path[:-1]
else:
extension = mime.guess_extension(message.mime)
new_file_path = file_path[:-1] + extension
os.rename(file_path, new_file_path)
file_path = new_file_path
# Copy media to separate folder if needed # Copy media to separate folder if needed
if separate_media: if separate_media:
chat_display_name = safe_name(current_chat.name or message.sender chat_display_name = safe_name(current_chat.name or message.sender
@@ -741,6 +774,8 @@ def _process_single_media(data, content, media_folder, mime, separate_media):
new_path = os.path.join(new_folder, current_filename) new_path = os.path.join(new_folder, current_filename)
shutil.copy2(file_path, new_path) shutil.copy2(file_path, new_path)
message.data = new_path message.data = new_path
else:
message.data = file_path
else: else:
message.data = "The media is missing" message.data = "The media is missing"
message.mime = "media" message.mime = "media"
@@ -764,33 +799,34 @@ def vcard(db, data, media_folder, filter_date, filter_chat, filter_empty):
rows = _execute_vcard_query_legacy(c, filter_date, filter_chat, filter_empty) rows = _execute_vcard_query_legacy(c, filter_date, filter_chat, filter_empty)
total_row_number = len(rows) total_row_number = len(rows)
logger.info(f"Processing vCards...(0/{total_row_number})\r")
# Create vCards directory if it doesn't exist # Create vCards directory if it doesn't exist
path = os.path.join(media_folder, "vCards") path = os.path.join(media_folder, "vCards")
Path(path).mkdir(parents=True, exist_ok=True) Path(path).mkdir(parents=True, exist_ok=True)
for index, row in enumerate(rows): with tqdm(total=total_row_number, desc="Processing vCards", unit="vcard", leave=False) as pbar:
for row in rows:
_process_vcard_row(row, path, data) _process_vcard_row(row, path, data)
logger.info(f"Processing vCards...({index + 1}/{total_row_number})\r") pbar.update(1)
logger.info(f"Processed {total_row_number} vCards{CLEAR_LINE}") total_time = pbar.format_dict['elapsed']
logger.info(f"Processed {total_row_number} vCards in {convert_time_unit(total_time)}{CLEAR_LINE}")
def _execute_vcard_query_modern(c, filter_date, filter_chat, filter_empty): def _execute_vcard_query_modern(c, filter_date, filter_chat, filter_empty):
"""Execute vCard query for modern WhatsApp database schema.""" """Execute vCard query for modern WhatsApp database schema."""
# Build the filter conditions # Build the filter conditions
chat_filter_include = get_chat_condition(
filter_chat[0], True, ["messages.key_remote_jid", "remote_resource"], "jid", "android")
chat_filter_exclude = get_chat_condition(
filter_chat[1], False, ["messages.key_remote_jid", "remote_resource"], "jid", "android")
date_filter = f'AND messages.timestamp {filter_date}' if filter_date is not None else '' date_filter = f'AND messages.timestamp {filter_date}' if filter_date is not None else ''
empty_filter = get_cond_for_empty(filter_empty, "key_remote_jid", "messages.needs_push") empty_filter = get_cond_for_empty(filter_empty, "key_remote_jid", "messages.needs_push")
include_filter = get_chat_condition(
filter_chat[0], True, ["key_remote_jid", "group_sender_jid"], "jid", "android")
exclude_filter = get_chat_condition(
filter_chat[1], False, ["key_remote_jid", "group_sender_jid"], "jid", "android")
query = f"""SELECT message_row_id, query = f"""SELECT message_row_id,
messages.key_remote_jid, COALESCE(lid_global.raw_string, jid.raw_string) as key_remote_jid,
vcard, vcard,
messages.media_name messages.media_name,
COALESCE(lid_group.raw_string, jid_group.raw_string) as group_sender_jid
FROM messages_vcards FROM messages_vcards
INNER JOIN messages INNER JOIN messages
ON messages_vcards.message_row_id = messages._id ON messages_vcards.message_row_id = messages._id
@@ -798,11 +834,21 @@ def _execute_vcard_query_modern(c, filter_date, filter_chat, filter_empty):
ON messages.key_remote_jid = jid.raw_string ON messages.key_remote_jid = jid.raw_string
LEFT JOIN chat LEFT JOIN chat
ON chat.jid_row_id = jid._id ON chat.jid_row_id = jid._id
LEFT JOIN jid jid_group
ON jid_group._id = message.sender_jid_row_id
LEFT JOIN jid_map as jid_map_global
ON chat.jid_row_id = jid_map_global.lid_row_id
LEFT JOIN jid lid_global
ON jid_map_global.jid_row_id = lid_global._id
LEFT JOIN jid_map as jid_map_group
ON message.sender_jid_row_id = jid_map_group.lid_row_id
LEFT JOIN jid lid_group
ON jid_map_group.jid_row_id = lid_group._id
WHERE 1=1 WHERE 1=1
{empty_filter} {empty_filter}
{date_filter} {date_filter}
{chat_filter_include} {include_filter}
{chat_filter_exclude} {exclude_filter}
ORDER BY messages.key_remote_jid ASC;""" ORDER BY messages.key_remote_jid ASC;"""
c.execute(query) c.execute(query)
return c.fetchall() return c.fetchall()
@@ -879,32 +925,37 @@ def calls(db, data, timezone_offset, filter_chat):
chat = ChatStore(Device.ANDROID, "WhatsApp Calls") chat = ChatStore(Device.ANDROID, "WhatsApp Calls")
# Process each call # Process each call
content = calls_data.fetchone() with tqdm(total=total_row_number, desc="Processing calls", unit="call", leave=False) as pbar:
while content is not None: while (content := _fetch_row_safely(calls_data)) is not None:
_process_call_record(content, chat, data, timezone_offset) _process_call_record(content, chat, data, timezone_offset)
content = calls_data.fetchone() pbar.update(1)
total_time = pbar.format_dict['elapsed']
# Add the calls chat to the data # Add the calls chat to the data
data.add_chat("000000000000000", chat) data.add_chat("000000000000000", chat)
logger.info(f"Processed {total_row_number} calls{CLEAR_LINE}") logger.info(f"Processed {total_row_number} calls in {convert_time_unit(total_time)}{CLEAR_LINE}")
def _get_calls_count(c, filter_chat): def _get_calls_count(c, filter_chat):
"""Get the count of call records that match the filter.""" """Get the count of call records that match the filter."""
# Build the filter conditions # Build the filter conditions
chat_filter_include = get_chat_condition(filter_chat[0], True, ["jid.raw_string"]) include_filter = get_chat_condition(filter_chat[0], True, ["key_remote_jid"])
chat_filter_exclude = get_chat_condition(filter_chat[1], False, ["jid.raw_string"]) exclude_filter = get_chat_condition(filter_chat[1], False, ["key_remote_jid"])
query = f"""SELECT count() query = f"""SELECT count(),
COALESCE(lid_global.raw_string, jid.raw_string) as key_remote_jid
FROM call_log FROM call_log
INNER JOIN jid INNER JOIN jid
ON call_log.jid_row_id = jid._id ON call_log.jid_row_id = jid._id
LEFT JOIN chat LEFT JOIN chat
ON call_log.jid_row_id = chat.jid_row_id ON call_log.jid_row_id = chat.jid_row_id
LEFT JOIN jid_map as jid_map_global
ON chat.jid_row_id = jid_map_global.lid_row_id
LEFT JOIN jid lid_global
ON jid_map_global.jid_row_id = lid_global._id
WHERE 1=1 WHERE 1=1
{chat_filter_include} {include_filter}
{chat_filter_exclude}""" {exclude_filter}"""
c.execute(query) c.execute(query)
return c.fetchone()[0] return c.fetchone()[0]
@@ -913,11 +964,11 @@ def _fetch_calls_data(c, filter_chat):
"""Fetch call data from the database.""" """Fetch call data from the database."""
# Build the filter conditions # Build the filter conditions
chat_filter_include = get_chat_condition(filter_chat[0], True, ["jid.raw_string"]) include_filter = get_chat_condition(filter_chat[0], True, ["key_remote_jid"])
chat_filter_exclude = get_chat_condition(filter_chat[1], False, ["jid.raw_string"]) exclude_filter = get_chat_condition(filter_chat[1], False, ["key_remote_jid"])
query = f"""SELECT call_log._id, query = f"""SELECT call_log._id,
jid.raw_string, COALESCE(lid_global.raw_string, jid.raw_string) as key_remote_jid,
from_me, from_me,
call_id, call_id,
timestamp, timestamp,
@@ -931,9 +982,13 @@ def _fetch_calls_data(c, filter_chat):
ON call_log.jid_row_id = jid._id ON call_log.jid_row_id = jid._id
LEFT JOIN chat LEFT JOIN chat
ON call_log.jid_row_id = chat.jid_row_id ON call_log.jid_row_id = chat.jid_row_id
LEFT JOIN jid_map as jid_map_global
ON chat.jid_row_id = jid_map_global.lid_row_id
LEFT JOIN jid lid_global
ON jid_map_global.jid_row_id = lid_global._id
WHERE 1=1 WHERE 1=1
{chat_filter_include} {include_filter}
{chat_filter_exclude}""" {exclude_filter}"""
c.execute(query) c.execute(query)
return c return c
@@ -945,13 +1000,13 @@ def _process_call_record(content, chat, data, timezone_offset):
timestamp=content["timestamp"], timestamp=content["timestamp"],
time=content["timestamp"], time=content["timestamp"],
key_id=content["call_id"], key_id=content["call_id"],
timezone_offset=timezone_offset if timezone_offset else CURRENT_TZ_OFFSET, timezone_offset=timezone_offset,
received_timestamp=None, # TODO: Add timestamp received_timestamp=None, # TODO: Add timestamp
read_timestamp=None # TODO: Add timestamp read_timestamp=None # TODO: Add timestamp
) )
# Get caller/callee name # Get caller/callee name
_jid = content["raw_string"] _jid = content["key_remote_jid"]
name = data.get_chat(_jid).name if _jid in data else content["chat_subject"] or None name = data.get_chat(_jid).name if _jid in data else content["chat_subject"] or None
if _jid is not None and "@" in _jid: if _jid is not None and "@" in _jid:
fallback = _jid.split('@')[0] fallback = _jid.split('@')[0]
@@ -996,6 +1051,7 @@ def _construct_call_description(content, call):
return description return description
# TODO: Marked for enhancement on multi-threaded processing
def create_html( def create_html(
data, data,
output_folder, output_folder,
@@ -1011,7 +1067,6 @@ def create_html(
template = setup_template(template, no_avatar, experimental) template = setup_template(template, no_avatar, experimental)
total_row_number = len(data) total_row_number = len(data)
logger.info(f"Generating chats...(0/{total_row_number})\r")
# Create output directory if it doesn't exist # Create output directory if it doesn't exist
if not os.path.isdir(output_folder): if not os.path.isdir(output_folder):
@@ -1019,7 +1074,8 @@ def create_html(
w3css = get_status_location(output_folder, offline_static) w3css = get_status_location(output_folder, offline_static)
for current, contact in enumerate(data): with tqdm(total=total_row_number, desc="Generating HTML", unit="file", leave=False) as pbar:
for contact in data:
current_chat = data.get_chat(contact) current_chat = data.get_chat(contact)
if len(current_chat) == 0: if len(current_chat) == 0:
# Skip empty chats # Skip empty chats
@@ -1051,11 +1107,9 @@ def create_html(
headline headline
) )
if current % 10 == 0: pbar.update(1)
logger.info(f"Generating chats...({current}/{total_row_number})\r") total_time = pbar.format_dict['elapsed']
logger.info(f"Generated {total_row_number} chats in {convert_time_unit(total_time)}{CLEAR_LINE}")
logger.info(f"Generated {total_row_number} chats{CLEAR_LINE}")
def _generate_single_chat(current_chat, safe_file_name, name, contact, output_folder, template, w3css, headline): def _generate_single_chat(current_chat, safe_file_name, name, contact, output_folder, template, w3css, headline):
"""Generate a single HTML file for a chat.""" """Generate a single HTML file for a chat."""

View File

@@ -279,7 +279,7 @@ class Message:
key_id: Union[int, str], key_id: Union[int, str],
received_timestamp: int = None, received_timestamp: int = None,
read_timestamp: int = None, read_timestamp: int = None,
timezone_offset: int = 0, timezone_offset: Optional[Timing] = Timing(0),
message_type: Optional[int] = None message_type: Optional[int] = None
) -> None: ) -> None:
""" """
@@ -300,10 +300,9 @@ class Message:
""" """
self.from_me = bool(from_me) self.from_me = bool(from_me)
self.timestamp = timestamp / 1000 if timestamp > 9999999999 else timestamp self.timestamp = timestamp / 1000 if timestamp > 9999999999 else timestamp
timing = Timing(timezone_offset)
if isinstance(time, (int, float)): if isinstance(time, (int, float)):
self.time = timing.format_timestamp(self.timestamp, "%H:%M") self.time = timezone_offset.format_timestamp(self.timestamp, "%H:%M")
elif isinstance(time, str): elif isinstance(time, str):
self.time = time self.time = time
else: else:
@@ -318,14 +317,14 @@ class Message:
self.mime = None self.mime = None
self.message_type = message_type self.message_type = message_type
if isinstance(received_timestamp, (int, float)): if isinstance(received_timestamp, (int, float)):
self.received_timestamp = timing.format_timestamp( self.received_timestamp = timezone_offset.format_timestamp(
received_timestamp, "%Y/%m/%d %H:%M") received_timestamp, "%Y/%m/%d %H:%M")
elif isinstance(received_timestamp, str): elif isinstance(received_timestamp, str):
self.received_timestamp = received_timestamp self.received_timestamp = received_timestamp
else: else:
self.received_timestamp = None self.received_timestamp = None
if isinstance(read_timestamp, (int, float)): if isinstance(read_timestamp, (int, float)):
self.read_timestamp = timing.format_timestamp( self.read_timestamp = timezone_offset.format_timestamp(
read_timestamp, "%Y/%m/%d %H:%M") read_timestamp, "%Y/%m/%d %H:%M")
elif isinstance(read_timestamp, str): elif isinstance(read_timestamp, str):
self.read_timestamp = read_timestamp self.read_timestamp = read_timestamp

View File

@@ -4,8 +4,9 @@ import os
import logging import logging
from datetime import datetime from datetime import datetime
from mimetypes import MimeTypes from mimetypes import MimeTypes
from tqdm import tqdm
from Whatsapp_Chat_Exporter.data_model import ChatStore, Message from Whatsapp_Chat_Exporter.data_model import ChatStore, Message
from Whatsapp_Chat_Exporter.utility import CLEAR_LINE, Device from Whatsapp_Chat_Exporter.utility import CLEAR_LINE, Device, convert_time_unit
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@@ -34,17 +35,16 @@ def messages(path, data, assume_first_as_me=False):
# Second pass: process the messages # Second pass: process the messages
with open(path, "r", encoding="utf8") as file: with open(path, "r", encoding="utf8") as file:
with tqdm(total=total_row_number, desc="Processing messages & media", unit="msg&media", leave=False) as pbar:
for index, line in enumerate(file): for index, line in enumerate(file):
you, user_identification_done = process_line( you, user_identification_done = process_line(
line, index, chat, path, you, line, index, chat, path, you,
assume_first_as_me, user_identification_done assume_first_as_me, user_identification_done
) )
pbar.update(1)
total_time = pbar.format_dict['elapsed']
logger.info(f"Processed {total_row_number} messages & media in {convert_time_unit(total_time)}{CLEAR_LINE}")
# Show progress
if index % 1000 == 0:
logger.info(f"Processing messages & media...({index}/{total_row_number})\r")
logger.info(f"Processed {total_row_number} messages & media{CLEAR_LINE}")
return data return data

View File

@@ -4,12 +4,13 @@ import os
import logging import logging
import shutil import shutil
from glob import glob from glob import glob
from tqdm import tqdm
from pathlib import Path from pathlib import Path
from mimetypes import MimeTypes from mimetypes import MimeTypes
from markupsafe import escape as htmle from markupsafe import escape as htmle
from Whatsapp_Chat_Exporter.data_model import ChatStore, Message from Whatsapp_Chat_Exporter.data_model import ChatStore, Message
from Whatsapp_Chat_Exporter.utility import APPLE_TIME, CLEAR_LINE, CURRENT_TZ_OFFSET, get_chat_condition from Whatsapp_Chat_Exporter.utility import APPLE_TIME, CLEAR_LINE, get_chat_condition, Device
from Whatsapp_Chat_Exporter.utility import bytes_to_readable, convert_time_unit, safe_name, Device from Whatsapp_Chat_Exporter.utility import bytes_to_readable, convert_time_unit, safe_name
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@@ -23,8 +24,8 @@ def contacts(db, data):
logger.info(f"Pre-processing contacts...({total_row_number})\r") logger.info(f"Pre-processing contacts...({total_row_number})\r")
c.execute("""SELECT ZWHATSAPPID, ZABOUTTEXT FROM ZWAADDRESSBOOKCONTACT WHERE ZABOUTTEXT IS NOT NULL""") c.execute("""SELECT ZWHATSAPPID, ZABOUTTEXT FROM ZWAADDRESSBOOKCONTACT WHERE ZABOUTTEXT IS NOT NULL""")
content = c.fetchone() with tqdm(total=total_row_number, desc="Processing contacts", unit="contact", leave=False) as pbar:
while content is not None: while (content := c.fetchone()) is not None:
zwhatsapp_id = content["ZWHATSAPPID"] zwhatsapp_id = content["ZWHATSAPPID"]
if not zwhatsapp_id.endswith("@s.whatsapp.net"): if not zwhatsapp_id.endswith("@s.whatsapp.net"):
zwhatsapp_id += "@s.whatsapp.net" zwhatsapp_id += "@s.whatsapp.net"
@@ -32,8 +33,9 @@ def contacts(db, data):
current_chat = ChatStore(Device.IOS) current_chat = ChatStore(Device.IOS)
current_chat.status = content["ZABOUTTEXT"] current_chat.status = content["ZABOUTTEXT"]
data.add_chat(zwhatsapp_id, current_chat) data.add_chat(zwhatsapp_id, current_chat)
content = c.fetchone() pbar.update(1)
logger.info(f"Pre-processed {total_row_number} contacts{CLEAR_LINE}") total_time = pbar.format_dict['elapsed']
logger.info(f"Pre-processed {total_row_number} contacts in {convert_time_unit(total_time)}{CLEAR_LINE}")
def process_contact_avatars(current_chat, media_folder, contact_id): def process_contact_avatars(current_chat, media_folder, contact_id):
@@ -92,7 +94,6 @@ def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat,
""" """
c.execute(contact_query) c.execute(contact_query)
total_row_number = c.fetchone()[0] total_row_number = c.fetchone()[0]
logger.info(f"Processing contacts...({total_row_number})\r")
# Get distinct contacts # Get distinct contacts
contacts_query = f""" contacts_query = f"""
@@ -114,8 +115,8 @@ def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat,
c.execute(contacts_query) c.execute(contacts_query)
# Process each contact # Process each contact
content = c.fetchone() with tqdm(total=total_row_number, desc="Processing contacts", unit="contact", leave=False) as pbar:
while content is not None: while (content := c.fetchone()) is not None:
contact_name = get_contact_name(content) contact_name = get_contact_name(content)
contact_id = content["ZCONTACTJID"] contact_id = content["ZCONTACTJID"]
@@ -129,9 +130,9 @@ def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat,
# Process avatar images # Process avatar images
process_contact_avatars(current_chat, media_folder, contact_id) process_contact_avatars(current_chat, media_folder, contact_id)
content = c.fetchone() pbar.update(1)
total_time = pbar.format_dict['elapsed']
logger.info(f"Processed {total_row_number} contacts{CLEAR_LINE}") logger.info(f"Processed {total_row_number} contacts in {convert_time_unit(total_time)}{CLEAR_LINE}")
# Get message count # Get message count
message_count_query = f""" message_count_query = f"""
@@ -190,9 +191,8 @@ def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat,
message_map = {row[0][:17]: row[1] or row[2] for row in cursor2.fetchall() if row[0]} message_map = {row[0][:17]: row[1] or row[2] for row in cursor2.fetchall() if row[0]}
# Process each message # Process each message
i = 0 with tqdm(total=total_row_number, desc="Processing messages", unit="msg", leave=False) as pbar:
content = c.fetchone() while (content := c.fetchone()) is not None:
while content is not None:
contact_id = content["ZCONTACTJID"] contact_id = content["ZCONTACTJID"]
message_pk = content["Z_PK"] message_pk = content["Z_PK"]
is_group_message = content["ZGROUPINFO"] is not None is_group_message = content["ZGROUPINFO"] is not None
@@ -211,7 +211,7 @@ def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat,
timestamp=ts, timestamp=ts,
time=ts, time=ts,
key_id=content["ZSTANZAID"][:17], key_id=content["ZSTANZAID"][:17],
timezone_offset=timezone_offset if timezone_offset else CURRENT_TZ_OFFSET, timezone_offset=timezone_offset,
message_type=content["ZMESSAGETYPE"], message_type=content["ZMESSAGETYPE"],
received_timestamp=APPLE_TIME + content["ZSENTDATE"] if content["ZSENTDATE"] else None, received_timestamp=APPLE_TIME + content["ZSENTDATE"] if content["ZSENTDATE"] else None,
read_timestamp=None # TODO: Add timestamp read_timestamp=None # TODO: Add timestamp
@@ -224,12 +224,9 @@ def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat,
if not invalid: if not invalid:
current_chat.add_message(message_pk, message) current_chat.add_message(message_pk, message)
# Update progress pbar.update(1)
i += 1 total_time = pbar.format_dict['elapsed']
if i % 1000 == 0: logger.info(f"Processed {total_row_number} messages in {convert_time_unit(total_time)}{CLEAR_LINE}")
logger.info(f"Processing messages...({i}/{total_row_number})\r")
content = c.fetchone()
logger.info(f"Processed {total_row_number} messages{CLEAR_LINE}")
def process_message_data(message, content, is_group_message, data, message_map, no_reply): def process_message_data(message, content, is_group_message, data, message_map, no_reply):
@@ -315,7 +312,7 @@ def process_message_text(message, content):
message.data = msg message.data = msg
def media(db, data, media_folder, filter_date, filter_chat, filter_empty, separate_media=False): def media(db, data, media_folder, filter_date, filter_chat, filter_empty, separate_media=False, fix_dot_files=False):
"""Process media files from WhatsApp messages.""" """Process media files from WhatsApp messages."""
c = db.cursor() c = db.cursor()
@@ -371,20 +368,15 @@ def media(db, data, media_folder, filter_date, filter_chat, filter_empty, separa
# Process each media item # Process each media item
mime = MimeTypes() mime = MimeTypes()
i = 0 with tqdm(total=total_row_number, desc="Processing media", unit="media", leave=False) as pbar:
content = c.fetchone() while (content := c.fetchone()) is not None:
while content is not None: process_media_item(content, data, media_folder, mime, separate_media, fix_dot_files)
process_media_item(content, data, media_folder, mime, separate_media) pbar.update(1)
total_time = pbar.format_dict['elapsed']
# Update progress logger.info(f"Processed {total_row_number} media in {convert_time_unit(total_time)}{CLEAR_LINE}")
i += 1
if i % 100 == 0:
logger.info(f"Processing media...({i}/{total_row_number})\r")
content = c.fetchone()
logger.info(f"Processed {total_row_number} media{CLEAR_LINE}")
def process_media_item(content, data, media_folder, mime, separate_media): def process_media_item(content, data, media_folder, mime, separate_media, fix_dot_files=False):
"""Process a single media item.""" """Process a single media item."""
file_path = f"{media_folder}/Message/{content['ZMEDIALOCALPATH']}" file_path = f"{media_folder}/Message/{content['ZMEDIALOCALPATH']}"
current_chat = data.get_chat(content["ZCONTACTJID"]) current_chat = data.get_chat(content["ZCONTACTJID"])
@@ -395,8 +387,6 @@ def process_media_item(content, data, media_folder, mime, separate_media):
current_chat.media_base = media_folder + "/" current_chat.media_base = media_folder + "/"
if os.path.isfile(file_path): if os.path.isfile(file_path):
message.data = '/'.join(file_path.split("/")[1:])
# Set MIME type # Set MIME type
if content["ZVCARDSTRING"] is None: if content["ZVCARDSTRING"] is None:
guess = mime.guess_type(file_path)[0] guess = mime.guess_type(file_path)[0]
@@ -404,6 +394,16 @@ def process_media_item(content, data, media_folder, mime, separate_media):
else: else:
message.mime = content["ZVCARDSTRING"] message.mime = content["ZVCARDSTRING"]
if fix_dot_files and file_path.endswith("."):
extension = mime.guess_extension(message.mime)
if message.mime == "application/octet-stream" or not extension:
new_file_path = file_path[:-1]
else:
extension = mime.guess_extension(message.mime)
new_file_path = file_path[:-1] + extension
os.rename(file_path, new_file_path)
file_path = new_file_path
# Handle separate media option # Handle separate media option
if separate_media: if separate_media:
chat_display_name = safe_name( chat_display_name = safe_name(
@@ -413,7 +413,9 @@ def process_media_item(content, data, media_folder, mime, separate_media):
Path(new_folder).mkdir(parents=True, exist_ok=True) Path(new_folder).mkdir(parents=True, exist_ok=True)
new_path = os.path.join(new_folder, current_filename) new_path = os.path.join(new_folder, current_filename)
shutil.copy2(file_path, new_path) shutil.copy2(file_path, new_path)
message.data = '/'.join(new_path.split("\\")[1:]) message.data = '/'.join(new_path.split("/")[1:])
else:
message.data = '/'.join(file_path.split("/")[1:])
else: else:
# Handle missing media # Handle missing media
message.data = "The media is missing" message.data = "The media is missing"
@@ -467,10 +469,12 @@ def vcard(db, data, media_folder, filter_date, filter_chat, filter_empty):
Path(path).mkdir(parents=True, exist_ok=True) Path(path).mkdir(parents=True, exist_ok=True)
# Process each vCard # Process each vCard
for index, content in enumerate(contents): with tqdm(total=total_row_number, desc="Processing vCards", unit="vcard", leave=False) as pbar:
for content in contents:
process_vcard_item(content, path, data) process_vcard_item(content, path, data)
logger.info(f"Processing vCards...({index + 1}/{total_row_number})\r") pbar.update(1)
logger.info(f"Processed {total_row_number} vCards{CLEAR_LINE}") total_time = pbar.format_dict['elapsed']
logger.info(f"Processed {total_row_number} vCards in {convert_time_unit(total_time)}{CLEAR_LINE}")
def process_vcard_item(content, path, data): def process_vcard_item(content, path, data):
@@ -530,8 +534,6 @@ def calls(db, data, timezone_offset, filter_chat):
if total_row_number == 0: if total_row_number == 0:
return return
logger.info(f"Processed {total_row_number} calls{CLEAR_LINE}\n")
# Fetch call records # Fetch call records
calls_query = f""" calls_query = f"""
SELECT ZCALLIDSTRING, SELECT ZCALLIDSTRING,
@@ -556,14 +558,15 @@ def calls(db, data, timezone_offset, filter_chat):
# Create calls chat # Create calls chat
chat = ChatStore(Device.ANDROID, "WhatsApp Calls") chat = ChatStore(Device.ANDROID, "WhatsApp Calls")
# Process each call with tqdm(total=total_row_number, desc="Processing calls", unit="call", leave=False) as pbar:
content = c.fetchone() while (content := c.fetchone()) is not None:
while content is not None:
process_call_record(content, chat, data, timezone_offset) process_call_record(content, chat, data, timezone_offset)
content = c.fetchone() pbar.update(1)
total_time = pbar.format_dict['elapsed']
# Add calls chat to data # Add calls chat to data
data.add_chat("000000000000000", chat) data.add_chat("000000000000000", chat)
logger.info(f"Processed {total_row_number} calls in {convert_time_unit(total_time)}{CLEAR_LINE}")
def process_call_record(content, chat, data, timezone_offset): def process_call_record(content, chat, data, timezone_offset):
@@ -574,7 +577,7 @@ def process_call_record(content, chat, data, timezone_offset):
timestamp=ts, timestamp=ts,
time=ts, time=ts,
key_id=content["ZCALLIDSTRING"], key_id=content["ZCALLIDSTRING"],
timezone_offset=timezone_offset if timezone_offset else CURRENT_TZ_OFFSET timezone_offset=timezone_offset
) )
# Set sender info # Set sender info

View File

@@ -6,7 +6,9 @@ import sqlite3
import os import os
import getpass import getpass
from sys import exit, platform as osname from sys import exit, platform as osname
from Whatsapp_Chat_Exporter.utility import CLEAR_LINE, WhatsAppIdentifier import sys
from tqdm import tqdm
from Whatsapp_Chat_Exporter.utility import CLEAR_LINE, WhatsAppIdentifier, convert_time_unit
from Whatsapp_Chat_Exporter.bplist import BPListReader from Whatsapp_Chat_Exporter.bplist import BPListReader
try: try:
from iphone_backup_decrypt import EncryptedBackup, RelativePath from iphone_backup_decrypt import EncryptedBackup, RelativePath
@@ -79,6 +81,8 @@ class BackupExtractor:
logger.info(f"Encryption detected on the backup!{CLEAR_LINE}") logger.info(f"Encryption detected on the backup!{CLEAR_LINE}")
password = getpass.getpass("Enter the password for the backup:") password = getpass.getpass("Enter the password for the backup:")
sys.stdout.write("\033[F\033[K")
sys.stdout.flush()
self._decrypt_backup(password) self._decrypt_backup(password)
self._extract_decrypted_files() self._extract_decrypted_files()
@@ -89,7 +93,7 @@ class BackupExtractor:
Args: Args:
password (str): The password for the encrypted backup. password (str): The password for the encrypted backup.
""" """
logger.info(f"Trying to decrypt the iOS backup...{CLEAR_LINE}") logger.info(f"Trying to open the iOS backup...{CLEAR_LINE}")
self.backup = EncryptedBackup( self.backup = EncryptedBackup(
backup_directory=self.base_dir, backup_directory=self.base_dir,
passphrase=password, passphrase=password,
@@ -97,7 +101,7 @@ class BackupExtractor:
check_same_thread=False, check_same_thread=False,
decrypt_chunk_size=self.decrypt_chunk_size, decrypt_chunk_size=self.decrypt_chunk_size,
) )
logger.info(f"iOS backup decrypted successfully{CLEAR_LINE}") logger.info(f"iOS backup is opened successfully{CLEAR_LINE}")
logger.info("Decrypting WhatsApp database...\r") logger.info("Decrypting WhatsApp database...\r")
try: try:
self.backup.extract_file( self.backup.extract_file(
@@ -130,9 +134,12 @@ class BackupExtractor:
def _extract_decrypted_files(self): def _extract_decrypted_files(self):
"""Extract all WhatsApp files after decryption""" """Extract all WhatsApp files after decryption"""
pbar = tqdm(desc="Decrypting and extracting files", unit="file", leave=False)
def extract_progress_handler(file_id, domain, relative_path, n, total_files): def extract_progress_handler(file_id, domain, relative_path, n, total_files):
if n % 100 == 0: if pbar.total is None:
logger.info(f"Decrypting and extracting files...({n}/{total_files})\r") pbar.total = total_files
pbar.n = n
pbar.refresh()
return True return True
self.backup.extract_files( self.backup.extract_files(
@@ -141,7 +148,9 @@ class BackupExtractor:
preserve_folders=True, preserve_folders=True,
filter_callback=extract_progress_handler filter_callback=extract_progress_handler
) )
logger.info(f"All required files are decrypted and extracted.{CLEAR_LINE}") total_time = pbar.format_dict['elapsed']
pbar.close()
logger.info(f"All required files are decrypted and extracted in {convert_time_unit(total_time)}{CLEAR_LINE}")
def _extract_unencrypted_backup(self): def _extract_unencrypted_backup(self):
""" """
@@ -192,7 +201,6 @@ class BackupExtractor:
c = manifest.cursor() c = manifest.cursor()
c.execute(f"SELECT count() FROM Files WHERE domain = '{_wts_id}'") c.execute(f"SELECT count() FROM Files WHERE domain = '{_wts_id}'")
total_row_number = c.fetchone()[0] total_row_number = c.fetchone()[0]
logger.info(f"Extracting WhatsApp files...(0/{total_row_number})\r")
c.execute( c.execute(
f""" f"""
SELECT fileID, relativePath, flags, file AS metadata, SELECT fileID, relativePath, flags, file AS metadata,
@@ -205,10 +213,9 @@ class BackupExtractor:
if not os.path.isdir(_wts_id): if not os.path.isdir(_wts_id):
os.mkdir(_wts_id) os.mkdir(_wts_id)
row = c.fetchone() with tqdm(total=total_row_number, desc="Extracting WhatsApp files", unit="file", leave=False) as pbar:
while row is not None: while (row := c.fetchone()) is not None:
if not row["relativePath"]: # Skip empty relative paths if not row["relativePath"]: # Skip empty relative paths
row = c.fetchone()
continue continue
destination = os.path.join(_wts_id, row["relativePath"]) destination = os.path.join(_wts_id, row["relativePath"])
@@ -224,14 +231,12 @@ class BackupExtractor:
elif flags == 1: # File elif flags == 1: # File
shutil.copyfile(os.path.join(self.base_dir, folder, hashes), destination) shutil.copyfile(os.path.join(self.base_dir, folder, hashes), destination)
metadata = BPListReader(row["metadata"]).parse() metadata = BPListReader(row["metadata"]).parse()
creation = metadata["$objects"][1]["Birth"] _creation = metadata["$objects"][1]["Birth"]
modification = metadata["$objects"][1]["LastModified"] modification = metadata["$objects"][1]["LastModified"]
os.utime(destination, (modification, modification)) os.utime(destination, (modification, modification))
pbar.update(1)
if row["_index"] % 100 == 0: total_time = pbar.format_dict['elapsed']
logger.info(f"Extracting WhatsApp files...({row['_index']}/{total_row_number})\r") logger.info(f"Extracted {total_row_number} WhatsApp files in {convert_time_unit(total_time)}{CLEAR_LINE}")
row = c.fetchone()
logger.info(f"Extracted WhatsApp files...({total_row_number}){CLEAR_LINE}")
def extract_media(base_dir, identifiers, decrypt_chunk_size): def extract_media(base_dir, identifiers, decrypt_chunk_size):

View File

@@ -5,13 +5,13 @@ import json
import os import os
import unicodedata import unicodedata
import re import re
import string
import math import math
import shutil import shutil
from bleach import clean as sanitize from bleach import clean as sanitize
from markupsafe import Markup from markupsafe import Markup
from datetime import datetime, timedelta from datetime import datetime, timedelta
from enum import IntEnum from enum import IntEnum
from tqdm import tqdm
from Whatsapp_Chat_Exporter.data_model import ChatCollection, ChatStore, Timing from Whatsapp_Chat_Exporter.data_model import ChatCollection, ChatStore, Timing
from typing import Dict, List, Optional, Tuple, Union from typing import Dict, List, Optional, Tuple, Union
try: try:
@@ -248,13 +248,13 @@ def import_from_json(json_file: str, data: ChatCollection):
with open(json_file, "r") as f: with open(json_file, "r") as f:
temp_data = json.loads(f.read()) temp_data = json.loads(f.read())
total_row_number = len(tuple(temp_data.keys())) total_row_number = len(tuple(temp_data.keys()))
logger.info(f"Importing chats from JSON...(0/{total_row_number})\r") with tqdm(total=total_row_number, desc="Importing chats from JSON", unit="chat", leave=False) as pbar:
for index, (jid, chat_data) in enumerate(temp_data.items()): for jid, chat_data in temp_data.items():
chat = ChatStore.from_json(chat_data) chat = ChatStore.from_json(chat_data)
data.add_chat(jid, chat) data.add_chat(jid, chat)
logger.info( pbar.update(1)
f"Importing chats from JSON...({index + 1}/{total_row_number})\r") total_time = pbar.format_dict['elapsed']
logger.info(f"Imported {total_row_number} chats from JSON{CLEAR_LINE}") logger.info(f"Imported {total_row_number} chats from JSON in {convert_time_unit(total_time)}{CLEAR_LINE}")
def incremental_merge(source_dir: str, target_dir: str, media_dir: str, pretty_print_json: int, avoid_encoding_json: bool): def incremental_merge(source_dir: str, target_dir: str, media_dir: str, pretty_print_json: int, avoid_encoding_json: bool):
@@ -439,7 +439,7 @@ CRYPT14_OFFSETS = (
{"iv": 67, "db": 193}, {"iv": 67, "db": 193},
{"iv": 67, "db": 194}, {"iv": 67, "db": 194},
{"iv": 67, "db": 158}, {"iv": 67, "db": 158},
{"iv": 67, "db": 196} {"iv": 67, "db": 196},
) )
@@ -534,7 +534,7 @@ def determine_metadata(content: sqlite3.Row, init_msg: Optional[str]) -> Optiona
else: else:
msg = "The security code in this chat changed" msg = "The security code in this chat changed"
elif content["action_type"] == 58: elif content["action_type"] == 58:
msg = "You blocked this contact" msg = "You blocked/unblocked this contact"
elif content["action_type"] == 67: elif content["action_type"] == 67:
return # (PM) this contact use secure service from Facebook??? return # (PM) this contact use secure service from Facebook???
elif content["action_type"] == 69: elif content["action_type"] == 69:
@@ -639,11 +639,17 @@ def get_from_string(msg: Dict, chat_id: str) -> str:
def get_chat_type(chat_id: str) -> str: def get_chat_type(chat_id: str) -> str:
"""Return the chat type based on the whatsapp id""" """Return the chat type based on the whatsapp id"""
if chat_id.endswith("@s.whatsapp.net"): if chat_id == "000000000000000":
return "calls"
elif chat_id.endswith("@s.whatsapp.net"):
return "personal_chat" return "personal_chat"
if chat_id.endswith("@g.us"): elif chat_id.endswith("@g.us"):
return "private_group" return "private_group"
logger.warning("Unknown chat type for %s, defaulting to private_group", chat_id) elif chat_id == "status@broadcast":
return "status_broadcast"
elif chat_id.endswith("@broadcast"):
return "broadcast_channel"
logger.warning(f"Unknown chat type for {chat_id}, defaulting to private_group{CLEAR_LINE}")
return "private_group" return "private_group"
@@ -674,7 +680,7 @@ def telegram_json_format(jik: str, data: Dict, timezone_offset) -> Dict:
except ValueError: except ValueError:
# not a real chat: e.g. statusbroadcast # not a real chat: e.g. statusbroadcast
chat_id = 0 chat_id = 0
obj = { json_obj = {
"name": data["name"] if data["name"] else jik, "name": data["name"] if data["name"] else jik,
"type": get_chat_type(jik), "type": get_chat_type(jik),
"id": chat_id, "id": chat_id,
@@ -694,14 +700,15 @@ def telegram_json_format(jik: str, data: Dict, timezone_offset) -> Dict:
"text": msg["data"], "text": msg["data"],
} }
], ],
} for msgId, msg in data["messages"].items()] }
for msgId, msg in data["messages"].items()]
} }
# remove empty messages and replies # remove empty messages and replies
for msg_id, msg in enumerate(obj["messages"]): for msg_id, msg in enumerate(json_obj["messages"]):
if not msg["reply_to_message_id"]: if not msg["reply_to_message_id"]:
del obj["messages"][msg_id]["reply_to_message_id"] del json_obj["messages"][msg_id]["reply_to_message_id"]
obj["messages"] = [m for m in obj["messages"] if m["text"]] json_obj["messages"] = [m for m in json_obj["messages"] if m["text"]]
return obj return json_obj
class WhatsAppIdentifier(StrEnum): class WhatsAppIdentifier(StrEnum):

View File

@@ -281,7 +281,9 @@
{% filter escape %}{{ msg.data }}{% endfilter %} {% filter escape %}{{ msg.data }}{% endfilter %}
{% endif %} {% endif %}
{% if msg.caption is not none %} {% if msg.caption is not none %}
<p class='mt-1 {% if "audio/" in msg.mime %}text-[#808080]{% endif %}'>
{{ msg.caption | urlize(none, true, '_blank') }} {{ msg.caption | urlize(none, true, '_blank') }}
</p>
{% endif %} {% endif %}
{% endif %} {% endif %}
{% endif %} {% endif %}
@@ -351,7 +353,9 @@
{% filter escape %}{{ msg.data }}{% endfilter %} {% filter escape %}{{ msg.data }}{% endfilter %}
{% endif %} {% endif %}
{% if msg.caption is not none %} {% if msg.caption is not none %}
<p class='mt-1 {% if "audio/" in msg.mime %}text-[#808080]{% endif %}'>
{{ msg.caption | urlize(none, true, '_blank') }} {{ msg.caption | urlize(none, true, '_blank') }}
</p>
{% endif %} {% endif %}
{% endif %} {% endif %}
{% endif %} {% endif %}

View File

@@ -36,17 +36,19 @@ classifiers = [
requires-python = ">=3.10" requires-python = ">=3.10"
dependencies = [ dependencies = [
"jinja2", "jinja2",
"bleach" "bleach",
"tqdm"
] ]
[project.optional-dependencies] [project.optional-dependencies]
android_backup = ["pycryptodome", "javaobj-py3"] android_backup = ["pycryptodome", "javaobj-py3"]
ios_backup = ["iphone_backup_decrypt @ git+https://github.com/KnugiHK/iphone_backup_decrypt"]
crypt12 = ["pycryptodome"] crypt12 = ["pycryptodome"]
crypt14 = ["pycryptodome"] crypt14 = ["pycryptodome"]
crypt15 = ["pycryptodome", "javaobj-py3"] crypt15 = ["pycryptodome", "javaobj-py3"]
all = ["pycryptodome", "javaobj-py3"] all = ["pycryptodome", "javaobj-py3", "iphone_backup_decrypt @ git+https://github.com/KnugiHK/iphone_backup_decrypt"]
everything = ["pycryptodome", "javaobj-py3"] everything = ["pycryptodome", "javaobj-py3", "iphone_backup_decrypt @ git+https://github.com/KnugiHK/iphone_backup_decrypt"]
backup = ["pycryptodome", "javaobj-py3"] backup = ["pycryptodome", "javaobj-py3", "iphone_backup_decrypt @ git+https://github.com/KnugiHK/iphone_backup_decrypt"]
[project.scripts] [project.scripts]
wtsexporter = "Whatsapp_Chat_Exporter.__main__:main" wtsexporter = "Whatsapp_Chat_Exporter.__main__:main"

27
tests/conftest.py Normal file
View File

@@ -0,0 +1,27 @@
import pytest
import os
def pytest_collection_modifyitems(config, items):
    """
    Reorder the collected test items so test_nuitka_binary.py runs last.

    Aborts the entire pytest session up front if that file is missing from
    the tests directory, because the ordering guarantee would otherwise be
    silently skipped.
    """
    target_file = "test_nuitka_binary.py"

    # Sanity check: the file we are deferring must actually exist.
    test_dir = os.path.join(config.rootdir, "tests")
    if not os.path.exists(os.path.join(test_dir, target_file)):
        pytest.exit(f"\n[FATAL] Required test file '{target_file}' not found in {test_dir}. "
                    f"Order enforcement failed!", returncode=1)

    # Stable partition: everything else first, nuitka binary tests last.
    deferred = [item for item in items if target_file in item.nodeid]
    ordinary = [item for item in items if target_file not in item.nodeid]
    items[:] = ordinary + deferred