Refactor to use tqdm for showing progress

KnugiHK
2026-01-17 13:18:31 +08:00
parent 1c7d6f7912
commit d200130335
7 changed files with 264 additions and 302 deletions

View File

@@ -19,6 +19,7 @@ from Whatsapp_Chat_Exporter.utility import telegram_json_format
 from argparse import ArgumentParser, SUPPRESS
 from datetime import datetime
 from getpass import getpass
+from tqdm import tqdm
 from sys import exit
 from typing import Optional, List, Dict
 from Whatsapp_Chat_Exporter.vcards_contacts import ContactsFromVCards
@@ -665,24 +666,27 @@ def export_multiple_json(args, data: Dict) -> None:
     # Export each chat
     total = len(data.keys())
-    for index, jik in enumerate(data.keys()):
-        if data[jik]["name"] is not None:
-            contact = data[jik]["name"].replace('/', '')
-        else:
-            contact = jik.replace('+', '')
-        if args.telegram:
-            messages = telegram_json_format(jik, data[jik], args.timezone_offset)
-        else:
-            messages = {jik: data[jik]}
-        with open(f"{json_path}/{safe_name(contact)}.json", "w") as f:
-            file_content = json.dumps(
-                messages,
-                ensure_ascii=not args.avoid_encoding_json,
-                indent=args.pretty_print_json
-            )
-            f.write(file_content)
-        logger.info(f"Writing JSON file...({index + 1}/{total})\r")
+    with tqdm(total=total, desc="Generating JSON files", unit="file", leave=False) as pbar:
+        for jik in data.keys():
+            if data[jik]["name"] is not None:
+                contact = data[jik]["name"].replace('/', '')
+            else:
+                contact = jik.replace('+', '')
+            if args.telegram:
+                messages = telegram_json_format(jik, data[jik], args.timezone_offset)
+            else:
+                messages = {jik: data[jik]}
+            with open(f"{json_path}/{safe_name(contact)}.json", "w") as f:
+                file_content = json.dumps(
+                    messages,
+                    ensure_ascii=not args.avoid_encoding_json,
+                    indent=args.pretty_print_json
+                )
+                f.write(file_content)
+            pbar.update(1)
+        total_time = pbar.format_dict['elapsed']
+    logger.info(f"Generated {total} JSON files in {total_time:.2f} seconds{CLEAR_LINE}")


 def process_exported_chat(args, data: ChatCollection) -> None:
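The hunk above is the idiom this commit applies across every exporter: delete the hand-rolled "(index + 1)/(total)" log lines, wrap the loop in a tqdm context manager, tick it with pbar.update(1), and read the elapsed wall-clock time back out of pbar.format_dict['elapsed'] for the one-line summary. A minimal, self-contained sketch of the same pattern (the item list and loop body are illustrative, not from the codebase):

from tqdm import tqdm

items = [f"chat_{n}" for n in range(500)]  # stand-in for data.keys()

# leave=False clears the bar when done, so only the summary log line remains
with tqdm(total=len(items), desc="Generating JSON files", unit="file", leave=False) as pbar:
    for item in items:
        _ = item  # per-item work (serialize and write one JSON file) goes here
        pbar.update(1)
    # tqdm tracks elapsed seconds itself; read it before the bar closes
    total_time = pbar.format_dict['elapsed']
print(f"Generated {len(items)} JSON files in {total_time:.2f} seconds")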

View File

@@ -1,13 +1,11 @@
-import time
 import hmac
 import io
 import logging
-import threading
 import zlib
 import concurrent.futures
+from tqdm import tqdm
 from typing import Tuple, Union
 from hashlib import sha256
-from sys import exit
 from Whatsapp_Chat_Exporter.utility import CLEAR_LINE, CRYPT14_OFFSETS, Crypt, DbType
 try:
@@ -165,82 +163,64 @@ def _decrypt_crypt14(database: bytes, main_key: bytes, max_worker: int = 10) ->
     # Attempt known offsets first
     for offsets in CRYPT14_OFFSETS:
-        iv = database[offsets["iv"]:offsets["iv"] + 16]
-        db_ciphertext = database[offsets["db"]:]
-        try:
-            decrypted_db = _decrypt_database(db_ciphertext, main_key, iv)
-        except (zlib.error, ValueError):
-            pass  # Try next offset
-        else:
-            logger.debug(
-                f"Decryption successful with known offsets: IV {offsets['iv']}, DB {offsets['db']}{CLEAR_LINE}"
-            )
-            return decrypted_db  # Successful decryption
+        iv = offsets["iv"]
+        db = offsets["db"]
+        # _attempt_decrypt_task returns None on failure, so test the result
+        # instead of catching exceptions it already swallows
+        decrypted_db = _attempt_decrypt_task((iv, iv + 16, db), database, main_key)
+        if decrypted_db is None:
+            continue  # Try next offset
+        logger.debug(
+            f"Decryption successful with known offsets: IV {iv}, DB {db}{CLEAR_LINE}"
+        )
+        return decrypted_db  # Successful decryption

-    def animate_message(stop_event):
-        base_msg = "Common offsets failed. Initiating brute-force with multithreading"
-        dots = ["", ".", "..", "..."]
-        i = 0
-        while not stop_event.is_set():
-            logger.info(f"{base_msg}{dots[i % len(dots)]}\x1b[K\r")
-            time.sleep(0.3)
-            i += 1
-        logger.info(f"Common offsets failed but brute-forcing the offset works!{CLEAR_LINE}")
-
-    stop_event = threading.Event()
-    anim_thread = threading.Thread(target=animate_message, args=(stop_event,))
-    anim_thread.start()
-
-    # Convert brute force generator into a list for parallel processing
-    offset_combinations = list(brute_force_offset())
-
-    def attempt_decrypt(offset_tuple):
-        """Attempt decryption with the given offsets."""
-        start_iv, end_iv, start_db = offset_tuple
-        iv = database[start_iv:end_iv]
-        db_ciphertext = database[start_db:]
-        logger.debug(f"Trying offsets: IV {start_iv}-{end_iv}, DB {start_db}{CLEAR_LINE}")
-        try:
-            db = _decrypt_database(db_ciphertext, main_key, iv)
-        except (zlib.error, ValueError):
-            return None  # Decryption failed, move to next
-        else:
-            stop_event.set()
-            anim_thread.join()
-            logger.info(
-                f"The offsets of your IV and database are {start_iv} and "
-                f"{start_db}, respectively. To include your offsets in the "
-                "program, please report it by creating an issue on GitHub: "
-                "https://github.com/KnugiHK/Whatsapp-Chat-Exporter/discussions/47"
-                f"\nShutting down other threads...{CLEAR_LINE}"
-            )
-            return db
-
-    with concurrent.futures.ThreadPoolExecutor(max_worker) as executor:
-        future_to_offset = {executor.submit(attempt_decrypt, offset)
-                            : offset for offset in offset_combinations}
-        try:
-            for future in concurrent.futures.as_completed(future_to_offset):
-                result = future.result()
-                if result is not None:
-                    # Shutdown remaining threads
-                    executor.shutdown(wait=False, cancel_futures=True)
-                    return result
-        except KeyboardInterrupt:
-            stop_event.set()
-            anim_thread.join()
-            logger.info(f"Brute force interrupted by user (Ctrl+C). Shutting down gracefully...{CLEAR_LINE}")
-            executor.shutdown(wait=False, cancel_futures=True)
-            exit(1)
-        finally:
-            stop_event.set()
-            anim_thread.join()
+    offset_max = 200
+    logger.info(f"Common offsets failed. Attempt to brute-force...{CLEAR_LINE}")
+    with tqdm(total=offset_max ** 2, desc="Brute-forcing offsets", unit="trial", leave=False) as pbar:
+        with concurrent.futures.ThreadPoolExecutor(max_worker) as executor:
+            # Map futures to their offsets
+            future_to_offset = {
+                executor.submit(_attempt_decrypt_task, offset, database, main_key): offset
+                for offset in brute_force_offset(offset_max, offset_max)
+            }
+            try:
+                for future in concurrent.futures.as_completed(future_to_offset):
+                    pbar.update(1)
+                    result = future.result()
+                    if result is not None:
+                        # Success! Shutdown other tasks immediately
+                        executor.shutdown(wait=False, cancel_futures=True)
+                        start_iv, _, start_db = future_to_offset[future]
+                        logger.info(
+                            f"The offsets of your IV and database are {start_iv} and "
+                            f"{start_db}, respectively. To include your offsets in the "
+                            "program, please report it by creating an issue on GitHub: "
+                            "https://github.com/KnugiHK/Whatsapp-Chat-Exporter/discussions/47"
+                            f"\nShutting down other threads...{CLEAR_LINE}"
+                        )
+                        return result
+            except KeyboardInterrupt:
+                executor.shutdown(wait=False, cancel_futures=True)
+                raise KeyboardInterrupt("Brute force interrupted by user (Ctrl+C). Shutting down gracefully...")

     raise OffsetNotFoundError("Could not find the correct offsets for decryption.")
+
+
+def _attempt_decrypt_task(offset_tuple, database, main_key):
+    """Attempt decryption with the given offsets."""
+    start_iv, end_iv, start_db = offset_tuple
+    iv = database[start_iv:end_iv]
+    db_ciphertext = database[start_db:]
+    try:
+        return _decrypt_database(db_ciphertext, main_key, iv)
+    except (zlib.error, ValueError):
+        return None


 def _decrypt_crypt12(database: bytes, main_key: bytes) -> bytes:
     """Decrypt a crypt12 database.

View File

@@ -4,6 +4,7 @@ import logging
 import sqlite3
 import os
 import shutil
+from tqdm import tqdm
 from pathlib import Path
 from mimetypes import MimeTypes
 from markupsafe import escape as htmle
@@ -47,12 +48,15 @@ def contacts(db, data, enrich_from_vcards):
     logger.info(f"Processed {total_row_number} contacts\n")

     c.execute("SELECT jid, COALESCE(display_name, wa_name) as display_name, status FROM wa_contacts;")
-    row = c.fetchone()
-    while row is not None:
-        current_chat = data.add_chat(row["jid"], ChatStore(Device.ANDROID, row["display_name"]))
-        if row["status"] is not None:
-            current_chat.status = row["status"]
-        row = c.fetchone()
+    with tqdm(total=total_row_number, desc="Processing contacts", unit="contact", leave=False) as pbar:
+        while (row := _fetch_row_safely(c)) is not None:
+            current_chat = data.add_chat(row["jid"], ChatStore(Device.ANDROID, row["display_name"]))
+            if row["status"] is not None:
+                current_chat.status = row["status"]
+            pbar.update(1)
+        total_time = pbar.format_dict['elapsed']
+    logger.info(f"Processed {total_row_number} contacts in {total_time:.2f} seconds{CLEAR_LINE}")
     return True
@@ -72,7 +76,6 @@ def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat,
     """
     c = db.cursor()
     total_row_number = _get_message_count(c, filter_empty, filter_date, filter_chat)
-    logger.info(f"Processing messages...(0/{total_row_number})\r")

     try:
         content_cursor = _get_messages_cursor_legacy(c, filter_empty, filter_date, filter_chat)
@@ -84,22 +87,12 @@
     except Exception as e:
         raise e

-    i = 0
-    # Fetch the first row safely
-    content = _fetch_row_safely(content_cursor)
-
-    while content is not None:
-        _process_single_message(data, content, table_message, timezone_offset)
-        i += 1
-        if i % 1000 == 0:
-            logger.info(f"Processing messages...({i}/{total_row_number})\r")
-        # Fetch the next row safely
-        content = _fetch_row_safely(content_cursor)
-
-    logger.info(f"Processed {total_row_number} messages{CLEAR_LINE}")
+    with tqdm(total=total_row_number, desc="Processing messages", unit="msg", leave=False) as pbar:
+        while (content := _fetch_row_safely(content_cursor)) is not None:
+            _process_single_message(data, content, table_message, timezone_offset)
+            pbar.update(1)
+        total_time = pbar.format_dict['elapsed']
+    logger.info(f"Processed {total_row_number} messages in {total_time:.2f} seconds{CLEAR_LINE}")


 # Helper functions for message processing
@@ -499,8 +492,6 @@ def media(db, data, media_folder, filter_date, filter_chat, filter_empty, separa
     """
     c = db.cursor()
     total_row_number = _get_media_count(c, filter_empty, filter_date, filter_chat)
-    logger.info(f"Processing media...(0/{total_row_number})\r")
-
     try:
         content_cursor = _get_media_cursor_legacy(c, filter_empty, filter_date, filter_chat)
     except sqlite3.OperationalError:
@@ -512,18 +503,12 @@ def media(db, data, media_folder, filter_date, filter_chat, filter_empty, separa
     # Ensure thumbnails directory exists
     Path(f"{media_folder}/thumbnails").mkdir(parents=True, exist_ok=True)

-    i = 0
-    while content is not None:
-        _process_single_media(data, content, media_folder, mime, separate_media)
-
-        i += 1
-        if i % 100 == 0:
-            logger.info(f"Processing media...({i}/{total_row_number})\r")
-        content = content_cursor.fetchone()
-
-    logger.info(f"Processed {total_row_number} media{CLEAR_LINE}")
+    with tqdm(total=total_row_number, desc="Processing media", unit="media", leave=False) as pbar:
+        while (content := _fetch_row_safely(content_cursor)) is not None:
+            _process_single_media(data, content, media_folder, mime, separate_media)
+            pbar.update(1)
+        total_time = pbar.format_dict['elapsed']
+    logger.info(f"Processed {total_row_number} media in {total_time:.2f} seconds{CLEAR_LINE}")


 # Helper functions for media processing
@@ -701,17 +686,17 @@ def vcard(db, data, media_folder, filter_date, filter_chat, filter_empty):
     rows = _execute_vcard_query_legacy(c, filter_date, filter_chat, filter_empty)

     total_row_number = len(rows)
-    logger.info(f"Processing vCards...(0/{total_row_number})\r")

     # Create vCards directory if it doesn't exist
     path = os.path.join(media_folder, "vCards")
     Path(path).mkdir(parents=True, exist_ok=True)

-    for index, row in enumerate(rows):
-        _process_vcard_row(row, path, data)
-        logger.info(f"Processing vCards...({index + 1}/{total_row_number})\r")
-
-    logger.info(f"Processed {total_row_number} vCards{CLEAR_LINE}")
+    with tqdm(total=total_row_number, desc="Processing vCards", unit="vcard", leave=False) as pbar:
+        for row in rows:
+            _process_vcard_row(row, path, data)
+            pbar.update(1)
+        total_time = pbar.format_dict['elapsed']
+    logger.info(f"Processed {total_row_number} vCards in {total_time:.2f} seconds{CLEAR_LINE}")


 def _execute_vcard_query_modern(c, filter_date, filter_chat, filter_empty):
     """Execute vCard query for modern WhatsApp database schema."""
@@ -816,15 +801,15 @@ def calls(db, data, timezone_offset, filter_chat):
     chat = ChatStore(Device.ANDROID, "WhatsApp Calls")

     # Process each call
-    content = calls_data.fetchone()
-    while content is not None:
-        _process_call_record(content, chat, data, timezone_offset)
-        content = calls_data.fetchone()
+    with tqdm(total=total_row_number, desc="Processing calls", unit="call", leave=False) as pbar:
+        while (content := _fetch_row_safely(calls_data)) is not None:
+            _process_call_record(content, chat, data, timezone_offset)
+            pbar.update(1)
+        total_time = pbar.format_dict['elapsed']

     # Add the calls chat to the data
     data.add_chat("000000000000000", chat)
-    logger.info(f"Processed {total_row_number} calls{CLEAR_LINE}")
+    logger.info(f"Processed {total_row_number} calls in {total_time:.2f} seconds{CLEAR_LINE}")
@@ -948,7 +933,6 @@ def create_html(
     template = setup_template(template, no_avatar, experimental)

     total_row_number = len(data)
-    logger.info(f"Generating chats...(0/{total_row_number})\r")

     # Create output directory if it doesn't exist
     if not os.path.isdir(output_folder):
@@ -956,43 +940,42 @@
     w3css = get_status_location(output_folder, offline_static)

-    for current, contact in enumerate(data):
-        current_chat = data.get_chat(contact)
-        if len(current_chat) == 0:
-            # Skip empty chats
-            continue
-        safe_file_name, name = get_file_name(contact, current_chat)
-        if maximum_size is not None:
-            _generate_paginated_chat(
-                current_chat,
-                safe_file_name,
-                name,
-                contact,
-                output_folder,
-                template,
-                w3css,
-                maximum_size,
-                headline
-            )
-        else:
-            _generate_single_chat(
-                current_chat,
-                safe_file_name,
-                name,
-                contact,
-                output_folder,
-                template,
-                w3css,
-                headline
-            )
-        if current % 10 == 0:
-            logger.info(f"Generating chats...({current}/{total_row_number})\r")
-
-    logger.info(f"Generated {total_row_number} chats{CLEAR_LINE}")
+    with tqdm(total=total_row_number, desc="Generating HTML", unit="file", leave=False) as pbar:
+        for contact in data:
+            current_chat = data.get_chat(contact)
+            if len(current_chat) == 0:
+                # Skip empty chats
+                continue
+            safe_file_name, name = get_file_name(contact, current_chat)
+            if maximum_size is not None:
+                _generate_paginated_chat(
+                    current_chat,
+                    safe_file_name,
+                    name,
+                    contact,
+                    output_folder,
+                    template,
+                    w3css,
+                    maximum_size,
+                    headline
+                )
+            else:
+                _generate_single_chat(
+                    current_chat,
+                    safe_file_name,
+                    name,
+                    contact,
+                    output_folder,
+                    template,
+                    w3css,
+                    headline
+                )
+            pbar.update(1)
+        total_time = pbar.format_dict['elapsed']
+    logger.info(f"Generated {total_row_number} chats in {total_time:.2f} seconds{CLEAR_LINE}")


 def _generate_single_chat(current_chat, safe_file_name, name, contact, output_folder, template, w3css, headline):
     """Generate a single HTML file for a chat."""

View File

@@ -4,6 +4,7 @@ import os
 import logging
 from datetime import datetime
 from mimetypes import MimeTypes
+from tqdm import tqdm
 from Whatsapp_Chat_Exporter.data_model import ChatStore, Message
 from Whatsapp_Chat_Exporter.utility import CLEAR_LINE, Device
@@ -34,17 +35,16 @@ def messages(path, data, assume_first_as_me=False):
     # Second pass: process the messages
     with open(path, "r", encoding="utf8") as file:
-        for index, line in enumerate(file):
-            you, user_identification_done = process_line(
-                line, index, chat, path, you,
-                assume_first_as_me, user_identification_done
-            )
-
-            # Show progress
-            if index % 1000 == 0:
-                logger.info(f"Processing messages & media...({index}/{total_row_number})\r")
-
-    logger.info(f"Processed {total_row_number} messages & media{CLEAR_LINE}")
+        with tqdm(total=total_row_number, desc="Processing messages & media", unit="msg&media", leave=False) as pbar:
+            for index, line in enumerate(file):
+                you, user_identification_done = process_line(
+                    line, index, chat, path, you,
+                    assume_first_as_me, user_identification_done
+                )
+                pbar.update(1)
+            total_time = pbar.format_dict['elapsed']
+    logger.info(f"Processed {total_row_number} messages & media in {total_time:.2f} seconds{CLEAR_LINE}")
     return data
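A plain-text export reports no row count up front, which is why total_row_number is computed before this loop and handed to tqdm as the total. Assuming the earlier pass simply counts lines (the file name and parsing below are illustrative), the two-pass shape looks like this:

from tqdm import tqdm

path = "exported_chat.txt"  # illustrative path

# First pass: count lines so the bar can show a percentage and ETA
with open(path, "r", encoding="utf8") as file:
    total_row_number = sum(1 for _ in file)

# Second pass: parse with a determinate progress bar
with open(path, "r", encoding="utf8") as file:
    with tqdm(total=total_row_number, desc="Processing messages & media", unit="msg&media", leave=False) as pbar:
        for index, line in enumerate(file):
            _ = (index, line.rstrip("\n"))  # per-line parsing goes here
            pbar.update(1)
        total_time = pbar.format_dict['elapsed']
print(f"Processed {total_row_number} messages & media in {total_time:.2f} seconds")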

View File

@@ -4,6 +4,7 @@ import os
 import logging
 import shutil
 from glob import glob
+from tqdm import tqdm
 from pathlib import Path
 from mimetypes import MimeTypes
 from markupsafe import escape as htmle
@@ -23,17 +24,18 @@ def contacts(db, data):
     logger.info(f"Pre-processing contacts...({total_row_number})\r")

     c.execute("""SELECT ZWHATSAPPID, ZABOUTTEXT FROM ZWAADDRESSBOOKCONTACT WHERE ZABOUTTEXT IS NOT NULL""")
-    content = c.fetchone()
-    while content is not None:
-        zwhatsapp_id = content["ZWHATSAPPID"]
-        if not zwhatsapp_id.endswith("@s.whatsapp.net"):
-            zwhatsapp_id += "@s.whatsapp.net"
-
-        current_chat = ChatStore(Device.IOS)
-        current_chat.status = content["ZABOUTTEXT"]
-        data.add_chat(zwhatsapp_id, current_chat)
-        content = c.fetchone()
-
-    logger.info(f"Pre-processed {total_row_number} contacts{CLEAR_LINE}")
+    with tqdm(total=total_row_number, desc="Processing contacts", unit="contact", leave=False) as pbar:
+        while (content := c.fetchone()) is not None:
+            zwhatsapp_id = content["ZWHATSAPPID"]
+            if not zwhatsapp_id.endswith("@s.whatsapp.net"):
+                zwhatsapp_id += "@s.whatsapp.net"
+
+            current_chat = ChatStore(Device.IOS)
+            current_chat.status = content["ZABOUTTEXT"]
+            data.add_chat(zwhatsapp_id, current_chat)
+            pbar.update(1)
+        total_time = pbar.format_dict['elapsed']
+    logger.info(f"Pre-processed {total_row_number} contacts in {total_time:.2f} seconds{CLEAR_LINE}")


 def process_contact_avatars(current_chat, media_folder, contact_id):
@@ -92,7 +94,6 @@ def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat,
     """
     c.execute(contact_query)
     total_row_number = c.fetchone()[0]
-    logger.info(f"Processing contacts...({total_row_number})\r")

     # Get distinct contacts
     contacts_query = f"""
@@ -114,24 +115,24 @@
     c.execute(contacts_query)

     # Process each contact
-    content = c.fetchone()
-    while content is not None:
-        contact_name = get_contact_name(content)
-        contact_id = content["ZCONTACTJID"]
-
-        # Add or update chat
-        if contact_id not in data:
-            current_chat = data.add_chat(contact_id, ChatStore(Device.IOS, contact_name, media_folder))
-        else:
-            current_chat = data.get_chat(contact_id)
-            current_chat.name = contact_name
-        current_chat.my_avatar = os.path.join(media_folder, "Media/Profile/Photo.jpg")
-
-        # Process avatar images
-        process_contact_avatars(current_chat, media_folder, contact_id)
-        content = c.fetchone()
-
-    logger.info(f"Processed {total_row_number} contacts{CLEAR_LINE}")
+    with tqdm(total=total_row_number, desc="Processing contacts", unit="contact", leave=False) as pbar:
+        while (content := c.fetchone()) is not None:
+            contact_name = get_contact_name(content)
+            contact_id = content["ZCONTACTJID"]
+
+            # Add or update chat
+            if contact_id not in data:
+                current_chat = data.add_chat(contact_id, ChatStore(Device.IOS, contact_name, media_folder))
+            else:
+                current_chat = data.get_chat(contact_id)
+                current_chat.name = contact_name
+            current_chat.my_avatar = os.path.join(media_folder, "Media/Profile/Photo.jpg")
+
+            # Process avatar images
+            process_contact_avatars(current_chat, media_folder, contact_id)
+            pbar.update(1)
+        total_time = pbar.format_dict['elapsed']
+    logger.info(f"Processed {total_row_number} contacts in {total_time:.2f} seconds{CLEAR_LINE}")

     # Get message count
     message_count_query = f"""
@@ -190,46 +191,42 @@
     message_map = {row[0][:17]: row[1] or row[2] for row in cursor2.fetchall() if row[0]}

     # Process each message
-    i = 0
-    content = c.fetchone()
-    while content is not None:
-        contact_id = content["ZCONTACTJID"]
-        message_pk = content["Z_PK"]
-        is_group_message = content["ZGROUPINFO"] is not None
-
-        # Ensure chat exists
-        if contact_id not in data:
-            current_chat = data.add_chat(contact_id, ChatStore(Device.IOS))
-            process_contact_avatars(current_chat, media_folder, contact_id)
-        else:
-            current_chat = data.get_chat(contact_id)
-
-        # Create message object
-        ts = APPLE_TIME + content["ZMESSAGEDATE"]
-        message = Message(
-            from_me=content["ZISFROMME"],
-            timestamp=ts,
-            time=ts,
-            key_id=content["ZSTANZAID"][:17],
-            timezone_offset=timezone_offset if timezone_offset else CURRENT_TZ_OFFSET,
-            message_type=content["ZMESSAGETYPE"],
-            received_timestamp=APPLE_TIME + content["ZSENTDATE"] if content["ZSENTDATE"] else None,
-            read_timestamp=None  # TODO: Add timestamp
-        )
-
-        # Process message data
-        invalid = process_message_data(message, content, is_group_message, data, message_map, no_reply)
-
-        # Add valid messages to chat
-        if not invalid:
-            current_chat.add_message(message_pk, message)
-
-        # Update progress
-        i += 1
-        if i % 1000 == 0:
-            logger.info(f"Processing messages...({i}/{total_row_number})\r")
-        content = c.fetchone()
-
-    logger.info(f"Processed {total_row_number} messages{CLEAR_LINE}")
+    with tqdm(total=total_row_number, desc="Processing messages", unit="msg", leave=False) as pbar:
+        while (content := c.fetchone()) is not None:
+            contact_id = content["ZCONTACTJID"]
+            message_pk = content["Z_PK"]
+            is_group_message = content["ZGROUPINFO"] is not None
+
+            # Ensure chat exists
+            if contact_id not in data:
+                current_chat = data.add_chat(contact_id, ChatStore(Device.IOS))
+                process_contact_avatars(current_chat, media_folder, contact_id)
+            else:
+                current_chat = data.get_chat(contact_id)
+
+            # Create message object
+            ts = APPLE_TIME + content["ZMESSAGEDATE"]
+            message = Message(
+                from_me=content["ZISFROMME"],
+                timestamp=ts,
+                time=ts,
+                key_id=content["ZSTANZAID"][:17],
+                timezone_offset=timezone_offset if timezone_offset else CURRENT_TZ_OFFSET,
+                message_type=content["ZMESSAGETYPE"],
+                received_timestamp=APPLE_TIME + content["ZSENTDATE"] if content["ZSENTDATE"] else None,
+                read_timestamp=None  # TODO: Add timestamp
+            )
+
+            # Process message data
+            invalid = process_message_data(message, content, is_group_message, data, message_map, no_reply)
+
+            # Add valid messages to chat
+            if not invalid:
+                current_chat.add_message(message_pk, message)
+            pbar.update(1)
+        total_time = pbar.format_dict['elapsed']
+    logger.info(f"Processed {total_row_number} messages in {total_time:.2f} seconds{CLEAR_LINE}")


 def process_message_data(message, content, is_group_message, data, message_map, no_reply):
@@ -371,17 +368,12 @@ def media(db, data, media_folder, filter_date, filter_chat, filter_empty, separa
     # Process each media item
     mime = MimeTypes()
-    i = 0
-    content = c.fetchone()
-    while content is not None:
-        process_media_item(content, data, media_folder, mime, separate_media)
-
-        # Update progress
-        i += 1
-        if i % 100 == 0:
-            logger.info(f"Processing media...({i}/{total_row_number})\r")
-        content = c.fetchone()
-
-    logger.info(f"Processed {total_row_number} media{CLEAR_LINE}")
+    with tqdm(total=total_row_number, desc="Processing media", unit="media", leave=False) as pbar:
+        while (content := c.fetchone()) is not None:
+            process_media_item(content, data, media_folder, mime, separate_media)
+            pbar.update(1)
+        total_time = pbar.format_dict['elapsed']
+    logger.info(f"Processed {total_row_number} media in {total_time:.2f} seconds{CLEAR_LINE}")


 def process_media_item(content, data, media_folder, mime, separate_media):
@@ -467,10 +459,12 @@ def vcard(db, data, media_folder, filter_date, filter_chat, filter_empty):
     Path(path).mkdir(parents=True, exist_ok=True)

     # Process each vCard
-    for index, content in enumerate(contents):
-        process_vcard_item(content, path, data)
-        logger.info(f"Processing vCards...({index + 1}/{total_row_number})\r")
-
-    logger.info(f"Processed {total_row_number} vCards{CLEAR_LINE}")
+    with tqdm(total=total_row_number, desc="Processing vCards", unit="vcard", leave=False) as pbar:
+        for content in contents:
+            process_vcard_item(content, path, data)
+            pbar.update(1)
+        total_time = pbar.format_dict['elapsed']
+    logger.info(f"Processed {total_row_number} vCards in {total_time:.2f} seconds{CLEAR_LINE}")


 def process_vcard_item(content, path, data):
@@ -530,8 +524,6 @@ def calls(db, data, timezone_offset, filter_chat):
     if total_row_number == 0:
         return
-
-    logger.info(f"Processed {total_row_number} calls{CLEAR_LINE}\n")

     # Fetch call records
     calls_query = f"""
         SELECT ZCALLIDSTRING,
@@ -556,14 +548,15 @@
     # Create calls chat
     chat = ChatStore(Device.ANDROID, "WhatsApp Calls")

-    # Process each call
-    content = c.fetchone()
-    while content is not None:
-        process_call_record(content, chat, data, timezone_offset)
-        content = c.fetchone()
+    with tqdm(total=total_row_number, desc="Processing calls", unit="call", leave=False) as pbar:
+        while (content := c.fetchone()) is not None:
+            process_call_record(content, chat, data, timezone_offset)
+            pbar.update(1)
+        total_time = pbar.format_dict['elapsed']

     # Add calls chat to data
     data.add_chat("000000000000000", chat)
+    logger.info(f"Processed {total_row_number} calls in {total_time:.2f} seconds{CLEAR_LINE}")


 def process_call_record(content, chat, data, timezone_offset):

View File

@@ -6,6 +6,7 @@ import sqlite3
 import os
 import getpass
 from sys import exit, platform as osname
+from tqdm import tqdm
 from Whatsapp_Chat_Exporter.utility import CLEAR_LINE, WhatsAppIdentifier
 from Whatsapp_Chat_Exporter.bplist import BPListReader
 try:
@@ -89,7 +90,7 @@ class BackupExtractor:
         Args:
             password (str): The password for the encrypted backup.
         """
-        logger.info(f"Trying to decrypt the iOS backup...{CLEAR_LINE}")
+        logger.info(f"Trying to open the iOS backup...{CLEAR_LINE}")
         self.backup = EncryptedBackup(
             backup_directory=self.base_dir,
             passphrase=password,
@@ -97,7 +98,7 @@
             check_same_thread=False,
             decrypt_chunk_size=self.decrypt_chunk_size,
         )
-        logger.info(f"iOS backup decrypted successfully{CLEAR_LINE}")
+        logger.info(f"iOS backup opened successfully{CLEAR_LINE}")
         logger.info("Decrypting WhatsApp database...\r")
         try:
             self.backup.extract_file(
@@ -130,9 +131,12 @@
     def _extract_decrypted_files(self):
         """Extract all WhatsApp files after decryption"""
+        pbar = tqdm(desc="Decrypting and extracting files", unit="file", leave=False)
+
         def extract_progress_handler(file_id, domain, relative_path, n, total_files):
-            if n % 100 == 0:
-                logger.info(f"Decrypting and extracting files...({n}/{total_files})\r")
+            if pbar.total is None:
+                pbar.total = total_files
+            pbar.n = n
+            pbar.refresh()
             return True

         self.backup.extract_files(
@@ -141,7 +145,9 @@
             preserve_folders=True,
             filter_callback=extract_progress_handler
         )
-        logger.info(f"All required files are decrypted and extracted.{CLEAR_LINE}")
+        total_time = pbar.format_dict['elapsed']
+        pbar.close()
+        logger.info(f"All required files are decrypted and extracted in {total_time:.2f} seconds{CLEAR_LINE}")

     def _extract_unencrypted_backup(self):
         """
@@ -192,7 +198,6 @@
         c = manifest.cursor()
         c.execute(f"SELECT count() FROM Files WHERE domain = '{_wts_id}'")
        total_row_number = c.fetchone()[0]
-        logger.info(f"Extracting WhatsApp files...(0/{total_row_number})\r")
         c.execute(
             f"""
             SELECT fileID, relativePath, flags, file AS metadata,
@@ -205,33 +210,30 @@
         if not os.path.isdir(_wts_id):
             os.mkdir(_wts_id)

-        row = c.fetchone()
-        while row is not None:
-            if not row["relativePath"]:  # Skip empty relative paths
-                row = c.fetchone()
-                continue
-
-            destination = os.path.join(_wts_id, row["relativePath"])
-            hashes = row["fileID"]
-            folder = hashes[:2]
-            flags = row["flags"]
-
-            if flags == 2:  # Directory
-                try:
-                    os.mkdir(destination)
-                except FileExistsError:
-                    pass
-            elif flags == 1:  # File
-                shutil.copyfile(os.path.join(self.base_dir, folder, hashes), destination)
-                metadata = BPListReader(row["metadata"]).parse()
-                creation = metadata["$objects"][1]["Birth"]
-                modification = metadata["$objects"][1]["LastModified"]
-                os.utime(destination, (modification, modification))
-
-            if row["_index"] % 100 == 0:
-                logger.info(f"Extracting WhatsApp files...({row['_index']}/{total_row_number})\r")
-            row = c.fetchone()
-
-        logger.info(f"Extracted WhatsApp files...({total_row_number}){CLEAR_LINE}")
+        with tqdm(total=total_row_number, desc="Extracting WhatsApp files", unit="file", leave=False) as pbar:
+            while (row := c.fetchone()) is not None:
+                if not row["relativePath"]:  # Skip empty relative paths
+                    continue
+
+                destination = os.path.join(_wts_id, row["relativePath"])
+                hashes = row["fileID"]
+                folder = hashes[:2]
+                flags = row["flags"]
+
+                if flags == 2:  # Directory
+                    try:
+                        os.mkdir(destination)
+                    except FileExistsError:
+                        pass
+                elif flags == 1:  # File
+                    shutil.copyfile(os.path.join(self.base_dir, folder, hashes), destination)
+                    metadata = BPListReader(row["metadata"]).parse()
+                    _creation = metadata["$objects"][1]["Birth"]
+                    modification = metadata["$objects"][1]["LastModified"]
+                    os.utime(destination, (modification, modification))
+                pbar.update(1)
+            total_time = pbar.format_dict['elapsed']
+        logger.info(f"Extracted {total_row_number} WhatsApp files in {total_time:.2f} seconds{CLEAR_LINE}")


 def extract_media(base_dir, identifiers, decrypt_chunk_size):
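extract_files drives progress through a callback instead of a loop the caller owns, so the bar in _extract_decrypted_files is created without a total and completed from inside the handler: the total is learned on the first callback, and pbar.n is assigned the absolute count rather than incremented. A sketch of that shape, with a fake driver standing in for the backup library and a simplified callback signature:

from tqdm import tqdm

def fake_extract_files(filter_callback):
    # Stand-in for a library that reports (files done, total files) as it works
    for n in range(1, 251):
        filter_callback(n, 250)

pbar = tqdm(desc="Decrypting and extracting files", unit="file", leave=False)

def extract_progress_handler(n, total_files):
    if pbar.total is None:
        pbar.total = total_files  # the total is only known once the library reports it
    pbar.n = n                    # absolute progress, so assign instead of update()
    pbar.refresh()
    return True

fake_extract_files(extract_progress_handler)
total_time = pbar.format_dict['elapsed']  # read elapsed before closing the bar
pbar.close()
print(f"All files extracted in {total_time:.2f} seconds")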

View File

@@ -5,13 +5,13 @@ import json
 import os
 import unicodedata
 import re
-import string
 import math
 import shutil
 from bleach import clean as sanitize
 from markupsafe import Markup
 from datetime import datetime, timedelta
 from enum import IntEnum
+from tqdm import tqdm
 from Whatsapp_Chat_Exporter.data_model import ChatCollection, ChatStore, Timing
 from typing import Dict, List, Optional, Tuple, Union
 try:
@@ -248,13 +248,13 @@ def import_from_json(json_file: str, data: ChatCollection):
     with open(json_file, "r") as f:
         temp_data = json.loads(f.read())
         total_row_number = len(tuple(temp_data.keys()))
-        logger.info(f"Importing chats from JSON...(0/{total_row_number})\r")
-        for index, (jid, chat_data) in enumerate(temp_data.items()):
-            chat = ChatStore.from_json(chat_data)
-            data.add_chat(jid, chat)
-            logger.info(
-                f"Importing chats from JSON...({index + 1}/{total_row_number})\r")
-        logger.info(f"Imported {total_row_number} chats from JSON{CLEAR_LINE}")
+        with tqdm(total=total_row_number, desc="Importing chats from JSON", unit="chat", leave=False) as pbar:
+            for jid, chat_data in temp_data.items():
+                chat = ChatStore.from_json(chat_data)
+                data.add_chat(jid, chat)
+                pbar.update(1)
+            total_time = pbar.format_dict['elapsed']
+        logger.info(f"Imported {total_row_number} chats from JSON in {total_time:.2f} seconds{CLEAR_LINE}")


 def incremental_merge(source_dir: str, target_dir: str, media_dir: str, pretty_print_json: int, avoid_encoding_json: bool):
@@ -439,7 +439,7 @@ CRYPT14_OFFSETS = (
     {"iv": 67, "db": 193},
     {"iv": 67, "db": 194},
     {"iv": 67, "db": 158},
-    {"iv": 67, "db": 196}
+    {"iv": 67, "db": 196},
 )