Merge branch 'dev'

This commit is contained in:
KnugiHK
2026-01-01 15:08:52 +08:00
32 changed files with 2989 additions and 1639 deletions

39
.github/workflows/ci.yml vendored Normal file
View File

@@ -0,0 +1,39 @@
# CI workflow: runs the pytest suite on every push to the dev branch.
name: Run Pytest on Dev Branch Push

on:
  push:
    branches:
      - dev

# Least privilege: the job only checks out the code and runs tests,
# so the workflow token is restricted to read-only repository access.
permissions:
  contents: read

jobs:
  ci:
    runs-on: ${{ matrix.os }}
    strategy:
      # Let every OS/Python combination finish even if another one fails.
      fail-fast: false
      matrix:
        os: [ubuntu-latest, windows-latest, macos-latest]
        # Versions are quoted so that "3.10" is not parsed as the float 3.1.
        python-version: ["3.13", "3.14"]
        # Older interpreters are added as extra combinations on Ubuntu only.
        include:
          - os: ubuntu-latest
            python-version: "3.10"
          - os: ubuntu-latest
            python-version: "3.11"
          - os: ubuntu-latest
            python-version: "3.12"
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
      - name: Set up Python ${{ matrix.python-version }} on ${{ matrix.os }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          # Quote the extras spec so the shell never treats [all] as a glob.
          pip install ".[all]" pytest nuitka
      - name: Run pytest
        run: pytest

View File

@@ -7,6 +7,9 @@ on:
permissions: permissions:
contents: read contents: read
id-token: write
attestations: write
jobs: jobs:
linux: linux:
@@ -20,7 +23,7 @@ jobs:
- name: Install dependencies - name: Install dependencies
run: | run: |
python -m pip install --upgrade pip python -m pip install --upgrade pip
pip install pycryptodome vobject javaobj-py3 ordered-set zstandard nuitka==2.8.9 pip install pycryptodome javaobj-py3 ordered-set zstandard nuitka==2.8.9
pip install . pip install .
- name: Build binary with Nuitka - name: Build binary with Nuitka
run: | run: |
@@ -28,6 +31,10 @@ jobs:
--include-data-file=./Whatsapp_Chat_Exporter/whatsapp.html=./Whatsapp_Chat_Exporter/whatsapp.html \ --include-data-file=./Whatsapp_Chat_Exporter/whatsapp.html=./Whatsapp_Chat_Exporter/whatsapp.html \
--assume-yes-for-downloads Whatsapp_Chat_Exporter --output-filename=wtsexporter_linux_x64 --assume-yes-for-downloads Whatsapp_Chat_Exporter --output-filename=wtsexporter_linux_x64
sha256sum wtsexporter_linux_x64 sha256sum wtsexporter_linux_x64
- name: Generate artifact attestation
uses: actions/attest-build-provenance@v3
with:
subject-path: ./wtsexporter_linux_x64
- uses: actions/upload-artifact@v6 - uses: actions/upload-artifact@v6
with: with:
name: binary-linux name: binary-linux
@@ -45,13 +52,17 @@ jobs:
- name: Install dependencies - name: Install dependencies
run: | run: |
python -m pip install --upgrade pip python -m pip install --upgrade pip
pip install pycryptodome vobject javaobj-py3 ordered-set zstandard nuitka==2.8.9 pip install pycryptodome javaobj-py3 ordered-set zstandard nuitka==2.8.9
pip install . pip install .
- name: Build binary with Nuitka - name: Build binary with Nuitka
run: | run: |
python -m nuitka --onefile --include-data-file=./Whatsapp_Chat_Exporter/whatsapp.html=./Whatsapp_Chat_Exporter/whatsapp.html --assume-yes-for-downloads Whatsapp_Chat_Exporter --output-filename=wtsexporter python -m nuitka --onefile --include-data-file=./Whatsapp_Chat_Exporter/whatsapp.html=./Whatsapp_Chat_Exporter/whatsapp.html --assume-yes-for-downloads Whatsapp_Chat_Exporter --output-filename=wtsexporter
copy wtsexporter.exe wtsexporter_x64.exe Rename-Item -Path "wtsexporter.exe" -NewName "wtsexporter_x64.exe"
Get-FileHash wtsexporter_x64.exe Get-FileHash wtsexporter_x64.exe
- name: Generate artifact attestation
uses: actions/attest-build-provenance@v3
with:
subject-path: .\wtsexporter_x64.exe
- uses: actions/upload-artifact@v6 - uses: actions/upload-artifact@v6
with: with:
name: binary-windows name: binary-windows
@@ -69,16 +80,21 @@ jobs:
- name: Install dependencies - name: Install dependencies
run: | run: |
python -m pip install --upgrade pip python -m pip install --upgrade pip
pip install pycryptodome vobject javaobj-py3 ordered-set zstandard nuitka==2.8.9 pip install pycryptodome javaobj-py3 ordered-set zstandard nuitka==2.8.9
pip install . pip install .
- name: Build binary with Nuitka - name: Build binary with Nuitka
run: | run: |
python -m nuitka --onefile \ python -m nuitka --onefile \
--include-data-file=./Whatsapp_Chat_Exporter/whatsapp.html=./Whatsapp_Chat_Exporter/whatsapp.html \ --include-data-file=./Whatsapp_Chat_Exporter/whatsapp.html=./Whatsapp_Chat_Exporter/whatsapp.html \
--assume-yes-for-downloads Whatsapp_Chat_Exporter --output-filename=wtsexporter_macos_x64 --assume-yes-for-downloads Whatsapp_Chat_Exporter --output-filename=wtsexporter_macos_arm64
shasum -a 256 wtsexporter_macos_x64 shasum -a 256 wtsexporter_macos_arm64
- name: Generate artifact attestation
uses: actions/attest-build-provenance@v3
with:
subject-path: ./wtsexporter_macos_arm64
- uses: actions/upload-artifact@v6 - uses: actions/upload-artifact@v6
with: with:
name: binary-macos name: binary-macos
path: | path: |
./wtsexporter_macos_x64 ./wtsexporter_macos_arm64

2
.gitignore vendored
View File

@@ -138,7 +138,9 @@ __main__
# Dev time intermediates & temp files # Dev time intermediates & temp files
result/ result/
output/
WhatsApp/ WhatsApp/
AppDomainGroup-group.net.whatsapp.WhatsApp.shared/
/*.db /*.db
/*.db-* /*.db-*
/myout /myout

View File

@@ -1,32 +0,0 @@
The Whatsapp Chat Exporter is licensed under the MIT license. For more information,
refer to https://github.com/KnugiHK/WhatsApp-Chat-Exporter/wiki/Open-Source-Licenses.
------
Copyright (c) Django Software Foundation and individual contributors.
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. Neither the name of Django nor the names of its contributors may be used
to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View File

@@ -136,12 +136,10 @@ wtsexporter -i -b ~/Library/Application\ Support/MobileSync/Backup/[device id]
``` ```
## Results ## Results
After extracting, you will get these: After extracting, you will get this:
#### Private Message
![Private Message](imgs/pm.png) ![Private Message](imgs/pm.png)
#### Group Message
![Group Message](imgs/group.png)
## More options ## More options
Invoking wtsexporter with the --help option will show you all available options. Invoking wtsexporter with the --help option will show you all available options.
@@ -233,6 +231,19 @@ Contact Enrichment:
Use with --enrich-from-vcards. When numbers in the vcf file does not have a country code, this Use with --enrich-from-vcards. When numbers in the vcf file does not have a country code, this
will be used. 1 is for US, 66 for Thailand etc. Most likely use the number of your own country will be used. 1 is for US, 66 for Thailand etc. Most likely use the number of your own country
Incremental Merging:
--incremental-merge Performs an incremental merge of two exports. Requires setting both --source-
dir and --target-dir. The chats (JSON files only) and media from the source
directory will be merged into the target directory. No chat messages or media
will be deleted from the target directory; only new chat messages and media
will be added to it. This enables chat messages and media to be deleted from
the device to free up space, while ensuring they are preserved in the exported
backups.
--source-dir SOURCE_DIR
Sets the source directory. Used for performing incremental merges.
--target-dir TARGET_DIR
Sets the target directory. Used for performing incremental merges.
Miscellaneous: Miscellaneous:
-s, --showkey Show the HEX key used to decrypt the database -s, --showkey Show the HEX key used to decrypt the database
--check-update Check for updates (require Internet access) --check-update Check for updates (require Internet access)
@@ -243,10 +254,14 @@ Miscellaneous:
--max-bruteforce-worker MAX_BRUTEFORCE_WORKER --max-bruteforce-worker MAX_BRUTEFORCE_WORKER
Specify the maximum number of worker for bruteforce decryption. Specify the maximum number of worker for bruteforce decryption.
WhatsApp Chat Exporter: 0.12.1 Licensed with MIT. See https://wts.knugi.dev/docs?dest=osl for all open source WhatsApp Chat Exporter: 0.13.0rc1 Licensed with MIT. See https://wts.knugi.dev/docs?dest=osl for all open source
licenses. licenses.
``` ```
# Python Support Policy
This project officially supports all non-EOL (End-of-Life) versions of Python. Once a Python version reaches EOL, it is dropped in the next release. See [Python's EOL Schedule](https://devguide.python.org/versions/).
# Legal Stuff & Disclaimer # Legal Stuff & Disclaimer
This is an MIT-licensed project. This is an MIT-licensed project.

View File

@@ -7,39 +7,60 @@ import shutil
import json import json
import string import string
import glob import glob
import logging
import importlib.metadata import importlib.metadata
from Whatsapp_Chat_Exporter import android_crypt, exported_handler, android_handler from Whatsapp_Chat_Exporter import android_crypt, exported_handler, android_handler
from Whatsapp_Chat_Exporter import ios_handler, ios_media_handler from Whatsapp_Chat_Exporter import ios_handler, ios_media_handler
from Whatsapp_Chat_Exporter.data_model import ChatCollection, ChatStore from Whatsapp_Chat_Exporter.data_model import ChatCollection, ChatStore
from Whatsapp_Chat_Exporter.utility import APPLE_TIME, Crypt, check_update, DbType from Whatsapp_Chat_Exporter.utility import APPLE_TIME, CLEAR_LINE, Crypt, check_update
from Whatsapp_Chat_Exporter.utility import readable_to_bytes, sanitize_filename from Whatsapp_Chat_Exporter.utility import readable_to_bytes, safe_name, bytes_to_readable
from Whatsapp_Chat_Exporter.utility import import_from_json, bytes_to_readable from Whatsapp_Chat_Exporter.utility import import_from_json, incremental_merge, DbType
from Whatsapp_Chat_Exporter.utility import telegram_json_format
from argparse import ArgumentParser, SUPPRESS from argparse import ArgumentParser, SUPPRESS
from datetime import datetime from datetime import datetime
from getpass import getpass from getpass import getpass
from sys import exit from sys import exit
from typing import Tuple, Optional, List, Dict, Any, Union from typing import Optional, List, Dict
from Whatsapp_Chat_Exporter.vcards_contacts import ContactsFromVCards
# Try to import vobject for contacts processing
try: logger = logging.getLogger(__name__)
import vobject __version__ = importlib.metadata.version("whatsapp_chat_exporter")
except ModuleNotFoundError: WTSEXPORTER_BANNER = f"""========================================================================================================
vcards_deps_installed = False ██╗ ██╗██╗ ██╗ █████╗ ████████╗███████╗ █████╗ ██████╗ ██████╗
else: ██║ ██║██║ ██║██╔══██╗╚══██╔══╝██╔════╝██╔══██╗██╔══██╗██╔══██╗
from Whatsapp_Chat_Exporter.vcards_contacts import ContactsFromVCards ██║ █╗ ██║███████║███████║ ██║ ███████╗███████║██████╔╝██████╔╝
vcards_deps_installed = True ██║███╗██║██╔══██║██╔══██║ ██║ ╚════██║██╔══██║██╔═══╝ ██╔═══╝
╚███╔███╔╝██║ ██║██║ ██║ ██║ ███████║██║ ██║██║ ██║
╚══╝╚══╝ ╚═╝ ╚═╝╚═╝ ╚═╝ ╚═╝ ╚══════╝╚═╝ ╚═╝╚═╝ ╚═╝
██████╗██╗ ██╗ █████╗ ████████╗ ███████╗██╗ ██╗██████╗ ██████╗ ██████╗ ████████╗███████╗██████╗
██╔════╝██║ ██║██╔══██╗╚══██╔══╝ ██╔════╝╚██╗██╔╝██╔══██╗██╔═══██╗██╔══██╗╚══██╔══╝██╔════╝██╔══██╗
██║ ███████║███████║ ██║ █████╗ ╚███╔╝ ██████╔╝██║ ██║██████╔╝ ██║ █████╗ ██████╔╝
██║ ██╔══██║██╔══██║ ██║ ██╔══╝ ██╔██╗ ██╔═══╝ ██║ ██║██╔══██╗ ██║ ██╔══╝ ██╔══██╗
╚██████╗██║ ██║██║ ██║ ██║ ███████╗██╔╝ ██╗██║ ╚██████╔╝██║ ██║ ██║ ███████╗██║ ██║
╚═════╝╚═╝ ╚═╝╚═╝ ╚═╝ ╚═╝ ╚══════╝╚═╝ ╚═╝╚═╝ ╚═════╝ ╚═╝ ╚═╝ ╚═╝ ╚══════╝╚═╝ ╚═╝
WhatsApp Chat Exporter: A customizable Android and iOS/iPadOS WhatsApp database parser
Version: {__version__}
========================================================================================================"""
def setup_argument_parser() -> ArgumentParser: def setup_argument_parser() -> ArgumentParser:
"""Set up and return the argument parser with all options.""" """Set up and return the argument parser with all options."""
parser = ArgumentParser( parser = ArgumentParser(
description='A customizable Android and iOS/iPadOS WhatsApp database parser that ' description='A customizable Android and iOS/iPadOS WhatsApp database parser that '
'will give you the history of your WhatsApp conversations in HTML ' 'will give you the history of your WhatsApp conversations in HTML '
'and JSON. Android Backup Crypt12, Crypt14 and Crypt15 supported.', 'and JSON. Android Backup Crypt12, Crypt14 and Crypt15 supported.',
epilog=f'WhatsApp Chat Exporter: {importlib.metadata.version("whatsapp_chat_exporter")} Licensed with MIT. See ' epilog=f'WhatsApp Chat Exporter: {__version__} Licensed with MIT. See '
'https://wts.knugi.dev/docs?dest=osl for all open source licenses.' 'https://wts.knugi.dev/docs?dest=osl for all open source licenses.'
)
# General options
parser.add_argument(
"--debug", dest="debug", default=False, action='store_true',
help="Enable debug mode"
) )
# Device type arguments # Device type arguments
device_group = parser.add_argument_group('Device Type') device_group = parser.add_argument_group('Device Type')
device_group.add_argument( device_group.add_argument(
@@ -54,7 +75,7 @@ def setup_argument_parser() -> ArgumentParser:
"-e", "--exported", dest="exported", default=None, "-e", "--exported", dest="exported", default=None,
help="Define the target as exported chat file and specify the path to the file" help="Define the target as exported chat file and specify the path to the file"
) )
# Input file paths # Input file paths
input_group = parser.add_argument_group('Input Files') input_group = parser.add_argument_group('Input Files')
input_group.add_argument( input_group.add_argument(
@@ -86,7 +107,7 @@ def setup_argument_parser() -> ArgumentParser:
"--wab", "--wa-backup", dest="wab", default=None, "--wab", "--wa-backup", dest="wab", default=None,
help="Path to contact database in crypt15 format" help="Path to contact database in crypt15 format"
) )
# Output options # Output options
output_group = parser.add_argument_group('Output Options') output_group = parser.add_argument_group('Output Options')
output_group.add_argument( output_group.add_argument(
@@ -106,10 +127,14 @@ def setup_argument_parser() -> ArgumentParser:
help="Do not output html files" help="Do not output html files"
) )
output_group.add_argument( output_group.add_argument(
"--size", "--output-size", "--split", dest="size", nargs='?', const=0, default=None, "--size", "--output-size", "--split", dest="size", nargs='?', const="0", default=None,
help="Maximum (rough) size of a single output file in bytes, 0 for auto" help="Maximum (rough) size of a single output file in bytes, 0 for auto"
) )
output_group.add_argument(
"--no-reply", dest="no_reply_ios", default=False, action='store_true',
help="Do not process replies (iOS only) (default: handle replies)"
)
# JSON formatting options # JSON formatting options
json_group = parser.add_argument_group('JSON Options') json_group = parser.add_argument_group('JSON Options')
json_group.add_argument( json_group.add_argument(
@@ -120,6 +145,10 @@ def setup_argument_parser() -> ArgumentParser:
'--pretty-print-json', dest='pretty_print_json', default=None, nargs='?', const=2, type=int, '--pretty-print-json', dest='pretty_print_json', default=None, nargs='?', const=2, type=int,
help="Pretty print the output JSON." help="Pretty print the output JSON."
) )
json_group.add_argument(
"--tg", "--telegram", dest="telegram", default=False, action='store_true',
help="Output the JSON in a format compatible with Telegram export (implies json-per-chat)"
)
json_group.add_argument( json_group.add_argument(
"--per-chat", dest="json_per_chat", default=False, action='store_true', "--per-chat", dest="json_per_chat", default=False, action='store_true',
help="Output the JSON file per chat" help="Output the JSON file per chat"
@@ -128,7 +157,7 @@ def setup_argument_parser() -> ArgumentParser:
"--import", dest="import_json", default=False, action='store_true', "--import", dest="import_json", default=False, action='store_true',
help="Import JSON file and convert to HTML output" help="Import JSON file and convert to HTML output"
) )
# HTML options # HTML options
html_group = parser.add_argument_group('HTML Options') html_group = parser.add_argument_group('HTML Options')
html_group.add_argument( html_group.add_argument(
@@ -148,14 +177,14 @@ def setup_argument_parser() -> ArgumentParser:
help="Do not render avatar in HTML output" help="Do not render avatar in HTML output"
) )
html_group.add_argument( html_group.add_argument(
"--experimental-new-theme", dest="whatsapp_theme", default=False, action='store_true', "--old-theme", dest="telegram_theme", default=False, action='store_true',
help="Use the newly designed WhatsApp-alike theme" help="Use the old Telegram-alike theme"
) )
html_group.add_argument( html_group.add_argument(
"--headline", dest="headline", default="Chat history with ??", "--headline", dest="headline", default="Chat history with ??",
help="The custom headline for the HTML output. Use '??' as a placeholder for the chat name" help="The custom headline for the HTML output. Use '??' as a placeholder for the chat name"
) )
# Media handling # Media handling
media_group = parser.add_argument_group('Media Handling') media_group = parser.add_argument_group('Media Handling')
media_group.add_argument( media_group.add_argument(
@@ -166,7 +195,7 @@ def setup_argument_parser() -> ArgumentParser:
"--create-separated-media", dest="separate_media", default=False, action='store_true', "--create-separated-media", dest="separate_media", default=False, action='store_true',
help="Create a copy of the media seperated per chat in <MEDIA>/separated/ directory" help="Create a copy of the media seperated per chat in <MEDIA>/separated/ directory"
) )
# Filtering options # Filtering options
filter_group = parser.add_argument_group('Filtering Options') filter_group = parser.add_argument_group('Filtering Options')
filter_group.add_argument( filter_group.add_argument(
@@ -195,7 +224,7 @@ def setup_argument_parser() -> ArgumentParser:
"Setting this flag will cause the exporter to render those. " "Setting this flag will cause the exporter to render those. "
"This is useful if chat(s) are missing from the output") "This is useful if chat(s) are missing from the output")
) )
# Contact enrichment # Contact enrichment
contact_group = parser.add_argument_group('Contact Enrichment') contact_group = parser.add_argument_group('Contact Enrichment')
contact_group.add_argument( contact_group.add_argument(
@@ -206,7 +235,34 @@ def setup_argument_parser() -> ArgumentParser:
"--default-country-code", dest="default_country_code", default=None, "--default-country-code", dest="default_country_code", default=None,
help="Use with --enrich-from-vcards. When numbers in the vcf file does not have a country code, this will be used. 1 is for US, 66 for Thailand etc. Most likely use the number of your own country" help="Use with --enrich-from-vcards. When numbers in the vcf file does not have a country code, this will be used. 1 is for US, 66 for Thailand etc. Most likely use the number of your own country"
) )
# Incremental merging
inc_merging_group = parser.add_argument_group('Incremental Merging')
inc_merging_group.add_argument(
"--incremental-merge",
dest="incremental_merge",
default=False,
action='store_true',
help=("Performs an incremental merge of two exports. "
"Requires setting both --source-dir and --target-dir. "
"The chats (JSON files only) and media from the source directory will be merged into the target directory. "
"No chat messages or media will be deleted from the target directory; only new chat messages and media will be added to it. "
"This enables chat messages and media to be deleted from the device to free up space, while ensuring they are preserved in the exported backups."
)
)
inc_merging_group.add_argument(
"--source-dir",
dest="source_dir",
default=None,
help="Sets the source directory. Used for performing incremental merges."
)
inc_merging_group.add_argument(
"--target-dir",
dest="target_dir",
default=None,
help="Sets the target directory. Used for performing incremental merges."
)
# Miscellaneous # Miscellaneous
misc_group = parser.add_argument_group('Miscellaneous') misc_group = parser.add_argument_group('Miscellaneous')
misc_group.add_argument( misc_group.add_argument(
@@ -233,7 +289,11 @@ def setup_argument_parser() -> ArgumentParser:
"--max-bruteforce-worker", dest="max_bruteforce_worker", default=10, type=int, "--max-bruteforce-worker", dest="max_bruteforce_worker", default=10, type=int,
help="Specify the maximum number of worker for bruteforce decryption." help="Specify the maximum number of worker for bruteforce decryption."
) )
misc_group.add_argument(
"--no-banner", dest="no_banner", default=False, action='store_true',
help="Do not show the banner"
)
return parser return parser
@@ -245,50 +305,60 @@ def validate_args(parser: ArgumentParser, args) -> None:
if not args.android and not args.ios and not args.exported and not args.import_json: if not args.android and not args.ios and not args.exported and not args.import_json:
parser.error("You must define the device type.") parser.error("You must define the device type.")
if args.no_html and not args.json and not args.text_format: if args.no_html and not args.json and not args.text_format:
parser.error("You must either specify a JSON output file, text file output directory or enable HTML output.") parser.error(
"You must either specify a JSON output file, text file output directory or enable HTML output.")
if args.import_json and (args.android or args.ios or args.exported or args.no_html): if args.import_json and (args.android or args.ios or args.exported or args.no_html):
parser.error("You can only use --import with -j and without --no-html, -a, -i, -e.") parser.error(
"You can only use --import with -j and without --no-html, -a, -i, -e.")
elif args.import_json and not os.path.isfile(args.json): elif args.import_json and not os.path.isfile(args.json):
parser.error("JSON file not found.") parser.error("JSON file not found.")
if args.incremental_merge and (args.source_dir is None or args.target_dir is None):
parser.error(
"You must specify both --source-dir and --target-dir for incremental merge.")
if args.android and args.business: if args.android and args.business:
parser.error("WhatsApp Business is only available on iOS for now.") parser.error("WhatsApp Business is only available on iOS for now.")
if "??" not in args.headline: if "??" not in args.headline:
parser.error("--headline must contain '??' for replacement.") parser.error("--headline must contain '??' for replacement.")
# JSON validation # JSON validation
if args.json_per_chat and args.json and ( if args.json_per_chat and args.json and (
(args.json.endswith(".json") and os.path.isfile(args.json)) or (args.json.endswith(".json") and os.path.isfile(args.json)) or
(not args.json.endswith(".json") and os.path.isfile(args.json)) (not args.json.endswith(".json") and os.path.isfile(args.json))
): ):
parser.error("When --per-chat is enabled, the destination of --json must be a directory.") parser.error(
"When --per-chat is enabled, the destination of --json must be a directory.")
# vCards validation # vCards validation
if args.enrich_from_vcards is not None and args.default_country_code is None: if args.enrich_from_vcards is not None and args.default_country_code is None:
parser.error("When --enrich-from-vcards is provided, you must also set --default-country-code") parser.error(
"When --enrich-from-vcards is provided, you must also set --default-country-code")
# Size validation
if args.size is not None and not isinstance(args.size, int) and not args.size.isnumeric(): # Size validation and conversion
if args.size is not None:
try: try:
args.size = readable_to_bytes(args.size) args.size = readable_to_bytes(args.size)
except ValueError: except ValueError:
parser.error("The value for --split must be ended in pure bytes or with a proper unit (e.g., 1048576 or 1MB)") parser.error(
"The value for --split must be pure bytes or use a proper unit (e.g., 1048576 or 1MB)"
)
# Date filter validation and processing # Date filter validation and processing
if args.filter_date is not None: if args.filter_date is not None:
process_date_filter(parser, args) process_date_filter(parser, args)
# Crypt15 key validation # Crypt15 key validation
if args.key is None and args.backup is not None and args.backup.endswith("crypt15"): if args.key is None and args.backup is not None and args.backup.endswith("crypt15"):
args.key = getpass("Enter your encryption key: ") args.key = getpass("Enter your encryption key: ")
# Theme validation # Theme validation
if args.whatsapp_theme: if args.telegram_theme:
args.template = "whatsapp_new.html" args.template = "whatsapp_old.html"
# Chat filter validation # Chat filter validation
if args.filter_chat_include is not None and args.filter_chat_exclude is not None: if args.filter_chat_include is not None and args.filter_chat_exclude is not None:
parser.error("Chat inclusion and exclusion filters cannot be used together.") parser.error(
"Chat inclusion and exclusion filters cannot be used together.")
validate_chat_filters(parser, args.filter_chat_include) validate_chat_filters(parser, args.filter_chat_include)
validate_chat_filters(parser, args.filter_chat_exclude) validate_chat_filters(parser, args.filter_chat_exclude)
@@ -298,21 +368,24 @@ def validate_chat_filters(parser: ArgumentParser, chat_filter: Optional[List[str
if chat_filter is not None: if chat_filter is not None:
for chat in chat_filter: for chat in chat_filter:
if not chat.isnumeric(): if not chat.isnumeric():
parser.error("Enter a phone number in the chat filter. See https://wts.knugi.dev/docs?dest=chat") parser.error(
"Enter a phone number in the chat filter. See https://wts.knugi.dev/docs?dest=chat")
def process_date_filter(parser: ArgumentParser, args) -> None: def process_date_filter(parser: ArgumentParser, args) -> None:
"""Process and validate date filter arguments.""" """Process and validate date filter arguments."""
if " - " in args.filter_date: if " - " in args.filter_date:
start, end = args.filter_date.split(" - ") start, end = args.filter_date.split(" - ")
start = int(datetime.strptime(start, args.filter_date_format).timestamp()) start = int(datetime.strptime(
start, args.filter_date_format).timestamp())
end = int(datetime.strptime(end, args.filter_date_format).timestamp()) end = int(datetime.strptime(end, args.filter_date_format).timestamp())
if start < 1009843200 or end < 1009843200: if start < 1009843200 or end < 1009843200:
parser.error("WhatsApp was first released in 2009...") parser.error("WhatsApp was first released in 2009...")
if start > end: if start > end:
parser.error("The start date cannot be a moment after the end date.") parser.error(
"The start date cannot be a moment after the end date.")
if args.android: if args.android:
args.filter_date = f"BETWEEN {start}000 AND {end}000" args.filter_date = f"BETWEEN {start}000 AND {end}000"
elif args.ios: elif args.ios:
@@ -324,13 +397,15 @@ def process_date_filter(parser: ArgumentParser, args) -> None:
def process_single_date_filter(parser: ArgumentParser, args) -> None: def process_single_date_filter(parser: ArgumentParser, args) -> None:
"""Process single date comparison filters.""" """Process single date comparison filters."""
if len(args.filter_date) < 3: if len(args.filter_date) < 3:
parser.error("Unsupported date format. See https://wts.knugi.dev/docs?dest=date") parser.error(
"Unsupported date format. See https://wts.knugi.dev/docs?dest=date")
_timestamp = int(datetime.strptime(args.filter_date[2:], args.filter_date_format).timestamp())
_timestamp = int(datetime.strptime(
args.filter_date[2:], args.filter_date_format).timestamp())
if _timestamp < 1009843200: if _timestamp < 1009843200:
parser.error("WhatsApp was first released in 2009...") parser.error("WhatsApp was first released in 2009...")
if args.filter_date[:2] == "> ": if args.filter_date[:2] == "> ":
if args.android: if args.android:
args.filter_date = f">= {_timestamp}000" args.filter_date = f">= {_timestamp}000"
@@ -342,21 +417,16 @@ def process_single_date_filter(parser: ArgumentParser, args) -> None:
elif args.ios: elif args.ios:
args.filter_date = f"<= {_timestamp - APPLE_TIME}" args.filter_date = f"<= {_timestamp - APPLE_TIME}"
else: else:
parser.error("Unsupported date format. See https://wts.knugi.dev/docs?dest=date") parser.error(
"Unsupported date format. See https://wts.knugi.dev/docs?dest=date")
def setup_contact_store(args) -> Optional['ContactsFromVCards']: def setup_contact_store(args) -> Optional['ContactsFromVCards']:
"""Set up and return a contact store if needed.""" """Set up and return a contact store if needed."""
if args.enrich_from_vcards is not None: if args.enrich_from_vcards is not None:
if not vcards_deps_installed:
print(
"You don't have the dependency to enrich contacts with vCard.\n"
"Read more on how to deal with enriching contacts:\n"
"https://github.com/KnugiHK/Whatsapp-Chat-Exporter/blob/main/README.md#usage"
)
exit(1)
contact_store = ContactsFromVCards() contact_store = ContactsFromVCards()
contact_store.load_vcf_file(args.enrich_from_vcards, args.default_country_code) contact_store.load_vcf_file(
args.enrich_from_vcards, args.default_country_code)
return contact_store return contact_store
return None return None
@@ -364,11 +434,11 @@ def setup_contact_store(args) -> Optional['ContactsFromVCards']:
def decrypt_android_backup(args) -> int: def decrypt_android_backup(args) -> int:
"""Decrypt Android backup files and return error code.""" """Decrypt Android backup files and return error code."""
if args.key is None or args.backup is None: if args.key is None or args.backup is None:
print("You must specify the backup file with -b and a key with -k") logger.error(f"You must specify the backup file with -b and a key with -k{CLEAR_LINE}")
return 1 return 1
print("Decryption key specified, decrypting WhatsApp backup...") logger.info(f"Decryption key specified, decrypting WhatsApp backup...{CLEAR_LINE}")
# Determine crypt type # Determine crypt type
if "crypt12" in args.backup: if "crypt12" in args.backup:
crypt = Crypt.CRYPT12 crypt = Crypt.CRYPT12
@@ -377,9 +447,10 @@ def decrypt_android_backup(args) -> int:
elif "crypt15" in args.backup: elif "crypt15" in args.backup:
crypt = Crypt.CRYPT15 crypt = Crypt.CRYPT15
else: else:
print("Unknown backup format. The backup file must be crypt12, crypt14 or crypt15.") logger.error(
f"Unknown backup format. The backup file must be crypt12, crypt14 or crypt15.{CLEAR_LINE}")
return 1 return 1
# Get key # Get key
keyfile_stream = False keyfile_stream = False
if not os.path.isfile(args.key) and all(char in string.hexdigits for char in args.key.replace(" ", "")): if not os.path.isfile(args.key) and all(char in string.hexdigits for char in args.key.replace(" ", "")):
@@ -387,10 +458,10 @@ def decrypt_android_backup(args) -> int:
else: else:
key = open(args.key, "rb") key = open(args.key, "rb")
keyfile_stream = True keyfile_stream = True
# Read backup # Read backup
db = open(args.backup, "rb").read() db = open(args.backup, "rb").read()
# Process WAB if provided # Process WAB if provided
error_wa = 0 error_wa = 0
if args.wab: if args.wab:
@@ -407,7 +478,7 @@ def decrypt_android_backup(args) -> int:
) )
if isinstance(key, io.IOBase): if isinstance(key, io.IOBase):
key.seek(0) key.seek(0)
# Decrypt message database # Decrypt message database
error_message = android_crypt.decrypt_backup( error_message = android_crypt.decrypt_backup(
db, db,
@@ -419,7 +490,7 @@ def decrypt_android_backup(args) -> int:
keyfile_stream=keyfile_stream, keyfile_stream=keyfile_stream,
max_worker=args.max_bruteforce_worker max_worker=args.max_bruteforce_worker
) )
# Handle errors # Handle errors
if error_wa != 0: if error_wa != 0:
return error_wa return error_wa
@@ -429,22 +500,22 @@ def decrypt_android_backup(args) -> int:
def handle_decrypt_error(error: int) -> None:
    """Log a message describing a decryption failure and terminate the process.

    Args:
        error: Error code identifying the failure. ``1`` means the
            dependencies needed for decryption are missing, ``2`` means
            decompressing the decrypted backup failed; any other value is
            reported as an unknown error.

    This function never returns: it exits with status 3, 4 or 5 for the
    three cases above, respectively.
    """
    if error == 1:
        logger.error("Dependencies of decrypt_backup and/or extract_encrypted_key"
                     " are not present. For details, see README.md.\n")
        exit(3)
    elif error == 2:
        logger.error("Failed when decompressing the decrypted backup. "
                     "Possibly incorrect offsets used in decryption.\n")
        exit(4)
    else:
        # Keep the numeric code in the message so an unexpected failure can
        # still be identified from the log output.
        logger.error(f"Unknown error occurred. {error}\n")
        exit(5)
def process_contacts(args, data: ChatCollection, contact_store=None) -> None: def process_contacts(args, data: ChatCollection) -> None:
"""Process contacts from the database.""" """Process contacts from the database."""
contact_db = args.wa if args.wa else "wa.db" if args.android else "ContactsV2.sqlite" contact_db = args.wa if args.wa else "wa.db" if args.android else "ContactsV2.sqlite"
if os.path.isfile(contact_db): if os.path.isfile(contact_db):
with sqlite3.connect(contact_db) as db: with sqlite3.connect(contact_db) as db:
db.row_factory = sqlite3.Row db.row_factory = sqlite3.Row
@@ -457,42 +528,42 @@ def process_contacts(args, data: ChatCollection, contact_store=None) -> None:
def process_messages(args, data: ChatCollection) -> None: def process_messages(args, data: ChatCollection) -> None:
"""Process messages, media and vcards from the database.""" """Process messages, media and vcards from the database."""
msg_db = args.db if args.db else "msgstore.db" if args.android else args.identifiers.MESSAGE msg_db = args.db if args.db else "msgstore.db" if args.android else args.identifiers.MESSAGE
if not os.path.isfile(msg_db): if not os.path.isfile(msg_db):
print( logger.error(
"The message database does not exist. You may specify the path " "The message database does not exist. You may specify the path "
"to database file with option -d or check your provided path." "to database file with option -d or check your provided path.\n"
) )
exit(6) exit(6)
filter_chat = (args.filter_chat_include, args.filter_chat_exclude) filter_chat = (args.filter_chat_include, args.filter_chat_exclude)
with sqlite3.connect(msg_db) as db: with sqlite3.connect(msg_db) as db:
db.row_factory = sqlite3.Row db.row_factory = sqlite3.Row
# Process messages # Process messages
if args.android: if args.android:
message_handler = android_handler message_handler = android_handler
else: else:
message_handler = ios_handler message_handler = ios_handler
message_handler.messages( message_handler.messages(
db, data, args.media, args.timezone_offset, db, data, args.media, args.timezone_offset, args.filter_date,
args.filter_date, filter_chat, args.filter_empty filter_chat, args.filter_empty, args.no_reply_ios
) )
# Process media # Process media
message_handler.media( message_handler.media(
db, data, args.media, args.filter_date, db, data, args.media, args.filter_date,
filter_chat, args.filter_empty, args.separate_media filter_chat, args.filter_empty, args.separate_media
) )
# Process vcards # Process vcards
message_handler.vcard( message_handler.vcard(
db, data, args.media, args.filter_date, db, data, args.media, args.filter_date,
filter_chat, args.filter_empty filter_chat, args.filter_empty
) )
# Process calls # Process calls
process_calls(args, db, data, filter_chat) process_calls(args, db, data, filter_chat)
@@ -511,30 +582,29 @@ def handle_media_directory(args) -> None:
"""Handle media directory copying or moving.""" """Handle media directory copying or moving."""
if os.path.isdir(args.media): if os.path.isdir(args.media):
media_path = os.path.join(args.output, args.media) media_path = os.path.join(args.output, args.media)
if os.path.isdir(media_path): if os.path.isdir(media_path):
print("\nWhatsApp directory already exists in output directory. Skipping...", end="\n") logger.info(
f"WhatsApp directory already exists in output directory. Skipping...{CLEAR_LINE}")
else: else:
if args.move_media: if args.move_media:
try: try:
print("\nMoving media directory...", end="\n") logger.info(f"Moving media directory...\r")
shutil.move(args.media, f"{args.output}/") shutil.move(args.media, f"{args.output}/")
logger.info(f"Media directory has been moved to the output directory{CLEAR_LINE}")
except PermissionError: except PermissionError:
print("\nCannot remove original WhatsApp directory. " logger.warning("Cannot remove original WhatsApp directory. "
"Perhaps the directory is opened?", end="\n") "Perhaps the directory is opened?\n")
else: else:
print("\nCopying media directory...", end="\n") logger.info(f"Copying media directory...\r")
shutil.copytree(args.media, media_path) shutil.copytree(args.media, media_path)
logger.info(f"Media directory has been copied to the output directory{CLEAR_LINE}")
def create_output_files(args, data: ChatCollection, contact_store=None) -> None: def create_output_files(args, data: ChatCollection) -> None:
"""Create output files in the specified formats.""" """Create output files in the specified formats."""
# Create HTML files if requested # Create HTML files if requested
if not args.no_html: if not args.no_html:
# Enrich from vcards if available
if contact_store and not contact_store.is_empty():
contact_store.enrich_from_vcards(data)
android_handler.create_html( android_handler.create_html(
data, data,
args.output, args.output,
@@ -543,32 +613,29 @@ def create_output_files(args, data: ChatCollection, contact_store=None) -> None:
args.offline, args.offline,
args.size, args.size,
args.no_avatar, args.no_avatar,
args.whatsapp_theme, args.telegram_theme,
args.headline args.headline
) )
# Create text files if requested # Create text files if requested
if args.text_format: if args.text_format:
print("Writing text file...") logger.info(f"Writing text file...{CLEAR_LINE}")
android_handler.create_txt(data, args.text_format) android_handler.create_txt(data, args.text_format)
# Create JSON files if requested # Create JSON files if requested
if args.json and not args.import_json: if args.json and not args.import_json:
export_json(args, data, contact_store) export_json(args, data)
def export_json(args, data: ChatCollection) -> None:
    """Write the collected chats out as JSON.

    Entries that are ChatStore objects are first converted through their
    ``to_json`` method. The result is then handed to the single-file writer,
    or to the per-chat writer when ``args.json_per_chat`` or
    ``args.telegram`` is set.
    """
    # TODO: remove all non-target chats from data if filtering is applied?

    # Peek at the first entry (None when the collection is empty) to decide
    # whether the values still need converting to plain JSON structures.
    first_key = next(iter(data), None)
    if isinstance(data.get(first_key), ChatStore):
        data = {jik: chat.to_json() for jik, chat in data.items()}

    # One file per chat when requested explicitly or when the Telegram
    # option is set; otherwise a single combined file.
    per_chat = args.json_per_chat or args.telegram
    writer = export_multiple_json if per_chat else export_single_json
    writer(args, data)
@@ -582,19 +649,20 @@ def export_single_json(args, data: Dict) -> None:
ensure_ascii=not args.avoid_encoding_json, ensure_ascii=not args.avoid_encoding_json,
indent=args.pretty_print_json indent=args.pretty_print_json
) )
print(f"\nWriting JSON file...({bytes_to_readable(len(json_data))})") logger.info(f"Writing JSON file...\r")
f.write(json_data) f.write(json_data)
logger.info(f"JSON file saved...({bytes_to_readable(len(json_data))}){CLEAR_LINE}")
def export_multiple_json(args, data: Dict) -> None: def export_multiple_json(args, data: Dict) -> None:
"""Export data to multiple JSON files, one per chat.""" """Export data to multiple JSON files, one per chat."""
# Adjust output path if needed # Adjust output path if needed
json_path = args.json[:-5] if args.json.endswith(".json") else args.json json_path = args.json[:-5] if args.json.endswith(".json") else args.json
# Create directory if it doesn't exist # Create directory if it doesn't exist
if not os.path.isdir(json_path): if not os.path.isdir(json_path):
os.makedirs(json_path, exist_ok=True) os.makedirs(json_path, exist_ok=True)
# Export each chat # Export each chat
total = len(data.keys()) total = len(data.keys())
for index, jik in enumerate(data.keys()): for index, jik in enumerate(data.keys()):
@@ -602,22 +670,25 @@ def export_multiple_json(args, data: Dict) -> None:
contact = data[jik]["name"].replace('/', '') contact = data[jik]["name"].replace('/', '')
else: else:
contact = jik.replace('+', '') contact = jik.replace('+', '')
with open(f"{json_path}/{sanitize_filename(contact)}.json", "w") as f: if args.telegram:
messages = telegram_json_format(jik, data[jik], args.timezone_offset)
else:
messages = {jik: data[jik]}
with open(f"{json_path}/{safe_name(contact)}.json", "w") as f:
file_content = json.dumps( file_content = json.dumps(
{jik: data[jik]}, messages,
ensure_ascii=not args.avoid_encoding_json, ensure_ascii=not args.avoid_encoding_json,
indent=args.pretty_print_json indent=args.pretty_print_json
) )
f.write(file_content) f.write(file_content)
print(f"Writing JSON file...({index + 1}/{total})", end="\r") logger.info(f"Writing JSON file...({index + 1}/{total})\r")
print()
def process_exported_chat(args, data: ChatCollection) -> None: def process_exported_chat(args, data: ChatCollection) -> None:
"""Process an exported chat file.""" """Process an exported chat file."""
exported_handler.messages(args.exported, data, args.assume_first_as_me) exported_handler.messages(args.exported, data, args.assume_first_as_me)
if not args.no_html: if not args.no_html:
android_handler.create_html( android_handler.create_html(
data, data,
@@ -627,37 +698,61 @@ def process_exported_chat(args, data: ChatCollection) -> None:
args.offline, args.offline,
args.size, args.size,
args.no_avatar, args.no_avatar,
args.whatsapp_theme, args.telegram_theme,
args.headline args.headline
) )
# Copy files to output directory # Copy files to output directory
for file in glob.glob(r'*.*'): for file in glob.glob(r'*.*'):
shutil.copy(file, args.output) shutil.copy(file, args.output)
def setup_logging(level):
    """Configure the root logger for the exporter.

    Args:
        level: Logging level passed to ``logging.basicConfig``
            (e.g. ``logging.INFO`` or ``logging.DEBUG``).

    A stream handler is always installed. When ``level`` is
    ``logging.DEBUG`` a timestamped log file is additionally created in the
    current working directory.
    """
    log_handler_stdout = logging.StreamHandler()
    # No automatic newline: each message supplies its own line ending
    # ("\n", "\r", ...) so progress lines can be overwritten in place.
    log_handler_stdout.terminator = ""
    handlers = [log_handler_stdout]
    if level == logging.DEBUG:
        timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
        # File name prefix matches the tool name ("wtsexporter").
        handlers.append(logging.FileHandler(f"wtsexporter-debug-{timestamp}.log", mode="w"))
    logging.basicConfig(
        level=level,
        format="[%(levelname)s] %(message)s",
        handlers=handlers
    )
def main(): def main():
"""Main function to run the WhatsApp Chat Exporter.""" """Main function to run the WhatsApp Chat Exporter."""
# Set up and parse arguments # Set up and parse arguments
parser = setup_argument_parser() parser = setup_argument_parser()
args = parser.parse_args() args = parser.parse_args()
# Check for updates # Check for updates
if args.check_update: if args.check_update:
exit(check_update()) exit(check_update())
# Validate arguments # Validate arguments
validate_args(parser, args) validate_args(parser, args)
# Print banner if not suppressed
if not args.no_banner:
print(WTSEXPORTER_BANNER)
if args.debug:
setup_logging(logging.DEBUG)
logger.debug("Debug mode enabled.\n")
else:
setup_logging(logging.INFO)
# Create output directory if it doesn't exist # Create output directory if it doesn't exist
os.makedirs(args.output, exist_ok=True) os.makedirs(args.output, exist_ok=True)
# Initialize data collection # Initialize data collection
data = ChatCollection() data = ChatCollection()
# Set up contact store for vCard enrichment if needed # Set up contact store for vCard enrichment if needed
contact_store = setup_contact_store(args) contact_store = setup_contact_store(args)
if args.import_json: if args.import_json:
# Import from JSON # Import from JSON
import_from_json(args.json, data) import_from_json(args.json, data)
@@ -669,7 +764,7 @@ def main():
args.offline, args.offline,
args.size, args.size,
args.no_avatar, args.no_avatar,
args.whatsapp_theme, args.telegram_theme,
args.headline args.headline
) )
elif args.exported: elif args.exported:
@@ -681,13 +776,13 @@ def main():
# Set default media path if not provided # Set default media path if not provided
if args.media is None: if args.media is None:
args.media = "WhatsApp" args.media = "WhatsApp"
# Set default DB paths if not provided # Set default DB paths if not provided
if args.db is None: if args.db is None:
args.db = "msgstore.db" args.db = "msgstore.db"
if args.wa is None: if args.wa is None:
args.wa = "wa.db" args.wa = "wa.db"
# Decrypt backup if needed # Decrypt backup if needed
if args.key is not None: if args.key is not None:
error = decrypt_android_backup(args) error = decrypt_android_backup(args)
@@ -700,37 +795,54 @@ def main():
else: else:
from Whatsapp_Chat_Exporter.utility import WhatsAppIdentifier as identifiers from Whatsapp_Chat_Exporter.utility import WhatsAppIdentifier as identifiers
args.identifiers = identifiers args.identifiers = identifiers
# Set default media path if not provided # Set default media path if not provided
if args.media is None: if args.media is None:
args.media = identifiers.DOMAIN args.media = identifiers.DOMAIN
# Extract media from backup if needed # Extract media from backup if needed
if args.backup is not None: if args.backup is not None:
if not os.path.isdir(args.media): if not os.path.isdir(args.media):
ios_media_handler.extract_media(args.backup, identifiers, args.decrypt_chunk_size) ios_media_handler.extract_media(
args.backup, identifiers, args.decrypt_chunk_size)
else: else:
print("WhatsApp directory already exists, skipping WhatsApp file extraction.") logger.info(
f"WhatsApp directory already exists, skipping WhatsApp file extraction.{CLEAR_LINE}")
# Set default DB paths if not provided # Set default DB paths if not provided
if args.db is None: if args.db is None:
args.db = identifiers.MESSAGE args.db = identifiers.MESSAGE
if args.wa is None: if args.wa is None:
args.wa = "ContactsV2.sqlite" args.wa = "ContactsV2.sqlite"
# Process contacts
process_contacts(args, data, contact_store)
# Process messages, media, and calls
process_messages(args, data)
# Create output files
create_output_files(args, data, contact_store)
# Handle media directory
handle_media_directory(args)
print("Everything is done!") if args.incremental_merge:
incremental_merge(
args.source_dir,
args.target_dir,
args.media,
args.pretty_print_json,
args.avoid_encoding_json
)
logger.info(f"Incremental merge completed successfully.{CLEAR_LINE}")
else:
# Process contacts
process_contacts(args, data)
# Enrich contacts from vCards if needed
if args.android and contact_store and not contact_store.is_empty():
contact_store.enrich_from_vcards(data)
# Process messages, media, and calls
process_messages(args, data)
# Create output files
create_output_files(args, data)
# Handle media directory
handle_media_directory(args)
logger.info("Everything is done!")
if __name__ == "__main__": if __name__ == "__main__":
main() main()

View File

@@ -1,11 +1,14 @@
import time
import hmac import hmac
import io import io
import logging
import threading
import zlib import zlib
import concurrent.futures import concurrent.futures
from typing import Tuple, Union from typing import Tuple, Union
from hashlib import sha256 from hashlib import sha256
from sys import exit from sys import exit
from Whatsapp_Chat_Exporter.utility import CRYPT14_OFFSETS, Crypt, DbType from Whatsapp_Chat_Exporter.utility import CLEAR_LINE, CRYPT14_OFFSETS, Crypt, DbType
try: try:
import zlib import zlib
@@ -23,6 +26,9 @@ else:
support_crypt15 = True support_crypt15 = True
logger = logging.getLogger(__name__)
class DecryptionError(Exception): class DecryptionError(Exception):
"""Base class for decryption-related exceptions.""" """Base class for decryption-related exceptions."""
pass pass
@@ -115,6 +121,7 @@ def _decrypt_database(db_ciphertext: bytes, main_key: bytes, iv: bytes) -> bytes
) )
return db return db
def _decrypt_crypt14(database: bytes, main_key: bytes, max_worker: int = 10) -> bytes: def _decrypt_crypt14(database: bytes, main_key: bytes, max_worker: int = 10) -> bytes:
"""Decrypt a crypt14 database using multithreading for brute-force offset detection. """Decrypt a crypt14 database using multithreading for brute-force offset detection.
@@ -138,11 +145,28 @@ def _decrypt_crypt14(database: bytes, main_key: bytes, max_worker: int = 10) ->
iv = database[offsets["iv"]:offsets["iv"] + 16] iv = database[offsets["iv"]:offsets["iv"] + 16]
db_ciphertext = database[offsets["db"]:] db_ciphertext = database[offsets["db"]:]
try: try:
return _decrypt_database(db_ciphertext, main_key, iv) decrypted_db = _decrypt_database(db_ciphertext, main_key, iv)
except (zlib.error, ValueError): except (zlib.error, ValueError):
pass # Try next offset pass # Try next offset
else:
logger.debug(
f"Decryption successful with known offsets: IV {offsets['iv']}, DB {offsets['db']}{CLEAR_LINE}"
)
return decrypted_db # Successful decryption
print("Common offsets failed. Initiating brute-force with multithreading...") def animate_message(stop_event):
base_msg = "Common offsets failed. Initiating brute-force with multithreading"
dots = ["", ".", "..", "..."]
i = 0
while not stop_event.is_set():
logger.info(f"{base_msg}{dots[i % len(dots)]}\x1b[K\r")
time.sleep(0.3)
i += 1
logger.info(f"Common offsets failed but brute-forcing the offset works!{CLEAR_LINE}")
stop_event = threading.Event()
anim_thread = threading.Thread(target=animate_message, args=(stop_event,))
anim_thread.start()
# Convert brute force generator into a list for parallel processing # Convert brute force generator into a list for parallel processing
offset_combinations = list(brute_force_offset()) offset_combinations = list(brute_force_offset())
@@ -152,22 +176,27 @@ def _decrypt_crypt14(database: bytes, main_key: bytes, max_worker: int = 10) ->
start_iv, end_iv, start_db = offset_tuple start_iv, end_iv, start_db = offset_tuple
iv = database[start_iv:end_iv] iv = database[start_iv:end_iv]
db_ciphertext = database[start_db:] db_ciphertext = database[start_db:]
logger.debug(""f"Trying offsets: IV {start_iv}-{end_iv}, DB {start_db}{CLEAR_LINE}")
try: try:
db = _decrypt_database(db_ciphertext, main_key, iv) db = _decrypt_database(db_ciphertext, main_key, iv)
print( except (zlib.error, ValueError):
return None # Decryption failed, move to next
else:
stop_event.set()
anim_thread.join()
logger.info(
f"The offsets of your IV and database are {start_iv} and " f"The offsets of your IV and database are {start_iv} and "
f"{start_db}, respectively. To include your offsets in the " f"{start_db}, respectively. To include your offsets in the "
"program, please report it by creating an issue on GitHub: " "program, please report it by creating an issue on GitHub: "
"https://github.com/KnugiHK/Whatsapp-Chat-Exporter/discussions/47" "https://github.com/KnugiHK/Whatsapp-Chat-Exporter/discussions/47"
"\nShutting down other threads..." f"\nShutting down other threads...{CLEAR_LINE}"
) )
return db return db
except (zlib.error, ValueError):
return None # Decryption failed, move to next
with concurrent.futures.ThreadPoolExecutor(max_worker) as executor: with concurrent.futures.ThreadPoolExecutor(max_worker) as executor:
future_to_offset = {executor.submit(attempt_decrypt, offset): offset for offset in offset_combinations} future_to_offset = {executor.submit(attempt_decrypt, offset)
: offset for offset in offset_combinations}
try: try:
for future in concurrent.futures.as_completed(future_to_offset): for future in concurrent.futures.as_completed(future_to_offset):
@@ -178,14 +207,18 @@ def _decrypt_crypt14(database: bytes, main_key: bytes, max_worker: int = 10) ->
return result return result
except KeyboardInterrupt: except KeyboardInterrupt:
print("\nBrute force interrupted by user (Ctrl+C). Exiting gracefully...") stop_event.set()
anim_thread.join()
logger.info(f"Brute force interrupted by user (Ctrl+C). Shutting down gracefully...{CLEAR_LINE}")
executor.shutdown(wait=False, cancel_futures=True) executor.shutdown(wait=False, cancel_futures=True)
exit(1) exit(1)
finally:
stop_event.set()
anim_thread.join()
raise OffsetNotFoundError("Could not find the correct offsets for decryption.") raise OffsetNotFoundError("Could not find the correct offsets for decryption.")
def _decrypt_crypt12(database: bytes, main_key: bytes) -> bytes: def _decrypt_crypt12(database: bytes, main_key: bytes) -> bytes:
"""Decrypt a crypt12 database. """Decrypt a crypt12 database.
@@ -287,7 +320,7 @@ def decrypt_backup(
if crypt is not Crypt.CRYPT15 and len(key) != 158: if crypt is not Crypt.CRYPT15 and len(key) != 158:
raise InvalidKeyError("The key file must be 158 bytes") raise InvalidKeyError("The key file must be 158 bytes")
#signature check, this is check is used in crypt 12 and 14 # signature check, this is check is used in crypt 12 and 14
if crypt != Crypt.CRYPT15: if crypt != Crypt.CRYPT15:
t1 = key[30:62] t1 = key[30:62]
@@ -297,7 +330,6 @@ def decrypt_backup(
if t1 != database[3:35] and crypt == Crypt.CRYPT12: if t1 != database[3:35] and crypt == Crypt.CRYPT12:
raise ValueError("The signature of key file and backup file mismatch") raise ValueError("The signature of key file and backup file mismatch")
if crypt == Crypt.CRYPT15: if crypt == Crypt.CRYPT15:
if keyfile_stream: if keyfile_stream:
main_key, hex_key = _extract_enc_key(key) main_key, hex_key = _extract_enc_key(key)
@@ -305,7 +337,7 @@ def decrypt_backup(
main_key, hex_key = _derive_main_enc_key(key) main_key, hex_key = _derive_main_enc_key(key)
if show_crypt15: if show_crypt15:
hex_key_str = ' '.join([hex_key.hex()[c:c+4] for c in range(0, len(hex_key.hex()), 4)]) hex_key_str = ' '.join([hex_key.hex()[c:c+4] for c in range(0, len(hex_key.hex()), 4)])
print(f"The HEX key of the crypt15 backup is: {hex_key_str}") logger.info(f"The HEX key of the crypt15 backup is: {hex_key_str}{CLEAR_LINE}")
else: else:
main_key = key[126:] main_key = key[126:]
@@ -321,7 +353,6 @@ def decrypt_backup(
except (InvalidFileFormatError, OffsetNotFoundError, ValueError) as e: except (InvalidFileFormatError, OffsetNotFoundError, ValueError) as e:
raise DecryptionError(f"Decryption failed: {e}") from e raise DecryptionError(f"Decryption failed: {e}") from e
if not dry_run: if not dry_run:
with open(output, "wb") as f: with open(output, "wb") as f:
f.write(db) f.write(db)

View File

@@ -1,5 +1,6 @@
#!/usr/bin/python3 #!/usr/bin/python3
import logging
import sqlite3 import sqlite3
import os import os
import shutil import shutil
@@ -9,36 +10,41 @@ from markupsafe import escape as htmle
from base64 import b64decode, b64encode from base64 import b64decode, b64encode
from datetime import datetime from datetime import datetime
from Whatsapp_Chat_Exporter.data_model import ChatStore, Message from Whatsapp_Chat_Exporter.data_model import ChatStore, Message
from Whatsapp_Chat_Exporter.utility import CURRENT_TZ_OFFSET, MAX_SIZE, ROW_SIZE, JidType, Device from Whatsapp_Chat_Exporter.utility import CLEAR_LINE, CURRENT_TZ_OFFSET, MAX_SIZE, ROW_SIZE, JidType, Device
from Whatsapp_Chat_Exporter.utility import rendering, get_file_name, setup_template, get_cond_for_empty from Whatsapp_Chat_Exporter.utility import rendering, get_file_name, setup_template, get_cond_for_empty
from Whatsapp_Chat_Exporter.utility import get_status_location, convert_time_unit, determine_metadata from Whatsapp_Chat_Exporter.utility import get_status_location, convert_time_unit, determine_metadata
from Whatsapp_Chat_Exporter.utility import get_chat_condition, slugify, bytes_to_readable from Whatsapp_Chat_Exporter.utility import get_chat_condition, safe_name, bytes_to_readable
logger = logging.getLogger(__name__)
def contacts(db, data, enrich_from_vcards): def contacts(db, data, enrich_from_vcards):
""" """
Process WhatsApp contacts from the database. Process WhatsApp contacts from the database.
Args: Args:
db: Database connection db: Database connection
data: Data store object data: Data store object
enrich_from_vcards: Path to vCard file for contact enrichment enrich_from_vcards: Path to vCard file for contact enrichment
Returns: Returns:
bool: False if no contacts found, True otherwise bool: False if no contacts found, True otherwise
""" """
c = db.cursor() c = db.cursor()
c.execute("SELECT count() FROM wa_contacts") c.execute("SELECT count() FROM wa_contacts")
total_row_number = c.fetchone()[0] total_row_number = c.fetchone()[0]
if total_row_number == 0: if total_row_number == 0:
if enrich_from_vcards is not None: if enrich_from_vcards is not None:
print("No contacts profiles found in the default database, contacts will be imported from the specified vCard file.") logger.info(
"No contacts profiles found in the default database, contacts will be imported from the specified vCard file.")
else: else:
print("No contacts profiles found in the default database, consider using --enrich-from-vcards for adopting names from exported contacts from Google") logger.warning(
"No contacts profiles found in the default database, consider using --enrich-from-vcards for adopting names from exported contacts from Google")
return False return False
else: else:
print(f"Processing contacts...({total_row_number})") logger.info(f"Processed {total_row_number} contacts\n")
c.execute("SELECT jid, COALESCE(display_name, wa_name) as display_name, status FROM wa_contacts;") c.execute("SELECT jid, COALESCE(display_name, wa_name) as display_name, status FROM wa_contacts;")
row = c.fetchone() row = c.fetchone()
@@ -47,14 +53,14 @@ def contacts(db, data, enrich_from_vcards):
if row["status"] is not None: if row["status"] is not None:
current_chat.status = row["status"] current_chat.status = row["status"]
row = c.fetchone() row = c.fetchone()
return True return True
def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat, filter_empty): def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat, filter_empty, no_reply):
""" """
Process WhatsApp messages from the database. Process WhatsApp messages from the database.
Args: Args:
db: Database connection db: Database connection
data: Data store object data: Data store object
@@ -66,7 +72,7 @@ def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat,
""" """
c = db.cursor() c = db.cursor()
total_row_number = _get_message_count(c, filter_empty, filter_date, filter_chat) total_row_number = _get_message_count(c, filter_empty, filter_date, filter_chat)
print(f"Processing messages...(0/{total_row_number})", end="\r") logger.info(f"Processing messages...(0/{total_row_number})\r")
try: try:
content_cursor = _get_messages_cursor_legacy(c, filter_empty, filter_date, filter_chat) content_cursor = _get_messages_cursor_legacy(c, filter_empty, filter_date, filter_chat)
@@ -81,18 +87,18 @@ def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat,
i = 0 i = 0
# Fetch the first row safely # Fetch the first row safely
content = _fetch_row_safely(content_cursor) content = _fetch_row_safely(content_cursor)
while content is not None: while content is not None:
_process_single_message(data, content, table_message, timezone_offset) _process_single_message(data, content, table_message, timezone_offset)
i += 1 i += 1
if i % 1000 == 0: if i % 1000 == 0:
print(f"Processing messages...({i}/{total_row_number})", end="\r") logger.info(f"Processing messages...({i}/{total_row_number})\r")
# Fetch the next row safely # Fetch the next row safely
content = _fetch_row_safely(content_cursor) content = _fetch_row_safely(content_cursor)
print(f"Processing messages...({total_row_number}/{total_row_number})", end="\r") logger.info(f"Processed {total_row_number} messages{CLEAR_LINE}")
# Helper functions for message processing # Helper functions for message processing
@@ -102,8 +108,10 @@ def _get_message_count(cursor, filter_empty, filter_date, filter_chat):
try: try:
empty_filter = get_cond_for_empty(filter_empty, "messages.key_remote_jid", "messages.needs_push") empty_filter = get_cond_for_empty(filter_empty, "messages.key_remote_jid", "messages.needs_push")
date_filter = f'AND timestamp {filter_date}' if filter_date is not None else '' date_filter = f'AND timestamp {filter_date}' if filter_date is not None else ''
include_filter = get_chat_condition(filter_chat[0], True, ["messages.key_remote_jid", "messages.remote_resource"], "jid", "android") include_filter = get_chat_condition(
exclude_filter = get_chat_condition(filter_chat[1], False, ["messages.key_remote_jid", "messages.remote_resource"], "jid", "android") filter_chat[0], True, ["messages.key_remote_jid", "messages.remote_resource"], "jid", "android")
exclude_filter = get_chat_condition(
filter_chat[1], False, ["messages.key_remote_jid", "messages.remote_resource"], "jid", "android")
cursor.execute(f"""SELECT count() cursor.execute(f"""SELECT count()
FROM messages FROM messages
@@ -119,8 +127,10 @@ def _get_message_count(cursor, filter_empty, filter_date, filter_chat):
except sqlite3.OperationalError: except sqlite3.OperationalError:
empty_filter = get_cond_for_empty(filter_empty, "jid.raw_string", "broadcast") empty_filter = get_cond_for_empty(filter_empty, "jid.raw_string", "broadcast")
date_filter = f'AND timestamp {filter_date}' if filter_date is not None else '' date_filter = f'AND timestamp {filter_date}' if filter_date is not None else ''
include_filter = get_chat_condition(filter_chat[0], True, ["jid.raw_string", "jid_group.raw_string"], "jid", "android") include_filter = get_chat_condition(
exclude_filter = get_chat_condition(filter_chat[1], False, ["jid.raw_string", "jid_group.raw_string"], "jid", "android") filter_chat[0], True, ["jid.raw_string", "jid_group.raw_string"], "jid", "android")
exclude_filter = get_chat_condition(
filter_chat[1], False, ["jid.raw_string", "jid_group.raw_string"], "jid", "android")
cursor.execute(f"""SELECT count() cursor.execute(f"""SELECT count()
FROM message FROM message
@@ -142,8 +152,10 @@ def _get_messages_cursor_legacy(cursor, filter_empty, filter_date, filter_chat):
"""Get cursor for legacy database schema.""" """Get cursor for legacy database schema."""
empty_filter = get_cond_for_empty(filter_empty, "messages.key_remote_jid", "messages.needs_push") empty_filter = get_cond_for_empty(filter_empty, "messages.key_remote_jid", "messages.needs_push")
date_filter = f'AND messages.timestamp {filter_date}' if filter_date is not None else '' date_filter = f'AND messages.timestamp {filter_date}' if filter_date is not None else ''
include_filter = get_chat_condition(filter_chat[0], True, ["messages.key_remote_jid", "messages.remote_resource"], "jid_global", "android") include_filter = get_chat_condition(
exclude_filter = get_chat_condition(filter_chat[1], False, ["messages.key_remote_jid", "messages.remote_resource"], "jid_global", "android") filter_chat[0], True, ["messages.key_remote_jid", "messages.remote_resource"], "jid_global", "android")
exclude_filter = get_chat_condition(
filter_chat[1], False, ["messages.key_remote_jid", "messages.remote_resource"], "jid_global", "android")
cursor.execute(f"""SELECT messages.key_remote_jid, cursor.execute(f"""SELECT messages.key_remote_jid,
messages._id, messages._id,
@@ -205,8 +217,10 @@ def _get_messages_cursor_new(cursor, filter_empty, filter_date, filter_chat):
"""Get cursor for new database schema.""" """Get cursor for new database schema."""
empty_filter = get_cond_for_empty(filter_empty, "key_remote_jid", "broadcast") empty_filter = get_cond_for_empty(filter_empty, "key_remote_jid", "broadcast")
date_filter = f'AND message.timestamp {filter_date}' if filter_date is not None else '' date_filter = f'AND message.timestamp {filter_date}' if filter_date is not None else ''
include_filter = get_chat_condition(filter_chat[0], True, ["key_remote_jid", "jid_group.raw_string"], "jid_global", "android") include_filter = get_chat_condition(
exclude_filter = get_chat_condition(filter_chat[1], False, ["key_remote_jid", "jid_group.raw_string"], "jid_global", "android") filter_chat[0], True, ["key_remote_jid", "jid_group.raw_string"], "jid_global", "android")
exclude_filter = get_chat_condition(
filter_chat[1], False, ["key_remote_jid", "jid_group.raw_string"], "jid_global", "android")
cursor.execute(f"""SELECT jid_global.raw_string as key_remote_jid, cursor.execute(f"""SELECT jid_global.raw_string as key_remote_jid,
message._id, message._id,
@@ -288,19 +302,18 @@ def _process_single_message(data, content, table_message, timezone_offset):
"""Process a single message row.""" """Process a single message row."""
if content["key_remote_jid"] is None: if content["key_remote_jid"] is None:
return return
# Get or create the chat # Get or create the chat
if not data.get_chat(content["key_remote_jid"]): current_chat = data.get_chat(content["key_remote_jid"])
current_chat = data.add_chat(content["key_remote_jid"], ChatStore(Device.ANDROID, content["chat_subject"])) if current_chat is None:
else: current_chat = data.add_chat(content["key_remote_jid"], ChatStore(
current_chat = data.get_chat(content["key_remote_jid"]) Device.ANDROID, content["chat_subject"]))
# Determine sender_jid_row_id # Determine sender_jid_row_id
if "sender_jid_row_id" in content: if "sender_jid_row_id" in content:
sender_jid_row_id = content["sender_jid_row_id"] sender_jid_row_id = content["sender_jid_row_id"]
else: else:
sender_jid_row_id = None sender_jid_row_id = None
# Create message object # Create message object
message = Message( message = Message(
from_me=not sender_jid_row_id and content["key_from_me"], from_me=not sender_jid_row_id and content["key_from_me"],
@@ -312,19 +325,19 @@ def _process_single_message(data, content, table_message, timezone_offset):
received_timestamp=content["received_timestamp"], received_timestamp=content["received_timestamp"],
read_timestamp=content["read_timestamp"] read_timestamp=content["read_timestamp"]
) )
# Handle binary data # Handle binary data
if isinstance(content["data"], bytes): if isinstance(content["data"], bytes):
_process_binary_message(message, content) _process_binary_message(message, content)
current_chat.add_message(content["_id"], message) current_chat.add_message(content["_id"], message)
return return
# Set sender for group chats # Set sender for group chats
if content["jid_type"] == JidType.GROUP and content["key_from_me"] == 0: if content["jid_type"] == JidType.GROUP and content["key_from_me"] == 0:
_set_group_sender(message, content, data, table_message) _set_group_sender(message, content, data, table_message)
else: else:
message.sender = None message.sender = None
# Handle quoted messages # Handle quoted messages
if content["quoted"] is not None: if content["quoted"] is not None:
message.reply = content["quoted"] message.reply = content["quoted"]
@@ -334,7 +347,7 @@ def _process_single_message(data, content, table_message, timezone_offset):
message.quoted_data = content["quoted_data"] message.quoted_data = content["quoted_data"]
else: else:
message.reply = None message.reply = None
# Handle message caption # Handle message caption
if not table_message and content["media_caption"] is not None: if not table_message and content["media_caption"] is not None:
# Old schema # Old schema
@@ -344,14 +357,14 @@ def _process_single_message(data, content, table_message, timezone_offset):
message.caption = content["data"] message.caption = content["data"]
else: else:
message.caption = None message.caption = None
# Handle message content based on status # Handle message content based on status
if content["status"] == 6: # 6 = Metadata if content["status"] == 6: # 6 = Metadata
_process_metadata_message(message, content, data, table_message) _process_metadata_message(message, content, data, table_message)
else: else:
# Real message # Real message
_process_regular_message(message, content, table_message) _process_regular_message(message, content, table_message)
current_chat.add_message(content["_id"], message) current_chat.add_message(content["_id"], message)
@@ -381,7 +394,7 @@ def _set_group_sender(message, content, data, table_message):
name = data.get_chat(content["remote_resource"]).name name = data.get_chat(content["remote_resource"]).name
if "@" in content["remote_resource"]: if "@" in content["remote_resource"]:
fallback = content["remote_resource"].split('@')[0] fallback = content["remote_resource"].split('@')[0]
message.sender = name or fallback message.sender = name or fallback
@@ -389,7 +402,7 @@ def _process_metadata_message(message, content, data, table_message):
"""Process metadata message.""" """Process metadata message."""
message.meta = True message.meta = True
name = fallback = None name = fallback = None
if table_message: if table_message:
if content["sender_jid_row_id"] > 0: if content["sender_jid_row_id"] > 0:
_jid = content["group_sender_jid"] _jid = content["group_sender_jid"]
@@ -408,12 +421,12 @@ def _process_metadata_message(message, content, data, table_message):
fallback = _jid.split('@')[0] fallback = _jid.split('@')[0]
else: else:
name = "You" name = "You"
message.data = determine_metadata(content, name or fallback) message.data = determine_metadata(content, name or fallback)
if isinstance(message.data, str) and "<br>" in message.data: if isinstance(message.data, str) and "<br>" in message.data:
message.safe = True message.safe = True
if message.data is None: if message.data is None:
if content["video_call"] is not None: # Missed call if content["video_call"] is not None: # Missed call
message.meta = True message.meta = True
@@ -429,7 +442,7 @@ def _process_metadata_message(message, content, data, table_message):
def _process_regular_message(message, content, table_message): def _process_regular_message(message, content, table_message):
"""Process regular (non-metadata) message.""" """Process regular (non-metadata) message."""
message.sticker = content["media_wa_type"] == 20 # Sticker is a message message.sticker = content["media_wa_type"] == 20 # Sticker is a message
if content["key_from_me"] == 1: if content["key_from_me"] == 1:
if content["status"] == 5 and content["edit_version"] == 7 or table_message and content["media_wa_type"] == 15: if content["status"] == 5 and content["edit_version"] == 7 or table_message and content["media_wa_type"] == 15:
msg = "Message deleted" msg = "Message deleted"
@@ -454,7 +467,7 @@ def _process_regular_message(message, content, table_message):
msg = content["data"] msg = content["data"]
if msg is not None: if msg is not None:
msg = _format_message_text(msg) msg = _format_message_text(msg)
message.data = msg message.data = msg
@@ -470,7 +483,7 @@ def _format_message_text(text):
def media(db, data, media_folder, filter_date, filter_chat, filter_empty, separate_media=True): def media(db, data, media_folder, filter_date, filter_chat, filter_empty, separate_media=True):
""" """
Process WhatsApp media files from the database. Process WhatsApp media files from the database.
Args: Args:
db: Database connection db: Database connection
data: Data store object data: Data store object
@@ -482,30 +495,30 @@ def media(db, data, media_folder, filter_date, filter_chat, filter_empty, separa
""" """
c = db.cursor() c = db.cursor()
total_row_number = _get_media_count(c, filter_empty, filter_date, filter_chat) total_row_number = _get_media_count(c, filter_empty, filter_date, filter_chat)
print(f"\nProcessing media...(0/{total_row_number})", end="\r") logger.info(f"Processing media...(0/{total_row_number})\r")
try: try:
content_cursor = _get_media_cursor_legacy(c, filter_empty, filter_date, filter_chat) content_cursor = _get_media_cursor_legacy(c, filter_empty, filter_date, filter_chat)
except sqlite3.OperationalError: except sqlite3.OperationalError:
content_cursor = _get_media_cursor_new(c, filter_empty, filter_date, filter_chat) content_cursor = _get_media_cursor_new(c, filter_empty, filter_date, filter_chat)
content = content_cursor.fetchone() content = content_cursor.fetchone()
mime = MimeTypes() mime = MimeTypes()
# Ensure thumbnails directory exists # Ensure thumbnails directory exists
Path(f"{media_folder}/thumbnails").mkdir(parents=True, exist_ok=True) Path(f"{media_folder}/thumbnails").mkdir(parents=True, exist_ok=True)
i = 0 i = 0
while content is not None: while content is not None:
_process_single_media(data, content, media_folder, mime, separate_media) _process_single_media(data, content, media_folder, mime, separate_media)
i += 1 i += 1
if i % 100 == 0: if i % 100 == 0:
print(f"Processing media...({i}/{total_row_number})", end="\r") logger.info(f"Processing media...({i}/{total_row_number})\r")
content = content_cursor.fetchone() content = content_cursor.fetchone()
print(f"Processing media...({total_row_number}/{total_row_number})", end="\r") logger.info(f"Processed {total_row_number} media{CLEAR_LINE}")
# Helper functions for media processing # Helper functions for media processing
@@ -515,8 +528,10 @@ def _get_media_count(cursor, filter_empty, filter_date, filter_chat):
try: try:
empty_filter = get_cond_for_empty(filter_empty, "key_remote_jid", "messages.needs_push") empty_filter = get_cond_for_empty(filter_empty, "key_remote_jid", "messages.needs_push")
date_filter = f'AND messages.timestamp {filter_date}' if filter_date is not None else '' date_filter = f'AND messages.timestamp {filter_date}' if filter_date is not None else ''
include_filter = get_chat_condition(filter_chat[0], True, ["messages.key_remote_jid", "remote_resource"], "jid", "android") include_filter = get_chat_condition(
exclude_filter = get_chat_condition(filter_chat[1], False, ["messages.key_remote_jid", "remote_resource"], "jid", "android") filter_chat[0], True, ["messages.key_remote_jid", "remote_resource"], "jid", "android")
exclude_filter = get_chat_condition(
filter_chat[1], False, ["messages.key_remote_jid", "remote_resource"], "jid", "android")
cursor.execute(f"""SELECT count() cursor.execute(f"""SELECT count()
FROM message_media FROM message_media
@@ -534,8 +549,10 @@ def _get_media_count(cursor, filter_empty, filter_date, filter_chat):
except sqlite3.OperationalError: except sqlite3.OperationalError:
empty_filter = get_cond_for_empty(filter_empty, "jid.raw_string", "broadcast") empty_filter = get_cond_for_empty(filter_empty, "jid.raw_string", "broadcast")
date_filter = f'AND message.timestamp {filter_date}' if filter_date is not None else '' date_filter = f'AND message.timestamp {filter_date}' if filter_date is not None else ''
include_filter = get_chat_condition(filter_chat[0], True, ["jid.raw_string", "jid_group.raw_string"], "jid", "android") include_filter = get_chat_condition(
exclude_filter = get_chat_condition(filter_chat[1], False, ["jid.raw_string", "jid_group.raw_string"], "jid", "android") filter_chat[0], True, ["jid.raw_string", "jid_group.raw_string"], "jid", "android")
exclude_filter = get_chat_condition(
filter_chat[1], False, ["jid.raw_string", "jid_group.raw_string"], "jid", "android")
cursor.execute(f"""SELECT count() cursor.execute(f"""SELECT count()
FROM message_media FROM message_media
@@ -557,10 +574,12 @@ def _get_media_count(cursor, filter_empty, filter_date, filter_chat):
def _get_media_cursor_legacy(cursor, filter_empty, filter_date, filter_chat): def _get_media_cursor_legacy(cursor, filter_empty, filter_date, filter_chat):
"""Get cursor for legacy media database schema.""" """Get cursor for legacy media database schema."""
empty_filter = get_cond_for_empty(filter_empty, "key_remote_jid", "broadcast") empty_filter = get_cond_for_empty(filter_empty, "messages.key_remote_jid", "messages.needs_push")
date_filter = f'AND messages.timestamp {filter_date}' if filter_date is not None else '' date_filter = f'AND messages.timestamp {filter_date}' if filter_date is not None else ''
include_filter = get_chat_condition(filter_chat[0], True, ["messages.key_remote_jid", "remote_resource"], "jid", "android") include_filter = get_chat_condition(
exclude_filter = get_chat_condition(filter_chat[1], False, ["messages.key_remote_jid", "remote_resource"], "jid", "android") filter_chat[0], True, ["messages.key_remote_jid", "remote_resource"], "jid", "android")
exclude_filter = get_chat_condition(
filter_chat[1], False, ["messages.key_remote_jid", "remote_resource"], "jid", "android")
cursor.execute(f"""SELECT messages.key_remote_jid, cursor.execute(f"""SELECT messages.key_remote_jid,
message_row_id, message_row_id,
@@ -592,8 +611,10 @@ def _get_media_cursor_new(cursor, filter_empty, filter_date, filter_chat):
"""Get cursor for new media database schema.""" """Get cursor for new media database schema."""
empty_filter = get_cond_for_empty(filter_empty, "key_remote_jid", "broadcast") empty_filter = get_cond_for_empty(filter_empty, "key_remote_jid", "broadcast")
date_filter = f'AND message.timestamp {filter_date}' if filter_date is not None else '' date_filter = f'AND message.timestamp {filter_date}' if filter_date is not None else ''
include_filter = get_chat_condition(filter_chat[0], True, ["key_remote_jid", "jid_group.raw_string"], "jid", "android") include_filter = get_chat_condition(
exclude_filter = get_chat_condition(filter_chat[1], False, ["key_remote_jid", "jid_group.raw_string"], "jid", "android") filter_chat[0], True, ["key_remote_jid", "jid_group.raw_string"], "jid", "android")
exclude_filter = get_chat_condition(
filter_chat[1], False, ["key_remote_jid", "jid_group.raw_string"], "jid", "android")
cursor.execute(f"""SELECT jid.raw_string as key_remote_jid, cursor.execute(f"""SELECT jid.raw_string as key_remote_jid,
message_row_id, message_row_id,
@@ -629,10 +650,10 @@ def _process_single_media(data, content, media_folder, mime, separate_media):
current_chat = data.get_chat(content["key_remote_jid"]) current_chat = data.get_chat(content["key_remote_jid"])
message = current_chat.get_message(content["message_row_id"]) message = current_chat.get_message(content["message_row_id"])
message.media = True message.media = True
if os.path.isfile(file_path): if os.path.isfile(file_path):
message.data = file_path message.data = file_path
# Set mime type # Set mime type
if content["mime_type"] is None: if content["mime_type"] is None:
guess = mime.guess_type(file_path)[0] guess = mime.guess_type(file_path)[0]
@@ -642,11 +663,11 @@ def _process_single_media(data, content, media_folder, mime, separate_media):
message.mime = "application/octet-stream" message.mime = "application/octet-stream"
else: else:
message.mime = content["mime_type"] message.mime = content["mime_type"]
# Copy media to separate folder if needed # Copy media to separate folder if needed
if separate_media: if separate_media:
chat_display_name = slugify(current_chat.name or message.sender chat_display_name = safe_name(current_chat.name or message.sender
or content["key_remote_jid"].split('@')[0], True) or content["key_remote_jid"].split('@')[0])
current_filename = file_path.split("/")[-1] current_filename = file_path.split("/")[-1]
new_folder = os.path.join(media_folder, "separated", chat_display_name) new_folder = os.path.join(media_folder, "separated", chat_display_name)
Path(new_folder).mkdir(parents=True, exist_ok=True) Path(new_folder).mkdir(parents=True, exist_ok=True)
@@ -657,7 +678,7 @@ def _process_single_media(data, content, media_folder, mime, separate_media):
message.data = "The media is missing" message.data = "The media is missing"
message.mime = "media" message.mime = "media"
message.meta = True message.meta = True
# Handle thumbnail # Handle thumbnail
if content["thumbnail"] is not None: if content["thumbnail"] is not None:
thumb_path = f"{media_folder}/thumbnails/{b64decode(content['file_hash']).hex()}.png" thumb_path = f"{media_folder}/thumbnails/{b64decode(content['file_hash']).hex()}.png"
@@ -676,23 +697,26 @@ def vcard(db, data, media_folder, filter_date, filter_chat, filter_empty):
rows = _execute_vcard_query_legacy(c, filter_date, filter_chat, filter_empty) rows = _execute_vcard_query_legacy(c, filter_date, filter_chat, filter_empty)
total_row_number = len(rows) total_row_number = len(rows)
print(f"\nProcessing vCards...(0/{total_row_number})", end="\r") logger.info(f"Processing vCards...(0/{total_row_number})\r")
# Create vCards directory if it doesn't exist # Create vCards directory if it doesn't exist
path = os.path.join(media_folder, "vCards") path = os.path.join(media_folder, "vCards")
Path(path).mkdir(parents=True, exist_ok=True) Path(path).mkdir(parents=True, exist_ok=True)
for index, row in enumerate(rows): for index, row in enumerate(rows):
_process_vcard_row(row, path, data) _process_vcard_row(row, path, data)
print(f"Processing vCards...({index + 1}/{total_row_number})", end="\r") logger.info(f"Processing vCards...({index + 1}/{total_row_number})\r")
logger.info(f"Processed {total_row_number} vCards{CLEAR_LINE}")
def _execute_vcard_query_modern(c, filter_date, filter_chat, filter_empty): def _execute_vcard_query_modern(c, filter_date, filter_chat, filter_empty):
"""Execute vCard query for modern WhatsApp database schema.""" """Execute vCard query for modern WhatsApp database schema."""
# Build the filter conditions # Build the filter conditions
chat_filter_include = get_chat_condition(filter_chat[0], True, ["messages.key_remote_jid", "remote_resource"], "jid", "android") chat_filter_include = get_chat_condition(
chat_filter_exclude = get_chat_condition(filter_chat[1], False, ["messages.key_remote_jid", "remote_resource"], "jid", "android") filter_chat[0], True, ["messages.key_remote_jid", "remote_resource"], "jid", "android")
chat_filter_exclude = get_chat_condition(
filter_chat[1], False, ["messages.key_remote_jid", "remote_resource"], "jid", "android")
date_filter = f'AND messages.timestamp {filter_date}' if filter_date is not None else '' date_filter = f'AND messages.timestamp {filter_date}' if filter_date is not None else ''
empty_filter = get_cond_for_empty(filter_empty, "key_remote_jid", "messages.needs_push") empty_filter = get_cond_for_empty(filter_empty, "key_remote_jid", "messages.needs_push")
@@ -721,8 +745,10 @@ def _execute_vcard_query_legacy(c, filter_date, filter_chat, filter_empty):
"""Execute vCard query for legacy WhatsApp database schema.""" """Execute vCard query for legacy WhatsApp database schema."""
# Build the filter conditions # Build the filter conditions
chat_filter_include = get_chat_condition(filter_chat[0], True, ["key_remote_jid", "jid_group.raw_string"], "jid", "android") chat_filter_include = get_chat_condition(
chat_filter_exclude = get_chat_condition(filter_chat[1], False, ["key_remote_jid", "jid_group.raw_string"], "jid", "android") filter_chat[0], True, ["key_remote_jid", "jid_group.raw_string"], "jid", "android")
chat_filter_exclude = get_chat_condition(
filter_chat[1], False, ["key_remote_jid", "jid_group.raw_string"], "jid", "android")
date_filter = f'AND message.timestamp {filter_date}' if filter_date is not None else '' date_filter = f'AND message.timestamp {filter_date}' if filter_date is not None else ''
empty_filter = get_cond_for_empty(filter_empty, "key_remote_jid", "broadcast") empty_filter = get_cond_for_empty(filter_empty, "key_remote_jid", "broadcast")
@@ -755,11 +781,11 @@ def _process_vcard_row(row, path, data):
file_name = "".join(x for x in media_name if x.isalnum()) file_name = "".join(x for x in media_name if x.isalnum())
file_name = file_name.encode('utf-8')[:230].decode('utf-8', 'ignore') file_name = file_name.encode('utf-8')[:230].decode('utf-8', 'ignore')
file_path = os.path.join(path, f"{file_name}.vcf") file_path = os.path.join(path, f"{file_name}.vcf")
if not os.path.isfile(file_path): if not os.path.isfile(file_path):
with open(file_path, "w", encoding="utf-8") as f: with open(file_path, "w", encoding="utf-8") as f:
f.write(row["vcard"]) f.write(row["vcard"])
message = data.get_chat(row["key_remote_jid"]).get_message(row["message_row_id"]) message = data.get_chat(row["key_remote_jid"]).get_message(row["message_row_id"])
message.data = "This media include the following vCard file(s):<br>" \ message.data = "This media include the following vCard file(s):<br>" \
f'<a href="{htmle(file_path)}">{htmle(media_name)}</a>' f'<a href="{htmle(file_path)}">{htmle(media_name)}</a>'
@@ -771,28 +797,29 @@ def _process_vcard_row(row, path, data):
def calls(db, data, timezone_offset, filter_chat): def calls(db, data, timezone_offset, filter_chat):
"""Process call logs from WhatsApp database.""" """Process call logs from WhatsApp database."""
c = db.cursor() c = db.cursor()
# Check if there are any calls that match the filter # Check if there are any calls that match the filter
total_row_number = _get_calls_count(c, filter_chat) total_row_number = _get_calls_count(c, filter_chat)
if total_row_number == 0: if total_row_number == 0:
return return
print(f"\nProcessing calls...({total_row_number})", end="\r") logger.info(f"Processing calls...({total_row_number})\r")
# Fetch call data # Fetch call data
calls_data = _fetch_calls_data(c, filter_chat) calls_data = _fetch_calls_data(c, filter_chat)
# Create a chat store for all calls # Create a chat store for all calls
chat = ChatStore(Device.ANDROID, "WhatsApp Calls") chat = ChatStore(Device.ANDROID, "WhatsApp Calls")
# Process each call # Process each call
content = calls_data.fetchone() content = calls_data.fetchone()
while content is not None: while content is not None:
_process_call_record(content, chat, data, timezone_offset) _process_call_record(content, chat, data, timezone_offset)
content = calls_data.fetchone() content = calls_data.fetchone()
# Add the calls chat to the data # Add the calls chat to the data
data.add_chat("000000000000000", chat) data.add_chat("000000000000000", chat)
logger.info(f"Processed {total_row_number} calls{CLEAR_LINE}")
def _get_calls_count(c, filter_chat): def _get_calls_count(c, filter_chat):
@@ -855,7 +882,7 @@ def _process_call_record(content, chat, data, timezone_offset):
received_timestamp=None, # TODO: Add timestamp received_timestamp=None, # TODO: Add timestamp
read_timestamp=None # TODO: Add timestamp read_timestamp=None # TODO: Add timestamp
) )
# Get caller/callee name # Get caller/callee name
_jid = content["raw_string"] _jid = content["raw_string"]
name = data.get_chat(_jid).name if _jid in data else content["chat_subject"] or None name = data.get_chat(_jid).name if _jid in data else content["chat_subject"] or None
@@ -864,13 +891,13 @@ def _process_call_record(content, chat, data, timezone_offset):
else: else:
fallback = None fallback = None
call.sender = name or fallback call.sender = name or fallback
# Set metadata # Set metadata
call.meta = True call.meta = True
# Construct call description based on call type and result # Construct call description based on call type and result
call.data = _construct_call_description(content, call) call.data = _construct_call_description(content, call)
# Add call to chat # Add call to chat
chat.add_message(content["_id"], call) chat.add_message(content["_id"], call)
@@ -882,7 +909,7 @@ def _construct_call_description(content, call):
f"call {'to' if call.from_me else 'from'} " f"call {'to' if call.from_me else 'from'} "
f"{call.sender} was " f"{call.sender} was "
) )
if content['call_result'] in (0, 4, 7): if content['call_result'] in (0, 4, 7):
description += "cancelled." if call.from_me else "missed." description += "cancelled." if call.from_me else "missed."
elif content['call_result'] == 2: elif content['call_result'] == 2:
@@ -898,26 +925,26 @@ def _construct_call_description(content, call):
) )
else: else:
description += "in an unknown state." description += "in an unknown state."
return description return description
def create_html( def create_html(
data, data,
output_folder, output_folder,
template=None, template=None,
embedded=False, embedded=False,
offline_static=False, offline_static=False,
maximum_size=None, maximum_size=None,
no_avatar=False, no_avatar=False,
experimental=False, experimental=False,
headline=None headline=None
): ):
"""Generate HTML chat files from data.""" """Generate HTML chat files from data."""
template = setup_template(template, no_avatar, experimental) template = setup_template(template, no_avatar, experimental)
total_row_number = len(data) total_row_number = len(data)
print(f"\nGenerating chats...(0/{total_row_number})", end="\r") logger.info(f"Generating chats...(0/{total_row_number})\r")
# Create output directory if it doesn't exist # Create output directory if it doesn't exist
if not os.path.isdir(output_folder): if not os.path.isdir(output_folder):
@@ -930,37 +957,37 @@ def create_html(
if len(current_chat) == 0: if len(current_chat) == 0:
# Skip empty chats # Skip empty chats
continue continue
safe_file_name, name = get_file_name(contact, current_chat) safe_file_name, name = get_file_name(contact, current_chat)
if maximum_size is not None: if maximum_size is not None:
_generate_paginated_chat( _generate_paginated_chat(
current_chat, current_chat,
safe_file_name, safe_file_name,
name, name,
contact, contact,
output_folder, output_folder,
template, template,
w3css, w3css,
maximum_size, maximum_size,
headline headline
) )
else: else:
_generate_single_chat( _generate_single_chat(
current_chat, current_chat,
safe_file_name, safe_file_name,
name, name,
contact, contact,
output_folder, output_folder,
template, template,
w3css, w3css,
headline headline
) )
if current % 10 == 0:
print(f"Generating chats...({current}/{total_row_number})", end="\r")
print(f"Generating chats...({total_row_number}/{total_row_number})", end="\r") if current % 10 == 0:
logger.info(f"Generating chats...({current}/{total_row_number})\r")
logger.info(f"Generated {total_row_number} chats{CLEAR_LINE}")
def _generate_single_chat(current_chat, safe_file_name, name, contact, output_folder, template, w3css, headline): def _generate_single_chat(current_chat, safe_file_name, name, contact, output_folder, template, w3css, headline):
@@ -984,20 +1011,20 @@ def _generate_paginated_chat(current_chat, safe_file_name, name, contact, output
current_size = 0 current_size = 0
current_page = 1 current_page = 1
render_box = [] render_box = []
# Use default maximum size if set to 0 # Use default maximum size if set to 0
if maximum_size == 0: if maximum_size == 0:
maximum_size = MAX_SIZE maximum_size = MAX_SIZE
last_msg = current_chat.get_last_message().key_id last_msg = current_chat.get_last_message().key_id
for message in current_chat.values(): for message in current_chat.values():
# Calculate message size # Calculate message size
if message.data is not None and not message.meta and not message.media: if message.data is not None and not message.meta and not message.media:
current_size += len(message.data) + ROW_SIZE current_size += len(message.data) + ROW_SIZE
else: else:
current_size += ROW_SIZE + 100 # Assume media and meta HTML are 100 bytes current_size += ROW_SIZE + 100 # Assume media and meta HTML are 100 bytes
if current_size > maximum_size: if current_size > maximum_size:
# Create a new page # Create a new page
output_file_name = f"{output_folder}/{safe_file_name}-{current_page}.html" output_file_name = f"{output_folder}/{safe_file_name}-{current_page}.html"
@@ -1041,25 +1068,25 @@ def _generate_paginated_chat(current_chat, safe_file_name, name, contact, output
def create_txt(data, output): def create_txt(data, output):
"""Generate text files from chat data.""" """Generate text files from chat data."""
os.makedirs(output, exist_ok=True) os.makedirs(output, exist_ok=True)
for jik, chat in data.items(): for jik, chat in data.items():
if len(chat) == 0: if len(chat) == 0:
continue continue
# Determine file name # Determine file name
if chat.name is not None: if chat.name is not None:
contact = chat.name.replace('/', '') contact = chat.name.replace('/', '')
else: else:
contact = jik.replace('+', '') contact = jik.replace('+', '')
output_file = os.path.join(output, f"{contact}.txt") output_file = os.path.join(output, f"{contact}.txt")
with open(output_file, "w", encoding="utf8") as f: with open(output_file, "w", encoding="utf8") as f:
for message in chat.values(): for message in chat.values():
# Skip metadata in text format # Skip metadata in text format
if message.meta and message.mime != "media": if message.meta and message.mime != "media":
continue continue
# Format the message # Format the message
formatted_message = _format_message_for_txt(message, contact) formatted_message = _format_message_for_txt(message, contact)
f.write(f"{formatted_message}\n") f.write(f"{formatted_message}\n")
@@ -1068,16 +1095,16 @@ def create_txt(data, output):
def _format_message_for_txt(message, contact): def _format_message_for_txt(message, contact):
"""Format a message for text output.""" """Format a message for text output."""
date = datetime.fromtimestamp(message.timestamp).date() date = datetime.fromtimestamp(message.timestamp).date()
# Determine the sender name # Determine the sender name
if message.from_me: if message.from_me:
name = "You" name = "You"
else: else:
name = message.sender if message.sender else contact name = message.sender if message.sender else contact
prefix = f"[{date} {message.time}] {name}: " prefix = f"[{date} {message.time}] {name}: "
prefix_length = len(prefix) prefix_length = len(prefix)
# Handle different message types # Handle different message types
if message.media and ("/" in message.mime or message.mime == "media"): if message.media and ("/" in message.mime or message.mime == "media"):
if message.data == "The media is missing": if message.data == "The media is missing":
@@ -1089,9 +1116,9 @@ def _format_message_for_txt(message, contact):
message_text = "" message_text = ""
else: else:
message_text = message.data.replace('<br>', f'\n{" " * prefix_length}') message_text = message.data.replace('<br>', f'\n{" " * prefix_length}')
# Add caption if present # Add caption if present
if message.caption is not None: if message.caption is not None:
message_text += "\n" + ' ' * len(prefix) + message.caption.replace('<br>', f'\n{" " * prefix_length}') message_text += "\n" + ' ' * len(prefix) + message.caption.replace('<br>', f'\n{" " * prefix_length}')
return f"{prefix}{message_text}" return f"{prefix}{message_text}"

View File

@@ -24,51 +24,19 @@ import struct
import codecs import codecs
from datetime import datetime, timedelta from datetime import datetime, timedelta
class BPListWriter(object):
def __init__(self, objects):
self.bplist = ""
self.objects = objects
def binary(self):
'''binary -> string
Generates bplist
'''
self.data = 'bplist00'
# TODO: flatten objects and count max length size
# TODO: write objects and save offsets
# TODO: write offsets
# TODO: write metadata
return self.data
def write(self, filename):
'''
Writes bplist to file
'''
if self.bplist != "":
pass
# TODO: save self.bplist to file
else:
raise Exception('BPlist not yet generated')
class BPListReader(object): class BPListReader(object):
def __init__(self, s): def __init__(self, s):
self.data = s self.data = s
self.objects = [] self.objects = []
self.resolved = {} self.resolved = {}
def __unpackIntStruct(self, sz, s): def __unpackIntStruct(self, sz, s):
'''__unpackIntStruct(size, string) -> int '''__unpackIntStruct(size, string) -> int
Unpacks the integer of given size (1, 2 or 4 bytes) from string Unpacks the integer of given size (1, 2 or 4 bytes) from string
''' '''
if sz == 1: if sz == 1:
ot = '!B' ot = '!B'
elif sz == 2: elif sz == 2:
ot = '!H' ot = '!H'
@@ -79,17 +47,17 @@ class BPListReader(object):
else: else:
raise Exception('int unpack size '+str(sz)+' unsupported') raise Exception('int unpack size '+str(sz)+' unsupported')
return struct.unpack(ot, s)[0] return struct.unpack(ot, s)[0]
def __unpackInt(self, offset): def __unpackInt(self, offset):
'''__unpackInt(offset) -> int '''__unpackInt(offset) -> int
Unpacks int field from plist at given offset Unpacks int field from plist at given offset
''' '''
return self.__unpackIntMeta(offset)[1] return self.__unpackIntMeta(offset)[1]
def __unpackIntMeta(self, offset): def __unpackIntMeta(self, offset):
'''__unpackIntMeta(offset) -> (size, int) '''__unpackIntMeta(offset) -> (size, int)
Unpacks int field from plist at given offset and returns its size and value Unpacks int field from plist at given offset and returns its size and value
''' '''
obj_header = self.data[offset] obj_header = self.data[offset]
@@ -99,7 +67,7 @@ class BPListReader(object):
def __resolveIntSize(self, obj_info, offset): def __resolveIntSize(self, obj_info, offset):
'''__resolveIntSize(obj_info, offset) -> (count, offset) '''__resolveIntSize(obj_info, offset) -> (count, offset)
Calculates count of objref* array entries and returns count and offset to first element Calculates count of objref* array entries and returns count and offset to first element
''' '''
if obj_info == 0x0F: if obj_info == 0x0F:
@@ -112,10 +80,10 @@ class BPListReader(object):
def __unpackFloatStruct(self, sz, s): def __unpackFloatStruct(self, sz, s):
'''__unpackFloatStruct(size, string) -> float '''__unpackFloatStruct(size, string) -> float
Unpacks the float of given size (4 or 8 bytes) from string Unpacks the float of given size (4 or 8 bytes) from string
''' '''
if sz == 4: if sz == 4:
ot = '!f' ot = '!f'
elif sz == 8: elif sz == 8:
ot = '!d' ot = '!d'
@@ -125,7 +93,7 @@ class BPListReader(object):
def __unpackFloat(self, offset): def __unpackFloat(self, offset):
'''__unpackFloat(offset) -> float '''__unpackFloat(offset) -> float
Unpacks float field from plist at given offset Unpacks float field from plist at given offset
''' '''
obj_header = self.data[offset] obj_header = self.data[offset]
@@ -135,70 +103,79 @@ class BPListReader(object):
def __unpackDate(self, offset): def __unpackDate(self, offset):
td = int(struct.unpack(">d", self.data[offset+1:offset+9])[0]) td = int(struct.unpack(">d", self.data[offset+1:offset+9])[0])
return datetime(year=2001,month=1,day=1) + timedelta(seconds=td) return datetime(year=2001, month=1, day=1) + timedelta(seconds=td)
def __unpackItem(self, offset): def __unpackItem(self, offset):
'''__unpackItem(offset) '''__unpackItem(offset)
Unpacks and returns an item from plist Unpacks and returns an item from plist
''' '''
obj_header = self.data[offset] obj_header = self.data[offset]
obj_type, obj_info = (obj_header & 0xF0), (obj_header & 0x0F) obj_type, obj_info = (obj_header & 0xF0), (obj_header & 0x0F)
if obj_type == 0x00: if obj_type == 0x00:
if obj_info == 0x00: # null 0000 0000 if obj_info == 0x00: # null 0000 0000
return None return None
elif obj_info == 0x08: # bool 0000 1000 // false elif obj_info == 0x08: # bool 0000 1000 // false
return False return False
elif obj_info == 0x09: # bool 0000 1001 // true elif obj_info == 0x09: # bool 0000 1001 // true
return True return True
elif obj_info == 0x0F: # fill 0000 1111 // fill byte elif obj_info == 0x0F: # fill 0000 1111 // fill byte
raise Exception("0x0F Not Implemented") # this is really pad byte, FIXME raise Exception("0x0F Not Implemented") # this is really pad byte, FIXME
else: else:
raise Exception('unpack item type '+str(obj_header)+' at '+str(offset)+ 'failed') raise Exception('unpack item type '+str(obj_header)+' at '+str(offset) + 'failed')
elif obj_type == 0x10: # int 0001 nnnn ... // # of bytes is 2^nnnn, big-endian bytes elif obj_type == 0x10: # int 0001 nnnn ... // # of bytes is 2^nnnn, big-endian bytes
return self.__unpackInt(offset) return self.__unpackInt(offset)
elif obj_type == 0x20: # real 0010 nnnn ... // # of bytes is 2^nnnn, big-endian bytes elif obj_type == 0x20: # real 0010 nnnn ... // # of bytes is 2^nnnn, big-endian bytes
return self.__unpackFloat(offset) return self.__unpackFloat(offset)
elif obj_type == 0x30: # date 0011 0011 ... // 8 byte float follows, big-endian bytes elif obj_type == 0x30: # date 0011 0011 ... // 8 byte float follows, big-endian bytes
return self.__unpackDate(offset) return self.__unpackDate(offset)
elif obj_type == 0x40: # data 0100 nnnn [int] ... // nnnn is number of bytes unless 1111 then int count follows, followed by bytes # data 0100 nnnn [int] ... // nnnn is number of bytes unless 1111 then int count follows, followed by bytes
elif obj_type == 0x40:
obj_count, objref = self.__resolveIntSize(obj_info, offset) obj_count, objref = self.__resolveIntSize(obj_info, offset)
return self.data[objref:objref+obj_count] # XXX: we return data as str return self.data[objref:objref+obj_count] # XXX: we return data as str
elif obj_type == 0x50: # string 0101 nnnn [int] ... // ASCII string, nnnn is # of chars, else 1111 then int count, then bytes # string 0101 nnnn [int] ... // ASCII string, nnnn is # of chars, else 1111 then int count, then bytes
elif obj_type == 0x50:
obj_count, objref = self.__resolveIntSize(obj_info, offset) obj_count, objref = self.__resolveIntSize(obj_info, offset)
return self.data[objref:objref+obj_count] return self.data[objref:objref+obj_count]
elif obj_type == 0x60: # string 0110 nnnn [int] ... // Unicode string, nnnn is # of chars, else 1111 then int count, then big-endian 2-byte uint16_t # string 0110 nnnn [int] ... // Unicode string, nnnn is # of chars, else 1111 then int count, then big-endian 2-byte uint16_t
elif obj_type == 0x60:
obj_count, objref = self.__resolveIntSize(obj_info, offset) obj_count, objref = self.__resolveIntSize(obj_info, offset)
return self.data[objref:objref+obj_count*2].decode('utf-16be') return self.data[objref:objref+obj_count*2].decode('utf-16be')
elif obj_type == 0x80: # uid 1000 nnnn ... // nnnn+1 is # of bytes elif obj_type == 0x80: # uid 1000 nnnn ... // nnnn+1 is # of bytes
# FIXME: Accept as a string for now # FIXME: Accept as a string for now
obj_count, objref = self.__resolveIntSize(obj_info, offset) obj_count, objref = self.__resolveIntSize(obj_info, offset)
return self.data[objref:objref+obj_count] return self.data[objref:objref+obj_count]
elif obj_type == 0xA0: # array 1010 nnnn [int] objref* // nnnn is count, unless '1111', then int count follows # array 1010 nnnn [int] objref* // nnnn is count, unless '1111', then int count follows
elif obj_type == 0xA0:
obj_count, objref = self.__resolveIntSize(obj_info, offset) obj_count, objref = self.__resolveIntSize(obj_info, offset)
arr = [] arr = []
for i in range(obj_count): for i in range(obj_count):
arr.append(self.__unpackIntStruct(self.object_ref_size, self.data[objref+i*self.object_ref_size:objref+i*self.object_ref_size+self.object_ref_size])) arr.append(self.__unpackIntStruct(
self.object_ref_size, self.data[objref+i*self.object_ref_size:objref+i*self.object_ref_size+self.object_ref_size]))
return arr return arr
elif obj_type == 0xC0: # set 1100 nnnn [int] objref* // nnnn is count, unless '1111', then int count follows # set 1100 nnnn [int] objref* // nnnn is count, unless '1111', then int count follows
elif obj_type == 0xC0:
# XXX: not serializable via apple implementation # XXX: not serializable via apple implementation
raise Exception("0xC0 Not Implemented") # FIXME: implement raise Exception("0xC0 Not Implemented") # FIXME: implement
elif obj_type == 0xD0: # dict 1101 nnnn [int] keyref* objref* // nnnn is count, unless '1111', then int count follows # dict 1101 nnnn [int] keyref* objref* // nnnn is count, unless '1111', then int count follows
elif obj_type == 0xD0:
obj_count, objref = self.__resolveIntSize(obj_info, offset) obj_count, objref = self.__resolveIntSize(obj_info, offset)
keys = [] keys = []
for i in range(obj_count): for i in range(obj_count):
keys.append(self.__unpackIntStruct(self.object_ref_size, self.data[objref+i*self.object_ref_size:objref+i*self.object_ref_size+self.object_ref_size])) keys.append(self.__unpackIntStruct(
self.object_ref_size, self.data[objref+i*self.object_ref_size:objref+i*self.object_ref_size+self.object_ref_size]))
values = [] values = []
objref += obj_count*self.object_ref_size objref += obj_count*self.object_ref_size
for i in range(obj_count): for i in range(obj_count):
values.append(self.__unpackIntStruct(self.object_ref_size, self.data[objref+i*self.object_ref_size:objref+i*self.object_ref_size+self.object_ref_size])) values.append(self.__unpackIntStruct(
self.object_ref_size, self.data[objref+i*self.object_ref_size:objref+i*self.object_ref_size+self.object_ref_size]))
dic = {} dic = {}
for i in range(obj_count): for i in range(obj_count):
dic[keys[i]] = values[i] dic[keys[i]] = values[i]
return dic return dic
else: else:
raise Exception('don\'t know how to unpack obj type '+hex(obj_type)+' at '+str(offset)) raise Exception('don\'t know how to unpack obj type '+hex(obj_type)+' at '+str(offset))
def __resolveObject(self, idx): def __resolveObject(self, idx):
try: try:
return self.resolved[idx] return self.resolved[idx]
@@ -212,7 +189,7 @@ class BPListReader(object):
return newArr return newArr
if type(obj) == dict: if type(obj) == dict:
newDic = {} newDic = {}
for k,v in obj.items(): for k, v in obj.items():
key_resolved = self.__resolveObject(k) key_resolved = self.__resolveObject(k)
if isinstance(key_resolved, str): if isinstance(key_resolved, str):
rk = key_resolved rk = key_resolved
@@ -225,15 +202,16 @@ class BPListReader(object):
else: else:
self.resolved[idx] = obj self.resolved[idx] = obj
return obj return obj
def parse(self): def parse(self):
# read header # read header
if self.data[:8] != b'bplist00': if self.data[:8] != b'bplist00':
raise Exception('Bad magic') raise Exception('Bad magic')
# read trailer # read trailer
self.offset_size, self.object_ref_size, self.number_of_objects, self.top_object, self.table_offset = struct.unpack('!6xBB4xI4xI4xI', self.data[-32:]) self.offset_size, self.object_ref_size, self.number_of_objects, self.top_object, self.table_offset = struct.unpack(
#print "** plist offset_size:",self.offset_size,"objref_size:",self.object_ref_size,"num_objs:",self.number_of_objects,"top:",self.top_object,"table_ofs:",self.table_offset '!6xBB4xI4xI4xI', self.data[-32:])
# print "** plist offset_size:",self.offset_size,"objref_size:",self.object_ref_size,"num_objs:",self.number_of_objects,"top:",self.top_object,"table_ofs:",self.table_offset
# read offset table # read offset table
self.offset_table = self.data[self.table_offset:-32] self.offset_table = self.data[self.table_offset:-32]
@@ -243,50 +221,25 @@ class BPListReader(object):
offset_entry = ot[:self.offset_size] offset_entry = ot[:self.offset_size]
ot = ot[self.offset_size:] ot = ot[self.offset_size:]
self.offsets.append(self.__unpackIntStruct(self.offset_size, offset_entry)) self.offsets.append(self.__unpackIntStruct(self.offset_size, offset_entry))
#print "** plist offsets:",self.offsets # print "** plist offsets:",self.offsets
# read object table # read object table
self.objects = [] self.objects = []
k = 0 k = 0
for i in self.offsets: for i in self.offsets:
obj = self.__unpackItem(i) obj = self.__unpackItem(i)
#print "** plist unpacked",k,type(obj),obj,"at",i # print "** plist unpacked",k,type(obj),obj,"at",i
k += 1 k += 1
self.objects.append(obj) self.objects.append(obj)
# rebuild object tree # rebuild object tree
#for i in range(len(self.objects)): # for i in range(len(self.objects)):
# self.__resolveObject(i) # self.__resolveObject(i)
# return root object # return root object
return self.__resolveObject(self.top_object) return self.__resolveObject(self.top_object)
@classmethod @classmethod
def plistWithString(cls, s): def plistWithString(cls, s):
parser = cls(s) parser = cls(s)
return parser.parse() return parser.parse()
# helpers for testing
def plist(obj):
from Foundation import NSPropertyListSerialization, NSPropertyListBinaryFormat_v1_0
b = NSPropertyListSerialization.dataWithPropertyList_format_options_error_(obj, NSPropertyListBinaryFormat_v1_0, 0, None)
return str(b.bytes())
def unplist(s):
from Foundation import NSData, NSPropertyListSerialization
d = NSData.dataWithBytes_length_(s, len(s))
return NSPropertyListSerialization.propertyListWithData_options_format_error_(d, 0, None, None)
if __name__ == "__main__":
import os
import sys
import json
file_path = sys.argv[1]
with open(file_path, "rb") as fp:
data = fp.read()
out = BPListReader(data).parse()
with open(file_path + ".json", "w") as fp:
json.dump(out, indent=4)

View File

@@ -7,6 +7,7 @@ class Timing:
""" """
Handles timestamp formatting with timezone support. Handles timestamp formatting with timezone support.
""" """
def __init__(self, timezone_offset: Optional[int]) -> None: def __init__(self, timezone_offset: Optional[int]) -> None:
""" """
Initialize Timing object. Initialize Timing object.
@@ -27,7 +28,7 @@ class Timing:
Returns: Returns:
Optional[str]: Formatted timestamp string, or None if timestamp is None Optional[str]: Formatted timestamp string, or None if timestamp is None
""" """
if timestamp: if timestamp is not None:
timestamp = timestamp / 1000 if timestamp > 9999999999 else timestamp timestamp = timestamp / 1000 if timestamp > 9999999999 else timestamp
return datetime.fromtimestamp(timestamp, TimeZone(self.timezone_offset)).strftime(format) return datetime.fromtimestamp(timestamp, TimeZone(self.timezone_offset)).strftime(format)
return None return None
@@ -37,6 +38,7 @@ class TimeZone(tzinfo):
""" """
Custom timezone class with fixed offset. Custom timezone class with fixed offset.
""" """
def __init__(self, offset: int) -> None: def __init__(self, offset: int) -> None:
""" """
Initialize TimeZone object. Initialize TimeZone object.
@@ -151,6 +153,7 @@ class ChatStore:
""" """
Stores chat information and messages. Stores chat information and messages.
""" """
def __init__(self, type: str, name: Optional[str] = None, media: Optional[str] = None) -> None: def __init__(self, type: str, name: Optional[str] = None, media: Optional[str] = None) -> None:
""" """
Initialize ChatStore object. Initialize ChatStore object.
@@ -159,7 +162,7 @@ class ChatStore:
type (str): Device type (IOS or ANDROID) type (str): Device type (IOS or ANDROID)
name (Optional[str]): Chat name name (Optional[str]): Chat name
media (Optional[str]): Path to media folder media (Optional[str]): Path to media folder
Raises: Raises:
TypeError: If name is not a string or None TypeError: If name is not a string or None
""" """
@@ -182,7 +185,7 @@ class ChatStore:
self.their_avatar_thumb = None self.their_avatar_thumb = None
self.status = None self.status = None
self.media_base = "" self.media_base = ""
def __len__(self) -> int: def __len__(self) -> int:
"""Get number of chats. Required for dict-like access.""" """Get number of chats. Required for dict-like access."""
return len(self._messages) return len(self._messages)
@@ -192,7 +195,7 @@ class ChatStore:
if not isinstance(message, Message): if not isinstance(message, Message):
raise TypeError("message must be a Message object") raise TypeError("message must be a Message object")
self._messages[id] = message self._messages[id] = message
def get_message(self, id: str) -> 'Message': def get_message(self, id: str) -> 'Message':
"""Get a message from the chat store.""" """Get a message from the chat store."""
return self._messages.get(id) return self._messages.get(id)
@@ -204,20 +207,30 @@ class ChatStore:
def to_json(self) -> Dict[str, Any]: def to_json(self) -> Dict[str, Any]:
"""Convert chat store to JSON-serializable dict.""" """Convert chat store to JSON-serializable dict."""
return { json_dict = {
'name': self.name, key: value
'type': self.type, for key, value in self.__dict__.items()
'my_avatar': self.my_avatar, if key != '_messages'
'their_avatar': self.their_avatar,
'their_avatar_thumb': self.their_avatar_thumb,
'status': self.status,
'messages': {id: msg.to_json() for id, msg in self._messages.items()}
} }
json_dict['messages'] = {id: msg.to_json() for id, msg in self._messages.items()}
return json_dict
@classmethod
def from_json(cls, data: Dict) -> 'ChatStore':
"""Create a chat store from JSON data."""
chat = cls(data.get("type"), data.get("name"))
for key, value in data.items():
if hasattr(chat, key) and key not in ("messages", "type", "name"):
setattr(chat, key, value)
for id, msg_data in data.get("messages", {}).items():
message = Message.from_json(msg_data)
chat.add_message(id, message)
return chat
def get_last_message(self) -> 'Message': def get_last_message(self) -> 'Message':
"""Get the most recent message in the chat.""" """Get the most recent message in the chat."""
return tuple(self._messages.values())[-1] return tuple(self._messages.values())[-1]
def items(self): def items(self):
"""Get message items pairs.""" """Get message items pairs."""
return self._messages.items() return self._messages.items()
@@ -230,20 +243,42 @@ class ChatStore:
"""Get all message keys in the chat.""" """Get all message keys in the chat."""
return self._messages.keys() return self._messages.keys()
def merge_with(self, other: 'ChatStore'):
"""Merge another ChatStore into this one.
Args:
other (ChatStore): The ChatStore to merge with
"""
if not isinstance(other, ChatStore):
raise TypeError("Can only merge with another ChatStore object")
# Update fields if they are not None in the other ChatStore
self.name = other.name or self.name
self.type = other.type or self.type
self.my_avatar = other.my_avatar or self.my_avatar
self.their_avatar = other.their_avatar or self.their_avatar
self.their_avatar_thumb = other.their_avatar_thumb or self.their_avatar_thumb
self.status = other.status or self.status
# Merge messages
self._messages.update(other._messages)
class Message: class Message:
""" """
Represents a single message in a chat. Represents a single message in a chat.
""" """
def __init__( def __init__(
self, self,
*, *,
from_me: Union[bool, int], from_me: Union[bool, int],
timestamp: int, timestamp: int,
time: Union[int, float, str], time: Union[int, float, str],
key_id: int, key_id: Union[int, str],
received_timestamp: int, received_timestamp: int = None,
read_timestamp: int, read_timestamp: int = None,
timezone_offset: int = 0, timezone_offset: int = 0,
message_type: Optional[int] = None message_type: Optional[int] = None
) -> None: ) -> None:
@@ -255,8 +290,8 @@ class Message:
timestamp (int): Message timestamp timestamp (int): Message timestamp
time (Union[int, float, str]): Message time time (Union[int, float, str]): Message time
key_id (int): Message unique identifier key_id (int): Message unique identifier
received_timestamp (int): When message was received received_timestamp (int, optional): When message was received. Defaults to None
read_timestamp (int): When message was read read_timestamp (int, optional): When message was read. Defaults to None
timezone_offset (int, optional): Hours offset from UTC. Defaults to 0 timezone_offset (int, optional): Hours offset from UTC. Defaults to 0
message_type (Optional[int], optional): Type of message. Defaults to None message_type (Optional[int], optional): Type of message. Defaults to None
@@ -266,7 +301,7 @@ class Message:
self.from_me = bool(from_me) self.from_me = bool(from_me)
self.timestamp = timestamp / 1000 if timestamp > 9999999999 else timestamp self.timestamp = timestamp / 1000 if timestamp > 9999999999 else timestamp
timing = Timing(timezone_offset) timing = Timing(timezone_offset)
if isinstance(time, (int, float)): if isinstance(time, (int, float)):
self.time = timing.format_timestamp(self.timestamp, "%H:%M") self.time = timing.format_timestamp(self.timestamp, "%H:%M")
elif isinstance(time, str): elif isinstance(time, str):
@@ -281,10 +316,22 @@ class Message:
self.sender = None self.sender = None
self.safe = False self.safe = False
self.mime = None self.mime = None
self.message_type = message_type, self.message_type = message_type
self.received_timestamp = timing.format_timestamp(received_timestamp, "%Y/%m/%d %H:%M") if isinstance(received_timestamp, (int, float)):
self.read_timestamp = timing.format_timestamp(read_timestamp, "%Y/%m/%d %H:%M") self.received_timestamp = timing.format_timestamp(
received_timestamp, "%Y/%m/%d %H:%M")
elif isinstance(received_timestamp, str):
self.received_timestamp = received_timestamp
else:
self.received_timestamp = None
if isinstance(read_timestamp, (int, float)):
self.read_timestamp = timing.format_timestamp(
read_timestamp, "%Y/%m/%d %H:%M")
elif isinstance(read_timestamp, str):
self.read_timestamp = read_timestamp
else:
self.read_timestamp = None
# Extra attributes # Extra attributes
self.reply = None self.reply = None
self.quoted_data = None self.quoted_data = None
@@ -295,19 +342,24 @@ class Message:
def to_json(self) -> Dict[str, Any]: def to_json(self) -> Dict[str, Any]:
"""Convert message to JSON-serializable dict.""" """Convert message to JSON-serializable dict."""
return { return {
'from_me': self.from_me, key: value
'timestamp': self.timestamp, for key, value in self.__dict__.items()
'time': self.time, }
'media': self.media,
'key_id': self.key_id, @classmethod
'meta': self.meta, def from_json(cls, data: Dict) -> 'Message':
'data': self.data, message = cls(
'sender': self.sender, from_me=data["from_me"],
'safe': self.safe, timestamp=data["timestamp"],
'mime': self.mime, time=data["time"],
'reply': self.reply, key_id=data["key_id"],
'quoted_data': self.quoted_data, message_type=data.get("message_type"),
'caption': self.caption, received_timestamp=data.get("received_timestamp"),
'thumb': self.thumb, read_timestamp=data.get("read_timestamp")
'sticker': self.sticker )
} added = ("from_me", "timestamp", "time", "key_id", "message_type",
"received_timestamp", "read_timestamp")
for key, value in data.items():
if hasattr(message, key) and key not in added:
setattr(message, key, value)
return message

View File

@@ -1,21 +1,25 @@
#!/usr/bin/python3 #!/usr/bin/python3
import os import os
import logging
from datetime import datetime from datetime import datetime
from mimetypes import MimeTypes from mimetypes import MimeTypes
from Whatsapp_Chat_Exporter.data_model import ChatStore, Message from Whatsapp_Chat_Exporter.data_model import ChatStore, Message
from Whatsapp_Chat_Exporter.utility import Device from Whatsapp_Chat_Exporter.utility import CLEAR_LINE, Device
logger = logging.getLogger(__name__)
def messages(path, data, assume_first_as_me=False): def messages(path, data, assume_first_as_me=False):
""" """
Extracts messages from an exported WhatsApp chat file. Extracts messages from an exported WhatsApp chat file.
Args: Args:
path: Path to the exported chat file path: Path to the exported chat file
data: Data container object to store the parsed chat data: Data container object to store the parsed chat
assume_first_as_me: If True, assumes the first message is sent from the user without asking assume_first_as_me: If True, assumes the first message is sent from the user without asking
Returns: Returns:
Updated data container with extracted messages Updated data container with extracted messages
""" """
@@ -23,55 +27,55 @@ def messages(path, data, assume_first_as_me=False):
chat = data.add_chat("ExportedChat", ChatStore(Device.EXPORTED)) chat = data.add_chat("ExportedChat", ChatStore(Device.EXPORTED))
you = "" # Will store the username of the current user you = "" # Will store the username of the current user
user_identification_done = False # Flag to track if user identification has been done user_identification_done = False # Flag to track if user identification has been done
# First pass: count total lines for progress reporting # First pass: count total lines for progress reporting
with open(path, "r", encoding="utf8") as file: with open(path, "r", encoding="utf8") as file:
total_row_number = sum(1 for _ in file) total_row_number = sum(1 for _ in file)
# Second pass: process the messages # Second pass: process the messages
with open(path, "r", encoding="utf8") as file: with open(path, "r", encoding="utf8") as file:
for index, line in enumerate(file): for index, line in enumerate(file):
you, user_identification_done = process_line( you, user_identification_done = process_line(
line, index, chat, path, you, line, index, chat, path, you,
assume_first_as_me, user_identification_done assume_first_as_me, user_identification_done
) )
# Show progress # Show progress
if index % 1000 == 0: if index % 1000 == 0:
print(f"Processing messages & media...({index}/{total_row_number})", end="\r") logger.info(f"Processing messages & media...({index}/{total_row_number})\r")
print(f"Processing messages & media...({total_row_number}/{total_row_number})") logger.info(f"Processed {total_row_number} messages & media{CLEAR_LINE}")
return data return data
def process_line(line, index, chat, file_path, you, assume_first_as_me, user_identification_done): def process_line(line, index, chat, file_path, you, assume_first_as_me, user_identification_done):
""" """
Process a single line from the chat file Process a single line from the chat file
Returns: Returns:
Tuple of (updated_you_value, updated_user_identification_done_flag) Tuple of (updated_you_value, updated_user_identification_done_flag)
""" """
parts = line.split(" - ", 1) parts = line.split(" - ", 1)
# Check if this is a new message (has timestamp format) # Check if this is a new message (has timestamp format)
if len(parts) > 1: if len(parts) > 1:
time = parts[0] time = parts[0]
you, user_identification_done = process_new_message( you, user_identification_done = process_new_message(
time, parts[1], index, chat, you, file_path, time, parts[1], index, chat, you, file_path,
assume_first_as_me, user_identification_done assume_first_as_me, user_identification_done
) )
else: else:
# This is a continuation of the previous message # This is a continuation of the previous message
process_message_continuation(line, index, chat) process_message_continuation(line, index, chat)
return you, user_identification_done return you, user_identification_done
def process_new_message(time, content, index, chat, you, file_path, def process_new_message(time, content, index, chat, you, file_path,
assume_first_as_me, user_identification_done): assume_first_as_me, user_identification_done):
""" """
Process a line that contains a new message Process a line that contains a new message
Returns: Returns:
Tuple of (updated_you_value, updated_user_identification_done_flag) Tuple of (updated_you_value, updated_user_identification_done_flag)
""" """
@@ -84,7 +88,7 @@ def process_new_message(time, content, index, chat, you, file_path,
received_timestamp=None, received_timestamp=None,
read_timestamp=None read_timestamp=None
) )
# Check if this is a system message (no name:message format) # Check if this is a system message (no name:message format)
if ":" not in content: if ":" not in content:
msg.data = content msg.data = content
@@ -92,7 +96,7 @@ def process_new_message(time, content, index, chat, you, file_path,
else: else:
# Process user message # Process user message
name, message = content.strip().split(":", 1) name, message = content.strip().split(":", 1)
# Handle user identification # Handle user identification
if you == "": if you == "":
if chat.name is None: if chat.name is None:
@@ -109,17 +113,17 @@ def process_new_message(time, content, index, chat, you, file_path,
# If we know the chat name, anyone else must be "you" # If we know the chat name, anyone else must be "you"
if name != chat.name: if name != chat.name:
you = name you = name
# Set the chat name if needed # Set the chat name if needed
if chat.name is None and name != you: if chat.name is None and name != you:
chat.name = name chat.name = name
# Determine if this message is from the current user # Determine if this message is from the current user
msg.from_me = (name == you) msg.from_me = (name == you)
# Process message content # Process message content
process_message_content(msg, message, file_path) process_message_content(msg, message, file_path)
chat.add_message(index, msg) chat.add_message(index, msg)
return you, user_identification_done return you, user_identification_done
@@ -140,11 +144,11 @@ def process_attached_file(msg, message, file_path):
"""Process an attached file in a message""" """Process an attached file in a message"""
mime = MimeTypes() mime = MimeTypes()
msg.media = True msg.media = True
# Extract file path and check if it exists # Extract file path and check if it exists
file_name = message.split("(file attached)")[0].strip() file_name = message.split("(file attached)")[0].strip()
attached_file_path = os.path.join(os.path.dirname(file_path), file_name) attached_file_path = os.path.join(os.path.dirname(file_path), file_name)
if os.path.isfile(attached_file_path): if os.path.isfile(attached_file_path):
msg.data = attached_file_path msg.data = attached_file_path
guess = mime.guess_type(attached_file_path)[0] guess = mime.guess_type(attached_file_path)[0]
@@ -161,9 +165,9 @@ def process_message_continuation(line, index, chat):
lookback = index - 1 lookback = index - 1
while lookback not in chat.keys(): while lookback not in chat.keys():
lookback -= 1 lookback -= 1
msg = chat.get_message(lookback) msg = chat.get_message(lookback)
# Add the continuation line to the message # Add the continuation line to the message
if msg.media: if msg.media:
msg.caption = line.strip() msg.caption = line.strip()
@@ -178,4 +182,4 @@ def prompt_for_user_identification(name):
if ans == "y": if ans == "y":
return name return name
elif ans == "n": elif ans == "n":
return "" return ""

View File

@@ -1,14 +1,18 @@
#!/usr/bin/python3 #!/usr/bin/python3
import os import os
import logging
import shutil import shutil
from glob import glob from glob import glob
from pathlib import Path from pathlib import Path
from mimetypes import MimeTypes from mimetypes import MimeTypes
from markupsafe import escape as htmle from markupsafe import escape as htmle
from Whatsapp_Chat_Exporter.data_model import ChatStore, Message from Whatsapp_Chat_Exporter.data_model import ChatStore, Message
from Whatsapp_Chat_Exporter.utility import APPLE_TIME, CURRENT_TZ_OFFSET, get_chat_condition from Whatsapp_Chat_Exporter.utility import APPLE_TIME, CLEAR_LINE, CURRENT_TZ_OFFSET, get_chat_condition
from Whatsapp_Chat_Exporter.utility import bytes_to_readable, convert_time_unit, slugify, Device from Whatsapp_Chat_Exporter.utility import bytes_to_readable, convert_time_unit, safe_name, Device
logger = logging.getLogger(__name__)
def contacts(db, data): def contacts(db, data):
@@ -16,26 +20,27 @@ def contacts(db, data):
c = db.cursor() c = db.cursor()
c.execute("""SELECT count() FROM ZWAADDRESSBOOKCONTACT WHERE ZABOUTTEXT IS NOT NULL""") c.execute("""SELECT count() FROM ZWAADDRESSBOOKCONTACT WHERE ZABOUTTEXT IS NOT NULL""")
total_row_number = c.fetchone()[0] total_row_number = c.fetchone()[0]
print(f"Pre-processing contacts...({total_row_number})") logger.info(f"Pre-processing contacts...({total_row_number})\r")
c.execute("""SELECT ZWHATSAPPID, ZABOUTTEXT FROM ZWAADDRESSBOOKCONTACT WHERE ZABOUTTEXT IS NOT NULL""") c.execute("""SELECT ZWHATSAPPID, ZABOUTTEXT FROM ZWAADDRESSBOOKCONTACT WHERE ZABOUTTEXT IS NOT NULL""")
content = c.fetchone() content = c.fetchone()
while content is not None: while content is not None:
zwhatsapp_id = content["ZWHATSAPPID"] zwhatsapp_id = content["ZWHATSAPPID"]
if not zwhatsapp_id.endswith("@s.whatsapp.net"): if not zwhatsapp_id.endswith("@s.whatsapp.net"):
zwhatsapp_id += "@s.whatsapp.net" zwhatsapp_id += "@s.whatsapp.net"
current_chat = ChatStore(Device.IOS) current_chat = ChatStore(Device.IOS)
current_chat.status = content["ZABOUTTEXT"] current_chat.status = content["ZABOUTTEXT"]
data.add_chat(zwhatsapp_id, current_chat) data.add_chat(zwhatsapp_id, current_chat)
content = c.fetchone() content = c.fetchone()
logger.info(f"Pre-processed {total_row_number} contacts{CLEAR_LINE}")
def process_contact_avatars(current_chat, media_folder, contact_id): def process_contact_avatars(current_chat, media_folder, contact_id):
"""Process and assign avatar images for a contact.""" """Process and assign avatar images for a contact."""
path = f'{media_folder}/Media/Profile/{contact_id.split("@")[0]}' path = f'{media_folder}/Media/Profile/{contact_id.split("@")[0]}'
avatars = glob(f"{path}*") avatars = glob(f"{path}*")
if 0 < len(avatars) <= 1: if 0 < len(avatars) <= 1:
current_chat.their_avatar = avatars[0] current_chat.their_avatar = avatars[0]
else: else:
@@ -55,16 +60,18 @@ def get_contact_name(content):
return content["ZPUSHNAME"] return content["ZPUSHNAME"]
def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat, filter_empty): def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat, filter_empty, no_reply):
"""Process WhatsApp messages and contacts from the database.""" """Process WhatsApp messages and contacts from the database."""
c = db.cursor() c = db.cursor()
cursor2 = db.cursor() cursor2 = db.cursor()
# Build the chat filter conditions # Build the chat filter conditions
chat_filter_include = get_chat_condition(filter_chat[0], True, ["ZWACHATSESSION.ZCONTACTJID", "ZMEMBERJID"], "ZGROUPINFO", "ios") chat_filter_include = get_chat_condition(
chat_filter_exclude = get_chat_condition(filter_chat[1], False, ["ZWACHATSESSION.ZCONTACTJID", "ZMEMBERJID"], "ZGROUPINFO", "ios") filter_chat[0], True, ["ZWACHATSESSION.ZCONTACTJID", "ZMEMBERJID"], "ZGROUPINFO", "ios")
chat_filter_exclude = get_chat_condition(
filter_chat[1], False, ["ZWACHATSESSION.ZCONTACTJID", "ZMEMBERJID"], "ZGROUPINFO", "ios")
date_filter = f'AND ZMESSAGEDATE {filter_date}' if filter_date is not None else '' date_filter = f'AND ZMESSAGEDATE {filter_date}' if filter_date is not None else ''
# Process contacts first # Process contacts first
contact_query = f""" contact_query = f"""
SELECT count() SELECT count()
@@ -85,7 +92,7 @@ def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat,
""" """
c.execute(contact_query) c.execute(contact_query)
total_row_number = c.fetchone()[0] total_row_number = c.fetchone()[0]
print(f"Processing contacts...({total_row_number})") logger.info(f"Processing contacts...({total_row_number})\r")
# Get distinct contacts # Get distinct contacts
contacts_query = f""" contacts_query = f"""
@@ -105,13 +112,13 @@ def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat,
GROUP BY ZCONTACTJID; GROUP BY ZCONTACTJID;
""" """
c.execute(contacts_query) c.execute(contacts_query)
# Process each contact # Process each contact
content = c.fetchone() content = c.fetchone()
while content is not None: while content is not None:
contact_name = get_contact_name(content) contact_name = get_contact_name(content)
contact_id = content["ZCONTACTJID"] contact_id = content["ZCONTACTJID"]
# Add or update chat # Add or update chat
if contact_id not in data: if contact_id not in data:
current_chat = data.add_chat(contact_id, ChatStore(Device.IOS, contact_name, media_folder)) current_chat = data.add_chat(contact_id, ChatStore(Device.IOS, contact_name, media_folder))
@@ -119,11 +126,13 @@ def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat,
current_chat = data.get_chat(contact_id) current_chat = data.get_chat(contact_id)
current_chat.name = contact_name current_chat.name = contact_name
current_chat.my_avatar = os.path.join(media_folder, "Media/Profile/Photo.jpg") current_chat.my_avatar = os.path.join(media_folder, "Media/Profile/Photo.jpg")
# Process avatar images # Process avatar images
process_contact_avatars(current_chat, media_folder, contact_id) process_contact_avatars(current_chat, media_folder, contact_id)
content = c.fetchone() content = c.fetchone()
logger.info(f"Processed {total_row_number} contacts{CLEAR_LINE}")
# Get message count # Get message count
message_count_query = f""" message_count_query = f"""
SELECT count() SELECT count()
@@ -139,8 +148,8 @@ def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat,
""" """
c.execute(message_count_query) c.execute(message_count_query)
total_row_number = c.fetchone()[0] total_row_number = c.fetchone()[0]
print(f"Processing messages...(0/{total_row_number})", end="\r") logger.info(f"Processing messages...(0/{total_row_number})\r")
# Fetch messages # Fetch messages
messages_query = f""" messages_query = f"""
SELECT ZCONTACTJID, SELECT ZCONTACTJID,
@@ -168,7 +177,7 @@ def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat,
ORDER BY ZMESSAGEDATE ASC; ORDER BY ZMESSAGEDATE ASC;
""" """
c.execute(messages_query) c.execute(messages_query)
# Process each message # Process each message
i = 0 i = 0
content = c.fetchone() content = c.fetchone()
@@ -176,14 +185,14 @@ def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat,
contact_id = content["ZCONTACTJID"] contact_id = content["ZCONTACTJID"]
message_pk = content["Z_PK"] message_pk = content["Z_PK"]
is_group_message = content["ZGROUPINFO"] is not None is_group_message = content["ZGROUPINFO"] is not None
# Ensure chat exists # Ensure chat exists
if contact_id not in data: if contact_id not in data:
current_chat = data.add_chat(contact_id, ChatStore(Device.IOS)) current_chat = data.add_chat(contact_id, ChatStore(Device.IOS))
process_contact_avatars(current_chat, media_folder, contact_id) process_contact_avatars(current_chat, media_folder, contact_id)
else: else:
current_chat = data.get_chat(contact_id) current_chat = data.get_chat(contact_id)
# Create message object # Create message object
ts = APPLE_TIME + content["ZMESSAGEDATE"] ts = APPLE_TIME + content["ZMESSAGEDATE"]
message = Message( message = Message(
@@ -196,24 +205,23 @@ def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat,
received_timestamp=APPLE_TIME + content["ZSENTDATE"] if content["ZSENTDATE"] else None, received_timestamp=APPLE_TIME + content["ZSENTDATE"] if content["ZSENTDATE"] else None,
read_timestamp=None # TODO: Add timestamp read_timestamp=None # TODO: Add timestamp
) )
# Process message data # Process message data
invalid = process_message_data(message, content, is_group_message, data, cursor2) invalid = process_message_data(message, content, is_group_message, data, cursor2, no_reply)
# Add valid messages to chat # Add valid messages to chat
if not invalid: if not invalid:
current_chat.add_message(message_pk, message) current_chat.add_message(message_pk, message)
# Update progress # Update progress
i += 1 i += 1
if i % 1000 == 0: if i % 1000 == 0:
print(f"Processing messages...({i}/{total_row_number})", end="\r") logger.info(f"Processing messages...({i}/{total_row_number})\r")
content = c.fetchone() content = c.fetchone()
logger.info(f"Processed {total_row_number} messages{CLEAR_LINE}")
print(f"Processing messages...({total_row_number}/{total_row_number})", end="\r")
def process_message_data(message, content, is_group_message, data, cursor2): def process_message_data(message, content, is_group_message, data, cursor2, no_reply):
"""Process and set message data from content row.""" """Process and set message data from content row."""
# Handle group sender info # Handle group sender info
if is_group_message and content["ZISFROMME"] == 0: if is_group_message and content["ZISFROMME"] == 0:
@@ -230,13 +238,13 @@ def process_message_data(message, content, is_group_message, data, cursor2):
message.sender = name or fallback message.sender = name or fallback
else: else:
message.sender = None message.sender = None
# Handle metadata messages # Handle metadata messages
if content["ZMESSAGETYPE"] == 6: if content["ZMESSAGETYPE"] == 6:
return process_metadata_message(message, content, is_group_message) return process_metadata_message(message, content, is_group_message)
# Handle quoted replies # Handle quoted replies
if content["ZMETADATA"] is not None and content["ZMETADATA"].startswith(b"\x2a\x14") and False: if content["ZMETADATA"] is not None and content["ZMETADATA"].startswith(b"\x2a\x14") and not no_reply:
quoted = content["ZMETADATA"][2:19] quoted = content["ZMETADATA"][2:19]
message.reply = quoted.decode() message.reply = quoted.decode()
cursor2.execute(f"""SELECT ZTEXT cursor2.execute(f"""SELECT ZTEXT
@@ -244,17 +252,17 @@ def process_message_data(message, content, is_group_message, data, cursor2):
WHERE ZSTANZAID LIKE '{message.reply}%'""") WHERE ZSTANZAID LIKE '{message.reply}%'""")
quoted_content = cursor2.fetchone() quoted_content = cursor2.fetchone()
if quoted_content and "ZTEXT" in quoted_content: if quoted_content and "ZTEXT" in quoted_content:
message.quoted_data = quoted_content["ZTEXT"] message.quoted_data = quoted_content["ZTEXT"]
else: else:
message.quoted_data = None message.quoted_data = None
# Handle stickers # Handle stickers
if content["ZMESSAGETYPE"] == 15: if content["ZMESSAGETYPE"] == 15:
message.sticker = True message.sticker = True
# Process message text # Process message text
process_message_text(message, content) process_message_text(message, content)
return False # Message is valid return False # Message is valid
@@ -299,19 +307,21 @@ def process_message_text(message, content):
msg = content["ZTEXT"] msg = content["ZTEXT"]
if msg is not None: if msg is not None:
msg = msg.replace("\r\n", "<br>").replace("\n", "<br>") msg = msg.replace("\r\n", "<br>").replace("\n", "<br>")
message.data = msg message.data = msg
def media(db, data, media_folder, filter_date, filter_chat, filter_empty, separate_media=False): def media(db, data, media_folder, filter_date, filter_chat, filter_empty, separate_media=False):
"""Process media files from WhatsApp messages.""" """Process media files from WhatsApp messages."""
c = db.cursor() c = db.cursor()
# Build filter conditions # Build filter conditions
chat_filter_include = get_chat_condition(filter_chat[0], True, ["ZWACHATSESSION.ZCONTACTJID","ZMEMBERJID"], "ZGROUPINFO", "ios") chat_filter_include = get_chat_condition(
chat_filter_exclude = get_chat_condition(filter_chat[1], False, ["ZWACHATSESSION.ZCONTACTJID", "ZMEMBERJID"], "ZGROUPINFO", "ios") filter_chat[0], True, ["ZWACHATSESSION.ZCONTACTJID", "ZMEMBERJID"], "ZGROUPINFO", "ios")
chat_filter_exclude = get_chat_condition(
filter_chat[1], False, ["ZWACHATSESSION.ZCONTACTJID", "ZMEMBERJID"], "ZGROUPINFO", "ios")
date_filter = f'AND ZMESSAGEDATE {filter_date}' if filter_date is not None else '' date_filter = f'AND ZMESSAGEDATE {filter_date}' if filter_date is not None else ''
# Get media count # Get media count
media_count_query = f""" media_count_query = f"""
SELECT count() SELECT count()
@@ -329,8 +339,8 @@ def media(db, data, media_folder, filter_date, filter_chat, filter_empty, separa
""" """
c.execute(media_count_query) c.execute(media_count_query)
total_row_number = c.fetchone()[0] total_row_number = c.fetchone()[0]
print(f"\nProcessing media...(0/{total_row_number})", end="\r") logger.info(f"Processing media...(0/{total_row_number})\r")
# Fetch media items # Fetch media items
media_query = f""" media_query = f"""
SELECT ZCONTACTJID, SELECT ZCONTACTJID,
@@ -354,21 +364,20 @@ def media(db, data, media_folder, filter_date, filter_chat, filter_empty, separa
ORDER BY ZCONTACTJID ASC ORDER BY ZCONTACTJID ASC
""" """
c.execute(media_query) c.execute(media_query)
# Process each media item # Process each media item
mime = MimeTypes() mime = MimeTypes()
i = 0 i = 0
content = c.fetchone() content = c.fetchone()
while content is not None: while content is not None:
process_media_item(content, data, media_folder, mime, separate_media) process_media_item(content, data, media_folder, mime, separate_media)
# Update progress # Update progress
i += 1 i += 1
if i % 100 == 0: if i % 100 == 0:
print(f"Processing media...({i}/{total_row_number})", end="\r") logger.info(f"Processing media...({i}/{total_row_number})\r")
content = c.fetchone() content = c.fetchone()
logger.info(f"Processed {total_row_number} media{CLEAR_LINE}")
print(f"Processing media...({total_row_number}/{total_row_number})", end="\r")
def process_media_item(content, data, media_folder, mime, separate_media): def process_media_item(content, data, media_folder, mime, separate_media):
@@ -377,23 +386,24 @@ def process_media_item(content, data, media_folder, mime, separate_media):
current_chat = data.get_chat(content["ZCONTACTJID"]) current_chat = data.get_chat(content["ZCONTACTJID"])
message = current_chat.get_message(content["ZMESSAGE"]) message = current_chat.get_message(content["ZMESSAGE"])
message.media = True message.media = True
if current_chat.media_base == "": if current_chat.media_base == "":
current_chat.media_base = media_folder + "/" current_chat.media_base = media_folder + "/"
if os.path.isfile(file_path): if os.path.isfile(file_path):
message.data = '/'.join(file_path.split("/")[1:]) message.data = '/'.join(file_path.split("/")[1:])
# Set MIME type # Set MIME type
if content["ZVCARDSTRING"] is None: if content["ZVCARDSTRING"] is None:
guess = mime.guess_type(file_path)[0] guess = mime.guess_type(file_path)[0]
message.mime = guess if guess is not None else "application/octet-stream" message.mime = guess if guess is not None else "application/octet-stream"
else: else:
message.mime = content["ZVCARDSTRING"] message.mime = content["ZVCARDSTRING"]
# Handle separate media option # Handle separate media option
if separate_media: if separate_media:
chat_display_name = slugify(current_chat.name or message.sender or content["ZCONTACTJID"].split('@')[0], True) chat_display_name = safe_name(
current_chat.name or message.sender or content["ZCONTACTJID"].split('@')[0])
current_filename = file_path.split("/")[-1] current_filename = file_path.split("/")[-1]
new_folder = os.path.join(media_folder, "separated", chat_display_name) new_folder = os.path.join(media_folder, "separated", chat_display_name)
Path(new_folder).mkdir(parents=True, exist_ok=True) Path(new_folder).mkdir(parents=True, exist_ok=True)
@@ -405,7 +415,7 @@ def process_media_item(content, data, media_folder, mime, separate_media):
message.data = "The media is missing" message.data = "The media is missing"
message.mime = "media" message.mime = "media"
message.meta = True message.meta = True
# Add caption if available # Add caption if available
if content["ZTITLE"] is not None: if content["ZTITLE"] is not None:
message.caption = content["ZTITLE"] message.caption = content["ZTITLE"]
@@ -414,12 +424,14 @@ def process_media_item(content, data, media_folder, mime, separate_media):
def vcard(db, data, media_folder, filter_date, filter_chat, filter_empty): def vcard(db, data, media_folder, filter_date, filter_chat, filter_empty):
"""Process vCard contacts from WhatsApp messages.""" """Process vCard contacts from WhatsApp messages."""
c = db.cursor() c = db.cursor()
# Build filter conditions # Build filter conditions
chat_filter_include = get_chat_condition(filter_chat[0], True, ["ZCONTACTJID", "ZMEMBERJID"], "ZGROUPINFO", "ios") chat_filter_include = get_chat_condition(
chat_filter_exclude = get_chat_condition(filter_chat[1], False, ["ZCONTACTJID", "ZMEMBERJID"], "ZGROUPINFO", "ios") filter_chat[0], True, ["ZCONTACTJID", "ZMEMBERJID"], "ZGROUPINFO", "ios")
chat_filter_exclude = get_chat_condition(
filter_chat[1], False, ["ZCONTACTJID", "ZMEMBERJID"], "ZGROUPINFO", "ios")
date_filter = f'AND ZWAMESSAGE.ZMESSAGEDATE {filter_date}' if filter_date is not None else '' date_filter = f'AND ZWAMESSAGE.ZMESSAGEDATE {filter_date}' if filter_date is not None else ''
# Fetch vCard mentions # Fetch vCard mentions
vcard_query = f""" vcard_query = f"""
SELECT DISTINCT ZWAVCARDMENTION.ZMEDIAITEM, SELECT DISTINCT ZWAVCARDMENTION.ZMEDIAITEM,
@@ -444,8 +456,8 @@ def vcard(db, data, media_folder, filter_date, filter_chat, filter_empty):
c.execute(vcard_query) c.execute(vcard_query)
contents = c.fetchall() contents = c.fetchall()
total_row_number = len(contents) total_row_number = len(contents)
print(f"\nProcessing vCards...(0/{total_row_number})", end="\r") logger.info(f"Processing vCards...(0/{total_row_number})\r")
# Create vCards directory # Create vCards directory
path = f'{media_folder}/Message/vCards' path = f'{media_folder}/Message/vCards'
Path(path).mkdir(parents=True, exist_ok=True) Path(path).mkdir(parents=True, exist_ok=True)
@@ -453,7 +465,8 @@ def vcard(db, data, media_folder, filter_date, filter_chat, filter_empty):
# Process each vCard # Process each vCard
for index, content in enumerate(contents): for index, content in enumerate(contents):
process_vcard_item(content, path, data) process_vcard_item(content, path, data)
print(f"Processing vCards...({index + 1}/{total_row_number})", end="\r") logger.info(f"Processing vCards...({index + 1}/{total_row_number})\r")
logger.info(f"Processed {total_row_number} vCards{CLEAR_LINE}")
def process_vcard_item(content, path, data): def process_vcard_item(content, path, data):
@@ -478,9 +491,10 @@ def process_vcard_item(content, path, data):
f.write(vcard_string) f.write(vcard_string)
# Create vCard summary and update message # Create vCard summary and update message
vcard_summary = "This media include the following vCard file(s):<br>" vcard_summary = "This media include the following vCard file(s):<br>"
vcard_summary += " | ".join([f'<a href="{htmle(fp)}">{htmle(name)}</a>' for name, fp in zip(vcard_names, file_paths)]) vcard_summary += " | ".join([f'<a href="{htmle(fp)}">{htmle(name)}</a>' for name,
fp in zip(vcard_names, file_paths)])
message = data.get_chat(content["ZCONTACTJID"]).get_message(content["ZMESSAGE"]) message = data.get_chat(content["ZCONTACTJID"]).get_message(content["ZMESSAGE"])
message.data = vcard_summary message.data = vcard_summary
message.mime = "text/x-vcard" message.mime = "text/x-vcard"
@@ -492,11 +506,13 @@ def process_vcard_item(content, path, data):
def calls(db, data, timezone_offset, filter_chat): def calls(db, data, timezone_offset, filter_chat):
"""Process WhatsApp call records.""" """Process WhatsApp call records."""
c = db.cursor() c = db.cursor()
# Build filter conditions # Build filter conditions
chat_filter_include = get_chat_condition(filter_chat[0], True, ["ZGROUPCALLCREATORUSERJIDSTRING"], None, "ios") chat_filter_include = get_chat_condition(
chat_filter_exclude = get_chat_condition(filter_chat[1], False, ["ZGROUPCALLCREATORUSERJIDSTRING"], None, "ios") filter_chat[0], True, ["ZGROUPCALLCREATORUSERJIDSTRING"], None, "ios")
chat_filter_exclude = get_chat_condition(
filter_chat[1], False, ["ZGROUPCALLCREATORUSERJIDSTRING"], None, "ios")
# Get call count # Get call count
call_count_query = f""" call_count_query = f"""
SELECT count() SELECT count()
@@ -509,9 +525,9 @@ def calls(db, data, timezone_offset, filter_chat):
total_row_number = c.fetchone()[0] total_row_number = c.fetchone()[0]
if total_row_number == 0: if total_row_number == 0:
return return
print(f"\nProcessing calls...({total_row_number})", end="\r") logger.info(f"Processed {total_row_number} calls{CLEAR_LINE}\n")
# Fetch call records # Fetch call records
calls_query = f""" calls_query = f"""
SELECT ZCALLIDSTRING, SELECT ZCALLIDSTRING,
@@ -532,16 +548,16 @@ def calls(db, data, timezone_offset, filter_chat):
{chat_filter_exclude} {chat_filter_exclude}
""" """
c.execute(calls_query) c.execute(calls_query)
# Create calls chat # Create calls chat
chat = ChatStore(Device.ANDROID, "WhatsApp Calls") chat = ChatStore(Device.ANDROID, "WhatsApp Calls")
# Process each call # Process each call
content = c.fetchone() content = c.fetchone()
while content is not None: while content is not None:
process_call_record(content, chat, data, timezone_offset) process_call_record(content, chat, data, timezone_offset)
content = c.fetchone() content = c.fetchone()
# Add calls chat to data # Add calls chat to data
data.add_chat("000000000000000", chat) data.add_chat("000000000000000", chat)
@@ -556,7 +572,7 @@ def process_call_record(content, chat, data, timezone_offset):
key_id=content["ZCALLIDSTRING"], key_id=content["ZCALLIDSTRING"],
timezone_offset=timezone_offset if timezone_offset else CURRENT_TZ_OFFSET timezone_offset=timezone_offset if timezone_offset else CURRENT_TZ_OFFSET
) )
# Set sender info # Set sender info
_jid = content["ZGROUPCALLCREATORUSERJIDSTRING"] _jid = content["ZGROUPCALLCREATORUSERJIDSTRING"]
name = data.get_chat(_jid).name if _jid in data else None name = data.get_chat(_jid).name if _jid in data else None
@@ -565,11 +581,11 @@ def process_call_record(content, chat, data, timezone_offset):
else: else:
fallback = None fallback = None
call.sender = name or fallback call.sender = name or fallback
# Set call metadata # Set call metadata
call.meta = True call.meta = True
call.data = format_call_data(call, content) call.data = format_call_data(call, content)
# Add call to chat # Add call to chat
chat.add_message(call.key_id, call) chat.add_message(call.key_id, call)
@@ -583,7 +599,7 @@ def format_call_data(call, content):
f"call {'to' if call.from_me else 'from'} " f"call {'to' if call.from_me else 'from'} "
f"{call.sender} was " f"{call.sender} was "
) )
# Call outcome # Call outcome
if content['ZOUTCOME'] in (1, 4): if content['ZOUTCOME'] in (1, 4):
call_data += "not answered." if call.from_me else "missed." call_data += "not answered." if call.from_me else "missed."
@@ -598,5 +614,5 @@ def format_call_data(call, content):
) )
else: else:
call_data += "in an unknown state." call_data += "in an unknown state."
return call_data return call_data

View File

@@ -1,11 +1,12 @@
#!/usr/bin/python3 #!/usr/bin/python3
import logging
import shutil import shutil
import sqlite3 import sqlite3
import os import os
import getpass import getpass
from sys import exit from sys import exit, platform as osname
from Whatsapp_Chat_Exporter.utility import WhatsAppIdentifier from Whatsapp_Chat_Exporter.utility import CLEAR_LINE, WhatsAppIdentifier
from Whatsapp_Chat_Exporter.bplist import BPListReader from Whatsapp_Chat_Exporter.bplist import BPListReader
try: try:
from iphone_backup_decrypt import EncryptedBackup, RelativePath from iphone_backup_decrypt import EncryptedBackup, RelativePath
@@ -15,6 +16,9 @@ else:
support_encrypted = True support_encrypted = True
logger = logging.getLogger(__name__)
class BackupExtractor: class BackupExtractor:
""" """
A class to handle the extraction of WhatsApp data from iOS backups, A class to handle the extraction of WhatsApp data from iOS backups,
@@ -42,27 +46,38 @@ class BackupExtractor:
Returns: Returns:
bool: True if encrypted, False otherwise. bool: True if encrypted, False otherwise.
""" """
with sqlite3.connect(os.path.join(self.base_dir, "Manifest.db")) as db: try:
c = db.cursor() with sqlite3.connect(os.path.join(self.base_dir, "Manifest.db")) as db:
try: c = db.cursor()
c.execute("SELECT count() FROM Files") try:
c.fetchone() # Execute and fetch to trigger potential errors c.execute("SELECT count() FROM Files")
except (sqlite3.OperationalError, sqlite3.DatabaseError): c.fetchone() # Execute and fetch to trigger potential errors
return True except (sqlite3.OperationalError, sqlite3.DatabaseError):
return True
else:
return False
except sqlite3.DatabaseError as e:
if str(e) == "authorization denied" and osname == "darwin":
logger.error(
"You don't have permission to access the backup database. Please"
"check your permissions or try moving the backup to somewhere else."
)
exit(8)
else: else:
return False raise e
def _extract_encrypted_backup(self): def _extract_encrypted_backup(self):
""" """
Handles the extraction of data from an encrypted iOS backup. Handles the extraction of data from an encrypted iOS backup.
""" """
if not support_encrypted: if not support_encrypted:
print("You don't have the dependencies to handle encrypted backup.") logger.error("You don't have the dependencies to handle encrypted backup."
print("Read more on how to deal with encrypted backup:") "Read more on how to deal with encrypted backup:"
print("https://github.com/KnugiHK/Whatsapp-Chat-Exporter/blob/main/README.md#usage") "https://github.com/KnugiHK/Whatsapp-Chat-Exporter/blob/main/README.md#usage"
)
return return
print("Encryption detected on the backup!") logger.info(f"Encryption detected on the backup!{CLEAR_LINE}")
password = getpass.getpass("Enter the password for the backup:") password = getpass.getpass("Enter the password for the backup:")
self._decrypt_backup(password) self._decrypt_backup(password)
self._extract_decrypted_files() self._extract_decrypted_files()
@@ -74,7 +89,7 @@ class BackupExtractor:
Args: Args:
password (str): The password for the encrypted backup. password (str): The password for the encrypted backup.
""" """
print("Trying to decrypt the iOS backup...", end="") logger.info(f"Trying to decrypt the iOS backup...{CLEAR_LINE}")
self.backup = EncryptedBackup( self.backup = EncryptedBackup(
backup_directory=self.base_dir, backup_directory=self.base_dir,
passphrase=password, passphrase=password,
@@ -82,7 +97,8 @@ class BackupExtractor:
check_same_thread=False, check_same_thread=False,
decrypt_chunk_size=self.decrypt_chunk_size, decrypt_chunk_size=self.decrypt_chunk_size,
) )
print("Done\nDecrypting WhatsApp database...", end="") logger.info(f"iOS backup decrypted successfully{CLEAR_LINE}")
logger.info("Decrypting WhatsApp database...\r")
try: try:
self.backup.extract_file( self.backup.extract_file(
relative_path=RelativePath.WHATSAPP_MESSAGES, relative_path=RelativePath.WHATSAPP_MESSAGES,
@@ -100,23 +116,23 @@ class BackupExtractor:
output_filename=self.identifiers.CALL, output_filename=self.identifiers.CALL,
) )
except ValueError: except ValueError:
print("Failed to decrypt backup: incorrect password?") logger.error("Failed to decrypt backup: incorrect password?")
exit(7) exit(7)
except FileNotFoundError: except FileNotFoundError:
print( logger.error(
"Essential WhatsApp files are missing from the iOS backup. " "Essential WhatsApp files are missing from the iOS backup. "
"Perhapse you enabled end-to-end encryption for the backup? " "Perhapse you enabled end-to-end encryption for the backup? "
"See https://wts.knugi.dev/docs.html?dest=iose2e" "See https://wts.knugi.dev/docs.html?dest=iose2e"
) )
exit(6) exit(6)
else: else:
print("Done") logger.info(f"WhatsApp database decrypted successfully{CLEAR_LINE}")
def _extract_decrypted_files(self): def _extract_decrypted_files(self):
"""Extract all WhatsApp files after decryption""" """Extract all WhatsApp files after decryption"""
def extract_progress_handler(file_id, domain, relative_path, n, total_files): def extract_progress_handler(file_id, domain, relative_path, n, total_files):
if n % 100 == 0: if n % 100 == 0:
print(f"Decrypting and extracting files...({n}/{total_files})", end="\r") logger.info(f"Decrypting and extracting files...({n}/{total_files})\r")
return True return True
self.backup.extract_files( self.backup.extract_files(
@@ -125,7 +141,7 @@ class BackupExtractor:
preserve_folders=True, preserve_folders=True,
filter_callback=extract_progress_handler filter_callback=extract_progress_handler
) )
print(f"All required files are decrypted and extracted. ", end="\n") logger.info(f"All required files are decrypted and extracted.{CLEAR_LINE}")
def _extract_unencrypted_backup(self): def _extract_unencrypted_backup(self):
""" """
@@ -144,10 +160,10 @@ class BackupExtractor:
if not os.path.isfile(wts_db_path): if not os.path.isfile(wts_db_path):
if self.identifiers is WhatsAppIdentifier: if self.identifiers is WhatsAppIdentifier:
print("WhatsApp database not found.") logger.error("WhatsApp database not found.")
else: else:
print("WhatsApp Business database not found.") logger.error("WhatsApp Business database not found.")
print( logger.error(
"Essential WhatsApp files are missing from the iOS backup. " "Essential WhatsApp files are missing from the iOS backup. "
"Perhapse you enabled end-to-end encryption for the backup? " "Perhapse you enabled end-to-end encryption for the backup? "
"See https://wts.knugi.dev/docs.html?dest=iose2e" "See https://wts.knugi.dev/docs.html?dest=iose2e"
@@ -157,12 +173,12 @@ class BackupExtractor:
shutil.copyfile(wts_db_path, self.identifiers.MESSAGE) shutil.copyfile(wts_db_path, self.identifiers.MESSAGE)
if not os.path.isfile(contact_db_path): if not os.path.isfile(contact_db_path):
print("Contact database not found. Skipping...") logger.warning(f"Contact database not found. Skipping...{CLEAR_LINE}")
else: else:
shutil.copyfile(contact_db_path, self.identifiers.CONTACT) shutil.copyfile(contact_db_path, self.identifiers.CONTACT)
if not os.path.isfile(call_db_path): if not os.path.isfile(call_db_path):
print("Call database not found. Skipping...") logger.warning(f"Call database not found. Skipping...{CLEAR_LINE}")
else: else:
shutil.copyfile(call_db_path, self.identifiers.CALL) shutil.copyfile(call_db_path, self.identifiers.CALL)
@@ -176,7 +192,7 @@ class BackupExtractor:
c = manifest.cursor() c = manifest.cursor()
c.execute(f"SELECT count() FROM Files WHERE domain = '{_wts_id}'") c.execute(f"SELECT count() FROM Files WHERE domain = '{_wts_id}'")
total_row_number = c.fetchone()[0] total_row_number = c.fetchone()[0]
print(f"Extracting WhatsApp files...(0/{total_row_number})", end="\r") logger.info(f"Extracting WhatsApp files...(0/{total_row_number})\r")
c.execute( c.execute(
f""" f"""
SELECT fileID, relativePath, flags, file AS metadata, SELECT fileID, relativePath, flags, file AS metadata,
@@ -213,9 +229,9 @@ class BackupExtractor:
os.utime(destination, (modification, modification)) os.utime(destination, (modification, modification))
if row["_index"] % 100 == 0: if row["_index"] % 100 == 0:
print(f"Extracting WhatsApp files...({row['_index']}/{total_row_number})", end="\r") logger.info(f"Extracting WhatsApp files...({row['_index']}/{total_row_number})\r")
row = c.fetchone() row = c.fetchone()
print(f"Extracting WhatsApp files...({total_row_number}/{total_row_number})", end="\n") logger.info(f"Extracted WhatsApp files...({total_row_number}){CLEAR_LINE}")
def extract_media(base_dir, identifiers, decrypt_chunk_size): def extract_media(base_dir, identifiers, decrypt_chunk_size):
@@ -229,4 +245,3 @@ def extract_media(base_dir, identifiers, decrypt_chunk_size):
""" """
extractor = BackupExtractor(base_dir, identifiers, decrypt_chunk_size) extractor = BackupExtractor(base_dir, identifiers, decrypt_chunk_size)
extractor.extract() extractor.extract()

View File

@@ -1,22 +1,26 @@
import logging
import sqlite3 import sqlite3
import jinja2 import jinja2
import json import json
import os import os
import unicodedata import unicodedata
import re import re
import string
import math import math
import shutil
from bleach import clean as sanitize from bleach import clean as sanitize
from markupsafe import Markup from markupsafe import Markup
from datetime import datetime, timedelta from datetime import datetime, timedelta
from enum import IntEnum from enum import IntEnum
from Whatsapp_Chat_Exporter.data_model import ChatStore from Whatsapp_Chat_Exporter.data_model import ChatCollection, ChatStore, Timing
from typing import Dict, List, Optional, Tuple from typing import Dict, List, Optional, Tuple, Union
try: try:
from enum import StrEnum, IntEnum from enum import StrEnum, IntEnum
except ImportError: except ImportError:
# < Python 3.11 # < Python 3.11
# This should be removed when the support for Python 3.10 ends. # This should be removed when the support for Python 3.10 ends. (31 Oct 2026)
from enum import Enum from enum import Enum
class StrEnum(str, Enum): class StrEnum(str, Enum):
pass pass
@@ -26,6 +30,9 @@ except ImportError:
MAX_SIZE = 4 * 1024 * 1024 # Default 4MB MAX_SIZE = 4 * 1024 * 1024 # Default 4MB
ROW_SIZE = 0x3D0 ROW_SIZE = 0x3D0
CURRENT_TZ_OFFSET = datetime.now().astimezone().utcoffset().seconds / 3600 CURRENT_TZ_OFFSET = datetime.now().astimezone().utcoffset().seconds / 3600
CLEAR_LINE = "\x1b[K\n"
logger = logging.getLogger(__name__)
def convert_time_unit(time_second: int) -> str: def convert_time_unit(time_second: int) -> str:
@@ -37,23 +44,31 @@ def convert_time_unit(time_second: int) -> str:
Returns: Returns:
str: A human-readable string representing the time duration. str: A human-readable string representing the time duration.
""" """
time = str(timedelta(seconds=time_second)) if time_second < 1:
if "day" not in time: return "less than a second"
if time_second < 1: elif time_second == 1:
time = "less than a second" return "a second"
elif time_second == 1:
time = "a second" delta = timedelta(seconds=time_second)
elif time_second < 60: parts = []
time = time[5:][1 if time_second < 10 else 0:] + " seconds"
elif time_second == 60: days = delta.days
time = "a minute" if days > 0:
elif time_second < 3600: parts.append(f"{days} day{'s' if days > 1 else ''}")
time = time[2:] + " minutes"
elif time_second == 3600: hours = delta.seconds // 3600
time = "an hour" if hours > 0:
else: parts.append(f"{hours} hour{'s' if hours > 1 else ''}")
time += " hour"
return time minutes = (delta.seconds % 3600) // 60
if minutes > 0:
parts.append(f"{minutes} minute{'s' if minutes > 1 else ''}")
seconds = delta.seconds % 60
if seconds > 0:
parts.append(f"{seconds} second{'s' if seconds > 1 else ''}")
return " ".join(parts)
def bytes_to_readable(size_bytes: int) -> str: def bytes_to_readable(size_bytes: int) -> str:
@@ -70,8 +85,8 @@ def bytes_to_readable(size_bytes: int) -> str:
Returns: Returns:
A human-readable string representing the file size. A human-readable string representing the file size.
""" """
if size_bytes == 0: if size_bytes < 1024:
return "0B" return f"{size_bytes} B"
size_name = ("B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB") size_name = ("B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB")
i = int(math.floor(math.log(size_bytes, 1024))) i = int(math.floor(math.log(size_bytes, 1024)))
p = math.pow(1024, i) p = math.pow(1024, i)
@@ -99,14 +114,19 @@ def readable_to_bytes(size_str: str) -> int:
'TB': 1024**4, 'TB': 1024**4,
'PB': 1024**5, 'PB': 1024**5,
'EB': 1024**6, 'EB': 1024**6,
'ZB': 1024**7, 'ZB': 1024**7,
'YB': 1024**8 'YB': 1024**8
} }
size_str = size_str.upper().strip() size_str = size_str.upper().strip()
number, unit = size_str[:-2].strip(), size_str[-2:].strip() if size_str.isnumeric():
if unit not in SIZE_UNITS or not number.isnumeric(): # If the string is purely numeric, assume it's in bytes
raise ValueError("Invalid input for size_str. Example: 1024GB") return int(size_str)
return int(number) * SIZE_UNITS[unit] match = re.fullmatch(r'^(\d+(\.\d*)?)\s*([KMGTPEZY]?B)?$', size_str)
if not match:
raise ValueError("Invalid size format for size_str. Expected format like '10MB', '1024GB', or '512'.")
unit = ''.join(filter(str.isalpha, size_str)).strip()
number = ''.join(c for c in size_str if c.isdigit() or c == '.').strip()
return int(float(number) * SIZE_UNITS[unit])
def sanitize_except(html: str) -> Markup: def sanitize_except(html: str) -> Markup:
@@ -149,41 +169,44 @@ def check_update():
try: try:
raw = urllib.request.urlopen(PACKAGE_JSON) raw = urllib.request.urlopen(PACKAGE_JSON)
except Exception: except Exception:
print("Failed to check for updates.") logger.error("Failed to check for updates.")
return 1 return 1
else: else:
with raw: with raw:
package_info = json.load(raw) package_info = json.load(raw)
latest_version = tuple(map(int, package_info["info"]["version"].split("."))) latest_version = tuple(
map(int, package_info["info"]["version"].split(".")))
__version__ = importlib.metadata.version("whatsapp_chat_exporter") __version__ = importlib.metadata.version("whatsapp_chat_exporter")
current_version = tuple(map(int, __version__.split("."))) current_version = tuple(map(int, __version__.split(".")))
if current_version < latest_version: if current_version < latest_version:
print("===============Update===============") logger.info(
print("A newer version of WhatsApp Chat Exporter is available.") "===============Update===============\n"
print("Current version: " + __version__) "A newer version of WhatsApp Chat Exporter is available.\n"
print("Latest version: " + package_info["info"]["version"]) f"Current version: {__version__}\n"
f"Latest version: {package_info['info']['version']}\n"
)
if platform == "win32": if platform == "win32":
print("Update with: pip install --upgrade whatsapp-chat-exporter") logger.info("Update with: pip install --upgrade whatsapp-chat-exporter\n")
else: else:
print("Update with: pip3 install --upgrade whatsapp-chat-exporter") logger.info("Update with: pip3 install --upgrade whatsapp-chat-exporter\n")
print("====================================") logger.info("====================================\n")
else: else:
print("You are using the latest version of WhatsApp Chat Exporter.") logger.info("You are using the latest version of WhatsApp Chat Exporter.\n")
return 0 return 0
def rendering( def rendering(
output_file_name, output_file_name,
template, template,
name, name,
msgs, msgs,
contact, contact,
w3css, w3css,
chat, chat,
headline, headline,
next=False, next=False,
previous=False previous=False
): ):
if chat.their_avatar_thumb is None and chat.their_avatar is not None: if chat.their_avatar_thumb is None and chat.their_avatar is not None:
their_avatar_thumb = chat.their_avatar their_avatar_thumb = chat.their_avatar
else: else:
@@ -215,59 +238,104 @@ class Device(StrEnum):
EXPORTED = "exported" EXPORTED = "exported"
def import_from_json(json_file: str, data: Dict[str, ChatStore]): def import_from_json(json_file: str, data: ChatCollection):
"""Imports chat data from a JSON file into the data dictionary. """Imports chat data from a JSON file into the data dictionary.
Args: Args:
json_file: The path to the JSON file. json_file: The path to the JSON file.
data: The dictionary to store the imported chat data. data: The dictionary to store the imported chat data.
""" """
from Whatsapp_Chat_Exporter.data_model import ChatStore, Message
with open(json_file, "r") as f: with open(json_file, "r") as f:
temp_data = json.loads(f.read()) temp_data = json.loads(f.read())
total_row_number = len(tuple(temp_data.keys())) total_row_number = len(tuple(temp_data.keys()))
print(f"Importing chats from JSON...(0/{total_row_number})", end="\r") logger.info(f"Importing chats from JSON...(0/{total_row_number})\r")
for index, (jid, chat_data) in enumerate(temp_data.items()): for index, (jid, chat_data) in enumerate(temp_data.items()):
chat = ChatStore(chat_data.get("type"), chat_data.get("name")) chat = ChatStore.from_json(chat_data)
chat.my_avatar = chat_data.get("my_avatar") data.add_chat(jid, chat)
chat.their_avatar = chat_data.get("their_avatar") logger.info(
chat.their_avatar_thumb = chat_data.get("their_avatar_thumb") f"Importing chats from JSON...({index + 1}/{total_row_number})\r")
chat.status = chat_data.get("status") logger.info(f"Imported {total_row_number} chats from JSON{CLEAR_LINE}")
for id, msg in chat_data.get("messages").items():
message = Message(
from_me=msg["from_me"],
timestamp=msg["timestamp"],
time=msg["time"],
key_id=msg["key_id"],
received_timestamp=msg.get("received_timestamp"),
read_timestamp=msg.get("read_timestamp")
)
message.media = msg.get("media")
message.meta = msg.get("meta")
message.data = msg.get("data")
message.sender = msg.get("sender")
message.safe = msg.get("safe")
message.mime = msg.get("mime")
message.reply = msg.get("reply")
message.quoted_data = msg.get("quoted_data")
message.caption = msg.get("caption")
message.thumb = msg.get("thumb")
message.sticker = msg.get("sticker")
chat.add_message(id, message)
data[jid] = chat
print(f"Importing chats from JSON...({index + 1}/{total_row_number})", end="\r")
def sanitize_filename(file_name: str) -> str: def incremental_merge(source_dir: str, target_dir: str, media_dir: str, pretty_print_json: int, avoid_encoding_json: bool):
"""Sanitizes a filename by removing invalid and unsafe characters. """Merges JSON files from the source directory into the target directory.
Args: Args:
file_name: The filename to sanitize. source_dir (str): The path to the source directory containing JSON files.
target_dir (str): The path to the target directory to merge into.
Returns: media_dir (str): The path to the media directory.
The sanitized filename.
""" """
return "".join(x for x in file_name if x.isalnum() or x in "- ") json_files = [f for f in os.listdir(source_dir) if f.endswith('.json')]
if not json_files:
logger.error("No JSON files found in the source directory.")
return
logger.info("JSON files found:", json_files)
for json_file in json_files:
source_path = os.path.join(source_dir, json_file)
target_path = os.path.join(target_dir, json_file)
if not os.path.exists(target_path):
logger.info(f"Copying '{json_file}' to target directory...")
os.makedirs(target_dir, exist_ok=True)
shutil.copy2(source_path, target_path)
else:
logger.info(
f"Merging '{json_file}' with existing file in target directory...")
with open(source_path, 'r') as src_file, open(target_path, 'r') as tgt_file:
source_data = json.load(src_file)
target_data = json.load(tgt_file)
# Parse JSON into ChatStore objects using from_json()
source_chats = {jid: ChatStore.from_json(
chat) for jid, chat in source_data.items()}
target_chats = {jid: ChatStore.from_json(
chat) for jid, chat in target_data.items()}
# Merge chats using merge_with()
for jid, chat in source_chats.items():
if jid in target_chats:
target_chats[jid].merge_with(chat)
else:
target_chats[jid] = chat
# Serialize merged data
merged_data = {jid: chat.to_json()
for jid, chat in target_chats.items()}
# Check if the merged data differs from the original target data
if json.dumps(merged_data, sort_keys=True) != json.dumps(target_data, sort_keys=True):
logger.info(
f"Changes detected in '{json_file}', updating target file...")
with open(target_path, 'w') as merged_file:
json.dump(
merged_data,
merged_file,
indent=pretty_print_json,
ensure_ascii=not avoid_encoding_json,
)
else:
logger.info(
f"No changes detected in '{json_file}', skipping update.")
# Merge media directories
source_media_path = os.path.join(source_dir, media_dir)
target_media_path = os.path.join(target_dir, media_dir)
logger.info(
f"Merging media directories. Source: {source_media_path}, target: {target_media_path}")
if os.path.exists(source_media_path):
for root, _, files in os.walk(source_media_path):
relative_path = os.path.relpath(root, source_media_path)
target_root = os.path.join(target_media_path, relative_path)
os.makedirs(target_root, exist_ok=True)
for file in files:
source_file = os.path.join(root, file)
target_file = os.path.join(target_root, file)
# we only copy if the file doesn't exist in the target or if the source is newer
if not os.path.exists(target_file) or os.path.getmtime(source_file) > os.path.getmtime(target_file):
logger.info(f"Copying '{source_file}' to '{target_file}'...")
shutil.copy2(source_file, target_file)
def get_file_name(contact: str, chat: ChatStore) -> Tuple[str, str]: def get_file_name(contact: str, chat: ChatStore) -> Tuple[str, str]:
@@ -299,7 +367,7 @@ def get_file_name(contact: str, chat: ChatStore) -> Tuple[str, str]:
else: else:
name = phone_number name = phone_number
return sanitize_filename(file_name), name return safe_name(file_name), name
def get_cond_for_empty(enable: bool, jid_field: str, broadcast_field: str) -> str: def get_cond_for_empty(enable: bool, jid_field: str, broadcast_field: str) -> str:
@@ -335,23 +403,29 @@ def get_chat_condition(filter: Optional[List[str]], include: bool, columns: List
if filter is not None: if filter is not None:
conditions = [] conditions = []
if len(columns) < 2 and jid is not None: if len(columns) < 2 and jid is not None:
raise ValueError("There must be at least two elements in argument columns if jid is not None") raise ValueError(
"There must be at least two elements in argument columns if jid is not None")
if jid is not None: if jid is not None:
if platform == "android": if platform == "android":
is_group = f"{jid}.type == 1" is_group = f"{jid}.type == 1"
elif platform == "ios": elif platform == "ios":
is_group = f"{jid} IS NOT NULL" is_group = f"{jid} IS NOT NULL"
else: else:
raise ValueError("Only android and ios are supported for argument platform if jid is not None") raise ValueError(
"Only android and ios are supported for argument platform if jid is not None")
for index, chat in enumerate(filter): for index, chat in enumerate(filter):
if include: if include:
conditions.append(f"{' OR' if index > 0 else ''} {columns[0]} LIKE '%{chat}%'") conditions.append(
f"{' OR' if index > 0 else ''} {columns[0]} LIKE '%{chat}%'")
if len(columns) > 1: if len(columns) > 1:
conditions.append(f" OR ({columns[1]} LIKE '%{chat}%' AND {is_group})") conditions.append(
f" OR ({columns[1]} LIKE '%{chat}%' AND {is_group})")
else: else:
conditions.append(f"{' AND' if index > 0 else ''} {columns[0]} NOT LIKE '%{chat}%'") conditions.append(
f"{' AND' if index > 0 else ''} {columns[0]} NOT LIKE '%{chat}%'")
if len(columns) > 1: if len(columns) > 1:
conditions.append(f" AND ({columns[1]} NOT LIKE '%{chat}%' AND {is_group})") conditions.append(
f" AND ({columns[1]} NOT LIKE '%{chat}%' AND {is_group})")
return f"AND ({' '.join(conditions)})" return f"AND ({' '.join(conditions)})"
else: else:
return "" return ""
@@ -447,7 +521,7 @@ def determine_metadata(content: sqlite3.Row, init_msg: Optional[str]) -> Optiona
else: else:
msg = f"{old} changed their number to {new}" msg = f"{old} changed their number to {new}"
elif content["action_type"] == 46: elif content["action_type"] == 46:
return # Voice message in PM??? Seems no need to handle. return # Voice message in PM??? Seems no need to handle.
elif content["action_type"] == 47: elif content["action_type"] == 47:
msg = "The contact is an official business account" msg = "The contact is an official business account"
elif content["action_type"] == 50: elif content["action_type"] == 50:
@@ -464,7 +538,8 @@ def determine_metadata(content: sqlite3.Row, init_msg: Optional[str]) -> Optiona
elif content["action_type"] == 67: elif content["action_type"] == 67:
return # (PM) this contact use secure service from Facebook??? return # (PM) this contact use secure service from Facebook???
elif content["action_type"] == 69: elif content["action_type"] == 69:
return # (PM) this contact use secure service from Facebook??? What's the difference with 67???? # (PM) this contact use secure service from Facebook??? What's the difference with 67????
return
else: else:
return # Unsupported return # Unsupported
return msg return msg
@@ -491,8 +566,10 @@ def get_status_location(output_folder: str, offline_static: str) -> str:
w3css_path = os.path.join(static_folder, "w3.css") w3css_path = os.path.join(static_folder, "w3.css")
if not os.path.isfile(w3css_path): if not os.path.isfile(w3css_path):
with urllib.request.urlopen(w3css) as resp: with urllib.request.urlopen(w3css) as resp:
with open(w3css_path, "wb") as f: f.write(resp.read()) with open(w3css_path, "wb") as f:
f.write(resp.read())
w3css = os.path.join(offline_static, "w3.css") w3css = os.path.join(offline_static, "w3.css")
return w3css
def setup_template(template: Optional[str], no_avatar: bool, experimental: bool = False) -> jinja2.Template: def setup_template(template: Optional[str], no_avatar: bool, experimental: bool = False) -> jinja2.Template:
@@ -522,43 +599,130 @@ def setup_template(template: Optional[str], no_avatar: bool, experimental: bool
template_env.filters['sanitize_except'] = sanitize_except template_env.filters['sanitize_except'] = sanitize_except
return template_env.get_template(template_file) return template_env.get_template(template_file)
# iOS Specific # iOS Specific
APPLE_TIME = 978307200 APPLE_TIME = 978307200
def slugify(value: str, allow_unicode: bool = False) -> str: def safe_name(text: Union[str, bytes]) -> str:
""" """
Convert text to ASCII-only slugs for URL-safe strings. Sanitize the input text and generates a safe file name.
Taken from https://github.com/django/django/blob/master/django/utils/text.py This function serves a similar purpose to slugify() from
Django previously used in this project, but is a clean-room
Reimplementation tailored for performance and a narrower
Use case for this project. Licensed under the same terms
As the project (MIT).
Args: Args:
value (str): The string to convert to a slug. text (str|bytes): The string to be sanitized.
allow_unicode (bool, optional): Whether to allow Unicode characters. Defaults to False.
Returns: Returns:
str: The slugified string with only alphanumerics, underscores, or hyphens. str: The sanitized string with only alphanumerics, underscores, or hyphens.
""" """
value = str(value) if isinstance(text, bytes):
if allow_unicode: text = text.decode("utf-8", "ignore")
value = unicodedata.normalize('NFKC', value) elif not isinstance(text, str):
else: raise TypeError("value must be a string or bytes")
value = unicodedata.normalize('NFKD', value).encode('ascii', 'ignore').decode('ascii') normalized_text = unicodedata.normalize("NFKC", text)
value = re.sub(r'[^\w\s-]', '', value.lower()) safe_chars = [char for char in normalized_text if char.isalnum() or char in "-_ ."]
return re.sub(r'[-\s]+', '-', value).strip('-_') return "-".join(''.join(safe_chars).split())
def get_from_string(msg: Dict, chat_id: str) -> str:
    """Build the display label for the author of a message.

    Args:
        msg: Message mapping; its ``from_me`` and ``sender`` entries are read.
        chat_id: Identifier of the chat, used when no sender is recorded.

    Returns:
        ``"Me"`` for messages sent by the exporting user, otherwise the
        sender (or, when that is empty, the chat id) converted to ``str``.
    """
    if msg["from_me"]:
        return "Me"
    # An empty/None sender falls back to the chat id.
    author = msg["sender"] or chat_id
    return str(author)
def get_chat_type(chat_id: str) -> str:
    """Map a WhatsApp id to the chat type label used in the export.

    Args:
        chat_id: The WhatsApp id of the chat; only its suffix is inspected.

    Returns:
        ``"personal_chat"`` for ids ending in ``@s.whatsapp.net``, otherwise
        ``"private_group"``. Ids that end in neither that suffix nor
        ``@g.us`` are reported with a warning before the default is returned.
    """
    if chat_id.endswith("@s.whatsapp.net"):
        return "personal_chat"
    if not chat_id.endswith("@g.us"):
        # Unrecognised suffix: warn, then fall through to the group default.
        logger.warning("Unknown chat type for %s, defaulting to private_group", chat_id)
    return "private_group"
def get_from_id(msg: Dict, chat_id: str) -> str:
    """Return the user id string for the sender of a message.

    Args:
        msg: Message mapping; its ``from_me`` and ``sender`` entries are read.
        chat_id: Identifier of the chat, used when no sender is recorded.

    Returns:
        ``"user00000"`` for messages sent by the exporting user, otherwise
        ``"user"`` followed by the sender, or by the chat id when the sender
        is empty.
    """
    if msg["from_me"]:
        return "user00000"
    # Format instead of concatenating so a non-str sender (e.g. an int)
    # is stringified rather than raising TypeError, matching how
    # get_from_string treats the same field.
    return f"user{msg['sender'] or chat_id}"
def get_reply_id(data: Dict, reply_key: int) -> Optional[int]:
    """Find the id of the message that a reply refers to.

    Args:
        data: Chat mapping whose ``"messages"`` entry maps message ids to
            message mappings carrying a ``"key_id"``.
        reply_key: The ``key_id`` of the quoted message; a falsy value means
            the message is not a reply.

    Returns:
        The key under which the first message with a matching ``key_id`` is
        stored in ``data["messages"]``, or ``None`` when ``reply_key`` is
        falsy or no message matches.
    """
    if not reply_key:
        return None
    matches = (
        candidate_id
        for candidate_id, candidate in data["messages"].items()
        if candidate["key_id"] == reply_key
    )
    # First match in iteration order wins; None when nothing matches.
    return next(matches, None)
def telegram_json_format(jik: str, data: Dict, timezone_offset) -> Dict:
    """Convert the data of one chat to the Telegram export format.

    Args:
        jik: WhatsApp id of the chat. Its digits form the numeric chat id and
            its suffix selects the chat type.
        data: Chat mapping with a ``"name"`` entry and a ``"messages"``
            mapping of message id -> message. Each message is read for
            ``timestamp``, ``from_me``, ``sender``, ``reply``, ``data`` and
            ``key_id``.
        timezone_offset: Offset passed to ``Timing``; a falsy value selects
            ``CURRENT_TZ_OFFSET``.

    Returns:
        A dict with ``name``, ``type``, ``id`` and ``messages``. Messages
        without text are omitted, and ``reply_to_message_id`` is present only
        when the quoted message was found.
    """
    timing = Timing(timezone_offset or CURRENT_TZ_OFFSET)
    try:
        chat_id = int(''.join([c for c in jik if c.isdigit()]))
    except ValueError:
        # not a real chat: e.g. statusbroadcast
        chat_id = 0

    # Index key_id -> message id once (first occurrence wins) so resolving a
    # reply is a dict lookup instead of a scan over every message.
    reply_targets = {}
    for msg_id, msg in data["messages"].items():
        reply_targets.setdefault(msg.get("key_id"), msg_id)

    messages = []
    for msg_id, msg in data["messages"].items():
        entry = {
            "id": int(msg_id),
            "type": "message",
            "date": timing.format_timestamp(msg["timestamp"], "%Y-%m-%dT%H:%M:%S"),
            "date_unixtime": int(msg["timestamp"]),
            "from": get_from_string(msg, chat_id),
            "from_id": get_from_id(msg, chat_id),
        }
        reply_key = msg["reply"]
        reply_to = reply_targets.get(reply_key) if reply_key else None
        if reply_to:
            # Only emitted when the quoted message exists in this chat.
            entry["reply_to_message_id"] = reply_to
        entry["text"] = msg["data"]
        entry["text_entities"] = [
            {
                # TODO this will lose formatting and different types
                "type": "plain",
                "text": msg["data"],
            }
        ]
        # Drop messages that carry no text.
        if entry["text"]:
            messages.append(entry)

    return {
        "name": data["name"] if data["name"] else jik,
        "type": get_chat_type(jik),
        "id": chat_id,
        "messages": messages,
    }
class WhatsAppIdentifier(StrEnum): class WhatsAppIdentifier(StrEnum):
MESSAGE = "7c7fba66680ef796b916b067077cc246adacf01d" # AppDomainGroup-group.net.whatsapp.WhatsApp.shared-ChatStorage.sqlite # AppDomainGroup-group.net.whatsapp.WhatsApp.shared-ChatStorage.sqlite
CONTACT = "b8548dc30aa1030df0ce18ef08b882cf7ab5212f" # AppDomainGroup-group.net.whatsapp.WhatsApp.shared-ContactsV2.sqlite MESSAGE = "7c7fba66680ef796b916b067077cc246adacf01d"
CALL = "1b432994e958845fffe8e2f190f26d1511534088" # AppDomainGroup-group.net.whatsapp.WhatsApp.shared-CallHistory.sqlite # AppDomainGroup-group.net.whatsapp.WhatsApp.shared-ContactsV2.sqlite
CONTACT = "b8548dc30aa1030df0ce18ef08b882cf7ab5212f"
# AppDomainGroup-group.net.whatsapp.WhatsApp.shared-CallHistory.sqlite
CALL = "1b432994e958845fffe8e2f190f26d1511534088"
DOMAIN = "AppDomainGroup-group.net.whatsapp.WhatsApp.shared" DOMAIN = "AppDomainGroup-group.net.whatsapp.WhatsApp.shared"
class WhatsAppBusinessIdentifier(StrEnum): class WhatsAppBusinessIdentifier(StrEnum):
MESSAGE = "724bd3b98b18518b455a87c1f3ac3a0d189c4466" # AppDomainGroup-group.net.whatsapp.WhatsAppSMB.shared-ChatStorage.sqlite # AppDomainGroup-group.net.whatsapp.WhatsAppSMB.shared-ChatStorage.sqlite
CONTACT = "d7246a707f51ddf8b17ee2dddabd9e0a4da5c552" # AppDomainGroup-group.net.whatsapp.WhatsAppSMB.shared-ContactsV2.sqlite MESSAGE = "724bd3b98b18518b455a87c1f3ac3a0d189c4466"
CALL = "b463f7c4365eefc5a8723930d97928d4e907c603" # AppDomainGroup-group.net.whatsapp.WhatsAppSMB.shared-CallHistory.sqlite # AppDomainGroup-group.net.whatsapp.WhatsAppSMB.shared-ContactsV2.sqlite
DOMAIN = "AppDomainGroup-group.net.whatsapp.WhatsAppSMB.shared" CONTACT = "d7246a707f51ddf8b17ee2dddabd9e0a4da5c552"
# AppDomainGroup-group.net.whatsapp.WhatsAppSMB.shared-CallHistory.sqlite
CALL = "b463f7c4365eefc5a8723930d97928d4e907c603"
DOMAIN = "AppDomainGroup-group.net.whatsapp.WhatsAppSMB.shared"
class JidType(IntEnum): class JidType(IntEnum):
PM = 0 PM = 0

View File

@@ -1,5 +1,12 @@
import vobject import logging
import re
import quopri
from typing import List, TypedDict from typing import List, TypedDict
from Whatsapp_Chat_Exporter.data_model import ChatStore
from Whatsapp_Chat_Exporter.utility import CLEAR_LINE, Device
logger = logging.getLogger(__name__)
class ExportedContactNumbers(TypedDict): class ExportedContactNumbers(TypedDict):
@@ -21,32 +28,155 @@ class ContactsFromVCards:
for number, name in self.contact_mapping: for number, name in self.contact_mapping:
# short number must be a bad contact, lets skip it # short number must be a bad contact, lets skip it
if len(number) <= 5: if len(number) <= 5:
continue
chats_search = filter_chats_by_prefix(chats, number).values()
if chats_search:
for chat in chats_search:
if not hasattr(chat, 'name') or (hasattr(chat, 'name') and chat.name is None):
setattr(chat, 'name', name)
else:
chats.add_chat(number + "@s.whatsapp.net", ChatStore(Device.ANDROID, name))
def decode_quoted_printable(value: str, charset: str) -> str:
    """Decode a vCard value that may be quoted-printable encoded.

    Args:
        value: The raw property value as read from the vCard.
        charset: Name of the character set used to decode the resulting bytes.

    Returns:
        The decoded text, with undecodable bytes replaced. If decoding fails
        for any reason, a warning is logged and ``value`` is returned
        unchanged.
    """
    try:
        # quopri only accepts str input when it is pure ASCII. Convert to
        # bytes up front so a value that mixes literal non-ASCII characters
        # with =XX escapes is still decoded instead of hitting the fallback.
        raw = value.encode("ascii") if value.isascii() else value.encode(charset)
        bytes_val = quopri.decodestring(raw)
        return bytes_val.decode(charset, errors="replace")
    except Exception:
        # Fallback: return the original value if decoding fails
        logger.warning(
            f"Failed to decode quoted-printable value: {value}, "
            f"charset: {charset}. Please report this issue.{CLEAR_LINE}"
        )
        return value
def _parse_vcard_line(line: str) -> tuple[str, dict[str, str], str] | None:
"""
Parses a single vCard property line into its components:
Property Name, Parameters (as a dict), and Value.
Example: 'FN;CHARSET=UTF-8:John Doe' -> ('FN', {'CHARSET': 'UTF-8'}, 'John Doe')
"""
# Find the first colon, which separates the property/parameters from the value.
colon_index = line.find(':')
if colon_index == -1:
return None # Invalid vCard line format
prop_and_params = line[:colon_index].strip()
value = line[colon_index + 1:].strip()
# Split property name from parameters
parts = prop_and_params.split(';')
property_name = parts[0].upper()
parameters = {}
for part in parts[1:]:
if '=' in part:
key, val = part.split('=', 1)
parameters[key.upper()] = val.strip('"') # Remove potential quotes from value
return property_name, parameters, value
def get_vcard_value(entry: str, field_name: str) -> list[str]:
"""
Scans the vCard entry for lines starting with the specific field_name
and returns a list of its decoded values, handling parameters like
ENCODING and CHARSET.
"""
target_name = field_name.upper()
cached_line = ""
charset = "utf-8"
values = []
for line in entry.splitlines():
line = line.strip()
if cached_line:
if line.endswith('='):
cached_line += line[:-1]
continue # Wait for the next line to complete the value
values.append(decode_quoted_printable(cached_line + line, charset))
cached_line = ""
else:
# Skip empty lines or lines that don't start with the target field (after stripping)
if not line or not line.upper().startswith(target_name):
continue continue
for chat in filter_chats_by_prefix(chats, number).values(): parsed = _parse_vcard_line(line)
if not hasattr(chat, 'name') or (hasattr(chat, 'name') and chat.name is None): if parsed is None:
setattr(chat, 'name', name) continue
prop_name, params, raw_value = parsed
if prop_name != target_name:
continue
encoding = params.get('ENCODING')
charset = params.get('CHARSET', 'utf-8')
# Apply decoding if ENCODING parameter is present
if encoding == 'QUOTED-PRINTABLE':
if raw_value.endswith('='):
# Handle soft line breaks in quoted-printable and cache the line
cached_line += raw_value[:-1]
continue # Wait for the next line to complete the value
values.append(decode_quoted_printable(raw_value, charset))
elif encoding:
raise NotImplementedError(f"Encoding '{encoding}' not supported yet.")
else:
values.append(raw_value)
return values
def process_vcard_entry(entry: str) -> dict | bool:
    """
    Process a vCard entry using pure string manipulation

    Args:
        entry: A string containing a single vCard block.

    Returns:
        A dictionary with ``full_name`` and the set of ``numbers`` found in
        the entry, or False if no usable name or no TEL value is present.
    """
    name = None

    # Extract name in priority: FN -> N -> ORG
    for field in ("FN", "N", "ORG"):
        values = get_vcard_value(entry, field)
        if not values:
            continue
        # Structured names use ';' as separator; turn it into spaces and
        # collapse the blanks left behind by empty components.
        candidate = " ".join(values[0].replace(';', ' ').split())
        if candidate:
            name = candidate
            break
        # A blank value (e.g. "FN:" or "N:;;;;") must not hide a usable name
        # in a lower-priority field, so keep looking.

    if not name:
        return False

    numbers = get_vcard_value(entry, "TEL")
    if not numbers:
        return False

    return {
        "full_name": name,
        # Remove duplications
        "numbers": set(numbers),
    }
def read_vcards_file(vcf_file_path, default_country_code: str): def read_vcards_file(vcf_file_path, default_country_code: str):
contacts = [] contacts = []
with open(vcf_file_path, mode="r", encoding="utf-8") as f: with open(vcf_file_path, "r", encoding="utf-8", errors="ignore") as f:
reader = vobject.readComponents(f) content = f.read()
for row in reader:
if hasattr(row, 'fn'): # Split into individual vCards
name = str(row.fn.value) vcards = content.split("BEGIN:VCARD")
elif hasattr(row, 'n'): for vcard in vcards:
name = str(row.n.value) if "END:VCARD" not in vcard:
else: continue
name = None
if not hasattr(row, 'tel') or name is None: if contact := process_vcard_entry(vcard):
continue
contact: ExportedContactNumbers = {
"full_name": name,
"numbers": list(map(lambda tel: tel.value, row.tel_list)),
}
contacts.append(contact) contacts.append(contact)
logger.info(f"Imported {len(contacts)} contacts/vcards{CLEAR_LINE}")
return map_number_to_name(contacts, default_country_code) return map_number_to_name(contacts, default_country_code)
@@ -77,6 +207,6 @@ def normalize_number(number: str, country_code: str):
return number[len(starting_char):] return number[len(starting_char):]
# leading zero should be removed # leading zero should be removed
if starting_char == '0': if number.startswith('0'):
number = number[1:] number = number[1:]
return country_code + number # fall back return country_code + number # fall back

View File

@@ -1,20 +0,0 @@
# from contacts_names_from_vcards import readVCardsFile
from Whatsapp_Chat_Exporter.vcards_contacts import normalize_number, read_vcards_file
def test_readVCardsFile():
assert len(read_vcards_file("contacts.vcf", "973")) > 0
def test_create_number_to_name_dicts():
pass
def test_fuzzy_match_numbers():
pass
def test_normalize_number():
assert normalize_number('0531234567', '1') == '1531234567'
assert normalize_number('001531234567', '2') == '1531234567'
assert normalize_number('+1531234567', '34') == '1531234567'
assert normalize_number('053(123)4567', '34') == '34531234567'
assert normalize_number('0531-234-567', '58') == '58531234567'

View File

@@ -1,329 +1,467 @@
<!DOCTYPE html> <!DOCTYPE html>
<html> <html>
<head> <head>
<title>Whatsapp - {{ name }}</title> <title>Whatsapp - {{ name }}</title>
<meta charset="UTF-8"> <meta charset="UTF-8">
<link rel="stylesheet" href="{{w3css}}"> <script src="https://cdn.tailwindcss.com"></script>
<style> <script>
html, body { tailwind.config = {
font-size: 12px; theme: {
scroll-behavior: smooth; extend: {
} colors: {
header { whatsapp: {
position: fixed; light: '#e7ffdb',
z-index: 20; DEFAULT: '#25D366',
border-bottom: 2px solid #e3e6e7; dark: '#075E54',
font-size: 2em; chat: '#efeae2',
font-weight: bolder; 'chat-light': '#f0f2f5',
background-color: white; }
padding: 20px 0 20px 0; }
} }
footer { }
border-top: 2px solid #e3e6e7; }
padding: 20px 0 20px 0; </script>
} <style>
article { body, html {
width:500px; height: 100%;
margin:100px auto; margin: 0;
z-index:10; padding: 0;
font-size: 15px; scroll-behavior: smooth !important;
word-wrap: break-word; }
} .chat-list {
img, video { height: calc(100vh - 120px);
max-width:100%; overflow-y: auto;
} }
div.reply{ .message-list {
font-size: 13px; height: calc(100vh - 90px);
text-decoration: none; overflow-y: auto;
} }
div:target::before { @media (max-width: 640px) {
content: ''; .chat-list, .message-list {
display: block; height: calc(100vh - 108px);
height: 115px; }
margin-top: -115px; }
visibility: hidden; header {
} position: fixed;
div:target { z-index: 20;
border-style: solid; border-bottom: 2px solid #e3e6e7;
border-width: 2px; font-size: 2em;
animation: border-blink 0.5s steps(1) 5; font-weight: bolder;
border-color: rgba(0,0,0,0) background-color: white;
} padding: 20px 0 20px 0;
table { }
width: 100%; footer {
} margin-top: 10px;
@keyframes border-blink { border-top: 2px solid #e3e6e7;
0% { padding: 20px 0 20px 0;
border-color: #2196F3; }
} article {
50% { width:430px;
border-color: rgba(0,0,0,0); margin: auto;
} z-index:10;
} font-size: 15px;
.avatar { word-wrap: break-word;
border-radius:50%; }
overflow:hidden; img, video, audio{
max-width: 64px; max-width:100%;
max-height: 64px; box-sizing: border-box;
} }
.name { div.reply{
color: #3892da; font-size: 13px;
} text-decoration: none;
.pad-left-10 { }
padding-left: 10px; div:target::before {
} content: '';
.pad-right-10 { display: block;
padding-right: 10px; height: 115px;
} margin-top: -115px;
.reply_link { visibility: hidden;
color: #168acc; }
} div:target {
.blue { animation: 3s highlight;
color: #70777a; }
} .avatar {
.sticker { border-radius:50%;
max-width: 100px !important; overflow:hidden;
max-height: 100px !important; max-width: 64px;
} max-height: 64px;
</style> }
<base href="{{ media_base }}" target="_blank"> .name {
</head> color: #3892da;
<body> }
<header class="w3-center w3-top"> .pad-left-10 {
{{ headline }} padding-left: 10px;
{% if status is not none %} }
<br> .pad-right-10 {
<span class="w3-small">{{ status }}</span> padding-right: 10px;
{% endif %} }
</header> .reply_link {
<article class="w3-container"> color: #168acc;
<div class="table"> }
{% set last = {'last': 946688461.001} %} .blue {
{% for msg in msgs -%} color: #70777a;
<div class="w3-row w3-padding-small w3-margin-bottom" id="{{ msg.key_id }}"> }
{% if determine_day(last.last, msg.timestamp) is not none %} .sticker {
<div class="w3-center w3-padding-16 blue">{{ determine_day(last.last, msg.timestamp) }}</div> max-width: 100px !important;
{% if last.update({'last': msg.timestamp}) %}{% endif %} max-height: 100px !important;
{% endif %} }
{% if msg.from_me == true %} @keyframes highlight {
<div class="w3-row"> from {
<div class="w3-left blue">{{ msg.time }}</div> background-color: rgba(37, 211, 102, 0.1);
<div class="name w3-right-align pad-left-10">You</div> }
</div> to {
<div class="w3-row"> background-color: transparent;
{% if not no_avatar and my_avatar is not none %} }
<div class="w3-col m10 l10"> }
{% else %} .search-input {
<div class="w3-col m12 l12"> transform: translateY(-100%);
{% endif %} transition: transform 0.3s ease-in-out;
<div class="w3-right-align"> }
{% if msg.reply is not none %} .search-input.active {
<div class="reply"> transform: translateY(0);
<span class="blue">Replying to </span> }
<a href="#{{msg.reply}}" target="_self" class="reply_link no-base"> .reply-box:active {
{% if msg.quoted_data is not none %} background-color:rgb(200 202 205 / var(--tw-bg-opacity, 1));
"{{msg.quoted_data}}" }
{% else %} .info-box-tooltip {
this message --tw-translate-x: -50%;
{% endif %} transform: translate(var(--tw-translate-x), var(--tw-translate-y)) rotate(var(--tw-rotate)) skewX(var(--tw-skew-x)) skewY(var(--tw-skew-y)) scaleX(var(--tw-scale-x)) scaleY(var(--tw-scale-y));
</a> }
</div> </style>
{% endif %} <script>
{% if msg.meta == true or msg.media == false and msg.data is none %} function search(event) {
<div class="w3-panel w3-border-blue w3-pale-blue w3-rightbar w3-leftbar w3-threequarter w3-center"> keywords = document.getElementById("mainHeaderSearchInput").value;
{% if msg.safe %} hits = [];
<p>{{ msg.data | safe or 'Not supported WhatsApp internal message' }}</p> document.querySelectorAll(".message-text").forEach(elem => {
{% else %} if (elem.innerText.trim().includes(keywords)){
<p>{{ msg.data or 'Not supported WhatsApp internal message' }}</p> hits.push(elem.parentElement.parentElement.id);
{% endif %} }
</div> })
{% if msg.caption is not none %} console.log(hits);
<div class="w3-container"> }
{{ msg.caption | urlize(none, true, '_blank') }} </script>
</div> <base href="{{ media_base }}" target="_blank">
{% endif %} </head>
{% else %} <body>
{% if msg.media == false %} <article class="h-screen bg-whatsapp-chat-light">
{{ msg.data | sanitize_except() | urlize(none, true, '_blank') }} <div class="w-full flex flex-col">
{% else %} <div class="p-3 bg-whatsapp-dark flex items-center justify-between border-l border-[#d1d7db]">
{% if "image/" in msg.mime %} <div class="flex items-center">
<a href="{{ msg.data }}"> {% if not no_avatar %}
<img src="{{ msg.thumb if msg.thumb is not none else msg.data }}" {{ 'class="sticker"' | safe if msg.sticker }} loading="lazy"/> <div class="w3-col m2 l2">
</a> {% if their_avatar is not none %}
{% elif "audio/" in msg.mime %} <a href="{{ their_avatar }}"><img src="{{ their_avatar_thumb or '' }}" onerror="this.style.display='none'" class="w-10 h-10 rounded-full mr-3" loading="lazy"></a>
<audio controls="controls" autobuffer="autobuffer"> {% else %}
<source src="{{ msg.data }}" /> <img src="{{ their_avatar_thumb or '' }}" onerror="this.style.display='none'" class="w-10 h-10 rounded-full mr-3" loading="lazy">
</audio> {% endif %}
{% elif "video/" in msg.mime %} </div>
<video class="lazy" autobuffer {% if msg.message_type|int == 13 or msg.message_type|int == 11 %}autoplay muted loop playsinline{%else%}controls{% endif %}> {% endif %}
<source type="{{ msg.mime }}" data-src="{{ msg.data }}" /> <div>
</video> <h2 class="text-white font-medium">{{ headline }}</h2>
{% elif "/" in msg.mime %} {% if status is not none %}<p class="text-[#8696a0] text-xs">{{ status }}</p>{% endif %}
<div class="w3-panel w3-border-blue w3-pale-blue w3-rightbar w3-leftbar w3-threequarter w3-center"> </div>
<p>The file cannot be displayed here, however it should be located at <a href="./{{ msg.data }}">here</a></p> </div>
</div> <div class="flex space-x-4">
{% else %} <!-- <button id="searchButton">
{% filter escape %}{{ msg.data }}{% endfilter %} <svg xmlns="http://www.w3.org/2000/svg" class="h-5 w-5 text-[#aebac1]" fill="none" viewBox="0 0 24 24" stroke="currentColor">
{% endif %} <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M21 21l-6-6m2-5a7 7 0 11-14 0 7 7 0 0114 0z" />
{% if msg.caption is not none %} </svg>
<div class="w3-container"> </button> -->
{{ msg.caption | urlize(none, true, '_blank') }} <!-- <svg xmlns="http://www.w3.org/2000/svg" class="h-5 w-5 text-[#aebac1]" fill="none" viewBox="0 0 24 24" stroke="currentColor">
</div> <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M15 19l-7-7 7-7" />
{% endif %} </svg> -->
{% endif %} {% if previous %}
{% endif %} <a href="./{{ previous }}" target="_self">
</div> <svg xmlns="http://www.w3.org/2000/svg" class="h-5 w-5 text-[#aebac1]" fill="none" viewBox="0 0 24 24" stroke="currentColor">
</div> <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M15 5l-7 7 7 7" />
{% if not no_avatar and my_avatar is not none %} </svg>
<div class="w3-col m2 l2 pad-left-10"> </a>
<a href="{{ my_avatar }}"> {% endif %}
<img src="{{ my_avatar }}" onerror="this.style.display='none'" class="avatar" loading="lazy"> {% if next %}
</a> <a href="./{{ next }}" target="_self">
</div> <svg xmlns="http://www.w3.org/2000/svg" class="h-5 w-5 text-[#aebac1]" fill="none" viewBox="0 0 24 24" stroke="currentColor">
{% endif %} <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M9 5l7 7-7 7" />
</div> </svg>
{% else %} </a>
<div class="w3-row"> {% endif %}
<div class="w3-left pad-right-10 name"> </div>
{% if msg.sender is not none %} <!-- Search Input Overlay -->
{{ msg.sender }} <div id="mainSearchInput" class="search-input absolute article top-0 bg-whatsapp-dark p-3 flex items-center space-x-3">
{% else %} <button id="closeMainSearch" class="text-[#aebac1]">
{{ name }} <svg xmlns="http://www.w3.org/2000/svg" class="h-6 w-6" fill="none" viewBox="0 0 24 24" stroke="currentColor">
{% endif %} <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M15 19l-7-7 7-7" />
</div> </svg>
<div class="w3-right-align blue">{{ msg.time }}</div> </button>
</div> <input type="text" placeholder="Search..." class="flex-1 bg-[#1f2c34] text-white rounded-lg px-3 py-1 focus:outline-none" id="mainHeaderSearchInput" onkeyup="search(event)">
<div class="w3-row"> </div>
{% if not no_avatar %} </div>
<div class="w3-col m2 l2"> </div>
{% if their_avatar is not none %} <div class="flex-1 p-5 message-list">
<a href="{{ their_avatar }}"><img src="{{ their_avatar_thumb or '' }}" onerror="this.style.display='none'" class="avatar" loading="lazy"></a> <div class="flex flex-col space-y-2">
{% else %} <!--Date-->
<img src="{{ their_avatar_thumb or '' }}" onerror="this.style.display='none'" class="avatar" loading="lazy"> {% set last = {'last': 946688461.001} %}
{% endif %} {% for msg in msgs -%}
</div> {% if determine_day(last.last, msg.timestamp) is not none %}
<div class="w3-col m10 l10"> <div class="flex justify-center">
{% else %} <div class="bg-[#e1f2fb] rounded-lg px-2 py-1 text-xs text-[#54656f]">
<div class="w3-col m12 l12"> {{ determine_day(last.last, msg.timestamp) }}
{% endif %} </div>
<div class="w3-left-align"> </div>
{% if msg.reply is not none %} {% if last.update({'last': msg.timestamp}) %}{% endif %}
<div class="reply"> {% endif %}
<span class="blue">Replying to </span> <!--Actual messages-->
<a href="#{{msg.reply}}" target="_self" class="reply_link no-base"> {% if msg.from_me == true %}
{% if msg.quoted_data is not none %} <div class="flex justify-end items-center group" id="{{ msg.key_id }}">
"{{msg.quoted_data}}" <div class="opacity-0 group-hover:opacity-100 transition-opacity duration-200 relative mr-2">
{% else %} <div class="relative">
this message <div class="relative group/tooltip">
{% endif %} <svg xmlns="http://www.w3.org/2000/svg" class="h-5 w-5 text-[#8696a0] hover:text-[#54656f] cursor-pointer" fill="none" viewBox="0 0 24 24" stroke="currentColor">
</a> <use href="#info-icon"></use>
</div> </svg>
{% endif %} <div class="absolute bottom-full info-box-tooltip mb-2 hidden group-hover/tooltip:block z-50">
{% if msg.meta == true or msg.media == false and msg.data is none %} <div class="bg-black text-white text-xs rounded py-1 px-2 whitespace-nowrap">
<div class="w3-panel w3-border-blue w3-pale-blue w3-rightbar w3-leftbar w3-threequarter w3-center"> Delivered at {{msg.received_timestamp or 'unknown'}}
{% if msg.safe %} {% if msg.read_timestamp is not none %}
<p>{{ msg.data | safe or 'Not supported WhatsApp internal message' }}</p> <br>Read at {{ msg.read_timestamp }}
{% else %} {% endif %}
<p>{{ msg.data or 'Not supported WhatsApp internal message' }}</p> </div>
{% endif %} <div class="absolute top-full right-3 -mt-1 border-4 border-transparent border-t-black"></div>
</div> </div>
{% if msg.caption is not none %} </div>
<div class="w3-container"> </div>
{{ msg.caption | urlize(none, true, '_blank') }} </div>
</div> <div class="bg-whatsapp-light rounded-lg p-2 max-w-[80%] shadow-sm">
{% endif %} {% if msg.reply is not none %}
{% else %} <a href="#{{msg.reply}}" target="_self" class="no-base">
{% if msg.media == false %} <div class="mb-2 p-1 bg-whatsapp-chat-light rounded border-l-4 border-whatsapp text-sm reply-box">
{{ msg.data | sanitize_except() | urlize(none, true, '_blank') }} <p class="text-whatsapp font-medium text-xs">Replying to</p>
{% else %} <p class="text-[#111b21] text-xs truncate">
{% if "image/" in msg.mime %} {% if msg.quoted_data is not none %}
<a href="{{ msg.data }}"> "{{msg.quoted_data}}"
<img src="{{ msg.thumb if msg.thumb is not none else msg.data }}" {{ 'class="sticker"' | safe if msg.sticker }} loading="lazy"/> {% else %}
</a> this message
{% elif "audio/" in msg.mime %} {% endif %}
<audio controls="controls" autobuffer="autobuffer"> </p>
<source src="{{ msg.data }}" /> </div>
</audio> </a>
{% elif "video/" in msg.mime %} {% endif %}
<video class="lazy" autobuffer {% if msg.message_type|int == 13 or msg.message_type|int == 11 %}autoplay muted loop playsinline{%else%}controls{% endif %}> <p class="text-[#111b21] text-sm message-text">
<source type="{{ msg.mime }}" data-src="{{ msg.data }}" /> {% if msg.meta == true or msg.media == false and msg.data is none %}
</video> <div class="flex justify-center mb-2">
{% elif "/" in msg.mime %} <div class="bg-[#FFF3C5] rounded-lg px-3 py-2 text-sm text-[#856404] flex items-center">
<div class="w3-panel w3-border-blue w3-pale-blue w3-rightbar w3-leftbar w3-threequarter w3-center"> {% if msg.safe %}
<p>The file cannot be displayed here, however it should be located at <a href="./{{ msg.data }}">here</a></p> {{ msg.data | safe or 'Not supported WhatsApp internal message' }}
</div> {% else %}
{% else %} {{ msg.data or 'Not supported WhatsApp internal message' }}
{% filter escape %}{{ msg.data }}{% endfilter %} {% endif %}
{% endif %} </div>
{% if msg.caption is not none %} </div>
<div class="w3-container"> {% if msg.caption is not none %}
{{ msg.caption | urlize(none, true, '_blank') }} <p>{{ msg.caption | urlize(none, true, '_blank') }}</p>
</div> {% endif %}
{% endif %} {% else %}
{% endif %} {% if msg.media == false %}
{% endif %} {{ msg.data | sanitize_except() | urlize(none, true, '_blank') }}
</div> {% else %}
</div> {% if "image/" in msg.mime %}
</div> <a href="{{ msg.data }}">
{% endif %} <img src="{{ msg.thumb if msg.thumb is not none else msg.data }}" {{ 'class="sticker"' | safe if msg.sticker }} loading="lazy"/>
</div> </a>
{% endfor %} {% elif "audio/" in msg.mime %}
</div> <audio controls="controls" autobuffer="autobuffer">
</article> <source src="{{ msg.data }}" />
<footer class="w3-center"> </audio>
<h2> {% elif "video/" in msg.mime %}
{% if previous %} <video class="lazy" autobuffer {% if msg.message_type|int == 13 or msg.message_type|int == 11 %}autoplay muted loop playsinline{%else%}controls{% endif %}>
<a href="./{{ previous }}" target="_self">Previous</a> <source type="{{ msg.mime }}" data-src="{{ msg.data }}" />
{% endif %} </video>
<h2> {% elif "/" in msg.mime %}
{% if next %} The file cannot be displayed here, however it should be located at <a href="./{{ msg.data }}">here</a>
<a href="./{{ next }}" target="_self">Next</a> {% else %}
{% else %} {% filter escape %}{{ msg.data }}{% endfilter %}
End of History {% endif %}
{% endif %} {% if msg.caption is not none %}
</h2> {{ msg.caption | urlize(none, true, '_blank') }}
<br> {% endif %}
Portions of this page are reproduced from <a href="https://web.dev/articles/lazy-loading-video">work</a> created and <a href="https://developers.google.com/readme/policies">shared by Google</a> and used according to terms described in the <a href="https://www.apache.org/licenses/LICENSE-2.0">Apache 2.0 License</a>. {% endif %}
</footer> {% endif %}
<script> </p>
document.addEventListener("DOMContentLoaded", function() { <p class="text-[10px] text-[#667781] text-right mt-1">{{ msg.time }}</p>
var lazyVideos = [].slice.call(document.querySelectorAll("video.lazy")); </div>
</div>
{% else %}
<div class="flex justify-start items-center group" id="{{ msg.key_id }}">
<div class="bg-white rounded-lg p-2 max-w-[80%] shadow-sm">
{% if msg.reply is not none %}
<a href="#{{msg.reply}}" target="_self" class="no-base">
<div class="mb-2 p-1 bg-whatsapp-chat-light rounded border-l-4 border-whatsapp text-sm reply-box">
<p class="text-whatsapp font-medium text-xs">Replying to</p>
<p class="text-[#808080] text-xs truncate">
{% if msg.quoted_data is not none %}
{{msg.quoted_data}}
{% else %}
this message
{% endif %}
</p>
</div>
</a>
{% endif %}
<p class="text-[#111b21] text-sm">
{% if msg.meta == true or msg.media == false and msg.data is none %}
<div class="flex justify-center mb-2">
<div class="bg-[#FFF3C5] rounded-lg px-3 py-2 text-sm text-[#856404] flex items-center">
{% if msg.safe %}
{{ msg.data | safe or 'Not supported WhatsApp internal message' }}
{% else %}
{{ msg.data or 'Not supported WhatsApp internal message' }}
{% endif %}
</div>
</div>
{% if msg.caption is not none %}
<p>{{ msg.caption | urlize(none, true, '_blank') }}</p>
{% endif %}
{% else %}
{% if msg.media == false %}
{{ msg.data | sanitize_except() | urlize(none, true, '_blank') }}
{% else %}
{% if "image/" in msg.mime %}
<a href="{{ msg.data }}">
<img src="{{ msg.thumb if msg.thumb is not none else msg.data }}" {{ 'class="sticker"' | safe if msg.sticker }} loading="lazy"/>
</a>
{% elif "audio/" in msg.mime %}
<audio controls="controls" autobuffer="autobuffer">
<source src="{{ msg.data }}" />
</audio>
{% elif "video/" in msg.mime %}
<video class="lazy" autobuffer {% if msg.message_type|int == 13 or msg.message_type|int == 11 %}autoplay muted loop playsinline{%else%}controls{% endif %}>
<source type="{{ msg.mime }}" data-src="{{ msg.data }}" />
</video>
{% elif "/" in msg.mime %}
The file cannot be displayed here, however it should be located at <a href="./{{ msg.data }}">here</a>
{% else %}
{% filter escape %}{{ msg.data }}{% endfilter %}
{% endif %}
{% if msg.caption is not none %}
{{ msg.caption | urlize(none, true, '_blank') }}
{% endif %}
{% endif %}
{% endif %}
</p>
<div class="flex items-baseline text-[10px] text-[#667781] mt-1 gap-2">
<span class="flex-shrink-0">
{% if msg.sender is not none %}
{{ msg.sender }}
{% endif %}
</span>
<span class="flex-grow min-w-[4px]"></span>
<span class="flex-shrink-0">{{ msg.time }}</span>
</div>
</div>
<!-- <div class="opacity-0 group-hover:opacity-100 transition-opacity duration-200 relative ml-2">
<div class="relative">
<div class="relative group/tooltip">
<svg xmlns="http://www.w3.org/2000/svg" class="h-5 w-5 text-[#8696a0] hover:text-[#54656f] cursor-pointer" fill="none" viewBox="0 0 24 24" stroke="currentColor">
<use href="#info-icon"></use>
</svg>
<div class="absolute bottom-full info-box-tooltip mb-2 hidden group-hover/tooltip:block z-50">
<div class="bg-black text-white text-xs rounded py-1 px-2 whitespace-nowrap">
Received at {{msg.received_timestamp or 'unknown'}}
</div>
<div class="absolute top-full right-3 ml-1 border-4 border-transparent border-t-black"></div>
</div>
</div>
</div>
</div> -->
</div>
{% endif %}
{% endfor %}
</div>
<footer>
<h2 class="text-center">
{% if not next %}
End of History
{% endif %}
</h2>
<br>
Portions of this page are reproduced from <a href="https://web.dev/articles/lazy-loading-video">work</a> created and <a href="https://developers.google.com/readme/policies">shared by Google</a> and used according to terms described in the <a href="https://www.apache.org/licenses/LICENSE-2.0">Apache 2.0 License</a>.
</footer>
<svg style="display: none;">
<!-- Tooltip info icon -->
<symbol id="info-icon" viewBox="0 0 24 24">
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M13 16h-1v-4h-1m1-4h.01M21 12a9 9 0 11-18 0 9 9 0 0118 0z" />
</symbol>
</svg>
</div>
</article>
</body>
<script>
// Search functionality
const searchButton = document.getElementById('searchButton');
const mainSearchInput = document.getElementById('mainSearchInput');
const closeMainSearch = document.getElementById('closeMainSearch');
const mainHeaderSearchInput = document.getElementById('mainHeaderSearchInput');
if ("IntersectionObserver" in window) { // Function to show search input
var lazyVideoObserver = new IntersectionObserver(function(entries, observer) { const showSearch = () => {
entries.forEach(function(video) { mainSearchInput.classList.add('active');
if (video.isIntersecting) { mainHeaderSearchInput.focus();
for (var source in video.target.children) { };
var videoSource = video.target.children[source];
if (typeof videoSource.tagName === "string" && videoSource.tagName === "SOURCE") {
videoSource.src = videoSource.dataset.src;
}
}
video.target.load(); // Function to hide search input
video.target.classList.remove("lazy"); const hideSearch = () => {
lazyVideoObserver.unobserve(video.target); mainSearchInput.classList.remove('active');
} mainHeaderSearchInput.value = '';
}); };
});
lazyVideos.forEach(function(lazyVideo) { // Event listeners
lazyVideoObserver.observe(lazyVideo); searchButton.addEventListener('click', showSearch);
}); closeMainSearch.addEventListener('click', hideSearch);
}
}); // Handle ESC key
</script> document.addEventListener('keydown', (event) => {
<script> if (event.key === 'Escape' && mainSearchInput.classList.contains('active')) {
// Prevent the <base> tag from affecting links with the class "no-base" hideSearch();
document.querySelectorAll('.no-base').forEach(link => { }
link.addEventListener('click', function(event) { });
const href = this.getAttribute('href'); </script>
if (href.startsWith('#')) { <script>
window.location.hash = href; document.addEventListener("DOMContentLoaded", function() {
event.preventDefault(); var lazyVideos = [].slice.call(document.querySelectorAll("video.lazy"));
}
}); if ("IntersectionObserver" in window) {
}); var lazyVideoObserver = new IntersectionObserver(function(entries, observer) {
</script> entries.forEach(function(video) {
</body> if (video.isIntersecting) {
for (var source in video.target.children) {
var videoSource = video.target.children[source];
if (typeof videoSource.tagName === "string" && videoSource.tagName === "SOURCE") {
videoSource.src = videoSource.dataset.src;
}
}
video.target.load();
video.target.classList.remove("lazy");
lazyVideoObserver.unobserve(video.target);
}
});
});
lazyVideos.forEach(function(lazyVideo) {
lazyVideoObserver.observe(lazyVideo);
});
}
});
</script>
<script>
// Prevent the <base> tag from affecting links with the class "no-base"
document.querySelectorAll('.no-base').forEach(link => {
link.addEventListener('click', function(event) {
const href = this.getAttribute('href');
if (href.startsWith('#')) {
window.location.hash = href;
event.preventDefault();
}
});
});
</script>
</html> </html>

View File

@@ -1,467 +0,0 @@
<!DOCTYPE html>
<html>
<head>
<title>Whatsapp - {{ name }}</title>
<meta charset="UTF-8">
<script src="https://cdn.tailwindcss.com"></script>
<script>
// Runtime configuration picked up by the Tailwind CDN script loaded just above.
// It extends (rather than replaces) the default theme with a `whatsapp` colour
// family, which is what makes utilities such as `bg-whatsapp-dark`,
// `bg-whatsapp-light`, `bg-whatsapp-chat-light`, `border-whatsapp` and
// `text-whatsapp` available to the markup below.
tailwind.config = {
theme: {
extend: {
colors: {
whatsapp: {
// Bubble background of messages sent by the exporting user (`bg-whatsapp-light`).
light: '#e7ffdb',
// `DEFAULT` is the un-suffixed shade: `border-whatsapp` / `text-whatsapp` on reply boxes.
DEFAULT: '#25D366',
// Header bar and search overlay background (`bg-whatsapp-dark`).
dark: '#075E54',
// NOTE(review): no `*-whatsapp-chat` utility is used anywhere in this template.
chat: '#efeae2',
// Page background and quoted-reply box background (`bg-whatsapp-chat-light`).
'chat-light': '#f0f2f5',
}
}
}
}
}
</script>
<style>
/*
 * Hand-written rules that complement the Tailwind utilities used in the markup.
 * Several selectors below have no matching element in this template; they are
 * flagged with NOTE(review) instead of being removed, because message bodies
 * rendered with the `safe` filter could carry their own markup.
 */

/* Full-height page with smooth scrolling for in-page "#message-id" jumps. */
body, html {
height: 100%;
margin: 0;
padding: 0;
scroll-behavior: smooth !important;
}
/* NOTE(review): nothing in this template carries the `chat-list` class. */
.chat-list {
height: calc(100vh - 120px);
overflow-y: auto;
}
/* Scrollable pane that holds the messages; sized to the viewport minus 90px. */
.message-list {
height: calc(100vh - 90px);
overflow-y: auto;
}
/* On narrow viewports both panes subtract 108px instead. */
@media (max-width: 640px) {
.chat-list, .message-list {
height: calc(100vh - 108px);
}
}
/* NOTE(review): this template contains no <header> element; presumably a leftover from an earlier layout. */
header {
position: fixed;
z-index: 20;
border-bottom: 2px solid #e3e6e7;
font-size: 2em;
font-weight: bolder;
background-color: white;
padding: 20px 0 20px 0;
}
/* Attribution / "End of History" footer rendered after the message list. */
footer {
margin-top: 10px;
border-top: 2px solid #e3e6e7;
padding: 20px 0 20px 0;
}
/* The whole chat is a fixed 430px column centred horizontally. */
/* This is an element selector: the `article` *class* on the search overlay does not match it. */
article {
width:430px;
margin: auto;
z-index:10;
font-size: 15px;
word-wrap: break-word;
}
/* Media never overflows the message bubble it is placed in. */
img, video, audio{
max-width:100%;
box-sizing: border-box;
}
/* NOTE(review): no `div.reply` exists in this template (reply previews use `.reply-box`). */
div.reply{
font-size: 13px;
text-decoration: none;
}
/* Inserts an invisible 115px block, pulled up by the same amount, before the */
/* div addressed by the URL fragment; presumably to leave headroom above a */
/* message reached through a reply link. */
div:target::before {
content: '';
display: block;
height: 115px;
margin-top: -115px;
visibility: hidden;
}
/* Flash the targeted message once using the `highlight` keyframes below. */
div:target {
animation: 3s highlight;
}
/* NOTE(review): `.avatar`, `.name`, `.pad-left-10`, `.pad-right-10`, `.reply_link` */
/* and `.blue` are not referenced by the markup in this template. */
.avatar {
border-radius:50%;
overflow:hidden;
max-width: 64px;
max-height: 64px;
}
.name {
color: #3892da;
}
.pad-left-10 {
padding-left: 10px;
}
.pad-right-10 {
padding-right: 10px;
}
.reply_link {
color: #168acc;
}
.blue {
color: #70777a;
}
/* Applied to <img> when `msg.sticker` is set; `!important` overrides the max-width: 100% rule above. */
.sticker {
max-width: 100px !important;
max-height: 100px !important;
}
/* Fades from a faint tint of the WhatsApp green (#25D366 at 10% alpha) to transparent. */
@keyframes highlight {
from {
background-color: rgba(37, 211, 102, 0.1);
}
to {
background-color: transparent;
}
}
/* Search overlay: shifted up by its own height by default ... */
.search-input {
transform: translateY(-100%);
transition: transform 0.3s ease-in-out;
}
/* ... and slid back into place while the script has added the `active` class. */
.search-input.active {
transform: translateY(0);
}
/* Pressed-state feedback for the quoted-reply box. */
.reply-box:active {
background-color:rgb(200 202 205 / var(--tw-bg-opacity, 1));
}
/* Shifts the tooltip left by half its width by overriding --tw-translate-x and */
/* re-stating a transform built from the --tw-* variables. */
/* NOTE(review): the other --tw-* variables are not set here; this assumes Tailwind defines them - confirm. */
.info-box-tooltip {
--tw-translate-x: -50%;
transform: translate(var(--tw-translate-x), var(--tw-translate-y)) rotate(var(--tw-rotate)) skewX(var(--tw-skew-x)) skewY(var(--tw-skew-y)) scaleX(var(--tw-scale-x)) scaleY(var(--tw-scale-y));
}
</style>
<script>
/**
 * Find the messages whose text contains the current search query.
 *
 * Reads the query from the #mainHeaderSearchInput field, tests it (case
 * sensitively, as a plain substring) against the trimmed text of every
 * `.message-text` element and logs the ids of the matching message rows to
 * the console. The row id is taken from the grandparent of the text element,
 * i.e. the element that carries `id="{{ msg.key_id }}"`.
 *
 * @param {Event} event - the keyup event from the search field (currently unused).
 */
function search(event) {
    // Declared locally: the previous undeclared assignments created implicit
    // globals (and would throw a ReferenceError in strict/module code).
    const keywords = document.getElementById("mainHeaderSearchInput").value;
    const hits = [];
    document.querySelectorAll(".message-text").forEach(elem => {
        if (elem.innerText.trim().includes(keywords)) {
            hits.push(elem.parentElement.parentElement.id);
        }
    });
    console.log(hits);
}
</script>
<base href="{{ media_base }}" target="_blank">
</head>
<body>
<article class="h-screen bg-whatsapp-chat-light">
<div class="w-full flex flex-col">
<div class="p-3 bg-whatsapp-dark flex items-center justify-between border-l border-[#d1d7db]">
<div class="flex items-center">
{% if not no_avatar %}
<div class="w3-col m2 l2">
{% if their_avatar is not none %}
<a href="{{ their_avatar }}"><img src="{{ their_avatar_thumb or '' }}" onerror="this.style.display='none'" class="w-10 h-10 rounded-full mr-3" loading="lazy"></a>
{% else %}
<img src="{{ their_avatar_thumb or '' }}" onerror="this.style.display='none'" class="w-10 h-10 rounded-full mr-3" loading="lazy">
{% endif %}
</div>
{% endif %}
<div>
<h2 class="text-white font-medium">{{ headline }}</h2>
{% if status is not none %}<p class="text-[#8696a0] text-xs">{{ status }}</p>{% endif %}
</div>
</div>
<div class="flex space-x-4">
<!-- <button id="searchButton">
<svg xmlns="http://www.w3.org/2000/svg" class="h-5 w-5 text-[#aebac1]" fill="none" viewBox="0 0 24 24" stroke="currentColor">
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M21 21l-6-6m2-5a7 7 0 11-14 0 7 7 0 0114 0z" />
</svg>
</button> -->
<!-- <svg xmlns="http://www.w3.org/2000/svg" class="h-5 w-5 text-[#aebac1]" fill="none" viewBox="0 0 24 24" stroke="currentColor">
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M15 19l-7-7 7-7" />
</svg> -->
{% if previous %}
<a href="./{{ previous }}" target="_self">
<svg xmlns="http://www.w3.org/2000/svg" class="h-5 w-5 text-[#aebac1]" fill="none" viewBox="0 0 24 24" stroke="currentColor">
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M15 5l-7 7 7 7" />
</svg>
</a>
{% endif %}
{% if next %}
<a href="./{{ next }}" target="_self">
<svg xmlns="http://www.w3.org/2000/svg" class="h-5 w-5 text-[#aebac1]" fill="none" viewBox="0 0 24 24" stroke="currentColor">
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M9 5l7 7-7 7" />
</svg>
</a>
{% endif %}
</div>
<!-- Search Input Overlay -->
<div id="mainSearchInput" class="search-input absolute article top-0 bg-whatsapp-dark p-3 flex items-center space-x-3">
<button id="closeMainSearch" class="text-[#aebac1]">
<svg xmlns="http://www.w3.org/2000/svg" class="h-6 w-6" fill="none" viewBox="0 0 24 24" stroke="currentColor">
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M15 19l-7-7 7-7" />
</svg>
</button>
<input type="text" placeholder="Search..." class="flex-1 bg-[#1f2c34] text-white rounded-lg px-3 py-1 focus:outline-none" id="mainHeaderSearchInput" onkeyup="search(event)">
</div>
</div>
</div>
<div class="flex-1 p-5 message-list">
<div class="flex flex-col space-y-2">
<!--Date-->
{% set last = {'last': 946688461.001} %}
{% for msg in msgs -%}
{% if determine_day(last.last, msg.timestamp) is not none %}
<div class="flex justify-center">
<div class="bg-[#e1f2fb] rounded-lg px-2 py-1 text-xs text-[#54656f]">
{{ determine_day(last.last, msg.timestamp) }}
</div>
</div>
{% if last.update({'last': msg.timestamp}) %}{% endif %}
{% endif %}
<!--Actual messages-->
{% if msg.from_me == true %}
<div class="flex justify-end items-center group" id="{{ msg.key_id }}">
<div class="opacity-0 group-hover:opacity-100 transition-opacity duration-200 relative mr-2">
<div class="relative">
<div class="relative group/tooltip">
<svg xmlns="http://www.w3.org/2000/svg" class="h-5 w-5 text-[#8696a0] hover:text-[#54656f] cursor-pointer" fill="none" viewBox="0 0 24 24" stroke="currentColor">
<use href="#info-icon"></use>
</svg>
<div class="absolute bottom-full info-box-tooltip mb-2 hidden group-hover/tooltip:block z-50">
<div class="bg-black text-white text-xs rounded py-1 px-2 whitespace-nowrap">
Delivered at {{msg.received_timestamp or 'unknown'}}
{% if msg.read_timestamp is not none %}
<br>Read at {{ msg.read_timestamp }}
{% endif %}
</div>
<div class="absolute top-full right-3 -mt-1 border-4 border-transparent border-t-black"></div>
</div>
</div>
</div>
</div>
<div class="bg-whatsapp-light rounded-lg p-2 max-w-[80%] shadow-sm">
{% if msg.reply is not none %}
<a href="#{{msg.reply}}" target="_self" class="no-base">
<div class="mb-2 p-1 bg-whatsapp-chat-light rounded border-l-4 border-whatsapp text-sm reply-box">
<p class="text-whatsapp font-medium text-xs">Replying to</p>
<p class="text-[#111b21] text-xs truncate">
{% if msg.quoted_data is not none %}
"{{msg.quoted_data}}"
{% else %}
this message
{% endif %}
</p>
</div>
</a>
{% endif %}
{# Body of a message sent by the exporting user.
   The container is a <div>, not a <p>: the system-message branch renders <div>
   children, and a <div> start tag implicitly closes an open <p>, which left the
   `.message-text` element empty and produced a stray </p>. #}
<div class="text-[#111b21] text-sm message-text">
    {% if msg.meta == true or msg.media == false and msg.data is none %}
    {# System / metadata message, or a text message without any content. #}
    <div class="flex justify-center mb-2">
        <div class="bg-[#FFF3C5] rounded-lg px-3 py-2 text-sm text-[#856404] flex items-center">
            {% if msg.safe %}
            {{ msg.data | safe or 'Not supported WhatsApp internal message' }}
            {% else %}
            {{ msg.data or 'Not supported WhatsApp internal message' }}
            {% endif %}
        </div>
    </div>
    {% if msg.caption is not none %}
    <p>{{ msg.caption | urlize(none, true, '_blank') }}</p>
    {% endif %}
    {% else %}
    {% if msg.media == false %}
    {# Plain text message. #}
    {{ msg.data | sanitize_except() | urlize(none, true, '_blank') }}
    {% else %}
    {# Media message: pick the element from the MIME type. #}
    {% if "image/" in msg.mime %}
    <a href="{{ msg.data }}">
        <img src="{{ msg.thumb if msg.thumb is not none else msg.data }}" {{ 'class="sticker"' | safe if msg.sticker }} loading="lazy"/>
    </a>
    {% elif "audio/" in msg.mime %}
    <audio controls="controls" autobuffer="autobuffer">
        <source src="{{ msg.data }}" />
    </audio>
    {% elif "video/" in msg.mime %}
    {# `data-src` is promoted to `src` by the lazy-loading script once the video scrolls into view. #}
    <video class="lazy" autobuffer {% if msg.message_type|int == 13 or msg.message_type|int == 11 %}autoplay muted loop playsinline{%else%}controls{% endif %}>
        <source type="{{ msg.mime }}" data-src="{{ msg.data }}" />
    </video>
    {% elif "/" in msg.mime %}
    The file cannot be displayed here, however it should be located at <a href="./{{ msg.data }}">here</a>
    {% else %}
    {% filter escape %}{{ msg.data }}{% endfilter %}
    {% endif %}
    {% if msg.caption is not none %}
    {{ msg.caption | urlize(none, true, '_blank') }}
    {% endif %}
    {% endif %}
    {% endif %}
</div>
<p class="text-[10px] text-[#667781] text-right mt-1">{{ msg.time }}</p>
</div>
</div>
{% else %}
<div class="flex justify-start items-center group" id="{{ msg.key_id }}">
<div class="bg-white rounded-lg p-2 max-w-[80%] shadow-sm">
{% if msg.reply is not none %}
<a href="#{{msg.reply}}" target="_self" class="no-base">
<div class="mb-2 p-1 bg-whatsapp-chat-light rounded border-l-4 border-whatsapp text-sm reply-box">
<p class="text-whatsapp font-medium text-xs">Replying to</p>
<p class="text-[#808080] text-xs truncate">
{% if msg.quoted_data is not none %}
{{msg.quoted_data}}
{% else %}
this message
{% endif %}
</p>
</div>
</a>
{% endif %}
{# Body of a message received from the other party.
   Two fixes compared with the previous markup:
   - the container carries `message-text`, the class search() queries, so
     received messages are searchable just like sent ones;
   - the container is a <div>, not a <p>: the system-message branch renders
     <div> children, and a <div> start tag implicitly closes an open <p>. #}
<div class="text-[#111b21] text-sm message-text">
    {% if msg.meta == true or msg.media == false and msg.data is none %}
    {# System / metadata message, or a text message without any content. #}
    <div class="flex justify-center mb-2">
        <div class="bg-[#FFF3C5] rounded-lg px-3 py-2 text-sm text-[#856404] flex items-center">
            {% if msg.safe %}
            {{ msg.data | safe or 'Not supported WhatsApp internal message' }}
            {% else %}
            {{ msg.data or 'Not supported WhatsApp internal message' }}
            {% endif %}
        </div>
    </div>
    {% if msg.caption is not none %}
    <p>{{ msg.caption | urlize(none, true, '_blank') }}</p>
    {% endif %}
    {% else %}
    {% if msg.media == false %}
    {# Plain text message. #}
    {{ msg.data | sanitize_except() | urlize(none, true, '_blank') }}
    {% else %}
    {# Media message: pick the element from the MIME type. #}
    {% if "image/" in msg.mime %}
    <a href="{{ msg.data }}">
        <img src="{{ msg.thumb if msg.thumb is not none else msg.data }}" {{ 'class="sticker"' | safe if msg.sticker }} loading="lazy"/>
    </a>
    {% elif "audio/" in msg.mime %}
    <audio controls="controls" autobuffer="autobuffer">
        <source src="{{ msg.data }}" />
    </audio>
    {% elif "video/" in msg.mime %}
    {# `data-src` is promoted to `src` by the lazy-loading script once the video scrolls into view. #}
    <video class="lazy" autobuffer {% if msg.message_type|int == 13 or msg.message_type|int == 11 %}autoplay muted loop playsinline{%else%}controls{% endif %}>
        <source type="{{ msg.mime }}" data-src="{{ msg.data }}" />
    </video>
    {% elif "/" in msg.mime %}
    The file cannot be displayed here, however it should be located at <a href="./{{ msg.data }}">here</a>
    {% else %}
    {% filter escape %}{{ msg.data }}{% endfilter %}
    {% endif %}
    {% if msg.caption is not none %}
    {{ msg.caption | urlize(none, true, '_blank') }}
    {% endif %}
    {% endif %}
    {% endif %}
</div>
<div class="flex items-baseline text-[10px] text-[#667781] mt-1 gap-2">
<span class="flex-shrink-0">
{% if msg.sender is not none %}
{{ msg.sender }}
{% endif %}
</span>
<span class="flex-grow min-w-[4px]"></span>
<span class="flex-shrink-0">{{ msg.time }}</span>
</div>
</div>
<!-- <div class="opacity-0 group-hover:opacity-100 transition-opacity duration-200 relative ml-2">
<div class="relative">
<div class="relative group/tooltip">
<svg xmlns="http://www.w3.org/2000/svg" class="h-5 w-5 text-[#8696a0] hover:text-[#54656f] cursor-pointer" fill="none" viewBox="0 0 24 24" stroke="currentColor">
<use href="#info-icon"></use>
</svg>
<div class="absolute bottom-full info-box-tooltip mb-2 hidden group-hover/tooltip:block z-50">
<div class="bg-black text-white text-xs rounded py-1 px-2 whitespace-nowrap">
Received at {{msg.received_timestamp or 'unknown'}}
</div>
<div class="absolute top-full right-3 ml-1 border-4 border-transparent border-t-black"></div>
</div>
</div>
</div>
</div> -->
</div>
{% endif %}
{% endfor %}
</div>
<footer>
<h2 class="text-center">
{% if not next %}
End of History
{% endif %}
</h2>
<br>
Portions of this page are reproduced from <a href="https://web.dev/articles/lazy-loading-video">work</a> created and <a href="https://developers.google.com/readme/policies">shared by Google</a> and used according to terms described in the <a href="https://www.apache.org/licenses/LICENSE-2.0">Apache 2.0 License</a>.
</footer>
<svg style="display: none;">
<!-- Tooltip info icon -->
<symbol id="info-icon" viewBox="0 0 24 24">
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M13 16h-1v-4h-1m1-4h.01M21 12a9 9 0 11-18 0 9 9 0 0118 0z" />
</symbol>
</svg>
</div>
</article>
</body>
<script>
// Search box toggle for the chat header.
// The four elements below are looked up by id; they are expected to be
// declared earlier in this template (not visible from this script alone).
const searchButton = document.getElementById('searchButton');
const mainSearchInput = document.getElementById('mainSearchInput');
const closeMainSearch = document.getElementById('closeMainSearch');
const mainHeaderSearchInput = document.getElementById('mainHeaderSearchInput');
// Reveal the search container by adding its 'active' class, then move
// keyboard focus into the text field so the user can type immediately.
const showSearch = () => {
mainSearchInput.classList.add('active');
mainHeaderSearchInput.focus();
};
// Hide the search container again and discard whatever was typed.
const hideSearch = () => {
mainSearchInput.classList.remove('active');
mainHeaderSearchInput.value = '';
};
// Wire the open and close buttons to the two helpers above.
searchButton.addEventListener('click', showSearch);
closeMainSearch.addEventListener('click', hideSearch);
// Pressing Escape closes the search box, but only while it is open, so the
// key keeps its normal meaning the rest of the time.
document.addEventListener('keydown', (event) => {
if (event.key === 'Escape' && mainSearchInput.classList.contains('active')) {
hideSearch();
}
});
</script>
<script>
// Lazy-load every <video class="lazy">: its <source> children carry the real
// URL in data-src, which is copied into src only when the video is needed.
document.addEventListener("DOMContentLoaded", function() {
    // Promote data-src to src on each direct <source> child, then ask the
    // video element to pick up the new sources and drop the "lazy" marker.
    function activateVideo(videoElement) {
        var children = videoElement.children;
        // Indexed loop: only real child elements are visited (a for...in over
        // an HTMLCollection also yields "length", "item" and "namedItem").
        for (var i = 0; i < children.length; i++) {
            var child = children[i];
            if (child.tagName === "SOURCE") {
                child.src = child.dataset.src;
            }
        }
        videoElement.load();
        videoElement.classList.remove("lazy");
    }

    var lazyVideos = [].slice.call(document.querySelectorAll("video.lazy"));

    if (!("IntersectionObserver" in window)) {
        // Without IntersectionObserver the videos would keep an empty src
        // forever, so fall back to loading all of them right away.
        lazyVideos.forEach(activateVideo);
        return;
    }

    var lazyVideoObserver = new IntersectionObserver(function(entries) {
        entries.forEach(function(entry) {
            if (entry.isIntersecting) {
                activateVideo(entry.target);
                // Each video only needs to be activated once.
                lazyVideoObserver.unobserve(entry.target);
            }
        });
    });

    lazyVideos.forEach(function(lazyVideo) {
        lazyVideoObserver.observe(lazyVideo);
    });
});
</script>
<script>
// Links marked "no-base" that point at an in-page fragment are handled by
// hand: the hash is set directly so the page's <base> tag does not get a
// chance to rewrite the destination.
document.querySelectorAll('.no-base').forEach((anchor) => {
    anchor.addEventListener('click', (event) => {
        const destination = anchor.getAttribute('href');
        if (!destination.startsWith('#')) {
            // Not a fragment link: let the browser follow it normally.
            return;
        }
        window.location.hash = destination;
        event.preventDefault();
    });
});
</script>
</html>

View File

@@ -0,0 +1,329 @@
<!DOCTYPE html>
<html>
<head>
<title>Whatsapp - {{ name }}</title>
<meta charset="UTF-8">
<link rel="stylesheet" href="{{w3css}}">
<style>
html, body {
font-size: 12px;
scroll-behavior: smooth;
}
header {
position: fixed;
z-index: 20;
border-bottom: 2px solid #e3e6e7;
font-size: 2em;
font-weight: bolder;
background-color: white;
padding: 20px 0 20px 0;
}
footer {
border-top: 2px solid #e3e6e7;
padding: 20px 0 20px 0;
}
article {
width:500px;
margin:100px auto;
z-index:10;
font-size: 15px;
word-wrap: break-word;
}
img, video {
max-width:100%;
}
div.reply{
font-size: 13px;
text-decoration: none;
}
div:target::before {
content: '';
display: block;
height: 115px;
margin-top: -115px;
visibility: hidden;
}
div:target {
border-style: solid;
border-width: 2px;
animation: border-blink 0.5s steps(1) 5;
border-color: rgba(0,0,0,0)
}
table {
width: 100%;
}
@keyframes border-blink {
0% {
border-color: #2196F3;
}
50% {
border-color: rgba(0,0,0,0);
}
}
.avatar {
border-radius:50%;
overflow:hidden;
max-width: 64px;
max-height: 64px;
}
.name {
color: #3892da;
}
.pad-left-10 {
padding-left: 10px;
}
.pad-right-10 {
padding-right: 10px;
}
.reply_link {
color: #168acc;
}
.blue {
color: #70777a;
}
.sticker {
max-width: 100px !important;
max-height: 100px !important;
}
</style>
<base href="{{ media_base }}" target="_blank">
</head>
<body>
<header class="w3-center w3-top">
{{ headline }}
{% if status is not none %}
<br>
<span class="w3-small">{{ status }}</span>
{% endif %}
</header>
<article class="w3-container">
<div class="table">
{% set last = {'last': 946688461.001} %}
{% for msg in msgs -%}
<div class="w3-row w3-padding-small w3-margin-bottom" id="{{ msg.key_id }}">
{% if determine_day(last.last, msg.timestamp) is not none %}
<div class="w3-center w3-padding-16 blue">{{ determine_day(last.last, msg.timestamp) }}</div>
{% if last.update({'last': msg.timestamp}) %}{% endif %}
{% endif %}
{% if msg.from_me == true %}
<div class="w3-row">
<div class="w3-left blue">{{ msg.time }}</div>
<div class="name w3-right-align pad-left-10">You</div>
</div>
<div class="w3-row">
{% if not no_avatar and my_avatar is not none %}
<div class="w3-col m10 l10">
{% else %}
<div class="w3-col m12 l12">
{% endif %}
<div class="w3-right-align">
{% if msg.reply is not none %}
<div class="reply">
<span class="blue">Replying to </span>
<a href="#{{msg.reply}}" target="_self" class="reply_link no-base">
{% if msg.quoted_data is not none %}
"{{msg.quoted_data}}"
{% else %}
this message
{% endif %}
</a>
</div>
{% endif %}
{% if msg.meta == true or msg.media == false and msg.data is none %}
<div class="w3-panel w3-border-blue w3-pale-blue w3-rightbar w3-leftbar w3-threequarter w3-center">
{% if msg.safe %}
<p>{{ msg.data | safe or 'Not supported WhatsApp internal message' }}</p>
{% else %}
<p>{{ msg.data or 'Not supported WhatsApp internal message' }}</p>
{% endif %}
</div>
{% if msg.caption is not none %}
<div class="w3-container">
{{ msg.caption | urlize(none, true, '_blank') }}
</div>
{% endif %}
{% else %}
{% if msg.media == false %}
{{ msg.data | sanitize_except() | urlize(none, true, '_blank') }}
{% else %}
{% if "image/" in msg.mime %}
<a href="{{ msg.data }}">
<img src="{{ msg.thumb if msg.thumb is not none else msg.data }}" {{ 'class="sticker"' | safe if msg.sticker }} loading="lazy"/>
</a>
{% elif "audio/" in msg.mime %}
<audio controls="controls" autobuffer="autobuffer">
<source src="{{ msg.data }}" />
</audio>
{% elif "video/" in msg.mime %}
<video class="lazy" autobuffer {% if msg.message_type|int == 13 or msg.message_type|int == 11 %}autoplay muted loop playsinline{%else%}controls{% endif %}>
<source type="{{ msg.mime }}" data-src="{{ msg.data }}" />
</video>
{% elif "/" in msg.mime %}
<div class="w3-panel w3-border-blue w3-pale-blue w3-rightbar w3-leftbar w3-threequarter w3-center">
<p>The file cannot be displayed here, however it should be located at <a href="./{{ msg.data }}">here</a></p>
</div>
{% else %}
{% filter escape %}{{ msg.data }}{% endfilter %}
{% endif %}
{% if msg.caption is not none %}
<div class="w3-container">
{{ msg.caption | urlize(none, true, '_blank') }}
</div>
{% endif %}
{% endif %}
{% endif %}
</div>
</div>
{% if not no_avatar and my_avatar is not none %}
<div class="w3-col m2 l2 pad-left-10">
<a href="{{ my_avatar }}">
<img src="{{ my_avatar }}" onerror="this.style.display='none'" class="avatar" loading="lazy">
</a>
</div>
{% endif %}
</div>
{% else %}
<div class="w3-row">
<div class="w3-left pad-right-10 name">
{% if msg.sender is not none %}
{{ msg.sender }}
{% else %}
{{ name }}
{% endif %}
</div>
<div class="w3-right-align blue">{{ msg.time }}</div>
</div>
<div class="w3-row">
{% if not no_avatar %}
<div class="w3-col m2 l2">
{% if their_avatar is not none %}
<a href="{{ their_avatar }}"><img src="{{ their_avatar_thumb or '' }}" onerror="this.style.display='none'" class="avatar" loading="lazy"></a>
{% else %}
<img src="{{ their_avatar_thumb or '' }}" onerror="this.style.display='none'" class="avatar" loading="lazy">
{% endif %}
</div>
<div class="w3-col m10 l10">
{% else %}
<div class="w3-col m12 l12">
{% endif %}
<div class="w3-left-align">
{% if msg.reply is not none %}
<div class="reply">
<span class="blue">Replying to </span>
<a href="#{{msg.reply}}" target="_self" class="reply_link no-base">
{% if msg.quoted_data is not none %}
"{{msg.quoted_data}}"
{% else %}
this message
{% endif %}
</a>
</div>
{% endif %}
{% if msg.meta == true or msg.media == false and msg.data is none %}
<div class="w3-panel w3-border-blue w3-pale-blue w3-rightbar w3-leftbar w3-threequarter w3-center">
{% if msg.safe %}
<p>{{ msg.data | safe or 'Not supported WhatsApp internal message' }}</p>
{% else %}
<p>{{ msg.data or 'Not supported WhatsApp internal message' }}</p>
{% endif %}
</div>
{% if msg.caption is not none %}
<div class="w3-container">
{{ msg.caption | urlize(none, true, '_blank') }}
</div>
{% endif %}
{% else %}
{% if msg.media == false %}
{{ msg.data | sanitize_except() | urlize(none, true, '_blank') }}
{% else %}
{% if "image/" in msg.mime %}
<a href="{{ msg.data }}">
<img src="{{ msg.thumb if msg.thumb is not none else msg.data }}" {{ 'class="sticker"' | safe if msg.sticker }} loading="lazy"/>
</a>
{% elif "audio/" in msg.mime %}
<audio controls="controls" autobuffer="autobuffer">
<source src="{{ msg.data }}" />
</audio>
{% elif "video/" in msg.mime %}
<video class="lazy" autobuffer {% if msg.message_type|int == 13 or msg.message_type|int == 11 %}autoplay muted loop playsinline{%else%}controls{% endif %}>
<source type="{{ msg.mime }}" data-src="{{ msg.data }}" />
</video>
{% elif "/" in msg.mime %}
<div class="w3-panel w3-border-blue w3-pale-blue w3-rightbar w3-leftbar w3-threequarter w3-center">
<p>The file cannot be displayed here, however it should be located at <a href="./{{ msg.data }}">here</a></p>
</div>
{% else %}
{% filter escape %}{{ msg.data }}{% endfilter %}
{% endif %}
{% if msg.caption is not none %}
<div class="w3-container">
{{ msg.caption | urlize(none, true, '_blank') }}
</div>
{% endif %}
{% endif %}
{% endif %}
</div>
</div>
</div>
{% endif %}
</div>
{% endfor %}
</div>
</article>
<footer class="w3-center">
<h2>
{% if previous %}
<a href="./{{ previous }}" target="_self">Previous</a>
{% endif %}
</h2>
<h2>
{% if next %}
<a href="./{{ next }}" target="_self">Next</a>
{% else %}
End of History
{% endif %}
</h2>
<br>
Portions of this page are reproduced from <a href="https://web.dev/articles/lazy-loading-video">work</a> created and <a href="https://developers.google.com/readme/policies">shared by Google</a> and used according to terms described in the <a href="https://www.apache.org/licenses/LICENSE-2.0">Apache 2.0 License</a>.
</footer>
<script>
// Lazy-load every <video class="lazy">: its <source> children carry the real
// URL in data-src, which is copied into src only when the video is needed.
document.addEventListener("DOMContentLoaded", function() {
    // Promote data-src to src on each direct <source> child, then ask the
    // video element to pick up the new sources and drop the "lazy" marker.
    function activateVideo(videoElement) {
        var children = videoElement.children;
        // Indexed loop: only real child elements are visited (a for...in over
        // an HTMLCollection also yields "length", "item" and "namedItem").
        for (var i = 0; i < children.length; i++) {
            var child = children[i];
            if (child.tagName === "SOURCE") {
                child.src = child.dataset.src;
            }
        }
        videoElement.load();
        videoElement.classList.remove("lazy");
    }

    var lazyVideos = [].slice.call(document.querySelectorAll("video.lazy"));

    if (!("IntersectionObserver" in window)) {
        // Without IntersectionObserver the videos would keep an empty src
        // forever, so fall back to loading all of them right away.
        lazyVideos.forEach(activateVideo);
        return;
    }

    var lazyVideoObserver = new IntersectionObserver(function(entries) {
        entries.forEach(function(entry) {
            if (entry.isIntersecting) {
                activateVideo(entry.target);
                // Each video only needs to be activated once.
                lazyVideoObserver.unobserve(entry.target);
            }
        });
    });

    lazyVideos.forEach(function(lazyVideo) {
        lazyVideoObserver.observe(lazyVideo);
    });
});
</script>
<script>
// Links marked "no-base" that point at an in-page fragment are handled by
// hand: the hash is set directly so the page's <base> tag does not get a
// chance to rewrite the destination.
document.querySelectorAll('.no-base').forEach((anchor) => {
    anchor.addEventListener('click', (event) => {
        const destination = anchor.getAttribute('href');
        if (!destination.startsWith('#')) {
            // Not a fragment link: let the browser follow it normally.
            return;
        }
        window.location.hash = destination;
        event.preventDefault();
    });
});
</script>
</body>
</html>

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 126 KiB

After

Width:  |  Height:  |  Size: 116 KiB

View File

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
[project] [project]
name = "whatsapp-chat-exporter" name = "whatsapp-chat-exporter"
version = "0.12.1" version = "0.13.0rc1"
description = "A Whatsapp database parser that provides history of your Whatsapp conversations in HTML and JSON. Android, iOS, iPadOS, Crypt12, Crypt14, Crypt15 supported." description = "A Whatsapp database parser that provides history of your Whatsapp conversations in HTML and JSON. Android, iOS, iPadOS, Crypt12, Crypt14, Crypt15 supported."
readme = "README.md" readme = "README.md"
authors = [ authors = [
@@ -19,11 +19,11 @@ keywords = [
] ]
classifiers = [ classifiers = [
"Programming Language :: Python :: 3 :: Only", "Programming Language :: Python :: 3 :: Only",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13", "Programming Language :: Python :: 3.13",
"Programming Language :: Python :: 3.14",
"License :: OSI Approved :: MIT License", "License :: OSI Approved :: MIT License",
"Operating System :: OS Independent", "Operating System :: OS Independent",
"Development Status :: 4 - Beta", "Development Status :: 4 - Beta",
@@ -33,7 +33,7 @@ classifiers = [
"Topic :: Utilities", "Topic :: Utilities",
"Topic :: Database" "Topic :: Database"
] ]
requires-python = ">=3.9" requires-python = ">=3.10"
dependencies = [ dependencies = [
"jinja2", "jinja2",
"bleach" "bleach"
@@ -44,10 +44,9 @@ android_backup = ["pycryptodome", "javaobj-py3"]
crypt12 = ["pycryptodome"] crypt12 = ["pycryptodome"]
crypt14 = ["pycryptodome"] crypt14 = ["pycryptodome"]
crypt15 = ["pycryptodome", "javaobj-py3"] crypt15 = ["pycryptodome", "javaobj-py3"]
all = ["pycryptodome", "javaobj-py3", "vobject"] all = ["pycryptodome", "javaobj-py3"]
everything = ["pycryptodome", "javaobj-py3", "vobject"] everything = ["pycryptodome", "javaobj-py3"]
backup = ["pycryptodome", "javaobj-py3"] backup = ["pycryptodome", "javaobj-py3"]
vcards = ["vobject", "pycryptodome", "javaobj-py3"]
[project.scripts] [project.scripts]
wtsexporter = "Whatsapp_Chat_Exporter.__main__:main" wtsexporter = "Whatsapp_Chat_Exporter.__main__:main"
@@ -60,3 +59,8 @@ include = ["Whatsapp_Chat_Exporter"]
[tool.setuptools.package-data] [tool.setuptools.package-data]
Whatsapp_Chat_Exporter = ["*.html"] Whatsapp_Chat_Exporter = ["*.html"]
[dependency-groups]
dev = [
"pytest>=8.3.5",
]

View File

@@ -6,19 +6,20 @@ Contributed by @magpires https://github.com/KnugiHK/WhatsApp-Chat-Exporter/issue
import re import re
import argparse import argparse
def process_phone_number(raw_phone): def process_phone_number(raw_phone):
""" """
Process the raw phone string from the VCARD and return two formatted numbers: Process the raw phone string from the VCARD and return two formatted numbers:
- The original formatted number, and - The original formatted number, and
- A modified formatted number with the extra (ninth) digit removed, if applicable. - A modified formatted number with the extra (ninth) digit removed, if applicable.
Desired output: Desired output:
For a number with a 9-digit subscriber: For a number with a 9-digit subscriber:
Original: "+55 {area} {first 5 of subscriber}-{last 4 of subscriber}" Original: "+55 {area} {first 5 of subscriber}-{last 4 of subscriber}"
Modified: "+55 {area} {subscriber[1:5]}-{subscriber[5:]}" Modified: "+55 {area} {subscriber[1:5]}-{subscriber[5:]}"
For example, for an input that should represent "027912345678", the outputs are: For example, for an input that should represent "027912345678", the outputs are:
"+55 27 91234-5678" and "+55 27 1234-5678" "+55 27 91234-5678" and "+55 27 1234-5678"
This function handles numbers that may already include a "+55" prefix. This function handles numbers that may already include a "+55" prefix.
It expects that after cleaning, a valid number (without the country code) should have either 10 digits It expects that after cleaning, a valid number (without the country code) should have either 10 digits
(2 for area + 8 for subscriber) or 11 digits (2 for area + 9 for subscriber). (2 for area + 8 for subscriber) or 11 digits (2 for area + 9 for subscriber).
@@ -26,18 +27,18 @@ def process_phone_number(raw_phone):
""" """
# Store the original input for processing # Store the original input for processing
number_to_process = raw_phone.strip() number_to_process = raw_phone.strip()
# Remove all non-digit characters # Remove all non-digit characters
digits = re.sub(r'\D', '', number_to_process) digits = re.sub(r'\D', '', number_to_process)
# If the number starts with '55', remove it for processing # If the number starts with '55', remove it for processing
if digits.startswith("55") and len(digits) > 11: if digits.startswith("55") and len(digits) > 11:
digits = digits[2:] digits = digits[2:]
# Remove trunk zero if present # Remove trunk zero if present
if digits.startswith("0"): if digits.startswith("0"):
digits = digits[1:] digits = digits[1:]
# After cleaning, we expect a valid number to have either 10 or 11 digits # After cleaning, we expect a valid number to have either 10 or 11 digits
# If there are extra digits, use the last 11 (for a 9-digit subscriber) or last 10 (for an 8-digit subscriber) # If there are extra digits, use the last 11 (for a 9-digit subscriber) or last 10 (for an 8-digit subscriber)
if len(digits) > 11: if len(digits) > 11:
@@ -46,7 +47,7 @@ def process_phone_number(raw_phone):
elif len(digits) > 10 and len(digits) < 11: elif len(digits) > 10 and len(digits) < 11:
# In some cases with an 8-digit subscriber, take the last 10 digits # In some cases with an 8-digit subscriber, take the last 10 digits
digits = digits[-10:] digits = digits[-10:]
# Check if we have a valid number after processing # Check if we have a valid number after processing
if len(digits) not in (10, 11): if len(digits) not in (10, 11):
return None, None return None, None
@@ -70,6 +71,7 @@ def process_phone_number(raw_phone):
return original_formatted, modified_formatted return original_formatted, modified_formatted
def process_vcard(input_vcard, output_vcard): def process_vcard(input_vcard, output_vcard):
""" """
Process a VCARD file to standardize telephone entries and add a second TEL line Process a VCARD file to standardize telephone entries and add a second TEL line
@@ -77,13 +79,13 @@ def process_vcard(input_vcard, output_vcard):
""" """
with open(input_vcard, 'r', encoding='utf-8') as file: with open(input_vcard, 'r', encoding='utf-8') as file:
lines = file.readlines() lines = file.readlines()
output_lines = [] output_lines = []
# Regex to capture any telephone line. # Regex to capture any telephone line.
# It matches lines starting with "TEL:" or "TEL;TYPE=..." or with prefixes like "item1.TEL:". # It matches lines starting with "TEL:" or "TEL;TYPE=..." or with prefixes like "item1.TEL:".
phone_pattern = re.compile(r'^(?P<prefix>.*TEL(?:;TYPE=[^:]+)?):(?P<number>.*)$') phone_pattern = re.compile(r'^(?P<prefix>.*TEL(?:;TYPE=[^:]+)?):(?P<number>.*)$')
for line in lines: for line in lines:
stripped_line = line.rstrip("\n") stripped_line = line.rstrip("\n")
match = phone_pattern.match(stripped_line) match = phone_pattern.match(stripped_line)
@@ -99,10 +101,11 @@ def process_vcard(input_vcard, output_vcard):
output_lines.append(f"TEL;TYPE=CELL:{mod_formatted}\n") output_lines.append(f"TEL;TYPE=CELL:{mod_formatted}\n")
else: else:
output_lines.append(line) output_lines.append(line)
with open(output_vcard, 'w', encoding='utf-8') as file: with open(output_vcard, 'w', encoding='utf-8') as file:
file.writelines(output_lines) file.writelines(output_lines)
if __name__ == '__main__': if __name__ == '__main__':
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(
description="Process a VCARD file to standardize telephone entries and add a second TEL line with the modified number (removing the extra ninth digit) for contacts with 9-digit subscribers." description="Process a VCARD file to standardize telephone entries and add a second TEL line with the modified number (removing the extra ninth digit) for contacts with 9-digit subscribers."
@@ -110,6 +113,6 @@ if __name__ == '__main__':
parser.add_argument('input_vcard', type=str, help='Input VCARD file') parser.add_argument('input_vcard', type=str, help='Input VCARD file')
parser.add_argument('output_vcard', type=str, help='Output VCARD file') parser.add_argument('output_vcard', type=str, help='Output VCARD file')
args = parser.parse_args() args = parser.parse_args()
process_vcard(args.input_vcard, args.output_vcard) process_vcard(args.input_vcard, args.output_vcard)
print(f"VCARD processed and saved to {args.output_vcard}") print(f"VCARD processed and saved to {args.output_vcard}")

View File

@@ -27,23 +27,24 @@ def _extract_encrypted_key(keyfile):
return _generate_hmac_of_hmac(key_stream) return _generate_hmac_of_hmac(key_stream)
key = open("encrypted_backup.key", "rb").read() if __name__ == "__main__":
database = open("wa.db.crypt15", "rb").read() key = open("encrypted_backup.key", "rb").read()
main_key, hex_key = _extract_encrypted_key(key) database = open("wa.db.crypt15", "rb").read()
for i in range(100): main_key, hex_key = _extract_encrypted_key(key)
iv = database[i:i+16] for i in range(100):
for j in range(100): iv = database[i:i+16]
cipher = AES.new(main_key, AES.MODE_GCM, iv) for j in range(100):
db_ciphertext = database[j:] cipher = AES.new(main_key, AES.MODE_GCM, iv)
db_compressed = cipher.decrypt(db_ciphertext) db_ciphertext = database[j:]
try: db_compressed = cipher.decrypt(db_ciphertext)
db = zlib.decompress(db_compressed) try:
except zlib.error: db = zlib.decompress(db_compressed)
... except zlib.error:
else: ...
if db[0:6] == b"SQLite": else:
print(f"Found!\nIV: {i}\nOffset: {j}") if db[0:6] == b"SQLite":
print(db_compressed[:10]) print(f"Found!\nIV: {i}\nOffset: {j}")
exit() print(db_compressed[:10])
exit()
print("Not found! Try to increase maximum search.") print("Not found! Try to increase maximum search.")

0
tests/__init__.py Normal file
View File

44
tests/data/contacts.vcf Normal file
View File

@@ -0,0 +1,44 @@
BEGIN:VCARD
VERSION:3.0
FN:Sample Contact
TEL;TYPE=CELL:+85288888888
END:VCARD
BEGIN:VCARD
VERSION:2.1
N:Lopez;Yard Lawn Guy;Jose;;
FN:Yard Lawn Guy, Jose Lopez
TEL;HOME:5673334444
END:VCARD
BEGIN:VCARD
VERSION:2.1
N;CHARSET=UTF-8;ENCODING=QUOTED-PRINTABLE:;=4A=6F=68=6E=20=42=75=74=6C=65=72=20=F0=9F=8C=9F=
=F0=9F=92=AB=F0=9F=8C=9F;;;
FN;CHARSET=UTF-8;ENCODING=QUOTED-PRINTABLE:=4A=6F=68=6E=20=42=75=74=6C=65=72=20=F0=9F=8C=9F=
=F0=9F=92=AB=F0=9F=8C=9F
TEL;PREF:5556667777
END:VCARD
BEGIN:VCARD
VERSION:2.1
TEL;WORK;PREF:1234567890
ORG:Airline Contact #'s
NOTE;ENCODING=QUOTED-PRINTABLE:=53=70=69=72=69=74=20=41=69=72=6C=69=
=6E=65=73=20=38=30=30=2D=37=37=32=2D=37=31=31=37=55=6E=69=74=65=64=
=20=41=69=72=6C=69=6E=65=73=20=38=30=30=2D=32=34=31=2D=36=35=32=32
END:VCARD
BEGIN:VCARD
VERSION:2.1
TEL;WORK;PREF:3451112222
X-SAMSUNGADR;ENCODING=QUOTED-PRINTABLE:;;=31=31=31=31=32=20=4E=6F=72=74=68=20=45=6C=64=72=
=69=64=67=65=20=50=61=72=6B=77=61=79;=44=61=6C=6C=61=73;=54=58;=32=32=32=32=32
ORG:James Peacock Elementary
END:VCARD
BEGIN:VCARD
VERSION:2.1
TEL;CELL:8889990001
ORG:AAA Car Service
END:VCARD

View File

@@ -4,13 +4,14 @@ import tempfile
import os import os
from unittest.mock import patch from unittest.mock import patch
from brazilian_number_processing import process_phone_number, process_vcard from scripts.brazilian_number_processing import process_phone_number, process_vcard
class TestVCardProcessor(unittest.TestCase): class TestVCardProcessor(unittest.TestCase):
def test_process_phone_number(self): def test_process_phone_number(self):
"""Test the process_phone_number function with various inputs.""" """Test the process_phone_number function with various inputs."""
# Test cases for 9-digit subscriber numbers # Test cases for 9-digit subscriber numbers
test_cases_9_digit = [ test_cases_9_digit = [
# Standard 11-digit number (2 area + 9 subscriber) # Standard 11-digit number (2 area + 9 subscriber)
@@ -30,7 +31,7 @@ class TestVCardProcessor(unittest.TestCase):
# With extra non-digit characters # With extra non-digit characters
("+55-27-9.1234_5678", "+55 27 91234-5678", "+55 27 1234-5678"), ("+55-27-9.1234_5678", "+55 27 91234-5678", "+55 27 1234-5678"),
] ]
# Test cases for 8-digit subscriber numbers # Test cases for 8-digit subscriber numbers
test_cases_8_digit = [ test_cases_8_digit = [
# Standard 10-digit number (2 area + 8 subscriber) # Standard 10-digit number (2 area + 8 subscriber)
@@ -46,7 +47,7 @@ class TestVCardProcessor(unittest.TestCase):
# With country code and trunk zero # With country code and trunk zero
("+55 0 27 1234-5678", "+55 27 1234-5678", None), ("+55 0 27 1234-5678", "+55 27 1234-5678", None),
] ]
# Edge cases # Edge cases
edge_cases = [ edge_cases = [
# Too few digits # Too few digits
@@ -60,19 +61,19 @@ class TestVCardProcessor(unittest.TestCase):
# Unusual formatting but valid number # Unusual formatting but valid number
("(+55) [27] 9.1234_5678", "+55 27 91234-5678", "+55 27 1234-5678"), ("(+55) [27] 9.1234_5678", "+55 27 91234-5678", "+55 27 1234-5678"),
] ]
# Run tests for all cases # Run tests for all cases
all_cases = test_cases_9_digit + test_cases_8_digit + edge_cases all_cases = test_cases_9_digit + test_cases_8_digit + edge_cases
for raw_phone, expected_orig, expected_mod in all_cases: for raw_phone, expected_orig, expected_mod in all_cases:
with self.subTest(raw_phone=raw_phone): with self.subTest(raw_phone=raw_phone):
orig, mod = process_phone_number(raw_phone) orig, mod = process_phone_number(raw_phone)
self.assertEqual(orig, expected_orig) self.assertEqual(orig, expected_orig)
self.assertEqual(mod, expected_mod) self.assertEqual(mod, expected_mod)
def test_process_vcard(self): def test_process_vcard(self):
"""Test the process_vcard function with various VCARD formats.""" """Test the process_vcard function with various VCARD formats."""
# Test case 1: Standard TEL entries # Test case 1: Standard TEL entries
vcard1 = """BEGIN:VCARD vcard1 = """BEGIN:VCARD
VERSION:3.0 VERSION:3.0
@@ -202,26 +203,26 @@ END:VCARD
(vcard5, expected5), (vcard5, expected5),
(vcard6, expected6) (vcard6, expected6)
] ]
for i, (input_vcard, expected_output) in enumerate(test_cases): for i, (input_vcard, expected_output) in enumerate(test_cases):
with self.subTest(case=i+1): with self.subTest(case=i+1):
# Create temporary files for input and output # Create temporary files for input and output
with tempfile.NamedTemporaryFile(mode='w+', delete=False, encoding='utf-8') as input_file: with tempfile.NamedTemporaryFile(mode='w+', delete=False, encoding='utf-8') as input_file:
input_file.write(input_vcard) input_file.write(input_vcard)
input_path = input_file.name input_path = input_file.name
output_path = input_path + '.out' output_path = input_path + '.out'
try: try:
# Process the VCARD # Process the VCARD
process_vcard(input_path, output_path) process_vcard(input_path, output_path)
# Read and verify the output # Read and verify the output
with open(output_path, 'r', encoding='utf-8') as output_file: with open(output_path, 'r', encoding='utf-8') as output_file:
actual_output = output_file.read() actual_output = output_file.read()
self.assertEqual(actual_output, expected_output) self.assertEqual(actual_output, expected_output)
finally: finally:
# Clean up temporary files # Clean up temporary files
if os.path.exists(input_path): if os.path.exists(input_path):
@@ -231,7 +232,7 @@ END:VCARD
def test_script_argument_handling(self): def test_script_argument_handling(self):
"""Test the script's command-line argument handling.""" """Test the script's command-line argument handling."""
test_input = """BEGIN:VCARD test_input = """BEGIN:VCARD
VERSION:3.0 VERSION:3.0
N:Test;User;;; N:Test;User;;;
@@ -239,16 +240,17 @@ FN:User Test
TEL:+5527912345678 TEL:+5527912345678
END:VCARD END:VCARD
""" """
# Create a temporary input file # Create a temporary input file
with tempfile.NamedTemporaryFile(mode='w+', delete=False, encoding='utf-8') as input_file: with tempfile.NamedTemporaryFile(mode='w+', delete=False, encoding='utf-8') as input_file:
input_file.write(test_input) input_file.write(test_input)
input_path = input_file.name input_path = input_file.name
output_path = input_path + '.out' output_path = input_path + '.out'
try: try:
test_args = ['python' if os.name == 'nt' else 'python3', 'brazilian_number_processing.py', input_path, output_path] test_args = ['python' if os.name == 'nt' else 'python3',
'scripts/brazilian_number_processing.py', input_path, output_path]
# We're just testing that the argument parsing works # We're just testing that the argument parsing works
subprocess.call( subprocess.call(
test_args, test_args,
@@ -257,7 +259,7 @@ END:VCARD
) )
# Check if the output file was created # Check if the output file was created
self.assertTrue(os.path.exists(output_path)) self.assertTrue(os.path.exists(output_path))
finally: finally:
# Clean up temporary files # Clean up temporary files
if os.path.exists(input_path): if os.path.exists(input_path):
@@ -265,5 +267,6 @@ END:VCARD
if os.path.exists(output_path): if os.path.exists(output_path):
os.unlink(output_path) os.unlink(output_path)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

50
tests/test_exporter.py Normal file
View File

@@ -0,0 +1,50 @@
import subprocess
import pytest
@pytest.fixture
def command_runner():
    """
    Pytest fixture that hands a test a small helper for executing an
    external command and inspecting its captured output.
    """

    def _run_command(command_list, check=True):
        """
        Execute a command and return the finished process.

        Args:
            command_list (list): The program and its arguments as a list of
                strings (e.g., ["python", "my_script.py", "arg1"]).
            check (bool, optional): When True, a non-zero exit code raises
                subprocess.CalledProcessError. Defaults to True.

        Returns:
            subprocess.CompletedProcess: The finished process, with stdout
                and stderr captured and decoded as text.
        """
        run_options = {"capture_output": True, "text": True, "check": check}
        return subprocess.run(command_list, **run_options)

    return _run_command
def test_sanity_check(command_runner):
"""
This is a basic sanity check to make sure all modules can be imported
This runs the exporter without any arguments. It should fail with a
message about missing arguments.
"""
result = command_runner(["wtsexporter"], False)
expected_stderr = "You must define the device type"
assert expected_stderr in result.stderr, f"STDERR was: {result.stderr}"
assert result.returncode == 2
def test_android(command_runner):
...
def test_ios(command_runner):
...

View File

@@ -0,0 +1,341 @@
import os
import json
import pytest
from unittest.mock import patch, mock_open, call, MagicMock
from Whatsapp_Chat_Exporter.utility import incremental_merge
from Whatsapp_Chat_Exporter.data_model import ChatStore
# Test data setup
# Directory prefix used to build the avatar paths in the fixtures below.
BASE_PATH = "AppDomainGroup-group.net.whatsapp.WhatsApp.shared"
# Chat export that the existing-file test serializes as the *target* JSON.
# It shares message 24690 with chat_data_2 and adds message 24691.
chat_data_1 = {
"12345678@s.whatsapp.net": {
"name": "Friend",
"type": "ios",
"my_avatar": os.path.join(BASE_PATH, "Media", "Profile", "Photo.jpg"),
"their_avatar": os.path.join(BASE_PATH, "Media", "Profile", "12345678-1709851420.thumb"),
"their_avatar_thumb": None,
"status": None,
"messages": {
"24690": {
"from_me": True,
"timestamp": 1463926635.571629,
"time": "10:17",
"media": False,
"key_id": "34B5EF10FBCA37B7E",
"meta": False,
"data": "I'm here",
"safe": False,
"sticker": False
},
"24691": { # This message only exists in target
"from_me": False,
"timestamp": 1463926641.571629,
"time": "10:17",
"media": False,
"key_id": "34B5EF10FBCA37B8E",
"meta": False,
"data": "Great to see you",
"safe": False,
"sticker": False
}
}
}
}
# Chat export that the existing-file test serializes as the *source* JSON.
# Same chat and same message 24690 as chat_data_1, plus message 24692.
chat_data_2 = {
"12345678@s.whatsapp.net": {
"name": "Friend",
"type": "ios",
"my_avatar": os.path.join(BASE_PATH, "Media", "Profile", "Photo.jpg"),
"their_avatar": os.path.join(BASE_PATH, "Media", "Profile", "12345678-1709851420.thumb"),
"their_avatar_thumb": None,
"status": None,
"messages": {
"24690": {
"from_me": True,
"timestamp": 1463926635.571629,
"time": "10:17",
"media": False,
"key_id": "34B5EF10FBCA37B7E",
"meta": False,
"data": "I'm here",
"safe": False,
"sticker": False
},
"24692": { # This message only exists in source
"from_me": False,
"timestamp": 1463926642.571629,
"time": "10:17",
"media": False,
"key_id": "34B5EF10FBCA37B9E",
"meta": False,
"data": "Hi there!",
"safe": False,
"sticker": False
},
}
}
}
# Expected merged data - should contain all messages with all fields initialized as they would be by Message class
# Compared with the two inputs: the chat gains a "media_base" entry, messages
# 24690, 24691 and 24692 are all present, and every message carries the extra
# keys the inputs omit (sender, mime, reply, quoted_data, caption, thumb,
# message_type, received_timestamp, read_timestamp), all set to None.
chat_data_merged = {
"12345678@s.whatsapp.net": {
"name": "Friend",
"type": "ios",
"my_avatar": os.path.join(BASE_PATH, "Media", "Profile", "Photo.jpg"),
"their_avatar": os.path.join(BASE_PATH, "Media", "Profile", "12345678-1709851420.thumb"),
"their_avatar_thumb": None,
"status": None,
"media_base": "",
"messages": {
"24690": {
"from_me": True,
"timestamp": 1463926635.571629,
"time": "10:17",
"media": False,
"key_id": "34B5EF10FBCA37B7E",
"meta": False,
"data": "I'm here",
"sender": None,
"safe": False,
"mime": None,
"reply": None,
"quoted_data": None,
"caption": None,
"thumb": None,
"sticker": False,
"message_type": None,
"received_timestamp": None,
"read_timestamp": None
},
"24691": {
"from_me": False,
"timestamp": 1463926641.571629,
"time": "10:17",
"media": False,
"key_id": "34B5EF10FBCA37B8E",
"meta": False,
"data": "Great to see you",
"sender": None,
"safe": False,
"mime": None,
"reply": None,
"quoted_data": None,
"caption": None,
"thumb": None,
"sticker": False,
"message_type": None,
"received_timestamp": None,
"read_timestamp": None
},
"24692": {
"from_me": False,
"timestamp": 1463926642.571629,
"time": "10:17",
"media": False,
"key_id": "34B5EF10FBCA37B9E",
"meta": False,
"data": "Hi there!",
"sender": None,
"safe": False,
"mime": None,
"reply": None,
"quoted_data": None,
"caption": None,
"thumb": None,
"sticker": False,
"message_type": None,
"received_timestamp": None,
"read_timestamp": None
},
}
}
}
@pytest.fixture
def mock_filesystem():
    """
    Patch the filesystem functions touched by the merge tests.

    Yields:
        dict: The active mocks, keyed by a short name ("exists",
            "makedirs", "getmtime", "listdir", "walk", "copy2").
    """
    with (
        patch("os.path.exists") as exists,
        patch("os.makedirs") as makedirs,
        patch("os.path.getmtime") as getmtime,
        patch("os.listdir") as listdir,
        patch("os.walk") as walk,
        patch("shutil.copy2") as copy2,
    ):
        # The patches stay active for as long as the requesting test runs
        mocks = dict(
            exists=exists,
            makedirs=makedirs,
            getmtime=getmtime,
            listdir=listdir,
            walk=walk,
            copy2=copy2,
        )
        yield mocks
def test_incremental_merge_new_file(mock_filesystem):
    """Test merging when target file doesn't exist"""
    source_dir, target_dir, media_dir = "/source", "/target", "media"
    json_name = "chat.json"

    # Only the source directory is reported as existing
    mock_filesystem["exists"].side_effect = lambda path: path == "/source"
    mock_filesystem["listdir"].return_value = [json_name]

    incremental_merge(source_dir, target_dir, media_dir, 2, True)

    # The target directory is created and the JSON file is copied as-is
    mock_filesystem["makedirs"].assert_called_once_with(target_dir, exist_ok=True)
    copied_from = os.path.join(source_dir, json_name)
    copied_to = os.path.join(target_dir, json_name)
    mock_filesystem["copy2"].assert_called_once_with(copied_from, copied_to)
def test_incremental_merge_existing_file_with_changes(mock_filesystem):
    """Test merging when target file exists and has changes"""
    source_dir, target_dir, media_dir = "source", "target", "media"

    # Every path is reported as existing
    mock_filesystem["exists"].side_effect = lambda _path: True
    mock_filesystem["listdir"].return_value = ["chat.json"]

    # Canned file contents, keyed by path built with the OS separator
    source_file = os.path.join(source_dir, "chat.json")
    target_file = os.path.join(target_dir, "chat.json")
    canned_content = {
        source_file: json.dumps(chat_data_2),
        target_file: json.dumps(chat_data_1),
    }

    # Everything written through a "w" handle is collected here
    written_chunks = []
    mock_write = MagicMock(side_effect=written_chunks.append)

    def fake_open(filename, mode="r"):
        if mode == 'w':
            write_handle = mock_open().return_value
            write_handle.write.side_effect = mock_write
            return write_handle
        # Normalize the path before looking up the canned content
        payload = canned_content.get(os.path.normpath(filename), '')
        return mock_open(read_data=payload).return_value

    with patch("builtins.open", mock_open()) as mock_file:
        mock_file.side_effect = fake_open

        incremental_merge(source_dir, target_dir, media_dir, 2, True)

        # Both files are read, and the target is opened for writing
        for path, mode in ((source_file, "r"), (target_file, "r"), (target_file, "w")):
            mock_file.assert_any_call(path, mode)

        # The written output must decode to the expected merged structure
        assert mock_write.called, "Write method was never called"
        written_data = json.loads(''.join(written_chunks))
        assert written_data is not None, "No data was written"
        assert written_data == chat_data_merged, "Merged data does not match expected result"

        messages = written_data["12345678@s.whatsapp.net"]["messages"]
        assert "24690" in messages, "Common message should be present"
        assert "24691" in messages, "Target-only message should be preserved"
        assert "24692" in messages, "Source-only message should be added"
        assert len(messages) == 3, "Should have exactly 3 messages"
def test_incremental_merge_existing_file_no_changes(mock_filesystem):
    """Test merging when target file exists but has no changes"""
    source_dir, target_dir, media_dir = "source", "target", "media"

    # Every path is reported as existing
    mock_filesystem["exists"].side_effect = lambda _path: True
    mock_filesystem["listdir"].return_value = ["chat.json"]

    # Source and target hold the same serialized chat data
    source_file = os.path.join(source_dir, "chat.json")
    target_file = os.path.join(target_dir, "chat.json")
    canned_content = {
        source_file: json.dumps(chat_data_1),
        target_file: json.dumps(chat_data_1),
    }

    def fake_open(filename, mode="r"):
        if mode == 'w':
            return mock_open().return_value
        # Normalize the path before looking up the canned content
        payload = canned_content.get(os.path.normpath(filename), '')
        return mock_open(read_data=payload).return_value

    with patch("builtins.open", mock_open()) as mock_file:
        mock_file.side_effect = fake_open

        incremental_merge(source_dir, target_dir, media_dir, 2, True)

        # Verify no write operations occurred on target file.
        # NOTE(review): handles for mode 'w' are separate mock_open() instances
        # returned through side_effect, so their write calls may never be
        # recorded in mock_file.mock_calls - confirm this check can fail.
        write_calls = [
            entry for entry in mock_file.mock_calls if entry[0] == "().write"]
        assert len(write_calls) == 0
def test_incremental_merge_media_copy(mock_filesystem):
    """Test media file copying during merge"""
    source_dir, target_dir, media_dir = "source", "target", "media"

    # Every path exists; the walked media tree holds one file per level
    mock_filesystem["exists"].side_effect = lambda _path: True
    mock_filesystem["listdir"].return_value = ["chat.json"]
    media_root = os.path.join(source_dir, "media")
    media_subfolder = os.path.join(source_dir, "media", "subfolder")
    mock_filesystem["walk"].return_value = [
        (media_root, ["subfolder"], ["file1.jpg"]),
        (media_subfolder, [], ["file2.jpg"]),
    ]
    # Paths containing "source" report a newer mtime than all other paths
    mock_filesystem["getmtime"].side_effect = lambda path: 1000 if "source" in path else 500

    # Source and target hold the same serialized chat data
    source_file = os.path.join(source_dir, "chat.json")
    target_file = os.path.join(target_dir, "chat.json")
    canned_content = {
        source_file: json.dumps(chat_data_1),
        target_file: json.dumps(chat_data_1),
    }

    def fake_open(filename, mode="r"):
        if mode == 'w':
            return mock_open().return_value
        # Normalize the path before looking up the canned content
        payload = canned_content.get(os.path.normpath(filename), '')
        return mock_open(read_data=payload).return_value

    with patch("builtins.open", mock_open()) as mock_file:
        mock_file.side_effect = fake_open

        incremental_merge(source_dir, target_dir, media_dir, 2, True)

        # Verify media file operations
        assert mock_filesystem["makedirs"].call_count >= 2  # At least target dir and media dir
        assert mock_filesystem["copy2"].call_count == 2  # Two media files copied

View File

@@ -0,0 +1,76 @@
import os
import sys
import pytest
import subprocess
@pytest.fixture
def command_runner():
    """
    Fixture providing a small helper for running external commands, so
    several tests can share the same subprocess settings.
    """

    def _run_command(command_list, check=True):
        """
        Run a command with captured, text-mode output.

        Args:
            command_list (list): The command and its arguments as a list of
                strings (e.g., ["python", "my_script.py", "arg1"]).
            check (bool, optional): When True, a non-zero exit code raises
                an exception. Defaults to True.

        Returns:
            subprocess.CompletedProcess: The result of the command.
        """
        run_options = dict(capture_output=True, text=True, check=check)
        return subprocess.run(command_list, **run_options)

    return _run_command
def test_nuitka_binary():
    """
    Tests the creation and execution of a Nuitka-compiled binary.

    The package is compiled into a onefile executable, which is then run
    with ``--help`` to confirm it starts and prints usage text. The binary
    is removed afterwards whether or not the run succeeded.
    """
    if sys.version_info >= (3, 14):
        # Report the test as skipped instead of letting it silently pass
        pytest.skip("Skipping Nuitka test: Python 3.14 is not yet fully supported by Nuitka.")

    nuitka_command = [
        # Use the interpreter running the tests, not whatever "python" is on PATH
        sys.executable, "-m", "nuitka", "--onefile", "--assume-yes-for-downloads",
        "--include-data-file=./Whatsapp_Chat_Exporter/whatsapp.html=./Whatsapp_Chat_Exporter/whatsapp.html",
        "Whatsapp_Chat_Exporter",
        "--output-filename=wtsexporter.exe"  # use .exe on all platforms for compatibility
    ]

    try:
        compile_result = subprocess.run(
            nuitka_command,
            capture_output=True,
            text=True,
            check=True
        )
    except subprocess.CalledProcessError as e:
        # Surface the compiler output, which would otherwise stay captured
        print(f"Nuitka compilation failed with error: {e.stderr}")
        raise
    print(f"Nuitka compilation output: {compile_result.stdout}")

    binary_path = "./wtsexporter.exe"
    assert os.path.exists(binary_path), f"Binary {binary_path} was not created."

    try:
        execute_result = subprocess.run(
            [binary_path, "--help"],
            capture_output=True,
            text=True,
            check=True,
        )
        print(f"Binary execution output: {execute_result.stdout}")
        assert "usage:" in execute_result.stdout.lower(), "Binary did not produce expected help output."
    except subprocess.CalledProcessError as e:
        print(f"Binary execution failed with error: {e.stderr}")
        raise
    finally:
        # Always clean up the compiled binary
        if os.path.exists(binary_path):
            os.remove(binary_path)

256
tests/test_utility.py Normal file
View File

@@ -0,0 +1,256 @@
import pytest
import random
import string
from unittest.mock import patch, mock_open, MagicMock
from Whatsapp_Chat_Exporter.utility import *
def test_convert_time_unit():
    """Check convert_time_unit against a table of (seconds, expected text) pairs."""
    expectations = [
        (0, "less than a second"),
        (1, "a second"),
        (10, "10 seconds"),
        (60, "1 minute"),
        (61, "1 minute 1 second"),
        (122, "2 minutes 2 seconds"),
        (3600, "1 hour"),
        (3661, "1 hour 1 minute 1 second"),
        (3720, "1 hour 2 minutes"),
        (3660, "1 hour 1 minute"),
        (7263, "2 hours 1 minute 3 seconds"),
        (86400, "1 day"),
        (86461, "1 day 1 minute 1 second"),
        (172805, "2 days 5 seconds"),
    ]
    for seconds, rendered in expectations:
        assert convert_time_unit(seconds) == rendered
class TestBytesToReadable:
    """Tests for bytes_to_readable, covering every unit from B up to YB."""

    def test_conversion(self):
        # The assertions live in a test method so pytest collects and reports
        # them; placed directly in the class body they would run at import time
        # and a failure would surface as a collection error.
        assert bytes_to_readable(0) == "0 B"
        assert bytes_to_readable(500) == "500 B"
        assert bytes_to_readable(1024) == "1.0 KB"
        assert bytes_to_readable(2048) == "2.0 KB"
        assert bytes_to_readable(1536) == "1.5 KB"
        assert bytes_to_readable(1024**2) == "1.0 MB"
        assert bytes_to_readable(5 * 1024**2) == "5.0 MB"
        assert bytes_to_readable(1024**3) == "1.0 GB"
        assert bytes_to_readable(1024**4) == "1.0 TB"
        assert bytes_to_readable(1024**5) == "1.0 PB"
        assert bytes_to_readable(1024**6) == "1.0 EB"
        assert bytes_to_readable(1024**7) == "1.0 ZB"
        assert bytes_to_readable(1024**8) == "1.0 YB"
class TestReadableToBytes:
    """Tests for readable_to_bytes: valid sizes, formatting tolerance and invalid input."""

    def test_conversion(self):
        # Plain bytes, with and without a space before the unit
        assert readable_to_bytes("0B") == 0
        assert readable_to_bytes("100B") == 100
        assert readable_to_bytes("50 B") == 50
        # Fractional values, including a trailing dot with no decimals
        assert readable_to_bytes("1KB") == 1024
        assert readable_to_bytes("2.5 KB") == 2560
        assert readable_to_bytes("2.0 KB") == 2048
        assert readable_to_bytes("1MB") == 1024**2
        assert readable_to_bytes("0.5 MB") == 524288
        assert readable_to_bytes("1. MB") == 1048576
        assert readable_to_bytes("1GB") == 1024**3
        assert readable_to_bytes("1.GB") == 1024**3
        # Larger units
        assert readable_to_bytes("1TB") == 1024**4
        assert readable_to_bytes("1PB") == 1024**5
        assert readable_to_bytes("1EB") == 1024**6
        assert readable_to_bytes("1ZB") == 1024**7
        assert readable_to_bytes("1YB") == 1024**8

    def test_case_insensitivity(self):
        assert readable_to_bytes("1kb") == 1024
        assert readable_to_bytes("2mB") == 2 * 1024**2

    def test_whitespace(self):
        assert readable_to_bytes(" 10 KB ") == 10 * 1024
        assert readable_to_bytes(" 1 MB") == 1024**2

    # Each input gets its own pytest.raises block: statements placed after the
    # first raising call inside a single block would never execute.
    @pytest.mark.parametrize("size_str", ["100X", "A100", "100$$$$$"])
    def test_invalid_unit(self, size_str):
        with pytest.raises(ValueError, match="Invalid size format for size_str"):
            readable_to_bytes(size_str)

    def test_invalid_number(self):
        with pytest.raises(ValueError, match="Invalid size format for size_str"):
            readable_to_bytes("ABC KB")

    def test_missing_unit(self):
        # A bare number is interpreted as a byte count
        assert readable_to_bytes("100") == 100
class TestSanitizeExcept:
    """Tests for sanitize_except: <br> is kept, every other tag is escaped."""

    def test_no_tags(self):
        plain = "This is plain text."
        assert sanitize_except(plain) == Markup("This is plain text.")

    def test_allowed_br_tag(self):
        # Each spelling of the line break is expected to come out as <br>
        br_cases = (
            ("Line 1<br>Line 2", "Line 1<br>Line 2"),
            ("<br/>Line", "<br>Line"),
            ("Line<br />", "Line<br>"),
        )
        for raw, cleaned in br_cases:
            assert sanitize_except(raw) == Markup(cleaned)

    def test_mixed_tags(self):
        raw = "<b>Bold</b><br><i>Italic</i><img src='evil.gif'><script>alert('XSS')</script>"
        expected = Markup(
            "&lt;b&gt;Bold&lt;/b&gt;<br>&lt;i&gt;Italic&lt;/i&gt;&lt;img src='evil.gif'&gt;&lt;script&gt;alert('XSS')&lt;/script&gt;")
        assert sanitize_except(raw) == expected

    def test_attribute_stripping(self):
        # Attributes on an allowed tag are dropped
        raw = "<br class='someclass'>"
        assert sanitize_except(raw) == Markup("<br>")
class TestDetermineDay:
    """
    Tests for determine_day using pairs of Unix timestamps.

    NOTE(review): the expected dates are written for GMT; if determine_day
    converts timestamps using the local timezone, results may differ on
    machines far from UTC - confirm against the implementation.
    """

    def test_same_day(self):
        earlier = 1678838400  # March 15, 2023 00:00:00 GMT
        later = 1678881600  # March 15, 2023 12:00:00 GMT
        assert determine_day(earlier, later) is None

    def test_different_day(self):
        earlier = 1678886400  # March 15, 2023 13:20:00 GMT
        later = 1678972800  # March 16, 2023 13:20:00 GMT
        assert determine_day(earlier, later) == datetime(2023, 3, 16).date()

    def test_crossing_month(self):
        earlier = 1680220800  # March 31, 2023 00:00:00 GMT
        later = 1680307200  # April 1, 2023 00:00:00 GMT
        assert determine_day(earlier, later) == datetime(2023, 4, 1).date()

    def test_crossing_year(self):
        earlier = 1703980800  # December 31, 2023 00:00:00 GMT
        later = 1704067200  # January 1, 2024 00:00:00 GMT
        assert determine_day(earlier, later) == datetime(2024, 1, 1).date()
class TestGetFileName:
    """Tests for get_file_name across the accepted contact identifier formats."""

    def test_valid_contact_phone_number_no_chat_name(self):
        store = ChatStore(Device.ANDROID, name=None)
        file_stem, display_name = get_file_name("1234567890@s.whatsapp.net", store)
        assert file_stem == "1234567890"
        assert display_name == "1234567890"

    def test_valid_contact_phone_number_with_chat_name(self):
        store = ChatStore(Device.IOS, name="My Chat Group")
        file_stem, display_name = get_file_name("1234567890@s.whatsapp.net", store)
        assert file_stem == "1234567890-My-Chat-Group"
        assert display_name == "My Chat Group"

    def test_valid_contact_exported_chat(self):
        store = ChatStore(Device.ANDROID, name="Testing")
        file_stem, display_name = get_file_name("ExportedChat", store)
        assert file_stem == "ExportedChat-Testing"
        assert display_name == "Testing"

    def test_valid_contact_special_ids(self):
        store = ChatStore(Device.ANDROID, name="Special Chat")
        # Both all-digit special identifiers are accepted as contacts
        for special_id in ("000000000000000", "000000000000001"):
            file_stem, display_name = get_file_name(special_id, store)
            assert file_stem == f"{special_id}-Special-Chat"
            assert display_name == "Special Chat"

    def test_unexpected_contact_format(self):
        store = ChatStore(Device.ANDROID, name="Some Chat")
        with pytest.raises(ValueError, match="Unexpected contact format: invalid-contact"):
            get_file_name("invalid-contact", store)

    def test_contact_with_hyphen_and_chat_name(self):
        store = ChatStore(Device.ANDROID, name="Another Chat")
        file_stem, display_name = get_file_name("123-456-7890@g.us", store)
        assert file_stem == "Another-Chat"
        assert display_name == "Another Chat"

    def test_contact_with_hyphen_no_chat_name(self):
        store = ChatStore(Device.ANDROID, name=None)
        file_stem, display_name = get_file_name("123-456-7890@g.us", store)
        assert file_stem == "123-456-7890"
        assert display_name == "123-456-7890"
class TestGetCondForEmpty:
    """Tests for the SQL fragment returned by get_cond_for_empty."""

    def test_enable_true(self):
        expected_sql = "AND (chat.hidden=0 OR c.jid='status@broadcast' OR c.broadcast>0)"
        assert get_cond_for_empty(True, "c.jid", "c.broadcast") == expected_sql

    def test_enable_false(self):
        # When disabled, no condition is produced
        assert get_cond_for_empty(False, "other_jid", "other_broadcast") == ""
class TestGetChatCondition:
    """Placeholder: no tests have been written for this class yet."""
class TestGetStatusLocation:
    """Tests for get_status_location with and without an offline static folder."""

    @patch('os.path.isdir')
    @patch('os.path.isfile')
    @patch('os.mkdir')
    @patch('urllib.request.urlopen')
    @patch('builtins.open', new_callable=mock_open)
    def test_offline_static_set(self, opened, urlopen_mock, mkdir_mock, isfile_mock, isdir_mock):
        # Neither the static folder nor the stylesheet exists yet
        isdir_mock.return_value = False
        isfile_mock.return_value = False

        # urlopen is used as a context manager whose response yields the CSS bytes
        css_payload = b'W3.CSS Content'
        response = MagicMock()
        response.read.return_value = css_payload
        urlopen_mock.return_value.__enter__.return_value = response

        output_folder, offline_static = "output_folder", "offline_static"
        static_dir = os.path.join(output_folder, offline_static)

        result = get_status_location(output_folder, offline_static)

        assert result == os.path.join(offline_static, "w3.css")
        mkdir_mock.assert_called_once_with(static_dir)
        urlopen_mock.assert_called_once_with("https://www.w3schools.com/w3css/4/w3.css")
        opened.assert_called_once_with(os.path.join(static_dir, "w3.css"), "wb")
        opened().write.assert_called_once_with(css_payload)

    def test_offline_static_not_set(self):
        # Without an offline folder, the remote URL is returned
        location = get_status_location("output_folder", "")
        assert location == "https://www.w3schools.com/w3css/4/w3.css"
class TestSafeName:
    """Parametrized tests for safe_name over a table of (input, expected) pairs."""

    def generate_random_string(length=50):
        # Called from the class body while the test-case table is built, so it
        # takes no self. A private, seeded Random instance keeps the output
        # reproducible without reseeding the module-level generator that the
        # rest of the test session shares.
        rng = random.Random(10)
        alphabet = string.ascii_letters + string.digits + "äöüß"
        return ''.join(rng.choice(alphabet) for _ in range(length))

    safe_name_test_cases = [
        ("This is a test string", "This-is-a-test-string"),
        ("This is a test string with special characters!@#$%^&*()",
         "This-is-a-test-string-with-special-characters"),
        ("This is a test string with numbers 1234567890", "This-is-a-test-string-with-numbers-1234567890"),
        ("This is a test string with mixed case ThisIsATestString",
         "This-is-a-test-string-with-mixed-case-ThisIsATestString"),
        ("This is a test string with extra spaces \u00A0 \u00A0 \u00A0 ThisIsATestString",
         "This-is-a-test-string-with-extra-spaces-ThisIsATestString"),
        ("This is a test string with unicode characters äöüß",
         "This-is-a-test-string-with-unicode-characters-äöüß"),
        ("這是一個包含中文的測試字符串", "這是一個包含中文的測試字符串"),  # Chinese characters, should stay as is
        (
            # Both calls use the same seed, so the random tails are identical
            f"This is a test string with long length {generate_random_string(1000)}",
            f"This-is-a-test-string-with-long-length-{generate_random_string(1000)}",
        ),
        ("", ""),  # Empty string
        (" ", ""),  # String with only space
        ("---", "---"),  # String with only hyphens
        ("___", "___"),  # String with only underscores
        ("a" * 100, "a" * 100),  # Long string with single character
        ("a-b-c-d-e", "a-b-c-d-e"),  # String with hyphen
        ("a_b_c_d_e", "a_b_c_d_e"),  # String with underscore
        ("a b c d e", "a-b-c-d-e"),  # String with spaces
        ("test.com/path/to/resource?param1=value1&param2=value2",
         "test.compathtoresourceparam1value1param2value2"),  # Test with URL
        ("filename.txt", "filename.txt"),  # Test with filename
        ("Αυτή είναι μια δοκιμαστική συμβολοσειρά με ελληνικούς χαρακτήρες.",
         "Αυτή-είναι-μια-δοκιμαστική-συμβολοσειρά-με-ελληνικούς-χαρακτήρες."),  # Greek characters
        ("This is a test with комбинированные знаки ̆ example",
         "This-is-a-test-with-комбинированные-знаки-example")  # Mixed with unicode
    ]

    @pytest.mark.parametrize("input_text, expected_output", safe_name_test_cases)
    def test_safe_name(self, input_text, expected_output):
        result = safe_name(input_text)
        assert result == expected_output

View File

@@ -0,0 +1,48 @@
# from contacts_names_from_vcards import readVCardsFile
import os
from Whatsapp_Chat_Exporter.vcards_contacts import normalize_number, read_vcards_file
def test_readVCardsFile():
    """Read the sample contacts.vcf and check the name parsed for each entry."""
    vcf_path = os.path.join(os.path.dirname(__file__), "data", "contacts.vcf")
    contacts = read_vcards_file(vcf_path, "852")

    if contacts:
        print("Found Names")
        print("-----------------------")
        # The name is the second element of each contact tuple (index 1)
        for position, entry in enumerate(contacts, start=1):
            print(f"{position}. {entry[1]}")
    print(contacts)

    assert len(contacts) == 6

    expected_names = [
        "Sample Contact",  # simple contact name
        "Yard Lawn Guy, Jose Lopez",  # complex name
        "John Butler 🌟💫🌟",  # name with emoji
        "Airline Contact #'s",  # note with multi-line encoding
        "James Peacock Elementary",  # address with multi-line encoding
        "AAA Car Service",  # business entry using ORG but not F/FN
    ]
    for index, expected in enumerate(expected_names):
        assert contacts[index][1] == expected
def test_create_number_to_name_dicts():
    """Placeholder: this test has no assertions yet and always passes."""
def test_fuzzy_match_numbers():
    """Placeholder: this test has no assertions yet and always passes."""
def test_normalize_number():
    """Check normalize_number against (raw number, country code, expected) triples."""
    cases = (
        ('0531234567', '1', '1531234567'),
        ('001531234567', '2', '1531234567'),
        ('+1531234567', '34', '1531234567'),
        ('053(123)4567', '34', '34531234567'),
        ('0531-234-567', '58', '58531234567'),
    )
    for raw_number, country_code, normalized in cases:
        assert normalize_number(raw_number, country_code) == normalized