diff --git a/Whatsapp_Chat_Exporter/__main__.py b/Whatsapp_Chat_Exporter/__main__.py index 4c0b0ec..c315623 100644 --- a/Whatsapp_Chat_Exporter/__main__.py +++ b/Whatsapp_Chat_Exporter/__main__.py @@ -218,7 +218,19 @@ def main(): "--date-format", dest="filter_date_format", default="%Y-%m-%d %H:%M", - help="The date format for the date filter." + help="The date format for the date filter" + ) + parser.add_argument( + "--include", + dest="filter_chat_include", + nargs='+', + help="Include chats that match the supplied phone number" + ) + parser.add_argument( + "--exclude", + dest="filter_chat_exclude", + nargs='+', + help="Exclude chats that match the supplied phone number" ) args = parser.parse_args() @@ -278,6 +290,17 @@ def main(): else: print("Unsupported date format. See https://wts.knugi.dev/filter.html") exit(1) + if args.filter_chat_include is not None: + for chat in args.filter_chat_include: + if not chat.isnumeric(): + print("Enter a phone number in the chat filter. 
See https://wts.knugi.dev/filter.html") + exit(1) + filter_chat = (args.filter_chat_include, args.filter_chat_exclude) data = {} @@ -385,11 +408,11 @@ def main(): if os.path.isfile(msg_db): with sqlite3.connect(msg_db) as db: db.row_factory = sqlite3.Row - messages(db, data, args.media, args.timezone_offset, args.filter_date) - media(db, data, args.media, args.filter_date) - vcard(db, data, args.media, args.filter_date) + messages(db, data, args.media, args.timezone_offset, args.filter_date, filter_chat) + media(db, data, args.media, args.filter_date, filter_chat) + vcard(db, data, args.media, args.filter_date, filter_chat) if args.android: - extract.calls(db, data, args.timezone_offset) + extract.calls(db, data, args.timezone_offset, filter_chat) if not args.no_html: create_html( data, diff --git a/Whatsapp_Chat_Exporter/extract.py b/Whatsapp_Chat_Exporter/extract.py index 9ea2a80..d670eec 100644 --- a/Whatsapp_Chat_Exporter/extract.py +++ b/Whatsapp_Chat_Exporter/extract.py @@ -165,13 +165,28 @@ def contacts(db, data): row = c.fetchone() -def messages(db, data, media_folder, timezone_offset, range): +def messages(db, data, media_folder, timezone_offset, range, filter_chat): # Get message history c = db.cursor() try: - c.execute(f"""SELECT count() FROM messages {f'WHERE timestamp {range}' if range is not None else ''}""") + c.execute(f"""SELECT count() + FROM messages + WHERE 1=1 + {f'AND timestamp {range}' if range is not None else ''} + {'AND (' + ' OR '.join(f"messages.key_remote_jid LIKE '%{chat}%'" for chat in filter_chat[0]) + ')' if filter_chat[0] is not None else ''} + {'AND (' + ' AND '.join(f"messages.key_remote_jid NOT LIKE '%{chat}%'" for chat in filter_chat[1]) + ')' if filter_chat[1] is not None else ''}""") + except sqlite3.OperationalError: - c.execute(f"""SELECT count() FROM message {f'WHERE timestamp {range}' if range is not None else ''}""") + c.execute(f"""SELECT count() + FROM message + LEFT JOIN chat + ON chat._id = message.chat_row_id + 
INNER JOIN jid + ON jid._id = chat.jid_row_id + WHERE 1=1 + {f'AND timestamp {range}' if range is not None else ''} + {'AND (' + ' OR '.join(f"jid.raw_string LIKE '%{chat}%'" for chat in filter_chat[0]) + ')' if filter_chat[0] is not None else ''} + {'AND (' + ' AND '.join(f"jid.raw_string NOT LIKE '%{chat}%'" for chat in filter_chat[1]) + ')' if filter_chat[1] is not None else ''}""") total_row_number = c.fetchone()[0] print(f"Processing messages...(0/{total_row_number})", end="\r") @@ -227,6 +242,8 @@ def messages(db, data, media_folder, timezone_offset, range): ON receipt_user.message_row_id = messages._id WHERE messages.key_remote_jid <> '-1' {f'AND messages.timestamp {range}' if range is not None else ''} + {'AND (' + ' OR '.join(f"messages.key_remote_jid LIKE '%{chat}%'" for chat in filter_chat[0]) + ')' if filter_chat[0] is not None else ''} + {'AND (' + ' AND '.join(f"messages.key_remote_jid NOT LIKE '%{chat}%'" for chat in filter_chat[1]) + ')' if filter_chat[1] is not None else ''} GROUP BY messages._id ORDER BY messages.timestamp ASC;""" ) @@ -293,6 +310,8 @@ def messages(db, data, media_folder, timezone_offset, range): ON receipt_user.message_row_id = message._id WHERE key_remote_jid <> '-1' {f'AND message.timestamp {range}' if range is not None else ''} + {'AND (' + ' OR '.join(f"key_remote_jid LIKE '%{chat}%'" for chat in filter_chat[0]) + ')' if filter_chat[0] is not None else ''} + {'AND (' + ' AND '.join(f"key_remote_jid NOT LIKE '%{chat}%'" for chat in filter_chat[1]) + ')' if filter_chat[1] is not None else ''} GROUP BY message._id;""" ) except Exception as e: @@ -457,7 +476,7 @@ def messages(db, data, media_folder, timezone_offset, range): print(f"Processing messages...({total_row_number}/{total_row_number})", end="\r") -def media(db, data, media_folder, range): +def media(db, data, media_folder, range, filter_chat): # Get media c = db.cursor() try: @@ -465,13 +484,23 @@ def media(db, data, media_folder, range): FROM message_media INNER JOIN 
messages ON message_media.message_row_id = messages._id - {f'WHERE messages.timestamp {range}' if range is not None else ''}""") + WHERE 1=1 + {f'AND messages.timestamp {range}' if range is not None else ''} + {'AND (' + ' OR '.join(f"messages.key_remote_jid LIKE '%{chat}%'" for chat in filter_chat[0]) + ')' if filter_chat[0] is not None else ''} + {'AND (' + ' AND '.join(f"messages.key_remote_jid NOT LIKE '%{chat}%'" for chat in filter_chat[1]) + ')' if filter_chat[1] is not None else ''}""") except sqlite3.OperationalError: c.execute(f"""SELECT count() FROM message_media INNER JOIN message ON message_media.message_row_id = message._id - {f'WHERE message.timestamp {range}' if range is not None else ''}""") + LEFT JOIN chat + ON chat._id = message.chat_row_id + INNER JOIN jid + ON jid._id = chat.jid_row_id + WHERE 1=1 + {f'AND message.timestamp {range}' if range is not None else ''} + {'AND (' + ' OR '.join(f"jid.raw_string LIKE '%{chat}%'" for chat in filter_chat[0]) + ')' if filter_chat[0] is not None else ''} + {'AND (' + ' AND '.join(f"jid.raw_string NOT LIKE '%{chat}%'" for chat in filter_chat[1]) + ')' if filter_chat[1] is not None else ''}""") total_row_number = c.fetchone()[0] print(f"\nProcessing media...(0/{total_row_number})", end="\r") i = 0 @@ -493,6 +522,8 @@ def media(db, data, media_folder, range): ON messages.key_remote_jid = jid.raw_string WHERE jid.type <> 7 {f'AND messages.timestamp {range}' if range is not None else ''} + {'AND (' + ' OR '.join(f"messages.key_remote_jid LIKE '%{chat}%'" for chat in filter_chat[0]) + ')' if filter_chat[0] is not None else ''} + {'AND (' + ' AND '.join(f"messages.key_remote_jid NOT LIKE '%{chat}%'" for chat in filter_chat[1]) + ')' if filter_chat[1] is not None else ''} ORDER BY messages.key_remote_jid ASC""" ) except sqlite3.OperationalError: @@ -515,6 +546,8 @@ def media(db, data, media_folder, range): ON message_media.file_hash = media_hash_thumbnail.media_hash WHERE jid.type <> 7 {f'AND message.timestamp 
{range}' if range is not None else ''} + {'AND (' + ' OR '.join(f"key_remote_jid LIKE '%{chat}%'" for chat in filter_chat[0]) + ')' if filter_chat[0] is not None else ''} + {'AND (' + ' AND '.join(f"key_remote_jid NOT LIKE '%{chat}%'" for chat in filter_chat[1]) + ')' if filter_chat[1] is not None else ''} ORDER BY jid.raw_string ASC""" ) content = c.fetchone() @@ -564,7 +597,7 @@ def media(db, data, media_folder, range): f"Processing media...({total_row_number}/{total_row_number})", end="\r") -def vcard(db, data, media_folder, range): +def vcard(db, data, media_folder, range, filter_chat): c = db.cursor() try: c.execute(f"""SELECT message_row_id, @@ -574,7 +607,10 @@ def vcard(db, data, media_folder, range): FROM messages_vcards INNER JOIN messages ON messages_vcards.message_row_id = messages._id - {f'WHERE messages.timestamp {range}' if range is not None else ''} + WHERE 1=1 + {f'AND messages.timestamp {range}' if range is not None else ''} + {'AND (' + ' OR '.join(f"messages.key_remote_jid LIKE '%{chat}%'" for chat in filter_chat[0]) + ')' if filter_chat[0] is not None else ''} + {'AND (' + ' AND '.join(f"messages.key_remote_jid NOT LIKE '%{chat}%'" for chat in filter_chat[1]) + ')' if filter_chat[1] is not None else ''} ORDER BY messages.key_remote_jid ASC;""" ) except sqlite3.OperationalError: @@ -589,7 +625,10 @@ def vcard(db, data, media_folder, range): ON chat._id = message.chat_row_id INNER JOIN jid ON jid._id = chat.jid_row_id - {f'WHERE message.timestamp {range}' if range is not None else ''} + WHERE 1=1 + {f'AND message.timestamp {range}' if range is not None else ''} + {'AND (' + ' OR '.join(f"key_remote_jid LIKE '%{chat}%'" for chat in filter_chat[0]) + ')' if filter_chat[0] is not None else ''} + {'AND (' + ' AND '.join(f"key_remote_jid NOT LIKE '%{chat}%'" for chat in filter_chat[1]) + ')' if filter_chat[1] is not None else ''} ORDER BY message.chat_row_id ASC;""" ) @@ -616,14 +655,22 @@ def vcard(db, data, media_folder, range): print(f"Processing 
vCards...({index + 1}/{total_row_number})", end="\r") -def calls(db, data, timezone_offset): +def calls(db, data, timezone_offset, filter_chat): c = db.cursor() - c.execute("""SELECT count() FROM call_log""") + c.execute(f"""SELECT count() + FROM call_log + INNER JOIN jid + ON call_log.jid_row_id = jid._id + LEFT JOIN chat + ON call_log.jid_row_id = chat.jid_row_id + WHERE 1=1 + {'AND (' + ' OR '.join(f"jid.raw_string LIKE '%{chat}%'" for chat in filter_chat[0]) + ')' if filter_chat[0] is not None else ''} + {'AND (' + ' AND '.join(f"jid.raw_string NOT LIKE '%{chat}%'" for chat in filter_chat[1]) + ')' if filter_chat[1] is not None else ''}""") total_row_number = c.fetchone()[0] if total_row_number == 0: return print(f"\nProcessing calls...({total_row_number})", end="\r") - c.execute("""SELECT call_log._id, + c.execute(f"""SELECT call_log._id, jid.raw_string, from_me, call_id, @@ -637,7 +684,10 @@ def calls(db, data, timezone_offset): INNER JOIN jid ON call_log.jid_row_id = jid._id LEFT JOIN chat - ON call_log.jid_row_id = chat.jid_row_id""" + ON call_log.jid_row_id = chat.jid_row_id + WHERE 1=1 + {'AND (' + ' OR '.join(f"jid.raw_string LIKE '%{chat}%'" for chat in filter_chat[0]) + ')' if filter_chat[0] is not None else ''} + {'AND (' + ' AND '.join(f"jid.raw_string NOT LIKE '%{chat}%'" for chat in filter_chat[1]) + ')' if filter_chat[1] is not None else ''}""" ) chat = ChatStore(Device.ANDROID, "WhatsApp Calls") content = c.fetchone() diff --git a/Whatsapp_Chat_Exporter/extract_iphone.py b/Whatsapp_Chat_Exporter/extract_iphone.py index d8f739f..0e124f3 100644 --- a/Whatsapp_Chat_Exporter/extract_iphone.py +++ b/Whatsapp_Chat_Exporter/extract_iphone.py @@ -24,20 +24,27 @@ def contacts(db, data): content = c.fetchone() -def messages(db, data, media_folder, timezone_offset, range): +def messages(db, data, media_folder, timezone_offset, range, filter_chat): c = db.cursor() # Get contacts - c.execute("""SELECT count() FROM ZWACHATSESSION""") + c.execute(f"""SELECT 
count() + FROM ZWACHATSESSION + WHERE 1=1 + {'AND (' + ' OR '.join(f"ZWACHATSESSION.ZCONTACTJID LIKE '%{chat}%'" for chat in filter_chat[0]) + ')' if filter_chat[0] is not None else ''} + {'AND (' + ' AND '.join(f"ZWACHATSESSION.ZCONTACTJID NOT LIKE '%{chat}%'" for chat in filter_chat[1]) + ')' if filter_chat[1] is not None else ''}""") total_row_number = c.fetchone()[0] print(f"Processing contacts...({total_row_number})") c.execute( - """SELECT ZCONTACTJID, + f"""SELECT ZCONTACTJID, ZPARTNERNAME, ZPUSHNAME FROM ZWACHATSESSION LEFT JOIN ZWAPROFILEPUSHNAME - ON ZWACHATSESSION.ZCONTACTJID = ZWAPROFILEPUSHNAME.ZJID;""" + ON ZWACHATSESSION.ZCONTACTJID = ZWAPROFILEPUSHNAME.ZJID + WHERE 1=1 + {'AND (' + ' OR '.join(f"ZWACHATSESSION.ZCONTACTJID LIKE '%{chat}%'" for chat in filter_chat[0]) + ')' if filter_chat[0] is not None else ''} + {'AND (' + ' AND '.join(f"ZWACHATSESSION.ZCONTACTJID NOT LIKE '%{chat}%'" for chat in filter_chat[1]) + ')' if filter_chat[1] is not None else ''};""" ) content = c.fetchone() while content is not None: @@ -65,7 +72,14 @@ def messages(db, data, media_folder, timezone_offset, range): content = c.fetchone() # Get message history - c.execute(f"""SELECT count() FROM ZWAMESSAGE {f'WHERE ZMESSAGEDATE {range}' if range is not None else ''}""") + c.execute(f"""SELECT count() + FROM ZWAMESSAGE + INNER JOIN ZWACHATSESSION + ON ZWAMESSAGE.ZCHATSESSION = ZWACHATSESSION.Z_PK + WHERE 1=1 + {f'AND ZMESSAGEDATE {range}' if range is not None else ''} + {'AND (' + ' OR '.join(f"ZWACHATSESSION.ZCONTACTJID LIKE '%{chat}%'" for chat in filter_chat[0]) + ')' if filter_chat[0] is not None else ''} + {'AND (' + ' AND '.join(f"ZWACHATSESSION.ZCONTACTJID NOT LIKE '%{chat}%'" for chat in filter_chat[1]) + ')' if filter_chat[1] is not None else ''}""") total_row_number = c.fetchone()[0] print(f"Processing messages...(0/{total_row_number})", end="\r") c.execute(f"""SELECT ZCONTACTJID, @@ -85,7 +99,10 @@ def messages(db, data, media_folder, timezone_offset, range): ON 
ZWAMESSAGE.Z_PK = ZWAMEDIAITEM.ZMESSAGE INNER JOIN ZWACHATSESSION ON ZWAMESSAGE.ZCHATSESSION = ZWACHATSESSION.Z_PK - {f'WHERE ZMESSAGEDATE {range}' if range is not None else ''} + WHERE 1=1 + {f'AND ZMESSAGEDATE {range}' if range is not None else ''} + {'AND (' + ' OR '.join(f"ZCONTACTJID LIKE '%{chat}%'" for chat in filter_chat[0]) + ')' if filter_chat[0] is not None else ''} + {'AND (' + ' AND '.join(f"ZCONTACTJID NOT LIKE '%{chat}%'" for chat in filter_chat[1]) + ')' if filter_chat[1] is not None else ''} ORDER BY ZMESSAGEDATE ASC;""") i = 0 content = c.fetchone() @@ -188,14 +205,20 @@ def messages(db, data, media_folder, timezone_offset, range): f"Processing messages...({total_row_number}/{total_row_number})", end="\r") -def media(db, data, media_folder, range): +def media(db, data, media_folder, range, filter_chat): c = db.cursor() # Get media c.execute(f"""SELECT count() FROM ZWAMEDIAITEM INNER JOIN ZWAMESSAGE ON ZWAMEDIAITEM.ZMESSAGE = ZWAMESSAGE.Z_PK - {f'WHERE ZMESSAGEDATE {range}' if range is not None else ''}""") + INNER JOIN ZWACHATSESSION + ON ZWAMESSAGE.ZCHATSESSION = ZWACHATSESSION.Z_PK + WHERE 1=1 + {f'AND ZMESSAGEDATE {range}' if range is not None else ''} + {'AND (' + ' OR '.join(f"ZWACHATSESSION.ZCONTACTJID LIKE '%{chat}%'" for chat in filter_chat[0]) + ')' if filter_chat[0] is not None else ''} + {'AND (' + ' AND '.join(f"ZWACHATSESSION.ZCONTACTJID NOT LIKE '%{chat}%'" for chat in filter_chat[1]) + ')' if filter_chat[1] is not None else ''} + """) total_row_number = c.fetchone()[0] print(f"\nProcessing media...(0/{total_row_number})", end="\r") i = 0 @@ -212,7 +235,9 @@ def media(db, data, media_folder, range): INNER JOIN ZWACHATSESSION ON ZWAMESSAGE.ZCHATSESSION = ZWACHATSESSION.Z_PK WHERE ZMEDIALOCALPATH IS NOT NULL - {f'AND ZWAMESSAGE.ZMESSAGEDATE {range}' if range is not None else ''} + {f'AND ZWAMESSAGE.ZMESSAGEDATE {range}' if range is not None else ''} + {'AND (' + ' OR '.join(f"ZCONTACTJID LIKE '%{chat}%'" for chat in filter_chat[0]) + 
')' if filter_chat[0] is not None else ''} + {'AND (' + ' AND '.join(f"ZCONTACTJID NOT LIKE '%{chat}%'" for chat in filter_chat[1]) + ')' if filter_chat[1] is not None else ''} ORDER BY ZCONTACTJID ASC""") content = c.fetchone() mime = MimeTypes() @@ -256,7 +281,7 @@ def media(db, data, media_folder, range): f"Processing media...({total_row_number}/{total_row_number})", end="\r") -def vcard(db, data, media_folder, range): +def vcard(db, data, media_folder, range, filter_chat): c = db.cursor() c.execute(f"""SELECT DISTINCT ZWAVCARDMENTION.ZMEDIAITEM, ZWAMEDIAITEM.ZMESSAGE, @@ -270,7 +295,10 @@ def vcard(db, data, media_folder, range): ON ZWAMEDIAITEM.ZMESSAGE = ZWAMESSAGE.Z_PK INNER JOIN ZWACHATSESSION ON ZWAMESSAGE.ZCHATSESSION = ZWACHATSESSION.Z_PK - {f'WHERE ZWAMESSAGE.ZMESSAGEDATE {range}' if range is not None else ''};""") + WHERE 1=1 + {f'AND ZWAMESSAGE.ZMESSAGEDATE {range}' if range is not None else ''} + {'AND (' + ' OR '.join(f"ZCONTACTJID LIKE '%{chat}%'" for chat in filter_chat[0]) + ')' if filter_chat[0] is not None else ''} + {'AND (' + ' AND '.join(f"ZCONTACTJID NOT LIKE '%{chat}%'" for chat in filter_chat[1]) + ')' if filter_chat[1] is not None else ''};""") contents = c.fetchall() total_row_number = len(contents) print(f"\nProcessing vCards...(0/{total_row_number})", end="\r")