mirror of
https://github.com/KnugiHK/WhatsApp-Chat-Exporter.git
synced 2026-04-28 00:34:54 +00:00
Replacing slugify with a new function
This commit is contained in:
@@ -13,7 +13,7 @@ from Whatsapp_Chat_Exporter.data_model import ChatStore, Message
|
|||||||
from Whatsapp_Chat_Exporter.utility import CLEAR_LINE, CURRENT_TZ_OFFSET, MAX_SIZE, ROW_SIZE, JidType, Device
|
from Whatsapp_Chat_Exporter.utility import CLEAR_LINE, CURRENT_TZ_OFFSET, MAX_SIZE, ROW_SIZE, JidType, Device
|
||||||
from Whatsapp_Chat_Exporter.utility import rendering, get_file_name, setup_template, get_cond_for_empty
|
from Whatsapp_Chat_Exporter.utility import rendering, get_file_name, setup_template, get_cond_for_empty
|
||||||
from Whatsapp_Chat_Exporter.utility import get_status_location, convert_time_unit, determine_metadata
|
from Whatsapp_Chat_Exporter.utility import get_status_location, convert_time_unit, determine_metadata
|
||||||
from Whatsapp_Chat_Exporter.utility import get_chat_condition, slugify, bytes_to_readable
|
from Whatsapp_Chat_Exporter.utility import get_chat_condition, safe_name, bytes_to_readable
|
||||||
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
@@ -668,8 +668,8 @@ def _process_single_media(data, content, media_folder, mime, separate_media):
|
|||||||
|
|
||||||
# Copy media to separate folder if needed
|
# Copy media to separate folder if needed
|
||||||
if separate_media:
|
if separate_media:
|
||||||
chat_display_name = slugify(current_chat.name or message.sender
|
chat_display_name = safe_name(current_chat.name or message.sender
|
||||||
or content["key_remote_jid"].split('@')[0], True)
|
or content["key_remote_jid"].split('@')[0])
|
||||||
current_filename = file_path.split("/")[-1]
|
current_filename = file_path.split("/")[-1]
|
||||||
new_folder = os.path.join(media_folder, "separated", chat_display_name)
|
new_folder = os.path.join(media_folder, "separated", chat_display_name)
|
||||||
Path(new_folder).mkdir(parents=True, exist_ok=True)
|
Path(new_folder).mkdir(parents=True, exist_ok=True)
|
||||||
|
|||||||
@@ -9,7 +9,7 @@ from mimetypes import MimeTypes
|
|||||||
from markupsafe import escape as htmle
|
from markupsafe import escape as htmle
|
||||||
from Whatsapp_Chat_Exporter.data_model import ChatStore, Message
|
from Whatsapp_Chat_Exporter.data_model import ChatStore, Message
|
||||||
from Whatsapp_Chat_Exporter.utility import APPLE_TIME, CLEAR_LINE, CURRENT_TZ_OFFSET, get_chat_condition
|
from Whatsapp_Chat_Exporter.utility import APPLE_TIME, CLEAR_LINE, CURRENT_TZ_OFFSET, get_chat_condition
|
||||||
from Whatsapp_Chat_Exporter.utility import bytes_to_readable, convert_time_unit, slugify, Device
|
from Whatsapp_Chat_Exporter.utility import bytes_to_readable, convert_time_unit, safe_name, Device
|
||||||
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
@@ -402,8 +402,8 @@ def process_media_item(content, data, media_folder, mime, separate_media):
|
|||||||
|
|
||||||
# Handle separate media option
|
# Handle separate media option
|
||||||
if separate_media:
|
if separate_media:
|
||||||
chat_display_name = slugify(
|
chat_display_name = safe_name(
|
||||||
current_chat.name or message.sender or content["ZCONTACTJID"].split('@')[0], True)
|
current_chat.name or message.sender or content["ZCONTACTJID"].split('@')[0])
|
||||||
current_filename = file_path.split("/")[-1]
|
current_filename = file_path.split("/")[-1]
|
||||||
new_folder = os.path.join(media_folder, "separated", chat_display_name)
|
new_folder = os.path.join(media_folder, "separated", chat_display_name)
|
||||||
Path(new_folder).mkdir(parents=True, exist_ok=True)
|
Path(new_folder).mkdir(parents=True, exist_ok=True)
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ import json
|
|||||||
import os
|
import os
|
||||||
import unicodedata
|
import unicodedata
|
||||||
import re
|
import re
|
||||||
|
import string
|
||||||
import math
|
import math
|
||||||
import shutil
|
import shutil
|
||||||
from bleach import clean as sanitize
|
from bleach import clean as sanitize
|
||||||
@@ -12,7 +13,7 @@ from markupsafe import Markup
|
|||||||
from datetime import datetime, timedelta
|
from datetime import datetime, timedelta
|
||||||
from enum import IntEnum
|
from enum import IntEnum
|
||||||
from Whatsapp_Chat_Exporter.data_model import ChatCollection, ChatStore
|
from Whatsapp_Chat_Exporter.data_model import ChatCollection, ChatStore
|
||||||
from typing import Dict, List, Optional, Tuple
|
from typing import Dict, List, Optional, Tuple, Union
|
||||||
try:
|
try:
|
||||||
from enum import StrEnum, IntEnum
|
from enum import StrEnum, IntEnum
|
||||||
except ImportError:
|
except ImportError:
|
||||||
@@ -600,26 +601,28 @@ def setup_template(template: Optional[str], no_avatar: bool, experimental: bool
|
|||||||
APPLE_TIME = 978307200
|
APPLE_TIME = 978307200
|
||||||
|
|
||||||
|
|
||||||
# Previous implementation (taken from Django). The commit shown here replaces
# its call sites with safe_name() below.
def slugify(value: str, allow_unicode: bool = False) -> str:
    """
    Convert text to a lowercase slug for URL-safe strings.
    Taken from https://github.com/django/django/blob/master/django/utils/text.py

    Args:
        value (str): The string to convert to a slug.
        allow_unicode (bool, optional): Whether to allow Unicode characters. Defaults to False.

    Returns:
        str: The slugified string with only alphanumerics, underscores, or hyphens.
    """
    value = str(value)
    if allow_unicode:
        value = unicodedata.normalize('NFKC', value)
    else:
        # Decompose accented characters, then drop everything non-ASCII.
        value = unicodedata.normalize('NFKD', value).encode(
            'ascii', 'ignore').decode('ascii')
    value = re.sub(r'[^\w\s-]', '', value.lower())
    return re.sub(r'[-\s]+', '-', value).strip('-_')


def safe_name(text: Union[str, bytes]) -> str:
    """
    Sanitize the input text and generate a safe file name.

    This function serves a similar purpose to slugify() from
    Django previously used in this project, but is a clean-room
    reimplementation tailored for performance and a narrower
    use case for this project. Licensed under the same terms
    as the project (MIT).

    Args:
        text (str|bytes): The string to be sanitized. Bytes are decoded
            as UTF-8 and undecodable sequences are dropped.

    Returns:
        str: The sanitized string containing only alphanumerics (Unicode
            letters and digits included), underscores, hyphens and dots,
            with every run of spaces collapsed into a single hyphen.
            Empty when nothing safe remains, or when the result would be
            the special path component "." or "..".

    Raises:
        TypeError: If text is neither a str nor a bytes object.
    """
    if isinstance(text, bytes):
        text = text.decode("utf-8", "ignore")
    elif not isinstance(text, str):
        raise TypeError("text must be a string or bytes")
    normalized_text = unicodedata.normalize("NFKC", text)
    kept = "".join(
        char for char in normalized_text if char.isalnum() or char in "-_ ."
    )
    # split() discards leading/trailing spaces and collapses inner runs.
    name = "-".join(kept.split())
    # The result is used as a folder name by callers; never hand back a
    # component that refers to the current or parent directory.
    if name in (".", ".."):
        return ""
    return name
|
|
||||||
|
|
||||||
|
|
||||||
class WhatsAppIdentifier(StrEnum):
|
class WhatsAppIdentifier(StrEnum):
|
||||||
|
|||||||
43
tests/test_utility.py
Normal file
43
tests/test_utility.py
Normal file
@@ -0,0 +1,43 @@
|
|||||||
|
import pytest
|
||||||
|
import random
|
||||||
|
import string
|
||||||
|
|
||||||
|
from Whatsapp_Chat_Exporter.utility import safe_name
|
||||||
|
|
||||||
|
def generate_random_string(length=50):
    """
    Build a reproducible pseudo-random string for the safe_name tests.

    A private random.Random instance seeded with 10 is used, so the result
    is the same on every call for a given length and the process-wide
    random state is left untouched.

    Args:
        length (int, optional): Number of characters to generate. Defaults to 50.

    Returns:
        str: Characters drawn from ASCII letters, digits and "äöüß".
    """
    rng = random.Random(10)
    alphabet = string.ascii_letters + string.digits + "äöüß"
    return ''.join(rng.choice(alphabet) for _ in range(length))
|
||||||
|
|
||||||
|
|
||||||
|
# Test cases to validate the safe_name function
|
||||||
|
safe_name_test_cases = [
|
||||||
|
("This is a test string", "This-is-a-test-string"),
|
||||||
|
("This is a test string with special characters!@#$%^&*()", "This-is-a-test-string-with-special-characters"),
|
||||||
|
("This is a test string with numbers 1234567890", "This-is-a-test-string-with-numbers-1234567890"),
|
||||||
|
("This is a test string with mixed case ThisIsATestString", "This-is-a-test-string-with-mixed-case-ThisIsATestString"),
|
||||||
|
("This is a test string with extra spaces ThisIsATestString", "This-is-a-test-string-with-extra-spaces-ThisIsATestString"),
|
||||||
|
("This is a test string with unicode characters äöüß", "This-is-a-test-string-with-unicode-characters-äöüß"),
|
||||||
|
("這是一個包含中文的測試字符串", "這是一個包含中文的測試字符串"), # Chinese characters, should stay as is
|
||||||
|
(
|
||||||
|
f"This is a test string with long length {generate_random_string(1000)}",
|
||||||
|
f"This-is-a-test-string-with-long-length-{generate_random_string(1000)}",
|
||||||
|
),
|
||||||
|
("", ""), # Empty string
|
||||||
|
(" ", ""), # String with only space
|
||||||
|
("---", "---"), # String with only hyphens
|
||||||
|
("___", "___"), # String with only underscores
|
||||||
|
("a" * 100, "a" * 100), # Long string with single character
|
||||||
|
("a-b-c-d-e", "a-b-c-d-e"), # String with hyphen
|
||||||
|
("a_b_c_d_e", "a_b_c_d_e"), # String with underscore
|
||||||
|
("a b c d e", "a-b-c-d-e"), # String with spaces
|
||||||
|
("test.com/path/to/resource?param1=value1&param2=value2", "test.compathtoresourceparam1value1param2value2"), # Test with URL
|
||||||
|
("filename.txt", "filename.txt"), # Test with filename
|
||||||
|
("Αυτή είναι μια δοκιμαστική συμβολοσειρά με ελληνικούς χαρακτήρες.", "Αυτή-είναι-μια-δοκιμαστική-συμβολοσειρά-με-ελληνικούς-χαρακτήρες."), # Greek characters
|
||||||
|
("This is a test with комбинированные знаки ̆ example", "This-is-a-test-with-комбинированные-знаки-example") # Mixed with unicode
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("input_text, expected_output", safe_name_test_cases)
def test_safe_name(input_text, expected_output):
    """Check that safe_name() turns each sample input into its expected name."""
    assert safe_name(input_text) == expected_output
|
||||||
Reference in New Issue
Block a user