herolib_python/_archive/lib/web/mdcollections/tools.py
2025-08-05 15:15:36 +02:00

100 lines
2.8 KiB
Python

from pathlib import Path
from typing import Union
import os
import re
def should_skip_path(path: Union[str, Path]) -> bool:
    """
    Tell whether a file or directory should be ignored.

    Only the last path component is inspected; anything in front of it
    has no influence on the result.

    Args:
        path: File or directory path, as a string or Path.

    Returns:
        True when the last component begins with '.' or '_',
        False otherwise (including when the last component is empty).
    """
    basename = Path(path).name
    is_hidden = basename.startswith('.')
    is_private = basename.startswith('_')
    return is_hidden or is_private
def strip_ansi_codes(text):
    """
    Return *text* with ANSI escape sequences removed.

    A sequence is an ESC character (0x1B) followed either by a single
    character in the ranges '@'-'Z' or '\\'-'_', or by a '[' introduced
    sequence: optional characters in '0'-'?', optional characters in
    ' '-'/', and one final character in '@'-'~'.
    """
    pattern = r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])'
    return re.sub(pattern, '', text)
def name_fix(path: str) -> str:
    """
    Normalize a single file name (no directory part).

    Steps, in order:
    - ANSI escape codes are removed and surrounding whitespace trimmed.
    - The name is split into stem and extension.
    - A '.md' extension (any case) is dropped; every other extension is
      kept exactly as given.
    - The stem is lowercased, spaces and hyphens become underscores and
      commas are removed.
    - Any run of underscores is collapsed to a single underscore.
    - For image files (as decided by is_image) trailing underscores are
      stripped from the stem.

    Examples:
        "My Page.md"       -> "my_page"
        "Big - Logo_.PNG"  -> "big_logo.PNG"

    Args:
        path: File name to normalize; must not contain '/'.

    Returns:
        The normalized file name.

    Raises:
        TypeError: If path is not a string.
        ValueError: If path contains a forward slash.
    """
    if not isinstance(path, str):
        raise TypeError("Input must be a string")
    if '/' in path:
        raise ValueError("Path should not contain forward slashes - use for filenames only")

    path = strip_ansi_codes(path).strip()
    name, ext = os.path.splitext(path)

    # Decide "image or not" from the full file name: the stem alone has
    # already lost its extension, so testing it would never match.
    image = is_image(path)

    if ext.lower() == '.md':
        ext = ""

    # Lowercase and replace/remove separator characters.
    name = name.lower().replace(' ', '_').replace('-', '_').replace(',', '')
    # Collapse runs of any length; a single replace('__', '_') pass would
    # leave '__' behind for inputs such as "a - b" -> "a___b".
    name = re.sub(r'_{2,}', '_', name)

    # Only strip trailing underscores for image files.
    if image:
        name = name.rstrip('_')

    return f"{name}{ext}"
def path_fix(path: Union[str, Path]) -> Path:
    """
    Normalize the last component of a path, leaving its parent untouched.

    The final component is passed through name_fix and then joined back
    onto the original parent directory.

    Args:
        path: Path to normalize. Values that are neither str nor Path are
            converted with str() first.

    Returns:
        A Path made of the original parent and the normalized file name.
    """
    raw = path if isinstance(path, (str, Path)) else str(path)
    target = Path(raw)
    # Directory part stays as-is; only the file name is rewritten.
    fixed_name = name_fix(target.name)
    return target.parent / fixed_name
def is_image(basename):
    """
    Report whether a file name carries a known image extension.

    The extension is taken with os.path.splitext, stripped of surrounding
    whitespace and compared case-insensitively against
    .jpg, .jpeg, .png, .gif and .svg.

    Args:
        basename: File name (or path) to inspect.

    Returns:
        True if the extension is one of the listed image extensions,
        False otherwise.
    """
    known_suffixes = ('.jpg', '.jpeg', '.png', '.gif', '.svg')
    suffix = os.path.splitext(basename)[1]
    normalized = suffix.strip().lower()
    return normalized in known_suffixes