...

2025-08-20 04:15:43 +02:00
parent 6b9f0cf291
commit e4bb201181
95 changed files with 194 additions and 907 deletions
--- a/herolib/web/init.py
+++ b/herolib/web/init.py
--- a/herolib/web/doctools/init.py
+++ b/herolib/web/doctools/init.py
--- a/herolib/web/doctools/html_replacer.py
+++ b/herolib/web/doctools/html_replacer.py
@@ -0,0 +1,94 @@
+from herotools.logger import logger
+from bs4 import BeautifulSoup
+import re
+from typing import Callable
+from herotools.texttools import name_fix
+
+# Callable signatures for the link and content fetching functions.
+# The arities match _get_link and _get_content defined below:
+#   LinkFetcher(language, prefix, site_name, pagename, name) -> link
+LinkFetcher = Callable[[str, str, str, str, str], str]
+#   ContentFetcher(language, site_name, pagename, name) -> replacement text
+ContentFetcher = Callable[[str, str, str, str], str]
+
+# Private functions to be used internally
+
+def _get_link(language: str, prefix: str, site_name: str, pagename: str, name: str) -> str:
+    """
+    Placeholder link builder for ``!!img:`` markers.
+
+    Returns ``{prefix}{language}/{site_name}/{pagename}/{name}.jpg``. ``prefix`` is
+    used verbatim, so it must already end with the separator the caller wants.
+    """
+    # Replace this with your logic to get the actual link
+    language_column = f"{language[:10]:<10}"
+    logger.debug(f"_get_link: {language_column} {site_name}:{pagename}:{name}")
+    location = f"{language}/{site_name}/{pagename}/{name}"
+    return f"{prefix}{location}.jpg"
+
+def _get_content(language: str, site_name: str, pagename: str, name: str) -> str:
+    """
+    Placeholder text provider for ``!!txt:`` markers.
+
+    Returns a fixed sentence that mentions ``name``, ``pagename``, ``language`` and
+    ``site_name``; no real content is looked up.
+    """
+    # Replace this with your logic to get the actual content
+    language_column = f"{language[:10]:<10}"
+    logger.debug(f"_get_content: {language_column} {site_name}:{pagename}:{name}")
+    replacement = f"Replaced text for {name} on page {pagename} in {language} language on {site_name} site"
+    return replacement
+
+def _process_html(language: str, prefix: str, site_name: str, pagename: str, html_content: str) -> str:
+    """
+    Process HTML and replace image sources and text based on marker classes.
+    This allows us to work with templates and get content based on language to replace in HTML.
+
+    An element with a class ``!!img:<name>`` gets its ``src`` attribute set to the
+    link returned by ``_get_link`` (only if it is an ``<img>`` or already has a
+    ``src``). An element with a class ``!!txt:<name>`` gets its content replaced
+    by the text returned by ``_get_content``. The marker classes themselves are
+    left in the output.
+
+    Args:
+        language: Language identifier; normalised with ``name_fix``.
+        prefix: Link prefix; surrounding whitespace is stripped and a trailing
+            ``/`` is appended when missing.
+        site_name: Site identifier; normalised with ``name_fix``.
+        pagename: Page identifier; normalised with ``name_fix``.
+        html_content: The HTML document to process.
+
+    Returns:
+        The modified document serialised back to a string.
+    """
+    language = name_fix(language)
+    site_name = name_fix(site_name)
+    pagename = name_fix(pagename)
+    prefix = prefix.strip()
+    if not prefix.endswith('/'):
+        prefix += '/'
+
+    soup = BeautifulSoup(html_content, 'html.parser')
+
+    # Find all elements with class names starting with !!img: or !!txt:
+    for element in soup.find_all(class_=re.compile(r'!!(img|txt):(.+)')):
+        for cls in element['class']:
+            # Split on the FIRST colon only, so a name that itself contains ':'
+            # is kept whole instead of being cut at the second colon.
+            marker, separator, raw_name = cls.partition(':')
+            if not separator or not raw_name:
+                # Not a marker class, or a marker without a name: nothing to fetch.
+                continue
+            if marker == '!!img':
+                name = name_fix(raw_name)
+                # Get the link to replace the src attribute in !!img: elements
+                link = _get_link(language=language, prefix=prefix, site_name=site_name, pagename=pagename, name=name)
+                # Non-img elements are only touched when they already carry a src
+                if element.name == 'img' or 'src' in element.attrs:
+                    element['src'] = link
+            elif marker == '!!txt':
+                name = name_fix(raw_name)
+                # Get the content to replace the text in !!txt: elements
+                content = _get_content(language=language, site_name=site_name, pagename=pagename, name=name)
+                element.string = content
+
+    # Output the modified HTML
+    return str(soup)
+
+# Public entry point of this module
+def process(language: str, prefix: str, site_name: str, pagename: str, html_content: str) -> str:
+    """
+    Replace ``!!img:`` / ``!!txt:`` marked content in ``html_content``.
+
+    Thin public wrapper: all arguments are forwarded unchanged to
+    ``_process_html`` and its result is returned as is.
+    """
+    processed = _process_html(
+        language=language,
+        prefix=prefix,
+        site_name=site_name,
+        pagename=pagename,
+        html_content=html_content,
+    )
+    return processed
+
+# Demo: running this module directly processes a small sample page
+if __name__ == "__main__":
+    # Sample page with two !!txt: markers and one !!img: marker
+    sample_html = '''
+    <!DOCTYPE html>
+    <html lang="en">
+    <head>
+        <meta charset="UTF-8">
+        <meta name="viewport" content="width=device-width, initial-scale=1.0">
+        <title>Sample Page</title>
+    </head>
+    <body>
+        <h2 class="mb-6 is-size-1 is-size-3-mobile has-text-weight-bold !!txt:title1">Take care of your performance every day.</h2>
+        <img class="responsive !!img:logo" src="old-link.jpg" alt="Company Logo">
+        <p class="content !!txt:description">This is a sample description text.</p>
+    </body>
+    </html>
+    '''
+
+    # Run the public entry point for one language / site / page combination
+    rendered: str = process(
+        language="en",
+        prefix="http://localhost/images/",
+        site_name="ExampleSite",
+        pagename="HomePage",
+        html_content=sample_html,
+    )
+
+    # Show the resulting HTML
+    print(rendered)
--- a/herolib/web/doctools/md_replacer.py
+++ b/herolib/web/doctools/md_replacer.py
@@ -0,0 +1,172 @@
+import sys
+import os
+
+# Add the parent of this file's directory to the module search path (presumably so that herotools can be imported)
+sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
+
+from herotools.logger import logger
+from markdown_it import MarkdownIt
+from markdown_it.tree import SyntaxTreeNode
+import re
+from enum import Enum
+from herotools.texttools import name_fix
+from mdformat.renderer import MDRenderer
+from urllib.parse import urlparse
+
+class ImageType(Enum):
+    """Image kinds accepted by the ``image_type`` parameter of ``get_link_image``."""
+    JPEG = 'jpeg'
+    PNG = 'png'
+    GIF = 'gif'
+    # Fallback member for anything that is not one of the kinds above
+    OTHER = 'other'
+
+
+def get_link_page(prefix: str, linkname: str, sitename: str, name: str) -> str:
+    """
+    Generates a markdown page link of the form ``[linkname](prefix/sitename/name)``.
+
+    Args:
+        prefix (str): Base URL placed in front of the site name. Trailing slashes
+            are ignored so the result never contains a doubled ``//`` separator.
+        linkname (str): The text shown for the link.
+        sitename (str): The name of the site.
+        name (str): The name of the page.
+
+    Returns:
+        str: The generated markdown link.
+    """
+    logger.debug(f"get_link_page: {prefix[:60]:<60} {linkname} {sitename}:{name}")
+    # Same normalisation replace() applies to its prefix before joining
+    base = prefix.rstrip('/')
+    return f"[{linkname}]({base}/{sitename}/{name})"
+
+def get_link_image(prefix: str, sitename: str, name: str, image_type: ImageType) -> str:
+    """
+    Generates a markdown image link of the form ``![](prefix/sitename/name)``.
+
+    Args:
+        prefix (str): Base URL placed in front of the site name. Trailing slashes
+            are ignored so the result never contains a doubled ``//`` separator.
+        sitename (str): The name of the site.
+        name (str): The name of the image.
+        image_type (ImageType): The type of the image. Currently not used when
+            building the link.
+
+    Returns:
+        str: The generated markdown image link (with empty alt text).
+    """
+    logger.debug(f"get_link_image: {prefix[:60]:<60} {sitename}:{name}")
+    # Same normalisation replace() applies to its prefix before joining
+    base = prefix.rstrip('/')
+    return f"![]({base}/{sitename}/{name})"
+
+def get_include(sitename: str, name: str) -> str:
+    """
+    Builds the include directive ``include: <sitename>/<name>``.
+
+    Args:
+        sitename (str): The name of the site.
+        name (str): The name of the page to include.
+
+    Returns:
+        str: The include directive text.
+    """
+    logger.debug(f"get_include: {sitename}:{name}")
+    target = f"{sitename}/{name}"
+    return f"include: {target}"
+
+def replace(prefix: str, markdown: str) -> str:
+    """
+    Rewrites the targets of all image links and page links in the provided
+    markdown so that they point below ``prefix``.
+
+    Only the path component of each original URL is kept (scheme, host, query
+    and fragment are dropped) and appended to ``prefix``. Custom
+    ``!!include`` directives are currently left untouched: the helper for them
+    exists but is not wired in yet.
+
+    Args:
+        prefix (str): Base URL for the rewritten links; trailing slashes are ignored.
+        markdown (str): The markdown content.
+
+    Returns:
+        str: The markdown re-rendered by ``MDRenderer`` with updated links.
+    """
+    # Initialize the Markdown parser
+    md = MarkdownIt()
+    tokens = md.parse(markdown)
+    ast = SyntaxTreeNode(tokens)
+
+    # Diagnostic dump of the parsed tree; sent to the debug log instead of stdout
+    logger.debug(ast.pretty(indent=2, show_text=True))
+
+    base = prefix.rstrip('/')
+
+    def get_new_url(url: str) -> str:
+        """Return ``base`` joined with the path component of ``url``."""
+        logger.debug(f"url: {url}")
+
+        parsed_url = urlparse(url)
+        logger.debug(f"parsed_url: {parsed_url}")
+
+        new_url = f"{base}/{parsed_url.path.strip('/')}"
+        logger.debug(f"new_url: {new_url}")
+
+        return new_url
+
+    def process_node(node: SyntaxTreeNode) -> None:
+        """Rewrite image/link targets on ``node`` and, recursively, its children."""
+        # node.attrs is the attribute dict of the underlying token, so the
+        # change is visible when ``tokens`` is rendered below.
+        if node.type == 'image':
+            node.attrs['src'] = get_new_url(node.attrs.get('src', ''))
+        elif node.type == 'link':
+            node.attrs['href'] = get_new_url(node.attrs.get('href', ''))
+
+        # Recursively process child nodes
+        for child in node.children or []:
+            process_node(child)
+
+    def replace_include_directives(match: re.Match) -> str:
+        """
+        Replaces a custom include directive with the output of ``get_include``.
+
+        Args:
+            match (re.Match): The match object; group 1 holds ``[site:]page``.
+
+        Returns:
+            str: The generated include directive.
+        """
+        url = match.group(1)
+        if ':' in url:
+            site_name, page = url.split(':', 1)
+            page_name = page.split('/')[-1]
+        else:
+            site_name = ""
+            page_name = url
+        # NOTE(review): assumes the '.md' suffix is meant for the name that is
+        # passed on to get_include -- confirm before enabling the directive pass.
+        if not page_name.endswith('.md'):
+            page_name += '.md'
+        return get_include(site_name, page_name)
+
+    # Process the root node
+    process_node(ast)
+
+    # Convert the (mutated) token stream back to markdown
+    renderer = MDRenderer()
+    options = {}
+    env = {}
+    rendered_markdown = renderer.render(tokens, options, env)
+
+    # TODO: include directives are not processed yet; to enable:
+    # include_pattern = re.compile(r"!!include page:'(.*?)'")
+    # rendered_markdown = include_pattern.sub(replace_include_directives, rendered_markdown)
+
+    return rendered_markdown
+
+
+
+if __name__ == "__main__":
+
+    # Sample input: one image, one page link and several include directives
+    sample = """
+![Image description](https://example.com/image.png)
+[Page link](sitename:some/path/to/page.md)
+!!include page:'mypage'
+!!include page:'mypage.md'
+!!include page:'mysite:mypage
+!!include page:'mysite:mypage'
+!!include page:'mysite:mypage.md'
+    """
+
+    # Show the input followed by the rewritten markdown
+    print(sample)
+    rewritten = replace(prefix="http://localhost:8080/pre/", markdown=sample)
+    print(rewritten)
+    
+    
--- a/herolib/web/doctools/processor.py
+++ b/herolib/web/doctools/processor.py
@@ -0,0 +1,94 @@
+import os
+import re
+from typing import Callable
+
+from herotools.logger import logger
+from herotools.md5 import file_md5
+from herotools.texttools import name_fix
+
+
+def _example_set_file(site_name: str, path: str, md5: str) -> None:
+    """Example ``set_file`` callback: only logs the site name and path at debug level."""
+    # Placeholder for actual implementation
+    site_column = f"{site_name[:20]:<20}"
+    logger.debug(f"set_file     : site_name={site_column} {path}")
+
+
+def _example_set_img(site_name: str, path: str, md5: str) -> None:
+    """Example ``set_img`` callback: only logs the site name and path at debug level."""
+    # Placeholder for actual implementation
+    site_column = f"{site_name[:20]:<20}"
+    logger.debug(f"set_img      : site_name={site_column} {path}")
+
+
+def _example_set_markdown(
+    site_name: str, path: str, md5: str, content: str
+) -> None:
+    """Example ``set_markdown`` callback: only logs the site name and path at debug level."""
+    # Placeholder for actual implementation
+    site_column = f"{site_name[:20]:<20}"
+    logger.debug(f"set_markdown : site_name={site_column} {path}")
+
+
+def _example_set_site(site_name: str, path: str) -> None:
+    """Example ``set_site`` callback: only logs the site name and path at info level."""
+    # Placeholder for actual implementation
+    site_column = f"{site_name[:20]:<20}"
+    logger.info(f"set_site : site_name={site_column} {path}")
+
+
+def _site_process_action(
+    site_name: str,
+    site_path: str,
+    set_file: Callable[[str, str, str], None],
+    set_img: Callable[[str, str, str], None],
+    set_markdown: Callable[[str, str, str, str], None],
+) -> None:
+    """
+    Walk ``site_path`` and hand every file to the matching callback.
+
+    Dispatch per file (paths passed to callbacks are relative to ``site_path``):
+      * ``.collection`` / ``.site`` / ``.done`` marker files are skipped;
+      * ``*.md`` (case-insensitive) -> ``set_markdown(site_name, rel_path, md5, content)``,
+        with the content read as UTF-8;
+      * image extensions (jpg, jpeg, png, gif, bmp, tiff, webp; case-insensitive)
+        -> ``set_img(site_name, rel_path, md5)``;
+      * everything else -> ``set_file(site_name, rel_path, md5)``.
+
+    Args:
+        site_name: Name passed through to every callback.
+        site_path: Root directory of the site.
+        set_file: Callback for generic files.
+        set_img: Callback for image files.
+        set_markdown: Callback for markdown files.
+    """
+    logger.debug(f"site process: {site_path[:60]:<60} -> {site_name}")
+    marker_names = {".collection", ".site", ".done"}
+    # Compiled once per call instead of once per file
+    image_suffix = re.compile(r"\.(jpg|jpeg|png|gif|bmp|tiff|webp)$", re.IGNORECASE)
+    for root, _, files in os.walk(site_path):
+        for file in files:
+            # Skip markers before touching the file, so they are never hashed
+            if file in marker_names:
+                continue
+            file_path = os.path.join(root, file)
+            file_path_rel = os.path.relpath(file_path, site_path)
+            mymd5 = file_md5(file_path)
+            if file.lower().endswith(".md"):
+                with open(file_path, "r", encoding="utf-8") as f:
+                    content = f.read()
+                set_markdown(site_name, file_path_rel, mymd5, content)
+            elif image_suffix.search(file):
+                set_img(site_name, file_path_rel, mymd5)
+            else:
+                set_file(site_name, file_path_rel, mymd5)
+
+
+def process(
+    path: str,
+    set_site: Callable[[str, str], None],
+    set_file: Callable[[str, str, str], None],
+    set_img: Callable[[str, str, str], None],
+    set_markdown: Callable[[str, str, str, str], None],
+) -> None:
+    """
+    Find every site below ``path`` and feed its files to the callbacks.
+
+    A directory counts as a site when it contains a ``.site`` or ``.collection``
+    file. For each site, ``set_site(site_name, root)`` is called first, then
+    ``set_file()``, ``set_img()`` and ``set_markdown()`` are applied to its files.
+    The walk does not descend further into a directory once it is recognised as a site.
+    """
+    path = os.path.abspath(os.path.expanduser(path))
+    logger.info(f"sites process: {path}")
+    site_markers = (".site", ".collection")
+    for root, dirs, files in os.walk(path):
+        if not any(marker in files for marker in site_markers):
+            continue
+        site_name = name_fix(os.path.basename(root))
+        set_site(site_name, root)
+        _site_process_action(site_name, root, set_file, set_img, set_markdown)
+        # Emptying the list in place stops os.walk from descending into subdirectories
+        del dirs[:]
+
+
+if __name__ == "__main__":
+    # Demo run against a hard-coded local checkout, using the logging-only callbacks
+    collections_root = "~/code/git.threefold.info/projectmycelium/info_projectmycelium/collections"
+
+    # logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
+    process(
+        path=collections_root,
+        set_site=_example_set_site,
+        set_file=_example_set_file,
+        set_img=_example_set_img,
+        set_markdown=_example_set_markdown,
+    )