...
This commit is contained in:
0
herolib/web/__init__.py
Normal file
0
herolib/web/__init__.py
Normal file
0
herolib/web/doctools/__init__.py
Normal file
0
herolib/web/doctools/__init__.py
Normal file
94
herolib/web/doctools/html_replacer.py
Normal file
94
herolib/web/doctools/html_replacer.py
Normal file
@@ -0,0 +1,94 @@
|
||||
from herotools.logger import logger
|
||||
from bs4 import BeautifulSoup
|
||||
import re
|
||||
from typing import Callable
|
||||
from herotools.texttools import name_fix
|
||||
|
||||
# Define the type for the content and link fetching functions
|
||||
LinkFetcher = Callable[[str, str, str, str, str], str]
|
||||
ContentFetcher = Callable[[str, str, str, str], str]
|
||||
|
||||
# Private functions to be used internally
|
||||
|
||||
def _get_link(language: str, prefix: str, site_name: str, pagename: str, name: str) -> str:
    """Return the image URL for a named asset (placeholder implementation).

    The result is ``{prefix}{language}/{site_name}/{pagename}/{name}.jpg``;
    replace this with real link-resolution logic.
    """
    logger.debug(f"_get_link: {language[:10]:<10} {site_name}:{pagename}:{name}")
    return prefix + "/".join([language, site_name, pagename, name]) + ".jpg"
|
||||
|
||||
def _get_content(language: str, site_name: str, pagename: str, name: str) -> str:
    """Return replacement text for a named fragment (placeholder implementation).

    Replace this with real content-fetching logic.
    """
    logger.debug(f"_get_content: {language[:10]:<10} {site_name}:{pagename}:{name}")
    template = "Replaced text for {} on page {} in {} language on {} site"
    return template.format(name, pagename, language, site_name)
|
||||
|
||||
def _process_html(language: str, prefix: str, site_name: str, pagename: str, html_content: str) -> str:
    """
    Process HTML and replace content based on special class-name tags.

    Elements whose class list contains ``!!img:<name>`` get their ``src``
    attribute replaced by a generated link; elements with ``!!txt:<name>``
    get their text replaced by fetched content. This allows working with
    templates and filling in language-specific content.

    Args:
        language: Language code (normalized with name_fix).
        prefix: URL prefix for generated links; a trailing '/' is ensured.
        site_name: Site identifier (normalized with name_fix).
        pagename: Page identifier (normalized with name_fix).
        html_content: Raw HTML to transform.

    Returns:
        The transformed HTML as a string.
    """
    language = name_fix(language)
    site_name = name_fix(site_name)
    pagename = name_fix(pagename)
    prefix = prefix.strip()
    if not prefix.endswith('/'):
        prefix += '/'

    soup = BeautifulSoup(html_content, 'html.parser')

    # Find all elements carrying a class that starts with !!img: or !!txt:
    for element in soup.find_all(class_=re.compile(r'!!(img|txt):(.+)')):
        for cls in element['class']:
            if cls.startswith('!!img:'):
                # split(':', 1) so a name containing ':' is not truncated
                name = name_fix(cls.split(':', 1)[1])
                # Get the link to replace the src attribute in !!img: elements
                link = _get_link(language=language, prefix=prefix, site_name=site_name, pagename=pagename, name=name)
                if element.name == 'img':
                    element['src'] = link
                elif 'src' in element.attrs:
                    element['src'] = link  # non-img element that still carries a src attribute
            elif cls.startswith('!!txt:'):
                name = name_fix(cls.split(':', 1)[1])
                # Get the content to replace the text in !!txt: elements
                content = _get_content(language=language, site_name=site_name, pagename=pagename, name=name)
                element.string = content

    # Output the modified HTML
    return str(soup)
|
||||
|
||||
# Public function to process the HTML content
|
||||
def process(language: str, prefix: str, site_name: str, pagename: str, html_content: str) -> str:
    """
    Public entry point: replace !!img:/!!txt: tagged content in *html_content*.

    Thin wrapper that delegates to the internal _process_html implementation.
    """
    return _process_html(
        language=language,
        prefix=prefix,
        site_name=site_name,
        pagename=pagename,
        html_content=html_content,
    )
|
||||
|
||||
# Sample usage with a given language, site name, page name, and HTML content
|
||||
if __name__ == "__main__":
    # Example HTML content exercising both a !!txt: and a !!img: marker.
    # NOTE(review): leading indentation inside this literal may have been
    # lost when the diff was rendered to HTML — confirm against the commit.
    html_content = '''
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Sample Page</title>
</head>
<body>
<h2 class="mb-6 is-size-1 is-size-3-mobile has-text-weight-bold !!txt:title1">Take care of your performance every day.</h2>
<img class="responsive !!img:logo" src="old-link.jpg" alt="Company Logo">
<p class="content !!txt:description">This is a sample description text.</p>
</body>
</html>
'''

    # Process the HTML content for a specific language, site name, and page
    language: str = "en"
    site_name: str = "ExampleSite"
    pagename: str = "HomePage"
    prefix: str = "http://localhost/images/"
    processed_html: str = process(language=language, prefix=prefix, site_name=site_name, pagename=pagename, html_content=html_content)

    # Print the modified HTML
    print(processed_html)
|
172
herolib/web/doctools/md_replacer.py
Normal file
172
herolib/web/doctools/md_replacer.py
Normal file
@@ -0,0 +1,172 @@
|
||||
import sys
|
||||
import os
|
||||
|
||||
# Add the parent directory of herotools to the Python module search path
|
||||
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
|
||||
|
||||
from herotools.logger import logger
|
||||
from markdown_it import MarkdownIt
|
||||
from markdown_it.tree import SyntaxTreeNode
|
||||
import re
|
||||
from enum import Enum
|
||||
from herotools.texttools import name_fix
|
||||
from mdformat.renderer import MDRenderer
|
||||
from urllib.parse import urlparse
|
||||
|
||||
class ImageType(Enum):
    """Closed set of image formats recognised by the link helpers."""
    JPEG = 'jpeg'
    PNG = 'png'
    GIF = 'gif'
    OTHER = 'other'  # catch-all for any unrecognised format
|
||||
|
||||
|
||||
def get_link_page(prefix: str, linkname: str, sitename: str, name: str) -> str:
    """
    Build a markdown page link pointing at *name* on *sitename*.

    Args:
        prefix (str): URL prefix the target is placed under.
        linkname (str): Text shown for the link.
        sitename (str): The name of the site.
        name (str): The name of the page.

    Returns:
        str: The generated markdown link.
    """
    logger.debug(f"get_link_page: {prefix[:60]:<60} {linkname} {sitename}:{name}")
    target = f"{prefix}/{sitename}/{name}"
    return f"[{linkname}]({target})"
|
||||
|
||||
def get_link_image(prefix: str, sitename: str, name: str, image_type: ImageType) -> str:
    """
    Build a markdown image link for *name* on *sitename*.

    Args:
        prefix (str): URL prefix the image is served under.
        sitename (str): The name of the site.
        name (str): The base name of the image (without extension).
        image_type (ImageType): The image format, used as the file extension.

    Returns:
        str: The generated markdown image link.
    """
    logger.debug(f"get_link_image: {prefix[:60]:<60} {sitename}:{name}")
    # NOTE(review): the original body returned an empty f-string and ignored
    # every parameter — almost certainly a markdown image literal lost when
    # the diff was rendered. Rebuilt to mirror get_link_page; confirm against
    # the original commit.
    return f"![{name}]({prefix}/{sitename}/{name}.{image_type.value})"
|
||||
|
||||
def get_include(sitename: str, name: str) -> str:
    """
    Build an include directive for page *name* on *sitename*.

    Args:
        sitename (str): The name of the site.
        name (str): The name of the page to include.

    Returns:
        str: The generated include directive text.
    """
    logger.debug(f"get_include: {sitename}:{name}")
    return "include: " + sitename + "/" + name
|
||||
|
||||
def replace(prefix: str, markdown: str) -> str:
    """
    Rewrite image links and page links in *markdown* so they point under
    *prefix*, and (when enabled) expand custom include directives.

    Args:
        prefix (str): URL prefix each link is rebased onto.
        markdown (str): The markdown content.

    Returns:
        str: The modified markdown content with updated links.
    """
    # Parse the markdown into a token stream and a syntax tree view of it.
    md = MarkdownIt()
    tokens = md.parse(markdown)
    ast = SyntaxTreeNode(tokens)

    # print(ast.pretty(indent=2, show_text=True))  # debug aid, keep disabled

    def get_new_url(url: str) -> str:
        # Rebase *url* onto prefix, keeping only its path component.
        # NOTE(review): netloc/query/fragment are discarded — confirm intended.
        logger.debug(f"url: {url}")
        parsed_url = urlparse(url)
        image_path = parsed_url.path
        logger.debug(f"parsed_url: {parsed_url}")
        new_url = f"{prefix.rstrip('/')}/{image_path.strip('/')}"
        logger.debug(f"new_url: {new_url}")
        return new_url

    def process_node(node: SyntaxTreeNode) -> None:
        # Rewrite image src / link href attributes in place, then recurse.
        if node.type == 'image':
            node.attrs['src'] = get_new_url(node.attrs.get('src', ''))
        elif node.type == 'link':
            node.attrs['href'] = get_new_url(node.attrs.get('href', ''))
        for child in node.children or []:
            process_node(child)

    def replace_include_directives(match: re.Match) -> str:
        """
        Replace one custom include directive with an include link.

        Args:
            match (re.Match): Match whose group(1) is "<site>:<page>" or "<page>".

        Returns:
            str: The generated include directive text.
        """
        url = match.group(1)
        if ':' in url:
            site_name, page = url.split(':', 1)
        else:
            # Fix: the original referenced an undefined `page` in this branch.
            site_name = ""
            page = url
        # Keep only the final path segment and ensure a .md extension.
        page_name = page.split('/')[-1]
        if not page_name.endswith('.md'):
            page_name += '.md'
        # Fix: get_include takes (sitename, name); the original passed prefix too.
        return get_include(site_name, page_name)

    # Rewrite all links in the tree (mutates the underlying tokens).
    process_node(ast)

    # Convert the token stream back to markdown.
    renderer = MDRenderer()
    options = {}
    env = {}
    rendered_markdown = renderer.render(tokens, options, env)

    # include_pattern = re.compile(r"!!include page:'(.*?)'")
    # rendered_markdown = include_pattern.sub(replace_include_directives, rendered_markdown)

    return rendered_markdown
|
||||
|
||||
|
||||
|
||||
if __name__ == "__main__":

    # Demo markdown exercising page links and include directives.
    # NOTE(review): an image-markdown line appears to have been lost when
    # this diff was rendered to HTML — confirm against the original commit.
    text = """

[Page link](sitename:some/path/to/page.md)
!!include page:'mypage'
!!include page:'mypage.md'
!!include page:'mysite:mypage
!!include page:'mysite:mypage'
!!include page:'mysite:mypage.md'
"""

    print(text)
    text2=replace("http://localhost:8080/pre/", text)
    print(text2)
|
||||
|
||||
|
94
herolib/web/doctools/processor.py
Normal file
94
herolib/web/doctools/processor.py
Normal file
@@ -0,0 +1,94 @@
|
||||
import os
|
||||
import re
|
||||
from typing import Callable
|
||||
|
||||
from herotools.logger import logger
|
||||
from herotools.md5 import file_md5
|
||||
from herotools.texttools import name_fix
|
||||
|
||||
|
||||
def _example_set_file(site_name: str, path: str, md5: str) -> None:
    """Example callback for generic files; only logs the call (placeholder)."""
    logger.debug(f"set_file : site_name={site_name[:20]:<20} {path}")
|
||||
|
||||
|
||||
def _example_set_img(site_name: str, path: str, md5: str) -> None:
    """Example callback for image files; only logs the call (placeholder)."""
    logger.debug(f"set_img : site_name={site_name[:20]:<20} {path}")
|
||||
|
||||
|
||||
def _example_set_markdown(
    site_name: str, path: str, md5: str, content: str
) -> None:
    """Example callback for markdown files; only logs the call (placeholder)."""
    logger.debug(f"set_markdown : site_name={site_name[:20]:<20} {path}")
|
||||
|
||||
|
||||
def _example_set_site(site_name: str, path: str) -> None:
    """Example callback for a discovered site; only logs the call (placeholder)."""
    logger.info(f"set_site : site_name={site_name[:20]:<20} {path}")
|
||||
|
||||
|
||||
def _site_process_action(
    site_name: str,
    site_path: str,
    set_file: Callable[[str, str, str], None],
    set_img: Callable[[str, str, str], None],
    set_markdown: Callable[[str, str, str, str], None],
) -> None:
    """
    Walk *site_path* and dispatch every file to the matching callback.

    Markdown files go to set_markdown (with their content), known image
    extensions to set_img, bookkeeping files (.collection/.site/.done) are
    skipped, and everything else goes to set_file. Paths passed to the
    callbacks are relative to *site_path*.
    """
    logger.debug(f"site process: {site_path[:60]:<60} -> {site_name}")
    skip_names = {".collection", ".site", ".done"}
    image_re = re.compile(r"\.(jpg|jpeg|png|gif|bmp|tiff|webp)$", re.IGNORECASE)
    for root, _, files in os.walk(site_path):
        for file in files:
            file_path = os.path.join(root, file)
            rel_path = os.path.relpath(file_path, site_path)
            base_name = os.path.basename(file)
            # Checksum is computed before the skip check, as in the original.
            checksum = file_md5(file_path)
            if file.lower().endswith(".md"):
                with open(file_path, "r", encoding="utf-8") as f:
                    set_markdown(site_name, rel_path, checksum, f.read())
            elif base_name in skip_names:
                continue
            elif image_re.search(file):
                set_img(site_name, rel_path, checksum)
            else:
                set_file(site_name, rel_path, checksum)
|
||||
|
||||
|
||||
def process(
    path: str,
    set_site: Callable[[str, str], None],
    set_file: Callable[[str, str, str], None],
    set_img: Callable[[str, str, str], None],
    set_markdown: Callable[[str, str, str, str], None],
) -> None:
    """
    Walk *path*; every directory containing a .site or .collection marker is
    registered via set_site and its files are processed with set_file,
    set_img and set_markdown. Site directories are not descended into.
    """
    path = os.path.abspath(os.path.expanduser(path))
    logger.info(f"sites process: {path}")
    markers = {".site", ".collection"}
    for root, dirs, files in os.walk(path):
        if markers.intersection(files):
            site_name = name_fix(os.path.basename(root))
            set_site(site_name, root)
            _site_process_action(site_name, root, set_file, set_img, set_markdown)
            # Prevent os.walk from going deeper below a site root.
            dirs[:] = []
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Demo run over a local collections checkout using the example callbacks.
    demo_path = "~/code/git.threefold.info/projectmycelium/info_projectmycelium/collections"

    process(
        demo_path,
        _example_set_site,
        _example_set_file,
        _example_set_img,
        _example_set_markdown,
    )
|
Reference in New Issue
Block a user