commit 7fabb4163a (parent 4bd960ed05), 2025-08-05 15:15:36 +02:00
192 changed files with 14901 additions and 0 deletions


@@ -0,0 +1,199 @@
"""Context management module for handling file operations and tracking changes."""
import hashlib
import logging
import os
import shutil
from herotools.pathtools import remove_file_if_exists
from herotools.texttools import name_fix
class MyFile:
"""A class representing a file in the context system with tracking capabilities."""
def __init__(self, path: str):
"""Initialize a MyFile instance.
Args:
path: The path to the file
"""
self.path = path
self.exists = os.path.exists(self.path)
self.changed_in_context = False # Indicates if the file is new or was changed
self._md5 = ""
def md5(self) -> str:
"""Calculate and return MD5 hash of the file.
Returns:
str: The MD5 hash of the file's contents
Raises:
FileNotFoundError: If the file does not exist
"""
if not self.exists:
raise FileNotFoundError(f"File does not exist: {self.path}")
if not self._md5:
with open(self.path, "rb") as file:
self._md5 = hashlib.md5(file.read()).hexdigest()
return self._md5
def name(self) -> str:
"""Return the base name of the file.
Returns:
str: The file's base name
"""
return os.path.basename(self.path)
def ext(self) -> str:
"""Return the file extension in lower case.
Returns:
str: The file's extension in lowercase
"""
return os.path.splitext(self.path)[1].lower()
class Context:
"""A class for managing file contexts and tracking file changes."""
def __init__(self, name: str = "default", reset: bool = False):
"""Initialize a Context instance.
Args:
name: The name of the context
reset: Whether to reset (remove) the existing context
"""
logging.basicConfig(level=logging.DEBUG, format="%(message)s")
self.logger = logging.getLogger(__name__)
contextroot = os.getenv("CONTEXTROOT", "~/context")
self.name = name_fix(name)
self.path = os.path.join(os.path.expanduser(contextroot), self.name)
if reset:
self._remove_context()
def _remove_context(self):
"""Remove the context directory if it exists."""
if os.path.exists(self.path):
try:
shutil.rmtree(self.path)
self.logger.info(f"Context directory removed: {self.path}")
except Exception as e:
self.logger.error(f"Error removing context directory: {e!s}")
def file_set(self, path: str, cat: str, name: str = "", content: str = "") -> MyFile:
"""Set a file in the context with the given category.
Args:
path: Source file path
cat: Category for organizing files
name: Optional custom name for the file
content: Optional content to write to file
Returns:
MyFile: A MyFile instance representing the file in context
Raises:
ValueError: If both path and content are provided
FileNotFoundError: If the source file does not exist
"""
cat = name_fix(cat)
name = name_fix(name)
        if content:
            if path:
                raise ValueError("path and content cannot both be set")
            path = os.path.join(self.path, "files", cat, name)
            os.makedirs(os.path.dirname(path), exist_ok=True)
            with open(path, "w") as file:
                file.write(content)
mf = MyFile(path=path)
if not mf.exists:
raise FileNotFoundError(f"Source file does not exist: {path}")
if not content:
if not name:
name = name_fix(mf.name())
else:
if os.path.splitext(name)[1].lower() != mf.ext():
name_ext = os.path.splitext(name)[1]
raise ValueError(f"Extension {name_ext} must match file extension {mf.ext()}")
file_path = os.path.join(self.path, "files", cat, name)
file_path_md5 = os.path.join(self.path, "files", cat, name + ".md5")
os.makedirs(os.path.dirname(file_path), exist_ok=True)
        # Read the MD5 hash of the file as stored on disk, if present
md5_on_disk = ""
if os.path.exists(file_path_md5):
with open(file_path_md5) as file:
md5_on_disk = file.read().strip()
# Validate that it's a valid MD5 hash
if len(md5_on_disk) != 32 or not all(c in "0123456789abcdef" for c in md5_on_disk.lower()):
raise RuntimeError("Bug: hash is not in the right format")
new_md5 = mf.md5()
changed_in_context = False
if not md5_on_disk or new_md5 != md5_on_disk:
changed_in_context = True
md5_dir = os.path.join(self.path, "files", "md5")
if changed_in_context:
# File did change
old_name = os.path.basename(path)
new_name = os.path.basename(file_path)
self.logger.debug(f"File changed in context {self.name}: {old_name} -> {new_name}")
if mf.path != file_path:
shutil.copy2(mf.path, file_path)
with open(file_path_md5, "w") as file:
file.write(new_md5)
# Remove the old MD5 link if it exists
if md5_on_disk:
old_md5_link = os.path.join(md5_dir, md5_on_disk)
remove_file_if_exists(old_md5_link)
mf.path = file_path
os.makedirs(md5_dir, exist_ok=True)
md5_link = os.path.join(md5_dir, mf.md5())
if not os.path.exists(md5_link):
os.symlink(os.path.relpath(file_path, md5_dir), md5_link)
return mf
def file_get(self, name: str, cat: str, needtoexist: bool = True) -> MyFile:
"""Get a file from the context with the given category.
Args:
name: Name of the file to retrieve
cat: Category the file is stored under
needtoexist: Whether to raise an error if file doesn't exist
Returns:
MyFile: A MyFile instance representing the requested file
Raises:
FileNotFoundError: If needtoexist is True and file doesn't exist
"""
name = name_fix(name)
cat = name_fix(cat)
file_path = os.path.join(self.path, "files", cat, name)
if needtoexist:
if os.path.exists(file_path):
return MyFile(file_path)
else:
self.logger.warning(f"File not found: {file_path}")
raise FileNotFoundError(f"Context file does not exist: {file_path}")
else:
return MyFile(file_path)
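A minimal usage sketch for the context module above. The module import path, context name, and category are illustrative assumptions; `CONTEXTROOT` defaults to `~/context` as in the constructor.

```python
# Hypothetical usage; the import path and all names are assumptions.
from context import Context

ctx = Context(name="demo", reset=True)

# Store inline content under a category; it lands in $CONTEXTROOT/demo/files/notes/
note = ctx.file_set(path="", cat="notes", name="todo.md", content="- write docs\n")
print(note.path, note.md5())

# Retrieve it again later
same = ctx.file_get(name="todo.md", cat="notes")
print(same.exists)
```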

_archive/lib/dagu/client.py

@@ -0,0 +1,155 @@
import os
import requests
from requests.auth import HTTPBasicAuth
from dataclasses import dataclass, field
from typing import List, Optional
from datetime import datetime
import time
@dataclass
class DAGStatus:
name: str
status: str
group: Optional[str] = None
schedule: Optional[str] = None
lastRun: Optional[str] = None
nextRun: Optional[str] = None
pid: Optional[int] = None
log: Optional[str] = None
requestId: Optional[str] = None
params: Optional[str] = None
startedAt: Optional[str] = None
finishedAt: Optional[str] = None
suspended: Optional[bool] = None
def get_last_run_epoch(self) -> Optional[int]:
"""Convert lastRun to epoch time."""
return self._convert_to_epoch(self.lastRun)
def get_next_run_epoch(self) -> Optional[int]:
"""Convert nextRun to epoch time."""
return self._convert_to_epoch(self.nextRun)
    @staticmethod
    def _convert_to_epoch(timestamp: Optional[str]) -> Optional[int]:
        """Helper method to convert an ISO 8601 timestamp to epoch seconds."""
        if timestamp:
            dt = datetime.fromisoformat(timestamp.replace('Z', '+00:00'))
            return int(dt.timestamp())  # respects tzinfo instead of interpreting the time as local
        return None
class DAGuClient:
def __init__(self, base_url: str = "http://localhost:8888"):
self.base_url = base_url
self.auth = self._get_basic_auth()
def _get_basic_auth(self) -> HTTPBasicAuth:
"""Retrieve the Basic Auth credentials from environment variables."""
username = os.getenv('DAGU_BASICAUTH_USERNAME')
password = os.getenv('DAGU_BASICAUTH_PASSWORD')
if not username or not password:
raise EnvironmentError("Please set the DAGU_BASICAUTH_USERNAME and DAGU_BASICAUTH_PASSWORD environment variables.")
return HTTPBasicAuth(username, password)
def list_dags(self) -> List[DAGStatus]:
"""Fetches the list of DAGs with their statuses from the DAGu REST API."""
try:
response = requests.get(f"{self.base_url}/api/v1/dags", auth=self.auth)
response.raise_for_status() # Raises an HTTPError for bad responses (4xx or 5xx)
dags_data = response.json().get('DAGs', [])
if isinstance(dags_data, list):
return [self._parse_dag(dag) for dag in dags_data]
else:
print(f"Unexpected response format: {dags_data}")
return []
except requests.exceptions.RequestException as e:
print(f"Error during request: {e}")
return []
def _parse_dag(self, dag_entry: dict) -> DAGStatus:
"""Helper function to parse a DAG's JSON data into a DAGStatus object."""
try:
dag_data = dag_entry.get("DAG", {})
status_data = dag_entry.get("Status", {})
return DAGStatus(
name=dag_data.get("Name"),
status=status_data.get("StatusText"),
group=dag_data.get("Group"),
schedule=(dag_data.get("Schedule", [{}])[0].get("Expression")
if dag_data.get("Schedule") else None),
lastRun=status_data.get("FinishedAt"),
nextRun=None, # Adjust as needed based on your API's response format
pid=status_data.get("Pid"),
log=status_data.get("Log"),
requestId=status_data.get("RequestId"),
params=status_data.get("Params"),
startedAt=status_data.get("StartedAt"),
finishedAt=status_data.get("FinishedAt"),
suspended=dag_entry.get("Suspended")
)
except AttributeError as e:
print(f"Error parsing DAG data: {dag_entry}, Error: {e}")
return None
def submit_dag_action(self, name: str, action: str, request_id: Optional[str] = None, params: Optional[str] = None) -> dict:
"""Submit an action to a specified DAG.
Args:
name (str): Name of the DAG.
action (str): Action to be performed ('start', 'stop', or 'retry').
request_id (Optional[str]): Required if action is 'retry'.
params (Optional[str]): Parameters for the DAG execution.
Returns:
dict: Response from the API.
"""
url = f"{self.base_url}/api/v1/dags/{name}"
payload = {
"action": action,
**({"request-id": request_id} if request_id else {}),
**({"params": params} if params else {}),
}
try:
response = requests.post(url, json=payload, auth=self.auth)
response.raise_for_status() # Raises an HTTPError for bad responses (4xx or 5xx)
return response.json()
        except requests.exceptions.RequestException as e:
            print(f"Error during request: {e}")
            if e.response is not None:
                print(f"Response content: {e.response.content}")
            return {}
# Example usage
if __name__ == "__main__":
client = DAGuClient()
# List DAGs
try:
dags = client.list_dags()
for dag in dags:
if dag:
print(f"DAG Name: {dag.name}, Status: {dag.status}, Group: {dag.group}, "
f"Schedule: {dag.schedule}, Last Run: {dag.lastRun}, "
f"Next Run: {dag.nextRun}, PID: {dag.pid}, Log: {dag.log}, "
f"Request ID: {dag.requestId}, Params: {dag.params}, "
f"Started At: {dag.startedAt}, Finished At: {dag.finishedAt}, "
f"Suspended: {dag.suspended}")
# Example of using helper methods to get epoch times
if dag.get_last_run_epoch():
print(f"Last Run Epoch: {dag.get_last_run_epoch()}")
if dag.get_next_run_epoch():
print(f"Next Run Epoch: {dag.get_next_run_epoch()}")
except Exception as e:
print(f"Error: {e}")
# Submit an action to a DAG (example: start a DAG)
try:
dag_name = "test11" # Replace with your actual DAG name
action_response = client.submit_dag_action(name=dag_name, action="start")
print(f"Action Response: {action_response}")
except Exception as e:
print(f"Error: {e}")

_archive/lib/dagu/dag.py

@@ -0,0 +1,184 @@
import os
import yaml
from dataclasses import dataclass, field
from typing import List, Dict, Optional
from server import *
@dataclass
class EnvVariable:
key: str
value: str
@dataclass
class HandlerCommand:
command: str
@dataclass
class Handlers:
success: Optional[HandlerCommand] = None
failure: Optional[HandlerCommand] = None
cancel: Optional[HandlerCommand] = None
exit: Optional[HandlerCommand] = None
@dataclass
class RepeatPolicy:
repeat: bool
intervalSec: int
@dataclass
class Precondition:
condition: str
expected: str
@dataclass
class Step:
name: str
command: str
script: Optional[str] = None
depends: List[str] = field(default_factory=list)
description: Optional[str] = None
repeatPolicy: Optional[RepeatPolicy] = None
@dataclass
class DAG:
name: str
description: Optional[str] = None
schedule: Optional[str] = None
group: Optional[str] = None
tags: Optional[str] = None # This should be a single string
env: Dict[str, str] = field(default_factory=dict)
logDir: Optional[str] = None
restartWaitSec: Optional[int] = None
histRetentionDays: Optional[int] = None
delaySec: Optional[int] = None
maxActiveRuns: Optional[int] = None
params: Optional[List[str]] = field(default_factory=list)
preconditions: List[Precondition] = field(default_factory=list)
mailOn: Dict[str, bool] = field(default_factory=dict)
handlerOn: Handlers = field(default_factory=Handlers)
MaxCleanUpTimeSec: Optional[int] = None
steps: List[Step] = field(default_factory=list)
def add_step(self, step: Step):
"""Add a step to the DAG."""
self.steps.append(step)
def to_dict(self) -> Dict:
return {
"name": self.name,
**({"description": self.description} if self.description else {}),
**({"schedule": self.schedule} if self.schedule else {}),
**({"group": self.group} if self.group else {}),
**({"tags": self.tags} if self.tags else {}),
**({"env": [{"key": k, "value": v} for k, v in self.env.items()]} if self.env else {}),
**({"logDir": self.logDir} if self.logDir else {}),
**({"restartWaitSec": self.restartWaitSec} if self.restartWaitSec else {}),
**({"histRetentionDays": self.histRetentionDays} if self.histRetentionDays else {}),
**({"delaySec": self.delaySec} if self.delaySec else {}),
**({"maxActiveRuns": self.maxActiveRuns} if self.maxActiveRuns else {}),
**({"params": " ".join(self.params)} if self.params else {}),
**({"preconditions": [{"condition": pc.condition, "expected": pc.expected} for pc in self.preconditions]} if self.preconditions else {}),
**({"mailOn": self.mailOn} if self.mailOn else {}),
**({"MaxCleanUpTimeSec": self.MaxCleanUpTimeSec} if self.MaxCleanUpTimeSec else {}),
**({"handlerOn": {
"success": {"command": self.handlerOn.success.command} if self.handlerOn.success else None,
"failure": {"command": self.handlerOn.failure.command} if self.handlerOn.failure else None,
"cancel": {"command": self.handlerOn.cancel.command} if self.handlerOn.cancel else None,
"exit": {"command": self.handlerOn.exit.command} if self.handlerOn.exit else None,
}} if any(vars(self.handlerOn).values()) else {}),
"steps": [
{
"name": step.name,
"command": step.command,
**({"script": step.script} if step.script else {}),
**({"depends": step.depends} if step.depends else {}), # Change this back to depends_on if needed
**({"description": step.description} if step.description else {}),
**({"repeatPolicy": {
"repeat": step.repeatPolicy.repeat,
"intervalSec": step.repeatPolicy.intervalSec
}} if step.repeatPolicy else {}),
} for step in self.steps
],
}
def to_yaml(self) -> str:
return yaml.dump(self.to_dict(), sort_keys=False)
def new(**kwargs) -> DAG:
return DAG(**kwargs)
# Example usage to create a new DAG
if __name__ == "__main__":
# Initialize the server with the default DAG directory
server = Server()
# List existing DAGs
print("Listing existing DAGs:")
dags = server.list_dags()
for dag_name in dags:
print(f" - {dag_name}")
# Create a new DAG
dag = new(
name="example_dag",
description="Example DAG to demonstrate functionality",
schedule="0 * * * *",
group="ExampleGroup",
tags="example", # Convert tags to a comma-separated string
env={
"LOG_DIR": "${HOME}/logs",
"PATH": "/usr/local/bin:${PATH}"
},
logDir="${LOG_DIR}",
restartWaitSec=60,
histRetentionDays=3,
delaySec=1,
maxActiveRuns=1,
params=["param1", "param2"],
preconditions=[
Precondition(condition="`echo $2`", expected="param2")
],
mailOn={"failure": True, "success": True},
MaxCleanUpTimeSec=300,
handlerOn=Handlers(
success=HandlerCommand(command="echo succeed"), # Convert to map structure
failure=HandlerCommand(command="echo failed"), # Convert to map structure
cancel=HandlerCommand(command="echo canceled"), # Convert to map structure
exit=HandlerCommand(command="echo finished") # Convert to map structure
)
)
# Add steps to the DAG
dag.add_step(Step(
name="pull_data",
command="sh",
script="echo `date '+%Y-%m-%d'`",
))
dag.add_step(Step(
name="cleanse_data",
command="echo cleansing ${DATA_DIR}/${DATE}.csv",
depends=["pull_data"] # Ensure this is the correct key
))
dag.add_step(Step(
name="transform_data",
command="echo transforming ${DATA_DIR}/${DATE}_clean.csv",
depends=["cleanse_data"] # Ensure this is the correct key
))
dag.add_step(Step(
name="A task",
command="main.sh",
repeatPolicy=RepeatPolicy(repeat=True, intervalSec=60)
))
# Save the new DAG as a YAML file
server.create_dag(dag)
print(f"DAG '{dag.name}' created and saved and started.")
# List DAGs again to see the newly created one
print("\nListing updated DAGs:")
dags = server.list_dags()
for dag_name in dags:
print(f" - {dag_name}")


@@ -0,0 +1,51 @@
import os
import yaml
import glob
from typing import List
from dag import DAG
from client import *
# Assuming the following classes have already been defined:
# - DAG (for creating and managing DAG structures)
# - Step
# - Handlers
# - RepeatPolicy
# - Precondition
class Server:
def __init__(self, dag_dir: str = "~/hero/var/dagu/dags/"):
self.dag_dir = os.path.expanduser(dag_dir)
os.makedirs(self.dag_dir, exist_ok=True) # Ensure the directory exists
def list_dags(self) -> List[str]:
"""Lists the DAGs in the directory."""
dag_files = glob.glob(os.path.join(self.dag_dir, "*.yaml"))
return [os.path.splitext(os.path.basename(dag_file))[0] for dag_file in dag_files]
def delete_dag(self, name: str) -> bool:
"""Deletes a DAG file based on its name."""
dag_file = os.path.join(self.dag_dir, f"{name}.yaml")
if os.path.exists(dag_file):
os.remove(dag_file)
return True
else:
print(f"DAG '{name}' does not exist.")
return False
def create_dag(self, dag:DAG, start:bool = True) -> bool:
"""Creates a new DAG and saves it as a YAML file."""
dag_file = os.path.join(self.dag_dir, f"{dag.name}.yaml")
with open(dag_file, 'w') as file:
yaml.dump(dag.to_dict(), file, sort_keys=False)
if start:
self.start_dag(dag.name)
return True
    def start_dag(self, dag_name: str) -> bool:
        """Starts a DAG via the DAGu REST API."""
        client = DAGuClient()
        action_response = client.submit_dag_action(name=dag_name, action="start")
        return bool(action_response)
    def stop_dag(self, dag_name: str) -> bool:
        """Stops a DAG via the DAGu REST API."""
        client = DAGuClient()
        action_response = client.submit_dag_action(name=dag_name, action="stop")
        return bool(action_response)
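A short sketch of the remaining `Server` operations. The DAG name is illustrative, and `start_dag`/`stop_dag` need the `DAGU_BASICAUTH_*` environment variables because they go through `DAGuClient`.

```python
# Illustrative only; assumes a DAG named "example_dag" already exists.
server = Server()
print(server.list_dags())
server.stop_dag("example_dag")    # ask the DAGu API to stop the run
server.delete_dag("example_dag")  # remove the YAML definition from the DAG directory
```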


@@ -0,0 +1,13 @@
from .base_types import MDItem, MDPage, MDImage, MDCollection
from .mdcollections import MDCollections
from .scanner import scan_directory
# Re-export all public types and functions
__all__ = [
'MDItem',
'MDPage',
'MDImage',
'MDCollection',
'MDCollections',
'scan_directory'
]


@@ -0,0 +1,177 @@
from pathlib import Path
from typing import List, Dict
from dataclasses import dataclass
from .tools import name_fix
import os
class MDItem:
"""Base class for items in a collection."""
def __init__(self, collection: "MDCollection", rel_path: Path):
if not isinstance(rel_path, Path):
raise TypeError("rel_path must be a Path instance")
self.collection = collection
self.rel_path = rel_path
self.content_ = ""
        self.processed = False
def __str__(self) -> str:
return f"{self.__class__.__name__}: {self.rel_path}"
@property
def full_path(self) -> Path:
"""Returns the full path to the item."""
return self.collection.path / self.rel_path
@property
def path(self) -> str:
"""Returns the fixed name of the item without extension."""
return str(self.full_path.resolve())
@property
def name(self) -> str:
"""Returns the fixed name of the item."""
return name_fix(os.path.basename(self.rel_path))
class MDPage(MDItem):
"""Represents a markdown file in the collection."""
pass
@property
def content(self) -> str:
if not self.content_:
if os.path.exists(self.path):
try:
with open(self.path, 'r', encoding='utf-8') as f:
self.content_ = f.read()
except OSError as e:
raise Exception(f"Error reading file {self.path}: {e}")
else:
raise FileNotFoundError(f"Cannot find markdown file: {self.path}")
return self.content_
class MDImage(MDItem):
"""Represents an image file in the collection."""
pass
@dataclass
class MDCollection:
"""Represents a collection of markdown files and images."""
path: Path
name: str
items: List[MDItem]
def page_get(self, name: str) -> MDPage:
"""
Get a markdown page by name.
Args:
name: Name of the page to find (will be normalized)
Returns:
MDPage object
Raises:
ValueError: If page not found
"""
        if "__" in name:
            raise ValueError("there should be no '__' in the name passed to page_get")
        # Remove the .md extension if present
        if name.endswith('.md'):
            name = name[:-3]
normalized_name = name_fix(name)
for item in self.items:
if isinstance(item, MDPage):
item_name = name_fix(item.rel_path.stem)
if item_name == normalized_name:
return item
raise ValueError(f"Page not found: {name}")
def image_get(self, name: str) -> MDImage:
"""
Get an image by name.
Args:
name: Name of the image to find (will be normalized)
Returns:
MDImage object
Raises:
ValueError: If image not found
"""
normalized_name = name_fix(name)
for item in self.items:
if isinstance(item, MDImage):
# For images, compare with extension
item_name = name_fix(os.path.basename(item.rel_path))
if item_name == normalized_name:
return item
raise ValueError(f"Image not found: {name}")
def __str__(self) -> str:
"""Returns a tree-like string representation of the collection."""
result = [f"Collection: {self.name} ({self.path})"]
# Group items by type
pages = [item for item in self.items if isinstance(item, MDPage)]
images = [item for item in self.items if isinstance(item, MDImage)]
# Add pages
if pages:
result.append(" Pages:")
for page in sorted(pages, key=lambda x: str(x.rel_path)):
result.append(f" └─ {page.name}")
# Add images
if images:
result.append(" Images:")
for image in sorted(images, key=lambda x: str(x.rel_path)):
result.append(f" └─ {image.name}")
return "\n".join(result)
def index_page(self) -> MDPage:
"""Generate a dynamic index of all markdown files in the collection."""
# Get all markdown pages and sort them by relative path
pages = sorted(
[item for item in self.items if isinstance(item, MDPage)],
key=lambda x: str(x.rel_path)
)
# Group pages by directory
page_groups: Dict[str, List[MDPage]] = {}
for page in pages:
dir_path = str(page.rel_path.parent)
if dir_path == '.':
dir_path = 'Root'
if dir_path not in page_groups:
page_groups[dir_path] = []
page_groups[dir_path].append(page)
# Generate markdown content
content = ["# Collection Index\n"]
for dir_path in sorted(page_groups.keys()):
# Add directory header
if dir_path != 'Root':
content.append(f"\n## {dir_path}\n")
elif len(page_groups) > 1: # Only show Root header if there are other directories
content.append("\n## Root Directory\n")
# Add pages in current directory
for page in sorted(page_groups[dir_path], key=lambda x: x.name):
# Create display name by removing extension and formatting
display_name = page.rel_path.stem.replace('_', ' ').replace('-', ' ').title()
# Create link using relative path
link_path = str(page.rel_path)
content.append(f'- [{display_name}]({self.name}__{link_path})')
mdp=MDPage(self,Path("index.md"))
mdp.content_ = "\n".join(content)
return mdp


@@ -0,0 +1,25 @@
import os
from pathlib import Path
from typing import Optional
from .mdcollections import MDCollections
def create_collections(path: Optional[str] = None) -> MDCollections:
"""
Factory function to create and initialize an MDCollections instance.
Args:
path: Optional path to scan for collections. Defaults to "data/markdown"
Returns:
Initialized MDCollections instance
Raises:
ValueError: If path is None
"""
if path is None:
raise ValueError("Path cannot be None")
# Expand ~ to home directory if present in path
expanded_path = os.path.expanduser(path)
return MDCollections(root_path=Path(expanded_path))


@@ -0,0 +1,112 @@
from pathlib import Path
from typing import List, Optional
from .base_types import MDCollection, MDPage, MDImage, MDItem
from .scanner import scan_directory
from .tools import name_fix
class MDCollections:
"""Manages multiple markdown collections."""
def __init__(self, root_path: Path):
"""
Initialize collections manager.
Args:
root_path: Root directory containing collections
"""
self.root_path = root_path
self.collections: List[MDCollection] = []
self._scan_collections()
def _scan_collections(self):
"""Scan root directory for collections."""
if not self.root_path.exists():
raise ValueError(f"Root path does not exist: {self.root_path}")
# Scan immediate subdirectories only
for path in sorted(self.root_path.iterdir()):
if path.is_dir():
# Skip directories starting with _ or containing 'archive' in lowercase
if path.name.startswith('_') or 'archive' in path.name.lower():
continue
items = scan_directory(path)
if items: # Only create collection if directory contains markdown files
collection = MDCollection(
path=path,
name=path.name,
items=sorted(items, key=lambda x: x.name)
)
self.collections.append(collection)
# Sort collections by name
self.collections.sort(key=lambda x: x.name)
def collection_get(self, name: str) -> MDCollection:
"""
Get a collection by name.
Args:
name: Name of the collection to find
Returns:
MDCollection object
Raises:
ValueError: If collection not found
"""
for collection in self.collections:
if collection.name == name:
return collection
raise ValueError(f"Collection not found: {name}")
def page_get(self, collection_name: str, page_name: str) -> MDPage:
"""
Get a page from a specific collection.
Args:
collection_name: Name of the collection
page_name: Name of the page
Returns:
MDPage object
Raises:
ValueError: If collection or page not found
"""
page_name=name_fix(page_name)
collection_name=name_fix(collection_name)
collection = self.collection_get(collection_name)
return collection.page_get(page_name)
def image_get(self, collection_name: str, image_name: str) -> MDImage:
"""
Get an image from a specific collection.
Args:
collection_name: Name of the collection
image_name: Name of the image
Returns:
MDImage object
Raises:
ValueError: If collection or image not found
"""
# Handle image name that might contain collection prefix
if "__" in image_name:
image_name, collection_name = image_name.split("__", 1)
image_name = name_fix(image_name)
collection_name = name_fix(collection_name)
collection = self.collection_get(collection_name)
print(f" -- image get: '{collection_name}' '{image_name}'")
return collection.image_get(image_name)
def __str__(self) -> str:
"""Returns a string representation of all collections."""
if not self.collections:
return "No collections found"
return "\n\n".join(str(collection) for collection in self.collections)


@@ -0,0 +1,61 @@
from pathlib import Path
from typing import List, Sequence
from .base_types import MDItem, MDPage, MDImage, MDCollection
def scan_directory(path: Path) -> Sequence[MDItem]:
"""
Scan a directory for markdown files and images.
Args:
path: Directory to scan
Returns:
List of MDItem objects (MDPage or MDImage)
"""
if not path.exists():
raise ValueError(f"Path does not exist: {path}")
if not path.is_dir():
raise ValueError(f"Path is not a directory: {path}")
items: List[MDItem] = []
# Create a temporary collection for the items
temp_collection = MDCollection(
path=path,
name=path.name,
items=[] # Will be populated later
)
# First scan for markdown files
for md_path in path.rglob("*.md"):
# Skip files in hidden directories (starting with .)
if any(part.startswith('.') for part in md_path.parts):
continue
# Get path relative to collection root
rel_path = md_path.relative_to(path)
# Create MDPage
page = MDPage(temp_collection, rel_path)
items.append(page)
# Then scan for images
image_extensions = {'.png', '.jpg', '.jpeg', '.gif', '.svg'}
for img_path in path.rglob("*"):
# Skip files in hidden directories (starting with .)
if any(part.startswith('.') for part in img_path.parts):
continue
# Check if file has image extension
if img_path.suffix.lower() in image_extensions:
# Get path relative to collection root
rel_path = img_path.relative_to(path)
# Create MDImage
image = MDImage(temp_collection, rel_path)
items.append(image)
# Update the temporary collection's items
temp_collection.items = items
return items
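A quick sketch of calling the scanner directly; the directory path is an assumption and must exist, otherwise `scan_directory` raises.

```python
from pathlib import Path
from mdcollections.scanner import scan_directory

items = scan_directory(Path("~/hero/var/collections/banking_whitepaper").expanduser())
for item in items:
    print(type(item).__name__, item.rel_path)
```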


@@ -0,0 +1,99 @@
from pathlib import Path
from typing import Union
import os
import re
def should_skip_path(path: Union[str, Path]) -> bool:
"""
Check if a path should be skipped based on its basename.
Skips paths that start with . or _
Args:
path: Path to check (can be file or directory)
Returns:
True if path should be skipped, False otherwise
"""
path = Path(path)
return path.name.startswith(('.', '_'))
def strip_ansi_codes(text):
"""Remove ANSI escape codes from text."""
ansi_escape = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])')
return ansi_escape.sub('', text)
def name_fix(path: str) -> str:
"""
Normalize only the final part (stem) of a path by:
- Converting spaces to underscores
- Making lowercase
Preserves the directory structure and only modifies the final name.
Args:
path: Path to normalize
Returns:
Path with normalized stem but unchanged structure
"""
if not isinstance(path, str):
raise TypeError("Input must be a string")
if '/' in path:
raise ValueError("Path should not contain forward slashes - use for filenames only")
path = strip_ansi_codes(path).strip()
name, ext = os.path.splitext(path)
if not is_image(path) and ext.lower() == '.md':
ext = ""
# Convert to lowercase and replace spaces and other characters
name = name.lower().replace(' ', '_').replace('-', '_').replace(',', '')
name = name.replace('__', '_').rstrip(' ')
# Only strip trailing underscores for image files
if is_image(name):
name = name.rstrip('_')
return f"{name}{ext}"
def path_fix(path: Union[str, Path]) -> Path:
"""
Normalize only the final part (stem) of a path by:
- Converting spaces to underscores
- Making lowercase
Preserves the directory structure and only modifies the final name.
Args:
path: Path to normalize
Returns:
Path with normalized stem but unchanged structure
"""
if not isinstance(path, (str, Path)):
path = str(path)
path = Path(path)
# Keep directory structure unchanged, only normalize the filename
parent = path.parent
filename = name_fix(path.name)
# Recombine with original parent path
return parent / filename
def is_image(basename):
# Define a set of valid image extensions
image_extensions = ['.jpg', '.jpeg', '.png', '.gif', '.svg']
# Get the file extension from the basename
_, extension = os.path.splitext(basename)
extension = extension.strip()
#print(f" ----- {basename} '{extension.lower()}' {extension.lower() in image_extensions}")
# Check if the extension is in the set of image extensions
return extension.lower() in image_extensions
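A few illustrative checks of the helpers above, derived from the rules in `name_fix` and `is_image` (not an exhaustive spec):

```python
from mdcollections.tools import name_fix, is_image

assert name_fix("My Page.md") == "my_page"               # '.md' dropped, spaces -> underscores, lowercased
assert name_fix("Cover Photo.png") == "cover_photo.png"  # image extensions are kept
assert is_image("diagram.svg") is True
```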


@@ -0,0 +1,9 @@
"""
MDServer package initialization.
This helps Python properly resolve the package imports.
"""
from .markdown_server import MDServer
from .factory import serve_markdown
from .process_markdown import process_markdown
__all__ = ['MDServer', 'serve_markdown', 'process_markdown']


@@ -0,0 +1,19 @@
from typing import Optional, Union
from pathlib import Path
import sys
sys.path.append(str(Path(__file__).parent.parent))
from .markdown_server import MDServer # Import directly from the module file
def serve_markdown(collections_path: str) -> None:
"""
Legacy function to maintain backward compatibility.
Creates an MDServer instance and serves the markdown content.
Args:
collections_path: Path to the collections directory. Can be a string or Path object.
"""
server = MDServer(collections_path=collections_path)
server.serve_markdown()
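A hypothetical entry-point script (e.g. `app.py`, launched with `streamlit run app.py`); the package import path and the collections directory are assumptions.

```python
from mdserver import serve_markdown  # re-exported in the package __init__; package name assumed

serve_markdown("~/hero/var/collections")
```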


@@ -0,0 +1,55 @@
import re
from typing import TYPE_CHECKING
from mdcollections.base_types import MDPage
def js_to_python(js_str):
"""Convert JavaScript object notation to Python dictionary syntax."""
# Remove any 'option =' prefix and trailing semicolon
js_str = re.sub(r'^option\s*=\s*', '', js_str)
js_str = re.sub(r';(\s*)$', '', js_str)
# Convert JavaScript property names to Python dictionary keys
js_str = re.sub(r'(\b\w+):', r'"\1":', js_str)
# Convert single quotes to double quotes for string values
# First, replace escaped single quotes with a placeholder
js_str = js_str.replace("\\'", "___ESCAPED_QUOTE___")
# Then replace regular single quotes with double quotes
js_str = js_str.replace("'", '"')
# Finally, restore escaped single quotes
js_str = js_str.replace("___ESCAPED_QUOTE___", "\\'")
# Handle trailing commas
js_str = re.sub(r',(\s*[}\]])', r'\1', js_str)
# Handle special JavaScript values
js_str = js_str.replace('true', 'True').replace('false', 'False').replace('null', 'None')
# Remove any comments
js_str = re.sub(r'//.*?\n|/\*.*?\*/', '', js_str, flags=re.DOTALL)
return js_str.strip()
def process_markdown_echarts(page: MDPage) -> MDPage:
"""Convert ```echarts blocks to ```py sl blocks that use st_echarts."""
if not isinstance(page, MDPage):
raise TypeError("page must be a MDPage")
def replace_echarts_block(match):
echarts_code = match.group(1).strip()
python_code = js_to_python(echarts_code)
# Create the streamlit code block
streamlit_code = f"""```py sl
from streamlit_echarts import st_echarts
option = {python_code}
st_echarts(options=option, height="400px")
```"""
return streamlit_code
# Process all echarts code blocks
processed_content = re.sub(r"```echarts\n(.*?)\n```", replace_echarts_block, page.content, flags=re.DOTALL)
page.content_ = processed_content
return page
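A small illustrative check of `js_to_python` on a typical ECharts option string (module assumed importable as `macro_chart`; expected output shown as a comment):

```python
js = "option = {title: {text: 'Demo'}, series: [{type: 'bar', data: [1, 2, 3]}]};"
print(js_to_python(js))
# {"title": {"text": "Demo"}, "series": [{"type": "bar", "data": [1, 2, 3]}]}
```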


@@ -0,0 +1,119 @@
import os
import re
import streamlit as st
from PIL import Image # Pillow package provides PIL
from typing import Optional, List, Tuple, TYPE_CHECKING
from mdcollections.tools import name_fix, is_image
from mdcollections.base_types import MDPage
from mdcollections.mdcollections import MDCollections
from .process_images import process_image
from .tools import debug
def link_process(link: str, page: MDPage, collections: MDCollections, is_image_link: bool, debug_enabled: bool = False) -> str:
"""Process link path and verify existence in collection."""
    if not isinstance(link, str):
        raise TypeError("link must be a string")
    if not isinstance(collections, MDCollections):
        raise TypeError("collections must be a MDCollections")
    if not isinstance(page, MDPage):
        raise TypeError("page must be a MDPage")
debug(f"\nProcessing link: {link}")
debug(f"Is image link: {is_image_link}")
# Remove './' if present
if link.startswith("./"):
link = link[2:]
debug("Removed './' prefix from link")
# Get just the filename without directories
link = os.path.basename(link)
debug(f"Extracted basename: {link}")
# Process link format
if not '__' in link:
if ":" in link:
link = link.replace(':', '__')
# Create full link if needed
if not "__" in link:
link = f"{page.collection.name}__{link}"
debug(f"Created full link: {link}")
if link.count("__")>1:
raise RuntimeError(f"cannot have 2x __ in ${link}")
collection_name, item_name = link.split('__', 1)
# Convert to lowercase and replace spaces with underscores
item_name = name_fix(item_name)
collection_name = name_fix(collection_name)
debug(f"Normalized: '{collection_name}__{item_name}'")
# Ensure .md extension for pages
if is_image_link:
try:
md_i = collections.image_get(collection_name=collection_name,image_name=item_name)
debug("Successfully verified image exists")
# process_image(md_i)
# return ""
return f"{collection_name}__{item_name}"
except ValueError:
debug(f"Error - image not found: {link}")
return f'<span style="color: red;">ERROR: Image not found: {link}</span>'
else:
if not item_name.endswith('.md'):
item_name = f"{item_name}.md"
debug(f"Added .md extension: {item_name}")
try:
collections.page_get(collection_name, item_name)
debug("Successfully verified page exists")
except ValueError:
debug(f"Error - page not found: {link}")
return f'<span style="color: red;">ERROR: Page not found: {link}</span>'
return f"?page={collection_name}__{item_name}.md"
def process_links(page: MDPage, collections: MDCollections) -> MDPage:
"""Process links in the markdown content."""
if not isinstance(page, MDPage):
raise TypeError("page must be a MDPage")
if not isinstance(collections, MDCollections):
raise TypeError("collections must be a MDCollections")
debug(f"Processing links for page: {page.name}")
debug(f"Content length before processing: {len(page.content)} characters")
link_pattern = r'(!?)\[(.*?)\]\((.*?)\)'
def replace_link(match):
is_image_link = match.group(1) == '!'
link_text = match.group(2)
link_path = match.group(3)
debug(f"Found link - Text: {link_text}, Path: {link_path}")
debug(f"Is image link: {is_image_link}")
processed_link = link_process(link_path, page, collections, is_image_link)
if "ERROR:" in processed_link:
debug(f"Link processing error: {processed_link}")
return processed_link #this forwards the error, is html in red
if is_image_link:
debug(f"Returning processed image link: ![{link_text}]({processed_link})")
return f'![{link_text}]({processed_link})'
else:
debug(f"Returning processed text link: [{link_text}]({processed_link})")
return f'[{link_text}]({processed_link})'
page.content_ = re.sub(link_pattern, replace_link, page.content)
debug(f"Content length after processing: {len(page.content)} characters")
debug("Link processing complete")
return page


@@ -0,0 +1,29 @@
import re
from typing import TYPE_CHECKING
from mdcollections.base_types import MDPage
def process_markdown_mermaid(page: MDPage) -> MDPage:
"""Convert ```mermaid blocks to ```py sl blocks that use st_mermaid."""
if not isinstance(page, MDPage):
raise TypeError("page must be a MDPage")
def replace_mermaid_block(match):
mermaid_code = match.group(1).strip()
# Create the streamlit code block
# Note: The mermaid code needs to be properly escaped as a string
mermaid_code = mermaid_code.replace('"', '\\"') # Escape double quotes
streamlit_code = f'''```py sl
from streamlit_mermaid import st_mermaid
st_mermaid("""
{mermaid_code}
""")
```'''
return streamlit_code
# Process all mermaid code blocks
processed_content = re.sub(r"```mermaid\n(.*?)\n```", replace_mermaid_block, page.content, flags=re.DOTALL)
page.content_ = processed_content
return page


@@ -0,0 +1,69 @@
import re
import streamlit as st
import pandas as pd
import numpy as np
from io import StringIO
import sys
from typing import TYPE_CHECKING
from mdcollections.base_types import MDPage
# if TYPE_CHECKING:
# from .markdown_server import MDServer
def execute_streamlit_code(code_block):
"""
Execute a streamlit code block and capture its output.
The code block should be valid Python code that uses streamlit.
"""
# Create string buffer to capture any print outputs
old_stdout = sys.stdout
redirected_output = StringIO()
sys.stdout = redirected_output
try:
# Execute the code block
# The code block can use st, pd, np which are already imported
exec(code_block, {
'st': st,
'pd': pd,
'np': np
})
# Get any printed output
printed_output = redirected_output.getvalue()
return True, printed_output
except Exception as e:
return False, f"Error: {str(e)}\n\nFailed code:\n{code_block}"
finally:
# Restore stdout
sys.stdout = old_stdout
def process_streamlit_blocks(page: MDPage) -> MDPage:
"""
Find and process ```py sl code blocks in markdown content.
Returns the modified content with executed streamlit code blocks replaced by their output.
"""
if not isinstance(page, MDPage):
raise TypeError("page must be a MDPage")
# if not hasattr(md_server, 'collections_manager'):
# raise TypeError("md_server must be an instance of MDServer")
def replace_code_block(match):
code = match.group(1).strip()
success, result = execute_streamlit_code(code)
if not success:
# If execution failed, return the error message
return f"```\n{result}\n```"
# If successful, return empty string - the streamlit components
# will be rendered but the code block itself won't be shown
return ""
# Process the code block
processed_content = re.sub(r"```py\s+sl\n(.*?)\n```", replace_code_block, page.content, flags=re.DOTALL)
page.content_ = processed_content
return page
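A quick illustrative check of `execute_streamlit_code` with a block that only prints, so nothing is rendered through Streamlit and only the captured stdout matters:

```python
ok, out = execute_streamlit_code("print('hello from a py sl block')")
assert ok and "hello" in out
```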


@@ -0,0 +1,76 @@
import re
import streamlit as st
from PIL import Image
from typing import TYPE_CHECKING, List
from mdcollections.base_types import MDPage, MDImage
# if TYPE_CHECKING:
# from .markdown_server import MDServer
def create_slider_component(images: List[str]) -> None:
"""Create a Streamlit component for image slides."""
st.markdown("""
<style>
.stImage {
cursor: pointer;
}
</style>
""", unsafe_allow_html=True)
# Initialize session state
if 'current_slide' not in st.session_state:
st.session_state.current_slide = 0
# Navigation buttons
col1, col2, col3 = st.columns([1, 4, 1])
with col1:
if st.button("⬅️ Previous"):
st.session_state.current_slide = (st.session_state.current_slide - 1) % len(images)
with col3:
if st.button("Next ➡️"):
st.session_state.current_slide = (st.session_state.current_slide + 1) % len(images)
# Display current image
current_image_spec = images[st.session_state.current_slide]
if not hasattr(st.session_state, 'md_server') or not st.session_state.md_server.collections_manager:
st.error("Collections manager not initialized")
return
try:
image_item = st.session_state.md_server.collections_manager.image_get(current_image_spec)
image = Image.open(image_item.path)
st.image(image, use_column_width=True)
except Exception as e:
st.error(f"Could not load image: {current_image_spec}. Error: {str(e)}")
# Display slide counter
st.caption(f"Slide {st.session_state.current_slide + 1} of {len(images)}")
def process_markdown_slides(page: MDPage) -> MDPage:
"""Convert ```slides blocks to ```py sl blocks that use the slider component."""
if not isinstance(page, MDPage):
raise TypeError("page must be a MDPage")
# if not hasattr(md_server, 'collections_manager'):
# raise TypeError("md_server must be an instance of MDServer")
# # Store md_server in session state for use by create_slider_component
# st.session_state.md_server = md_server
def replace_slides_block(match):
slides_content = match.group(1).strip()
image_paths = [line.strip() for line in slides_content.split('\n') if line.strip()]
# Create the streamlit code block
image_paths_str = repr(image_paths)
streamlit_code = f'''```py sl
from .macro_slides import create_slider_component
create_slider_component({image_paths_str})
```'''
return streamlit_code
# Process all slides code blocks
page.content_ = re.sub(r"```slides\n(.*?)\n```", replace_slides_block, page.content, flags=re.DOTALL)
return page


@@ -0,0 +1,237 @@
from typing import Optional, Union
import os
from pathlib import Path
import traceback
import sys
import re
import pudb
try:
import streamlit as st
except ImportError:
raise ImportError("streamlit is required. Install with: pip install streamlit")
from mdcollections.base_types import MDPage, MDImage, MDCollection
from mdcollections.mdcollections import MDCollections
from .process_markdown import process_markdown, summary_load
from .tools import debug
def setup_static_dir(collections_path: str) -> None:
"""
Set up static directory for serving images.
Creates symbolic links from collections to static directory.
"""
pass
# static_dir = os.path.join(collections_path, "static")
# if not os.path.exists(static_dir):
# os.makedirs(static_dir)
# Create symlinks for each collection
# collections = os.listdir(collections_path)
# for collection in collections:
# collection_path = os.path.join(collections_path, collection)
# if os.path.isdir(collection_path) and not collection.startswith('.') and collection != 'static':
# # Create symlink from collection to static/collection
# static_link = os.path.join(static_dir, collection)
# if not os.path.exists(static_link):
# try:
# os.symlink(collection_path, static_link)
# except OSError as e:
# debug(f"Failed to create symlink from {collection_path} to {static_link}: {e}")
def process_markdown_content(content: str, base_path: str, collection_name: str) -> None:
"""
Process and display markdown content.
Args:
content: The markdown content to process
base_path: Base path for resolving relative paths
collection_name: Name of the collection
"""
st.markdown(content)
class MDServer:
def __init__(self,collections_path:str):
"""Initialize the MDServer instance."""
# Convert path to string if it's a Path object
if not isinstance(collections_path, str):
            raise RuntimeError("collections_path must be a string.")
st.session_state.setdefault('current_collection', None)
st.session_state.setdefault('current_page', None)
st.session_state.setdefault('show_collections_view', False)
st.session_state.setdefault('collections_manager', None)
st.session_state.setdefault('debug_mode', True)
# Get the collections manager
collections_path = os.path.expanduser(collections_path)
print(f"Initializing collections manager for: {collections_path}")
collections_manager = MDCollections(root_path=Path(collections_path))
# Set up static directory for serving images
setup_static_dir(collections_path)
# Set up page config
st.set_page_config(
page_title="Markdown Server",
page_icon="📚",
layout="wide",
initial_sidebar_state="expanded",
)
st.session_state.collections_manager = collections_manager
@property
def collections_manager(self) -> MDCollections:
"""
Property to safely access the collections manager.
Ensures collections_manager is initialized before access.
Returns:
MDCollections: The initialized collections manager
Raises:
RuntimeError: If collections_manager is not initialized
"""
if not st.session_state.get('collections_manager'):
raise RuntimeError("Collections manager not initialized. Please ensure MDServer is properly initialized.")
return st.session_state.collections_manager
@property
def collections(self) -> list:
"""
Property to safely access collections from the collections manager.
Returns:
list: List of available collections
Raises:
RuntimeError: If collections_manager is not initialized
"""
return self.collections_manager.collections
def handle_url_parameters(self) -> None:
"""
Handle URL parameters to load specific pages.
Expected format: ?page=collection_name__page_name.md
Example: ?page=banking_whitepaper__web_3_vision.md
"""
query_params = st.query_params
requested_page = query_params.get('page', None)
if not requested_page:
return
try:
# Split the page parameter using '__' as delimiter
if '__' not in requested_page:
raise ValueError(f"Invalid page format. Expected format: collection_name__page_name.md, got: {requested_page}")
collection_name, page_name = requested_page.split('__', 1)
# Get the page using collections_manager's page_get method
page = self.collections_manager.page_get(
collection_name=collection_name,
page_name=page_name
)
page = process_markdown(page, collections=self.collections_manager)
st.session_state.current_collection = page.collection
st.session_state.current_page = page
except ValueError as e:
# Handle invalid format or page not found errors
st.warning(f"Could not load page: {requested_page}. Error: {str(e)}")
def setup_sidebar(self, collections: MDCollections) -> None:
"""
Set up the sidebar with collection selection.
Args:
collections: List of available collections
"""
with st.sidebar:
# Add Debug Mode toggle that persists across reloads
debug_mode = st.toggle("Debug Mode", st.session_state.debug_mode)
if debug_mode != st.session_state.debug_mode:
st.session_state.debug_mode = debug_mode
# Store in local storage to persist across reloads
st.session_state['debug_mode'] = debug_mode
# Add Collections View action
if st.button("View All Collections"):
st.session_state.show_collections_view = True
st.session_state.current_page = None
return
collection_names = [c.name for c in self.collections]
current_idx = collection_names.index(st.session_state.current_collection.name) if st.session_state.current_collection else 0
selected_collection_name = st.selectbox(
"Choose a collection:",
collection_names,
index=current_idx,
key="collection_selector"
)
# Add sidebar content
with st.sidebar:
# Check for summary.md
collection = self.collections_manager.collection_get(selected_collection_name)
summary_page = summary_load(collection)
st.markdown(summary_page.content, unsafe_allow_html=True)
# Get the selected collection by name
st.session_state.current_collection = self.collections_manager.collection_get(selected_collection_name)
def display_content(self) -> None:
"""Display the markdown content in the main area."""
main_content = st.container()
with main_content:
try:
if st.session_state.show_collections_view:
# Read and process collections view template
collections_view_path = Path(__file__).parent / "pages" / "collections_view.md"
with open(collections_view_path, 'r') as f:
template = f.read()
# Replace placeholder with actual collections string representation
content = template.replace("{collections_str}", str(self.collections_manager))
st.markdown(content)
elif st.session_state.current_page:
st.markdown(st.session_state.current_page.content, unsafe_allow_html=True)
elif st.session_state.current_collection:
# Display collection summary and index when no specific page is selected
st.markdown("### Collection Index")
myindex_page = st.session_state.current_collection.index_page()
myindex_page = process_markdown(myindex_page, collections=self.collections_manager)
st.markdown(myindex_page.content)
else:
st.warning("Please select a collection.")
except Exception as e:
st.error(f"An error occurred: {str(e)}")
def serve_markdown(self) -> None:
"""
Serve markdown content using Streamlit.
"""
try:
if not self.collections:
st.error("No collections found.")
return
# Handle URL parameters
self.handle_url_parameters()
# Setup sidebar
self.setup_sidebar(self.collections_manager)
# Display content
self.display_content()
except Exception as e:
st.error(f"An error occurred: {str(e)}")


@@ -0,0 +1,7 @@
# Collections Overview
```python
{collections_str}
```
The tree structure above is automatically generated from the current state of the collections manager.


@@ -0,0 +1,89 @@
import os
import re
import streamlit as st
from PIL import Image # Pillow package provides PIL
from typing import Optional, List, Tuple, TYPE_CHECKING
from mdcollections.base_types import MDImage, MDPage
from mdcollections.mdcollections import MDCollections
from .tools import debug
def process_image(myimage: MDImage, alt_text: Optional[str] = None) -> str:
"""
Process an image and return HTML img tag for proper rendering in markdown.
Args:
myimage: The MDImage object to process
alt_text: Optional alternative text for the image
Returns:
str: HTML img tag with proper styling
"""
if not isinstance(myimage, MDImage):
raise TypeError("myimage must be a MDImage")
try:
# Verify image can be opened
Image.open(myimage.path)
# Construct static URL using collection name and relative path
static_url = f"/app/static/{myimage.collection.name}/{myimage.rel_path}"
# Create HTML img tag with proper styling
return f'<img src="{static_url}" alt="{alt_text or ""}" style="max-width: 100%; height: auto; display: inline-block; margin: 0.5em 0;">'
except Exception as e:
debug(f"Error processing image {myimage.path}: {str(e)}")
return f"Error loading image: {myimage.path}"
def process_images(page: MDPage, collections: MDCollections) -> MDPage:
"""
Process images in the markdown content while preserving text structure.
Args:
page: The MDPage object containing markdown content
collections: The MDCollections object containing image references
Returns:
MDPage: The processed page with images displayed
"""
if not isinstance(page, MDPage):
raise TypeError("page must be a MDPage")
if not isinstance(collections, MDCollections):
raise TypeError("collections must be a MDCollections")
debug(f"Processing images for page: {page.name}")
debug(f"Content length before processing: {len(page.content)} characters")
# Match markdown image syntax: ![alt text](path)
link_pattern = r'!\[(.*?)\]\((.*?)\)'
def replace_link(match):
alt_text = match.group(1)
image_path = match.group(2)
# Split path into collection and image name
try:
parts = image_path.split("__", 1)
if len(parts) != 2:
debug(f"Invalid image path format (missing __): {image_path}")
return f"Invalid image path format: {image_path}"
            collection_name, image_name = parts
            debug(f"Found image link, will now check - Alt text: {alt_text}, Image: '{image_name}', Collection: '{collection_name}'")
            # Get the image from collections using the collection__image path
            myimage = collections.image_get(collection_name, image_name)
return process_image(myimage, alt_text if alt_text else None)
except ValueError as e:
debug(f"Image not found in collection: {image_path}.\n{e}")
return f"Image not found: {image_path}"
except Exception as e:
debug(f"Error processing image {image_path}: {str(e)}")
return f"Error processing image: {image_path}"
# Process all image links while preserving surrounding text
page.content_ = re.sub(link_pattern, replace_link, page.content)
debug("Image processing complete")
return page


@@ -0,0 +1,80 @@
import os
import re
import streamlit as st
from PIL import Image # Pillow package provides PIL
from typing import Optional, List, Tuple, TYPE_CHECKING
from .macro_sl import process_streamlit_blocks
from .macro_chart import process_markdown_echarts
from .macro_mermaid import process_markdown_mermaid
from .macro_slides import process_markdown_slides
from .macro_links import process_links
from .process_images import process_images
from mdcollections.tools import name_fix, is_image
from mdcollections.base_types import MDPage, MDCollection
from mdcollections.mdcollections import MDCollections
from .tools import debug,rewrite_summary_links
def summary_load(collection:MDCollection) -> MDPage:
"""Load the summary.md file if it exists, otherwise it creates an index"""
if not isinstance(collection, MDCollection):
raise TypeError("collection must be a MDCollection")
try:
mypage = collection.page_get("summary.md")
        mypage.content_ = rewrite_summary_links(mypage.content)  # need to rewrite the first part of the path as the collection name, might change in future
return mypage
except ValueError:
return collection.index_page()
def process_markdown(page: MDPage, collections: MDCollections) -> MDPage:
"""Process markdown content and handle images, links, and streamlit code blocks.
Args:
page: The MDPage object to process
collections: The MDCollections object containing all collections
"""
if not isinstance(page, MDPage):
raise TypeError("page must be a MDPage")
if not isinstance(collections, MDCollections):
raise TypeError("collections must be a MDCollections")
debug(f"Processing markdown for page: {page.name} in collection: {page.collection.name}\nInitial content length: {len(page.content)} characters")
if page.processed:
RuntimeError(f"double processing of page {page.name}")
# Process special blocks with page and md_server arguments
#debug("Processing echarts blocks...")
page = process_markdown_echarts(page)
#debug("Processing mermaid blocks...")
page = process_markdown_mermaid(page)
#debug("Processing slides blocks...")
page = process_markdown_slides(page)
#debug("Processing streamlit blocks...")
page = process_streamlit_blocks(page)
#debug("Processing links...")
# Pass the debug flag to process_links
page = process_links(page=page, collections=collections)
page = process_images(page=page, collections=collections )
# Process remaining content
if page.content.strip():
debug(f"Rendering final markdown content (length: {len(page.content)} characters)")
st.markdown(page.content, unsafe_allow_html=True)
else:
debug("No content to render after processing")
return page
def parse_page_parameter(page_param: str) -> Tuple[Optional[str], str]:
"""Parse the page parameter to extract collection and file name."""
if '__' in page_param:
collection, filename = page_param.split('__', 1)
return collection, filename
return None, page_param


@@ -0,0 +1,5 @@
streamlit>=1.24.0
pandas>=1.5.0
numpy>=1.24.0
ipython>=8.0.0
Pillow>=10.0.0


@@ -0,0 +1,43 @@
import re
import streamlit as st
def strip_ansi_codes(text):
"""Remove ANSI escape codes from text."""
ansi_escape = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])')
return ansi_escape.sub('', text)
def debug(message: str):
"""Display debug messages in a compact code block format.
Args:
message: The debug message to display
"""
    debug_enabled = st.session_state.get("debug_mode", False)
if debug_enabled:
#st.code(message, language="text")
print(strip_ansi_codes(message))
def rewrite_summary_links(text:str) -> str:
import re
def replace_first_slash(match):
# Get the matched text
link = match.group(1)
# Replace the first slash with double underscore
new_link = link.replace('/', '__', 1)
return f'({new_link})'
# Use a regular expression to find links in the format (path/to/resource)
pattern = r'\(([^)]+)\)'
# Process each line and apply the substitution
rewritten_lines = []
for line in text.splitlines():
rewritten_line = re.sub(pattern, replace_first_slash, line)
rewritten_lines.append(rewritten_line)
# Join the rewritten lines back together
return '\n'.join(rewritten_lines)
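An illustrative check of `rewrite_summary_links` on a typical SUMMARY.md line, reusing the collection/page names from the docstring example in `markdown_server.py`:

```python
line = "- [Web 3 Vision](banking_whitepaper/web_3_vision.md)"
print(rewrite_summary_links(line))
# - [Web 3 Vision](banking_whitepaper__web_3_vision.md)
```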