doctree_rust/webbuilder/src/parser.rs
Mahmoud Emad f9d338a8f1 feat: Add WebBuilder library for website generation
- Adds a new library for building websites from configuration
  files and markdown content.  Improves developer workflow by
  automating website construction.

- Implements multiple parsing strategies for configuration files
  (Hjson, Simple, Auto) for flexibility and backward
  compatibility.

- Includes support for cloning Git repositories, processing
  markdown, and uploading files to IPFS, streamlining the
  website deployment process.  Facilitates easier website
  updates and content management.

- Adds comprehensive README documentation explaining the library's
  usage and configuration options.  Improves user onboarding and
  reduces the learning curve for new users.
2025-05-15 09:42:08 +03:00

518 lines
16 KiB
Rust

use std::fs;
use std::path::Path;
use deser_hjson::from_str;
use serde::de::DeserializeOwned;
use serde_json::{self, Value};
use crate::config::{CollectionConfig, FooterConfig, HeaderConfig, PageConfig, SiteConfig};
use crate::error::{Result, WebBuilderError};
/// Selects which parser `parse_file` applies to a configuration file.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum ParsingStrategy {
/// Deserialize with the deser-hjson library (recommended).
/// A parse failure is reported as an error; no fallback is attempted.
Hjson,
/// Legacy parser: try strict JSON first and, if that is rejected,
/// run the simple line-by-line Hjson-to-JSON conversion and parse the result.
Simple,
/// Try the deser-hjson parser first; if it fails, log a warning and
/// fall back to the same steps as `Simple`.
Auto,
}
/// Parse a file into a struct using the specified strategy
///
/// # Arguments
///
/// * `path` - Path to the file to parse
/// * `strategy` - Parsing strategy to use
///
/// # Returns
///
/// The parsed struct or an error
pub fn parse_file<T, P>(path: P, strategy: ParsingStrategy) -> Result<T>
where
T: DeserializeOwned,
P: AsRef<Path>,
{
let path = path.as_ref();
// Check if the file exists
if !path.exists() {
return Err(WebBuilderError::MissingFile(path.to_path_buf()));
}
// Read the file
let content = fs::read_to_string(path).map_err(|e| WebBuilderError::IoError(e))?;
match strategy {
ParsingStrategy::Hjson => {
// Use the deser-hjson library
from_str(&content).map_err(|e| WebBuilderError::HjsonError(format!("Error parsing {:?}: {}", path, e)))
}
ParsingStrategy::Simple => {
// Use the simple parser - for this we need to handle the file reading ourselves
// since the original parse_hjson function does that internally
let path_ref: &Path = path.as_ref();
// Check if the file exists
if !path_ref.exists() {
return Err(WebBuilderError::MissingFile(path_ref.to_path_buf()));
}
// Read the file
let content = fs::read_to_string(path).map_err(|e| WebBuilderError::IoError(e))?;
// First try to parse as JSON
let json_result = serde_json::from_str::<T>(&content);
if json_result.is_ok() {
return Ok(json_result.unwrap());
}
// If that fails, try to convert hjson to json using a simple approach
let json_content = convert_hjson_to_json(&content)?;
// Parse the JSON
serde_json::from_str(&json_content)
.map_err(|e| WebBuilderError::HjsonError(format!("Error parsing {:?}: {}", path, e)))
}
ParsingStrategy::Auto => {
// Try the hjson parser first, fall back to simple if it fails
match from_str(&content) {
Ok(result) => Ok(result),
Err(e) => {
log::warn!("Hjson parser failed: {}, falling back to simple parser", e);
// Call the simple parser directly
let path_ref: &Path = path.as_ref();
// Check if the file exists
if !path_ref.exists() {
return Err(WebBuilderError::MissingFile(path_ref.to_path_buf()));
}
// Read the file
let content = fs::read_to_string(path).map_err(|e| WebBuilderError::IoError(e))?;
// First try to parse as JSON
let json_result = serde_json::from_str::<T>(&content);
if json_result.is_ok() {
return Ok(json_result.unwrap());
}
// If that fails, try to convert hjson to json using a simple approach
let json_content = convert_hjson_to_json(&content)?;
// Parse the JSON
serde_json::from_str(&json_content)
.map_err(|e| WebBuilderError::HjsonError(format!("Error parsing {:?}: {}", path, e)))
}
}
}
}
}
/// Parse a hjson file into a struct using the simple parser
///
/// # Arguments
///
/// * `path` - Path to the hjson file
///
/// # Returns
///
/// The parsed struct or an error
pub fn parse_hjson<T, P>(path: P) -> Result<T>
where
T: DeserializeOwned,
P: AsRef<Path>,
{
let path = path.as_ref();
// Check if the file exists
if !path.exists() {
return Err(WebBuilderError::MissingFile(path.to_path_buf()));
}
// Read the file
let content = fs::read_to_string(path).map_err(|e| WebBuilderError::IoError(e))?;
// First try to parse as JSON
let json_result = serde_json::from_str::<T>(&content);
if json_result.is_ok() {
return Ok(json_result.unwrap());
}
// If that fails, try to convert hjson to json using a simple approach
let json_content = convert_hjson_to_json(&content)?;
// Parse the JSON
serde_json::from_str(&json_content)
.map_err(|e| WebBuilderError::HjsonError(format!("Error parsing {:?}: {}", path, e)))
}
/// Convert hjson to json using a simple line-oriented approach
///
/// Each non-blank, non-comment line is classified as one of:
/// an opening bracket, a closing bracket, an inline container, a
/// `key: value` member, or a bare value (array element). Keys are quoted,
/// quoteless strings are quoted and escaped, separators are normalised, and
/// a brace-less root is wrapped in `{ }`. A root that already starts with
/// `{` or `[` is left as is.
///
/// Known limitations of the line-based approach: multi-line `'''` strings,
/// `/* */` comments, comments after a value, and quoteless array elements
/// that contain `:` are not understood.
///
/// # Arguments
///
/// * `hjson` - The hjson content
///
/// # Returns
///
/// The json content or an error (the current implementation always
/// returns `Ok`; malformed input surfaces when the result is parsed)
fn convert_hjson_to_json(hjson: &str) -> Result<String> {
    let mut json = String::with_capacity(hjson.len() + 16);

    // Blank and comment lines are filtered up front so that peeking at the
    // "next" line always sees real content.
    let mut lines = hjson
        .lines()
        .map(str::trim)
        .filter(|line| !(line.is_empty() || line.starts_with('#') || line.starts_with("//")))
        .peekable();

    while let Some(line) = lines.next() {
        // The same line without an optional JSON-style trailing separator.
        let unseparated = line.strip_suffix(',').unwrap_or(line).trim_end();

        if unseparated == "}" || unseparated == "]" {
            // JSON forbids a comma between the last member and the bracket.
            hjson_drop_trailing_comma(&mut json);
            json.push_str(unseparated);
            json.push_str(",\n");
        } else if line == "{" || line == "[" {
            // Nothing may follow an opening bracket except its first member.
            json.push_str(line);
            json.push('\n');
        } else if line.starts_with('{') || line.starts_with('[') {
            // Inline container used as a value or array element: copy verbatim.
            json.push_str(unseparated);
            json.push_str(",\n");
        } else if let Some((key, value)) = hjson_split_member(line) {
            if key.starts_with('"') {
                // Already a JSON string; quoting again would corrupt it.
                json.push_str(key);
            } else {
                json.push_str(&hjson_quote_string(key));
            }
            json.push(':');

            if value.is_empty() {
                let container_follows = lines
                    .peek()
                    .map_or(false, |next| next.starts_with('{') || next.starts_with('['));
                if container_follows {
                    // The following line supplies the value itself.
                    json.push('\n');
                } else {
                    json.push_str(" null,\n");
                }
            } else if value == "{" || value == "[" {
                json.push(' ');
                json.push_str(value);
                json.push('\n');
            } else {
                json.push(' ');
                json.push_str(&hjson_value_to_json(value));
                json.push_str(",\n");
            }
        } else {
            // Bare value, i.e. an array element on its own line.
            json.push_str(&hjson_value_to_json(line));
            json.push_str(",\n");
        }
    }

    hjson_drop_trailing_comma(&mut json);

    // Hjson allows omitting the root braces; arrays and explicit objects
    // must not be wrapped a second time.
    let has_root_container = {
        let body = json.trim_start();
        body.starts_with('{') || body.starts_with('[')
    };
    if !has_root_container {
        json = format!("{{\n{}\n}}", json);
    }

    Ok(json)
}

/// Replaces a trailing `",\n"` of the output built so far with `"\n"`.
fn hjson_drop_trailing_comma(json: &mut String) {
    if json.ends_with(",\n") {
        json.truncate(json.len() - 2);
        json.push('\n');
    }
}

/// Splits a trimmed `key: value` line into key and value.
///
/// A key that starts with `"` extends to its closing quote (backslash
/// escapes are honoured) and is returned with its quotes. Returns `None`
/// when the line has no key, e.g. a quoted string that is an array element.
fn hjson_split_member(line: &str) -> Option<(&str, &str)> {
    if let Some(rest) = line.strip_prefix('"') {
        let mut escaped = false;
        for (idx, ch) in rest.char_indices() {
            if escaped {
                escaped = false;
            } else if ch == '\\' {
                escaped = true;
            } else if ch == '"' {
                // `idx` is relative to `rest`: +1 for the opening quote,
                // +1 to step past the closing quote.
                let key_end = idx + 2;
                let after_key = line[key_end..].trim_start();
                return after_key
                    .strip_prefix(':')
                    .map(|value| (&line[..key_end], value.trim()));
            }
        }
        return None;
    }

    let colon = line.find(':')?;
    Some((line[..colon].trim(), line[colon + 1..].trim()))
}

/// Renders a single-line Hjson value as JSON text.
///
/// Quoted strings, inline containers, `true`/`false`/`null` and JSON numbers
/// are passed through with an optional trailing comma removed; anything else
/// is a quoteless string, in which a trailing comma is part of the content.
fn hjson_value_to_json(raw: &str) -> String {
    let bare = raw.strip_suffix(',').unwrap_or(raw).trim_end();
    let is_json_shaped = bare.starts_with('"')
        || bare.starts_with('[')
        || bare.starts_with('{')
        || matches!(bare, "true" | "false" | "null")
        || hjson_is_json_number(bare);

    if is_json_shaped {
        bare.to_string()
    } else {
        hjson_quote_string(raw)
    }
}

/// Wraps `text` in double quotes, escaping what JSON strings cannot contain raw.
fn hjson_quote_string(text: &str) -> String {
    let mut quoted = String::with_capacity(text.len() + 2);
    quoted.push('"');
    for ch in text.chars() {
        match ch {
            '"' => quoted.push_str("\\\""),
            '\\' => quoted.push_str("\\\\"),
            c if (c as u32) < 0x20 => quoted.push_str(&format!("\\u{:04x}", c as u32)),
            c => quoted.push(c),
        }
    }
    quoted.push('"');
    quoted
}

/// Returns true when `text` matches the JSON number grammar.
///
/// `f64::from_str` is too permissive for this purpose: it also accepts
/// `inf`, `nan`, `+1` and `.5`, none of which a JSON parser accepts.
fn hjson_is_json_number(text: &str) -> bool {
    fn all_digits(part: &str) -> bool {
        !part.is_empty() && part.bytes().all(|b| b.is_ascii_digit())
    }

    let unsigned = text.strip_prefix('-').unwrap_or(text);
    let (mantissa, exponent) = match unsigned.find(|c: char| c == 'e' || c == 'E') {
        Some(pos) => (&unsigned[..pos], Some(&unsigned[pos + 1..])),
        None => (unsigned, None),
    };
    let (int_part, frac_part) = match mantissa.split_once('.') {
        Some((int_part, frac_part)) => (int_part, Some(frac_part)),
        None => (mantissa, None),
    };

    // No leading zeros except for the single digit `0`.
    let int_ok = all_digits(int_part) && (int_part.len() == 1 || !int_part.starts_with('0'));
    let frac_ok = frac_part.map_or(true, all_digits);
    let exp_ok = exponent.map_or(true, |exp| {
        all_digits(exp.strip_prefix(|c: char| c == '+' || c == '-').unwrap_or(exp))
    });

    int_ok && frac_ok && exp_ok
}
/// Parse site configuration from a directory
///
/// # Arguments
///
/// * `path` - Path to the directory containing hjson configuration files
///
/// # Returns
///
/// The parsed site configuration or an error
pub fn parse_site_config<P: AsRef<Path>>(path: P) -> Result<SiteConfig> {
let path = path.as_ref();
// Check if the directory exists
if !path.exists() {
return Err(WebBuilderError::MissingDirectory(path.to_path_buf()));
}
// Check if the directory is a directory
if !path.is_dir() {
return Err(WebBuilderError::InvalidConfiguration(format!(
"{:?} is not a directory",
path
)));
}
// Parse main.hjson
let main_path = path.join("main.hjson");
let main_config: serde_json::Value = parse_hjson(main_path)?;
// Parse header.hjson
let header_path = path.join("header.hjson");
let header_config: Option<HeaderConfig> = if header_path.exists() {
Some(parse_hjson(header_path)?)
} else {
None
};
// Parse footer.hjson
let footer_path = path.join("footer.hjson");
let footer_config: Option<FooterConfig> = if footer_path.exists() {
Some(parse_hjson(footer_path)?)
} else {
None
};
// Parse collection.hjson
let collection_path = path.join("collection.hjson");
let collection_configs: Vec<CollectionConfig> = if collection_path.exists() {
parse_hjson(collection_path)?
} else {
Vec::new()
};
// Parse pages directory
let pages_path = path.join("pages");
let mut page_configs: Vec<PageConfig> = Vec::new();
if pages_path.exists() && pages_path.is_dir() {
for entry in fs::read_dir(pages_path)? {
let entry = entry?;
let entry_path = entry.path();
if entry_path.is_file() && entry_path.extension().map_or(false, |ext| ext == "hjson") {
let page_config: Vec<PageConfig> = parse_hjson(&entry_path)?;
page_configs.extend(page_config);
}
}
}
// Parse keywords from main.hjson
let keywords = if let Some(keywords_value) = main_config.get("keywords") {
if keywords_value.is_array() {
let mut keywords_vec = Vec::new();
for keyword in keywords_value.as_array().unwrap() {
if let Some(keyword_str) = keyword.as_str() {
keywords_vec.push(keyword_str.to_string());
}
}
Some(keywords_vec)
} else if let Some(keywords_str) = keywords_value.as_str() {
// Handle comma-separated keywords
Some(
keywords_str
.split(',')
.map(|s| s.trim().to_string())
.collect(),
)
} else {
None
}
} else {
None
};
// Create site configuration
let site_config = SiteConfig {
name: main_config["name"]
.as_str()
.unwrap_or("default")
.to_string(),
title: main_config["title"].as_str().unwrap_or("").to_string(),
description: main_config["description"].as_str().map(|s| s.to_string()),
keywords,
url: main_config["url"].as_str().map(|s| s.to_string()),
favicon: main_config["favicon"].as_str().map(|s| s.to_string()),
header: header_config,
footer: footer_config,
collections: collection_configs,
pages: page_configs,
base_path: path.to_path_buf(),
};
Ok(site_config)
}
/// Parse site configuration from a directory using the specified strategy
///
/// # Arguments
///
/// * `path` - Path to the directory containing configuration files
/// * `strategy` - Parsing strategy to use
///
/// # Returns
///
/// The parsed site configuration or an error
pub fn parse_site_config_with_strategy<P: AsRef<Path>>(path: P, strategy: ParsingStrategy) -> Result<SiteConfig> {
let path = path.as_ref();
// Check if the directory exists
if !path.exists() {
return Err(WebBuilderError::MissingDirectory(path.to_path_buf()));
}
// Check if the directory is a directory
if !path.is_dir() {
return Err(WebBuilderError::InvalidConfiguration(format!(
"{:?} is not a directory",
path
)));
}
// Create a basic site configuration
let mut site_config = SiteConfig {
name: "default".to_string(),
title: "".to_string(),
description: None,
keywords: None,
url: None,
favicon: None,
header: None,
footer: None,
collections: Vec::new(),
pages: Vec::new(),
base_path: path.to_path_buf(),
};
// Parse main.hjson
let main_path = path.join("main.hjson");
if main_path.exists() {
let main_config: Value = parse_file(main_path, strategy)?;
// Extract values from main.hjson
if let Some(name) = main_config.get("name").and_then(|v| v.as_str()) {
site_config.name = name.to_string();
}
if let Some(title) = main_config.get("title").and_then(|v| v.as_str()) {
site_config.title = title.to_string();
}
if let Some(description) = main_config.get("description").and_then(|v| v.as_str()) {
site_config.description = Some(description.to_string());
}
if let Some(url) = main_config.get("url").and_then(|v| v.as_str()) {
site_config.url = Some(url.to_string());
}
if let Some(favicon) = main_config.get("favicon").and_then(|v| v.as_str()) {
site_config.favicon = Some(favicon.to_string());
}
if let Some(keywords) = main_config.get("keywords").and_then(|v| v.as_array()) {
let keywords_vec: Vec<String> = keywords
.iter()
.filter_map(|k| k.as_str().map(|s| s.to_string()))
.collect();
if !keywords_vec.is_empty() {
site_config.keywords = Some(keywords_vec);
}
}
}
// Parse header.hjson
let header_path = path.join("header.hjson");
if header_path.exists() {
site_config.header = Some(parse_file(header_path, strategy)?);
}
// Parse footer.hjson
let footer_path = path.join("footer.hjson");
if footer_path.exists() {
site_config.footer = Some(parse_file(footer_path, strategy)?);
}
// Parse collection.hjson
let collection_path = path.join("collection.hjson");
if collection_path.exists() {
let collection_array: Vec<CollectionConfig> = parse_file(collection_path, strategy)?;
// Process each collection
for mut collection in collection_array {
// Convert web interface URL to Git URL if needed
if let Some(url) = &collection.url {
if url.contains("/src/branch/") {
// This is a web interface URL, convert it to a Git URL
let parts: Vec<&str> = url.split("/src/branch/").collect();
if parts.len() == 2 {
collection.url = Some(format!("{}.git", parts[0]));
}
}
}
site_config.collections.push(collection);
}
}
// Parse pages directory
let pages_path = path.join("pages");
if pages_path.exists() && pages_path.is_dir() {
for entry in fs::read_dir(pages_path)? {
let entry = entry?;
let entry_path = entry.path();
if entry_path.is_file() && entry_path.extension().map_or(false, |ext| ext == "hjson") {
let pages_array: Vec<PageConfig> = parse_file(&entry_path, strategy)?;
site_config.pages.extend(pages_array);
}
}
}
Ok(site_config)
}
/// Parse site configuration from a directory using the recommended strategy (Hjson)
///
/// Convenience wrapper around `parse_site_config_with_strategy` with
/// `ParsingStrategy::Hjson`.
///
/// # Arguments
///
/// * `path` - Path to the directory containing configuration files
///
/// # Returns
///
/// The parsed site configuration or an error
pub fn parse_site_config_recommended<P: AsRef<Path>>(path: P) -> Result<SiteConfig> {
// Delegate unchanged; only the strategy is fixed here.
parse_site_config_with_strategy(path, ParsingStrategy::Hjson)
}
/// Parse site configuration from a directory using the auto-detect strategy
///
/// Convenience wrapper around `parse_site_config_with_strategy` with
/// `ParsingStrategy::Auto`.
///
/// # Arguments
///
/// * `path` - Path to the directory containing configuration files
///
/// # Returns
///
/// The parsed site configuration or an error
pub fn parse_site_config_auto<P: AsRef<Path>>(path: P) -> Result<SiteConfig> {
// Delegate unchanged; only the strategy is fixed here.
parse_site_config_with_strategy(path, ParsingStrategy::Auto)
}