feat: Improve collection scanning and add .gitignore entries

- Add `.gitignore` entries for `webmeta.json` and `.vscode`
- Improve collection scanning logging for better debugging
- Improve error handling in collection methods for robustness
This commit is contained in:
Mahmoud Emad
2025-05-15 08:53:16 +03:00
parent cad8a6d125
commit ea25db7d29
22 changed files with 3042 additions and 102 deletions

View File

@@ -1,12 +1,11 @@
use std::fs;
use std::path::{Path, PathBuf};
use walkdir::WalkDir;
use std::fs;
use crate::error::{DocTreeError, Result};
use crate::storage::RedisStorage;
use crate::utils::{name_fix, markdown_to_html, ensure_md_extension};
use crate::include::process_includes;
use rand::Rng;
use crate::storage::RedisStorage;
use crate::utils::{ensure_md_extension, markdown_to_html, name_fix};
use ipfs_api::{IpfsApi, IpfsClient};
// use chacha20poly1305::aead::NewAead;
@@ -61,10 +60,16 @@ impl Collection {
///
/// Ok(()) on success or an error
pub fn scan(&self) -> Result<()> {
println!("DEBUG: Scanning collection '{}' at path {:?}", self.name, self.path);
println!(
"DEBUG: Scanning collection '{}' at path {:?}",
self.name, self.path
);
// Delete existing collection data if any
println!("DEBUG: Deleting existing collection data from Redis key 'collections:{}'", self.name);
println!(
"DEBUG: Deleting existing collection data from Redis key 'collections:{}'",
self.name
);
self.storage.delete_collection(&self.name)?;
// Store the collection's full absolute path in Redis
let absolute_path = std::fs::canonicalize(&self.path)
@@ -72,9 +77,14 @@ impl Collection {
.to_string_lossy()
.to_string();
println!("DEBUG: Storing collection path in Redis key 'collections:{}:path'", self.name);
self.storage.store_collection_path(&self.name, &absolute_path)?;
self.storage.store_collection_path(&self.name, &self.path.to_string_lossy())?;
println!(
"DEBUG: Storing collection path in Redis key 'collections:{}:path'",
self.name
);
self.storage
.store_collection_path(&self.name, &absolute_path)?;
self.storage
.store_collection_path(&self.name, &self.path.to_string_lossy())?;
// Walk through the directory
let walker = WalkDir::new(&self.path);
@@ -116,11 +126,11 @@ impl Collection {
// Determine if this is a document (markdown file) or an image
let is_markdown = filename.to_lowercase().ends_with(".md");
let is_image = filename.to_lowercase().ends_with(".png") ||
filename.to_lowercase().ends_with(".jpg") ||
filename.to_lowercase().ends_with(".jpeg") ||
filename.to_lowercase().ends_with(".gif") ||
filename.to_lowercase().ends_with(".svg");
let is_image = filename.to_lowercase().ends_with(".png")
|| filename.to_lowercase().ends_with(".jpg")
|| filename.to_lowercase().ends_with(".jpeg")
|| filename.to_lowercase().ends_with(".gif")
|| filename.to_lowercase().ends_with(".svg");
let file_type = if is_markdown {
"document"
@@ -132,13 +142,19 @@ impl Collection {
// Store in Redis using the namefixed filename as the key
// Store the original relative path to preserve case and special characters
println!("DEBUG: Storing {} '{}' in Redis key 'collections:{}' with key '{}' and value '{}'",
file_type, filename, self.name, namefixed_filename, rel_path.to_string_lossy());
println!(
"DEBUG: Storing {} '{}' in Redis key 'collections:{}' with key '{}' and value '{}'",
file_type,
filename,
self.name,
namefixed_filename,
rel_path.to_string_lossy()
);
self.storage.store_collection_entry(
&self.name,
&namefixed_filename,
&rel_path.to_string_lossy()
&rel_path.to_string_lossy(),
)?;
}
@@ -162,7 +178,9 @@ impl Collection {
let namefixed_page_name = ensure_md_extension(&namefixed_page_name);
// Get the relative path from Redis
let rel_path = self.storage.get_collection_entry(&self.name, &namefixed_page_name)
let rel_path = self
.storage
.get_collection_entry(&self.name, &namefixed_page_name)
.map_err(|_| DocTreeError::PageNotFound(page_name.to_string()))?;
// Check if the path is valid
@@ -171,14 +189,16 @@ impl Collection {
// Return an error since the actual file path is not available
return Err(DocTreeError::IoError(std::io::Error::new(
std::io::ErrorKind::NotFound,
format!("File path not available for {} in collection {}", page_name, self.name)
format!(
"File path not available for {} in collection {}",
page_name, self.name
),
)));
}
// Read the file
let full_path = self.path.join(rel_path);
let content = fs::read_to_string(full_path)
.map_err(|e| DocTreeError::IoError(e))?;
let content = fs::read_to_string(full_path).map_err(|e| DocTreeError::IoError(e))?;
// Skip include processing at this level to avoid infinite recursion
// Include processing will be done at the higher level
@@ -215,7 +235,11 @@ impl Collection {
fs::write(&full_path, content).map_err(DocTreeError::IoError)?;
// Update Redis
self.storage.store_collection_entry(&self.name, &namefixed_page_name, &namefixed_page_name)?;
self.storage.store_collection_entry(
&self.name,
&namefixed_page_name,
&namefixed_page_name,
)?;
Ok(())
}
@@ -237,7 +261,9 @@ impl Collection {
let namefixed_page_name = ensure_md_extension(&namefixed_page_name);
// Get the relative path from Redis
let rel_path = self.storage.get_collection_entry(&self.name, &namefixed_page_name)
let rel_path = self
.storage
.get_collection_entry(&self.name, &namefixed_page_name)
.map_err(|_| DocTreeError::PageNotFound(page_name.to_string()))?;
// Delete the file
@@ -245,7 +271,8 @@ impl Collection {
fs::remove_file(full_path).map_err(DocTreeError::IoError)?;
// Remove from Redis
self.storage.delete_collection_entry(&self.name, &namefixed_page_name)?;
self.storage
.delete_collection_entry(&self.name, &namefixed_page_name)?;
Ok(())
}
@@ -260,7 +287,8 @@ impl Collection {
let keys = self.storage.list_collection_entries(&self.name)?;
// Filter to only include .md files
let pages = keys.into_iter()
let pages = keys
.into_iter()
.filter(|key| key.ends_with(".md"))
.collect();
@@ -281,7 +309,9 @@ impl Collection {
let namefixed_file_name = name_fix(file_name);
// Get the relative path from Redis
let rel_path = self.storage.get_collection_entry(&self.name, &namefixed_file_name)
let rel_path = self
.storage
.get_collection_entry(&self.name, &namefixed_file_name)
.map_err(|_| DocTreeError::FileNotFound(file_name.to_string()))?;
// Construct a URL for the file
@@ -316,7 +346,11 @@ impl Collection {
fs::write(&full_path, content).map_err(DocTreeError::IoError)?;
// Update Redis
self.storage.store_collection_entry(&self.name, &namefixed_file_name, &namefixed_file_name)?;
self.storage.store_collection_entry(
&self.name,
&namefixed_file_name,
&namefixed_file_name,
)?;
Ok(())
}
@@ -335,7 +369,9 @@ impl Collection {
let namefixed_file_name = name_fix(file_name);
// Get the relative path from Redis
let rel_path = self.storage.get_collection_entry(&self.name, &namefixed_file_name)
let rel_path = self
.storage
.get_collection_entry(&self.name, &namefixed_file_name)
.map_err(|_| DocTreeError::FileNotFound(file_name.to_string()))?;
// Delete the file
@@ -343,7 +379,8 @@ impl Collection {
fs::remove_file(full_path).map_err(DocTreeError::IoError)?;
// Remove from Redis
self.storage.delete_collection_entry(&self.name, &namefixed_file_name)?;
self.storage
.delete_collection_entry(&self.name, &namefixed_file_name)?;
Ok(())
}
@@ -358,7 +395,8 @@ impl Collection {
let keys = self.storage.list_collection_entries(&self.name)?;
// Filter to exclude .md files
let files = keys.into_iter()
let files = keys
.into_iter()
.filter(|key| !key.ends_with(".md"))
.collect();
@@ -382,7 +420,8 @@ impl Collection {
let namefixed_page_name = ensure_md_extension(&namefixed_page_name);
// Get the relative path from Redis
self.storage.get_collection_entry(&self.name, &namefixed_page_name)
self.storage
.get_collection_entry(&self.name, &namefixed_page_name)
.map_err(|_| DocTreeError::PageNotFound(page_name.to_string()))
}
@@ -396,7 +435,11 @@ impl Collection {
/// # Returns
///
/// The HTML content of the page or an error
pub fn page_get_html(&self, page_name: &str, doctree: Option<&crate::doctree::DocTree>) -> Result<String> {
pub fn page_get_html(
&self,
page_name: &str,
doctree: Option<&crate::doctree::DocTree>,
) -> Result<String> {
// Get the markdown content
let markdown = self.page_get(page_name)?;
@@ -436,9 +479,8 @@ impl Collection {
/// Ok(()) on success or an error.
pub fn export_to_ipfs(&self, output_csv_path: &Path) -> Result<()> {
// Create a new tokio runtime and block on the async export function
tokio::runtime::Runtime::new()?.block_on(async {
self.export_to_ipfs_async(output_csv_path).await
})?;
tokio::runtime::Runtime::new()?
.block_on(async { self.export_to_ipfs_async(output_csv_path).await })?;
Ok(())
}
@@ -455,25 +497,31 @@ impl Collection {
pub async fn export_to_ipfs_async(&self, output_csv_path: &Path) -> Result<()> {
use blake3::Hasher;
// use chacha20poly1305::{ChaCha20Poly1305, Aead};
use chacha20poly1305::aead::generic_array::GenericArray;
use csv::Writer;
use ipfs_api::IpfsClient;
use rand::rngs::OsRng;
use tokio::fs::File;
use tokio::io::AsyncReadExt;
use csv::Writer;
use rand::rngs::OsRng;
use chacha20poly1305::aead::generic_array::GenericArray;
// Create the output directory if it doesn't exist
// Create the output directory if it doesn't exist
if let Some(parent) = output_csv_path.parent() {
if parent.exists() && parent.is_file() {
println!("DEBUG: Removing conflicting file at output directory path: {:?}", parent);
tokio::fs::remove_file(parent).await.map_err(DocTreeError::IoError)?;
println!(
"DEBUG: Removing conflicting file at output directory path: {:?}",
parent
);
tokio::fs::remove_file(parent)
.await
.map_err(DocTreeError::IoError)?;
println!("DEBUG: Conflicting file removed.");
}
if !parent.is_dir() {
println!("DEBUG: Ensuring output directory exists: {:?}", parent);
tokio::fs::create_dir_all(parent).await.map_err(DocTreeError::IoError)?;
tokio::fs::create_dir_all(parent)
.await
.map_err(DocTreeError::IoError)?;
println!("DEBUG: Output directory ensured.");
} else {
println!("DEBUG: Output directory already exists: {:?}", parent);
@@ -481,7 +529,10 @@ impl Collection {
}
// Create the CSV writer
println!("DEBUG: Creating or overwriting CSV file at {:?}", output_csv_path);
println!(
"DEBUG: Creating or overwriting CSV file at {:?}",
output_csv_path
);
let file = std::fs::OpenOptions::new()
.write(true)
.create(true)
@@ -492,7 +543,15 @@ impl Collection {
println!("DEBUG: CSV writer created successfully");
// Write the CSV header
writer.write_record(&["collectionname", "filename", "blakehash", "ipfshash", "size"]).map_err(|e| DocTreeError::CsvError(e.to_string()))?;
writer
.write_record(&[
"collectionname",
"filename",
"blakehash",
"ipfshash",
"size",
])
.map_err(|e| DocTreeError::CsvError(e.to_string()))?;
// Connect to IPFS
// let ipfs = IpfsClient::new("127.0.0.1:5001").await.map_err(|e| DocTreeError::IpfsError(e.to_string()))?;
@@ -510,7 +569,9 @@ impl Collection {
for entry_name in entries {
println!("DEBUG: Processing entry: {}", entry_name);
// Get the relative path from Redis
let relative_path = self.storage.get_collection_entry(&self.name, &entry_name)
let relative_path = self
.storage
.get_collection_entry(&self.name, &entry_name)
.map_err(|_| DocTreeError::FileNotFound(entry_name.clone()))?;
println!("DEBUG: Retrieved relative path: {}", relative_path);
@@ -560,9 +621,12 @@ impl Collection {
println!("DEBUG: Adding file to IPFS: {:?}", file_path);
let ipfs_path = match ipfs.add(std::io::Cursor::new(content)).await {
Ok(path) => {
println!("DEBUG: Successfully added file to IPFS. Hash: {}", path.hash);
println!(
"DEBUG: Successfully added file to IPFS. Hash: {}",
path.hash
);
path
},
}
Err(e) => {
eprintln!("Error adding file to IPFS {:?}: {}", file_path, e);
continue;
@@ -588,7 +652,9 @@ impl Collection {
// Flush the CSV writer
println!("DEBUG: Flushing CSV writer");
writer.flush().map_err(|e| DocTreeError::CsvError(e.to_string()))?;
writer
.flush()
.map_err(|e| DocTreeError::CsvError(e.to_string()))?;
println!("DEBUG: CSV writer flushed successfully");
Ok(())
@@ -616,9 +682,9 @@ impl CollectionBuilder {
///
/// A new Collection or an error
pub fn build(self) -> Result<Collection> {
let storage = self.storage.ok_or_else(|| {
DocTreeError::MissingParameter("storage".to_string())
})?;
let storage = self
.storage
.ok_or_else(|| DocTreeError::MissingParameter("storage".to_string()))?;
let collection = Collection {
path: self.path,
@@ -628,4 +694,4 @@ impl CollectionBuilder {
Ok(collection)
}
}
}