This commit is contained in:
2025-08-20 04:15:43 +02:00
parent 6b9f0cf291
commit e4bb201181
95 changed files with 194 additions and 907 deletions

View File

View File

@@ -0,0 +1,31 @@
import mimetypes
import os
def check_and_add_extension(file_path: str) -> str:
    """Ensure *file_path* has an extension, guessing one from file content.

    If the path already has an extension it is returned unchanged. Otherwise
    the first bytes are sniffed with detect_content_type() and, when a known
    type is found, the file is renamed on disk with the guessed extension.

    Args:
        file_path: Path to an existing file.

    Returns:
        The new path if the file was renamed, otherwise the original path.
    """
    # Already has an extension -> nothing to do.
    if os.path.splitext(file_path)[1]:
        return file_path

    with open(file_path, 'rb') as f:
        content = f.read(2048)  # first 2 KiB is enough for magic-number sniffing

    content_type = detect_content_type(content)
    # BUGFIX: guess_extension('application/octet-stream') returns '.bin',
    # which would rename every file of unknown type. Leave those untouched.
    if content_type == 'application/octet-stream':
        return file_path

    extension = mimetypes.guess_extension(content_type)
    if extension:
        new_file_path = file_path + extension
        os.rename(file_path, new_file_path)
        return new_file_path
    return file_path
def detect_content_type(content: bytes) -> str:
    """Guess a MIME type from the magic bytes at the start of *content*.

    Recognizes JPEG, PNG and GIF; anything else falls back to the generic
    'application/octet-stream'.
    """
    # (magic prefix, MIME type) signature table — extend as needed.
    signatures = (
        (b'\xff\xd8', 'image/jpeg'),
        (b'\x89PNG', 'image/png'),
        (b'GIF', 'image/gif'),
    )
    for magic, mime in signatures:
        if content.startswith(magic):
            return mime
    return 'application/octet-stream'

270
herolib/tools/gitscanner.py Normal file
View File

@@ -0,0 +1,270 @@
import sys
import os
# Add the parent directory of herotools to the Python module search path
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
import time
import json
import subprocess
from typing import Optional,List
import redis
from herotools.logger import logger
from herotools.texttools import name_fix
from enum import Enum, auto
from dataclasses import dataclass
import git
# Initialize Redis client
redis_client = redis.StrictRedis(host='localhost', port=6379, db=0)
# Define the ChangeType Enum
class ChangeType(Enum):
    """Kind of change a commit applied to a single file."""
    DEL = 'del'  # file was deleted
    MOD = 'mod'  # file was modified
    NEW = 'new'  # file was added
@dataclass
class FileChange:
    """One file-level change extracted from a commit diff."""
    commit_hash: str        # hex SHA of the commit that introduced the change
    commit_time: str        # stringified committed_datetime of that commit
    path: str               # relative path in the repo
    change_type: ChangeType  # DEL / MOD / NEW classification of the diff
class Repo:
    """State tracker for one local Git repository.

    Queries local and remote commit state via the ``git`` CLI and GitPython,
    and persists its own check state as JSON in Redis under the key
    ``gitcheck:<cat>:<account>:<name>``.
    """

    def __init__(self, cat: str, account: str, name: str, path: str):
        # Identity: category label, account (parent dir name), repo dir name.
        self.cat = cat
        self.account = account
        self.name = name
        # Absolute path to the repository working tree.
        self.path = path
        # Epoch time when a remote change was last detected (set by log_change).
        self.hash_last_found: Optional[float] = None
        # Commit hash up to which changes were already processed.
        self.hash_last_processed: Optional[str] = None
        # Epoch time of the last remote poll (throttles should_check_again).
        self.lastcheck: Optional[float] = None

    def __str__(self):
        # Pretty JSON rendering of the full state (used by logger.debug in gitscan).
        return json.dumps({
            "cat": self.cat,
            "account": self.account,
            "name": self.name,
            "path": self.path,
            "hash_last_found": self.hash_last_found,
            "hash_last_processed": self.hash_last_processed,
            "lastcheck": self.lastcheck
        }, indent=2)

    def _redis_key(self) -> str:
        """Redis key under which this repo's state is stored."""
        return f"gitcheck:{self.cat}:{self.account}:{self.name}"

    def save_to_redis(self):
        """Persist the whole instance state as a JSON blob in Redis."""
        redis_client.set(self._redis_key(), json.dumps(self.__dict__))

    @staticmethod
    def load_from_redis(cat: str, account: str, name: str) -> Optional['Repo']:
        """Load a previously saved Repo from Redis, or None when absent."""
        redis_key = f"gitcheck:{cat}:{account}:{name}"
        data = redis_client.get(redis_key)
        if data:
            data = json.loads(data)
            repo = Repo(data["cat"], data["account"], data["name"], data["path"])
            # Restore the optional bookkeeping fields; .get() tolerates old blobs.
            repo.hash_last_found = data.get("hash_last_found")
            repo.hash_last_processed = data.get("hash_last_processed")
            repo.lastcheck = data.get("lastcheck")
            return repo
        return None

    def get_remote_commit_hash(self, branch: str) -> str:
        """Get the latest commit hash from the remote repository."""
        result = subprocess.run(
            ['git', 'ls-remote', 'origin', f'refs/heads/{branch}'],
            cwd=self.path,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True
        )
        if result.returncode != 0:
            raise Exception(f"Error fetching remote commit hash: {result.stderr}")
        # ls-remote prints "<hash>\t<ref>"; the first token is the hash.
        return result.stdout.split()[0]

    def get_local_commit_hash(self) -> str:
        """Get the latest commit hash from the local repository."""
        result = subprocess.run(
            ['git', 'rev-parse', 'HEAD'],
            cwd=self.path,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True
        )
        if result.returncode != 0:
            raise Exception(f"Error fetching local commit hash: {result.stderr}")
        return result.stdout.strip()

    def get_current_branch(self) -> str:
        """Return the name of the currently checked-out branch.

        NOTE(review): on a detached HEAD `git branch --show-current` prints
        nothing, so split()[0] would raise IndexError — confirm repos are
        always on a branch.
        """
        result = subprocess.run(
            ['git', 'branch', '--show-current'],
            cwd=self.path,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True
        )
        if result.returncode != 0:
            raise Exception(f"Error fetching local branch name: {result.stderr}")
        return result.stdout.split()[0]

    def get_remote_default_branch(self) -> str:
        """Return the remote's default branch name (e.g. 'main').

        Parses the `ls-remote --symref origin HEAD` output, whose first line
        is "ref: refs/heads/<branch>\tHEAD": token [1] is the full ref, and
        the last '/' component is the branch name.
        """
        result = subprocess.run(
            ['git', 'ls-remote', '--symref', 'origin', 'HEAD'],
            cwd=self.path,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True
        )
        if result.returncode != 0:
            raise Exception(f"Error fetching local branch name: {result.stderr}")
        return result.stdout.split()[1].split('/')[-1]

    def should_check_again(self) -> bool:
        """Determine if we should check the repository again based on the last check time."""
        if self.lastcheck is None:
            return True
        # Throttle remote polls to at most once per 60 seconds.
        return (time.time() - self.lastcheck) > 60

    def update_last_check_time(self) -> None:
        """Update the last check time."""
        self.lastcheck = time.time()
        self.save_to_redis()

    def log_change(self, epoch_time: float) -> None:
        """Log a detected change in Redis."""
        self.hash_last_found = epoch_time
        self.save_to_redis()

    def check_for_changes(self, branch: str = 'main') -> None:
        """Check the repository for updates and log changes if found.

        NOTE(review): lastcheck is only refreshed on the no-change path, so a
        repo with pending changes is re-diffed on every call — confirm this
        is intended. Also "NO CHANGED FOUND" looks like a typo for
        "NO CHANGES FOUND" (string kept as-is).
        """
        if not self.should_check_again():
            print("WAIT TO CHECK FOR CHANGES")
            return
        try:
            diff_commits = self.get_local_remote_diff_commits(branch)
            if diff_commits != []:
                print("FOUND SOME CHANGES")
                self.log_change(time.time())
                file_changes = self.get_file_changes_from_commits(diff_commits)
                self.print_file_changes(file_changes)
            else:
                print("NO CHANGED FOUND")
                self.update_last_check_time()
        except Exception as e:
            print(f"An error occurred while checking repo {self.path}: {e}")

    def get_local_remote_diff_commits(self, branch: str) -> List[git.Commit]:
        """Return the commits by which local and remote *branch* diverge.

        Fetches from origin, then collects commits on both sides of the merge
        base (ahead of remote + behind remote); empty list when in sync.
        """
        # Open the repository
        repo = git.Repo(self.path)
        # Get the local branch
        local_branch = repo.heads[branch]
        # Get the remote reference for the branch
        remote_ref = repo.remotes.origin.refs[branch]
        # Fetch the latest changes from the remote
        repo.remotes.origin.fetch()
        # Get the commit hashes of the local and remote branches
        local_commit = local_branch.commit
        remote_commit = remote_ref.commit
        if local_commit == remote_commit:
            return []
        # Get the common ancestor commit
        base_commit = repo.merge_base(local_commit, remote_commit)[0]
        # Get the ahead and behind commits
        ahead_commits = list(repo.iter_commits(f"{base_commit}..{local_commit}"))
        behind_commits = list(repo.iter_commits(f"{base_commit}..{remote_commit}"))
        # Combine the ahead and behind commits
        diff_commits = ahead_commits + behind_commits
        return diff_commits

    def get_file_changes_from_commits(self, commit_list: List[git.Commit]) -> List[FileChange]:
        """Flatten *commit_list* into per-file FileChange records.

        NOTE(review): when hash_last_processed is None, commit.diff(None)
        diffs against the working tree — confirm that is the intended
        baseline on first run.
        """
        file_changes = []
        for commit in commit_list:
            # Diff each commit against the last processed hash.
            diffs = commit.diff(self.hash_last_processed, create_patch=True)
            for diff in diffs:
                # Classify the diff; anything neither deleted nor new is a modification.
                if diff.deleted_file:
                    change_type = ChangeType.DEL
                elif diff.new_file:
                    change_type = ChangeType.NEW
                else:
                    change_type = ChangeType.MOD
                file_change = FileChange(
                    commit_hash=commit.hexsha,
                    commit_time=str(commit.committed_datetime),
                    # New files only have a b_path; others report the a_path.
                    path=diff.b_path if diff.new_file else diff.a_path,
                    change_type=change_type
                )
                file_changes.append(file_change)
        return file_changes

    def print_file_changes(self, file_changes: List[FileChange]):
        """Print each FileChange as a human-readable block on stdout."""
        for file_change in file_changes:
            print("------------------------------------")
            print(f"Commit Hash: {file_change.commit_hash}")
            print(f"Commit Time: {file_change.commit_time}")
            print(f"File Path: {file_change.path}")
            print(f"Change Type: {file_change.change_type.value}")
            print("------------------------------------")
def gitscan(path: str, cat: str) -> None:
    """Walk over directories to find Git repositories and check them.

    A directory containing a `.git` subdirectory is treated as a repo root;
    its parent directory name is the account, its own name the repo name.
    """
    base = os.path.abspath(os.path.expanduser(path))
    for root, dirs, files in os.walk(base):
        if '.git' not in dirs:
            # Not a repo root: keep walking, but never descend into .git dirs.
            dirs[:] = [d for d in dirs if d != '.git']
            continue
        account = os.path.basename(os.path.dirname(root))
        repo_name = os.path.basename(root)
        # Reuse persisted state when available, otherwise start fresh.
        repo = Repo.load_from_redis(cat, account, repo_name)
        if repo is None:
            repo = Repo(cat, account, repo_name, root)
        branch = repo.get_current_branch()
        logger.debug(f"root: {root}")
        logger.debug(f"accountname: {account}")
        logger.debug(f"reponame: {repo_name}")
        logger.debug(f"branch: {branch}")
        logger.debug(f"repo: {repo}")
        repo.check_for_changes(branch)
        # Repo found: do not recurse into its subdirectories.
        dirs[:] = []
def print_redis_client():
    """Dump every key/value pair in the Redis database to stdout."""
    cursor = 0
    while True:
        # SCAN returns the next cursor plus a batch of keys; cursor 0 ends the scan.
        cursor, batch = redis_client.scan(cursor)
        for key in batch:
            print(key)
            print(redis_client.get(key))
            print()
        if cursor == 0:
            break
if __name__ == "__main__":
    # print_redis_client()
    # Scan every repo under this account tree, tagging its state as 'mycat'.
    mypath = "~/code/git.threefold.info/projectmycelium"
    category = 'mycat'
    gitscan(path=mypath, cat=category)
    # print_redis_client()

39
herolib/tools/logger.py Normal file
View File

@@ -0,0 +1,39 @@
import logging
import colorlog
# Color for the levelname portion of each record, keyed by severity.
log_colors_config = {
    'DEBUG': 'cyan',
    'INFO': 'green',
    'WARNING': 'yellow',
    'ERROR': 'red',
    'CRITICAL': 'bold_red',
}
# Secondary colors: the logger name is always blue; levelname reuses the map above.
secondary_log_colors_config = {
    'name': {
        'DEBUG': 'blue',
        'INFO': 'blue',
        'WARNING': 'blue',
        'ERROR': 'blue',
        'CRITICAL': 'blue'
    },
    'levelname': log_colors_config
}
# Colorized "timestamp - name - level - message" format.
formatter = colorlog.ColoredFormatter(
    '%(log_color)s%(asctime)s - %(name_log_color)s%(name)s - %(levelname_log_color)s%(levelname)s - %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
    log_colors=log_colors_config,
    secondary_log_colors=secondary_log_colors_config
)
# Create a handler
handler = logging.StreamHandler()
handler.setFormatter(formatter)
# NOTE(review): this configures the ROOT logger at import time, so importing
# this module colorizes and enables DEBUG logging for the whole process —
# confirm that is intended (a named logger would be less invasive).
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
logger.addHandler(handler)

13
herolib/tools/md5.py Normal file
View File

@@ -0,0 +1,13 @@
import hashlib
from typing import List
def file_md5(file_path: str) -> str:
"""
Compute the MD5 hash of the file content.
"""
hash_md5 = hashlib.md5()
with open(file_path, "rb") as f:
for chunk in iter(lambda: f.read(4096), b""):
hash_md5.update(chunk)
return hash_md5.hexdigest()

55
herolib/tools/ourtime.py Normal file
View File

@@ -0,0 +1,55 @@
from datetime import datetime, timezone, timedelta
import re
def epoch_get(deadline: str) -> int:
    """Convert a deadline string into a Unix timestamp (epoch seconds).

    Supported formats:
      - Relative: ``+Nh`` (hours), ``+Nd`` (days), ``+Nw`` (weeks),
        ``+Nm`` (months, approximated as 30 days), measured from now (UTC).
      - Absolute: ``DD/MM/YYYY``, ``DD/MM/YY``, or ``DD/MM`` (current year,
        or next year if that day has already passed). The time of day
        defaults to noon (12:00 UTC).

    Returns:
        The deadline as an integer Unix timestamp.

    Raises:
        ValueError: if the input matches none of the supported formats.
    """
    now = datetime.now(timezone.utc)

    # Relative form. BUGFIX: fullmatch (instead of match) rejects trailing
    # garbage such as '+1hx', which previously parsed silently as '+1h'.
    relative_match = re.fullmatch(r'\+(\d+)([hdwm])', deadline.strip())
    if relative_match:
        amount = int(relative_match.group(1))
        unit = relative_match.group(2)
        delta = {
            'h': timedelta(hours=amount),
            'd': timedelta(days=amount),
            'w': timedelta(weeks=amount),
            'm': timedelta(days=amount * 30),  # month approximated as 30 days
        }[unit]
        return int((now + delta).timestamp())

    # Absolute form: try the most specific format first.
    for fmt in ('%d/%m/%Y', '%d/%m/%y', '%d/%m'):
        try:
            date_obj = datetime.strptime(deadline, fmt)
        except ValueError:
            continue
        if fmt == '%d/%m':
            # No year given: use the current year...
            date_obj = date_obj.replace(year=now.year)
            # ...but if that day already passed this year, assume next year.
            if date_obj.replace(tzinfo=timezone.utc) < now:
                date_obj = date_obj.replace(year=date_obj.year + 1)
        # No time of day is accepted, so default to noon UTC.
        date_obj = date_obj.replace(hour=12, minute=0, second=0, microsecond=0, tzinfo=timezone.utc)
        return int(date_obj.timestamp())

    raise ValueError("Invalid deadline format. Use +Xh/d/w/m for relative or DD/MM/YYYY for absolute dates.")

View File

@@ -0,0 +1,26 @@
import os
def expand_path(path: str) -> str:
    """
    Expand ~ to the user's home directory and return the absolute path.
    """
    expanded = os.path.expanduser(path)
    return os.path.abspath(expanded)
def remove_file_if_exists(file_path):
    """Best-effort removal of *file_path*; never raises.

    Handles regular files, symlinks (including broken ones) and empty
    directories; a missing path is silently ignored, and permission or
    other failures are reported on stdout instead of raised.
    """
    try:
        # Works for regular files and symlinks alike.
        os.remove(file_path)
    except IsADirectoryError:
        # Path is a directory: fall back to rmdir (empty dirs only).
        os.rmdir(file_path)
    except FileNotFoundError:
        # Nothing to remove.
        pass
    except PermissionError:
        print(f"Permission denied: Unable to remove {file_path}")
    except Exception as e:
        print(f"An error occurred while trying to remove {file_path}: {str(e)}")

110
herolib/tools/texttools.py Normal file
View File

@@ -0,0 +1,110 @@
import re
import unicodedata
import random
def description_fix(description):
    """Lowercase, strip accents, and keep only a-z, 0-9 and whitespace."""
    lowered = description.lower()
    # Decompose accented characters, then drop anything outside ASCII.
    ascii_only = unicodedata.normalize('NFKD', lowered).encode('ASCII', 'ignore').decode('ASCII')
    cleaned = re.sub(r'[^a-z0-9\s]', '', ascii_only)
    return cleaned.strip()
# def name_fix(name: str) -> str:
# """
# Normalize the string to lowercase ASCII, replace spaces and specific punctuations with underscores,
# and remove non-ASCII characters.
# """
# name = name.lower()
# name = unicodedata.normalize('NFKD', name).encode('ascii', 'ignore').decode('ascii')
# name = re.sub(r'[ :;!]', '_', name) # Replace spaces and specific punctuations with underscores
# name = re.sub(r'\W+', '', name) # Remove any other non-word characters
# return name
def name_fix(name: str) -> str:
    """
    Normalize the string to lowercase ASCII, replace spaces and specific punctuations with underscores,
    maintain dots, and remove non-ASCII characters.
    """
    lowered = name.lower()
    # Decompose accents, then drop any non-ASCII leftovers.
    ascii_name = unicodedata.normalize('NFKD', lowered).encode('ascii', 'ignore').decode('ascii')
    # Spaces and : ; ! become underscores.
    underscored = re.sub(r'[ :;!]', '_', ascii_name)
    # Everything except word chars, dots and underscores is removed.
    return re.sub(r'[^\w._]+', '', underscored)
def name_obfuscate(name):
    """Obfuscate a name by swapping letters, then title-case the result.

    Vowels rotate (a->e->i->o->u->a) and consonants swap within fixed pairs;
    non-alphabetic characters pass through unchanged. The vowel and consonant
    maps have disjoint keys, so one combined translation table is equivalent
    to applying them separately.
    """
    table = str.maketrans({
        # consonant pairs
        'b': 'p', 'c': 'k', 'd': 't', 'f': 'v', 'g': 'j', 'h': 'x',
        'j': 'q', 'k': 'c', 'l': 'r', 'm': 'n', 'n': 'm', 'p': 'b',
        'q': 'g', 'r': 'l', 's': 'z', 't': 'd', 'v': 'f', 'w': 'y',
        'x': 'h', 'y': 'w', 'z': 's',
        # vowel rotation
        'a': 'e', 'e': 'i', 'i': 'o', 'o': 'u', 'u': 'a',
    })
    # Lowercase, obfuscate word by word, rejoin with single spaces.
    words = name.lower().split()
    obfuscated = ' '.join(word.translate(table) for word in words)
    # Capitalize the first letter of each word.
    return obfuscated.title()
def dedent(content: str) -> str:
    """Strip blank boundary lines and remove the common leading indent.

    Leading and trailing all-whitespace lines are dropped; the smallest
    indent found on any non-empty line is removed from every line. Lines
    shorter than that indent (whitespace-only interior lines) are kept
    unchanged.
    """
    lines = content.splitlines()

    # Trim blank lines from both ends by index instead of popping.
    start, end = 0, len(lines)
    while start < end and not lines[start].strip():
        start += 1
    while end > start and not lines[end - 1].strip():
        end -= 1
    lines = lines[start:end]
    if not lines:
        return ""

    # Smallest leading-space count among non-empty lines.
    margin = min(
        len(line) - len(line.lstrip())
        for line in lines
        if line.strip()
    )

    trimmed = [
        line[margin:] if len(line) > margin else line
        for line in lines
    ]
    return "\n".join(trimmed)
if __name__ == "__main__":
    # Quick manual smoke test of the two name helpers.
    print("fixed name:", name_fix("John Doe"))
    print("obfuscated name:", name_obfuscate("John Doe"))