This commit is contained in:
2025-08-20 04:15:43 +02:00
parent 6b9f0cf291
commit e4bb201181
95 changed files with 194 additions and 907 deletions

0
herolib/__init__.py Normal file

Binary file not shown.


@@ -0,0 +1,69 @@
import os
from pydub import AudioSegment
import assemblyai as aai
class Client:
def __init__(self):
api_key = os.getenv("ASSEMBLYAI")
if not api_key:
raise EnvironmentError(
"Please set the ASSEMBLYAI environment variable with your AssemblyAI API key."
)
self.api_key = api_key
aai.settings.api_key = self.api_key
self.transcriber = aai.Transcriber()
def convert_to_ogg_mono(self, input_path: str, output_path: str):
"""Converts an audio file from .mp4 to .ogg (mono)."""
audio = AudioSegment.from_file(input_path, format="mp4")
# Convert to mono if needed by uncommenting the line below
# audio = audio.set_channels(1)
audio.export(output_path, format="ogg")
print(f"Converted to .ogg in {output_path}")
def transcribe_audio(self, audio_path: str, output_path: str):
"""Transcribes the audio file and saves the transcription to a Markdown file."""
config = aai.TranscriptionConfig(
speaker_labels=True,
)
transcript = self.transcriber.transcribe(audio_path, config)
with open(output_path, "w", encoding="utf-8") as f:
for utterance in transcript.utterances:
f.write(
f"** Speaker {utterance.speaker}:\n{utterance.text}\n-------------\n"
)
print(f"Transcription saved to {output_path}")
def transcribe_audio_file(self, input_path: str, output_transcription_path: str):
"""Handles the entire process from conversion to transcription and cleanup."""
converted_audio_path = input_path.replace(".mp4", ".ogg")
# Convert .mp4 to .ogg
self.convert_to_ogg_mono(input_path, converted_audio_path)
# Perform the transcription
self.transcribe_audio(converted_audio_path, output_transcription_path)
# Optionally, clean up the converted file
os.remove(converted_audio_path)
print(f"Removed temporary file {converted_audio_path}")
# Example usage:
if __name__ == "__main__":
# Retrieve API key from environment variable
# Define the paths for the input audio and output transcription
input_audio_path = "/tmp/475353425.mp4"
output_transcription_path = "/tmp/transcribe_475353425.md"
# Perform the transcription process
client = Client()
client.transcribe_audio_file(input_audio_path, output_transcription_path)

19
herolib/clients/readme.md Normal file

@@ -0,0 +1,19 @@
# Vimeo Client

Needs the following functionality:

- upload a video
- download a video
- list videos

## Some info

- https://developer.vimeo.com/api/reference

## Remarks

To use it, make sure you have the secrets:

```bash
hero git clone -u git@git.threefold.info:despiegk/hero_secrets.git
source git.threefold.info/projectmycelium/hero_server/myenv.sh
```
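
A minimal usage sketch (hedged: assumes the `VimeoClient` added in this commit is importable; the module name is an assumption):

```python
from vimeo_client import VimeoClient  # hypothetical module name

cl = VimeoClient()                    # reads the VIMEO_* env vars
for video in cl.get_videos():         # list videos
    print(video.name, video.uri)
cl.download("475353425", "/tmp/475353425.mp4")  # download by video ID
```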


@@ -0,0 +1,241 @@
from dataclasses import dataclass, field, asdict
from typing import List, Optional
from stellar_sdk import Keypair, Server, StrKey
import json
import redis
from stellar.model import StellarAsset, StellarAccount
import os
import csv
import toml
from herotools.texttools import description_fix
class HorizonServer:
def __init__(self, instance: str = "default", network: str = "main", tomlfile: str = "", owner: str = ""):
"""
        Load a Stellar account's information using the Horizon server.

        Horizon is the API for interacting with the Stellar network: it provides endpoints to submit transactions, check account balances, and perform other operations on the Stellar ledger.

        All results are cached in Redis.
        """
self.redis_client = redis.Redis(host='localhost', port=6379, db=0) # Adjust as needed
self.instance = instance
if network not in ['main', 'testnet']:
raise ValueError("Invalid network value. Must be 'main' or 'testnet'.")
self.network = network
testnet = self.network == 'testnet'
self.server = Server("https://horizon-testnet.stellar.org" if testnet else "https://horizon.stellar.org")
self.tomlfile = os.path.expanduser(tomlfile)
self.owner = owner
if self.tomlfile:
self.toml_load()
def account_exists(self, pubkey: str) -> bool:
"""
Check if an account exists in the Redis cache based on the public key.
"""
redis_key = f"stellar:{self.instance}:accounts:{pubkey}"
        return self.redis_client.exists(redis_key) > 0
def account_get(self, key: str, reload: bool = False, name: str = "", description: str = "", cat: str = "") -> StellarAccount:
"""
Load a Stellar account's information.
Args:
key (str): The private or public key of the Stellar account.
reset (bool, optional): Whether to force a refresh of the cached data. Defaults to False.
name (str, optional): Name for the account. Defaults to "".
description (str, optional): Description for the account. Defaults to "".
owner (str, optional): Owner of the account. Defaults to "".
cat (str, optional): Category of the account. Defaults to "".
Returns:
StellarAccount: A struct containing the account's information.
"""
if key == "" and name:
for redis_key in self.redis_client.scan_iter(f"stellar:{self.instance}:accounts:*"):
data = self.redis_client.get(redis_key)
if data:
                data = json.loads(data.decode())
if data.get('name') == name and data.get('priv_key', data.get('public_key')):
key = data.get('priv_key', data.get('public_key'))
break
if key == "":
raise ValueError("No key provided")
# Determine if the key is a public or private key
if StrKey.is_valid_ed25519_public_key(key):
public_key = key
priv_key = ""
elif StrKey.is_valid_ed25519_secret_seed(key):
priv_key = key
keypair = Keypair.from_secret(priv_key)
public_key = keypair.public_key
else:
raise ValueError("Invalid Stellar key provided")
redis_key = f"stellar:{self.instance}:accounts:{public_key}"
data = self.redis_client.get(redis_key)
changed = False
if data:
try:
                data = json.loads(data.decode())
except Exception as e:
print(data)
raise e
data['assets'] = [StellarAsset(**asset) for asset in data['assets']]
account = StellarAccount(**data)
if description!="" and description!=account.description:
account.description = description
changed = True
if name!="" and name!=account.name:
account.name = name
changed = True
if self.owner!="" and self.owner!=account.owner:
account.owner = self.owner
changed = True
if cat!="" and cat!=account.cat:
account.cat = cat
changed = True
else:
account = StellarAccount(public_key=public_key, description=description, name=name, priv_key=priv_key, owner=self.owner, cat=cat)
changed = True
if reload or account.assets == []:
changed = True
if reload:
account.assets = []
account_data = self.server.accounts().account_id(public_key).call()
account.assets.clear() # Clear existing assets to avoid duplication
for balance in account_data['balances']:
asset_type = balance['asset_type']
if asset_type == 'native':
account.assets.append(StellarAsset(type="XLM", balance=balance['balance']))
else:
if 'asset_code' in balance:
account.assets.append(StellarAsset(
type=balance['asset_code'],
issuer=balance['asset_issuer'],
balance=balance['balance']
))
changed = True
        # Persist to the Redis cache if anything changed (TTL set in account_save)
if changed:
self.account_save(account)
return account
def comment_add(self, pubkey: str, comment: str, ignore_non_exist: bool = False):
"""
Add a comment to a Stellar account based on the public key.
Args:
pubkey (str): The public key of the Stellar account.
comment (str): The comment to add to the account.
"""
comment = description_fix(comment)
if not self.account_exists(pubkey):
if ignore_non_exist:
return
raise ValueError("Account does not exist in the cache")
account = self.account_get(pubkey)
account.comments.append(comment)
self.account_save(account)
def account_save(self, account: StellarAccount):
"""
Save a Stellar account's information to the Redis cache.
Args:
account (StellarAccount): The account to save.
"""
redis_key = f"stellar:{self.instance}:accounts:{account.public_key}"
self.redis_client.setex(redis_key, 600, json.dumps(asdict(account)))
def reload_cache(self):
"""
Walk over all known accounts and reload their information.
"""
for redis_key in self.redis_client.scan_iter(f"stellar:{self.instance}:accounts:*"):
data = self.redis_client.get(redis_key) or ""
if data:
                data = json.loads(data.decode())
public_key = data.get('public_key')
if public_key:
self.account_get(public_key, reload=True)
    # CSV format per row: PUBKEY,DESCRIPTION
    def load_accounts_csv(self, file_path: str):
        file_path = os.path.expanduser(file_path)
        if not os.path.exists(file_path):
            raise FileNotFoundError(f"File '{file_path}' does not exist.")
try:
with open(file_path, 'r', newline='') as file:
reader = csv.reader(file, delimiter=',')
for row in reader:
if row and len(row) >= 2: # Check if row is not empty and has at least 2 elements
pubkey = row[0].strip()
comment = ','.join(row[1:]).strip()
if self.account_exists(pubkey):
self.comment_add(pubkey, comment)
        except IOError as e:
            raise Exception(f"Error reading file: {e}")
        except csv.Error as e:
            raise Exception(f"Error parsing CSV: {e}")
def accounts_get(self) -> List[StellarAccount]:
"""
Retrieve a list of all known Stellar accounts from the Redis cache.
Returns:
List[StellarAccount]: A list of StellarAccount objects.
"""
accounts = []
for redis_key in self.redis_client.scan_iter(f"stellar:{self.instance}:accounts:*"):
            pubkey = redis_key.decode().split(':')[-1]
accounts.append(self.account_get(key=pubkey))
return accounts
def toml_save(self):
"""
Save the list of all known Stellar accounts to a TOML file.
Args:
file_path (str): The path where the list needs to be saved.
"""
if self.tomlfile == "":
raise ValueError("No TOML file path provided")
accounts = self.accounts_get()
accounts_dict = {account.public_key: asdict(account) for account in accounts}
with open(self.tomlfile, 'w') as file:
            toml.dump(accounts_dict, file)
def toml_load(self):
"""
Load the list of Stellar accounts from a TOML file and save them to the Redis cache.
Args:
file_path (str): The path of the TOML file to load.
"""
if not os.path.exists(self.tomlfile):
return
#raise FileNotFoundError(f"Error: File '{self.tomlfile}' does not exist.")
with open(self.tomlfile, 'r') as file:
accounts_dict = toml.load(file)
for pubkey, account_data in accounts_dict.items():
account_data['assets'] = [StellarAsset(**asset) for asset in account_data['assets']]
account = StellarAccount(**account_data)
self.account_save(account)
def new(instance: str = "default", owner: str = "", network: str = "main", tomlfile: str = "") -> HorizonServer:
    return HorizonServer(instance=instance, network=network, tomlfile=tomlfile, owner=owner)
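
A minimal usage sketch (hedged: assumes a local Redis on port 6379; "G..." is a placeholder public key):

```python
srv = new(instance="demo", network="main")
acc = srv.account_get("G...")  # fetched from Horizon on first call, then cached
print(acc.balance_str())
srv.comment_add(acc.public_key, "tracked account")
```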


@@ -0,0 +1,70 @@
from dataclasses import dataclass, field, asdict
from typing import List, Optional
from stellar_sdk import Keypair, Server, StrKey
import json
import redis
@dataclass
class StellarAsset:
type: str
balance: float
issuer: str = ""
def format_balance(self):
balance_float = float(self.balance)
formatted_balance = f"{balance_float:,.2f}"
if '.' in formatted_balance:
formatted_balance = formatted_balance.rstrip('0').rstrip('.')
return formatted_balance
def md(self):
formatted_balance = self.format_balance()
return f"- **{self.type}**: {formatted_balance}"
@dataclass
class StellarAccount:
owner: str
priv_key: str = ""
public_key: str = ""
assets: List[StellarAsset] = field(default_factory=list)
name: str = ""
description: str = ""
comments: List[str] = field(default_factory=list)
cat: str = ""
question: str = ""
def md(self):
result = [
f"# Stellar Account: {self.name or 'Unnamed'}","",
f"**Public Key**: {self.public_key}",
f"**Cat**: {self.cat}",
f"**Description**: {self.description[:60]}..." if self.description else "**Description**: None",
f"**Question**: {self.question}" if self.question else "**Question**: None",
"",
"## Assets:",""
]
for asset in self.assets:
result.append(asset.md())
if len(self.assets) == 0:
result.append("- No assets")
result.append("")
if self.comments:
result.append("## Comments:")
for comment in self.comments:
if '\n' in comment:
multiline_comment = "\n ".join(comment.split('\n'))
result.append(f"- {multiline_comment}")
else:
result.append(f"- {comment}")
return "\n".join(result)
def balance_str(self) -> str:
out=[]
for asset in self.assets:
out.append(f"{asset.type}:{float(asset.balance):,.0f}")
return " ".join(out)


@@ -0,0 +1,78 @@
module stellar
import freeflowuniverse.crystallib.core.texttools
pub struct DigitalAssets {
pub mut:
	accounts []Account
}
pub struct Owner {
pub mut:
name string
accounts []Account
}
@[params]
pub struct AccountGetArgs{
pub mut:
name string
bctype BlockChainType
}
pub fn (self DigitalAssets) account_get(args_ AccountGetArgs) !&Account {
	mut accounts := []&Account{}
	mut args := args_
args.name = texttools.name_fix(args.name)
	for i in 0 .. self.accounts.len {
		if self.accounts[i].name == args.name && self.accounts[i].bctype == args.bctype {
			accounts << &self.accounts[i]
		}
	}
if accounts.len == 0 {
return error('No account found with the given name:${args.name} and blockchain type: ${args.bctype}')
	} else if accounts.len > 1 {
return error('Multiple accounts found with the given name:${args.name} and blockchain type: ${args.bctype}')
}
return accounts[0]
}
pub struct Account {
pub mut:
name string
secret string
pubkey string
description string
cat string
owner string
assets []Asset
bctype BlockChainType
}
pub struct Asset {
pub mut:
amount int
assettype AssetType
}
pub fn (self Asset) name() string {
return self.assettype.name
}
pub struct AssetType {
pub mut:
name string
issuer string
bctype BlockChainType
}
pub enum BlockChainType{
stellar_pub
stellar_test
}


@@ -0,0 +1,46 @@
from typing import Tuple
from stellar_sdk import Server, Keypair, TransactionBuilder, Network, Asset, Signer, TransactionEnvelope
import redis
import requests
import json
import time
def create_account_on_testnet() -> Tuple[str, str]:
    # Initialize Redis client (shared by the helpers below; in the original it
    # was created inside create_account(), which made it unavailable here)
    redis_client = redis.StrictRedis(host='localhost', port=6379, db=0)
    server = Server("https://horizon-testnet.stellar.org")

    def balance(public_key: str) -> float:
        # Not present in the original file: a minimal sketch that asks Horizon
        # for the account's native XLM balance (0.0 if the account is unknown).
        try:
            account_data = server.accounts().account_id(public_key).call()
        except Exception:
            return 0.0
        for b in account_data["balances"]:
            if b["asset_type"] == "native":
                return float(b["balance"])
        return 0.0

    def fund(public_key: str) -> float:
        # Request funds from the Stellar testnet friendbot
        response = requests.get(f"https://friendbot.stellar.org?addr={public_key}")
        if response.status_code != 200:
            raise Exception("Failed to fund new account with friendbot")
        time.sleep(1)
        return balance(public_key)

    def create_account() -> Tuple[str, str]:
# Generate keypair
keypair = Keypair.random()
public_key = keypair.public_key
secret_key = keypair.secret
account_data = {
"public_key": public_key,
"secret_key": secret_key
}
redis_client.set("stellartest:testaccount", json.dumps(account_data))
time.sleep(1)
return public_key, secret_key
# Check if the account already exists in Redis
if redis_client.exists("stellartest:testaccount"):
account_data = json.loads(redis_client.get("stellartest:testaccount"))
public_key = account_data["public_key"]
secret_key = account_data["secret_key"]
r = balance(public_key)
if r < 100:
fund(public_key)
r = balance(public_key)
return public_key, secret_key
else:
create_account()
return create_account_on_testnet()
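
A minimal usage sketch (hedged: assumes a local Redis and testnet reachability):

```python
pub, sec = create_account_on_testnet()
print(f"testnet account ready: {pub}")
```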


@@ -0,0 +1,102 @@
import json
import redis
import telebot
import os
import logging
from termcolor import colored
from telebot.types import InlineKeyboardMarkup, InlineKeyboardButton
from telebot.formatting import escape_markdown
from bot_audio import audio_add
from bot_text import text_add
from ai.ask import ai_assistent,AIAssistant
class MyBot:
def __init__(self,ai_reset:bool=False):
# Initialize logging
logging.basicConfig(level=logging.INFO, format='%(message)s')
self.logger = logging.getLogger(__name__)
# Initialize Redis connection
self.redis_client = redis.Redis(host='localhost', port=6379, db=0)
# Initialize Telegram bot
self.telebotkey = os.getenv("TELEBOT")
if self.telebotkey:
self.logger.info(colored("TELEBOT key set", "green"))
self.bot = telebot.TeleBot(self.telebotkey)
else:
raise Exception("can't find TELEBOT in ENV")
# Set up message handlers
self.setup_handlers()
audio_add(self)
text_add(self,reset=ai_reset)
def setup_handlers(self):
@self.bot.message_handler(commands=['help'])
def send_welcome(message):
self.bot.reply_to(message, """\
Hi there, I am your hero.
Just speak to me or do /start or /help
""")
@self.bot.message_handler(commands=['start'])
def start_command(message):
chat_id = message.chat.id
keyboard = InlineKeyboardMarkup()
subscribe_button = InlineKeyboardButton("Subscribe to Updates", callback_data='subscribe')
unsubscribe_button = InlineKeyboardButton("Unsubscribe from Updates", callback_data='unsubscribe')
keyboard.row(subscribe_button, unsubscribe_button)
self.bot.reply_to(message, "Please choose an option:", reply_markup=keyboard)
@self.bot.callback_query_handler(func=lambda call: True)
def callback_query(call):
chat_id = call.message.chat.id
if call.data == 'subscribe':
self.redis_client.hset('subscribed_chats', chat_id, '1')
self.bot.answer_callback_query(call.id, "You have subscribed to updates.")
print(f"User subscribed to updates: {chat_id}")
elif call.data == 'unsubscribe':
self.redis_client.hdel('subscribed_chats', chat_id)
self.bot.answer_callback_query(call.id, "You have unsubscribed from updates.")
print(f"User unsubscribed from updates: {chat_id}")
def send_message_to_subscribers(self, message):
subscribed_chats = self.redis_client.hgetall('subscribed_chats')
for chat_id in subscribed_chats:
try:
self.bot.send_message(chat_id.decode('utf-8'), message)
except Exception as e:
print(f"Failed to send message to chat {chat_id}: {str(e)}")
def send_error_to_telegram(self,chat_id, error_message):
# Format the error message for Telegram
telegram_message = f"🚨 Error Occurred 🚨\n\n"
telegram_message += f"app: {escape_markdown(error_message['app'])}\n"
telegram_message += f"Function: {escape_markdown(error_message['function'])}\n"
telegram_message += f"msg: {escape_markdown(error_message['msg'])}\n"
telegram_message += f"Exception Type: {escape_markdown(error_message['exception_type'])}\n"
telegram_message += f"Exception Message: ```\n{escape_markdown(error_message['exception_message'])}\n```\n"
if 'traceback' in error_message:
telegram_message += f"Traceback:\n```\n{escape_markdown(error_message['traceback'])}\n```"
# Send the error message to the subscribed chat
self.bot.send_message(chat_id, telegram_message, parse_mode='Markdown')
def start(self):
print("Bot started")
# Start the bot
self.bot.polling()
def bot_new() -> MyBot:
return MyBot()
# Usage
if __name__ == "__main__":
my_bot = bot_new()
my_bot.start()


@@ -0,0 +1,72 @@
import os
from pydub import AudioSegment
import whisper
def audio_add(self):
self.model = whisper.load_model("base")
@self.bot.message_handler(content_types=['audio', 'voice']) #, 'document'
def handle_audio(message):
try:
chat_id = message.chat.id
file_info = None
audio_path = None
if message.content_type == 'audio':
file_info = self.bot.get_file(message.audio.file_id)
audio_path = f"/tmp/audio/{message.audio.file_id}.mp3"
elif message.content_type == 'voice':
file_info = self.bot.get_file(message.voice.file_id)
audio_path = f"/tmp/audio/{message.voice.file_id}.ogg"
if file_info:
downloaded_file = self.bot.download_file(file_info.file_path)
# Ensure the directory exists
os.makedirs(os.path.dirname(audio_path), exist_ok=True)
# Save the audio file
with open(audio_path, 'wb') as new_file:
new_file.write(downloaded_file)
#bot.send_message(chat_id, f"Audio received and saved successfully to {audio_path}.")
print(f"Audio received and saved to {audio_path}")
# Convert to WAV format if necessary
wav_path = audio_path.replace('.mp3', '.wav').replace('.ogg', '.wav')
if audio_path.endswith('.mp3') or audio_path.endswith('.ogg'):
audio = AudioSegment.from_file(audio_path)
audio.export(wav_path, format='wav')
else:
wav_path = audio_path
# Transcribe audio using Whisper
result = self.model.transcribe(wav_path)
transcription = result["text"]
self.bot.send_message(chat_id, transcription, parse_mode='Markdown')
print(f"Audio received and saved to {audio_path}")
print(f"Transcription: {transcription}")
text2 = self.text_process(self,transcription)
print(f"Processed text {chat_id}: {text2}")
if len(text2)>0:
self.bot.send_message(chat_id, text2)
except Exception as e:
error_message = {
'app': 'Telegram Bot',
'function': 'handle_audio',
'msg': 'Failed to process audio file',
'exception_type': type(e).__name__,
'exception_message': str(e)
}
self.send_error_to_telegram(chat_id, error_message)
print(f"Error processing audio file: {e}")


@@ -0,0 +1,51 @@
import os
from ai.ask import ai_assistent
def text_add(self,reset:bool=False):
self.ai_assistent = ai_assistent(reset=reset)
self.text_process = text_process
@self.bot.message_handler(content_types=['text'])
def handle_text(message):
try:
chat_id = message.chat.id
text = message.text
# Here you can add your logic to process the text
# For now, let's just echo the message back
# response = f"You said: {text}"
print(f"Received text from {chat_id}: {text}")
text2 = self.text_process(self,text)
print(f"Processed text {chat_id}: {text2}")
if len(text2)>0:
self.bot.send_message(chat_id, text2)
except Exception as e:
error_message = {
'app': 'Telegram Bot',
'function': 'handle_text',
'msg': 'Failed to process text',
'exception_type': type(e).__name__,
'exception_message': str(e)
}
self.send_error_to_telegram(chat_id, error_message)
print(f"Error processing text file: {e}")
def text_process(self, txt) -> str:
if "translate" not in txt.lower():
txt+='''\n\n
only output the heroscript, no comments
'''
response = self.ai_assistent.ask(
category='timemgmt',
name='schedule',
question=txt)
return response


@@ -0,0 +1,36 @@
import json
import redis
import telebot
import threading
from telebot.types import InlineKeyboardMarkup, InlineKeyboardButton
import time
from telebot.formatting import escape_markdown
import os
from telegram.bot import send_error_to_telegram
# Initialize Redis connection
redis_client = redis.Redis(host='localhost', port=6379, db=0)
# Get errors from Redis and send them to all subscribed chats
def process_error_queue():
while True:
# Pop an error message from the Redis queue
error_json = redis_client.lpop('error_queue')
if error_json:
# Deserialize the error message from JSON
error_message = json.loads(error_json)
# Get all subscribed chat IDs from Redis
subscribed_chats = redis_client.hgetall('subscribed_chats')
# Send the error message to all subscribed chats
for chat_id in subscribed_chats.keys():
send_error_to_telegram(int(chat_id), error_message)
else:
# If the queue is empty, wait for a short interval before checking again
time.sleep(1)
# Start processing the error queue
process_error_queue_thread = threading.Thread(target=process_error_queue)
process_error_queue_thread.start()
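
For reference, a minimal sketch of the producer side (hedged: assumes the same local Redis; field names follow send_error_to_telegram above):

```python
import json
import redis

r = redis.Redis(host='localhost', port=6379, db=0)
error = {
    "app": "worker",
    "function": "sync",
    "msg": "sync failed",
    "exception_type": "RuntimeError",
    "exception_message": "boom",
}
r.rpush('error_queue', json.dumps(error))  # consumed by process_error_queue()
```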


@@ -0,0 +1,142 @@
import os
from typing import List, Optional
import requests
import vimeo
from model_video import VideoInfo, video_model_load, videos_model_load
class VimeoClient:
def __init__(self):
# Retrieve necessary credentials from environment variables
self.client_id = os.getenv("VIMEO_CLIENT_ID")
self.client_secret = os.getenv("VIMEO_SECRET")
self.access_token = os.getenv("VIMEO_ACCESSTOKEN_ID")
self.user_id = os.getenv("VIMEO_USER_ID")
# Check if all environment variables are present
if not all([self.client_id, self.client_secret, self.access_token, self.user_id]):
raise EnvironmentError(
"Please set the VIMEO_CLIENT_ID, VIMEO_SECRET,VIMEO_USER_ID and VIMEO_ACCESSTOKEN_ID environment variables."
)
# Initialize the Vimeo client
self.client = vimeo.VimeoClient(token=self.access_token, key=self.client_id, secret=self.client_secret)
def upload(self, file: str, video_title: str, description: str) -> str:
video_uri = self.client.upload(file, data={"name": video_title, "description": description})
return video_uri
def download(self, video_id: str, output_file: str = "myvid.mp4"):
info = self.get_video_info(video_id)
size, link = 0, ""
for item in info.download:
if item["size"] > size:
size = item["size"]
link = item["link"]
if link == "":
raise Exception("download link not provided for video")
video_response = requests.get(link, stream=True)
        downloaded_mb = 0.0
        with open(output_file, "wb") as video_file:
            for chunk in video_response.iter_content(chunk_size=1024 * 1024):
                if chunk:
                    # len(chunk) is in bytes; divide by 1024*1024 for MB
                    downloaded_mb += len(chunk) / (1024 * 1024)
                    print(f"{downloaded_mb:.1f}MB Downloaded...")
                    video_file.write(chunk)
print(f"Video downloaded successfully to {output_file}!")
    def get_video_info(self, video_id: str) -> VideoInfo:
        """
        Get information about a video by its ID.

        :param video_id: ID of the Vimeo video.
        :return: Video information as a VideoInfo object; raises on API errors.
        """
response = self.client.get(f"/videos/{video_id}")
if response.status_code == 200:
myvideo = video_model_load(response.content)
else:
raise Exception(f"Failed to get video details. Status code: {response.status_code}, Error: {response.text}")
return myvideo
def get_videos(self, folder: Optional[int] = None, folders: Optional[List[int]] = None) -> List[VideoInfo]:
"""
Get information about videos from specified folder(s) or all videos if no folder is specified.
:param folder: ID of a single folder to fetch videos from.
:param folders: List of folder IDs to fetch videos from.
:return: List of VideoInfo objects.
"""
        if not self.user_id:
            raise Exception("Can't find user ID, it's not set in env variables")
all_videos = []
if folder is not None:
folders = [folder]
elif folders is None:
# If no folder or folders specified, get all videos
response = self.client.get("/me/videos")
if response.status_code == 200:
return videos_model_load(response.content)
else:
raise Exception(f"Failed to get videos. Status code: {response.status_code}, Error: {response.text}")
for folder_id in folders:
response = self.client.get(f"/users/{self.user_id}/projects/{folder_id}/videos")
if response.status_code == 200:
videos = videos_model_load(response.content)
all_videos.extend(videos)
else:
print(f"Failed to get videos for folder {folder_id}. Status code: {response.status_code}, Error: {response.text}")
return all_videos
def new() -> VimeoClient:
return VimeoClient()
# Example usage:
if __name__ == "__main__":
cl = new()
v = cl.get_videos(folders=[10700101, 13139570, 12926235, 10752310, 10702046])
for item in v:
video_id = item.uri.split("/")[-1]
print(f" - {item.name} : {video_id} ")
    # vi = cl.get_video_info("475353425")
    # print(json_to_yaml(vi))
    # cl.download("475353425", "/tmp/475353425.mp4")


@@ -0,0 +1,177 @@
from dataclasses import dataclass, field
from typing import List, Optional, Dict, Any
from dataclasses_json import dataclass_json
import json
import yaml
def json_to_yaml(json_data):
# If the input is a JSON string, parse it into a Python dictionary
if isinstance(json_data, str):
json_data = json.loads(json_data)
# Convert the dictionary to a YAML formatted string
yaml_data = yaml.dump(json_data, sort_keys=False, default_flow_style=False)
return yaml_data
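# Example (hedged): json_to_yaml('{"a": 1, "b": [2, 3]}') returns "a: 1\nb:\n- 2\n- 3\n"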
@dataclass_json
@dataclass
class Size:
width: int
height: int
link: str
link_with_play_button: Optional[str] = None
@dataclass_json
@dataclass
class Pictures:
uri: str
active: bool
type: str
base_link: str
sizes: List[Size]
resource_key: str
default_picture: bool
@dataclass_json
@dataclass
class Embed:
html: str
badges: Dict[str, Any]
interactive: bool
buttons: Dict[str, bool]
logos: Dict[str, Any]
play_button: Dict[str, Any]
title: Dict[str, Any]
end_screen: List[Any]
playbar: bool
quality_selector: Optional[str]
pip: bool
autopip: bool
volume: bool
color: str
colors: Dict[str, str]
event_schedule: bool
has_cards: bool
outro_type: str
show_timezone: bool
cards: List[Any]
airplay: bool
audio_tracks: bool
chapters: bool
chromecast: bool
closed_captions: bool
transcript: bool
ask_ai: bool
uri: Optional[str]
email_capture_form: Optional[str]
speed: bool
@dataclass_json
@dataclass
class Uploader:
pictures: Pictures
@dataclass_json
@dataclass
class User:
uri: str
name: str
link: str
capabilities: Dict[str, bool]
location: str
gender: str
bio: str
short_bio: str
created_time: str
pictures: Pictures
websites: List[Dict[str, Optional[str]]]
#metadata: Dict[str, Any]
location_details: Dict[str, Optional[Any]]
skills: List[Any]
available_for_hire: bool
can_work_remotely: bool
preferences: Dict[str, Any]
content_filter: List[str]
upload_quota: Dict[str, Any]
resource_key: str
account: str
@dataclass_json
@dataclass
class VideoInfo:
uri: str
name: str
description: Optional[str]
type: str
link: str
player_embed_url: str
duration: int
width: int
height: int
#embed: Embed
created_time: str
modified_time: str
release_time: str
content_rating: List[str]
content_rating_class: str
rating_mod_locked: bool
license: Optional[str]
privacy: Dict[str, Any]
pictures: Pictures
tags: List[Any]
stats: Dict[str, int]
categories: List[Any]
uploader: Uploader
#metadata: Dict[str, Any]
manage_link: str
#user: Optional[User]
last_user_action_event_date: Optional[str]
parent_folder: Optional[Dict[str, Any]]
review_page: Optional[Dict[str, Any]]
files: Optional[List[Dict[str, Any]]]
download: Optional[List[Dict[str, Any]]]
app: Optional[Dict[str, str]]
play: Optional[Dict[str, Any]]
status: str
resource_key: str
upload: Optional[Dict[str, Optional[str]]]
transcode: Dict[str, str]
is_playable: bool
has_audio: bool
def video_model_load(json_data:str,dojsonload:bool=True) -> VideoInfo:
if dojsonload:
json_dict = json.loads(json_data)
else:
json_dict = json_data
json_dict.pop('metadata', {})
json_dict.pop('embed', {})
json_dict.pop('user', {})
json_dict.pop('websites', {})
# if 'user' in json_dict:
# json_dict['user'].pop('metadata', None)
# if 'websites' in json_dict:
# json_dict['websites'].pop('metadata', None)
json_data_cleaned = json.dumps(json_dict)
video_object = VideoInfo.from_json(json_data_cleaned)
return video_object
def videos_model_load(json_data:str) -> List[VideoInfo]:
json_list = json.loads(json_data)
json_list2= list()
for item in json_list["data"]:
d=video_model_load(item,dojsonload=False)
json_list2.append(d)
return json_list2


@@ -0,0 +1,107 @@
import os
from pydub import AudioSegment
import whisper
import moviepy.editor as mp
import nltk
from nltk.tokenize import sent_tokenize, word_tokenize
# Download necessary NLTK data
nltk.download('punkt', quiet=True)
class Convertor:
def __init__(self, max_chars_per_part=4000,context:str = "main"):
self.max_chars_per_part = max_chars_per_part
self.context = context
@classmethod
def new(cls, max_chars_per_part=4000):
return cls(max_chars_per_part)
def process(self, path: str):
if path.lower().endswith(('.mp4', '.avi', '.mov')): # Video files
return self.process_video(path)
elif path.lower().endswith(('.mp3', '.wav', '.ogg')): # Audio files
return self.process_audio(path)
else:
raise ValueError("Unsupported file format")
def process_video(self, video_path: str):
# Extract audio from video
video = mp.VideoFileClip(video_path)
audio_path = video_path.rsplit('.', 1)[0] + '.wav'
video.audio.write_audiofile(audio_path)
video.close()
return audio_path
    def process_audio(self, audio_path: str):
        # Convert to WAV format if necessary
        wav_path = audio_path.rsplit('.', 1)[0] + '.wav'
        if not audio_path.lower().endswith('.wav'):
            audio = AudioSegment.from_file(audio_path)
            audio.export(wav_path, format='wav')
        else:
            wav_path = audio_path
        return wav_path
def split_text(self, text):
parts = []
current_part = ""
paragraphs = text.split('\n\n')
for paragraph in paragraphs:
sentences = sent_tokenize(paragraph)
for sentence in sentences:
if len(current_part) + len(sentence) < self.max_chars_per_part:
current_part += sentence + ' '
else:
if current_part:
parts.append(current_part.strip())
current_part = sentence + ' '
# Add a paragraph break if it doesn't exceed the limit
if len(current_part) + 2 < self.max_chars_per_part:
current_part += '\n\n'
else:
parts.append(current_part.strip())
current_part = '\n\n'
if current_part:
parts.append(current_part.strip())
return parts
def find_natural_pause(self, text):
words = word_tokenize(text)
total_words = len(words)
mid_point = total_words // 2
# Look for punctuation near the middle
for i in range(mid_point, total_words):
if words[i] in '.!?':
return ' '.join(words[:i+1]), ' '.join(words[i+1:])
# If no punctuation found, split at the nearest space to the middle
return ' '.join(words[:mid_point]), ' '.join(words[mid_point:])
def write_to_file(self, parts, output_path):
with open(output_path, 'w', encoding='utf-8') as f:
for i, part in enumerate(parts, 1):
f.write(f"Part {i}:\n\n")
f.write(part)
f.write("\n\n")
if i < len(parts):
f.write("-" * 50 + "\n\n")
# Usage example:
if __name__ == "__main__":
    processor = Convertor.new()
    item = "/Users/despiegk1/Documents/Zoom/2024-07-16 16.42.50 Kristof De Spiegeleer's Personal Meeting Room/video1720369800.mp4"
    # Convertor only extracts/converts audio; transcription happens elsewhere
    # (see MediaProcessor below).
    wav_path = processor.process(item)
    print(f"Audio ready for transcription at {wav_path}")


@@ -0,0 +1,118 @@
import os
from pydub import AudioSegment
import whisper
import moviepy.editor as mp
import nltk
from nltk.tokenize import sent_tokenize, word_tokenize
# Download necessary NLTK data
nltk.download('punkt', quiet=True)
class MediaProcessor:
def __init__(self, max_chars_per_part=4000):
self.model = whisper.load_model("base.en")
#self.model = whisper.load_model("medium.en")
self.max_chars_per_part = max_chars_per_part
@classmethod
def new(cls, max_chars_per_part=4000):
return cls(max_chars_per_part)
def process(self, path: str):
if path.lower().endswith(('.mp4', '.avi', '.mov')): # Video files
return self.process_video(path)
elif path.lower().endswith(('.mp3', '.wav', '.ogg')): # Audio files
return self.process_audio(path)
else:
raise ValueError("Unsupported file format")
def process_video(self, video_path: str):
# Extract audio from video
video = mp.VideoFileClip(video_path)
audio_path = video_path.rsplit('.', 1)[0] + '.wav'
video.audio.write_audiofile(audio_path)
video.close()
# Now process the extracted audio
return self.process_audio(audio_path)
def process_audio(self, audio_path: str):
# Convert to WAV format if necessary
wav_path = audio_path.rsplit('.', 1)[0] + '.wav'
if not audio_path.lower().endswith('.wav'):
audio = AudioSegment.from_file(audio_path)
audio.export(wav_path, format='wav')
else:
wav_path = audio_path
# Transcribe audio using Whisper
result = self.model.transcribe(wav_path)
transcription = result["text"]
# Split the transcription into parts
return self.split_text(transcription)
def split_text(self, text):
parts = []
current_part = ""
paragraphs = text.split('\n\n')
for paragraph in paragraphs:
sentences = sent_tokenize(paragraph)
for sentence in sentences:
if len(current_part) + len(sentence) < self.max_chars_per_part:
current_part += sentence + ' '
else:
if current_part:
parts.append(current_part.strip())
current_part = sentence + ' '
# Add a paragraph break if it doesn't exceed the limit
if len(current_part) + 2 < self.max_chars_per_part:
current_part += '\n\n'
else:
parts.append(current_part.strip())
current_part = '\n\n'
if current_part:
parts.append(current_part.strip())
return parts
def find_natural_pause(self, text):
words = word_tokenize(text)
total_words = len(words)
mid_point = total_words // 2
# Look for punctuation near the middle
for i in range(mid_point, total_words):
if words[i] in '.!?':
return ' '.join(words[:i+1]), ' '.join(words[i+1:])
# If no punctuation found, split at the nearest space to the middle
return ' '.join(words[:mid_point]), ' '.join(words[mid_point:])
def write_to_file(self, parts, output_path):
with open(output_path, 'w', encoding='utf-8') as f:
for i, part in enumerate(parts, 1):
f.write(f"Part {i}:\n\n")
f.write(part)
f.write("\n\n")
if i < len(parts):
f.write("-" * 50 + "\n\n")
# Usage example:
if __name__ == "__main__":
processor = MediaProcessor.new(max_chars_per_part=10000)
output_file = "/Users/despiegk1/Documents/transcription3.md"
item = "/Users/despiegk1/Documents/Zoom/2024-07-16 16.42.50 Kristof De Spiegeleer's Personal Meeting Room/video1720369800.mp4"
transcription_parts = processor.process(item)
processor.write_to_file(transcription_parts, output_file)
print(f"Transcription split into {len(transcription_parts)} parts:")
for i, part in enumerate(transcription_parts, 1):
print(f"Part {i}:")
print(part)
print("-" * 50)


@@ -0,0 +1,313 @@
import base64
import json
import os
import time
from dataclasses import dataclass
from datetime import datetime, timedelta
from enum import Enum
from typing import Any, Dict, List, Optional
import redis
import requests
API_URL = "https://api.wigle.net/api/v2/network/search"
REDIS_CACHE_EXPIRY = timedelta(hours=1)
API_RATE_LIMIT = 30 # seconds between requests
# Initialize Redis connection
redis_client = redis.Redis(host="localhost", port=6379, db=0, decode_responses=True)
# Track last API request time (initialized to allow immediate first request)
_last_request_time = time.time() - API_RATE_LIMIT
class WigleError(Exception):
"""Custom exception for Wigle-related errors"""
pass
class NetworkType(str, Enum):
"""Network types supported by Wigle API"""
WIFI = "WIFI"
BT = "BT"
CELL = "CELL"
class Encryption(str, Enum):
"""WiFi encryption types"""
NONE = "None"
WEP = "WEP"
WPA = "WPA"
WPA2 = "WPA2"
WPA3 = "WPA3"
UNKNOWN = "unknown"
@dataclass
class Location:
"""Represents a wireless network location with all available Wigle API fields"""
ssid: str
latitude: float
longitude: float
last_update: Optional[datetime]
encryption: Optional[str] = None
network_type: Optional[str] = None
channel: Optional[int] = None
frequency: Optional[float] = None
qos: Optional[int] = None
transid: Optional[str] = None
firsttime: Optional[datetime] = None
lasttime: Optional[datetime] = None
country_code: Optional[str] = None
city: Optional[str] = None
region: Optional[str] = None
house_number: Optional[str] = None
road: Optional[str] = None
address: Optional[str] = None
def get_wigle_auth() -> str:
    """Get the Wigle Basic-auth token from the WIGLE environment variable.

    HTTP Basic auth expects a base64-encoded 'name:token' pair, so a raw
    'AIDxxx:yyy' value is encoded here; an already-encoded token is passed through.
    """
    wigle_auth = os.getenv("WIGLE")
    if not wigle_auth:
        raise WigleError("WIGLE environment variable not set. Format should be: 'AIDxxx:yyy'")
    if ":" in wigle_auth:
        wigle_auth = base64.b64encode(wigle_auth.encode()).decode()
    return wigle_auth
def enforce_rate_limit():
"""Enforce API rate limit by sleeping if needed, showing countdown"""
global _last_request_time
current_time = time.time()
time_since_last_request = current_time - _last_request_time
if time_since_last_request < API_RATE_LIMIT:
sleep_time = API_RATE_LIMIT - time_since_last_request
print(f"\nRate limit: waiting {sleep_time:.0f} seconds", end="", flush=True)
# Show countdown
for remaining in range(int(sleep_time), 0, -1):
time.sleep(1)
print(f"\rRate limit: waiting {remaining:2d} seconds", end="", flush=True)
print("\rRate limit: continuing... ") # Clear the line
_last_request_time = time.time()
def search_networks(
*,
# Location filters
latitude_north: Optional[float] = None,
latitude_south: Optional[float] = None,
longitude_east: Optional[float] = None,
longitude_west: Optional[float] = None,
# Network filters
ssid: Optional[str] = None,
ssidlike: Optional[str] = None,
network_type: Optional[NetworkType] = None,
encryption: Optional[Encryption] = None,
# Time filters
on_since: Optional[datetime] = None,
last_update: Optional[datetime] = None,
# Result control
results_per_page: int = 100,
search_after: Optional[str] = None,
# Other filters
freenet: Optional[bool] = None,
paynet: Optional[bool] = None,
show_query: bool = False,
) -> Dict[str, Any]:
"""
Search for networks using the Wigle API with full parameter support and Redis caching.
    Rate limited to one request every API_RATE_LIMIT seconds (30 s by default).
Args:
latitude_north: Northern boundary of search box
latitude_south: Southern boundary of search box
longitude_east: Eastern boundary of search box
longitude_west: Western boundary of search box
ssid: Exact SSID match
ssidlike: SSID wildcard match
network_type: Filter by network type (WIFI/BT/CELL)
encryption: Filter by encryption type
on_since: Only show networks seen on or after date
last_update: Only show networks updated since date
results_per_page: Number of results per page (max 100)
search_after: Token for getting next batch of results
freenet: Show only free networks
paynet: Show only pay networks
show_query: Return query bounds without results
Returns:
Dictionary containing search results and metadata including searchAfter token
Raises:
WigleError: If the WIGLE environment variable is not set or API request fails
"""
# https://api.wigle.net/api/v2/network/search?onlymine=false&encryption=None&freenet=false&paynet=false
try:
# Build cache key from all parameters
params = locals()
cache_key = f"wigle:search:{json.dumps(params, default=str, sort_keys=True)}"
cached_result = redis_client.get(cache_key)
if cached_result:
return json.loads(cached_result)
# Enforce rate limit before making request
enforce_rate_limit()
# Build API parameters
api_params = {
"onlymine": "false",
"resultsPerPage": results_per_page,
}
# Add optional parameters if provided
if latitude_north is not None:
api_params["latrange1"] = latitude_south
api_params["latrange2"] = latitude_north
api_params["longrange1"] = longitude_west
api_params["longrange2"] = longitude_east
if ssid:
api_params["ssid"] = ssid
if ssidlike:
api_params["ssidlike"] = ssidlike
if network_type:
api_params["netid"] = network_type.value
if encryption:
api_params["encryption"] = encryption.value
else:
api_params["encryption"] = "None"
if on_since:
api_params["onSince"] = on_since.strftime("%Y%m%d")
if last_update:
api_params["lastupdt"] = last_update.strftime("%Y%m%d")
if freenet is not None:
api_params["freenet"] = str(freenet).lower()
if paynet is not None:
api_params["paynet"] = str(paynet).lower()
if search_after:
api_params["searchAfter"] = search_after
if show_query:
api_params["showQuery"] = str(show_query).lower()
# Make API request
wigle_auth = get_wigle_auth()
headers = {"Authorization": f"Basic {wigle_auth}"}
response = requests.get(API_URL, params=api_params, headers=headers)
response.raise_for_status()
result = response.json()
# Cache the result
redis_client.setex(cache_key, int(REDIS_CACHE_EXPIRY.total_seconds()), json.dumps(result))
return result
except requests.exceptions.RequestException as e:
raise WigleError(f"API request failed: {str(e)}")
def parse_network_to_location(network: Dict[str, Any]) -> Location:
"""Convert a network result from Wigle API to a Location object"""
# Parse dates if present
last_update = None
firsttime = None
lasttime = None
if network.get("lastupdt"):
try:
last_update = datetime.strptime(network["lastupdt"], "%Y-%m-%d %H:%M:%S")
except ValueError:
pass
if network.get("firsttime"):
try:
firsttime = datetime.strptime(network["firsttime"], "%Y-%m-%d %H:%M:%S")
except ValueError:
pass
if network.get("lasttime"):
try:
lasttime = datetime.strptime(network["lasttime"], "%Y-%m-%d %H:%M:%S")
except ValueError:
pass
return Location(
ssid=network["ssid"],
latitude=float(network["trilat"]),
longitude=float(network["trilong"]),
last_update=last_update,
encryption=network.get("encryption"),
network_type=network.get("type"),
channel=network.get("channel"),
frequency=network.get("frequency"),
qos=network.get("qos"),
transid=network.get("transid"),
firsttime=firsttime,
lasttime=lasttime,
country_code=network.get("country"),
city=network.get("city"),
region=network.get("region"),
house_number=network.get("housenumber"),
road=network.get("road"),
address=network.get("address"),
)
def get_all() -> List[Location]:
"""Search for OpenRoaming networks and return list of locations.
Rate limited to one request per minute, including pagination requests.
Returns:
List[Location]: List of found network locations
Raises:
WigleError: If the WIGLE environment variable is not set or API request fails
"""
ssid_names = ["Adentro OpenRoaming", "OpenRoaming", "Passpoint", "PasspointAruba", "Cellular Wi-Fi Passthrough", "WBA_OpenRoaming"]
locations: List[Location] = []
for name in ssid_names:
try:
search_after = None
while True:
results = search_networks(
ssid=name, encryption=Encryption.NONE, network_type=NetworkType.WIFI, results_per_page=100, search_after=search_after
)
if not results or not results.get("results"):
break
for network in results["results"]:
locations.append(parse_network_to_location(network))
# Get searchAfter token for next batch
search_after = results.get("searchAfter")
if not search_after:
break
except WigleError as e:
raise WigleError(f"Error searching for {name}: {str(e)}")
print(f"Found {len(locations)} OpenRoaming network locations")
return locations
if __name__ == "__main__":
locations = get_all()
for loc in locations:
print(f"SSID: {loc.ssid}")
print(f"Location: ({loc.latitude}, {loc.longitude})")
print(f"Network Type: {loc.network_type or 'N/A'}")
print(f"Encryption: {loc.encryption or 'N/A'}")
print(f"Last Update: {loc.last_update or 'N/A'}")
if loc.address:
print(f"Address: {loc.address}")
print("-" * 50)

0
herolib/core/__init__.py Normal file

Binary file not shown.


@@ -0,0 +1,38 @@
from pydantic import BaseModel, Field
from typing import Dict, Any, Type, TypeVar
from heroscript.heroscript import *
class User(BaseModel, HeroScriptMixin):
oid: str = Field()
name: str = Field(min_length=2, description="Chosen name by user", example="myname")
city: str = Field()
age: int = Field()
description: str = Field()
# Example usage
u1 = User(oid="abc123", name="John", age=30, city="New York",
description="""
this is a multiline
we need to remove the
this will stay 4 chars in
end
""")
myheroscript = u1.heroscript()
print(myheroscript)
u2 = User.from_heroscript(heroscript=myheroscript)
myprint(u2)
# p1 = Product(id=1, name="Phone", price=999.99, description="A smart phone")
# product_heroscript = p1.heroscript()
# print(product_heroscript)
# p2 = Product.from_heroscript(product_heroscript)
# print(p2)
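# Expected shape of the printed heroscript (hedged; based on the example
# files elsewhere in this commit):
# !!user.define
#   oid:abc123
#   name:John
#   description:'
#     this is a multiline
#     ...
#   '
#   age:30
#   city:'New York'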


@@ -0,0 +1,78 @@
from pydantic import BaseModel, Field
from typing import Dict, Any, Type, TypeVar, List
from heroscript.heroscript import *
class Comment(BaseModel):
description: str = Field(default="")
class HeroBase(BaseModel, HeroScriptMixin):
oid: str = Field(default="",metadata={"unique": True})
name: str = Field(min_length=2, description="Chosen name by user", example="myname",metadata={"unique": True})
comments: List[Comment] = Field(..., description="Comment which can be attached to obj")
class User(HeroBase):
city: str = Field(metadata={"index": True})
age: int = Field(metadata={"index": True})
description: str = Field(default="")
class Product(BaseModel, HeroScriptMixin):
id: int = Field(default="",metadata={"unique": True})
name: str = Field(metadata={"unique": True})
price: float = Field()
description: str = Field()
myheroscript="""
```hero
!!user.define
oid:abc123
name:John
description:'
this is a multiline
we need to remove the
this will stay 4 chars in
end
'
age:30
city:'New York'
!!product.define
id:33
name:aproduct
description:'
this is a multiline
we need to remove the
this will stay 4 chars in
end
'
price:10.0
```
"""
# hs=HeroScripts(class_types={"user":User,"product":Product},content=myheroscript)
mypath="~/code/git.threefold.info/tfgrid/hero_research/hero/osis/heroscript/example"
hs=HeroScripts(class_types={"user":User,"product":Product},path=mypath)
objs=hs.get_objects()
for o in objs:
myprint(o)
for item in hs.heroscripts:
print(item)
query = "john*"
results = hs.search(User, query)
# Print the search results
for r in results:
# print(f"User: {r["path"]}")
print(r)


@@ -0,0 +1 @@
{"/Users/despiegk/code/git.threefold.info/tfgrid/hero_research/hero/osis/heroscript/example/testFile.md": "f6e8b6a32349c262cb9afbea771c5add", "/Users/despiegk/code/git.threefold.info/tfgrid/hero_research/hero/osis/heroscript/example/sub/test file 2.md": "0ecc29046b6ef743481358e4c5630a6d"}


@@ -0,0 +1,15 @@
# header
!!product.define
id:33
name:aproduct
description:'
this is a multiline
we need to remove the
this will stay 4 chars in
end
'
price:10.0
something else


@@ -0,0 +1,22 @@
!!user.define
oid:abc123
name:John
description:'
this is a multiline
we need to remove the
this will stay 4 chars in
end
'
age:30
city:'New York'
```heroscript
!!user.define
oid:4nd
name:John2
age:40
city:bxl
```


@@ -0,0 +1,207 @@
from herotools.texttools import dedent
from typing import List, Dict, Tuple, Optional
import re
from heroscript.tools import action_blocks,format_multiline_text,heroscript_repr
import textwrap
class HeroActions:
def __init__(self, path: str = "", content:str = ""):
blocks=action_blocks(path=path,content=content)
self.actions : List[HeroAction] = []
for block in blocks:
self.actions.append(HeroAction(block))
def __repr__(self):
out=""
for item in self.actions:
out+=item.__repr__()+"\n"
return out
class HeroAction:
def __init__(self, content: str):
blocks=action_blocks(content=content)
if len(blocks)==0:
raise ValueError(f"don't find actions in {content}")
elif len(blocks)>1:
raise ValueError(f"Found more than one action in {content}")
content=blocks[0]
self.name, content = _name_paramstr(content)
self.params = Params(content)
def __str__(self):
param_str=textwrap.indent(self.params.__str__()," ")
return f"!!{self.name}\n{param_str}"
def __repr__(self):
#return self.__str__()
return heroscript_repr(self.__str__())
class Params:
def __init__(self, content: str):
self.__params = params_parse(content)
def __str__(self):
sorted_params = sorted(self.__params.items())
param_str=""
for key,value in sorted_params:
if "'" in value:
param_str+=f"{key}: {value}\n"
elif "\n" in value:
v=format_multiline_text(value)
param_str+=f"{key}: {v}\n"
elif " " in value:
param_str+=f"{key}: '{value}'\n"
else:
param_str+=f"{key}: {value}\n"
return param_str
def get_int(self, key: str, defval: int = 99999999) -> int:
if key not in self.__params:
if defval == 99999999:
raise KeyError(f"Key '{key}' must exist in parameters")
return defval
return int(self.__params[key])
def get_float(self, key: str, defval: float = 99999999.0) -> float:
if key not in self.__params:
if defval == 99999999.0:
raise KeyError(f"Key '{key}' must exist in parameters")
return defval
return float(self.__params[key])
def get(self, key: str, defval: str = "99999999") -> str:
if key not in self.__params:
if defval == "99999999":
raise KeyError(f"Key '{key}' must exist in parameters")
return defval
return self.__params[key]
    def get_list(self, key: str, defval: Optional[List[str]] = None, needtoexist: bool = True) -> List[str]:
if defval is None:
defval = []
if key not in self.__params:
if needtoexist:
raise KeyError(f"Key '{key}' must exist in parameters")
return defval
return [item.strip().strip("'").strip() for item in self.__params[key].split(",")]
    def get_list_int(self, key: str, defval: Optional[List[int]] = None, needtoexist: bool = True) -> List[int]:
if defval is None:
defval = []
if key not in self.__params:
if needtoexist:
raise KeyError(f"Key '{key}' must exist in parameters")
return defval
return [int(item.strip()) for item in self.__params[key].split(",")]
    def get_list_float(self, key: str, defval: Optional[List[float]] = None, needtoexist: bool = True) -> List[float]:
if defval is None:
defval = []
if key not in self.__params:
if needtoexist:
raise KeyError(f"Key '{key}' must exist in parameters")
return defval
return [float(item.strip()) for item in self.__params[key].split(",")]
def get_all(self) -> Dict[str, str]:
return self.__params
def _name_paramstr(heroscript: str) -> Tuple[str, str]:
if not isinstance(heroscript, str):
raise ValueError("Input must be a string")
heroscript = dedent(heroscript)
lines = heroscript.strip().split("\n")
if not lines or "!!" not in lines[0]:
raise ValueError("The first line must contain '!!' to indicate the class name")
try:
class_name = lines[0].split("!!")[1].lower().strip()
except IndexError:
raise ValueError("Invalid format for class name extraction")
rest_of_text = dedent("\n".join(lines[1:]))
return class_name, rest_of_text
def params_parse(content: str) -> Dict[str, str]:
lines = dedent(content).strip().split("\n")
props = {}
multiline_prop = None
multiline_value : List[str] = list()
for line in lines:
if multiline_prop:
if line.strip() == "'":
                props[multiline_prop] = dedent("\n".join(multiline_value))
multiline_prop = None
multiline_value = []
else:
multiline_value.append(line)
else:
if ":" in line:
prop, value = line.split(":", 1)
prop = prop.strip()
value = value.strip()
if value == "'":
multiline_prop = prop
else:
if value.startswith("'") and value.endswith("'"):
value1 = value[1:-1]
if not "'" in value1:
value=value1
props[prop] = value
return props
if __name__ == "__main__":
# Example usage
text = """
!!obj1.define
myname: 'mymama'
mylist: '20,200'
mylist2: 20,'a bbb'
mylist3: 20,200
myint:2
!!obj2.color
mother: 'mymama'
name:'aurelie'
length:60
description:'
multiline is supported
now for aurelie
'
color:green
"""
hero_actions = HeroActions(content=text)
print(hero_actions)
a2=hero_actions.actions[1]
assert a2.params.get_list(key="color")==["green"]
assert a2.params.get_list(key="mother")==["mymama"]
assert a2.params.get(key="color")=="green"
assert a2.params.get_int(key="length")==60
assert a2.params.get_list_int(key="length")==[60]
#now some non existing ones
assert a2.params.get_int(key="lengtha",defval=3)==3
assert a2.params.get(key="lengtha",defval="3")=="3"
a1=hero_actions.actions[0]
#print(a1.params.get_list(key="mylist2"))
assert a1.params.get_list(key="mylist")==["20","200"]
assert a1.params.get_list_int(key="mylist")==[20,200]
assert a1.params.get_list(key="mylist2")==["20","a bbb"]


@@ -0,0 +1,129 @@
from pydantic import BaseModel, Field
from typing import Any, Dict, List, Type, TypeVar
import hashlib
import json
import os
# Indexing/search below relies on Whoosh (assumption: this is the intended
# backend, since index.create_in/Schema/TEXT/ID/NUMERIC/STORED/QueryParser
# match its API).
from whoosh import index
from whoosh.fields import Schema, TEXT, ID, NUMERIC, STORED
from whoosh.qparser import QueryParser
T = TypeVar("T", bound=BaseModel)
class HeroScripts:
def __init__(self, class_types: dict, path:str = "", content:str = "", indexpath: str = ""):
self.class_types = class_types
        self.heroscripts: List["HeroScript"] = []
self.path = os.path.expanduser(path)
self.indexpath = os.path.expanduser(indexpath)
        self.done: Dict[str, str] = {}
# self.done_load()
if self.path:
try:
# self.done_load()
self.load(self.path)
self.done_save()
except FileNotFoundError as e:
print(f"Directory not found: {self.path}")
print(f"Error: {str(e)}")
self.create_indexes()
self.index_objects()
if content:
blocks = extract_heroscript_blocks(content)
self.heroscripts.extend(HeroScript(block) for block in blocks)
def done_load(self):
if self.path:
done_file = os.path.join(self.path, "done.json")
if os.path.exists(done_file):
with open(done_file, "r") as f:
self.done = json.load(f)
def done_save(self):
if self.path:
done_file = os.path.join(self.path, "done.json")
with open(done_file, "w") as f:
json.dump(self.done, f)
    def load(self, path):
        for root, _, files in os.walk(path):
            for filename in files:
                if filename.endswith(".md"):
                    filepath = os.path.join(root, filename)
                    print(f" - load {filepath}")
                    with open(filepath, "r") as file:
                        content = file.read()
                    md5hash = hashlib.md5(content.encode()).hexdigest()
                    if filepath not in self.done or self.done[filepath] != md5hash:
                        blocks = self.extract_heroscript_blocks(content)
                        self.heroscripts.extend(HeroScript(block, filepath) for block in blocks)
                        self.done[filepath] = md5hash
    def get_objects(self):
objects = []
for heroscript in self.heroscripts:
if heroscript.content:
try:
class_name = heroscript.content.split("\n")[0].split("!!")[1].split(".")[0].lower()
if class_name in self.class_types:
class_type = self.class_types[class_name]
try:
obj = class_type.from_heroscript(heroscript.content)
objects.append(obj)
except Exception as e:
print(f"Error parsing HeroScript: {e}")
except (IndexError, ValueError):
print(f"Invalid HeroScript format: {heroscript.content}")
return objects
def create_indexes(self):
for class_type in self.class_types.values():
schema = self.create_schema(class_type)
index_dir = os.path.join(self.indexpath, class_type.__name__.lower())
if not os.path.exists(index_dir):
os.makedirs(index_dir)
index.create_in(index_dir, schema)
def create_schema(self, class_type):
schema_fields = {"path": STORED()}
for field_name, field in class_type.__fields__.items():
json_schema_extra = getattr(field, "json_schema_extra", None)
if json_schema_extra is not None:
metadata = json_schema_extra.get("metadata", {})
if isinstance(metadata, list):
metadata = {item: True for item in metadata}
if metadata.get("unique") or metadata.get("indexed"):
if field.annotation == str :
schema_fields[field_name] = ID(stored=True, unique=metadata.get("unique", False))
elif field.annotation == int or field.annotation == float :
schema_fields[field_name] = NUMERIC(stored=True, unique=metadata.get("unique", False))
else:
                        schema_fields[field_name] = TEXT(stored=True)  # default analyzer already lowercases
return Schema(**schema_fields)
def index_objects(self):
for heroscript in self.heroscripts:
for obj in self.get_objects():
index_dir = os.path.join(self.indexpath, type(obj).__name__.lower())
ix = index.open_dir(index_dir)
writer = ix.writer()
writer.add_document(path=heroscript.path, **{k: str(v).lower() for k, v in obj.dict().items() if k in ix.schema.names()})
writer.commit()
def search(self, class_type, query):
index_dir = os.path.join(self.indexpath, class_type.__name__.lower())
ix = index.open_dir(index_dir)
qp = QueryParser("name", schema=ix.schema)
q = qp.parse(query)
with ix.searcher() as searcher:
results = searcher.search(q)
# return results
return [result["path"] for result in results]
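A minimal usage sketch for the class above; the `Person` model, both paths, and the query are illustrative assumptions, not part of this module:

```python
# Hypothetical usage: Person and the paths below are made up for
# illustration; only HeroScripts itself comes from this module.
from pydantic import BaseModel, Field

class Person(BaseModel):
    name: str = Field(..., json_schema_extra={"metadata": {"indexed": True}})
    length: int = 0

scripts = HeroScripts(
    class_types={"person": Person},
    path="~/heroscripts",        # directory of .md files containing hero blocks
    indexpath="/tmp/heroindex",  # where the whoosh indexes are written
)
print(scripts.search(Person, "aurelie"))  # -> paths of matching heroscripts
```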

View File

@@ -0,0 +1,82 @@
from pydantic import BaseModel, Field
from typing import Any, Dict, List, Type, TypeVar
import re
from colorama import Fore, Style
import hashlib
import json
import os
from heroscript.heroaction import HeroAction
from heroscript.tools import format_multiline_text
class HeroScriptMixin:
def heroscript(self) -> HeroAction:
class_name = self.__class__.__name__.lower()
prop_order = ["id", "oid", "name", "title", "description", "content"]
# Get all the properties of the object
props = list(self.__fields__.keys())
# Separate properties into those in prop_order and the rest
ordered_props = [prop for prop in prop_order if prop in props]
remaining_props = [prop for prop in props if prop not in prop_order]
# Sort the remaining properties
sorted_remaining_props = sorted(remaining_props)
# Combine the ordered properties and sorted remaining properties
sorted_props = ordered_props + sorted_remaining_props
lines = [f"!!{class_name}.define"]
for prop in sorted_props:
if prop in self.__fields__:
val = getattr(self, prop)
if isinstance(val, str):
if "\n" in val:
val = format_multiline_text(text=val)
elif any(c.isspace() for c in val):
val = f"'{val}'"
lines.append(f" {prop}:{val}")
result = "\n".join(lines)
return HeroAction(content=result)
@classmethod
def from_heroscript(cls, heroscript: str):
lines = heroscript.strip().split("\n")
class_name = lines[0].split("!!")[1].split(".")[0]
props = {}
multiline_prop = None
        multiline_value: List[str] = []
for line in lines[1:]:
if multiline_prop:
if line.strip() == "'":
# End of multiline text
min_indent = min(len(ml) - len(ml.lstrip()) for ml in multiline_value if ml.strip())
unindented_lines = [ml[min_indent:] for ml in multiline_value]
props[multiline_prop] = "\n".join(unindented_lines)
multiline_prop = None
multiline_value = []
else:
multiline_value.append(line)
else:
if ":" in line:
prop, value = line.split(":", 1)
prop = prop.strip()
value = value.strip()
if value == "'":
# Start of multiline text
multiline_prop = prop
else:
if value.startswith("'") and value.endswith("'"):
value = value[1:-1]
props[prop] = value
return cls(**props)
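A hedged round-trip sketch; `Color` is a hypothetical model and only the two mixin methods above are assumed:

```python
# Hypothetical round trip: Color is illustrative, not part of the library.
from pydantic import BaseModel

class Color(HeroScriptMixin, BaseModel):
    name: str
    length: int = 0

c = Color(name="aurelie", length=60)
action = c.heroscript()          # "!!color.define\n  name:aurelie\n  length:60"
c2 = Color.from_heroscript(action.content)
assert c2 == c
```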

View File

@@ -0,0 +1,4 @@
## heroscript
> not to be used yet

View File

@@ -0,0 +1,145 @@
from typing import List
import os
from colorama import Fore, Style
from herotools.texttools import dedent
import textwrap
#load the heroscripts from filesystem
def heroscript_blocks(path: str) -> List[str]:
    result: List[str] = []
    for root, dirs, files in os.walk(path):
        for file in files:
            if file.endswith(".md"):
                file_path = os.path.join(root, file)
                with open(file_path, "r") as f:
                    content = f.read()
                blocks = _extract_heroscript_blocks(content)
                result.extend(blocks)
    return result
def _extract_heroscript_blocks(content: str):
content=dedent(content)
blocks = []
lines = content.split("\n")
in_block = False
block_lines : List[str] = list()
for line in lines:
if line.startswith("```hero"):
in_block = True
block_lines = []
elif line.startswith("```") and in_block:
in_block = False
block = "\n".join(block_lines)
blocks.append(block)
elif in_block:
block_lines.append(line)
return blocks
def action_blocks(path: str = "", content:str = "") -> List[str]:
if content!="":
return __action_blocks_get(content)
res : List[str] = list()
for hscript in heroscript_blocks(path):
for actionscript in __action_blocks_get(hscript):
res.append(actionscript)
return res
def __action_blocks_get(content: str) -> List[str]:
content=dedent(content)
blocks = list()
lines = content.split("\n")
block_lines : List[str] = list()
herofound=False
for line in lines:
# print(line)
if line.startswith("!!"):
herofound=True
if block_lines: #means we found before
block = "\n".join(block_lines)
blocks.append(block)
block_lines = []
# print("f1")
block_lines.append(line)
elif line.strip() and not line.startswith(" ") and not line.startswith("\t") and block_lines:
block = "\n".join(block_lines)
blocks.append(block)
block_lines = []
herofound=False
elif herofound:
block_lines.append(line)
# print("append")
if block_lines:
block = "\n".join(block_lines)
blocks.append(block)
return blocks
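For orientation, a small content-mode illustration of the extractor above; the sample text is made up:

```python
# Illustrative: feed raw heroscript text straight to action_blocks().
sample = """
!!story.define
  name: 'test'

!!story.color
  color: red
"""
for block in action_blocks(content=sample):
    print("----")
    print(block)  # one block per !!action
```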
def myprint(obj):
class_name = f"{Fore.YELLOW}{obj.__class__.__name__}{Style.RESET_ALL}"
fields = [field for field in obj.__fields__ if field in obj.__dict__]
attributes = ', '.join(f"{Fore.LIGHTBLACK_EX}{field}{Style.RESET_ALL}={Fore.GREEN}'{getattr(obj, field)}'{Style.RESET_ALL}" for field in fields)
print( f"{class_name}({attributes})" )
#format text to be ready to be set in heroscript
def format_multiline_text(text: str) -> str:
text = dedent(text)
text = textwrap.indent(text, " ")
# Join the formatted lines with newline characters and add the required indentation
formatted_text = "'\n" + text + "\n '"
return formatted_text
#representation with colors of heroscript
def heroscript_repr(content:str) ->str:
lines = content.split("\n")
formatted_lines = []
for line in lines:
if line.startswith("!!"):
formatted_line = f"{Fore.RED}{line}{Style.RESET_ALL}"
elif ":" in line:
prop, value = line.split(":", 1)
prop = prop.strip()
value = value.strip()
if value.startswith("'") and value.endswith("'"):
value = f" {Fore.GREEN}{value}{Style.RESET_ALL}"
else:
value = f" {Fore.YELLOW}{value}{Style.RESET_ALL}"
formatted_line = f" {Fore.CYAN}{prop}{Style.RESET_ALL}:{value}"
else:
formatted_line = line
formatted_lines.append(formatted_line)
return "\n".join(formatted_lines)
def heroscript_print(content:str):
o=heroscript_repr(content)
print(o)
if __name__ == "__main__":
t=" something\n a\n\n bbbb"
print(dedent(t))
print(format_multiline_text(t))

View File

View File

@@ -0,0 +1,9 @@
from herolib.core.pathlib.pathlib import get_dir
from herolib.core.logger.model import Logger
def new(path: str) -> Logger:
p = get_dir(path=path, create=True)
return Logger(
path=p,
lastlog_time=0
)

View File

@@ -0,0 +1,3 @@
# This file is now empty as the log function has been moved to model.py
# It can be removed or kept as a placeholder if needed for future extensions.
# For now, we will keep it empty.

View File

@@ -0,0 +1,150 @@
import unittest
import os
import shutil
from herolib.core.logger.factory import new
from herolib.core.logger.model import LogItemArgs, LogType, Logger  # Import Logger class
from herolib.data.ourtime.ourtime import new as ourtime_new, now as ourtime_now
from herolib.core.pathlib.pathlib import get_file, ls, rmdir_all
class TestLogger(unittest.TestCase):
def setUp(self):
# Corresponds to testsuite_begin()
if os.path.exists('/tmp/testlogs'):
rmdir_all('/tmp/testlogs')
def tearDown(self):
# Corresponds to testsuite_end()
# if os.path.exists('/tmp/testlogs'):
# rmdir_all('/tmp/testlogs')
pass
def test_logger_functionality(self):
logger = new('/tmp/testlogs')
# Test stdout logging
logger.log(LogItemArgs(
cat='test-app',
log='This is a test message\nWith a second line\nAnd a third line',
logtype=LogType.STDOUT,
timestamp=ourtime_new('2022-12-05 20:14:35')
))
# Test error logging
logger.log(LogItemArgs(
cat='error-test',
log='This is an error\nWith details',
logtype=LogType.ERROR,
timestamp=ourtime_new('2022-12-05 20:14:35')
))
logger.log(LogItemArgs(
cat='test-app',
log='This is a test message\nWith a second line\nAnd a third line',
logtype=LogType.STDOUT,
timestamp=ourtime_new('2022-12-05 20:14:36')
))
logger.log(LogItemArgs(
cat='error-test',
log='''
This is an error
With details
''',
logtype=LogType.ERROR,
timestamp=ourtime_new('2022-12-05 20:14:36')
))
logger.log(LogItemArgs(
cat='error-test',
log='''
aaa
bbb
''',
logtype=LogType.ERROR,
timestamp=ourtime_new('2022-12-05 22:14:36')
))
logger.log(LogItemArgs(
cat='error-test',
log='''
aaa2
bbb2
''',
logtype=LogType.ERROR,
timestamp=ourtime_new('2022-12-05 22:14:36')
))
# Verify log directory exists
self.assertTrue(os.path.exists('/tmp/testlogs'), 'Log directory should exist')
# Get log file
files = ls('/tmp/testlogs')
self.assertEqual(len(files), 2) # Expecting two files: 2022-12-05-20.log and 2022-12-05-22.log
# Test search functionality
items_stdout = logger.search(
timestamp_from=ourtime_new('2022-11-01 20:14:35'),
timestamp_to=ourtime_new('2025-11-01 20:14:35'),
logtype=LogType.STDOUT
)
self.assertEqual(len(items_stdout), 2)
items_error = logger.search(
timestamp_from=ourtime_new('2022-11-01 20:14:35'),
timestamp_to=ourtime_new('2025-11-01 20:14:35'),
logtype=LogType.ERROR
)
self.assertEqual(len(items_error), 4)
# Test specific log content
found_error_log = False
for item in items_error:
if "This is an error\nWith details" in item.log:
found_error_log = True
break
self.assertTrue(found_error_log, "Expected error log content not found")
found_stdout_log = False
for item in items_stdout:
if "This is a test message\nWith a second line\nAnd a third line" in item.log:
found_stdout_log = True
break
self.assertTrue(found_stdout_log, "Expected stdout log content not found")
# Test search by category
items_test_app = logger.search(
timestamp_from=ourtime_new('2022-11-01 20:14:35'),
timestamp_to=ourtime_new('2025-11-01 20:14:35'),
cat='test-app'
)
self.assertEqual(len(items_test_app), 2)
items_error_test = logger.search(
timestamp_from=ourtime_new('2022-11-01 20:14:35'),
timestamp_to=ourtime_new('2025-11-01 20:14:35'),
cat='error-test'
)
self.assertEqual(len(items_error_test), 4)
# Test search by log content
items_with_aaa = logger.search(
timestamp_from=ourtime_new('2022-11-01 20:14:35'),
timestamp_to=ourtime_new('2025-11-01 20:14:35'),
log='aaa'
)
self.assertEqual(len(items_with_aaa), 2)
# Test search with timestamp range
items_specific_time = logger.search(
timestamp_from=ourtime_new('2022-12-05 22:00:00'),
timestamp_to=ourtime_new('2022-12-05 23:00:00'),
logtype=LogType.ERROR
)
self.assertEqual(len(items_specific_time), 2)
if __name__ == '__main__':
unittest.main()

View File

@@ -0,0 +1,72 @@
from enum import Enum
from typing import Optional
import os
from herolib.data.ourtime.ourtime import OurTime, now as ourtime_now
from herolib.core.texttools.texttools import name_fix, expand, dedent
from herolib.core.pathlib.pathlib import Path
class LogType(Enum):
STDOUT = "stdout"
ERROR = "error"
class LogItemArgs:
def __init__(self, cat: str, log: str, logtype: LogType, timestamp: Optional[OurTime] = None):
self.timestamp = timestamp
self.cat = cat
self.log = log
self.logtype = logtype
class Logger:
def __init__(self, path: Path, lastlog_time: int = 0):
self.path = path
self.lastlog_time = lastlog_time
def log(self, args_: LogItemArgs):
args = args_
t = args.timestamp if args.timestamp else ourtime_now()
# Format category (max 10 chars, ascii only)
args.cat = name_fix(args.cat)
if len(args.cat) > 10:
raise ValueError('category cannot be longer than 10 chars')
args.cat = expand(args.cat, 10, ' ')
args.log = dedent(args.log).strip()
logfile_path = os.path.join(self.path.path, f"{t.dayhour()}.log")
# Create log file if it doesn't exist
if not os.path.exists(logfile_path):
with open(logfile_path, 'w') as f:
pass # Create empty file
self.lastlog_time = 0 # make sure we put time again
with open(logfile_path, 'a') as f:
content = ''
# Add timestamp if we're in a new second
if t.unix() > self.lastlog_time:
content += f"\n{t.time().format_ss()}\n"
self.lastlog_time = t.unix()
# Format log lines
error_prefix = 'E' if args.logtype == LogType.ERROR else ' '
lines = args.log.split('\n')
for i, line in enumerate(lines):
if i == 0:
content += f"{error_prefix} {args.cat} - {line}\n"
else:
content += f"{error_prefix} {line}\n"
f.write(content.rstrip()) # Use rstrip to remove trailing whitespace
f.write('\n') # Add a newline after each log entry for consistency
class LogItem:
def __init__(self, timestamp: OurTime, cat: str, log: str, logtype: LogType):
self.timestamp = timestamp
self.cat = cat
self.log = log
self.logtype = logtype
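To make the on-disk format concrete (it is what the companion search module parses back), a hedged sketch of one error entry; the path and values are illustrative:

```python
# Illustrative only: what Logger.log() appends for one multi-line error.
# logger = new('/tmp/testlogs')   # from the factory module
# logger.log(LogItemArgs(
#     cat='error-test',
#     log='This is an error\nWith details',
#     logtype=LogType.ERROR,
#     timestamp=ourtime_new('2022-12-05 20:14:35'),
# ))
#
# /tmp/testlogs/2022-12-05-20.log then contains roughly:
#
# 20:14:35
# E error-test - This is an error
# E With details
```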

View File

@@ -0,0 +1,137 @@
import os
from typing import Optional, List
from herolib.core.texttools.texttools import name_fix
from herolib.data.ourtime.ourtime import OurTime, new as ourtime_new
from herolib.core.logger.model import Logger, LogItem, LogType
class SearchArgs:
def __init__(self, timestamp_from: Optional[OurTime] = None,
timestamp_to: Optional[OurTime] = None,
cat: str = "", log: str = "", logtype: Optional[LogType] = None,
maxitems: int = 10000):
self.timestamp_from = timestamp_from
self.timestamp_to = timestamp_to
self.cat = cat
self.log = log
self.logtype = logtype
self.maxitems = maxitems
def process(result: List[LogItem], current_item: LogItem, current_time: OurTime,
args: SearchArgs, from_time: int, to_time: int):
# Add previous item if it matches filters
log_epoch = current_item.timestamp.unix()
if log_epoch < from_time or log_epoch > to_time:
return
cat_match = (args.cat == '' or current_item.cat.strip() == args.cat)
log_match = (args.log == '' or args.log.lower() in current_item.log.lower())
logtype_match = (args.logtype is None or current_item.logtype == args.logtype)
if cat_match and log_match and logtype_match:
result.append(current_item)
def search(l: Logger, args_: SearchArgs) -> List[LogItem]:
args = args_
# Format category (max 10 chars, ascii only)
args.cat = name_fix(args.cat)
if len(args.cat) > 10:
raise ValueError('category cannot be longer than 10 chars')
timestamp_from = args.timestamp_from if args.timestamp_from else OurTime()
timestamp_to = args.timestamp_to if args.timestamp_to else OurTime()
# Get time range
from_time = timestamp_from.unix()
to_time = timestamp_to.unix()
    if from_time > to_time:
        raise ValueError(f'from_time cannot be after to_time: {from_time} > {to_time}')
result: List[LogItem] = []
# Find log files in time range
files = sorted(os.listdir(l.path.path))
for file in files:
if not file.endswith('.log'):
continue
# Parse dayhour from filename
dayhour = file[:-4] # remove .log
try:
file_time = ourtime_new(dayhour)
except ValueError:
continue # Skip if filename is not a valid time format
current_time = OurTime()
current_item = LogItem(OurTime(), "", "", LogType.STDOUT) # Initialize with dummy values
collecting = False
# Skip if file is outside time range
if file_time.unix() < from_time or file_time.unix() > to_time:
continue
# Read and parse log file
content = ""
try:
with open(os.path.join(l.path.path, file), 'r') as f:
content = f.read()
except FileNotFoundError:
continue
lines = content.split('\n')
for line in lines:
if len(result) >= args.maxitems:
return result
line_trim = line.strip()
if not line_trim:
continue
# Check if this is a timestamp line
if not (line.startswith(' ') or line.startswith('E')):
try:
current_time = ourtime_new(line_trim)
except ValueError:
continue # Skip if not a valid timestamp line
if collecting:
process(result, current_item, current_time, args, from_time, to_time)
collecting = False
continue
if collecting and len(line) > 14 and line[13] == '-':
process(result, current_item, current_time, args, from_time, to_time)
collecting = False
# Parse log line
is_error = line.startswith('E')
if not collecting:
# Start new item
cat_start = 2
cat_end = 12
log_start = 15
if len(line) < log_start:
continue # Line too short to contain log content
current_item = LogItem(
timestamp=current_time,
cat=line[cat_start:cat_end].strip(),
log=line[log_start:].strip(),
logtype=LogType.ERROR if is_error else LogType.STDOUT
)
collecting = True
else:
# Continuation line
if len(line_trim) < 16: # Check for minimum length for continuation line
current_item.log += '\n' + line_trim
else:
current_item.log += '\n' + line[15:].strip() # Use strip for continuation lines
# Add last item if collecting
if collecting:
process(result, current_item, current_time, args, from_time, to_time)
return result

View File

View File

@@ -0,0 +1,214 @@
from peewee import *
import time
from datetime import datetime
from typing import Optional, List, Dict, Any, Iterable, Union
import os
import logging
import traceback
# Configure database path
DB_DIR = os.path.expanduser('~/hero/var/logdb/')
DB_FILE = os.path.join(DB_DIR, 'logs.db')
# Create directory if it doesn't exist
os.makedirs(DB_DIR, exist_ok=True)
# Initialize database
database = SqliteDatabase(DB_FILE, pragmas={'journal_mode': 'wal'})
class BaseModel(Model):
"""Base model class for Peewee."""
class Meta:
database = database
def to_dict(self) -> Dict[str, Any]:
"""Convert model instance to dictionary."""
data = {}
for field_name in self._meta.fields:
field_value = getattr(self, field_name)
if field_name in ('time', 'last_seen') and isinstance(field_value, int):
# Convert epoch to a readable format for the frontend
data[field_name] = datetime.fromtimestamp(field_value).strftime('%d-%m %H:%M')
else:
data[field_name] = field_value
return data
class Log(BaseModel):
"""Model for INFO logs."""
time = IntegerField(default=lambda: int(time.time()), index=True)
email = CharField(max_length=255, null=True)
logmsg = TextField()
level = IntegerField(default=100)
cat = CharField(max_length=100, index=True, default="general")
payload = TextField(null=True)
payload_cat = CharField(max_length=100, null=True)
class Meta:
table_name = 'logs'
class Error(BaseModel):
"""Model for ERROR logs."""
time = IntegerField(default=lambda: int(time.time()), index=True)
last_seen = IntegerField(default=lambda: int(time.time()), index=True)
email = CharField(max_length=255, null=True)
logmsg = TextField()
stacktrace = TextField(null=True)
count = IntegerField(default=1)
cat = CharField(max_length=100, index=True, default="general")
payload = TextField(null=True)
payload_cat = CharField(max_length=100, null=True)
class Meta:
table_name = 'errors'
def init_db_logging():
"""Create tables if they don't exist."""
with database:
database.create_tables([Log, Error], safe=True)
class DatabaseLogHandler(logging.Handler):
"""A logging handler that writes logs to the Peewee database."""
def emit(self, record):
stacktrace = None
if record.exc_info:
stacktrace = logging.Formatter().formatException(record.exc_info)
if record.levelno >= logging.ERROR:
log_error(
msg=record.getMessage(),
cat=record.name,
stacktrace=stacktrace
)
else:
log_info(
msg=record.getMessage(),
level=record.levelno,
cat=record.name
)
def log_error(msg: str, cat: str = "general", email: Optional[str] = None, stacktrace: Optional[str] = None, payload: Optional[str] = None, payload_cat: Optional[str] = None):
"""Log an ERROR message to the database, handling duplicates."""
try:
log_info(msg=msg, cat=cat, email=email, payload=payload, payload_cat=payload_cat)
except Exception as e:
pass
try:
if not stacktrace:
# Capture the current stack trace if not provided
stacktrace = "".join(traceback.format_stack())
# Filter out irrelevant lines from the stack trace
if stacktrace:
lines = stacktrace.split('\n')
filtered_lines = [
line for line in lines
if 'python3.13/logging' not in line and 'src/mylogging.py' not in line
]
stacktrace = '\n'.join(filtered_lines)
one_day_ago = int(time.time()) - (24 * 3600)
# Look for a similar error in the last 24 hours from the same user
existing_error = Error.select().where(
(Error.logmsg == msg) &
(Error.email == email) &
(Error.last_seen >= one_day_ago)
).first()
if existing_error:
# If found, increment counter and update last_seen
existing_error.count += 1
existing_error.last_seen = int(time.time())
existing_error.stacktrace = stacktrace
existing_error.save()
print(existing_error)
else:
# Otherwise, create a new error record
Error.create(
logmsg=msg,
cat=cat,
email=email,
stacktrace=stacktrace,
payload=payload,
payload_cat=payload_cat
)
logging.info(f"Successfully logged new error: {msg}")
except Exception as e:
logging.error(f"Failed to log error to {DB_FILE}: {e}")
def log_info(msg: str, level: int = 0, cat: str = "general", email: Optional[str] = None, payload: Optional[str] = None, payload_cat: Optional[str] = None):
"""Log an INFO message to the database."""
try:
Log.create(logmsg=msg, level=level, cat=cat, email=email, payload=payload, payload_cat=payload_cat)
except Exception as e:
print(f"Failed to log info to {DB_FILE}: {e}")
def get_errors(search: Optional[str] = None, cat: Optional[str] = None) -> List[Dict[str, Any]]:
"""Get errors from the database with optional filters. Category search is prefix-based."""
query = Error.select().order_by(Error.last_seen.desc())
if search:
query = query.where(Error.logmsg.contains(search))
if cat and cat.strip():
query = query.where(Error.cat.startswith(cat.strip()))
return [e.to_dict() for e in query]
def get_logs(
search: Optional[str] = None,
cat: Optional[str] = None,
level: Optional[int] = None,
hours_ago: Optional[int] = None,
) -> List[Dict[str, Any]]:
"""Get logs from the database with optional filters. Category search is prefix-based."""
query = Log.select().order_by(Log.time.desc())
if search and search.strip():
query = query.where(Log.logmsg.contains(search))
if cat and cat.strip():
query = query.where(Log.cat.startswith(cat.strip()))
if level is not None:
query = query.where(Log.level <= level)
if hours_ago is not None:
time_ago = int(time.time()) - (hours_ago * 3600)
query = query.where(Log.time >= time_ago)
return [l.to_dict() for l in query]
def get_log_by_id(log_id: int) -> Optional[Dict[str, Any]]:
"""Get a single log by its ID."""
try:
log = Log.get_by_id(log_id)
return log.to_dict()
except Log.DoesNotExist:
return None
def delete_logs_older_than(minutes: int):
"""Delete logs older than a specified number of minutes."""
time_ago = int(time.time()) - (minutes * 60)
Log.delete().where(Log.time < time_ago).execute()
def delete_errors_older_than(minutes: int):
"""Delete errors older than a specified number of minutes."""
time_ago = int(time.time()) - (minutes * 60)
Error.delete().where(Error.time < time_ago).execute()
def get_unique_log_categories() -> List[str]:
"""Get unique log categories from the database."""
query = (Log
.select(Log.cat)
.where(Log.cat.is_null(False))
.distinct()
.order_by(Log.cat))
return [l.cat for l in query]
def get_unique_error_categories() -> List[str]:
"""Get unique error categories from the database."""
query = (Error
.select(Error.cat)
.where(Error.cat.is_null(False))
.distinct()
.order_by(Error.cat))
return [e.cat for e in query]
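A minimal end-to-end sketch using only names defined above; the categories and messages are examples:

```python
# Minimal sketch: create the tables, write one info and one error,
# then read the errors back.
init_db_logging()
log_info("service started", level=20, cat="boot")
try:
    1 / 0
except ZeroDivisionError:
    log_error("division failed", cat="math")
for err in get_errors(cat="math"):
    print(err["logmsg"], err["count"], err["last_seen"])

# Or route the standard logging module into the same tables:
import logging
logging.getLogger("myapp").addHandler(DatabaseLogHandler())
```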

View File

View File

@@ -0,0 +1,80 @@
import os
import re
import shutil
class Path:
def __init__(self, path: str):
self.path = os.path.expanduser(path)
def exists(self) -> bool:
return os.path.exists(self.path)
def is_file(self) -> bool:
return os.path.isfile(self.path)
def is_dir(self) -> bool:
return os.path.isdir(self.path)
def read(self) -> str:
with open(self.path, 'r') as f:
return f.read()
def write(self, content: str):
os.makedirs(os.path.dirname(self.path), exist_ok=True)
with open(self.path, 'w') as f:
f.write(content)
def delete(self):
if self.is_file():
os.remove(self.path)
elif self.is_dir():
os.rmdir(self.path)
    def list(self, recursive: bool = False, regex: list[str] | None = None) -> list[str]:
        files = []
        if self.is_dir():
            if recursive:
                for root, _, filenames in os.walk(self.path):
                    for filename in filenames:
                        full_path = os.path.join(root, filename)
                        relative_path = os.path.relpath(full_path, self.path)
                        if regex:
                            if any(re.match(r, relative_path) for r in regex):
                                files.append(relative_path)
else:
files.append(relative_path)
else:
for entry in os.listdir(self.path):
full_path = os.path.join(self.path, entry)
if os.path.isfile(full_path):
                    if regex:
                        if any(re.match(r, entry) for r in regex):
                            files.append(entry)
else:
files.append(entry)
return files
def get(path: str) -> Path:
return Path(path)
def get_dir(path: str, create: bool = False) -> Path:
p = Path(path)
if create and not p.exists():
os.makedirs(p.path, exist_ok=True)
return p
def get_file(path: str, create: bool = False) -> Path:
p = Path(path)
if create and not p.exists():
os.makedirs(os.path.dirname(p.path), exist_ok=True)
with open(p.path, 'w') as f:
pass # Create empty file
return p
def rmdir_all(path: str):
    if os.path.exists(path):
        shutil.rmtree(path)
def ls(path: str) -> list[str]:
return os.listdir(path)
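A small illustrative run of the helpers above; the /tmp paths are throwaway:

```python
# Illustrative usage of get_dir/get_file/Path.list/rmdir_all.
d = get_dir('/tmp/pathlib_demo', create=True)
f = get_file('/tmp/pathlib_demo/sub/hello.txt', create=True)
f.write('hi')                     # write() also creates parent dirs
print(d.list(recursive=True))     # ['sub/hello.txt']
rmdir_all('/tmp/pathlib_demo')
```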

View File

View File

@@ -0,0 +1,142 @@
import os
import re
from datetime import datetime
def name_fix(name: str) -> str:
# VLang's name_fix converts '-' to '_' and cleans up special chars.
# Python's re.sub can handle this.
name = re.sub(r'[^a-zA-Z0-9_ ]', '', name.replace('-', '_'))
return name.strip()
def expand(txt: str, length: int, expand_with: str) -> str:
# Pads the string to the specified length.
return txt.ljust(length, expand_with)
def dedent(text: str) -> str:
# Removes common leading whitespace from every line.
# This is a simplified version of textwrap.dedent
lines = text.splitlines()
if not lines:
return ""
# Find the minimum indentation of non-empty lines
min_indent = float('inf')
for line in lines:
if line.strip():
indent = len(line) - len(line.lstrip())
min_indent = min(min_indent, indent)
if min_indent == float('inf'): # All lines are empty or just whitespace
return "\n".join([line.strip() for line in lines])
dedented_lines = [line[min_indent:] for line in lines]
return "\n".join(dedented_lines)
def remove_empty_lines(text: str) -> str:
lines = text.splitlines()
return "\n".join([line for line in lines if line.strip()])
def remove_double_lines(text: str) -> str:
lines = text.splitlines()
cleaned_lines = []
prev_empty = False
for line in lines:
is_empty = not line.strip()
if is_empty and prev_empty:
continue
cleaned_lines.append(line)
prev_empty = is_empty
return "\n".join(cleaned_lines)
def ascii_clean(r: str) -> str:
return r.encode('ascii', 'ignore').decode('ascii')
def name_clean(r: str) -> str:
return re.sub(r'[^a-zA-Z0-9]', '', r)
def name_fix_keepspace(name_: str) -> str:
# Similar to name_fix but keeps spaces.
return re.sub(r'[^a-zA-Z0-9 ]', '', name_.replace('-', '_')).strip()
def name_fix_no_ext(name_: str) -> str:
return os.path.splitext(name_)[0]
def name_fix_snake_to_pascal(name: str) -> str:
return ''.join(word.capitalize() for word in name.split('_'))
def snake_case(name: str) -> str:
s1 = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', name)
return re.sub('([a-z0-9])([A-Z])', r'\1_\2', s1).lower()
def name_split(name: str) -> tuple[str, str]:
parts = name.split('.')
if len(parts) > 1:
return parts[0], '.'.join(parts[1:])
return name, ""
def cmd_line_args_parser(text: str) -> list[str]:
# A simple parser, might need more robust solution for complex cases
import shlex
return shlex.split(text)
def text_remove_quotes(text: str) -> str:
return re.sub(r'["\'].*?["\']', '', text)
def check_exists_outside_quotes(text: str, items: list[str]) -> bool:
# This is a simplified implementation. A full implementation would require
# more complex parsing to correctly identify text outside quotes.
cleaned_text = text_remove_quotes(text)
for item in items:
if item in cleaned_text:
return True
return False
def is_int(text: str) -> bool:
return text.isdigit()
def is_upper_text(text: str) -> bool:
return text.isupper() and text.isalpha()
def multiline_to_single(text: str) -> str:
return text.replace('\n', '\\n').replace('\r', '')
def split_smart(t: str, delimiter_: str) -> list[str]:
# This is a placeholder, a smart split would need to handle quotes and escapes
return t.split(delimiter_)
def version(text_: str) -> int:
    # Converts version strings like "v0.4.36" to 436 or "v1.4.36" to 1000436
match = re.match(r'v?(\d+)\.(\d+)\.(\d+)', text_)
if match:
major, minor, patch = int(match.group(1)), int(match.group(2)), int(match.group(3))
if major == 0:
return minor * 100 + patch
else:
return major * 1000000 + minor * 100 + patch
return 0
def format_rfc1123(dt: datetime) -> str:
return dt.strftime('%a, %d %b %Y %H:%M:%S GMT')
def to_array(r: str) -> list[str]:
if ',' in r:
return [item.strip() for item in r.split(',')]
return [item.strip() for item in r.splitlines() if item.strip()]
def to_array_int(r: str) -> list[int]:
return [int(item) for item in to_array(r) if item.isdigit()]
def to_map(mapstring: str, line: str, delimiter_: str = ' ') -> dict[str, str]:
# This is a simplified implementation. The VLang version is more complex.
# It assumes a space delimiter for now.
keys = [k.strip() for k in mapstring.split(',')]
values = line.split(delimiter_)
result = {}
val_idx = 0
for key in keys:
if key == '-':
val_idx += 1
continue
if val_idx < len(values):
result[key] = values[val_idx]
val_idx += 1
return result
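Two quick sanity examples for the trickier helpers above:

```python
# version(): 0.x versions collapse to minor*100+patch, others add major*1000000.
assert version("v0.4.36") == 436
assert version("v1.4.36") == 1000436
# to_map(): '-' columns in the key string are skipped.
assert to_map("name,-,age", "alice x 42") == {"name": "alice", "age": "42"}
```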

View File

View File

41
herolib/crypt/box/box.py Normal file
View File

@@ -0,0 +1,41 @@
from fastapi import HTTPException
from cryptography.fernet import Fernet
import redis
import base64
import hashlib
#TODO: KRISTOF FIX
def box_get():
r = redis.Redis(host='localhost', port=6379, db=0)
key = r.get('my.secret')
if key is None:
        raise HTTPException(status_code=404, detail="can't find my.secret in redis, it needs to be set: use secret-set to register your secret.")
hash_digest = hashlib.sha256(key).digest()
# Encode the hash digest to make it url-safe base64-encoded
key2 = base64.urlsafe_b64encode(hash_digest)
try:
f = Fernet(key2)
except Exception as e:
# if str(e).find("Resource Missing")>0:
# raise HTTPException(status_code=400, detail="Could not find account with pubkey: "+account_keypair.public_key)
raise HTTPException(status_code=400, detail=str(e))
return f
def box_secret_set(secret: str):
    r = redis.Redis(host='localhost', port=6379, db=0)
    r.setex('my.secret', 43200, secret)  # keep the secret for 12 hours
    box_get()
return "OK"

View File

@@ -0,0 +1,26 @@
from fastapi import APIRouter, HTTPException,Response
from pydantic import BaseModel, constr, Field
from secret.box import box_secret_set,box_get
#TODO: KRISTOF FIX
router = APIRouter()
##############POSITION
class BoxSecretSetRequest(BaseModel):
    secret: str = Field(..., description="a well chosen secret key, never forget this key or you will lose your assets")
@router.post("/secret",description="Set your secret for your hero, will be kept for 12 hours")
async def set_secret(request: BoxSecretSetRequest):
box_secret_set(secret=request.secret)
return Response(content="OK", media_type="text/plain")
@router.get("/secret",description="Check if it exists.")
async def secret_check():
b=box_get()
return Response(content="OK", media_type="text/plain")

0
herolib/data/__init__.py Normal file
View File

View File

View File

@@ -0,0 +1,123 @@
from datetime import datetime, timedelta
import re
class OurTime:
    def __init__(self, dt: datetime | None = None):
self._dt = dt if dt else datetime.min
def __str__(self) -> str:
return self.str()
def str(self) -> str:
if self._dt == datetime.min:
return ""
return self._dt.strftime('%Y-%m-%d %H:%M')
def day(self) -> str:
if self._dt == datetime.min:
return ""
return self._dt.strftime('%Y-%m-%d')
def key(self) -> str:
if self._dt == datetime.min:
return ""
return self._dt.strftime('%Y_%m_%d_%H_%M_%S')
def md(self) -> str:
if self._dt == datetime.min:
return ""
return self._dt.strftime('%Y-%m-%d %H:%M:%S')
def unix(self) -> int:
if self._dt == datetime.min:
return 0
return int(self._dt.timestamp())
def empty(self) -> bool:
return self._dt == datetime.min
def dayhour(self) -> str:
if self._dt == datetime.min:
return ""
return self._dt.strftime('%Y-%m-%d-%H')
def time(self):
# This is a simplified representation, as VLang's time() returns a time object.
# Here, we return self to allow chaining format_ss().
return self
def format_ss(self) -> str:
if self._dt == datetime.min:
return ""
return self._dt.strftime('%H:%M:%S')
def warp(self, expression: str):
if self._dt == datetime.min:
return
parts = expression.split()
for part in parts:
match = re.match(r'([+-]?\d+)([smhdwMQY])', part)
if not match:
continue
value = int(match.group(1))
unit = match.group(2)
if unit == 's':
self._dt += timedelta(seconds=value)
elif unit == 'm':
self._dt += timedelta(minutes=value)
elif unit == 'h':
self._dt += timedelta(hours=value)
elif unit == 'd':
self._dt += timedelta(days=value)
elif unit == 'w':
self._dt += timedelta(weeks=value)
elif unit == 'M':
# Approximate months, for more accuracy, a proper dateutil.relativedelta would be needed
self._dt += timedelta(days=value * 30)
elif unit == 'Q':
self._dt += timedelta(days=value * 90)
elif unit == 'Y':
self._dt += timedelta(days=value * 365)
def now() -> OurTime:
return OurTime(datetime.now())
def new(time_str: str) -> OurTime:
if not time_str:
return OurTime()
    formats = [
        '%Y-%m-%d %H:%M:%S',
        '%Y-%m-%d %H:%M',
        '%Y-%m-%d %H',
        '%Y-%m-%d',
        '%Y-%m-%d-%H',  # matches dayhour(), used for log file names
        '%d-%m-%Y %H:%M:%S',
        '%d-%m-%Y %H:%M',
        '%d-%m-%Y %H',
        '%d-%m-%Y',
        '%H:%M:%S',  # For time() and format_ss() usage
    ]
for fmt in formats:
try:
dt = datetime.strptime(time_str, fmt)
return OurTime(dt)
except ValueError:
pass
    # Handle relative time expressions such as "+1h" or "-2d +5m";
    # warp() silently ignores unknown input, so validate first instead of
    # relying on an exception that never fires.
    if re.fullmatch(r'(\s*[+-]?\d+[smhdwMQY])+\s*', time_str):
        temp_time = now()
        temp_time.warp(time_str)
        return temp_time
raise ValueError(f"Could not parse time string: {time_str}")
def new_from_epoch(epoch: int) -> OurTime:
return OurTime(datetime.fromtimestamp(epoch))
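A short illustration of parsing, formatting, and warping:

```python
# Illustrative: parse, format, and shift a timestamp.
t = new('2022-12-05 20:14:35')
print(t.dayhour())                     # 2022-12-05-20
t.warp('+2h')
print(t.str())                         # 2022-12-05 22:14
print(new_from_epoch(t.unix()).md())   # 2022-12-05 22:14:35
```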

View File

View File

@@ -0,0 +1 @@
../../../../tfgrid_research/tfdev/research/scrape_dynamic

View File

@@ -0,0 +1 @@
../../../../tfgrid_research/tfdev/research/scrape_fast

View File

@@ -0,0 +1 @@
../../../../tfgrid_research/tfdev/research/scrape_scapegraph

View File

View File

@@ -0,0 +1,31 @@
import mimetypes
import os
def check_and_add_extension(file_path: str) -> str:
# Only check if there's no extension
if not os.path.splitext(file_path)[1]:
# Read the file content
with open(file_path, 'rb') as f:
content = f.read(2048) # Read the first 2048 bytes for detection
# Detect content type
content_type = detect_content_type(content)
extension = mimetypes.guess_extension(content_type)
if extension:
new_file_path = file_path + extension
os.rename(file_path, new_file_path)
return new_file_path
return file_path
def detect_content_type(content: bytes) -> str:
# Simple content-based detection
if content.startswith(b'\xff\xd8'):
return 'image/jpeg'
if content.startswith(b'\x89PNG'):
return 'image/png'
if content.startswith(b'GIF'):
return 'image/gif'
# Add more checks as needed
return 'application/octet-stream'
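A self-contained illustration using PNG magic bytes; the temp file is throwaway:

```python
import os
import tempfile

# Write PNG magic bytes into an extension-less temp file, then fix it.
fd, path = tempfile.mkstemp()        # mkstemp gives a file with no extension
with os.fdopen(fd, 'wb') as fh:
    fh.write(b'\x89PNG\r\n\x1a\n' + b'\x00' * 16)
print(check_and_add_extension(path)) # renamed to end with .png
```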

270
herolib/tools/gitscanner.py Normal file
View File

@@ -0,0 +1,270 @@
import sys
import os
# Add the parent directory of herotools to the Python module search path
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
import time
import json
import subprocess
from typing import Optional,List
import redis
from herotools.logger import logger
from herotools.texttools import name_fix
from enum import Enum
from dataclasses import dataclass
import git
# Initialize Redis client
redis_client = redis.StrictRedis(host='localhost', port=6379, db=0)
# Define the ChangeType Enum
class ChangeType(Enum):
DEL = 'del'
MOD = 'mod'
NEW = 'new'
@dataclass
class FileChange:
commit_hash: str
commit_time: str
path: str #relative path in the repo
change_type: ChangeType
class Repo:
def __init__(self, cat: str, account: str, name: str, path: str):
self.cat = cat
self.account = account
self.name = name
self.path = path
self.hash_last_found: Optional[float] = None
self.hash_last_processed: Optional[str] = None
self.lastcheck: Optional[float] = None
def __str__(self):
return json.dumps({
"cat": self.cat,
"account": self.account,
"name": self.name,
"path": self.path,
"hash_last_found": self.hash_last_found,
"hash_last_processed": self.hash_last_processed,
"lastcheck": self.lastcheck
}, indent=2)
def _redis_key(self) -> str:
return f"gitcheck:{self.cat}:{self.account}:{self.name}"
def save_to_redis(self):
redis_client.set(self._redis_key(), json.dumps(self.__dict__))
@staticmethod
def load_from_redis(cat: str, account: str, name: str) -> Optional['Repo']:
redis_key = f"gitcheck:{cat}:{account}:{name}"
data = redis_client.get(redis_key)
if data:
data = json.loads(data)
repo = Repo(data["cat"], data["account"], data["name"], data["path"])
repo.hash_last_found = data.get("hash_last_found")
repo.hash_last_processed = data.get("hash_last_processed")
repo.lastcheck = data.get("lastcheck")
return repo
return None
def get_remote_commit_hash(self, branch: str) -> str:
"""Get the latest commit hash from the remote repository."""
result = subprocess.run(
['git', 'ls-remote', 'origin', f'refs/heads/{branch}'],
cwd=self.path,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True
)
if result.returncode != 0:
raise Exception(f"Error fetching remote commit hash: {result.stderr}")
return result.stdout.split()[0]
def get_local_commit_hash(self) -> str:
"""Get the latest commit hash from the local repository."""
result = subprocess.run(
['git', 'rev-parse', 'HEAD'],
cwd=self.path,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True
)
if result.returncode != 0:
raise Exception(f"Error fetching local commit hash: {result.stderr}")
return result.stdout.strip()
def get_current_branch(self) -> str:
result = subprocess.run(
['git', 'branch', '--show-current'],
cwd=self.path,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True
)
if result.returncode != 0:
raise Exception(f"Error fetching local branch name: {result.stderr}")
return result.stdout.split()[0]
def get_remote_default_branch(self) -> str:
result = subprocess.run(
['git', 'ls-remote', '--symref', 'origin', 'HEAD'],
cwd=self.path,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True
)
if result.returncode != 0:
raise Exception(f"Error fetching local branch name: {result.stderr}")
return result.stdout.split()[1].split('/')[-1]
def should_check_again(self) -> bool:
"""Determine if we should check the repository again based on the last check time."""
if self.lastcheck is None:
return True
return (time.time() - self.lastcheck) > 60
def update_last_check_time(self) -> None:
"""Update the last check time."""
self.lastcheck = time.time()
self.save_to_redis()
def log_change(self, epoch_time: float) -> None:
"""Log a detected change in Redis."""
self.hash_last_found = epoch_time
self.save_to_redis()
    def check_for_changes(self, branch: str = 'main') -> None:
        """Check the repository for updates and log changes if found."""
        if not self.should_check_again():
            print("WAIT TO CHECK FOR CHANGES")
            return
        try:
            diff_commits = self.get_local_remote_diff_commits(branch)
            if diff_commits:
                print("FOUND SOME CHANGES")
                self.log_change(time.time())
                file_changes = self.get_file_changes_from_commits(diff_commits)
                self.print_file_changes(file_changes)
            else:
                print("NO CHANGES FOUND")
            self.update_last_check_time()
        except Exception as e:
            print(f"An error occurred while checking repo {self.path}: {e}")
def get_local_remote_diff_commits(self, branch: str) -> List[git.Commit]:
# Open the repository
repo = git.Repo(self.path)
# Get the local branch
local_branch = repo.heads[branch]
# Get the remote reference for the branch
remote_ref = repo.remotes.origin.refs[branch]
# Fetch the latest changes from the remote
repo.remotes.origin.fetch()
# Get the commit hashes of the local and remote branches
local_commit = local_branch.commit
remote_commit = remote_ref.commit
if local_commit == remote_commit:
return []
# Get the common ancestor commit
base_commit = repo.merge_base(local_commit, remote_commit)[0]
# Get the ahead and behind commits
ahead_commits = list(repo.iter_commits(f"{base_commit}..{local_commit}"))
behind_commits = list(repo.iter_commits(f"{base_commit}..{remote_commit}"))
# Combine the ahead and behind commits
diff_commits = ahead_commits + behind_commits
return diff_commits
def get_file_changes_from_commits(self, commit_list: List[git.Commit]) -> List[FileChange]:
file_changes = []
for commit in commit_list:
# print(commit)
diffs = commit.diff(self.hash_last_processed, create_patch=True)
# print(diffs)
for diff in diffs:
if diff.deleted_file:
change_type = ChangeType.DEL
elif diff.new_file:
change_type = ChangeType.NEW
else:
change_type = ChangeType.MOD
file_change = FileChange(
commit_hash=commit.hexsha,
commit_time=str(commit.committed_datetime),
path=diff.b_path if diff.new_file else diff.a_path,
change_type=change_type
)
file_changes.append(file_change)
return file_changes
def print_file_changes(self, file_changes: List[FileChange]):
for file_change in file_changes:
print("------------------------------------")
print(f"Commit Hash: {file_change.commit_hash}")
print(f"Commit Time: {file_change.commit_time}")
print(f"File Path: {file_change.path}")
print(f"Change Type: {file_change.change_type.value}")
print("------------------------------------")
def gitscan(path: str, cat: str) -> None:
"""Walk over directories to find Git repositories and check them."""
path = os.path.abspath(os.path.expanduser(path))
for root, dirs, files in os.walk(path):
if '.git' in dirs:
accountname = os.path.basename(os.path.dirname(root))
reponame = os.path.basename(root)
repo = Repo.load_from_redis(cat, accountname, reponame)
if repo is None:
repo = Repo(cat, accountname, reponame, root)
branch = repo.get_current_branch()
logger.debug(f"root: {root}")
logger.debug(f"accountname: {accountname}")
logger.debug(f"reponame: {reponame}")
logger.debug(f"branch: {branch}")
logger.debug(f"repo: {repo}")
repo.check_for_changes(branch)
dirs[:] = [] # Don't go deeper into subdirectories
else:
# Filter out any .git directories from further traversal
dirs[:] = [d for d in dirs if d != '.git']
def print_redis_client():
cursor = 0
while True:
cursor, keys = redis_client.scan(cursor)
for key in keys:
value = redis_client.get(key)
print(key)
print(value)
print()
if cursor == 0:
break
if __name__ == "__main__":
# print_redis_client()
mypath = "~/code/git.threefold.info/projectmycelium"
category = 'mycat'
gitscan(path=mypath, cat=category)
# print_redis_client()

39
herolib/tools/logger.py Normal file
View File

@@ -0,0 +1,39 @@
import logging
import colorlog
log_colors_config = {
'DEBUG': 'cyan',
'INFO': 'green',
'WARNING': 'yellow',
'ERROR': 'red',
'CRITICAL': 'bold_red',
}
secondary_log_colors_config = {
'name': {
'DEBUG': 'blue',
'INFO': 'blue',
'WARNING': 'blue',
'ERROR': 'blue',
'CRITICAL': 'blue'
},
'levelname': log_colors_config
}
formatter = colorlog.ColoredFormatter(
'%(log_color)s%(asctime)s - %(name_log_color)s%(name)s - %(levelname_log_color)s%(levelname)s - %(message)s',
datefmt='%Y-%m-%d %H:%M:%S',
log_colors=log_colors_config,
secondary_log_colors=secondary_log_colors_config
)
# Create a handler
handler = logging.StreamHandler()
handler.setFormatter(formatter)
# Get the root logger
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
logger.addHandler(handler)
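A quick smoke test of the colored output, in the repo's usual `__main__` style:

```python
if __name__ == "__main__":
    logger.debug("debug message")
    logger.info("info message")
    logger.warning("warning message")
    logger.error("error message")
```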

13
herolib/tools/md5.py Normal file
View File

@@ -0,0 +1,13 @@
import hashlib
from typing import List
def file_md5(file_path: str) -> str:
"""
Compute the MD5 hash of the file content.
"""
hash_md5 = hashlib.md5()
with open(file_path, "rb") as f:
for chunk in iter(lambda: f.read(4096), b""):
hash_md5.update(chunk)
return hash_md5.hexdigest()

55
herolib/tools/ourtime.py Normal file
View File

@@ -0,0 +1,55 @@
from datetime import datetime, timezone, timedelta
import re
def epoch_get(deadline: str) -> int:
"""
Set the deadline based on various input formats.
Supports:
- Relative: +1h (hours), +2d (days), +1w (week), +1m (month)
- Absolute: 20/10/2024, 20/10, 20/10/24 (all same day)
If hour not specified, defaults to midday (noon).
Returns the deadline as a Unix timestamp (epoch).
"""
now = datetime.now(timezone.utc)
# Check for relative time format
relative_match = re.match(r'\+(\d+)([hdwm])', deadline)
if relative_match:
amount, unit = relative_match.groups()
amount = int(amount)
if unit == 'h':
delta = timedelta(hours=amount)
elif unit == 'd':
delta = timedelta(days=amount)
elif unit == 'w':
delta = timedelta(weeks=amount)
elif unit == 'm':
delta = timedelta(days=amount * 30) # Approximate
new_deadline = now + delta
return int(new_deadline.timestamp())
# Check for absolute date format
date_formats = ['%d/%m/%Y', '%d/%m/%y', '%d/%m']
for fmt in date_formats:
try:
date_obj = datetime.strptime(deadline, fmt)
if fmt == '%d/%m':
# If year is not provided, use the current year
date_obj = date_obj.replace(year=now.year)
# If the resulting date is in the past, assume next year
if date_obj.replace(tzinfo=timezone.utc) < now:
date_obj = date_obj.replace(year=date_obj.year + 1)
# Set time to noon (12:00)
date_obj = date_obj.replace(hour=12, minute=0, second=0, microsecond=0, tzinfo=timezone.utc)
return int(date_obj.timestamp())
except ValueError:
continue
raise ValueError("Invalid deadline format. Use +Xh/d/w/m for relative or DD/MM/YYYY for absolute dates.")
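Two illustrative calls against the parser above:

```python
# Illustrative: relative and absolute deadlines as epoch seconds.
print(epoch_get('+2d'))          # now plus two days
print(epoch_get('20/10/2030'))   # that day at 12:00 UTC
```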

View File

@@ -0,0 +1,26 @@
import os
def expand_path(path: str) -> str:
"""
Expand ~ to the user's home directory and return the absolute path.
"""
return os.path.abspath(os.path.expanduser(path))
def remove_file_if_exists(file_path):
try:
# This will remove the file or symlink, regardless of whether
# it's a regular file, a directory, or a broken symlink
os.remove(file_path)
except FileNotFoundError:
# File doesn't exist, so we don't need to do anything
pass
except IsADirectoryError:
# It's a directory, so we use rmdir instead
os.rmdir(file_path)
except PermissionError:
print(f"Permission denied: Unable to remove {file_path}")
except Exception as e:
print(f"An error occurred while trying to remove {file_path}: {str(e)}")

110
herolib/tools/texttools.py Normal file
View File

@@ -0,0 +1,110 @@
import re
import unicodedata
import random
def description_fix(description):
description = description.lower()
description = unicodedata.normalize('NFKD', description).encode('ASCII', 'ignore').decode('ASCII')
description = re.sub(r'[^a-z0-9\s]', '', description)
return description.strip()
# def name_fix(name: str) -> str:
# """
# Normalize the string to lowercase ASCII, replace spaces and specific punctuations with underscores,
# and remove non-ASCII characters.
# """
# name = name.lower()
# name = unicodedata.normalize('NFKD', name).encode('ascii', 'ignore').decode('ascii')
# name = re.sub(r'[ :;!]', '_', name) # Replace spaces and specific punctuations with underscores
# name = re.sub(r'\W+', '', name) # Remove any other non-word characters
# return name
def name_fix(name: str) -> str:
"""
Normalize the string to lowercase ASCII, replace spaces and specific punctuations with underscores,
maintain dots, and remove non-ASCII characters.
"""
name = name.lower()
name = unicodedata.normalize('NFKD', name).encode('ascii', 'ignore').decode('ascii')
name = re.sub(r'[ :;!]', '_', name) # Replace spaces and specific punctuations with underscores
name = re.sub(r'[^\w._]+', '', name) # Remove any non-word characters except dots and underscores
return name
def name_obfuscate(name):
# Define a mapping of consonants to their obfuscated counterparts
consonant_map = {
'b': 'p', 'c': 'k', 'd': 't', 'f': 'v', 'g': 'j', 'h': 'x',
'j': 'q', 'k': 'c', 'l': 'r', 'm': 'n', 'n': 'm', 'p': 'b',
'q': 'g', 'r': 'l', 's': 'z', 't': 'd', 'v': 'f', 'w': 'y',
'x': 'h', 'y': 'w', 'z': 's'
}
# Define a mapping of vowels to their obfuscated counterparts
vowel_map = {
'a': 'e', 'e': 'i', 'i': 'o', 'o': 'u', 'u': 'a'
}
# Convert the name to lowercase
name = name.lower()
# Split the name into words
words = name.split()
obfuscated_words = []
for word in words:
obfuscated_word = ''
for char in word:
if char in vowel_map:
# Obfuscate vowels
obfuscated_word += vowel_map[char]
elif char in consonant_map:
# Obfuscate consonants
obfuscated_word += consonant_map[char]
else:
# Keep non-alphabetic characters unchanged
obfuscated_word += char
obfuscated_words.append(obfuscated_word)
# Join the obfuscated words back into a single string
obfuscated_name = ' '.join(obfuscated_words)
# Capitalize the first letter of each word
obfuscated_name = obfuscated_name.title()
return obfuscated_name
def dedent(content: str) -> str:
# Split the input content into lines
lines = content.splitlines()
# Remove leading and trailing empty lines
while lines and not lines[0].strip():
lines.pop(0)
while lines and not lines[-1].strip():
lines.pop()
if not lines:
return ""
# Find the minimum indentation (leading spaces) in all non-empty lines
min_indent = None
for line in lines:
stripped_line = line.lstrip()
if stripped_line: # Only consider non-empty lines
leading_spaces = len(line) - len(stripped_line)
if min_indent is None or leading_spaces < min_indent:
min_indent = leading_spaces
# Dedent each line by the minimum indentation found
dedented_lines = [line[min_indent:] if len(line) > min_indent else line for line in lines]
# Join the dedented lines back into a single string
return "\n".join(dedented_lines)
if __name__ == "__main__":
print("fixed name:", name_fix("John Doe"))
print("obfuscated name:", name_obfuscate("John Doe"))

0
herolib/web/__init__.py Normal file
View File

View File

View File

@@ -0,0 +1,94 @@
from herotools.logger import logger
from bs4 import BeautifulSoup
import re
from typing import Callable
from herotools.texttools import name_fix
# Define the type for the content and link fetching functions
LinkFetcher = Callable[[str, str, str, str, str], str]
ContentFetcher = Callable[[str, str, str, str], str]
# Private functions to be used internally
def _get_link(language: str, prefix: str, site_name: str, pagename: str, name: str) -> str:
# Replace this with your logic to get the actual link
logger.debug(f"_get_link: {language[:10]:<10} {site_name}:{pagename}:{name}")
return f"{prefix}{language}/{site_name}/{pagename}/{name}.jpg"
def _get_content(language: str, site_name: str, pagename: str, name: str) -> str:
# Replace this with your logic to get the actual content
logger.debug(f"_get_content: {language[:10]:<10} {site_name}:{pagename}:{name}")
return f"Replaced text for {name} on page {pagename} in {language} language on {site_name} site"
def _process_html(language: str, prefix: str, site_name: str, pagename: str, html_content: str) -> str:
"""
Function to process HTML and replace content based on tags.
This allows us to work with templates and get content based on language to replace in HTML.
"""
language = name_fix(language)
site_name = name_fix(site_name)
pagename = name_fix(pagename)
prefix = prefix.strip()
if not prefix.endswith('/'):
prefix += '/'
soup = BeautifulSoup(html_content, 'html.parser')
# Find all elements with class names starting with !!img: or !!txt:
for element in soup.find_all(class_=re.compile(r'!!(img|txt):(.+)')):
for cls in element['class']:
if cls.startswith('!!img:'):
name = cls.split(':')[1]
name = name_fix(name)
# Get the link to replace the src attribute in !!img: elements
link = _get_link(language=language, prefix=prefix, site_name=site_name, pagename=pagename, name=name)
if element.name == 'img':
element['src'] = link
elif 'src' in element.attrs:
element['src'] = link # In case the element is not an img but has a src attribute
elif cls.startswith('!!txt:'):
name = cls.split(':')[1]
name = name_fix(name)
# Get the content to replace the text in !!txt: elements
content = _get_content(language=language, site_name=site_name, pagename=pagename, name=name)
element.string = content
# Output the modified HTML
return str(soup)
# Public function to process the HTML content
def process(language: str, prefix: str, site_name: str, pagename: str, html_content: str) -> str:
"""
Public function to process HTML and replace content based on tags.
This function wraps the internal _process_html function.
"""
return _process_html(language=language, prefix=prefix, site_name=site_name, pagename=pagename, html_content=html_content)
# Sample usage with a given language, site name, page name, and HTML content
if __name__ == "__main__":
# Example HTML content
html_content = '''
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Sample Page</title>
</head>
<body>
<h2 class="mb-6 is-size-1 is-size-3-mobile has-text-weight-bold !!txt:title1">Take care of your performance every day.</h2>
<img class="responsive !!img:logo" src="old-link.jpg" alt="Company Logo">
<p class="content !!txt:description">This is a sample description text.</p>
</body>
</html>
'''
# Process the HTML content for a specific language, site name, and page
language: str = "en"
site_name: str = "ExampleSite"
pagename: str = "HomePage"
prefix: str = "http://localhost/images/"
processed_html: str = process(language=language, prefix=prefix, site_name=site_name, pagename=pagename, html_content=html_content)
# Print the modified HTML
print(processed_html)

View File

@@ -0,0 +1,172 @@
import sys
import os
# Add the parent directory of herotools to the Python module search path
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
from herotools.logger import logger
from markdown_it import MarkdownIt
from markdown_it.tree import SyntaxTreeNode
import re
from enum import Enum
from herotools.texttools import name_fix
from mdformat.renderer import MDRenderer
from urllib.parse import urlparse
class ImageType(Enum):
JPEG = 'jpeg'
PNG = 'png'
GIF = 'gif'
OTHER = 'other'
def get_link_page(prefix: str, linkname: str, sitename: str, name: str) -> str:
    """
    Generates a page link based on sitename and name.
    Args:
        prefix (str): URL prefix to prepend.
        linkname (str): The text shown for the link.
        sitename (str): The name of the site.
        name (str): The name of the page.
    Returns:
        str: The generated link.
    """
    logger.debug(f"get_link_page: {prefix[:60]:<60} {linkname} {sitename}:{name}")
    return f"[{linkname}]({prefix}/{sitename}/{name})"
def get_link_image(prefix: str, sitename: str, name: str, image_type: ImageType) -> str:
    """
    Generates an image link based on sitename, name and image type.
    Args:
        prefix (str): URL prefix to prepend.
        sitename (str): The name of the site.
        name (str): The name of the image.
        image_type (ImageType): The type of the image.
    Returns:
        str: The generated link.
    """
    logger.debug(f"get_link_image: {prefix[:60]:<60} {sitename}:{name}")
    return f"![]({prefix}/{sitename}/{name})"
def get_include(sitename: str, name: str) -> str:
"""
Generates an include directive link based on sitename and name.
Args:
sitename (str): The name of the site.
name (str): The name of the page to include.
Returns:
str: The generated include directive.
"""
logger.debug(f"get_include: {sitename}:{name}")
return f"include: {sitename}/{name}"
def replace(prefix: str, markdown: str) -> str:
    """
    Finds all image links, markdown page links, and custom include directives in the provided markdown text
    and replaces them using the appropriate functions.
    Args:
        prefix (str): URL prefix used when rewriting links.
        markdown (str): The markdown content.
    Returns:
        str: The modified markdown content with updated links.
    """
# Initialize the Markdown parser
md = MarkdownIt()
tokens = md.parse(markdown)
ast = SyntaxTreeNode(tokens)
print(ast.pretty(indent=2, show_text=True))
def process_node(node: SyntaxTreeNode):
# from IPython import embed; embed()
def get_new_url(url: str):
logger.debug(f"url: {url}")
parsed_url = urlparse(url)
# site_name = parsed_url.netloc
image_path = parsed_url.path
logger.debug(f"parsed_url: {parsed_url}")
# prefix = prefix.rstrip('/')
# image_path = image_path.strip('/')
new_url = f"{prefix.rstrip('/')}/{image_path.strip('/')}"
logger.debug(f"new_url: {new_url}")
return new_url
if node.type == 'image':
# Process image link
url = node.attrs.get('src', '')
new_url = get_new_url(url)
node.attrs['src'] = new_url
elif node.type == 'link':
# Process markdown page link
url = node.attrs.get('href', '')
new_url = get_new_url(url)
node.attrs['href'] = new_url
# Recursively process child nodes
for child in node.children or []:
process_node(child)
    def replace_include_directives(match: re.Match) -> str:
        """
        Replaces custom include directives with appropriate links.
        Args:
            match (re.Match): The match object containing the found include directive.
        Returns:
            str: The generated link for the include directive.
        """
        url = match.group(1)
        if ':' in url:
            site_name, page = url.split(':', 1)
        else:
            site_name = ""
            page = url
        if not page.endswith('.md'):
            page += '.md'
        page_name = page.split('/')[-1]
        return get_include(site_name, page_name)
# Process the root node
process_node(ast)
# Convert the AST back to markdown
renderer = MDRenderer()
options = {}
env = {}
rendered_markdown = renderer.render(tokens, options, env)
# include_pattern = re.compile(r"!!include page:'(.*?)'")
# rendered_markdown = include_pattern.sub(replace_include_directives, rendered_markdown)
return rendered_markdown
if __name__ == "__main__":
text = """
![Image description](https://example.com/image.png)
[Page link](sitename:some/path/to/page.md)
!!include page:'mypage'
!!include page:'mypage.md'
!!include page:'mysite:mypage
!!include page:'mysite:mypage'
!!include page:'mysite:mypage.md'
"""
print(text)
text2=replace("http://localhost:8080/pre/", text)
print(text2)

View File

@@ -0,0 +1,94 @@
import os
import re
from typing import Callable
from herotools.logger import logger
from herotools.md5 import file_md5
from herotools.texttools import name_fix
def _example_set_file(site_name: str, path: str, md5: str) -> None:
# Placeholder for actual implementation
logger.debug(f"set_file : site_name={site_name[:20]:<20} {path}")
def _example_set_img(site_name: str, path: str, md5: str) -> None:
# Placeholder for actual implementation
logger.debug(f"set_img : site_name={site_name[:20]:<20} {path}")
def _example_set_markdown(
site_name: str, path: str, md5: str, content: str
) -> None:
# Placeholder for actual implementation
logger.debug(f"set_markdown : site_name={site_name[:20]:<20} {path}")
def _example_set_site(site_name: str, path: str) -> None:
# Placeholder for actual implementation
logger.info(f"set_site : site_name={site_name[:20]:<20} {path}")
def _site_process_action(
site_name: str,
site_path: str,
set_file: Callable[[str, str, str], None],
set_img: Callable[[str, str, str], None],
set_markdown: Callable[[str, str, str, str], None],
) -> None:
logger.debug(f"site process: {site_path[:60]:<60} -> {site_name}")
for root, _, files in os.walk(site_path):
for file in files:
file_path = os.path.join(root, file)
file_path_rel = os.path.relpath(file_path, site_path)
file_name = os.path.basename(file)
# print(file_name)
mymd5 = file_md5(file_path)
if file.lower().endswith(".md"):
with open(file_path, "r", encoding="utf-8") as f:
content = f.read()
set_markdown(site_name, file_path_rel, mymd5, content)
elif file_name in [".collection", ".site", ".done"]:
continue
elif re.search(
r"\.(jpg|jpeg|png|gif|bmp|tiff|webp)$", file, re.IGNORECASE
):
set_img(site_name, file_path_rel, mymd5)
else:
set_file(site_name, file_path_rel, mymd5)
def process(
path: str,
set_site: Callable[[str, str], None],
set_file: Callable[[str, str, str], None],
set_img: Callable[[str, str, str], None],
set_markdown: Callable[[str, str, str, str], None],
) -> None:
"""
walk over directory and apply set_file(), set_img() and set_markdown()
"""
path = os.path.abspath(os.path.expanduser(path))
logger.info(f"sites process: {path}")
for root, dirs, files in os.walk(path):
if ".site" in files or ".collection" in files:
site_name = name_fix(os.path.basename(root))
set_site(site_name, root)
_site_process_action(
site_name, root, set_file, set_img, set_markdown
)
# Prevent the os.walk from going deeper into subdirectories
dirs[:] = []
if __name__ == "__main__":
mypath = "~/code/git.threefold.info/projectmycelium/info_projectmycelium/collections"
# logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
process(
mypath,
_example_set_site,
_example_set_file,
_example_set_img,
_example_set_markdown,
)