0  herolib/__init__.py  Normal file
BIN  herolib/__pycache__/__init__.cpython-313.pyc  Normal file (binary file not shown)
0  herolib/clients/__init__.py  Normal file
0  herolib/clients/assemblyai/__init__.py  Normal file
69  herolib/clients/assemblyai/client.py  Normal file
@@ -0,0 +1,69 @@
import os

from pydub import AudioSegment
import assemblyai as aai


class Client:
    def __init__(self):
        api_key = os.getenv("ASSEMBLYAI")

        if not api_key:
            raise EnvironmentError(
                "Please set the ASSEMBLYAI environment variable with your AssemblyAI API key."
            )

        self.api_key = api_key
        aai.settings.api_key = self.api_key
        self.transcriber = aai.Transcriber()

    def convert_to_ogg_mono(self, input_path: str, output_path: str):
        """Converts an audio file from .mp4 to .ogg (optionally mono, see below)."""
        audio = AudioSegment.from_file(input_path, format="mp4")
        # Convert to mono if needed by uncommenting the line below
        # audio = audio.set_channels(1)
        audio.export(output_path, format="ogg")
        print(f"Converted to .ogg in {output_path}")

    def transcribe_audio(self, audio_path: str, output_path: str):
        """Transcribes the audio file and saves the transcription to a Markdown file."""
        config = aai.TranscriptionConfig(
            speaker_labels=True,
        )

        transcript = self.transcriber.transcribe(audio_path, config)

        with open(output_path, "w", encoding="utf-8") as f:
            for utterance in transcript.utterances:
                f.write(
                    f"** Speaker {utterance.speaker}:\n{utterance.text}\n-------------\n"
                )

        print(f"Transcription saved to {output_path}")

    def transcribe_audio_file(self, input_path: str, output_transcription_path: str):
        """Handles the entire process from conversion to transcription and cleanup."""
        converted_audio_path = input_path.replace(".mp4", ".ogg")

        # Convert .mp4 to .ogg
        self.convert_to_ogg_mono(input_path, converted_audio_path)

        # Perform the transcription
        self.transcribe_audio(converted_audio_path, output_transcription_path)

        # Optionally, clean up the converted file
        os.remove(converted_audio_path)
        print(f"Removed temporary file {converted_audio_path}")


# Example usage:
if __name__ == "__main__":
    # Define the paths for the input audio and output transcription
    input_audio_path = "/tmp/475353425.mp4"
    output_transcription_path = "/tmp/transcribe_475353425.md"

    # Perform the transcription process
    client = Client()
    client.transcribe_audio_file(input_audio_path, output_transcription_path)
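For reference, the Markdown written by `transcribe_audio` looks like this (speaker labels come from AssemblyAI's diarization; the content below is made up):

```
** Speaker A:
Hello everyone, welcome to the call.
-------------
** Speaker B:
Thanks, happy to be here.
-------------
```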
19  herolib/clients/readme.md  Normal file
@@ -0,0 +1,19 @@
# Vimeo Client

Needs the following functionality:

- upload video
- download video
- list videos

## Some info

- https://developer.vimeo.com/api/reference

## Remarks: to use, make sure you have the secrets

```bash
hero git clone -u git@git.threefold.info:despiegk/hero_secrets.git
source git.threefold.info/projectmycelium/hero_server/myenv.sh
```
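The client in `herolib/clients/vimeo/client.py` below covers these three operations. A minimal usage sketch, assuming the `VIMEO_*` environment variables from the secrets repo are set (the import path, file paths and IDs are illustrative):

```python
# Minimal sketch; assumes VIMEO_* env vars are set. Paths/IDs are illustrative.
from client import new  # adjust import to how herolib is packaged

cl = new()
uri = cl.upload("/tmp/myvid.mp4", "My Video", "demo upload")  # upload
videos = cl.get_videos()                                      # list videos
for v in videos:
    print(v.name, v.uri)
video_id = videos[0].uri.split("/")[-1]
cl.download(video_id, "/tmp/out.mp4")                         # download
```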
0  herolib/clients/stellar/__init__.py  Normal file
241  herolib/clients/stellar/horizon.py  Normal file
@@ -0,0 +1,241 @@
from dataclasses import asdict
from typing import List
from stellar_sdk import Keypair, Server, StrKey
import json
import redis
from stellar.model import StellarAsset, StellarAccount
import os
import csv
import toml
from herotools.texttools import description_fix


class HorizonServer:
    def __init__(self, instance: str = "default", network: str = "main", tomlfile: str = "", owner: str = ""):
        """
        Load Stellar account information using the Horizon server.
        Horizon is the API in front of the Stellar network: it provides endpoints to submit
        transactions, check account balances, and perform other operations on the Stellar ledger.
        All results are cached in Redis.
        """
        self.redis_client = redis.Redis(host='localhost', port=6379, db=0)  # Adjust as needed
        self.instance = instance
        if network not in ['main', 'testnet']:
            raise ValueError("Invalid network value. Must be 'main' or 'testnet'.")
        self.network = network
        testnet = self.network == 'testnet'
        self.server = Server("https://horizon-testnet.stellar.org" if testnet else "https://horizon.stellar.org")
        self.tomlfile = os.path.expanduser(tomlfile)
        self.owner = owner
        if self.tomlfile:
            self.toml_load()

    def account_exists(self, pubkey: str) -> bool:
        """
        Check if an account exists in the Redis cache based on the public key.
        """
        redis_key = f"stellar:{self.instance}:accounts:{pubkey}"
        return self.redis_client.exists(redis_key) > 0  # exists() returns an int, not None

    def account_get(self, key: str, reload: bool = False, name: str = "", description: str = "", cat: str = "") -> StellarAccount:
        """
        Load a Stellar account's information.

        Args:
            key (str): The private or public key of the Stellar account.
            reload (bool, optional): Whether to force a refresh of the cached data. Defaults to False.
            name (str, optional): Name for the account. Defaults to "".
            description (str, optional): Description for the account. Defaults to "".
            cat (str, optional): Category of the account. Defaults to "".

        Returns:
            StellarAccount: A dataclass containing the account's information.
        """

        if key == "" and name:
            for redis_key in self.redis_client.scan_iter(f"stellar:{self.instance}:accounts:*"):
                data = self.redis_client.get(redis_key)
                if data:
                    data = json.loads(data)
                    if data.get('name') == name and data.get('priv_key', data.get('public_key')):
                        key = data.get('priv_key', data.get('public_key'))
                        break

        if key == "":
            raise ValueError("No key provided")

        # Determine if the key is a public or private key
        if StrKey.is_valid_ed25519_public_key(key):
            public_key = key
            priv_key = ""
        elif StrKey.is_valid_ed25519_secret_seed(key):
            priv_key = key
            keypair = Keypair.from_secret(priv_key)
            public_key = keypair.public_key
        else:
            raise ValueError("Invalid Stellar key provided")

        redis_key = f"stellar:{self.instance}:accounts:{public_key}"

        data = self.redis_client.get(redis_key)
        changed = False
        if data:
            try:
                data = json.loads(data)
            except Exception as e:
                print(data)
                raise e
            data['assets'] = [StellarAsset(**asset) for asset in data['assets']]
            account = StellarAccount(**data)
            if description != "" and description != account.description:
                account.description = description
                changed = True
            if name != "" and name != account.name:
                account.name = name
                changed = True
            if self.owner != "" and self.owner != account.owner:
                account.owner = self.owner
                changed = True
            if cat != "" and cat != account.cat:
                account.cat = cat
                changed = True
        else:
            account = StellarAccount(public_key=public_key, description=description, name=name, priv_key=priv_key, owner=self.owner, cat=cat)
            changed = True

        if reload or account.assets == []:
            changed = True
            if reload:
                account.assets = []
            account_data = self.server.accounts().account_id(public_key).call()
            account.assets.clear()  # Clear existing assets to avoid duplication
            for balance in account_data['balances']:
                asset_type = balance['asset_type']
                if asset_type == 'native':
                    account.assets.append(StellarAsset(type="XLM", balance=balance['balance']))
                elif 'asset_code' in balance:
                    account.assets.append(StellarAsset(
                        type=balance['asset_code'],
                        issuer=balance['asset_issuer'],
                        balance=balance['balance']
                    ))

        # Persist the result in Redis (10-minute TTL) if there were changes
        if changed:
            self.account_save(account)

        return account

    def comment_add(self, pubkey: str, comment: str, ignore_non_exist: bool = False):
        """
        Add a comment to a Stellar account based on the public key.

        Args:
            pubkey (str): The public key of the Stellar account.
            comment (str): The comment to add to the account.
        """
        comment = description_fix(comment)
        if not self.account_exists(pubkey):
            if ignore_non_exist:
                return
            raise ValueError("Account does not exist in the cache")
        account = self.account_get(pubkey)
        account.comments.append(comment)
        self.account_save(account)

    def account_save(self, account: StellarAccount):
        """
        Save a Stellar account's information to the Redis cache (10-minute TTL).

        Args:
            account (StellarAccount): The account to save.
        """
        redis_key = f"stellar:{self.instance}:accounts:{account.public_key}"
        self.redis_client.setex(redis_key, 600, json.dumps(asdict(account)))

    def reload_cache(self):
        """
        Walk over all known accounts and reload their information.
        """
        for redis_key in self.redis_client.scan_iter(f"stellar:{self.instance}:accounts:*"):
            data = self.redis_client.get(redis_key)
            if data:
                data = json.loads(data)
                public_key = data.get('public_key')
                if public_key:
                    self.account_get(public_key, reload=True)

    def load_accounts_csv(self, file_path: str):
        """Load comments from a CSV file; format is PUBKEY,DESCRIPTION per line."""
        file_path = os.path.expanduser(file_path)
        if not os.path.exists(file_path):
            raise FileNotFoundError(f"Error: File '{file_path}' does not exist.")
        try:
            with open(file_path, 'r', newline='') as file:
                reader = csv.reader(file, delimiter=',')
                for row in reader:
                    if row and len(row) >= 2:  # Row is not empty and has at least 2 elements
                        pubkey = row[0].strip()
                        comment = ','.join(row[1:]).strip()
                        if self.account_exists(pubkey):
                            self.comment_add(pubkey, comment)
        except IOError as e:
            raise Exception(f"Error reading file: {e}")
        except csv.Error as e:
            raise Exception(f"Error parsing CSV: {e}")

    def accounts_get(self) -> List[StellarAccount]:
        """
        Retrieve a list of all known Stellar accounts from the Redis cache.

        Returns:
            List[StellarAccount]: A list of StellarAccount objects.
        """
        accounts = []
        for redis_key in self.redis_client.scan_iter(f"stellar:{self.instance}:accounts:*"):
            pubkey = redis_key.decode().split(':')[-1]  # scan_iter yields bytes
            accounts.append(self.account_get(key=pubkey))
        return accounts

    def toml_save(self):
        """
        Save the list of all known Stellar accounts to the configured TOML file.
        """
        if self.tomlfile == "":
            raise ValueError("No TOML file path provided")
        accounts = self.accounts_get()
        accounts_dict = {account.public_key: asdict(account) for account in accounts}
        with open(self.tomlfile, 'w') as file:
            toml.dump(accounts_dict, file)

    def toml_load(self):
        """
        Load the list of Stellar accounts from the configured TOML file and save them to the Redis cache.
        """
        if not os.path.exists(self.tomlfile):
            return
        with open(self.tomlfile, 'r') as file:
            accounts_dict = toml.load(file)
        for pubkey, account_data in accounts_dict.items():
            account_data['assets'] = [StellarAsset(**asset) for asset in account_data['assets']]
            account = StellarAccount(**account_data)
            self.account_save(account)


def new(instance: str = "default", owner: str = "", network: str = "main", tomlfile: str = "") -> HorizonServer:
    return HorizonServer(instance=instance, network=network, tomlfile=tomlfile, owner=owner)
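A minimal sketch of the intended flow, using the module-level `new()` above and assuming a local Redis plus Horizon reachability (the key below is a placeholder, not a valid Stellar key):

```python
# Sketch only; "GB...PLACEHOLDER" stands in for a real ed25519 public key.
h = new(instance="default", network="main", owner="kristof")
account = h.account_get("GB...PLACEHOLDER", name="treasury")  # hits Horizon once, then Redis
print(account.balance_str())
for acc in h.accounts_get():  # everything currently cached
    print(acc.md())
```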
70  herolib/clients/stellar/model.py  Normal file
@@ -0,0 +1,70 @@
from dataclasses import dataclass, field
from typing import List


@dataclass
class StellarAsset:
    type: str
    balance: float
    issuer: str = ""

    def format_balance(self):
        balance_float = float(self.balance)
        formatted_balance = f"{balance_float:,.2f}"
        if '.' in formatted_balance:
            formatted_balance = formatted_balance.rstrip('0').rstrip('.')
        return formatted_balance

    def md(self):
        formatted_balance = self.format_balance()
        return f"- **{self.type}**: {formatted_balance}"


@dataclass
class StellarAccount:
    owner: str
    priv_key: str = ""
    public_key: str = ""
    assets: List[StellarAsset] = field(default_factory=list)
    name: str = ""
    description: str = ""
    comments: List[str] = field(default_factory=list)
    cat: str = ""
    question: str = ""

    def md(self):
        result = [
            f"# Stellar Account: {self.name or 'Unnamed'}", "",
            f"**Public Key**: {self.public_key}",
            f"**Cat**: {self.cat}",
            f"**Description**: {self.description[:60]}..." if self.description else "**Description**: None",
            f"**Question**: {self.question}" if self.question else "**Question**: None",
            "",
            "## Assets:", ""
        ]

        for asset in self.assets:
            result.append(asset.md())

        if len(self.assets) == 0:
            result.append("- No assets")

        result.append("")

        if self.comments:
            result.append("## Comments:")
            for comment in self.comments:
                if '\n' in comment:
                    multiline_comment = "\n  ".join(comment.split('\n'))
                    result.append(f"- {multiline_comment}")
                else:
                    result.append(f"- {comment}")

        return "\n".join(result)

    def balance_str(self) -> str:
        out = []
        for asset in self.assets:
            out.append(f"{asset.type}:{float(asset.balance):,.0f}")
        return " ".join(out)
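A quick demonstration of the rendering helpers (all values made up):

```python
# Made-up values purely to show what md() and balance_str() produce.
acc = StellarAccount(
    owner="kristof",
    public_key="GEXAMPLE",
    name="treasury",
    assets=[StellarAsset(type="XLM", balance=1234.5)],
)
print(acc.md())           # "# Stellar Account: treasury" ... "- **XLM**: 1,234.5"
print(acc.balance_str())  # "XLM:1,234" (balances rendered in whole units)
```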
78  herolib/clients/stellar/model_accounts.v  Normal file
@@ -0,0 +1,78 @@
module stellar

import freeflowuniverse.crystallib.core.texttools

pub struct DigitalAssets {
pub mut:
	accounts []Account // was missing in the original; account_get below iterates over it
}

pub struct Owner {
pub mut:
	name     string
	accounts []Account
}

@[params]
pub struct AccountGetArgs {
pub mut:
	name   string
	bctype BlockChainType
}

pub fn (self DigitalAssets) account_get(args_ AccountGetArgs) !&Account {
	mut accounts := []&Account{}
	mut args := args_

	args.name = texttools.name_fix(args.name)

	// index loop so we can take references into the array
	for i in 0 .. self.accounts.len {
		if self.accounts[i].name == args.name && self.accounts[i].bctype == args.bctype {
			accounts << &self.accounts[i]
		}
	}

	if accounts.len == 0 {
		return error('No account found with the given name:${args.name} and blockchain type: ${args.bctype}')
	} else if accounts.len > 1 {
		return error('Multiple accounts found with the given name:${args.name} and blockchain type: ${args.bctype}')
	}

	return accounts[0]
}

pub struct Account {
pub mut:
	name        string
	secret      string
	pubkey      string
	description string
	cat         string
	owner       string
	assets      []Asset
	bctype      BlockChainType
}

pub struct Asset {
pub mut:
	amount    int
	assettype AssetType
}

pub fn (self Asset) name() string {
	return self.assettype.name
}

pub struct AssetType {
pub mut:
	name   string
	issuer string
	bctype BlockChainType
}

pub enum BlockChainType {
	stellar_pub
	stellar_test
}
46  herolib/clients/stellar/testnet.py  Normal file
@@ -0,0 +1,46 @@
from typing import Tuple
from stellar_sdk import Server, Keypair
import redis
import requests
import json
import time


def balance(public_key: str) -> float:
    """Return the native XLM balance via the Horizon testnet.

    Note: this helper was referenced but not defined in the original file;
    this is a minimal reconstruction of what it presumably did.
    """
    server = Server("https://horizon-testnet.stellar.org")
    account = server.accounts().account_id(public_key).call()
    for b in account["balances"]:
        if b["asset_type"] == "native":
            return float(b["balance"])
    return 0.0


def create_account_on_testnet() -> Tuple[str, str]:
    # Initialize Redis client (moved out of create_account: the outer body uses it too)
    redis_client = redis.StrictRedis(host='localhost', port=6379, db=0)

    def fund(public_key: str) -> float:
        # Request funds from the Stellar testnet friendbot
        response = requests.get(f"https://friendbot.stellar.org?addr={public_key}")
        if response.status_code != 200:
            raise Exception("Failed to fund new account with friendbot")
        time.sleep(1)
        return balance(public_key)

    def create_account() -> Tuple[str, str]:
        # Generate keypair
        keypair = Keypair.random()
        public_key = keypair.public_key
        secret_key = keypair.secret
        account_data = {
            "public_key": public_key,
            "secret_key": secret_key
        }
        redis_client.set("stellartest:testaccount", json.dumps(account_data))
        time.sleep(1)
        return public_key, secret_key

    # Check if the account already exists in Redis
    if redis_client.exists("stellartest:testaccount"):
        account_data = json.loads(redis_client.get("stellartest:testaccount"))
        public_key = account_data["public_key"]
        secret_key = account_data["secret_key"]
        if balance(public_key) < 100:
            fund(public_key)
        return public_key, secret_key
    else:
        create_account()
        return create_account_on_testnet()  # recurse once so the fresh account gets funded
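Intended usage is a single call; a sketch (needs a local Redis plus access to friendbot and the Horizon testnet):

```python
# Returns a cached keypair, re-funding via friendbot when the balance drops below 100 XLM.
pub, sec = create_account_on_testnet()
print(pub, balance(pub))
```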
0  herolib/clients/telegram/__init__.py  Normal file
102  herolib/clients/telegram/bot.py  Normal file
@@ -0,0 +1,102 @@
import os
import logging

import redis
import telebot
from termcolor import colored
from telebot.types import InlineKeyboardMarkup, InlineKeyboardButton
from telebot.formatting import escape_markdown

from bot_audio import audio_add
from bot_text import text_add


class MyBot:
    def __init__(self, ai_reset: bool = False):
        # Initialize logging
        logging.basicConfig(level=logging.INFO, format='%(message)s')
        self.logger = logging.getLogger(__name__)

        # Initialize Redis connection
        self.redis_client = redis.Redis(host='localhost', port=6379, db=0)

        # Initialize Telegram bot
        self.telebotkey = os.getenv("TELEBOT")
        if self.telebotkey:
            self.logger.info(colored("TELEBOT key set", "green"))
            self.bot = telebot.TeleBot(self.telebotkey)
        else:
            raise Exception("can't find TELEBOT in ENV")

        # Set up message handlers
        self.setup_handlers()
        audio_add(self)
        text_add(self, reset=ai_reset)

    def setup_handlers(self):
        @self.bot.message_handler(commands=['help'])
        def send_welcome(message):
            self.bot.reply_to(message, """\
Hi there, I am your hero.
Just speak to me or do /start or /help
""")

        @self.bot.message_handler(commands=['start'])
        def start_command(message):
            chat_id = message.chat.id

            keyboard = InlineKeyboardMarkup()
            subscribe_button = InlineKeyboardButton("Subscribe to Updates", callback_data='subscribe')
            unsubscribe_button = InlineKeyboardButton("Unsubscribe from Updates", callback_data='unsubscribe')
            keyboard.row(subscribe_button, unsubscribe_button)

            self.bot.reply_to(message, "Please choose an option:", reply_markup=keyboard)

        @self.bot.callback_query_handler(func=lambda call: True)
        def callback_query(call):
            chat_id = call.message.chat.id

            if call.data == 'subscribe':
                self.redis_client.hset('subscribed_chats', chat_id, '1')
                self.bot.answer_callback_query(call.id, "You have subscribed to updates.")
                print(f"User subscribed to updates: {chat_id}")
            elif call.data == 'unsubscribe':
                self.redis_client.hdel('subscribed_chats', chat_id)
                self.bot.answer_callback_query(call.id, "You have unsubscribed from updates.")
                print(f"User unsubscribed from updates: {chat_id}")

    def send_message_to_subscribers(self, message):
        subscribed_chats = self.redis_client.hgetall('subscribed_chats')
        for chat_id in subscribed_chats:
            try:
                self.bot.send_message(chat_id.decode('utf-8'), message)
            except Exception as e:
                print(f"Failed to send message to chat {chat_id}: {str(e)}")

    def send_error_to_telegram(self, chat_id, error_message):
        # Format the error message for Telegram
        telegram_message = "🚨 Error Occurred 🚨\n\n"
        telegram_message += f"app: {escape_markdown(error_message['app'])}\n"
        telegram_message += f"Function: {escape_markdown(error_message['function'])}\n"
        telegram_message += f"msg: {escape_markdown(error_message['msg'])}\n"
        telegram_message += f"Exception Type: {escape_markdown(error_message['exception_type'])}\n"
        telegram_message += f"Exception Message: ```\n{escape_markdown(error_message['exception_message'])}\n```\n"
        if 'traceback' in error_message:
            telegram_message += f"Traceback:\n```\n{escape_markdown(error_message['traceback'])}\n```"
        # Send the error message to the subscribed chat
        self.bot.send_message(chat_id, telegram_message, parse_mode='Markdown')

    def start(self):
        print("Bot started")
        # Start the bot
        self.bot.polling()


def bot_new() -> MyBot:
    return MyBot()


# Usage
if __name__ == "__main__":
    my_bot = bot_new()
    my_bot.start()
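Because the subscription state lives in the shared Redis hash `subscribed_chats`, any process holding the `TELEBOT` key can broadcast; a sketch (the import path depends on how the package is laid out):

```python
# Sketch: broadcast from another process that shares the same Redis and TELEBOT key.
from bot import MyBot  # import path is an assumption

my_bot = MyBot()
my_bot.send_message_to_subscribers("Deploy finished")
```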
72  herolib/clients/telegram/bot_audio.py  Normal file
@@ -0,0 +1,72 @@
import os
from pydub import AudioSegment
import whisper


def audio_add(self):

    self.model = whisper.load_model("base")

    @self.bot.message_handler(content_types=['audio', 'voice'])
    def handle_audio(message):
        try:
            chat_id = message.chat.id
            file_info = None
            audio_path = None

            if message.content_type == 'audio':
                file_info = self.bot.get_file(message.audio.file_id)
                audio_path = f"/tmp/audio/{message.audio.file_id}.mp3"
            elif message.content_type == 'voice':
                file_info = self.bot.get_file(message.voice.file_id)
                audio_path = f"/tmp/audio/{message.voice.file_id}.ogg"

            if file_info:
                downloaded_file = self.bot.download_file(file_info.file_path)

                # Ensure the directory exists
                os.makedirs(os.path.dirname(audio_path), exist_ok=True)

                # Save the audio file
                with open(audio_path, 'wb') as new_file:
                    new_file.write(downloaded_file)

                print(f"Audio received and saved to {audio_path}")

                # Convert to WAV format if necessary
                wav_path = audio_path.replace('.mp3', '.wav').replace('.ogg', '.wav')
                if audio_path.endswith('.mp3') or audio_path.endswith('.ogg'):
                    audio = AudioSegment.from_file(audio_path)
                    audio.export(wav_path, format='wav')
                else:
                    wav_path = audio_path

                # Transcribe audio using Whisper
                result = self.model.transcribe(wav_path)
                transcription = result["text"]

                self.bot.send_message(chat_id, transcription, parse_mode='Markdown')
                print(f"Transcription: {transcription}")

                text2 = self.text_process(self, transcription)

                print(f"Processed text {chat_id}: {text2}")

                if len(text2) > 0:
                    self.bot.send_message(chat_id, text2)

        except Exception as e:
            error_message = {
                'app': 'Telegram Bot',
                'function': 'handle_audio',
                'msg': 'Failed to process audio file',
                'exception_type': type(e).__name__,
                'exception_message': str(e)
            }
            self.send_error_to_telegram(chat_id, error_message)
            print(f"Error processing audio file: {e}")
51  herolib/clients/telegram/bot_text.py  Normal file
@@ -0,0 +1,51 @@
from ai.ask import ai_assistent


def text_add(self, reset: bool = False):

    self.ai_assistent = ai_assistent(reset=reset)
    self.text_process = text_process

    @self.bot.message_handler(content_types=['text'])
    def handle_text(message):
        try:
            chat_id = message.chat.id

            text = message.text

            print(f"Received text from {chat_id}: {text}")

            text2 = self.text_process(self, text)

            print(f"Processed text {chat_id}: {text2}")

            if len(text2) > 0:
                self.bot.send_message(chat_id, text2)

        except Exception as e:
            error_message = {
                'app': 'Telegram Bot',
                'function': 'handle_text',
                'msg': 'Failed to process text',
                'exception_type': type(e).__name__,
                'exception_message': str(e)
            }
            self.send_error_to_telegram(chat_id, error_message)
            print(f"Error processing text: {e}")


def text_process(self, txt) -> str:
    # Unless the user asked for a translation, force the assistant to emit pure heroscript
    if "translate" not in txt.lower():
        txt += '''\n\n
only output the heroscript, no comments
'''
    response = self.ai_assistent.ask(
        category='timemgmt',
        name='schedule',
        question=txt)
    return response
36  herolib/clients/telegram/errorqueue.py  Normal file
@@ -0,0 +1,36 @@
import json
import threading
import time

import redis

from telegram.bot import MyBot  # send_error_to_telegram is a MyBot method, not a module-level function

# Initialize Redis connection
redis_client = redis.Redis(host='localhost', port=6379, db=0)

my_bot = MyBot()


# Get errors from Redis and send them to the bot if a subscription was done
def process_error_queue():
    while True:
        # Pop an error message from the Redis queue
        error_json = redis_client.lpop('error_queue')

        if error_json:
            # Deserialize the error message from JSON
            error_message = json.loads(error_json)

            # Get all subscribed chat IDs from Redis
            subscribed_chats = redis_client.hgetall('subscribed_chats')

            # Send the error message to all subscribed chats
            for chat_id in subscribed_chats.keys():
                my_bot.send_error_to_telegram(int(chat_id), error_message)
        else:
            # If the queue is empty, wait a short interval before checking again
            time.sleep(1)


# Start processing the error queue
process_error_queue_thread = threading.Thread(target=process_error_queue)
process_error_queue_thread.start()
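The queue contract is implicit: producers push JSON blobs with the fields `send_error_to_telegram` expects onto the `error_queue` list. A producer-side sketch:

```python
# Sketch of the producer side; field names mirror what send_error_to_telegram reads.
import json
import redis

r = redis.Redis(host='localhost', port=6379, db=0)
r.rpush('error_queue', json.dumps({
    'app': 'my_app',
    'function': 'do_work',
    'msg': 'Something failed',
    'exception_type': 'ValueError',
    'exception_message': 'bad input',
    # 'traceback': '...',  # optional
}))
```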
0  herolib/clients/vimeo/__init__.py  Normal file
142  herolib/clients/vimeo/client.py  Normal file
@@ -0,0 +1,142 @@
import os
from typing import List, Optional

import requests
import vimeo
from model_video import VideoInfo, video_model_load, videos_model_load


class VimeoClient:
    def __init__(self):
        # Retrieve necessary credentials from environment variables
        self.client_id = os.getenv("VIMEO_CLIENT_ID")
        self.client_secret = os.getenv("VIMEO_SECRET")
        self.access_token = os.getenv("VIMEO_ACCESSTOKEN_ID")
        self.user_id = os.getenv("VIMEO_USER_ID")

        # Check if all environment variables are present
        if not all([self.client_id, self.client_secret, self.access_token, self.user_id]):
            raise EnvironmentError(
                "Please set the VIMEO_CLIENT_ID, VIMEO_SECRET, VIMEO_USER_ID and VIMEO_ACCESSTOKEN_ID environment variables."
            )

        # Initialize the Vimeo client
        self.client = vimeo.VimeoClient(token=self.access_token, key=self.client_id, secret=self.client_secret)

    def upload(self, file: str, video_title: str, description: str) -> str:
        video_uri = self.client.upload(file, data={"name": video_title, "description": description})
        return video_uri

    def download(self, video_id: str, output_file: str = "myvid.mp4"):
        info = self.get_video_info(video_id)

        # Pick the largest (highest-quality) downloadable rendition
        size, link = 0, ""
        for item in info.download:
            if item["size"] > size:
                size = item["size"]
                link = item["link"]

        if link == "":
            raise Exception("download link not provided for video")

        video_response = requests.get(link, stream=True)
        downloaded_mb = 0.0
        with open(output_file, "wb") as video_file:
            for chunk in video_response.iter_content(chunk_size=1024 * 1024):
                if chunk:
                    downloaded_mb += len(chunk) / (1024 * 1024)  # original counted KB as MB
                    print(f"{downloaded_mb:.1f}MB Downloaded...")
                    video_file.write(chunk)

        print(f"Video downloaded successfully to {output_file}!")

    def get_video_info(self, video_id: str) -> VideoInfo:
        """
        Get information about a video by ID.
        :param video_id: ID of the Vimeo video.
        :return: Video information as a VideoInfo object.
        """
        response = self.client.get(f"/videos/{video_id}")
        if response.status_code == 200:
            return video_model_load(response.content)
        raise Exception(f"Failed to get video details. Status code: {response.status_code}, Error: {response.text}")

    def get_videos(self, folder: Optional[int] = None, folders: Optional[List[int]] = None) -> List[VideoInfo]:
        """
        Get information about videos from specified folder(s), or all videos if no folder is specified.
        :param folder: ID of a single folder to fetch videos from.
        :param folders: List of folder IDs to fetch videos from.
        :return: List of VideoInfo objects.
        """
        if not self.user_id:
            raise Exception("Can't find user ID, it's not set in env variables")

        all_videos = []

        if folder is not None:
            folders = [folder]
        elif folders is None:
            # If no folder or folders specified, get all videos
            response = self.client.get("/me/videos")
            if response.status_code == 200:
                return videos_model_load(response.content)
            raise Exception(f"Failed to get videos. Status code: {response.status_code}, Error: {response.text}")

        for folder_id in folders:
            response = self.client.get(f"/users/{self.user_id}/projects/{folder_id}/videos")
            if response.status_code == 200:
                videos = videos_model_load(response.content)
                all_videos.extend(videos)
            else:
                print(f"Failed to get videos for folder {folder_id}. Status code: {response.status_code}, Error: {response.text}")

        return all_videos


def new() -> VimeoClient:
    return VimeoClient()


# Example usage:
if __name__ == "__main__":
    cl = new()
    v = cl.get_videos(folders=[10700101, 13139570, 12926235, 10752310, 10702046])
    for item in v:
        video_id = item.uri.split("/")[-1]
        print(f" - {item.name} : {video_id} ")
    # vi = cl.get_video_info("475353425")
    # cl.download("475353425", "/tmp/475353425.mp4")
177  herolib/clients/vimeo/model_video.py  Normal file
@@ -0,0 +1,177 @@
from dataclasses import dataclass
from typing import List, Optional, Dict, Any
from dataclasses_json import dataclass_json
import json
import yaml


def json_to_yaml(json_data):
    # If the input is a JSON string, parse it into a Python dictionary
    if isinstance(json_data, str):
        json_data = json.loads(json_data)

    # Convert the dictionary to a YAML formatted string
    yaml_data = yaml.dump(json_data, sort_keys=False, default_flow_style=False)

    return yaml_data


@dataclass_json
@dataclass
class Size:
    width: int
    height: int
    link: str
    link_with_play_button: Optional[str] = None


@dataclass_json
@dataclass
class Pictures:
    uri: str
    active: bool
    type: str
    base_link: str
    sizes: List[Size]
    resource_key: str
    default_picture: bool


@dataclass_json
@dataclass
class Embed:
    html: str
    badges: Dict[str, Any]
    interactive: bool
    buttons: Dict[str, bool]
    logos: Dict[str, Any]
    play_button: Dict[str, Any]
    title: Dict[str, Any]
    end_screen: List[Any]
    playbar: bool
    quality_selector: Optional[str]
    pip: bool
    autopip: bool
    volume: bool
    color: str
    colors: Dict[str, str]
    event_schedule: bool
    has_cards: bool
    outro_type: str
    show_timezone: bool
    cards: List[Any]
    airplay: bool
    audio_tracks: bool
    chapters: bool
    chromecast: bool
    closed_captions: bool
    transcript: bool
    ask_ai: bool
    uri: Optional[str]
    email_capture_form: Optional[str]
    speed: bool


@dataclass_json
@dataclass
class Uploader:
    pictures: Pictures


@dataclass_json
@dataclass
class User:
    uri: str
    name: str
    link: str
    capabilities: Dict[str, bool]
    location: str
    gender: str
    bio: str
    short_bio: str
    created_time: str
    pictures: Pictures
    websites: List[Dict[str, Optional[str]]]
    # metadata: Dict[str, Any]
    location_details: Dict[str, Optional[Any]]
    skills: List[Any]
    available_for_hire: bool
    can_work_remotely: bool
    preferences: Dict[str, Any]
    content_filter: List[str]
    upload_quota: Dict[str, Any]
    resource_key: str
    account: str


@dataclass_json
@dataclass
class VideoInfo:
    uri: str
    name: str
    description: Optional[str]
    type: str
    link: str
    player_embed_url: str
    duration: int
    width: int
    height: int
    # embed: Embed
    created_time: str
    modified_time: str
    release_time: str
    content_rating: List[str]
    content_rating_class: str
    rating_mod_locked: bool
    license: Optional[str]
    privacy: Dict[str, Any]
    pictures: Pictures
    tags: List[Any]
    stats: Dict[str, int]
    categories: List[Any]
    uploader: Uploader
    # metadata: Dict[str, Any]
    manage_link: str
    # user: Optional[User]
    last_user_action_event_date: Optional[str]
    parent_folder: Optional[Dict[str, Any]]
    review_page: Optional[Dict[str, Any]]
    files: Optional[List[Dict[str, Any]]]
    download: Optional[List[Dict[str, Any]]]
    app: Optional[Dict[str, str]]
    play: Optional[Dict[str, Any]]
    status: str
    resource_key: str
    upload: Optional[Dict[str, Optional[str]]]
    transcode: Dict[str, str]
    is_playable: bool
    has_audio: bool


def video_model_load(json_data: str, dojsonload: bool = True) -> VideoInfo:
    if dojsonload:
        json_dict = json.loads(json_data)
    else:
        json_dict = json_data

    # Drop fields the dataclasses don't model (see the commented-out attributes above)
    json_dict.pop('metadata', {})
    json_dict.pop('embed', {})
    json_dict.pop('user', {})
    json_dict.pop('websites', {})

    json_data_cleaned = json.dumps(json_dict)

    video_object = VideoInfo.from_json(json_data_cleaned)

    return video_object


def videos_model_load(json_data: str) -> List[VideoInfo]:
    json_list = json.loads(json_data)
    videos = list()

    for item in json_list["data"]:
        videos.append(video_model_load(item, dojsonload=False))

    return videos
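The `json_to_yaml` helper is handy for eyeballing raw API payloads; for example:

```python
print(json_to_yaml('{"uri": "/videos/475353425", "duration": 120}'))
# uri: /videos/475353425
# duration: 120
```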
0  herolib/clients/whisper/__init__.py  Normal file
107  herolib/clients/whisper/convert.py  Normal file
@@ -0,0 +1,107 @@
from pydub import AudioSegment
import moviepy.editor as mp
import nltk
from nltk.tokenize import sent_tokenize, word_tokenize

# Download necessary NLTK data
nltk.download('punkt', quiet=True)


class Convertor:
    def __init__(self, max_chars_per_part=4000, context: str = "main"):
        self.max_chars_per_part = max_chars_per_part
        self.context = context

    @classmethod
    def new(cls, max_chars_per_part=4000):
        return cls(max_chars_per_part)

    def process(self, path: str):
        if path.lower().endswith(('.mp4', '.avi', '.mov')):  # Video files
            return self.process_video(path)
        elif path.lower().endswith(('.mp3', '.wav', '.ogg')):  # Audio files
            return self.process_audio(path)
        else:
            raise ValueError("Unsupported file format")

    def process_video(self, video_path: str):
        # Extract audio from video
        video = mp.VideoFileClip(video_path)
        audio_path = video_path.rsplit('.', 1)[0] + '.wav'
        video.audio.write_audiofile(audio_path)
        video.close()
        return audio_path

    def process_audio(self, audio_path: str):
        # Convert to WAV format if necessary
        wav_path = audio_path.rsplit('.', 1)[0] + '.wav'
        if not audio_path.lower().endswith('.wav'):
            audio = AudioSegment.from_file(audio_path)
            audio.export(wav_path, format='wav')
        else:
            wav_path = audio_path
        return wav_path  # return was missing in the original

    def split_text(self, text):
        parts = []
        current_part = ""
        paragraphs = text.split('\n\n')

        for paragraph in paragraphs:
            sentences = sent_tokenize(paragraph)
            for sentence in sentences:
                if len(current_part) + len(sentence) < self.max_chars_per_part:
                    current_part += sentence + ' '
                else:
                    if current_part:
                        parts.append(current_part.strip())
                    current_part = sentence + ' '

            # Add a paragraph break if it doesn't exceed the limit
            if len(current_part) + 2 < self.max_chars_per_part:
                current_part += '\n\n'
            else:
                parts.append(current_part.strip())
                current_part = '\n\n'

        if current_part:
            parts.append(current_part.strip())

        return parts

    def find_natural_pause(self, text):
        words = word_tokenize(text)
        total_words = len(words)
        mid_point = total_words // 2

        # Look for punctuation near the middle
        for i in range(mid_point, total_words):
            if words[i] in '.!?':
                return ' '.join(words[:i+1]), ' '.join(words[i+1:])

        # If no punctuation found, split at the nearest space to the middle
        return ' '.join(words[:mid_point]), ' '.join(words[mid_point:])

    def write_to_file(self, parts, output_path):
        with open(output_path, 'w', encoding='utf-8') as f:
            for i, part in enumerate(parts, 1):
                f.write(f"Part {i}:\n\n")
                f.write(part)
                f.write("\n\n")
                if i < len(parts):
                    f.write("-" * 50 + "\n\n")


# Usage example:
if __name__ == "__main__":
    processor = Convertor.new()
    item = "/Users/despiegk1/Documents/Zoom/2024-07-16 16.42.50 Kristof De Spiegeleer's Personal Meeting Room/video1720369800.mp4"
    # Convertor only extracts/converts audio; transcription lives in whisper.py's MediaProcessor
    wav_path = processor.process(item)
    print(f"Audio ready for transcription at {wav_path}")
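`split_text` packs whole sentences into parts of at most `max_chars_per_part` characters, preserving paragraph breaks where they fit. A tiny demo on made-up text:

```python
# Demo of the chunking logic with an artificially small limit.
c = Convertor.new(max_chars_per_part=60)
text = "One sentence here. Another follows.\n\nA new paragraph starts. It keeps going."
for i, part in enumerate(c.split_text(text), 1):
    print(f"--- part {i} ---\n{part}")
```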
118  herolib/clients/whisper/whisper.py  Normal file
@@ -0,0 +1,118 @@
from pydub import AudioSegment
import whisper
import moviepy.editor as mp
import nltk
from nltk.tokenize import sent_tokenize, word_tokenize

# Download necessary NLTK data
nltk.download('punkt', quiet=True)


class MediaProcessor:
    def __init__(self, max_chars_per_part=4000):
        self.model = whisper.load_model("base.en")
        # self.model = whisper.load_model("medium.en")
        self.max_chars_per_part = max_chars_per_part

    @classmethod
    def new(cls, max_chars_per_part=4000):
        return cls(max_chars_per_part)

    def process(self, path: str):
        if path.lower().endswith(('.mp4', '.avi', '.mov')):  # Video files
            return self.process_video(path)
        elif path.lower().endswith(('.mp3', '.wav', '.ogg')):  # Audio files
            return self.process_audio(path)
        else:
            raise ValueError("Unsupported file format")

    def process_video(self, video_path: str):
        # Extract audio from video
        video = mp.VideoFileClip(video_path)
        audio_path = video_path.rsplit('.', 1)[0] + '.wav'
        video.audio.write_audiofile(audio_path)
        video.close()

        # Now process the extracted audio
        return self.process_audio(audio_path)

    def process_audio(self, audio_path: str):
        # Convert to WAV format if necessary
        wav_path = audio_path.rsplit('.', 1)[0] + '.wav'
        if not audio_path.lower().endswith('.wav'):
            audio = AudioSegment.from_file(audio_path)
            audio.export(wav_path, format='wav')
        else:
            wav_path = audio_path

        # Transcribe audio using Whisper
        result = self.model.transcribe(wav_path)
        transcription = result["text"]

        # Split the transcription into parts
        return self.split_text(transcription)

    def split_text(self, text):
        parts = []
        current_part = ""
        paragraphs = text.split('\n\n')

        for paragraph in paragraphs:
            sentences = sent_tokenize(paragraph)
            for sentence in sentences:
                if len(current_part) + len(sentence) < self.max_chars_per_part:
                    current_part += sentence + ' '
                else:
                    if current_part:
                        parts.append(current_part.strip())
                    current_part = sentence + ' '

            # Add a paragraph break if it doesn't exceed the limit
            if len(current_part) + 2 < self.max_chars_per_part:
                current_part += '\n\n'
            else:
                parts.append(current_part.strip())
                current_part = '\n\n'

        if current_part:
            parts.append(current_part.strip())

        return parts

    def find_natural_pause(self, text):
        words = word_tokenize(text)
        total_words = len(words)
        mid_point = total_words // 2

        # Look for punctuation near the middle
        for i in range(mid_point, total_words):
            if words[i] in '.!?':
                return ' '.join(words[:i+1]), ' '.join(words[i+1:])

        # If no punctuation found, split at the nearest space to the middle
        return ' '.join(words[:mid_point]), ' '.join(words[mid_point:])

    def write_to_file(self, parts, output_path):
        with open(output_path, 'w', encoding='utf-8') as f:
            for i, part in enumerate(parts, 1):
                f.write(f"Part {i}:\n\n")
                f.write(part)
                f.write("\n\n")
                if i < len(parts):
                    f.write("-" * 50 + "\n\n")


# Usage example:
if __name__ == "__main__":
    processor = MediaProcessor.new(max_chars_per_part=10000)
    output_file = "/Users/despiegk1/Documents/transcription3.md"
    item = "/Users/despiegk1/Documents/Zoom/2024-07-16 16.42.50 Kristof De Spiegeleer's Personal Meeting Room/video1720369800.mp4"
    transcription_parts = processor.process(item)

    processor.write_to_file(transcription_parts, output_file)

    print(f"Transcription split into {len(transcription_parts)} parts:")
    for i, part in enumerate(transcription_parts, 1):
        print(f"Part {i}:")
        print(part)
        print("-" * 50)
0  herolib/clients/wireless/__init__.py  Normal file
313  herolib/clients/wireless/wigle_net.py  Normal file
@@ -0,0 +1,313 @@
|
||||
import json
|
||||
import os
|
||||
import time
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime, timedelta
|
||||
from enum import Enum
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
import redis
|
||||
import requests
|
||||
|
||||
API_URL = "https://api.wigle.net/api/v2/network/search"
|
||||
REDIS_CACHE_EXPIRY = timedelta(hours=1)
|
||||
API_RATE_LIMIT = 30 # seconds between requests
|
||||
|
||||
# Initialize Redis connection
|
||||
redis_client = redis.Redis(host="localhost", port=6379, db=0, decode_responses=True)
|
||||
|
||||
# Track last API request time (initialized to allow immediate first request)
|
||||
_last_request_time = time.time() - API_RATE_LIMIT
|
||||
|
||||
|
||||
class WigleError(Exception):
|
||||
"""Custom exception for Wigle-related errors"""
|
||||
|
||||
pass
|
||||
|
||||
|
||||
class NetworkType(str, Enum):
|
||||
"""Network types supported by Wigle API"""
|
||||
|
||||
WIFI = "WIFI"
|
||||
BT = "BT"
|
||||
CELL = "CELL"
|
||||
|
||||
|
||||
class Encryption(str, Enum):
|
||||
"""WiFi encryption types"""
|
||||
|
||||
NONE = "None"
|
||||
WEP = "WEP"
|
||||
WPA = "WPA"
|
||||
WPA2 = "WPA2"
|
||||
WPA3 = "WPA3"
|
||||
UNKNOWN = "unknown"
|
||||
|
||||
|
||||
@dataclass
|
||||
class Location:
|
||||
"""Represents a wireless network location with all available Wigle API fields"""
|
||||
|
||||
ssid: str
|
||||
latitude: float
|
||||
longitude: float
|
||||
last_update: Optional[datetime]
|
||||
encryption: Optional[str] = None
|
||||
network_type: Optional[str] = None
|
||||
channel: Optional[int] = None
|
||||
frequency: Optional[float] = None
|
||||
qos: Optional[int] = None
|
||||
transid: Optional[str] = None
|
||||
firsttime: Optional[datetime] = None
|
||||
lasttime: Optional[datetime] = None
|
||||
country_code: Optional[str] = None
|
||||
city: Optional[str] = None
|
||||
region: Optional[str] = None
|
||||
house_number: Optional[str] = None
|
||||
road: Optional[str] = None
|
||||
address: Optional[str] = None
|
||||
|
||||
|
||||
def get_wigle_auth() -> str:
|
||||
"""Get Wigle authentication token from environment variable"""
|
||||
wigle_auth = os.getenv("WIGLE")
|
||||
if not wigle_auth:
|
||||
raise WigleError("WIGLE environment variable not set. Format should be: 'AIDxxx:yyy'")
|
||||
return wigle_auth
|
||||
|
||||
|
||||
def enforce_rate_limit():
|
||||
"""Enforce API rate limit by sleeping if needed, showing countdown"""
|
||||
global _last_request_time
|
||||
current_time = time.time()
|
||||
time_since_last_request = current_time - _last_request_time
|
||||
|
||||
if time_since_last_request < API_RATE_LIMIT:
|
||||
sleep_time = API_RATE_LIMIT - time_since_last_request
|
||||
print(f"\nRate limit: waiting {sleep_time:.0f} seconds", end="", flush=True)
|
||||
|
||||
# Show countdown
|
||||
for remaining in range(int(sleep_time), 0, -1):
|
||||
time.sleep(1)
|
||||
print(f"\rRate limit: waiting {remaining:2d} seconds", end="", flush=True)
|
||||
|
||||
print("\rRate limit: continuing... ") # Clear the line
|
||||
|
||||
_last_request_time = time.time()
|
||||
|
||||
|
||||
def search_networks(
|
||||
*,
|
||||
# Location filters
|
||||
latitude_north: Optional[float] = None,
|
||||
latitude_south: Optional[float] = None,
|
||||
longitude_east: Optional[float] = None,
|
||||
longitude_west: Optional[float] = None,
|
||||
# Network filters
|
||||
ssid: Optional[str] = None,
|
||||
ssidlike: Optional[str] = None,
|
||||
network_type: Optional[NetworkType] = None,
|
||||
encryption: Optional[Encryption] = None,
|
||||
# Time filters
|
||||
on_since: Optional[datetime] = None,
|
||||
last_update: Optional[datetime] = None,
|
||||
# Result control
|
||||
results_per_page: int = 100,
|
||||
search_after: Optional[str] = None,
|
||||
# Other filters
|
||||
freenet: Optional[bool] = None,
|
||||
paynet: Optional[bool] = None,
|
||||
show_query: bool = False,
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Search for networks using the Wigle API with full parameter support and Redis caching.
|
||||
Rate limited to one request per minute.
|
||||
|
||||
Args:
|
||||
latitude_north: Northern boundary of search box
|
||||
latitude_south: Southern boundary of search box
|
||||
longitude_east: Eastern boundary of search box
|
||||
longitude_west: Western boundary of search box
|
||||
ssid: Exact SSID match
|
||||
ssidlike: SSID wildcard match
|
||||
network_type: Filter by network type (WIFI/BT/CELL)
|
||||
encryption: Filter by encryption type
|
||||
on_since: Only show networks seen on or after date
|
||||
last_update: Only show networks updated since date
|
||||
results_per_page: Number of results per page (max 100)
|
||||
search_after: Token for getting next batch of results
|
||||
freenet: Show only free networks
|
||||
paynet: Show only pay networks
|
||||
show_query: Return query bounds without results
|
||||
|
||||
Returns:
|
||||
Dictionary containing search results and metadata including searchAfter token
|
||||
|
||||
Raises:
|
||||
WigleError: If the WIGLE environment variable is not set or API request fails
|
||||
"""
|
||||
# https://api.wigle.net/api/v2/network/search?onlymine=false&encryption=None&freenet=false&paynet=false
|
||||
try:
|
||||
# Build cache key from all parameters
|
||||
params = locals()
|
||||
cache_key = f"wigle:search:{json.dumps(params, default=str, sort_keys=True)}"
|
||||
|
||||
cached_result = redis_client.get(cache_key)
|
||||
if cached_result:
|
||||
return json.loads(cached_result)
|
||||
|
||||
# Enforce rate limit before making request
|
||||
enforce_rate_limit()
|
||||
|
||||
# Build API parameters
|
||||
api_params = {
|
||||
"onlymine": "false",
|
||||
"resultsPerPage": results_per_page,
|
||||
}
|
||||
|
||||
# Add optional parameters if provided
|
||||
if latitude_north is not None:
|
||||
api_params["latrange1"] = latitude_south
|
||||
api_params["latrange2"] = latitude_north
|
||||
api_params["longrange1"] = longitude_west
|
||||
api_params["longrange2"] = longitude_east
|
||||
|
||||
if ssid:
|
||||
api_params["ssid"] = ssid
|
||||
if ssidlike:
|
||||
api_params["ssidlike"] = ssidlike
|
||||
if network_type:
|
||||
api_params["netid"] = network_type.value
|
||||
if encryption:
|
||||
api_params["encryption"] = encryption.value
|
||||
else:
|
||||
api_params["encryption"] = "None"
|
||||
if on_since:
|
||||
api_params["onSince"] = on_since.strftime("%Y%m%d")
|
||||
if last_update:
|
||||
api_params["lastupdt"] = last_update.strftime("%Y%m%d")
|
||||
if freenet is not None:
|
||||
api_params["freenet"] = str(freenet).lower()
|
||||
if paynet is not None:
|
||||
api_params["paynet"] = str(paynet).lower()
|
||||
if search_after:
|
||||
api_params["searchAfter"] = search_after
|
||||
if show_query:
|
||||
api_params["showQuery"] = str(show_query).lower()
|
||||
|
||||
# Make API request
|
||||
wigle_auth = get_wigle_auth()
|
||||
headers = {"Authorization": f"Basic {wigle_auth}"}
|
||||
response = requests.get(API_URL, params=api_params, headers=headers)
|
||||
response.raise_for_status()
|
||||
result = response.json()
|
||||
|
||||
print(result)
|
||||
|
||||
# Cache the result
|
||||
redis_client.setex(cache_key, int(REDIS_CACHE_EXPIRY.total_seconds()), json.dumps(result))
|
||||
|
||||
return result
|
||||
|
||||
except requests.exceptions.RequestException as e:
|
||||
raise WigleError(f"API request failed: {str(e)}")
|
||||
|
||||
|
||||
def parse_network_to_location(network: Dict[str, Any]) -> Location:
    """Convert a network result from the Wigle API to a Location object."""
    # Parse dates if present
    last_update = None
    firsttime = None
    lasttime = None

    if network.get("lastupdt"):
        try:
            last_update = datetime.strptime(network["lastupdt"], "%Y-%m-%d %H:%M:%S")
        except ValueError:
            pass

    if network.get("firsttime"):
        try:
            firsttime = datetime.strptime(network["firsttime"], "%Y-%m-%d %H:%M:%S")
        except ValueError:
            pass

    if network.get("lasttime"):
        try:
            lasttime = datetime.strptime(network["lasttime"], "%Y-%m-%d %H:%M:%S")
        except ValueError:
            pass

    return Location(
        ssid=network["ssid"],
        latitude=float(network["trilat"]),
        longitude=float(network["trilong"]),
        last_update=last_update,
        encryption=network.get("encryption"),
        network_type=network.get("type"),
        channel=network.get("channel"),
        frequency=network.get("frequency"),
        qos=network.get("qos"),
        transid=network.get("transid"),
        firsttime=firsttime,
        lasttime=lasttime,
        country_code=network.get("country"),
        city=network.get("city"),
        region=network.get("region"),
        house_number=network.get("housenumber"),
        road=network.get("road"),
        address=network.get("address"),
    )
|
||||
|
||||
|
||||
def get_all() -> List[Location]:
    """Search for OpenRoaming networks and return a list of locations.

    Rate limited to one request per minute, including pagination requests.

    Returns:
        List[Location]: List of found network locations

    Raises:
        WigleError: If the WIGLE environment variable is not set or the API request fails
    """
    ssid_names = ["Adentro OpenRoaming", "OpenRoaming", "Passpoint", "PasspointAruba", "Cellular Wi-Fi Passthrough", "WBA_OpenRoaming"]
    locations: List[Location] = []

    for name in ssid_names:
        try:
            search_after = None
            while True:
                results = search_networks(
                    ssid=name, encryption=Encryption.NONE, network_type=NetworkType.WIFI, results_per_page=100, search_after=search_after
                )

                if not results or not results.get("results"):
                    break

                for network in results["results"]:
                    locations.append(parse_network_to_location(network))

                # Get the searchAfter token for the next batch
                search_after = results.get("searchAfter")
                if not search_after:
                    break

        except WigleError as e:
            raise WigleError(f"Error searching for {name}: {str(e)}")

    print(f"Found {len(locations)} OpenRoaming network locations")
    return locations


if __name__ == "__main__":
    locations = get_all()
    for loc in locations:
        print(f"SSID: {loc.ssid}")
        print(f"Location: ({loc.latitude}, {loc.longitude})")
        print(f"Network Type: {loc.network_type or 'N/A'}")
        print(f"Encryption: {loc.encryption or 'N/A'}")
        print(f"Last Update: {loc.last_update or 'N/A'}")
        if loc.address:
            print(f"Address: {loc.address}")
        print("-" * 50)
|
0
herolib/core/__init__.py
Normal file
0
herolib/core/__init__.py
Normal file
BIN
herolib/core/__pycache__/__init__.cpython-313.pyc
Normal file
BIN
herolib/core/__pycache__/__init__.cpython-313.pyc
Normal file
Binary file not shown.
0
herolib/core/heroscript/__init__.py
Normal file
0
herolib/core/heroscript/__init__.py
Normal file
0
herolib/core/heroscript/examples/__init__.py
Normal file
0
herolib/core/heroscript/examples/__init__.py
Normal file
38
herolib/core/heroscript/examples/heroscript_example.py
Normal file
38
herolib/core/heroscript/examples/heroscript_example.py
Normal file
@@ -0,0 +1,38 @@
|
||||
from pydantic import BaseModel, Field
|
||||
from typing import Dict, Any, Type, TypeVar
|
||||
from heroscript.heroscript import *
|
||||
|
||||
|
||||
class User(BaseModel, HeroScriptMixin):
|
||||
oid: str = Field()
|
||||
name: str = Field(min_length=2, description="Chosen name by user", example="myname")
|
||||
city: str = Field()
|
||||
age: int = Field()
|
||||
description: str = Field()
|
||||
|
||||
|
||||
|
||||
# Example usage
|
||||
u1 = User(oid="abc123", name="John", age=30, city="New York",
|
||||
description="""
|
||||
this is a multiline
|
||||
|
||||
we need to remove the
|
||||
this will stay 4 chars in
|
||||
|
||||
end
|
||||
""")
|
||||
|
||||
myheroscript = u1.heroscript()
|
||||
print(myheroscript)
|
||||
|
||||
u2 = User.from_heroscript(heroscript=myheroscript)
|
||||
myprint(u2)
|
||||
|
||||
# p1 = Product(id=1, name="Phone", price=999.99, description="A smart phone")
|
||||
|
||||
# product_heroscript = p1.heroscript()
|
||||
# print(product_heroscript)
|
||||
|
||||
# p2 = Product.from_heroscript(product_heroscript)
|
||||
# print(p2)
|
78
herolib/core/heroscript/examples/heroscript_example2.py
Normal file
78
herolib/core/heroscript/examples/heroscript_example2.py
Normal file
@@ -0,0 +1,78 @@
|
||||
from pydantic import BaseModel, Field
|
||||
from typing import Dict, Any, Type, TypeVar, List
|
||||
from heroscript.heroscript import *
|
||||
|
||||
class Comment(BaseModel):
|
||||
description: str = Field(default="")
|
||||
|
||||
class HeroBase(BaseModel, HeroScriptMixin):
    oid: str = Field(default="", metadata={"unique": True})
    name: str = Field(min_length=2, description="Chosen name by user", example="myname", metadata={"unique": True})
    comments: List[Comment] = Field(default_factory=list, description="Comments which can be attached to the object")
|
||||
|
||||
class User(HeroBase):
|
||||
city: str = Field(metadata={"index": True})
|
||||
age: int = Field(metadata={"index": True})
|
||||
description: str = Field(default="")
|
||||
|
||||
class Product(BaseModel, HeroScriptMixin):
    id: int = Field(default=0, metadata={"unique": True})
|
||||
name: str = Field(metadata={"unique": True})
|
||||
price: float = Field()
|
||||
description: str = Field()
|
||||
|
||||
|
||||
myheroscript="""
|
||||
|
||||
```hero
|
||||
!!user.define
|
||||
oid:abc123
|
||||
name:John
|
||||
description:'
|
||||
this is a multiline
|
||||
|
||||
we need to remove the
|
||||
this will stay 4 chars in
|
||||
|
||||
end
|
||||
'
|
||||
age:30
|
||||
city:'New York'
|
||||
|
||||
!!product.define
|
||||
id:33
|
||||
name:aproduct
|
||||
description:'
|
||||
this is a multiline
|
||||
|
||||
we need to remove the
|
||||
this will stay 4 chars in
|
||||
|
||||
end
|
||||
'
|
||||
price:10.0
|
||||
|
||||
```
|
||||
|
||||
"""
|
||||
|
||||
# hs=HeroScripts(class_types={"user":User,"product":Product},content=myheroscript)
|
||||
mypath="~/code/git.threefold.info/tfgrid/hero_research/hero/osis/heroscript/example"
|
||||
hs=HeroScripts(class_types={"user":User,"product":Product},path=mypath)
|
||||
|
||||
objs=hs.get_objects()
|
||||
|
||||
for o in objs:
|
||||
myprint(o)
|
||||
|
||||
for item in hs.heroscripts:
|
||||
print(item)
|
||||
|
||||
query = "john*"
|
||||
results = hs.search(User, query)
|
||||
|
||||
# Print the search results
|
||||
for r in results:
|
||||
# print(f"User: {r["path"]}")
|
||||
print(r)
|
||||
|
0
herolib/core/heroscript/examples/wiki/__init__.py
Normal file
0
herolib/core/heroscript/examples/wiki/__init__.py
Normal file
1
herolib/core/heroscript/examples/wiki/done.json
Normal file
1
herolib/core/heroscript/examples/wiki/done.json
Normal file
@@ -0,0 +1 @@
|
||||
{"/Users/despiegk/code/git.threefold.info/tfgrid/hero_research/hero/osis/heroscript/example/testFile.md": "f6e8b6a32349c262cb9afbea771c5add", "/Users/despiegk/code/git.threefold.info/tfgrid/hero_research/hero/osis/heroscript/example/sub/test file 2.md": "0ecc29046b6ef743481358e4c5630a6d"}
|
15
herolib/core/heroscript/examples/wiki/sub/test file 2.md
Normal file
15
herolib/core/heroscript/examples/wiki/sub/test file 2.md
Normal file
@@ -0,0 +1,15 @@
|
||||
# header
|
||||
|
||||
!!product.define
|
||||
id:33
|
||||
name:aproduct
|
||||
description:'
|
||||
this is a multiline
|
||||
|
||||
we need to remove the
|
||||
this will stay 4 chars in
|
||||
|
||||
end
|
||||
'
|
||||
price:10.0
|
||||
something else
|
22
herolib/core/heroscript/examples/wiki/testFile.md
Normal file
22
herolib/core/heroscript/examples/wiki/testFile.md
Normal file
@@ -0,0 +1,22 @@
|
||||
|
||||
!!user.define
|
||||
oid:abc123
|
||||
name:John
|
||||
description:'
|
||||
this is a multiline
|
||||
|
||||
we need to remove the
|
||||
this will stay 4 chars in
|
||||
|
||||
end
|
||||
'
|
||||
age:30
|
||||
city:'New York'
|
||||
|
||||
```heroscript
|
||||
!!user.define
|
||||
oid:4nd
|
||||
name:John2
|
||||
age:40
|
||||
city:bxl
|
||||
```
|
207
herolib/core/heroscript/heroaction.py
Normal file
207
herolib/core/heroscript/heroaction.py
Normal file
@@ -0,0 +1,207 @@
|
||||
|
||||
from herotools.texttools import dedent
|
||||
from typing import List, Dict, Tuple
|
||||
import re
|
||||
from heroscript.tools import action_blocks,format_multiline_text,heroscript_repr
|
||||
import textwrap
|
||||
|
||||
class HeroActions:
|
||||
def __init__(self, path: str = "", content:str = ""):
|
||||
blocks=action_blocks(path=path,content=content)
|
||||
self.actions : List[HeroAction] = []
|
||||
for block in blocks:
|
||||
self.actions.append(HeroAction(block))
|
||||
|
||||
def __repr__(self):
|
||||
out=""
|
||||
for item in self.actions:
|
||||
out+=item.__repr__()+"\n"
|
||||
return out
|
||||
|
||||
|
||||
class HeroAction:
|
||||
def __init__(self, content: str):
|
||||
blocks=action_blocks(content=content)
|
||||
if len(blocks)==0:
|
||||
raise ValueError(f"don't find actions in {content}")
|
||||
elif len(blocks)>1:
|
||||
raise ValueError(f"Found more than one action in {content}")
|
||||
content=blocks[0]
|
||||
self.name, content = _name_paramstr(content)
|
||||
self.params = Params(content)
|
||||
|
||||
def __str__(self):
|
||||
param_str=textwrap.indent(self.params.__str__()," ")
|
||||
return f"!!{self.name}\n{param_str}"
|
||||
|
||||
def __repr__(self):
|
||||
#return self.__str__()
|
||||
return heroscript_repr(self.__str__())
|
||||
|
||||
|
||||
class Params:
|
||||
def __init__(self, content: str):
|
||||
self.__params = params_parse(content)
|
||||
|
||||
def __str__(self):
|
||||
sorted_params = sorted(self.__params.items())
|
||||
param_str=""
|
||||
for key,value in sorted_params:
|
||||
if "'" in value:
|
||||
param_str+=f"{key}: {value}\n"
|
||||
elif "\n" in value:
|
||||
v=format_multiline_text(value)
|
||||
param_str+=f"{key}: {v}\n"
|
||||
elif " " in value:
|
||||
param_str+=f"{key}: '{value}'\n"
|
||||
else:
|
||||
param_str+=f"{key}: {value}\n"
|
||||
return param_str
|
||||
|
||||
|
||||
def get_int(self, key: str, defval: int = 99999999) -> int:
|
||||
if key not in self.__params:
|
||||
if defval == 99999999:
|
||||
raise KeyError(f"Key '{key}' must exist in parameters")
|
||||
return defval
|
||||
return int(self.__params[key])
|
||||
|
||||
def get_float(self, key: str, defval: float = 99999999.0) -> float:
|
||||
if key not in self.__params:
|
||||
if defval == 99999999.0:
|
||||
raise KeyError(f"Key '{key}' must exist in parameters")
|
||||
return defval
|
||||
return float(self.__params[key])
|
||||
|
||||
def get(self, key: str, defval: str = "99999999") -> str:
|
||||
if key not in self.__params:
|
||||
if defval == "99999999":
|
||||
raise KeyError(f"Key '{key}' must exist in parameters")
|
||||
return defval
|
||||
return self.__params[key]
|
||||
|
||||
def get_list(self, key: str, defval: List[str] = [], needtoexist: bool = True) -> List[str]:
|
||||
if defval is None:
|
||||
defval = []
|
||||
if key not in self.__params:
|
||||
if needtoexist:
|
||||
raise KeyError(f"Key '{key}' must exist in parameters")
|
||||
return defval
|
||||
return [item.strip().strip("'").strip() for item in self.__params[key].split(",")]
|
||||
|
||||
def get_list_int(self, key: str, defval: List[int] = [], needtoexist: bool = True) -> List[int]:
|
||||
if defval is None:
|
||||
defval = []
|
||||
if key not in self.__params:
|
||||
if needtoexist:
|
||||
raise KeyError(f"Key '{key}' must exist in parameters")
|
||||
return defval
|
||||
return [int(item.strip()) for item in self.__params[key].split(",")]
|
||||
|
||||
def get_list_float(self, key: str, defval: List[float] = [], needtoexist: bool = True) -> List[float]:
|
||||
if defval is None:
|
||||
defval = []
|
||||
if key not in self.__params:
|
||||
if needtoexist:
|
||||
raise KeyError(f"Key '{key}' must exist in parameters")
|
||||
return defval
|
||||
return [float(item.strip()) for item in self.__params[key].split(",")]
|
||||
|
||||
def get_all(self) -> Dict[str, str]:
|
||||
return self.__params
|
||||
|
||||
|
||||
def _name_paramstr(heroscript: str) -> Tuple[str, str]:
|
||||
if not isinstance(heroscript, str):
|
||||
raise ValueError("Input must be a string")
|
||||
|
||||
heroscript = dedent(heroscript)
|
||||
lines = heroscript.strip().split("\n")
|
||||
if not lines or "!!" not in lines[0]:
|
||||
raise ValueError("The first line must contain '!!' to indicate the class name")
|
||||
|
||||
try:
|
||||
class_name = lines[0].split("!!")[1].lower().strip()
|
||||
except IndexError:
|
||||
raise ValueError("Invalid format for class name extraction")
|
||||
|
||||
rest_of_text = dedent("\n".join(lines[1:]))
|
||||
return class_name, rest_of_text
|
||||
|
||||
|
||||
def params_parse(content: str) -> Dict[str, str]:
|
||||
lines = dedent(content).strip().split("\n")
|
||||
props = {}
|
||||
multiline_prop = None
|
||||
multiline_value : List[str] = list()
|
||||
|
||||
for line in lines:
|
||||
if multiline_prop:
|
||||
if line.strip() == "'":
|
||||
props[prop] = dedent("\n".join(multiline_value))
|
||||
multiline_prop = None
|
||||
multiline_value = []
|
||||
else:
|
||||
multiline_value.append(line)
|
||||
else:
|
||||
if ":" in line:
|
||||
prop, value = line.split(":", 1)
|
||||
prop = prop.strip()
|
||||
value = value.strip()
|
||||
if value == "'":
|
||||
multiline_prop = prop
|
||||
else:
|
||||
if value.startswith("'") and value.endswith("'"):
|
||||
value1 = value[1:-1]
|
||||
if not "'" in value1:
|
||||
value=value1
|
||||
props[prop] = value
|
||||
return props
|
||||
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
# Example usage
|
||||
text = """
|
||||
|
||||
!!obj1.define
|
||||
myname: 'mymama'
|
||||
mylist: '20,200'
|
||||
mylist2: 20,'a bbb'
|
||||
mylist3: 20,200
|
||||
myint:2
|
||||
|
||||
!!obj2.color
|
||||
mother: 'mymama'
|
||||
name:'aurelie'
|
||||
length:60
|
||||
description:'
|
||||
multiline is supported
|
||||
now for aurelie
|
||||
'
|
||||
color:green
|
||||
"""
|
||||
|
||||
|
||||
hero_actions = HeroActions(content=text)
|
||||
print(hero_actions)
|
||||
|
||||
a2=hero_actions.actions[1]
|
||||
|
||||
|
||||
assert a2.params.get_list(key="color")==["green"]
|
||||
assert a2.params.get_list(key="mother")==["mymama"]
|
||||
assert a2.params.get(key="color")=="green"
|
||||
assert a2.params.get_int(key="length")==60
|
||||
assert a2.params.get_list_int(key="length")==[60]
|
||||
|
||||
#now some non existing ones
|
||||
assert a2.params.get_int(key="lengtha",defval=3)==3
|
||||
assert a2.params.get(key="lengtha",defval="3")=="3"
|
||||
|
||||
a1=hero_actions.actions[0]
|
||||
#print(a1.params.get_list(key="mylist2"))
|
||||
assert a1.params.get_list(key="mylist")==["20","200"]
|
||||
assert a1.params.get_list_int(key="mylist")==[20,200]
|
||||
assert a1.params.get_list(key="mylist2")==["20","a bbb"]
|
129
herolib/core/heroscript/heroscripts.py
Normal file
129
herolib/core/heroscript/heroscripts.py
Normal file
@@ -0,0 +1,129 @@
|
||||
from pydantic import BaseModel, Field
from typing import Any, Dict, List, Type, TypeVar
import re
import hashlib
import json
import os

# Whoosh is assumed here: the index/search helpers below reference Schema,
# ID, NUMERIC, TEXT, STORED, index and QueryParser without importing them.
from whoosh import index
from whoosh.fields import Schema, ID, NUMERIC, TEXT, STORED
from whoosh.qparser import QueryParser

# Assumed import: the block extractor lives in heroscript.tools.
from heroscript.tools import _extract_heroscript_blocks as extract_heroscript_blocks


T = TypeVar("T", bound=BaseModel)


class HeroScripts:
    def __init__(self, class_types: dict, path: str = "", content: str = "", indexpath: str = ""):
        self.class_types = class_types
        # HeroScript is assumed to be importable from this package.
        self.heroscripts: List["HeroScript"] = []
        self.path = os.path.expanduser(path)
        self.indexpath = os.path.expanduser(indexpath)
        self.done: Dict[str, str] = {}
|
||||
|
||||
# self.done_load()
|
||||
|
||||
if self.path:
|
||||
try:
|
||||
# self.done_load()
|
||||
self.load(self.path)
|
||||
self.done_save()
|
||||
except FileNotFoundError as e:
|
||||
print(f"Directory not found: {self.path}")
|
||||
print(f"Error: {str(e)}")
|
||||
|
||||
self.create_indexes()
|
||||
self.index_objects()
|
||||
|
||||
if content:
|
||||
blocks = extract_heroscript_blocks(content)
|
||||
self.heroscripts.extend(HeroScript(block) for block in blocks)
|
||||
|
||||
def done_load(self):
|
||||
if self.path:
|
||||
done_file = os.path.join(self.path, "done.json")
|
||||
if os.path.exists(done_file):
|
||||
with open(done_file, "r") as f:
|
||||
self.done = json.load(f)
|
||||
|
||||
def done_save(self):
|
||||
if self.path:
|
||||
done_file = os.path.join(self.path, "done.json")
|
||||
with open(done_file, "w") as f:
|
||||
json.dump(self.done, f)
|
||||
|
||||
    def load(self, path):
        for root, _, files in os.walk(path):
            for filename in files:
                if filename.endswith(".md"):
                    filepath = os.path.join(root, filename)
                    print(f" - load {filepath}")
                    with open(filepath, "r") as file:
                        content = file.read()
                    md5hash = hashlib.md5(content.encode()).hexdigest()
                    if filepath not in self.done or self.done[filepath] != md5hash:
                        blocks = extract_heroscript_blocks(content)
                        self.heroscripts.extend(HeroScript(block, filepath) for block in blocks)
                        self.done[filepath] = md5hash

    def get_objects(self):
|
||||
objects = []
|
||||
for heroscript in self.heroscripts:
|
||||
if heroscript.content:
|
||||
try:
|
||||
class_name = heroscript.content.split("\n")[0].split("!!")[1].split(".")[0].lower()
|
||||
if class_name in self.class_types:
|
||||
class_type = self.class_types[class_name]
|
||||
try:
|
||||
obj = class_type.from_heroscript(heroscript.content)
|
||||
objects.append(obj)
|
||||
except Exception as e:
|
||||
print(f"Error parsing HeroScript: {e}")
|
||||
except (IndexError, ValueError):
|
||||
print(f"Invalid HeroScript format: {heroscript.content}")
|
||||
return objects
|
||||
|
||||
|
||||
def create_indexes(self):
|
||||
for class_type in self.class_types.values():
|
||||
schema = self.create_schema(class_type)
|
||||
index_dir = os.path.join(self.indexpath, class_type.__name__.lower())
|
||||
if not os.path.exists(index_dir):
|
||||
os.makedirs(index_dir)
|
||||
index.create_in(index_dir, schema)
|
||||
|
||||
def create_schema(self, class_type):
|
||||
schema_fields = {"path": STORED()}
|
||||
for field_name, field in class_type.__fields__.items():
|
||||
json_schema_extra = getattr(field, "json_schema_extra", None)
|
||||
if json_schema_extra is not None:
|
||||
metadata = json_schema_extra.get("metadata", {})
|
||||
if isinstance(metadata, list):
|
||||
metadata = {item: True for item in metadata}
|
||||
if metadata.get("unique") or metadata.get("indexed"):
|
||||
if field.annotation == str :
|
||||
schema_fields[field_name] = ID(stored=True, unique=metadata.get("unique", False))
|
||||
elif field.annotation == int or field.annotation == float :
|
||||
schema_fields[field_name] = NUMERIC(stored=True, unique=metadata.get("unique", False))
|
||||
else:
|
||||
schema_fields[field_name] = TEXT(stored=True,lowercase=True)
|
||||
return Schema(**schema_fields)
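For reference, the metadata convention this schema builder reacts to, as used in heroscript_example2.py (the model here is hypothetical, for illustration only):

```python
from pydantic import BaseModel, Field

class City(BaseModel):  # hypothetical example model
    name: str = Field(metadata={"unique": True})        # -> Whoosh ID field, unique
    population: int = Field(metadata={"index": True})   # -> Whoosh NUMERIC field
    description: str = Field(default="")                # no metadata -> not indexed
```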
|
||||
|
||||
def index_objects(self):
|
||||
for heroscript in self.heroscripts:
|
||||
for obj in self.get_objects():
|
||||
index_dir = os.path.join(self.indexpath, type(obj).__name__.lower())
|
||||
ix = index.open_dir(index_dir)
|
||||
writer = ix.writer()
|
||||
writer.add_document(path=heroscript.path, **{k: str(v).lower() for k, v in obj.dict().items() if k in ix.schema.names()})
|
||||
writer.commit()
|
||||
|
||||
def search(self, class_type, query):
|
||||
index_dir = os.path.join(self.indexpath, class_type.__name__.lower())
|
||||
ix = index.open_dir(index_dir)
|
||||
qp = QueryParser("name", schema=ix.schema)
|
||||
q = qp.parse(query)
|
||||
with ix.searcher() as searcher:
|
||||
results = searcher.search(q)
|
||||
# return results
|
||||
return [result["path"] for result in results]
|
82
herolib/core/heroscript/mixin.py
Normal file
82
herolib/core/heroscript/mixin.py
Normal file
@@ -0,0 +1,82 @@
|
||||
from pydantic import BaseModel, Field
from typing import Dict, Any, List, Type, TypeVar
import re
from colorama import Fore, Style
import hashlib
import json
import os
from heroscript.heroaction import HeroAction
from heroscript.tools import format_multiline_text
|
||||
|
||||
class HeroScriptMixin:
|
||||
|
||||
def heroscript(self) -> HeroAction:
|
||||
class_name = self.__class__.__name__.lower()
|
||||
prop_order = ["id", "oid", "name", "title", "description", "content"]
|
||||
|
||||
# Get all the properties of the object
|
||||
props = list(self.__fields__.keys())
|
||||
|
||||
# Separate properties into those in prop_order and the rest
|
||||
ordered_props = [prop for prop in prop_order if prop in props]
|
||||
remaining_props = [prop for prop in props if prop not in prop_order]
|
||||
|
||||
# Sort the remaining properties
|
||||
sorted_remaining_props = sorted(remaining_props)
|
||||
|
||||
# Combine the ordered properties and sorted remaining properties
|
||||
sorted_props = ordered_props + sorted_remaining_props
|
||||
|
||||
lines = [f"!!{class_name}.define"]
|
||||
for prop in sorted_props:
|
||||
if prop in self.__fields__:
|
||||
val = getattr(self, prop)
|
||||
if isinstance(val, str):
|
||||
if "\n" in val:
|
||||
val = format_multiline_text(text=val)
|
||||
elif any(c.isspace() for c in val):
|
||||
val = f"'{val}'"
|
||||
lines.append(f" {prop}:{val}")
|
||||
|
||||
result = "\n".join(lines)
|
||||
|
||||
return HeroAction(content=result)
|
||||
|
||||
@classmethod
|
||||
def from_heroscript(cls, heroscript: str):
|
||||
lines = heroscript.strip().split("\n")
|
||||
class_name = lines[0].split("!!")[1].split(".")[0]
|
||||
|
||||
        props = {}
        multiline_prop = None
        multiline_value: List[str] = []
|
||||
|
||||
for line in lines[1:]:
|
||||
if multiline_prop:
|
||||
if line.strip() == "'":
|
||||
# End of multiline text
|
||||
min_indent = min(len(ml) - len(ml.lstrip()) for ml in multiline_value if ml.strip())
|
||||
unindented_lines = [ml[min_indent:] for ml in multiline_value]
|
||||
props[multiline_prop] = "\n".join(unindented_lines)
|
||||
multiline_prop = None
|
||||
multiline_value = []
|
||||
else:
|
||||
multiline_value.append(line)
|
||||
else:
|
||||
if ":" in line:
|
||||
prop, value = line.split(":", 1)
|
||||
prop = prop.strip()
|
||||
value = value.strip()
|
||||
|
||||
if value == "'":
|
||||
# Start of multiline text
|
||||
multiline_prop = prop
|
||||
else:
|
||||
if value.startswith("'") and value.endswith("'"):
|
||||
value = value[1:-1]
|
||||
props[prop] = value
|
||||
|
||||
return cls(**props)
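A round-trip sketch for the mixin (the User model here is hypothetical; string-only fields keep the comparison exact, since from_heroscript() returns raw strings):

```python
from pydantic import BaseModel

class User(BaseModel, HeroScriptMixin):  # hypothetical example model
    name: str
    city: str

u = User(name="John", city="New York")
action = u.heroscript()                  # HeroAction for a !!user.define block
u2 = User.from_heroscript(str(action))   # parse it back
assert u2 == u
```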
|
||||
|
||||
|
4
herolib/core/heroscript/readme.md
Normal file
4
herolib/core/heroscript/readme.md
Normal file
@@ -0,0 +1,4 @@
|
||||
## heroscript
|
||||
|
||||
|
||||
> Not to be used yet.
|
145
herolib/core/heroscript/tools.py
Normal file
145
herolib/core/heroscript/tools.py
Normal file
@@ -0,0 +1,145 @@
|
||||
|
||||
from typing import List
|
||||
import os
|
||||
from colorama import Fore, Style
|
||||
from herotools.texttools import dedent
|
||||
import textwrap
|
||||
|
||||
#load the heroscripts from filesystem
|
||||
def heroscript_blocks(path: str) -> List[str]:
|
||||
|
||||
heroscript_blocks = list()
|
||||
|
||||
for root, dirs, files in os.walk(path):
|
||||
for file in files:
|
||||
if file.endswith(".md"):
|
||||
file_path = os.path.join(root, file)
|
||||
with open(file_path, "r") as f:
|
||||
content = f.read()
|
||||
blocks = _extract_heroscript_blocks(content)
|
||||
heroscript_blocks.extend(blocks)
|
||||
|
||||
return heroscript_blocks
|
||||
|
||||
|
||||
def _extract_heroscript_blocks(content: str):
|
||||
content=dedent(content)
|
||||
blocks = []
|
||||
lines = content.split("\n")
|
||||
|
||||
in_block = False
|
||||
block_lines : List[str] = list()
|
||||
|
||||
for line in lines:
|
||||
if line.startswith("```hero"):
|
||||
in_block = True
|
||||
block_lines = []
|
||||
elif line.startswith("```") and in_block:
|
||||
in_block = False
|
||||
block = "\n".join(block_lines)
|
||||
blocks.append(block)
|
||||
elif in_block:
|
||||
block_lines.append(line)
|
||||
return blocks
|
||||
|
||||
|
||||
def action_blocks(path: str = "", content:str = "") -> List[str]:
|
||||
if content!="":
|
||||
return __action_blocks_get(content)
|
||||
res : List[str] = list()
|
||||
for hscript in heroscript_blocks(path):
|
||||
for actionscript in __action_blocks_get(hscript):
|
||||
res.append(actionscript)
|
||||
return res
|
||||
|
||||
def __action_blocks_get(content: str) -> List[str]:
|
||||
content=dedent(content)
|
||||
blocks = list()
|
||||
lines = content.split("\n")
|
||||
|
||||
block_lines : List[str] = list()
|
||||
herofound=False
|
||||
|
||||
for line in lines:
|
||||
# print(line)
|
||||
if line.startswith("!!"):
|
||||
herofound=True
|
||||
if block_lines: #means we found before
|
||||
block = "\n".join(block_lines)
|
||||
blocks.append(block)
|
||||
block_lines = []
|
||||
# print("f1")
|
||||
block_lines.append(line)
|
||||
elif line.strip() and not line.startswith(" ") and not line.startswith("\t") and block_lines:
|
||||
block = "\n".join(block_lines)
|
||||
blocks.append(block)
|
||||
block_lines = []
|
||||
herofound=False
|
||||
elif herofound:
|
||||
block_lines.append(line)
|
||||
# print("append")
|
||||
|
||||
if block_lines:
|
||||
block = "\n".join(block_lines)
|
||||
blocks.append(block)
|
||||
|
||||
return blocks
|
||||
|
||||
def myprint(obj):
|
||||
class_name = f"{Fore.YELLOW}{obj.__class__.__name__}{Style.RESET_ALL}"
|
||||
fields = [field for field in obj.__fields__ if field in obj.__dict__]
|
||||
attributes = ', '.join(f"{Fore.LIGHTBLACK_EX}{field}{Style.RESET_ALL}={Fore.GREEN}'{getattr(obj, field)}'{Style.RESET_ALL}" for field in fields)
|
||||
print( f"{class_name}({attributes})" )
|
||||
|
||||
|
||||
#format text to be ready to be set in heroscript
|
||||
def format_multiline_text(text: str) -> str:
|
||||
|
||||
text = dedent(text)
|
||||
text = textwrap.indent(text, " ")
|
||||
|
||||
# Join the formatted lines with newline characters and add the required indentation
|
||||
formatted_text = "'\n" + text + "\n '"
|
||||
|
||||
return formatted_text
|
||||
|
||||
|
||||
|
||||
#representation with colors of heroscript
|
||||
def heroscript_repr(content:str) ->str:
|
||||
lines = content.split("\n")
|
||||
formatted_lines = []
|
||||
|
||||
for line in lines:
|
||||
if line.startswith("!!"):
|
||||
formatted_line = f"{Fore.RED}{line}{Style.RESET_ALL}"
|
||||
elif ":" in line:
|
||||
prop, value = line.split(":", 1)
|
||||
prop = prop.strip()
|
||||
value = value.strip()
|
||||
|
||||
if value.startswith("'") and value.endswith("'"):
|
||||
value = f" {Fore.GREEN}{value}{Style.RESET_ALL}"
|
||||
else:
|
||||
value = f" {Fore.YELLOW}{value}{Style.RESET_ALL}"
|
||||
|
||||
formatted_line = f" {Fore.CYAN}{prop}{Style.RESET_ALL}:{value}"
|
||||
else:
|
||||
formatted_line = line
|
||||
|
||||
formatted_lines.append(formatted_line)
|
||||
|
||||
return "\n".join(formatted_lines)
|
||||
|
||||
def heroscript_print(content:str):
|
||||
o=heroscript_repr(content)
|
||||
print(o)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
t=" something\n a\n\n bbbb"
|
||||
|
||||
print(dedent(t))
|
||||
|
||||
print(format_multiline_text(t))
|
0
herolib/core/logger/__init__.py
Normal file
0
herolib/core/logger/__init__.py
Normal file
9
herolib/core/logger/factory.py
Normal file
9
herolib/core/logger/factory.py
Normal file
@@ -0,0 +1,9 @@
|
||||
from herolib.core.pathlib.pathlib import get_dir
|
||||
from herolib.core.logger.model import Logger
|
||||
|
||||
def new(path: str) -> Logger:
|
||||
p = get_dir(path=path, create=True)
|
||||
return Logger(
|
||||
path=p,
|
||||
lastlog_time=0
|
||||
)
|
3
herolib/core/logger/log.py
Normal file
3
herolib/core/logger/log.py
Normal file
@@ -0,0 +1,3 @@
|
||||
# This file is now empty as the log function has been moved to model.py
|
||||
# It can be removed or kept as a placeholder if needed for future extensions.
|
||||
# For now, we will keep it empty.
|
150
herolib/core/logger/log_test.py
Normal file
150
herolib/core/logger/log_test.py
Normal file
@@ -0,0 +1,150 @@
|
||||
import unittest
|
||||
import os
|
||||
import shutil
|
||||
from herolib.core.logger.factory import new
from herolib.core.logger.model import LogItemArgs, LogType, Logger
from herolib.data.ourtime.ourtime import new as ourtime_new, now as ourtime_now
from herolib.core.pathlib.pathlib import get_file, ls, rmdir_all
|
||||
|
||||
class TestLogger(unittest.TestCase):
|
||||
def setUp(self):
|
||||
# Corresponds to testsuite_begin()
|
||||
if os.path.exists('/tmp/testlogs'):
|
||||
rmdir_all('/tmp/testlogs')
|
||||
|
||||
def tearDown(self):
|
||||
# Corresponds to testsuite_end()
|
||||
# if os.path.exists('/tmp/testlogs'):
|
||||
# rmdir_all('/tmp/testlogs')
|
||||
pass
|
||||
|
||||
def test_logger_functionality(self):
|
||||
logger = new('/tmp/testlogs')
|
||||
|
||||
# Test stdout logging
|
||||
logger.log(LogItemArgs(
|
||||
cat='test-app',
|
||||
log='This is a test message\nWith a second line\nAnd a third line',
|
||||
logtype=LogType.STDOUT,
|
||||
timestamp=ourtime_new('2022-12-05 20:14:35')
|
||||
))
|
||||
|
||||
# Test error logging
|
||||
logger.log(LogItemArgs(
|
||||
cat='error-test',
|
||||
log='This is an error\nWith details',
|
||||
logtype=LogType.ERROR,
|
||||
timestamp=ourtime_new('2022-12-05 20:14:35')
|
||||
))
|
||||
|
||||
logger.log(LogItemArgs(
|
||||
cat='test-app',
|
||||
log='This is a test message\nWith a second line\nAnd a third line',
|
||||
logtype=LogType.STDOUT,
|
||||
timestamp=ourtime_new('2022-12-05 20:14:36')
|
||||
))
|
||||
|
||||
logger.log(LogItemArgs(
|
||||
cat='error-test',
|
||||
log='''
|
||||
This is an error
|
||||
|
||||
With details
|
||||
''',
|
||||
logtype=LogType.ERROR,
|
||||
timestamp=ourtime_new('2022-12-05 20:14:36')
|
||||
))
|
||||
|
||||
logger.log(LogItemArgs(
|
||||
cat='error-test',
|
||||
log='''
|
||||
aaa
|
||||
|
||||
bbb
|
||||
''',
|
||||
logtype=LogType.ERROR,
|
||||
timestamp=ourtime_new('2022-12-05 22:14:36')
|
||||
))
|
||||
|
||||
logger.log(LogItemArgs(
|
||||
cat='error-test',
|
||||
log='''
|
||||
aaa2
|
||||
|
||||
bbb2
|
||||
''',
|
||||
logtype=LogType.ERROR,
|
||||
timestamp=ourtime_new('2022-12-05 22:14:36')
|
||||
))
|
||||
|
||||
# Verify log directory exists
|
||||
self.assertTrue(os.path.exists('/tmp/testlogs'), 'Log directory should exist')
|
||||
|
||||
# Get log file
|
||||
files = ls('/tmp/testlogs')
|
||||
self.assertEqual(len(files), 2) # Expecting two files: 2022-12-05-20.log and 2022-12-05-22.log
|
||||
|
||||
# Test search functionality
|
||||
items_stdout = logger.search(
|
||||
timestamp_from=ourtime_new('2022-11-01 20:14:35'),
|
||||
timestamp_to=ourtime_new('2025-11-01 20:14:35'),
|
||||
logtype=LogType.STDOUT
|
||||
)
|
||||
self.assertEqual(len(items_stdout), 2)
|
||||
|
||||
items_error = logger.search(
|
||||
timestamp_from=ourtime_new('2022-11-01 20:14:35'),
|
||||
timestamp_to=ourtime_new('2025-11-01 20:14:35'),
|
||||
logtype=LogType.ERROR
|
||||
)
|
||||
self.assertEqual(len(items_error), 4)
|
||||
|
||||
# Test specific log content
|
||||
found_error_log = False
|
||||
for item in items_error:
|
||||
if "This is an error\nWith details" in item.log:
|
||||
found_error_log = True
|
||||
break
|
||||
self.assertTrue(found_error_log, "Expected error log content not found")
|
||||
|
||||
found_stdout_log = False
|
||||
for item in items_stdout:
|
||||
if "This is a test message\nWith a second line\nAnd a third line" in item.log:
|
||||
found_stdout_log = True
|
||||
break
|
||||
self.assertTrue(found_stdout_log, "Expected stdout log content not found")
|
||||
|
||||
# Test search by category
|
||||
items_test_app = logger.search(
|
||||
timestamp_from=ourtime_new('2022-11-01 20:14:35'),
|
||||
timestamp_to=ourtime_new('2025-11-01 20:14:35'),
|
||||
cat='test-app'
|
||||
)
|
||||
self.assertEqual(len(items_test_app), 2)
|
||||
|
||||
items_error_test = logger.search(
|
||||
timestamp_from=ourtime_new('2022-11-01 20:14:35'),
|
||||
timestamp_to=ourtime_new('2025-11-01 20:14:35'),
|
||||
cat='error-test'
|
||||
)
|
||||
self.assertEqual(len(items_error_test), 4)
|
||||
|
||||
# Test search by log content
|
||||
items_with_aaa = logger.search(
|
||||
timestamp_from=ourtime_new('2022-11-01 20:14:35'),
|
||||
timestamp_to=ourtime_new('2025-11-01 20:14:35'),
|
||||
log='aaa'
|
||||
)
|
||||
self.assertEqual(len(items_with_aaa), 2)
|
||||
|
||||
# Test search with timestamp range
|
||||
items_specific_time = logger.search(
|
||||
timestamp_from=ourtime_new('2022-12-05 22:00:00'),
|
||||
timestamp_to=ourtime_new('2022-12-05 23:00:00'),
|
||||
logtype=LogType.ERROR
|
||||
)
|
||||
self.assertEqual(len(items_specific_time), 2)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
72
herolib/core/logger/model.py
Normal file
72
herolib/core/logger/model.py
Normal file
@@ -0,0 +1,72 @@
|
||||
from enum import Enum
|
||||
from typing import Optional
|
||||
from herolib.data.ourtime.ourtime import OurTime
|
||||
from herolib.core.pathlib.pathlib import Path
|
||||
|
||||
class LogType(Enum):
|
||||
STDOUT = "stdout"
|
||||
ERROR = "error"
|
||||
|
||||
class LogItemArgs:
|
||||
def __init__(self, cat: str, log: str, logtype: LogType, timestamp: Optional[OurTime] = None):
|
||||
self.timestamp = timestamp
|
||||
self.cat = cat
|
||||
self.log = log
|
||||
self.logtype = logtype
|
||||
|
||||
import os
|
||||
from herolib.core.texttools.texttools import name_fix, expand, dedent
|
||||
from herolib.data.ourtime.ourtime import OurTime, now as ourtime_now
|
||||
|
||||
class Logger:
|
||||
def __init__(self, path: Path, lastlog_time: int = 0):
|
||||
self.path = path
|
||||
self.lastlog_time = lastlog_time
|
||||
|
||||
def log(self, args_: LogItemArgs):
|
||||
args = args_
|
||||
|
||||
t = args.timestamp if args.timestamp else ourtime_now()
|
||||
|
||||
# Format category (max 10 chars, ascii only)
|
||||
args.cat = name_fix(args.cat)
|
||||
if len(args.cat) > 10:
|
||||
raise ValueError('category cannot be longer than 10 chars')
|
||||
args.cat = expand(args.cat, 10, ' ')
|
||||
|
||||
args.log = dedent(args.log).strip()
|
||||
|
||||
logfile_path = os.path.join(self.path.path, f"{t.dayhour()}.log")
|
||||
|
||||
# Create log file if it doesn't exist
|
||||
if not os.path.exists(logfile_path):
|
||||
with open(logfile_path, 'w') as f:
|
||||
pass # Create empty file
|
||||
self.lastlog_time = 0 # make sure we put time again
|
||||
|
||||
with open(logfile_path, 'a') as f:
|
||||
content = ''
|
||||
|
||||
# Add timestamp if we're in a new second
|
||||
if t.unix() > self.lastlog_time:
|
||||
content += f"\n{t.time().format_ss()}\n"
|
||||
self.lastlog_time = t.unix()
|
||||
|
||||
# Format log lines
|
||||
error_prefix = 'E' if args.logtype == LogType.ERROR else ' '
|
||||
lines = args.log.split('\n')
|
||||
|
||||
for i, line in enumerate(lines):
|
||||
if i == 0:
|
||||
content += f"{error_prefix} {args.cat} - {line}\n"
|
||||
else:
|
||||
content += f"{error_prefix} {line}\n"
|
||||
f.write(content.rstrip()) # Use rstrip to remove trailing whitespace
|
||||
f.write('\n') # Add a newline after each log entry for consistency
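For illustration, the on-disk format log() produces looks roughly like this: one timestamp line whenever the second changes, then a one-char error flag plus the 10-char padded category (the exact timestamp string depends on OurTime.format_ss(), and name_fix() turns '-' into '_'):

```text
20:14:35
  test_app   - This is a test message
  With a second line
E error_test - This is an error
E With details
```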
|
||||
|
||||
class LogItem:
|
||||
def __init__(self, timestamp: OurTime, cat: str, log: str, logtype: LogType):
|
||||
self.timestamp = timestamp
|
||||
self.cat = cat
|
||||
self.log = log
|
||||
self.logtype = logtype
|
137
herolib/core/logger/search.py
Normal file
137
herolib/core/logger/search.py
Normal file
@@ -0,0 +1,137 @@
|
||||
import os
|
||||
from typing import Optional, List
|
||||
from herolib.core.texttools.texttools import name_fix
|
||||
from herolib.data.ourtime.ourtime import OurTime, new as ourtime_new
|
||||
from herolib.core.logger.model import Logger, LogItem, LogType
|
||||
|
||||
class SearchArgs:
|
||||
def __init__(self, timestamp_from: Optional[OurTime] = None,
|
||||
timestamp_to: Optional[OurTime] = None,
|
||||
cat: str = "", log: str = "", logtype: Optional[LogType] = None,
|
||||
maxitems: int = 10000):
|
||||
self.timestamp_from = timestamp_from
|
||||
self.timestamp_to = timestamp_to
|
||||
self.cat = cat
|
||||
self.log = log
|
||||
self.logtype = logtype
|
||||
self.maxitems = maxitems
|
||||
|
||||
def process(result: List[LogItem], current_item: LogItem, current_time: OurTime,
|
||||
args: SearchArgs, from_time: int, to_time: int):
|
||||
# Add previous item if it matches filters
|
||||
log_epoch = current_item.timestamp.unix()
|
||||
if log_epoch < from_time or log_epoch > to_time:
|
||||
return
|
||||
|
||||
cat_match = (args.cat == '' or current_item.cat.strip() == args.cat)
|
||||
log_match = (args.log == '' or args.log.lower() in current_item.log.lower())
|
||||
logtype_match = (args.logtype is None or current_item.logtype == args.logtype)
|
||||
|
||||
if cat_match and log_match and logtype_match:
|
||||
result.append(current_item)
|
||||
|
||||
def search(l: Logger, args_: SearchArgs) -> List[LogItem]:
|
||||
args = args_
|
||||
|
||||
# Format category (max 10 chars, ascii only)
|
||||
args.cat = name_fix(args.cat)
|
||||
if len(args.cat) > 10:
|
||||
raise ValueError('category cannot be longer than 10 chars')
|
||||
|
||||
timestamp_from = args.timestamp_from if args.timestamp_from else OurTime()
|
||||
timestamp_to = args.timestamp_to if args.timestamp_to else OurTime()
|
||||
|
||||
# Get time range
|
||||
from_time = timestamp_from.unix()
|
||||
to_time = timestamp_to.unix()
|
||||
    if from_time > to_time:
        raise ValueError(f'from_time cannot be after to_time: {from_time} > {to_time}')
|
||||
|
||||
result: List[LogItem] = []
|
||||
|
||||
# Find log files in time range
|
||||
files = sorted(os.listdir(l.path.path))
|
||||
|
||||
for file in files:
|
||||
if not file.endswith('.log'):
|
||||
continue
|
||||
|
||||
# Parse dayhour from filename
|
||||
dayhour = file[:-4] # remove .log
|
||||
try:
|
||||
file_time = ourtime_new(dayhour)
|
||||
except ValueError:
|
||||
continue # Skip if filename is not a valid time format
|
||||
|
||||
current_time = OurTime()
|
||||
current_item = LogItem(OurTime(), "", "", LogType.STDOUT) # Initialize with dummy values
|
||||
collecting = False
|
||||
|
||||
# Skip if file is outside time range
|
||||
if file_time.unix() < from_time or file_time.unix() > to_time:
|
||||
continue
|
||||
|
||||
# Read and parse log file
|
||||
content = ""
|
||||
try:
|
||||
with open(os.path.join(l.path.path, file), 'r') as f:
|
||||
content = f.read()
|
||||
except FileNotFoundError:
|
||||
continue
|
||||
|
||||
lines = content.split('\n')
|
||||
|
||||
for line in lines:
|
||||
if len(result) >= args.maxitems:
|
||||
return result
|
||||
|
||||
line_trim = line.strip()
|
||||
if not line_trim:
|
||||
continue
|
||||
|
||||
# Check if this is a timestamp line
|
||||
if not (line.startswith(' ') or line.startswith('E')):
|
||||
try:
|
||||
current_time = ourtime_new(line_trim)
|
||||
except ValueError:
|
||||
continue # Skip if not a valid timestamp line
|
||||
|
||||
if collecting:
|
||||
process(result, current_item, current_time, args, from_time, to_time)
|
||||
collecting = False
|
||||
continue
|
||||
|
||||
if collecting and len(line) > 14 and line[13] == '-':
|
||||
process(result, current_item, current_time, args, from_time, to_time)
|
||||
collecting = False
|
||||
|
||||
# Parse log line
|
||||
is_error = line.startswith('E')
|
||||
if not collecting:
|
||||
# Start new item
|
||||
cat_start = 2
|
||||
cat_end = 12
|
||||
log_start = 15
|
||||
|
||||
if len(line) < log_start:
|
||||
continue # Line too short to contain log content
|
||||
|
||||
current_item = LogItem(
|
||||
timestamp=current_time,
|
||||
cat=line[cat_start:cat_end].strip(),
|
||||
log=line[log_start:].strip(),
|
||||
logtype=LogType.ERROR if is_error else LogType.STDOUT
|
||||
)
|
||||
collecting = True
|
||||
else:
|
||||
# Continuation line
|
||||
if len(line_trim) < 16: # Check for minimum length for continuation line
|
||||
current_item.log += '\n' + line_trim
|
||||
else:
|
||||
current_item.log += '\n' + line[15:].strip() # Use strip for continuation lines
|
||||
|
||||
# Add last item if collecting
|
||||
if collecting:
|
||||
process(result, current_item, current_time, args, from_time, to_time)
|
||||
|
||||
return result
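A usage sketch for the parser above (note the unit tests call this through Logger.search(); attaching this function as a method on Logger is assumed, not shown here):

```python
logger = new('/tmp/testlogs')  # from herolib.core.logger.factory
items = search(logger, SearchArgs(
    timestamp_from=ourtime_new('2022-11-01 00:00:00'),
    timestamp_to=ourtime_new('2025-11-01 00:00:00'),
    logtype=LogType.ERROR,
))
for item in items:
    print(item.timestamp, item.cat, item.log)
```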
|
0
herolib/core/loghandler/__init__.py
Normal file
0
herolib/core/loghandler/__init__.py
Normal file
BIN
herolib/core/loghandler/__pycache__/__init__.cpython-313.pyc
Normal file
BIN
herolib/core/loghandler/__pycache__/__init__.cpython-313.pyc
Normal file
Binary file not shown.
BIN
herolib/core/loghandler/__pycache__/mylogging.cpython-313.pyc
Normal file
BIN
herolib/core/loghandler/__pycache__/mylogging.cpython-313.pyc
Normal file
Binary file not shown.
214
herolib/core/loghandler/mylogging.py
Normal file
214
herolib/core/loghandler/mylogging.py
Normal file
@@ -0,0 +1,214 @@
|
||||
from peewee import *
|
||||
import time
|
||||
from datetime import datetime
|
||||
from typing import Optional, List, Dict, Any, Iterable, Union
|
||||
import os
|
||||
import logging
|
||||
import traceback
|
||||
|
||||
# Configure database path
|
||||
DB_DIR = os.path.expanduser('~/hero/var/logdb/')
|
||||
DB_FILE = os.path.join(DB_DIR, 'logs.db')
|
||||
|
||||
# Create directory if it doesn't exist
|
||||
os.makedirs(DB_DIR, exist_ok=True)
|
||||
|
||||
# Initialize database
|
||||
database = SqliteDatabase(DB_FILE, pragmas={'journal_mode': 'wal'})
|
||||
|
||||
class BaseModel(Model):
|
||||
"""Base model class for Peewee."""
|
||||
class Meta:
|
||||
database = database
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
"""Convert model instance to dictionary."""
|
||||
data = {}
|
||||
for field_name in self._meta.fields:
|
||||
field_value = getattr(self, field_name)
|
||||
if field_name in ('time', 'last_seen') and isinstance(field_value, int):
|
||||
# Convert epoch to a readable format for the frontend
|
||||
data[field_name] = datetime.fromtimestamp(field_value).strftime('%d-%m %H:%M')
|
||||
else:
|
||||
data[field_name] = field_value
|
||||
return data
|
||||
|
||||
class Log(BaseModel):
|
||||
"""Model for INFO logs."""
|
||||
time = IntegerField(default=lambda: int(time.time()), index=True)
|
||||
email = CharField(max_length=255, null=True)
|
||||
logmsg = TextField()
|
||||
level = IntegerField(default=100)
|
||||
cat = CharField(max_length=100, index=True, default="general")
|
||||
payload = TextField(null=True)
|
||||
payload_cat = CharField(max_length=100, null=True)
|
||||
|
||||
class Meta:
|
||||
table_name = 'logs'
|
||||
|
||||
class Error(BaseModel):
|
||||
"""Model for ERROR logs."""
|
||||
time = IntegerField(default=lambda: int(time.time()), index=True)
|
||||
last_seen = IntegerField(default=lambda: int(time.time()), index=True)
|
||||
email = CharField(max_length=255, null=True)
|
||||
logmsg = TextField()
|
||||
stacktrace = TextField(null=True)
|
||||
count = IntegerField(default=1)
|
||||
cat = CharField(max_length=100, index=True, default="general")
|
||||
payload = TextField(null=True)
|
||||
payload_cat = CharField(max_length=100, null=True)
|
||||
|
||||
class Meta:
|
||||
table_name = 'errors'
|
||||
|
||||
def init_db_logging():
|
||||
"""Create tables if they don't exist."""
|
||||
with database:
|
||||
database.create_tables([Log, Error], safe=True)
|
||||
|
||||
class DatabaseLogHandler(logging.Handler):
|
||||
"""A logging handler that writes logs to the Peewee database."""
|
||||
def emit(self, record):
|
||||
stacktrace = None
|
||||
if record.exc_info:
|
||||
stacktrace = logging.Formatter().formatException(record.exc_info)
|
||||
|
||||
if record.levelno >= logging.ERROR:
|
||||
log_error(
|
||||
msg=record.getMessage(),
|
||||
cat=record.name,
|
||||
stacktrace=stacktrace
|
||||
)
|
||||
else:
|
||||
log_info(
|
||||
msg=record.getMessage(),
|
||||
level=record.levelno,
|
||||
cat=record.name
|
||||
)
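A short wiring sketch for the handler above (root-logger wiring and the INFO level are choices, not requirements):

```python
import logging
from herolib.core.loghandler.mylogging import init_db_logging, DatabaseLogHandler

init_db_logging()
root = logging.getLogger()
root.addHandler(DatabaseLogHandler())
root.setLevel(logging.INFO)

logging.info("hello from the app")   # -> logs table
logging.error("something broke")     # -> errors table (deduplicated per 24h)
```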
|
||||
|
||||
def log_error(msg: str, cat: str = "general", email: Optional[str] = None, stacktrace: Optional[str] = None, payload: Optional[str] = None, payload_cat: Optional[str] = None):
|
||||
"""Log an ERROR message to the database, handling duplicates."""
|
||||
    # Mirror every error into the info log as well; failures here must not
    # block the error path.
    try:
        log_info(msg=msg, cat=cat, email=email, payload=payload, payload_cat=payload_cat)
    except Exception:
        pass
|
||||
try:
|
||||
if not stacktrace:
|
||||
# Capture the current stack trace if not provided
|
||||
stacktrace = "".join(traceback.format_stack())
|
||||
|
||||
# Filter out irrelevant lines from the stack trace
|
||||
if stacktrace:
|
||||
lines = stacktrace.split('\n')
|
||||
filtered_lines = [
|
||||
line for line in lines
|
||||
if 'python3.13/logging' not in line and 'src/mylogging.py' not in line
|
||||
]
|
||||
stacktrace = '\n'.join(filtered_lines)
|
||||
|
||||
one_day_ago = int(time.time()) - (24 * 3600)
|
||||
|
||||
# Look for a similar error in the last 24 hours from the same user
|
||||
existing_error = Error.select().where(
|
||||
(Error.logmsg == msg) &
|
||||
(Error.email == email) &
|
||||
(Error.last_seen >= one_day_ago)
|
||||
).first()
|
||||
|
||||
if existing_error:
|
||||
# If found, increment counter and update last_seen
|
||||
existing_error.count += 1
|
||||
existing_error.last_seen = int(time.time())
|
||||
existing_error.stacktrace = stacktrace
|
||||
existing_error.save()
|
||||
|
||||
else:
|
||||
# Otherwise, create a new error record
|
||||
Error.create(
|
||||
logmsg=msg,
|
||||
cat=cat,
|
||||
email=email,
|
||||
stacktrace=stacktrace,
|
||||
payload=payload,
|
||||
payload_cat=payload_cat
|
||||
)
|
||||
logging.info(f"Successfully logged new error: {msg}")
|
||||
|
||||
except Exception as e:
|
||||
logging.error(f"Failed to log error to {DB_FILE}: {e}")
|
||||
|
||||
def log_info(msg: str, level: int = 0, cat: str = "general", email: Optional[str] = None, payload: Optional[str] = None, payload_cat: Optional[str] = None):
|
||||
"""Log an INFO message to the database."""
|
||||
try:
|
||||
Log.create(logmsg=msg, level=level, cat=cat, email=email, payload=payload, payload_cat=payload_cat)
|
||||
except Exception as e:
|
||||
print(f"Failed to log info to {DB_FILE}: {e}")
|
||||
|
||||
def get_errors(search: Optional[str] = None, cat: Optional[str] = None) -> List[Dict[str, Any]]:
|
||||
"""Get errors from the database with optional filters. Category search is prefix-based."""
|
||||
query = Error.select().order_by(Error.last_seen.desc())
|
||||
if search:
|
||||
query = query.where(Error.logmsg.contains(search))
|
||||
if cat and cat.strip():
|
||||
query = query.where(Error.cat.startswith(cat.strip()))
|
||||
return [e.to_dict() for e in query]
|
||||
|
||||
def get_logs(
|
||||
search: Optional[str] = None,
|
||||
cat: Optional[str] = None,
|
||||
level: Optional[int] = None,
|
||||
hours_ago: Optional[int] = None,
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""Get logs from the database with optional filters. Category search is prefix-based."""
|
||||
query = Log.select().order_by(Log.time.desc())
|
||||
|
||||
if search and search.strip():
|
||||
query = query.where(Log.logmsg.contains(search))
|
||||
|
||||
if cat and cat.strip():
|
||||
query = query.where(Log.cat.startswith(cat.strip()))
|
||||
|
||||
if level is not None:
|
||||
query = query.where(Log.level <= level)
|
||||
|
||||
if hours_ago is not None:
|
||||
time_ago = int(time.time()) - (hours_ago * 3600)
|
||||
query = query.where(Log.time >= time_ago)
|
||||
|
||||
return [l.to_dict() for l in query]
|
||||
|
||||
def get_log_by_id(log_id: int) -> Optional[Dict[str, Any]]:
|
||||
"""Get a single log by its ID."""
|
||||
try:
|
||||
log = Log.get_by_id(log_id)
|
||||
return log.to_dict()
|
||||
except Log.DoesNotExist:
|
||||
return None
|
||||
|
||||
def delete_logs_older_than(minutes: int):
|
||||
"""Delete logs older than a specified number of minutes."""
|
||||
time_ago = int(time.time()) - (minutes * 60)
|
||||
Log.delete().where(Log.time < time_ago).execute()
|
||||
|
||||
def delete_errors_older_than(minutes: int):
|
||||
"""Delete errors older than a specified number of minutes."""
|
||||
time_ago = int(time.time()) - (minutes * 60)
|
||||
Error.delete().where(Error.time < time_ago).execute()
|
||||
|
||||
def get_unique_log_categories() -> List[str]:
|
||||
"""Get unique log categories from the database."""
|
||||
query = (Log
|
||||
.select(Log.cat)
|
||||
.where(Log.cat.is_null(False))
|
||||
.distinct()
|
||||
.order_by(Log.cat))
|
||||
return [l.cat for l in query]
|
||||
|
||||
def get_unique_error_categories() -> List[str]:
|
||||
"""Get unique error categories from the database."""
|
||||
query = (Error
|
||||
.select(Error.cat)
|
||||
.where(Error.cat.is_null(False))
|
||||
.distinct()
|
||||
.order_by(Error.cat))
|
||||
return [e.cat for e in query]
|
0
herolib/core/pathlib/__init__.py
Normal file
0
herolib/core/pathlib/__init__.py
Normal file
BIN
herolib/core/pathlib/__pycache__/__init__.cpython-313.pyc
Normal file
BIN
herolib/core/pathlib/__pycache__/__init__.cpython-313.pyc
Normal file
Binary file not shown.
0
herolib/core/pathlib/__pycache__/__init__.py
Normal file
0
herolib/core/pathlib/__pycache__/__init__.py
Normal file
BIN
herolib/core/pathlib/__pycache__/pathlib.cpython-313.pyc
Normal file
BIN
herolib/core/pathlib/__pycache__/pathlib.cpython-313.pyc
Normal file
Binary file not shown.
80
herolib/core/pathlib/pathlib.py
Normal file
80
herolib/core/pathlib/pathlib.py
Normal file
@@ -0,0 +1,80 @@
|
||||
import os
|
||||
|
||||
class Path:
|
||||
def __init__(self, path: str):
|
||||
self.path = os.path.expanduser(path)
|
||||
|
||||
def exists(self) -> bool:
|
||||
return os.path.exists(self.path)
|
||||
|
||||
def is_file(self) -> bool:
|
||||
return os.path.isfile(self.path)
|
||||
|
||||
def is_dir(self) -> bool:
|
||||
return os.path.isdir(self.path)
|
||||
|
||||
def read(self) -> str:
|
||||
with open(self.path, 'r') as f:
|
||||
return f.read()
|
||||
|
||||
def write(self, content: str):
|
||||
os.makedirs(os.path.dirname(self.path), exist_ok=True)
|
||||
with open(self.path, 'w') as f:
|
||||
f.write(content)
|
||||
|
||||
    def delete(self):
        if self.is_file():
            os.remove(self.path)
        elif self.is_dir():
            # Note: os.rmdir only removes empty directories; use rmdir_all()
            # below for a recursive delete.
            os.rmdir(self.path)
|
||||
|
||||
def list(self, recursive: bool = False, regex: list = None) -> list[str]:
|
||||
files = []
|
||||
if self.is_dir():
|
||||
if recursive:
|
||||
for root, _, filenames in os.walk(self.path):
|
||||
for filename in filenames:
|
||||
full_path = os.path.join(root, filename)
|
||||
relative_path = os.path.relpath(full_path, self.path)
|
||||
if regex:
|
||||
import re
|
||||
if any(re.match(r, relative_path) for r in regex):
|
||||
files.append(relative_path)
|
||||
else:
|
||||
files.append(relative_path)
|
||||
else:
|
||||
for entry in os.listdir(self.path):
|
||||
full_path = os.path.join(self.path, entry)
|
||||
if os.path.isfile(full_path):
|
||||
if regex:
|
||||
import re
|
||||
if any(re.match(r, entry) for r in regex):
|
||||
files.append(entry)
|
||||
else:
|
||||
files.append(entry)
|
||||
return files
|
||||
|
||||
def get(path: str) -> Path:
|
||||
return Path(path)
|
||||
|
||||
def get_dir(path: str, create: bool = False) -> Path:
|
||||
p = Path(path)
|
||||
if create and not p.exists():
|
||||
os.makedirs(p.path, exist_ok=True)
|
||||
return p
|
||||
|
||||
def get_file(path: str, create: bool = False) -> Path:
|
||||
p = Path(path)
|
||||
if create and not p.exists():
|
||||
os.makedirs(os.path.dirname(p.path), exist_ok=True)
|
||||
with open(p.path, 'w') as f:
|
||||
pass # Create empty file
|
||||
return p
|
||||
|
||||
def rmdir_all(path: str):
|
||||
if os.path.exists(path):
|
||||
import shutil
|
||||
shutil.rmtree(path)
|
||||
|
||||
def ls(path: str) -> list[str]:
|
||||
return os.listdir(path)
|
0
herolib/core/texttools/__init__.py
Normal file
0
herolib/core/texttools/__init__.py
Normal file
142
herolib/core/texttools/texttools.py
Normal file
142
herolib/core/texttools/texttools.py
Normal file
@@ -0,0 +1,142 @@
|
||||
import re
import os
from datetime import datetime
|
||||
|
||||
def name_fix(name: str) -> str:
|
||||
# VLang's name_fix converts '-' to '_' and cleans up special chars.
|
||||
# Python's re.sub can handle this.
|
||||
name = re.sub(r'[^a-zA-Z0-9_ ]', '', name.replace('-', '_'))
|
||||
return name.strip()
|
||||
|
||||
def expand(txt: str, length: int, expand_with: str) -> str:
|
||||
# Pads the string to the specified length.
|
||||
return txt.ljust(length, expand_with)
|
||||
|
||||
def dedent(text: str) -> str:
|
||||
# Removes common leading whitespace from every line.
|
||||
# This is a simplified version of textwrap.dedent
|
||||
lines = text.splitlines()
|
||||
if not lines:
|
||||
return ""
|
||||
|
||||
# Find the minimum indentation of non-empty lines
|
||||
min_indent = float('inf')
|
||||
for line in lines:
|
||||
if line.strip():
|
||||
indent = len(line) - len(line.lstrip())
|
||||
min_indent = min(min_indent, indent)
|
||||
|
||||
if min_indent == float('inf'): # All lines are empty or just whitespace
|
||||
return "\n".join([line.strip() for line in lines])
|
||||
|
||||
dedented_lines = [line[min_indent:] for line in lines]
|
||||
return "\n".join(dedented_lines)
|
||||
|
||||
def remove_empty_lines(text: str) -> str:
|
||||
lines = text.splitlines()
|
||||
return "\n".join([line for line in lines if line.strip()])
|
||||
|
||||
def remove_double_lines(text: str) -> str:
|
||||
lines = text.splitlines()
|
||||
cleaned_lines = []
|
||||
prev_empty = False
|
||||
for line in lines:
|
||||
is_empty = not line.strip()
|
||||
if is_empty and prev_empty:
|
||||
continue
|
||||
cleaned_lines.append(line)
|
||||
prev_empty = is_empty
|
||||
return "\n".join(cleaned_lines)
|
||||
|
||||
def ascii_clean(r: str) -> str:
|
||||
return r.encode('ascii', 'ignore').decode('ascii')
|
||||
|
||||
def name_clean(r: str) -> str:
|
||||
return re.sub(r'[^a-zA-Z0-9]', '', r)
|
||||
|
||||
def name_fix_keepspace(name_: str) -> str:
|
||||
# Similar to name_fix but keeps spaces.
|
||||
return re.sub(r'[^a-zA-Z0-9 ]', '', name_.replace('-', '_')).strip()
|
||||
|
||||
def name_fix_no_ext(name_: str) -> str:
|
||||
return os.path.splitext(name_)[0]
|
||||
|
||||
def name_fix_snake_to_pascal(name: str) -> str:
|
||||
return ''.join(word.capitalize() for word in name.split('_'))
|
||||
|
||||
def snake_case(name: str) -> str:
|
||||
s1 = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', name)
|
||||
return re.sub('([a-z0-9])([A-Z])', r'\1_\2', s1).lower()
|
||||
|
||||
def name_split(name: str) -> tuple[str, str]:
|
||||
parts = name.split('.')
|
||||
if len(parts) > 1:
|
||||
return parts[0], '.'.join(parts[1:])
|
||||
return name, ""
|
||||
|
||||
def cmd_line_args_parser(text: str) -> list[str]:
|
||||
# A simple parser, might need more robust solution for complex cases
|
||||
import shlex
|
||||
return shlex.split(text)
|
||||
|
||||
def text_remove_quotes(text: str) -> str:
|
||||
return re.sub(r'["\'].*?["\']', '', text)
|
||||
|
||||
def check_exists_outside_quotes(text: str, items: list[str]) -> bool:
|
||||
# This is a simplified implementation. A full implementation would require
|
||||
# more complex parsing to correctly identify text outside quotes.
|
||||
cleaned_text = text_remove_quotes(text)
|
||||
for item in items:
|
||||
if item in cleaned_text:
|
||||
return True
|
||||
return False
|
||||
|
||||
def is_int(text: str) -> bool:
|
||||
return text.isdigit()
|
||||
|
||||
def is_upper_text(text: str) -> bool:
|
||||
return text.isupper() and text.isalpha()
|
||||
|
||||
def multiline_to_single(text: str) -> str:
|
||||
return text.replace('\n', '\\n').replace('\r', '')
|
||||
|
||||
def split_smart(t: str, delimiter_: str) -> list[str]:
|
||||
# This is a placeholder, a smart split would need to handle quotes and escapes
|
||||
return t.split(delimiter_)
|
||||
|
||||
def version(text_: str) -> int:
    # Converts version strings like "v0.4.36" to 4036 or "v1.4.36" to 1004036
    match = re.match(r'v?(\d+)\.(\d+)\.(\d+)', text_)
    if match:
        major, minor, patch = int(match.group(1)), int(match.group(2)), int(match.group(3))
        if major == 0:
            return minor * 1000 + patch
        return major * 1000000 + minor * 1000 + patch
    return 0
|
||||
|
||||
def format_rfc1123(dt: datetime) -> str:
|
||||
return dt.strftime('%a, %d %b %Y %H:%M:%S GMT')
|
||||
|
||||
def to_array(r: str) -> list[str]:
|
||||
if ',' in r:
|
||||
return [item.strip() for item in r.split(',')]
|
||||
return [item.strip() for item in r.splitlines() if item.strip()]
|
||||
|
||||
def to_array_int(r: str) -> list[int]:
|
||||
return [int(item) for item in to_array(r) if item.isdigit()]
|
||||
|
||||
def to_map(mapstring: str, line: str, delimiter_: str = ' ') -> dict[str, str]:
    # Simplified port of the VLang version: mapstring is a comma-separated
    # list of key names ('-' skips a column); line is split on delimiter_.
    keys = [k.strip() for k in mapstring.split(',')]
    values = line.split(delimiter_)

    result = {}
    val_idx = 0
    for key in keys:
        if key == '-':
            val_idx += 1
            continue
        if val_idx < len(values):
            result[key] = values[val_idx]
            val_idx += 1
    return result
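
# Example (illustrative only, not part of the original commit):
#   to_map("name,-,age", "alice x 42") -> {"name": "alice", "age": "42"}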
0
herolib/crypt/__init__.py
Normal file
0
herolib/crypt/box/__init__.py
Normal file
41
herolib/crypt/box/box.py
Normal file
@@ -0,0 +1,41 @@
from fastapi import HTTPException
from cryptography.fernet import Fernet
import redis
import base64
import hashlib

# TODO: KRISTOF FIX

def box_get():
    r = redis.Redis(host='localhost', port=6379, db=0)

    key = r.get('my.secret')

    if key is None:
        raise HTTPException(status_code=404, detail="can't find my.secret in redis, it needs to be set; use secret-set to register your secret.")

    hash_digest = hashlib.sha256(key).digest()

    # Encode the hash digest as url-safe base64, which is what Fernet expects
    key2 = base64.urlsafe_b64encode(hash_digest)

    try:
        f = Fernet(key2)
    except Exception as e:
        raise HTTPException(status_code=400, detail=str(e))

    return f

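# Illustrative usage (not part of the original commit), assuming my.secret is set:
#   f = box_get()
#   token = f.encrypt(b"some payload")
#   assert f.decrypt(token) == b"some payload"
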

def box_secret_set(secret: str):
    r = redis.Redis(host='localhost', port=6379, db=0)

    # Set the key with an expiration time of 12 hours (43200 s)
    r.setex('my.secret', 43200, secret)

    # Fails fast if the stored secret cannot be turned into a Fernet key
    box_get()

    return "OK"
26
herolib/crypt/box/box_api.py
Normal file
@@ -0,0 +1,26 @@
from fastapi import APIRouter, Response
from pydantic import BaseModel, Field
# Assumption: the box helpers live next to this file in herolib/crypt/box
from .box import box_secret_set, box_get

# TODO: KRISTOF FIX

router = APIRouter()

############## SECRET

class BoxSecretSetRequest(BaseModel):
    secret: str = Field(..., description="a well chosen secret key; never forget this key or you will lose your assets")

@router.post("/secret", description="Set your secret for your hero, will be kept for 12 hours")
async def set_secret(request: BoxSecretSetRequest):
    box_secret_set(secret=request.secret)
    return Response(content="OK", media_type="text/plain")

@router.get("/secret", description="Check if it exists.")
async def secret_check():
    box_get()  # raises a 404 HTTPException if the secret is not set
    return Response(content="OK", media_type="text/plain")
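
# Illustrative calls (hypothetical host/port, not part of the original commit):
#   curl -X POST http://localhost:8000/secret -H 'Content-Type: application/json' -d '{"secret": "my-pass"}'
#   curl http://localhost:8000/secret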
0
herolib/data/__init__.py
Normal file
0
herolib/data/ourtime/__init__.py
Normal file
BIN
herolib/data/ourtime/__pycache__/__init__.cpython-313.pyc
Normal file
Binary file not shown.
0
herolib/data/ourtime/__pycache__/__init__.py
Normal file
BIN
herolib/data/ourtime/__pycache__/ourtime.cpython-313.pyc
Normal file
Binary file not shown.
123
herolib/data/ourtime/ourtime.py
Normal file
@@ -0,0 +1,123 @@
from datetime import datetime, timedelta
from typing import Optional
import re

class OurTime:
    def __init__(self, dt: Optional[datetime] = None):
        self._dt = dt if dt else datetime.min

    def __str__(self) -> str:
        return self.str()

    def str(self) -> str:
        if self._dt == datetime.min:
            return ""
        return self._dt.strftime('%Y-%m-%d %H:%M')

    def day(self) -> str:
        if self._dt == datetime.min:
            return ""
        return self._dt.strftime('%Y-%m-%d')

    def key(self) -> str:
        if self._dt == datetime.min:
            return ""
        return self._dt.strftime('%Y_%m_%d_%H_%M_%S')

    def md(self) -> str:
        if self._dt == datetime.min:
            return ""
        return self._dt.strftime('%Y-%m-%d %H:%M:%S')

    def unix(self) -> int:
        if self._dt == datetime.min:
            return 0
        return int(self._dt.timestamp())

    def empty(self) -> bool:
        return self._dt == datetime.min

    def dayhour(self) -> str:
        if self._dt == datetime.min:
            return ""
        return self._dt.strftime('%Y-%m-%d-%H')

    def time(self):
        # Simplified: VLang's time() returns a time object;
        # here we return self so format_ss() can be chained.
        return self

    def format_ss(self) -> str:
        if self._dt == datetime.min:
            return ""
        return self._dt.strftime('%H:%M:%S')

    def warp(self, expression: str):
        if self._dt == datetime.min:
            return

        parts = expression.split()
        for part in parts:
            match = re.match(r'([+-]?\d+)([smhdwMQY])', part)
            if not match:
                continue

            value = int(match.group(1))
            unit = match.group(2)

            if unit == 's':
                self._dt += timedelta(seconds=value)
            elif unit == 'm':
                self._dt += timedelta(minutes=value)
            elif unit == 'h':
                self._dt += timedelta(hours=value)
            elif unit == 'd':
                self._dt += timedelta(days=value)
            elif unit == 'w':
                self._dt += timedelta(weeks=value)
            elif unit == 'M':
                # Approximate months; dateutil.relativedelta would be exact
                self._dt += timedelta(days=value * 30)
            elif unit == 'Q':
                self._dt += timedelta(days=value * 90)
            elif unit == 'Y':
                self._dt += timedelta(days=value * 365)

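# Illustrative warp expressions (not part of the original commit):
#   t = now(); t.warp("+1d")    # tomorrow, same time
#   t.warp("-2h +30m")          # two hours back, then 30 minutes forward
#   t.warp("+1M")               # ~one month ahead (30-day approximation)
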
def now() -> OurTime:
    return OurTime(datetime.now())

def new(time_str: str) -> OurTime:
    if not time_str:
        return OurTime()

    formats = [
        '%Y-%m-%d %H:%M:%S',
        '%Y-%m-%d %H:%M',
        '%Y-%m-%d %H',
        '%Y-%m-%d',
        '%d-%m-%Y %H:%M:%S',
        '%d-%m-%Y %H:%M',
        '%d-%m-%Y %H',
        '%d-%m-%Y',
        '%H:%M:%S',  # For time() and format_ss() usage
    ]

    for fmt in formats:
        try:
            dt = datetime.strptime(time_str, fmt)
            return OurTime(dt)
        except ValueError:
            pass

    # Handle relative time expressions such as "+1d -2h"; only warp when the
    # string actually contains at least one unit, so that unparseable input
    # raises instead of silently becoming "now"
    if re.search(r'[+-]?\d+[smhdwMQY]', time_str):
        temp_time = now()
        temp_time.warp(time_str)
        return temp_time

    raise ValueError(f"Could not parse time string: {time_str}")

def new_from_epoch(epoch: int) -> OurTime:
    return OurTime(datetime.fromtimestamp(epoch))
0
herolib/downloader/__init__.py
Normal file
1
herolib/downloader/scrape_dynamic
Symbolic link
@@ -0,0 +1 @@
../../../../tfgrid_research/tfdev/research/scrape_dynamic
1
herolib/downloader/scrape_fast
Symbolic link
@@ -0,0 +1 @@
../../../../tfgrid_research/tfdev/research/scrape_fast
1
herolib/downloader/scrape_scapegraph
Symbolic link
@@ -0,0 +1 @@
../../../../tfgrid_research/tfdev/research/scrape_scapegraph
0
herolib/tools/__init__.py
Normal file
31
herolib/tools/extensions.py
Normal file
@@ -0,0 +1,31 @@
import mimetypes
import os

def check_and_add_extension(file_path: str) -> str:
    # Only check if there's no extension
    if not os.path.splitext(file_path)[1]:
        # Read the file content
        with open(file_path, 'rb') as f:
            content = f.read(2048)  # Read the first 2048 bytes for detection

        # Detect content type
        content_type = detect_content_type(content)
        extension = mimetypes.guess_extension(content_type)

        if extension:
            new_file_path = file_path + extension
            os.rename(file_path, new_file_path)
            return new_file_path

    return file_path

def detect_content_type(content: bytes) -> str:
    # Simple content-based detection via magic bytes
    if content.startswith(b'\xff\xd8'):
        return 'image/jpeg'
    if content.startswith(b'\x89PNG'):
        return 'image/png'
    if content.startswith(b'GIF'):
        return 'image/gif'
    # Add more checks as needed
    return 'application/octet-stream'
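
# Illustrative behaviour (hypothetical path, not part of the original commit):
# a JPEG saved as "/tmp/photo" is detected via its magic bytes and renamed, so
#   check_and_add_extension("/tmp/photo") -> "/tmp/photo.jpg" (typically)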
270
herolib/tools/gitscanner.py
Normal file
@@ -0,0 +1,270 @@
import sys
import os

# Add the repo root to the Python module search path
# (assumption: this file lives at herolib/tools/gitscanner.py)
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..')))

import time
import json
import subprocess
from typing import Optional, List
import redis
# Assumption: the logger helper lives under herolib.tools in this repo
from herolib.tools.logger import logger
from enum import Enum
from dataclasses import dataclass
import git

# Initialize Redis client
redis_client = redis.StrictRedis(host='localhost', port=6379, db=0)

# Define the ChangeType Enum
class ChangeType(Enum):
    DEL = 'del'
    MOD = 'mod'
    NEW = 'new'

@dataclass
class FileChange:
    commit_hash: str
    commit_time: str
    path: str  # relative path in the repo
    change_type: ChangeType

class Repo:
    def __init__(self, cat: str, account: str, name: str, path: str):
        self.cat = cat
        self.account = account
        self.name = name
        self.path = path
        self.hash_last_found: Optional[float] = None
        self.hash_last_processed: Optional[str] = None
        self.lastcheck: Optional[float] = None

    def __str__(self):
        return json.dumps({
            "cat": self.cat,
            "account": self.account,
            "name": self.name,
            "path": self.path,
            "hash_last_found": self.hash_last_found,
            "hash_last_processed": self.hash_last_processed,
            "lastcheck": self.lastcheck
        }, indent=2)

    def _redis_key(self) -> str:
        return f"gitcheck:{self.cat}:{self.account}:{self.name}"

    def save_to_redis(self):
        redis_client.set(self._redis_key(), json.dumps(self.__dict__))

    @staticmethod
    def load_from_redis(cat: str, account: str, name: str) -> Optional['Repo']:
        redis_key = f"gitcheck:{cat}:{account}:{name}"
        data = redis_client.get(redis_key)
        if data:
            data = json.loads(data)
            repo = Repo(data["cat"], data["account"], data["name"], data["path"])
            repo.hash_last_found = data.get("hash_last_found")
            repo.hash_last_processed = data.get("hash_last_processed")
            repo.lastcheck = data.get("lastcheck")
            return repo
        return None

    def get_remote_commit_hash(self, branch: str) -> str:
        """Get the latest commit hash from the remote repository."""
        result = subprocess.run(
            ['git', 'ls-remote', 'origin', f'refs/heads/{branch}'],
            cwd=self.path,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True
        )
        if result.returncode != 0:
            raise Exception(f"Error fetching remote commit hash: {result.stderr}")

        return result.stdout.split()[0]

    def get_local_commit_hash(self) -> str:
        """Get the latest commit hash from the local repository."""
        result = subprocess.run(
            ['git', 'rev-parse', 'HEAD'],
            cwd=self.path,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True
        )
        if result.returncode != 0:
            raise Exception(f"Error fetching local commit hash: {result.stderr}")

        return result.stdout.strip()

    def get_current_branch(self) -> str:
        result = subprocess.run(
            ['git', 'branch', '--show-current'],
            cwd=self.path,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True
        )
        if result.returncode != 0:
            raise Exception(f"Error fetching local branch name: {result.stderr}")

        return result.stdout.strip()

    def get_remote_default_branch(self) -> str:
        result = subprocess.run(
            ['git', 'ls-remote', '--symref', 'origin', 'HEAD'],
            cwd=self.path,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True
        )
        if result.returncode != 0:
            raise Exception(f"Error fetching remote default branch: {result.stderr}")

        return result.stdout.split()[1].split('/')[-1]

    def should_check_again(self) -> bool:
        """Determine if we should check the repository again based on the last check time."""
        if self.lastcheck is None:
            return True
        return (time.time() - self.lastcheck) > 60

    def update_last_check_time(self) -> None:
        """Update the last check time."""
        self.lastcheck = time.time()
        self.save_to_redis()

    def log_change(self, epoch_time: float) -> None:
        """Log a detected change in Redis."""
        self.hash_last_found = epoch_time
        self.save_to_redis()

    def check_for_changes(self, branch: str = 'main') -> None:
        """Check the repository for updates and log changes if found."""
        if not self.should_check_again():
            print("WAIT TO CHECK FOR CHANGES")
            return

        try:
            diff_commits = self.get_local_remote_diff_commits(branch)

            if diff_commits:
                print("FOUND SOME CHANGES")
                self.log_change(time.time())
                file_changes = self.get_file_changes_from_commits(diff_commits)
                self.print_file_changes(file_changes)
            else:
                print("NO CHANGES FOUND")

            self.update_last_check_time()
        except Exception as e:
            print(f"An error occurred while checking repo {self.path}: {e}")

    def get_local_remote_diff_commits(self, branch: str) -> List[git.Commit]:
        # Open the repository
        repo = git.Repo(self.path)

        # Get the local branch and the remote reference for the branch
        local_branch = repo.heads[branch]
        remote_ref = repo.remotes.origin.refs[branch]

        # Fetch the latest changes from the remote
        repo.remotes.origin.fetch()

        # Get the commits of the local and remote branches
        local_commit = local_branch.commit
        remote_commit = remote_ref.commit

        if local_commit == remote_commit:
            return []

        # Get the common ancestor commit
        base_commit = repo.merge_base(local_commit, remote_commit)[0]

        # Get the ahead and behind commits
        ahead_commits = list(repo.iter_commits(f"{base_commit}..{local_commit}"))
        behind_commits = list(repo.iter_commits(f"{base_commit}..{remote_commit}"))

        # Combine the ahead and behind commits
        diff_commits = ahead_commits + behind_commits
        return diff_commits
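
    # Illustrative result shape (hypothetical commits, not part of the original commit):
    #   local : A--B--C     -> ahead_commits  = [C]
    #   remote: A--B--D--E  -> behind_commits = [E, D]
    #   diff_commits == [C, E, D]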

    def get_file_changes_from_commits(self, commit_list: List[git.Commit]) -> List[FileChange]:
        file_changes = []
        for commit in commit_list:
            # print(commit)
            diffs = commit.diff(self.hash_last_processed, create_patch=True)
            # print(diffs)
            for diff in diffs:
                if diff.deleted_file:
                    change_type = ChangeType.DEL
                elif diff.new_file:
                    change_type = ChangeType.NEW
                else:
                    change_type = ChangeType.MOD

                file_change = FileChange(
                    commit_hash=commit.hexsha,
                    commit_time=str(commit.committed_datetime),
                    path=diff.b_path if diff.new_file else diff.a_path,
                    change_type=change_type
                )
                file_changes.append(file_change)
        return file_changes

    def print_file_changes(self, file_changes: List[FileChange]):
        for file_change in file_changes:
            print("------------------------------------")
            print(f"Commit Hash: {file_change.commit_hash}")
            print(f"Commit Time: {file_change.commit_time}")
            print(f"File Path: {file_change.path}")
            print(f"Change Type: {file_change.change_type.value}")
            print("------------------------------------")

def gitscan(path: str, cat: str) -> None:
    """Walk over directories to find Git repositories and check them."""
    path = os.path.abspath(os.path.expanduser(path))
    for root, dirs, files in os.walk(path):
        if '.git' in dirs:
            accountname = os.path.basename(os.path.dirname(root))
            reponame = os.path.basename(root)
            repo = Repo.load_from_redis(cat, accountname, reponame)
            if repo is None:
                repo = Repo(cat, accountname, reponame, root)
            branch = repo.get_current_branch()

            logger.debug(f"root: {root}")
            logger.debug(f"accountname: {accountname}")
            logger.debug(f"reponame: {reponame}")
            logger.debug(f"branch: {branch}")
            logger.debug(f"repo: {repo}")

            repo.check_for_changes(branch)
            dirs[:] = []  # Don't go deeper into subdirectories
        else:
            # Filter out any .git directories from further traversal
            dirs[:] = [d for d in dirs if d != '.git']

def print_redis_client():
    cursor = 0
    while True:
        cursor, keys = redis_client.scan(cursor)
        for key in keys:
            value = redis_client.get(key)
            print(key)
            print(value)
            print()
        if cursor == 0:
            break

if __name__ == "__main__":
    # print_redis_client()
    mypath = "~/code/git.threefold.info/projectmycelium"
    category = 'mycat'
    gitscan(path=mypath, cat=category)
    # print_redis_client()
39
herolib/tools/logger.py
Normal file
@@ -0,0 +1,39 @@
import logging
import colorlog

log_colors_config = {
    'DEBUG': 'cyan',
    'INFO': 'green',
    'WARNING': 'yellow',
    'ERROR': 'red',
    'CRITICAL': 'bold_red',
}

secondary_log_colors_config = {
    'name': {
        'DEBUG': 'blue',
        'INFO': 'blue',
        'WARNING': 'blue',
        'ERROR': 'blue',
        'CRITICAL': 'blue'
    },
    'levelname': log_colors_config
}

formatter = colorlog.ColoredFormatter(
    '%(log_color)s%(asctime)s - %(name_log_color)s%(name)s - %(levelname_log_color)s%(levelname)s - %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
    log_colors=log_colors_config,
    secondary_log_colors=secondary_log_colors_config
)

# Create a handler
handler = logging.StreamHandler()
handler.setFormatter(formatter)

# Get the root logger
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
logger.addHandler(handler)
13
herolib/tools/md5.py
Normal file
@@ -0,0 +1,13 @@
import hashlib

def file_md5(file_path: str) -> str:
    """
    Compute the MD5 hash of the file content.
    """
    hash_md5 = hashlib.md5()
    with open(file_path, "rb") as f:
        for chunk in iter(lambda: f.read(4096), b""):
            hash_md5.update(chunk)
    return hash_md5.hexdigest()
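
# Illustrative usage (hypothetical path, not part of the original commit):
#   file_md5("/tmp/empty.bin") -> "d41d8cd98f00b204e9800998ecf8427e"  # MD5 of empty input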
55
herolib/tools/ourtime.py
Normal file
@@ -0,0 +1,55 @@
from datetime import datetime, timezone, timedelta
import re

def epoch_get(deadline: str) -> int:
    """
    Set the deadline based on various input formats.

    Supports:
    - Relative: +1h (hours), +2d (days), +1w (week), +1m (month)
    - Absolute: 20/10/2024, 20/10/24, 20/10 (all the same day)

    If the hour is not specified, defaults to midday (noon).

    Returns the deadline as a Unix timestamp (epoch).
    """
    now = datetime.now(timezone.utc)

    # Check for relative time format
    relative_match = re.match(r'\+(\d+)([hdwm])', deadline)
    if relative_match:
        amount, unit = relative_match.groups()
        amount = int(amount)
        if unit == 'h':
            delta = timedelta(hours=amount)
        elif unit == 'd':
            delta = timedelta(days=amount)
        elif unit == 'w':
            delta = timedelta(weeks=amount)
        else:  # 'm'
            delta = timedelta(days=amount * 30)  # Approximate month

        new_deadline = now + delta
        return int(new_deadline.timestamp())

    # Check for absolute date format
    date_formats = ['%d/%m/%Y', '%d/%m/%y', '%d/%m']
    for fmt in date_formats:
        try:
            date_obj = datetime.strptime(deadline, fmt)
            if fmt == '%d/%m':
                # If the year is not provided, use the current year;
                # if that date is already in the past, assume next year
                date_obj = date_obj.replace(year=now.year)
                if date_obj.replace(tzinfo=timezone.utc) < now:
                    date_obj = date_obj.replace(year=date_obj.year + 1)

            # Set time to noon (12:00)
            date_obj = date_obj.replace(hour=12, minute=0, second=0, microsecond=0, tzinfo=timezone.utc)
            return int(date_obj.timestamp())
        except ValueError:
            continue

    raise ValueError("Invalid deadline format. Use +Xh/d/w/m for relative or DD/MM/YYYY for absolute dates.")
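
# Illustrative inputs (not part of the original commit):
#   epoch_get("+2d")        -> now plus 48 hours
#   epoch_get("20/10")      -> the next 20 October, 12:00 UTC
#   epoch_get("20/10/2024") -> 2024-10-20 12:00 UTC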
26
herolib/tools/pathtools.py
Normal file
@@ -0,0 +1,26 @@
import os

def expand_path(path: str) -> str:
    """
    Expand ~ to the user's home directory and return the absolute path.
    """
    return os.path.abspath(os.path.expanduser(path))

def remove_file_if_exists(file_path):
    try:
        # This removes the file or symlink, regardless of whether it's a
        # regular file or a broken symlink; directories are handled below
        os.remove(file_path)
    except FileNotFoundError:
        # File doesn't exist, so there is nothing to do
        pass
    except IsADirectoryError:
        # It's a directory, so we use rmdir instead
        os.rmdir(file_path)
    except PermissionError:
        print(f"Permission denied: Unable to remove {file_path}")
    except Exception as e:
        print(f"An error occurred while trying to remove {file_path}: {str(e)}")
110
herolib/tools/texttools.py
Normal file
@@ -0,0 +1,110 @@
import re
import unicodedata

def description_fix(description):
    description = description.lower()
    description = unicodedata.normalize('NFKD', description).encode('ASCII', 'ignore').decode('ASCII')
    description = re.sub(r'[^a-z0-9\s]', '', description)
    return description.strip()

# def name_fix(name: str) -> str:
#     """
#     Normalize the string to lowercase ASCII, replace spaces and specific punctuation with underscores,
#     and remove non-ASCII characters.
#     """
#     name = name.lower()
#     name = unicodedata.normalize('NFKD', name).encode('ascii', 'ignore').decode('ascii')
#     name = re.sub(r'[ :;!]', '_', name)  # Replace spaces and specific punctuation with underscores
#     name = re.sub(r'\W+', '', name)  # Remove any other non-word characters
#     return name

def name_fix(name: str) -> str:
    """
    Normalize the string to lowercase ASCII, replace spaces and specific punctuation with underscores,
    keep dots, and remove non-ASCII characters.
    """
    name = name.lower()
    name = unicodedata.normalize('NFKD', name).encode('ascii', 'ignore').decode('ascii')
    name = re.sub(r'[ :;!]', '_', name)  # Replace spaces and specific punctuation with underscores
    name = re.sub(r'[^\w._]+', '', name)  # Remove any non-word characters except dots and underscores
    return name

def name_obfuscate(name):
    # Define a mapping of consonants to their obfuscated counterparts
    consonant_map = {
        'b': 'p', 'c': 'k', 'd': 't', 'f': 'v', 'g': 'j', 'h': 'x',
        'j': 'q', 'k': 'c', 'l': 'r', 'm': 'n', 'n': 'm', 'p': 'b',
        'q': 'g', 'r': 'l', 's': 'z', 't': 'd', 'v': 'f', 'w': 'y',
        'x': 'h', 'y': 'w', 'z': 's'
    }

    # Define a mapping of vowels to their obfuscated counterparts
    vowel_map = {
        'a': 'e', 'e': 'i', 'i': 'o', 'o': 'u', 'u': 'a'
    }

    # Convert the name to lowercase
    name = name.lower()

    # Split the name into words
    words = name.split()

    obfuscated_words = []
    for word in words:
        obfuscated_word = ''
        for char in word:
            if char in vowel_map:
                # Obfuscate vowels
                obfuscated_word += vowel_map[char]
            elif char in consonant_map:
                # Obfuscate consonants
                obfuscated_word += consonant_map[char]
            else:
                # Keep non-alphabetic characters unchanged
                obfuscated_word += char
        obfuscated_words.append(obfuscated_word)

    # Join the obfuscated words back into a single string
    obfuscated_name = ' '.join(obfuscated_words)

    # Capitalize the first letter of each word
    obfuscated_name = obfuscated_name.title()

    return obfuscated_name

def dedent(content: str) -> str:
    # Split the input content into lines
    lines = content.splitlines()

    # Remove leading and trailing empty lines
    while lines and not lines[0].strip():
        lines.pop(0)
    while lines and not lines[-1].strip():
        lines.pop()

    if not lines:
        return ""

    # Find the minimum indentation (leading spaces) across all non-empty lines
    min_indent = None
    for line in lines:
        stripped_line = line.lstrip()
        if stripped_line:  # Only consider non-empty lines
            leading_spaces = len(line) - len(stripped_line)
            if min_indent is None or leading_spaces < min_indent:
                min_indent = leading_spaces

    # Dedent each line by the minimum indentation found
    dedented_lines = [line[min_indent:] if len(line) > min_indent else line for line in lines]

    # Join the dedented lines back into a single string
    return "\n".join(dedented_lines)

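# Illustrative behaviour (not part of the original commit):
#   dedent("    a\n      b\n    c") -> "a\n  b\nc"
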
if __name__ == "__main__":
    print("fixed name:", name_fix("John Doe"))
    print("obfuscated name:", name_obfuscate("John Doe"))
0
herolib/web/__init__.py
Normal file
0
herolib/web/doctools/__init__.py
Normal file
94
herolib/web/doctools/html_replacer.py
Normal file
@@ -0,0 +1,94 @@
# Assumption: logger and texttools live under herolib.tools in this repo
from herolib.tools.logger import logger
from herolib.tools.texttools import name_fix
from bs4 import BeautifulSoup
import re
from typing import Callable

# Define the types for the link and content fetching functions
LinkFetcher = Callable[[str, str, str, str, str], str]
ContentFetcher = Callable[[str, str, str, str], str]

# Private functions to be used internally

def _get_link(language: str, prefix: str, site_name: str, pagename: str, name: str) -> str:
    # Replace this with your logic to get the actual link
    logger.debug(f"_get_link: {language[:10]:<10} {site_name}:{pagename}:{name}")
    return f"{prefix}{language}/{site_name}/{pagename}/{name}.jpg"

def _get_content(language: str, site_name: str, pagename: str, name: str) -> str:
    # Replace this with your logic to get the actual content
    logger.debug(f"_get_content: {language[:10]:<10} {site_name}:{pagename}:{name}")
    return f"Replaced text for {name} on page {pagename} in {language} language on {site_name} site"

def _process_html(language: str, prefix: str, site_name: str, pagename: str, html_content: str) -> str:
    """
    Process HTML and replace content based on class tags.
    This lets us work with templates and pull in per-language content to replace in the HTML.
    """
    language = name_fix(language)
    site_name = name_fix(site_name)
    pagename = name_fix(pagename)
    prefix = prefix.strip()
    if not prefix.endswith('/'):
        prefix += '/'

    soup = BeautifulSoup(html_content, 'html.parser')

    # Find all elements with class names starting with !!img: or !!txt:
    for element in soup.find_all(class_=re.compile(r'!!(img|txt):(.+)')):
        for cls in element['class']:
            if cls.startswith('!!img:'):
                name = cls.split(':')[1]
                name = name_fix(name)
                # Get the link to replace the src attribute in !!img: elements
                link = _get_link(language=language, prefix=prefix, site_name=site_name, pagename=pagename, name=name)
                if element.name == 'img':
                    element['src'] = link
                elif 'src' in element.attrs:
                    element['src'] = link  # In case the element is not an img but has a src attribute
            elif cls.startswith('!!txt:'):
                name = cls.split(':')[1]
                name = name_fix(name)
                # Get the content to replace the text in !!txt: elements
                content = _get_content(language=language, site_name=site_name, pagename=pagename, name=name)
                element.string = content

    # Output the modified HTML
    return str(soup)

# Public function to process the HTML content
def process(language: str, prefix: str, site_name: str, pagename: str, html_content: str) -> str:
    """
    Public function to process HTML and replace content based on tags.
    This function wraps the internal _process_html function.
    """
    return _process_html(language=language, prefix=prefix, site_name=site_name, pagename=pagename, html_content=html_content)

# Sample usage with a given language, site name, page name, and HTML content
if __name__ == "__main__":
    # Example HTML content
    html_content = '''
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Sample Page</title>
</head>
<body>
    <h2 class="mb-6 is-size-1 is-size-3-mobile has-text-weight-bold !!txt:title1">Take care of your performance every day.</h2>
    <img class="responsive !!img:logo" src="old-link.jpg" alt="Company Logo">
    <p class="content !!txt:description">This is a sample description text.</p>
</body>
</html>
'''

    # Process the HTML content for a specific language, site name, and page
    language: str = "en"
    site_name: str = "ExampleSite"
    pagename: str = "HomePage"
    prefix: str = "http://localhost/images/"
    processed_html: str = process(language=language, prefix=prefix, site_name=site_name, pagename=pagename, html_content=html_content)

    # Print the modified HTML
    print(processed_html)
172
herolib/web/doctools/md_replacer.py
Normal file
@@ -0,0 +1,172 @@
import sys
import os

# Add the repo root to the Python module search path
# (assumption: this file lives at herolib/web/doctools/md_replacer.py)
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', '..')))

# Assumption: the logger helper lives under herolib.tools in this repo
from herolib.tools.logger import logger
from markdown_it import MarkdownIt
from markdown_it.tree import SyntaxTreeNode
import re
from enum import Enum
from mdformat.renderer import MDRenderer
from urllib.parse import urlparse

class ImageType(Enum):
    JPEG = 'jpeg'
    PNG = 'png'
    GIF = 'gif'
    OTHER = 'other'

def get_link_page(prefix: str, linkname: str, sitename: str, name: str) -> str:
    """
    Generates a page link based on sitename and name.

    Args:
        prefix (str): The URL prefix to prepend.
        linkname (str): The visible text of the link.
        sitename (str): The name of the site.
        name (str): The name of the page.

    Returns:
        str: The generated link.
    """
    logger.debug(f"get_link_page: {prefix[:60]:<60} {linkname} {sitename}:{name}")
    return f"[{linkname}]({prefix}/{sitename}/{name})"

def get_link_image(prefix: str, sitename: str, name: str, image_type: ImageType) -> str:
    """
    Generates an image link based on prefix, sitename, name and image type.

    Args:
        prefix (str): The URL prefix to prepend.
        sitename (str): The name of the site.
        name (str): The name of the image.
        image_type (ImageType): The type of the image.

    Returns:
        str: The generated link.
    """
    logger.debug(f"get_link_image: {prefix[:60]:<60} {sitename}:{name}")
    return f""

def get_include(sitename: str, name: str) -> str:
    """
    Generates an include directive link based on sitename and name.

    Args:
        sitename (str): The name of the site.
        name (str): The name of the page to include.

    Returns:
        str: The generated include directive.
    """
    logger.debug(f"get_include: {sitename}:{name}")
    return f"include: {sitename}/{name}"

def replace(prefix: str, markdown: str) -> str:
    """
    Finds all image links, markdown page links, and custom include directives in the provided markdown text
    and replaces them using the appropriate functions.

    Args:
        prefix (str): The URL prefix to prepend to rewritten links.
        markdown (str): The markdown content.

    Returns:
        str: The modified markdown content with updated links.
    """
    # Initialize the Markdown parser
    md = MarkdownIt()
    tokens = md.parse(markdown)
    ast = SyntaxTreeNode(tokens)

    print(ast.pretty(indent=2, show_text=True))

    def process_node(node: SyntaxTreeNode):

        def get_new_url(url: str):
            logger.debug(f"url: {url}")

            parsed_url = urlparse(url)
            image_path = parsed_url.path
            logger.debug(f"parsed_url: {parsed_url}")

            new_url = f"{prefix.rstrip('/')}/{image_path.strip('/')}"
            logger.debug(f"new_url: {new_url}")

            return new_url

        if node.type == 'image':
            # Process image link
            url = node.attrs.get('src', '')
            new_url = get_new_url(url)
            node.attrs['src'] = new_url

        elif node.type == 'link':
            # Process markdown page link
            url = node.attrs.get('href', '')
            new_url = get_new_url(url)
            node.attrs['href'] = new_url

        # Recursively process child nodes
        for child in node.children or []:
            process_node(child)

    def replace_include_directives(match: re.Match) -> str:
        """
        Replaces custom include directives with appropriate links.

        Args:
            match (re.Match): The match object containing the found include directive.

        Returns:
            str: The generated link for the include directive.
        """
        url = match.group(1)
        if ':' in url:
            site_name, page = url.split(':', 1)
        else:
            site_name = ""
            page = url
        if not page.endswith('.md'):
            page += '.md'
        page_name = page.split('/')[-1]
        return get_include(site_name, page_name)

    # Process the root node
    process_node(ast)

    # Convert the AST back to markdown
    renderer = MDRenderer()
    options = {}
    env = {}
    rendered_markdown = renderer.render(tokens, options, env)

    # include_pattern = re.compile(r"!!include page:'(.*?)'")
    # rendered_markdown = include_pattern.sub(replace_include_directives, rendered_markdown)

    return rendered_markdown

if __name__ == "__main__":

    text = """
![image](https://threefold.info/tfgrid4/images/tfgrid.png)
[Page link](sitename:some/path/to/page.md)
!!include page:'mypage'
!!include page:'mypage.md'
!!include page:'mysite:mypage
!!include page:'mysite:mypage'
!!include page:'mysite:mypage.md'
"""

    print(text)
    text2 = replace("http://localhost:8080/pre/", text)
    print(text2)
94
herolib/web/doctools/processor.py
Normal file
@@ -0,0 +1,94 @@
import os
import re
from typing import Callable

# Assumption: these helpers live under herolib.tools in this repo
from herolib.tools.logger import logger
from herolib.tools.md5 import file_md5
from herolib.tools.texttools import name_fix

def _example_set_file(site_name: str, path: str, md5: str) -> None:
    # Placeholder for actual implementation
    logger.debug(f"set_file : site_name={site_name[:20]:<20} {path}")

def _example_set_img(site_name: str, path: str, md5: str) -> None:
    # Placeholder for actual implementation
    logger.debug(f"set_img : site_name={site_name[:20]:<20} {path}")

def _example_set_markdown(
    site_name: str, path: str, md5: str, content: str
) -> None:
    # Placeholder for actual implementation
    logger.debug(f"set_markdown : site_name={site_name[:20]:<20} {path}")

def _example_set_site(site_name: str, path: str) -> None:
    # Placeholder for actual implementation
    logger.info(f"set_site : site_name={site_name[:20]:<20} {path}")

def _site_process_action(
    site_name: str,
    site_path: str,
    set_file: Callable[[str, str, str], None],
    set_img: Callable[[str, str, str], None],
    set_markdown: Callable[[str, str, str, str], None],
) -> None:
    logger.debug(f"site process: {site_path[:60]:<60} -> {site_name}")
    for root, _, files in os.walk(site_path):
        for file in files:
            file_path = os.path.join(root, file)
            file_path_rel = os.path.relpath(file_path, site_path)
            file_name = os.path.basename(file)
            # print(file_name)
            mymd5 = file_md5(file_path)
            if file.lower().endswith(".md"):
                with open(file_path, "r", encoding="utf-8") as f:
                    content = f.read()
                set_markdown(site_name, file_path_rel, mymd5, content)
            elif file_name in [".collection", ".site", ".done"]:
                continue
            elif re.search(
                r"\.(jpg|jpeg|png|gif|bmp|tiff|webp)$", file, re.IGNORECASE
            ):
                set_img(site_name, file_path_rel, mymd5)
            else:
                set_file(site_name, file_path_rel, mymd5)

def process(
    path: str,
    set_site: Callable[[str, str], None],
    set_file: Callable[[str, str, str], None],
    set_img: Callable[[str, str, str], None],
    set_markdown: Callable[[str, str, str, str], None],
) -> None:
    """
    Walk over the directory and apply set_site(), set_file(), set_img() and set_markdown().
    """
    path = os.path.abspath(os.path.expanduser(path))
    logger.info(f"sites process: {path}")
    for root, dirs, files in os.walk(path):
        if ".site" in files or ".collection" in files:
            site_name = name_fix(os.path.basename(root))
            set_site(site_name, root)
            _site_process_action(
                site_name, root, set_file, set_img, set_markdown
            )
            # Prevent the os.walk from going deeper into subdirectories
            dirs[:] = []

if __name__ == "__main__":
    mypath = "~/code/git.threefold.info/projectmycelium/info_projectmycelium/collections"

    # logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
    process(
        mypath,
        _example_set_site,
        _example_set_file,
        _example_set_img,
        _example_set_markdown,
    )