This commit is contained in:
2025-08-20 04:15:43 +02:00
parent 6b9f0cf291
commit e4bb201181
95 changed files with 194 additions and 907 deletions

View File

View File

View File

@@ -0,0 +1,69 @@
import os
from pydub import AudioSegment
import assemblyai as aai


class Client:
    """Thin wrapper around the AssemblyAI transcriber.

    Reads the API key from the ASSEMBLYAI environment variable and exposes
    helpers to convert .mp4 audio to .ogg and transcribe it to Markdown.
    """

    def __init__(self):
        api_key = os.getenv("ASSEMBLYAI")
        if not api_key:
            raise EnvironmentError(
                "Please set the ASSEMBLYAI environment variable with your AssemblyAI API key."
            )
        self.api_key = api_key
        aai.settings.api_key = self.api_key
        self.transcriber = aai.Transcriber()

    def convert_to_ogg_mono(self, input_path: str, output_path: str):
        """Convert an audio file from .mp4 to .ogg.

        NOTE: despite the name, the channel layout is kept as-is; uncomment
        the set_channels(1) line below to force mono output.
        """
        audio = AudioSegment.from_file(input_path, format="mp4")
        # Convert to mono if needed by uncommenting the line below
        # audio = audio.set_channels(1)
        audio.export(output_path, format="ogg")
        print(f"Converted to .ogg in {output_path}")

    def transcribe_audio(self, audio_path: str, output_path: str):
        """Transcribe the audio file with speaker labels and save the result
        as Markdown to output_path."""
        config = aai.TranscriptionConfig(
            speaker_labels=True,
        )
        transcript = self.transcriber.transcribe(audio_path, config)
        with open(output_path, "w", encoding="utf-8") as f:
            for utterance in transcript.utterances:
                f.write(
                    f"** Speaker {utterance.speaker}:\n{utterance.text}\n-------------\n"
                )
        print(f"Transcription saved to {output_path}")

    def transcribe_audio_file(self, input_path: str, output_transcription_path: str):
        """Handle the entire process from conversion to transcription and cleanup."""
        converted_audio_path = input_path.replace(".mp4", ".ogg")
        # Convert .mp4 to .ogg
        self.convert_to_ogg_mono(input_path, converted_audio_path)
        # Perform the transcription
        self.transcribe_audio(converted_audio_path, output_transcription_path)
        # Clean up the temporary converted file
        os.remove(converted_audio_path)
        print(f"Removed temporary file {converted_audio_path}")


# Example usage:
if __name__ == "__main__":
    input_audio_path = "/tmp/475353425.mp4"
    output_transcription_path = "/tmp/transcribe_475353425.md"
    client = Client()
    client.transcribe_audio_file(input_audio_path, output_transcription_path)

19
herolib/clients/readme.md Normal file
View File

@@ -0,0 +1,19 @@
# Vimeo Client
We need the following functionality:
- upload video
- download
- list videos
## some info
- https://developer.vimeo.com/api/reference
## Remarks: to use this client, make sure you have the secrets
```bash
hero git clone -u git@git.threefold.info:despiegk/hero_secrets.git
source git.threefold.info/projectmycelium/hero_server/myenv.sh
```

View File

View File

@@ -0,0 +1,241 @@
from dataclasses import dataclass, field, asdict
from typing import List, Optional
from stellar_sdk import Keypair, Server, StrKey
import json
import redis
from stellar.model import StellarAsset, StellarAccount
import os
import csv
import toml
from herotools.texttools import description_fix
class HorizonServer:
    """Cached access to Stellar accounts through a Horizon server.

    Horizon is the public API of the Stellar network (balances, transactions,
    ...). Every account that is looked up gets cached in Redis under
    ``stellar:<instance>:accounts:<pubkey>``.
    """

    def __init__(self, instance: str = "default", network: str = "main", tomlfile: str = "", owner: str = ""):
        """Connect to Redis + Horizon; optionally preload accounts from a TOML file."""
        self.redis_client = redis.Redis(host='localhost', port=6379, db=0)  # Adjust as needed
        self.instance = instance
        if network not in ['main', 'testnet']:
            raise ValueError("Invalid network value. Must be 'main' or 'testnet'.")
        self.network = network
        testnet = self.network == 'testnet'
        self.server = Server("https://horizon-testnet.stellar.org" if testnet else "https://horizon.stellar.org")
        self.tomlfile = os.path.expanduser(tomlfile)
        self.owner = owner
        if self.tomlfile:
            self.toml_load()

    def account_exists(self, pubkey: str) -> bool:
        """Check if an account exists in the Redis cache based on the public key."""
        redis_key = f"stellar:{self.instance}:accounts:{pubkey}"
        # BUGFIX: redis exists() returns an int, so the old ``!= None``
        # comparison was always True; compare the key count instead.
        return self.redis_client.exists(redis_key) > 0

    def account_get(self, key: str, reload: bool = False, name: str = "", description: str = "", cat: str = "") -> StellarAccount:
        """
        Load a Stellar account's information (cache-first).

        Args:
            key (str): The private or public key of the account. May be ""
                when ``name`` is given, in which case the cache is searched
                by name.
            reload (bool, optional): Force a refresh of the cached balances.
            name (str, optional): Name for the account.
            description (str, optional): Description for the account.
            cat (str, optional): Category of the account.

        Returns:
            StellarAccount: the account's information.
        """
        if key == "" and name:
            # Resolve a key by scanning the cache for a matching name.
            for redis_key in self.redis_client.scan_iter(f"stellar:{self.instance}:accounts:*"):
                data = self.redis_client.get(redis_key)
                if data:
                    # BUGFIX: json.loads accepts bytes directly; the old
                    # str(data) produced "b'...'" which is invalid JSON.
                    data = json.loads(data)
                    if data.get('name') == name and data.get('priv_key', data.get('public_key')):
                        key = data.get('priv_key', data.get('public_key'))
                        break
        if key == "":
            raise ValueError("No key provided")
        # Determine if the key is a public or private key
        if StrKey.is_valid_ed25519_public_key(key):
            public_key = key
            priv_key = ""
        elif StrKey.is_valid_ed25519_secret_seed(key):
            priv_key = key
            keypair = Keypair.from_secret(priv_key)
            public_key = keypair.public_key
        else:
            raise ValueError("Invalid Stellar key provided")
        redis_key = f"stellar:{self.instance}:accounts:{public_key}"
        data = self.redis_client.get(redis_key)
        changed = False
        if data:
            try:
                data = json.loads(data)
            except Exception as e:
                print(data)
                raise e
            data['assets'] = [StellarAsset(**asset) for asset in data['assets']]
            account = StellarAccount(**data)
            # Apply any caller-supplied metadata updates.
            if description != "" and description != account.description:
                account.description = description
                changed = True
            if name != "" and name != account.name:
                account.name = name
                changed = True
            if self.owner != "" and self.owner != account.owner:
                account.owner = self.owner
                changed = True
            if cat != "" and cat != account.cat:
                account.cat = cat
                changed = True
        else:
            account = StellarAccount(public_key=public_key, description=description, name=name, priv_key=priv_key, owner=self.owner, cat=cat)
            changed = True
        if reload or account.assets == []:
            changed = True
            if reload:
                account.assets = []
            # Pull fresh balances from Horizon.
            account_data = self.server.accounts().account_id(public_key).call()
            account.assets.clear()  # Clear existing assets to avoid duplication
            for balance in account_data['balances']:
                asset_type = balance['asset_type']
                if asset_type == 'native':
                    account.assets.append(StellarAsset(type="XLM", balance=balance['balance']))
                elif 'asset_code' in balance:
                    account.assets.append(StellarAsset(
                        type=balance['asset_code'],
                        issuer=balance['asset_issuer'],
                        balance=balance['balance']
                    ))
            changed = True
        # Cache the result in Redis (10 min TTL) if there were changes
        if changed:
            self.account_save(account)
        return account

    def comment_add(self, pubkey: str, comment: str, ignore_non_exist: bool = False):
        """
        Add a comment to a cached Stellar account.

        Args:
            pubkey (str): The public key of the Stellar account.
            comment (str): The comment to add to the account.
            ignore_non_exist (bool): Silently return when the account is not cached.
        """
        comment = description_fix(comment)
        if not self.account_exists(pubkey):
            if ignore_non_exist:
                return
            raise ValueError("Account does not exist in the cache")
        account = self.account_get(pubkey)
        account.comments.append(comment)
        self.account_save(account)

    def account_save(self, account: StellarAccount):
        """Persist the account to the Redis cache with a 10-minute TTL."""
        redis_key = f"stellar:{self.instance}:accounts:{account.public_key}"
        self.redis_client.setex(redis_key, 600, json.dumps(asdict(account)))

    def reload_cache(self):
        """Walk over all known accounts and reload their information from Horizon."""
        for redis_key in self.redis_client.scan_iter(f"stellar:{self.instance}:accounts:*"):
            data = self.redis_client.get(redis_key)
            if data:
                data = json.loads(data)
                public_key = data.get('public_key')
                if public_key:
                    self.account_get(public_key, reload=True)

    # format is PUBKEY,DESCRIPTION in text format
    def load_accounts_csv(self, file_path: str):
        """Attach the description column of a ``PUBKEY,DESCRIPTION`` CSV as
        comments on already-cached accounts.

        Raises on missing file or I/O / CSV errors. (BUGFIX: the old code
        *returned* Exception instances, which callers would silently ignore.)
        """
        file_path = os.path.expanduser(file_path)
        if not os.path.exists(file_path):
            raise FileNotFoundError(f"Error: File '{file_path}' does not exist.")
        try:
            with open(file_path, 'r', newline='') as file:
                reader = csv.reader(file, delimiter=',')
                for row in reader:
                    if row and len(row) >= 2:  # Check if row is not empty and has at least 2 elements
                        pubkey = row[0].strip()
                        comment = ','.join(row[1:]).strip()
                        if self.account_exists(pubkey):
                            self.comment_add(pubkey, comment)
        except IOError as e:
            raise Exception(f"Error reading file: {e}") from e
        except csv.Error as e:
            raise Exception(f"Error parsing CSV: {e}") from e

    def accounts_get(self) -> List[StellarAccount]:
        """
        Retrieve a list of all known Stellar accounts from the Redis cache.

        Returns:
            List[StellarAccount]: A list of StellarAccount objects.
        """
        accounts = []
        for redis_key in self.redis_client.scan_iter(f"stellar:{self.instance}:accounts:*"):
            # BUGFIX: scan_iter yields bytes; decode before splitting with a
            # str separator (bytes.split(':') raises TypeError).
            pubkey = redis_key.decode().split(':')[-1]
            accounts.append(self.account_get(key=pubkey))
        return accounts

    def toml_save(self):
        """Save all known accounts to ``self.tomlfile``, keyed by public key."""
        if self.tomlfile == "":
            raise ValueError("No TOML file path provided")
        accounts = self.accounts_get()
        accounts_dict = {account.public_key: asdict(account) for account in accounts}
        with open(self.tomlfile, 'w') as file:
            toml.dump(accounts_dict, file)

    def toml_load(self):
        """Load accounts from ``self.tomlfile`` (if present) into the Redis cache."""
        if not os.path.exists(self.tomlfile):
            return
        with open(self.tomlfile, 'r') as file:
            accounts_dict = toml.load(file)
        for pubkey, account_data in accounts_dict.items():
            account_data['assets'] = [StellarAsset(**asset) for asset in account_data['assets']]
            account = StellarAccount(**account_data)
            self.account_save(account)
def new(instance: str = "default", owner: str = "", network: str = "main", tomlfile: str = "") -> HorizonServer:
    """Factory: build a HorizonServer with the given settings."""
    return HorizonServer(
        instance=instance,
        network=network,
        tomlfile=tomlfile,
        owner=owner,
    )

View File

@@ -0,0 +1,70 @@
from dataclasses import dataclass, field, asdict
from typing import List, Optional
from stellar_sdk import Keypair, Server, StrKey
import json
import redis
@dataclass
class StellarAsset:
    """One asset held by a Stellar account (``type`` is the asset code, e.g. XLM)."""
    type: str
    balance: float  # may arrive as str from the API; converted on formatting
    issuer: str = ""

    def format_balance(self):
        """Balance as a thousands-separated string, trailing zeros stripped."""
        text = f"{float(self.balance):,.2f}"
        if '.' in text:
            text = text.rstrip('0').rstrip('.')
        return text

    def md(self):
        """Single markdown bullet for this asset."""
        return f"- **{self.type}**: {self.format_balance()}"


@dataclass
class StellarAccount:
    """A Stellar account plus the metadata we track for it."""
    owner: str
    priv_key: str = ""
    public_key: str = ""
    assets: List[StellarAsset] = field(default_factory=list)
    name: str = ""
    description: str = ""
    comments: List[str] = field(default_factory=list)
    cat: str = ""
    question: str = ""

    def md(self):
        """Render the account as a small markdown document."""
        lines = [
            f"# Stellar Account: {self.name or 'Unnamed'}", "",
            f"**Public Key**: {self.public_key}",
            f"**Cat**: {self.cat}",
            f"**Description**: {self.description[:60]}..." if self.description else "**Description**: None",
            f"**Question**: {self.question}" if self.question else "**Question**: None",
            "",
            "## Assets:", ""
        ]
        lines.extend(asset.md() for asset in self.assets)
        if not self.assets:
            lines.append("- No assets")
        lines.append("")
        if self.comments:
            lines.append("## Comments:")
            for comment in self.comments:
                body = "\n ".join(comment.split('\n')) if '\n' in comment else comment
                lines.append(f"- {body}")
        return "\n".join(lines)

    def balance_str(self) -> str:
        """Compact one-line summary, e.g. ``XLM:1,000 USDC:25``."""
        return " ".join(f"{a.type}:{float(a.balance):,.0f}" for a in self.assets)

View File

@@ -0,0 +1,78 @@
module stellar

import freeflowuniverse.crystallib.core.texttools

pub struct DigitalAssets {
pub mut:
	// BUGFIX: account_get iterates self.accounts, but this struct declared
	// no fields at all; declare the backing array.
	accounts []Account
}

pub struct Owner {
pub mut:
	name     string
	accounts []Account
}

@[params]
pub struct AccountGetArgs {
pub mut:
	name   string
	bctype BlockChainType
}

// account_get returns the unique account matching name + blockchain type.
pub fn (self DigitalAssets) account_get(args_ AccountGetArgs) !&Account {
	// BUGFIX: `[]&Account` was missing `{}` (invalid initializer).
	mut matches := []&Account{}
	mut args := args_
	args.name = texttools.name_fix(args.name)
	// Index-based loop so we can take a reference into the array instead of
	// referencing the (copied) loop value.
	for i in 0 .. self.accounts.len {
		if self.accounts[i].name == args.name && self.accounts[i].bctype == args.bctype {
			matches << &self.accounts[i]
		}
	}
	if matches.len == 0 {
		return error('No account found with the given name:${args.name} and blockchain type: ${args.bctype}')
	} else if matches.len > 1 {
		// BUGFIX: the old code compared an undefined `count` variable here.
		return error('Multiple accounts found with the given name:${args.name} and blockchain type: ${args.bctype}')
	}
	return matches[0]
}

pub struct Account {
pub mut:
	name        string
	secret      string
	pubkey      string
	description string
	cat         string
	owner       string
	assets      []Asset
	bctype      BlockChainType
}

pub struct Asset {
pub mut:
	amount    int
	assettype AssetType
}

// name returns the asset type's name.
pub fn (self Asset) name() string {
	return self.assettype.name
}

pub struct AssetType {
pub mut:
	name   string
	issuer string
	bctype BlockChainType
}

pub enum BlockChainType {
	stellar_pub
	stellar_test
}

View File

@@ -0,0 +1,46 @@
from typing import Tuple
from stellar_sdk import Server, Keypair, TransactionBuilder, Network, Asset, Signer, TransactionEnvelope
import redis
import requests
import json
import time
def create_account_on_testnet() -> Tuple[str, str]:
    """Return (public_key, secret_key) of a funded Stellar testnet account.

    The keypair is cached in Redis under ``stellartest:testaccount`` so
    repeated calls reuse the same account; when its XLM balance drops below
    100 it is topped up via friendbot.
    """
    # Shared by the helpers below; in the old code this was created inside
    # create_account() yet also referenced in the outer scope (NameError).
    redis_client = redis.StrictRedis(host='localhost', port=6379, db=0)

    def balance(public_key: str) -> float:
        """Native XLM balance of the account (0.0 when none found)."""
        # NOTE(review): `balance` was called but never defined in the old
        # code; implemented here against the public testnet Horizon — confirm
        # this matches the original intent.
        server = Server("https://horizon-testnet.stellar.org")
        account_data = server.accounts().account_id(public_key).call()
        for entry in account_data["balances"]:
            if entry["asset_type"] == "native":
                return float(entry["balance"])
        return 0.0

    def fund(public_key: str) -> float:
        # Request funds from the Stellar testnet friendbot
        response = requests.get(f"https://friendbot.stellar.org?addr={public_key}")
        if response.status_code != 200:
            raise Exception("Failed to fund new account with friendbot")
        time.sleep(1)
        return balance(public_key)

    def create_account() -> Tuple[str, str]:
        # Generate keypair, cache it, and fund it once.
        keypair = Keypair.random()
        public_key = keypair.public_key
        secret_key = keypair.secret
        account_data = {
            "public_key": public_key,
            "secret_key": secret_key
        }
        redis_client.set("stellartest:testaccount", json.dumps(account_data))
        fund(public_key)
        return public_key, secret_key

    # Reuse the cached account when present, topping it up if needed.
    if redis_client.exists("stellartest:testaccount"):
        account_data = json.loads(redis_client.get("stellartest:testaccount"))
        public_key = account_data["public_key"]
        secret_key = account_data["secret_key"]
        if balance(public_key) < 100:
            fund(public_key)
        return public_key, secret_key
    # BUGFIX: the old code discarded create_account()'s result and then
    # recursed into itself without bound; return the new keypair directly.
    return create_account()

View File

View File

@@ -0,0 +1,102 @@
import json
import redis
import telebot
import os
import logging
from termcolor import colored
from telebot.types import InlineKeyboardMarkup, InlineKeyboardButton
from telebot.formatting import escape_markdown
from bot_audio import audio_add
from bot_text import text_add
from ai.ask import ai_assistent,AIAssistant
class MyBot:
    """Telegram bot wiring: Redis-backed subscriptions plus audio/text handlers."""

    def __init__(self, ai_reset: bool = False):
        # Initialize logging
        logging.basicConfig(level=logging.INFO, format='%(message)s')
        self.logger = logging.getLogger(__name__)
        # Initialize Redis connection
        self.redis_client = redis.Redis(host='localhost', port=6379, db=0)
        # Initialize Telegram bot
        self.telebotkey = os.getenv("TELEBOT")
        if self.telebotkey:
            self.logger.info(colored("TELEBOT key set", "green"))
            self.bot = telebot.TeleBot(self.telebotkey)
        else:
            raise Exception("can't find TELEBOT in ENV")
        # Set up message handlers
        self.setup_handlers()
        audio_add(self)
        text_add(self, reset=ai_reset)

    def setup_handlers(self):
        """Register /help and /start plus the subscribe/unsubscribe buttons."""

        @self.bot.message_handler(commands=['help'])
        def send_welcome(message):
            self.bot.reply_to(message, """\
Hi there, I am your hero.
Just speak to me or do /start or /help
""")

        @self.bot.message_handler(commands=['start'])
        def start_command(message):
            chat_id = message.chat.id
            keyboard = InlineKeyboardMarkup()
            subscribe_button = InlineKeyboardButton("Subscribe to Updates", callback_data='subscribe')
            unsubscribe_button = InlineKeyboardButton("Unsubscribe from Updates", callback_data='unsubscribe')
            keyboard.row(subscribe_button, unsubscribe_button)
            self.bot.reply_to(message, "Please choose an option:", reply_markup=keyboard)

        @self.bot.callback_query_handler(func=lambda call: True)
        def callback_query(call):
            chat_id = call.message.chat.id
            if call.data == 'subscribe':
                self.redis_client.hset('subscribed_chats', chat_id, '1')
                self.bot.answer_callback_query(call.id, "You have subscribed to updates.")
                print(f"User subscribed to updates: {chat_id}")
            elif call.data == 'unsubscribe':
                self.redis_client.hdel('subscribed_chats', chat_id)
                self.bot.answer_callback_query(call.id, "You have unsubscribed from updates.")
                print(f"User unsubscribed from updates: {chat_id}")

    def send_message_to_subscribers(self, message):
        """Broadcast `message` to every chat in the subscription hash."""
        subscribed_chats = self.redis_client.hgetall('subscribed_chats')
        for chat_id in subscribed_chats:
            try:
                self.bot.send_message(chat_id.decode('utf-8'), message)
            except Exception as e:
                print(f"Failed to send message to chat {chat_id}: {str(e)}")

    def send_error_to_telegram(self, chat_id, error_message):
        """Format a structured error dict and send it to one chat.

        Expects keys: app, function, msg, exception_type, exception_message;
        `traceback` is optional.
        """
        # Format the error message for Telegram (plain string: the old
        # f-string here had no placeholders).
        telegram_message = "🚨 Error Occurred 🚨\n\n"
        telegram_message += f"app: {escape_markdown(error_message['app'])}\n"
        telegram_message += f"Function: {escape_markdown(error_message['function'])}\n"
        telegram_message += f"msg: {escape_markdown(error_message['msg'])}\n"
        telegram_message += f"Exception Type: {escape_markdown(error_message['exception_type'])}\n"
        telegram_message += f"Exception Message: ```\n{escape_markdown(error_message['exception_message'])}\n```\n"
        if 'traceback' in error_message:
            telegram_message += f"Traceback:\n```\n{escape_markdown(error_message['traceback'])}\n```"
        # Send the error message to the subscribed chat
        self.bot.send_message(chat_id, telegram_message, parse_mode='Markdown')

    def start(self):
        """Block forever polling Telegram for updates."""
        print("Bot started")
        # Start the bot
        self.bot.polling()


def bot_new(ai_reset: bool = False) -> MyBot:
    """Factory for MyBot. BUGFIX: `ai_reset` is now forwarded (the old
    factory silently dropped the option); default keeps old behavior."""
    return MyBot(ai_reset=ai_reset)


# Usage
if __name__ == "__main__":
    my_bot = bot_new()
    my_bot.start()

View File

@@ -0,0 +1,72 @@
import os
from pydub import AudioSegment
import whisper
def audio_add(self):
    """Attach an audio/voice handler to the bot: download, convert to WAV,
    transcribe with Whisper and reply with the (AI-processed) text."""
    self.model = whisper.load_model("base")

    @self.bot.message_handler(content_types=['audio', 'voice'])  # , 'document'
    def handle_audio(message):
        try:
            chat_id = message.chat.id
            file_info = None
            audio_path = None
            if message.content_type == 'audio':
                file_info = self.bot.get_file(message.audio.file_id)
                audio_path = f"/tmp/audio/{message.audio.file_id}.mp3"
            elif message.content_type == 'voice':
                file_info = self.bot.get_file(message.voice.file_id)
                audio_path = f"/tmp/audio/{message.voice.file_id}.ogg"
            if file_info:
                downloaded_file = self.bot.download_file(file_info.file_path)
                # Ensure the directory exists
                os.makedirs(os.path.dirname(audio_path), exist_ok=True)
                # Save the audio file
                with open(audio_path, 'wb') as new_file:
                    new_file.write(downloaded_file)
                print(f"Audio received and saved to {audio_path}")
                # Convert to WAV if necessary. BUGFIX: splitext only touches
                # the extension; the old chained .replace() could corrupt
                # paths containing '.mp3'/'.ogg' elsewhere in the name.
                base, ext = os.path.splitext(audio_path)
                if ext in ('.mp3', '.ogg'):
                    wav_path = base + '.wav'
                    audio = AudioSegment.from_file(audio_path)
                    audio.export(wav_path, format='wav')
                else:
                    wav_path = audio_path
                # Transcribe audio using Whisper
                result = self.model.transcribe(wav_path)
                transcription = result["text"]
                self.bot.send_message(chat_id, transcription, parse_mode='Markdown')
                print(f"Transcription: {transcription}")
                # text_process is stored unbound on the instance, so pass self.
                text2 = self.text_process(self, transcription)
                print(f"Processed text {chat_id}: {text2}")
                if len(text2) > 0:
                    self.bot.send_message(chat_id, text2)
        except Exception as e:
            error_message = {
                'app': 'Telegram Bot',
                'function': 'handle_audio',
                'msg': 'Failed to process audio file',
                'exception_type': type(e).__name__,
                'exception_message': str(e)
            }
            self.send_error_to_telegram(chat_id, error_message)
            print(f"Error processing audio file: {e}")

View File

@@ -0,0 +1,51 @@
import os
from ai.ask import ai_assistent
def text_add(self, reset: bool = False):
    """Wire the plain-text handler onto the bot and attach the AI assistant."""
    self.ai_assistent = ai_assistent(reset=reset)
    self.text_process = text_process

    @self.bot.message_handler(content_types=['text'])
    def handle_text(message):
        try:
            chat_id = message.chat.id
            text = message.text
            print(f"Received text from {chat_id}: {text}")
            # text_process is stored unbound on the instance, hence
            # the explicit self argument.
            text2 = self.text_process(self, text)
            print(f"Processed text {chat_id}: {text2}")
            if len(text2) > 0:
                self.bot.send_message(chat_id, text2)
        except Exception as e:
            error_message = {
                'app': 'Telegram Bot',
                'function': 'handle_text',
                'msg': 'Failed to process text',
                'exception_type': type(e).__name__,
                'exception_message': str(e)
            }
            self.send_error_to_telegram(chat_id, error_message)
            print(f"Error processing text file: {e}")


def text_process(self, txt) -> str:
    """Send `txt` to the AI assistant (timemgmt/schedule) and return its answer.

    Unless the text mentions "translate", a suffix is appended asking the
    model for heroscript output only.
    """
    if "translate" not in txt.lower():
        txt += '''\n\n
only output the heroscript, no comments
'''
    response = self.ai_assistent.ask(
        category='timemgmt',
        name='schedule',
        question=txt)
    return response

View File

@@ -0,0 +1,36 @@
import json
import redis
import telebot
import threading
from telebot.types import InlineKeyboardMarkup, InlineKeyboardButton
import time
from telebot.formatting import escape_markdown
import os
from telegram.bot import send_error_to_telegram
# Initialize Redis connection
redis_client = redis.Redis(host='localhost', port=6379, db=0)


# get errors from redis and send them to bot if subscription done
def process_error_queue():
    """Forward queued error dicts from the Redis 'error_queue' list to all
    subscribed chats. Blocks forever; meant to run in a background thread."""
    while True:
        # BLPOP blocks up to 1s — replaces the old lpop + sleep busy loop.
        item = redis_client.blpop('error_queue', timeout=1)
        if not item:
            continue
        _, error_json = item
        # Deserialize the error message from JSON
        error_message = json.loads(error_json)
        # NOTE(review): send_error_to_telegram is imported from telegram.bot,
        # but there it appears to be a MyBot *method* — confirm this import
        # actually resolves to a module-level callable.
        subscribed_chats = redis_client.hgetall('subscribed_chats')
        # Send the error message to all subscribed chats
        for chat_id in subscribed_chats.keys():
            send_error_to_telegram(int(chat_id), error_message)


# Start processing the error queue
process_error_queue_thread = threading.Thread(target=process_error_queue)
process_error_queue_thread.start()

View File

View File

@@ -0,0 +1,142 @@
import os
from typing import List, Optional
import requests
import vimeo
from model_video import VideoInfo, video_model_load, videos_model_load
class VimeoClient:
    """Small wrapper around the Vimeo API: upload, download and list videos."""

    def __init__(self):
        # Retrieve necessary credentials from environment variables
        self.client_id = os.getenv("VIMEO_CLIENT_ID")
        self.client_secret = os.getenv("VIMEO_SECRET")
        self.access_token = os.getenv("VIMEO_ACCESSTOKEN_ID")
        self.user_id = os.getenv("VIMEO_USER_ID")
        # Check if all environment variables are present
        if not all([self.client_id, self.client_secret, self.access_token, self.user_id]):
            raise EnvironmentError(
                "Please set the VIMEO_CLIENT_ID, VIMEO_SECRET,VIMEO_USER_ID and VIMEO_ACCESSTOKEN_ID environment variables."
            )
        # Initialize the Vimeo client
        self.client = vimeo.VimeoClient(token=self.access_token, key=self.client_id, secret=self.client_secret)

    def upload(self, file: str, video_title: str, description: str) -> str:
        """Upload `file` and return the new video's URI."""
        video_uri = self.client.upload(file, data={"name": video_title, "description": description})
        return video_uri

    def download(self, video_id: str, output_file: str = "myvid.mp4"):
        """Stream-download the largest available rendition to `output_file`."""
        info = self.get_video_info(video_id)
        size, link = 0, ""
        for item in info.download:
            if item["size"] > size:
                size = item["size"]
                link = item["link"]
        if link == "":
            raise Exception("download link not provided for video")
        video_response = requests.get(link, stream=True)
        # BUGFIX: the old code divided by 1024 (i.e. KB) while printing "MB",
        # and printed on every 1 KiB chunk; report true MB, once per MB.
        downloaded = 0
        next_report_mb = 1
        with open(output_file, "wb") as video_file:
            for chunk in video_response.iter_content(chunk_size=1024 * 1024):
                if chunk:
                    downloaded += len(chunk)
                    if downloaded >= next_report_mb * 1024 * 1024:
                        print(f"{downloaded // (1024 * 1024)}MB Downloaded...")
                        next_report_mb = downloaded // (1024 * 1024) + 1
                    video_file.write(chunk)
        print(f"Video downloaded successfully to {output_file}!")

    def get_video_info(self, video_id: str) -> VideoInfo:
        """
        Get information about a video by id.

        :param video_id: id of the Vimeo video.
        :return: parsed VideoInfo; raises on a non-200 response.
        """
        response = self.client.get(f"/videos/{video_id}")
        if response.status_code != 200:
            raise Exception(f"Failed to get video details. Status code: {response.status_code}, Error: {response.text}")
        return video_model_load(response.content)

    def get_videos(self, folder: Optional[int] = None, folders: Optional[List[int]] = None) -> List[VideoInfo]:
        """
        Get information about videos from specified folder(s) or all videos if no folder is specified.

        :param folder: ID of a single folder to fetch videos from.
        :param folders: List of folder IDs to fetch videos from.
        :return: List of VideoInfo objects.
        """
        if self.user_id == 0:
            raise Exception("Can't find user ID, it's not set in env variables")
        all_videos = []
        if folder is not None:
            folders = [folder]
        elif folders is None:
            # If no folder or folders specified, get all videos
            response = self.client.get("/me/videos")
            if response.status_code == 200:
                return videos_model_load(response.content)
            raise Exception(f"Failed to get videos. Status code: {response.status_code}, Error: {response.text}")
        for folder_id in folders:
            response = self.client.get(f"/users/{self.user_id}/projects/{folder_id}/videos")
            if response.status_code == 200:
                all_videos.extend(videos_model_load(response.content))
            else:
                # Best effort: keep going over the remaining folders.
                print(f"Failed to get videos for folder {folder_id}. Status code: {response.status_code}, Error: {response.text}")
        return all_videos


def new() -> VimeoClient:
    """Factory for a VimeoClient configured from environment variables."""
    return VimeoClient()


# Example usage:
if __name__ == "__main__":
    cl = new()
    v = cl.get_videos(folders=[10700101, 13139570, 12926235, 10752310, 10702046])
    for item in v:
        video_id = item.uri.split("/")[-1]
        print(f" - {item.name} : {video_id} ")

View File

@@ -0,0 +1,177 @@
from dataclasses import dataclass, field
from typing import List, Optional, Dict, Any
from dataclasses_json import dataclass_json
import json
import yaml
def json_to_yaml(json_data):
    """Convert JSON (a string or an already-parsed object) to a YAML string."""
    parsed = json.loads(json_data) if isinstance(json_data, str) else json_data
    return yaml.dump(parsed, sort_keys=False, default_flow_style=False)
@dataclass_json
@dataclass
class Size:
    """One rendition of a picture (thumbnail)."""
    width: int
    height: int
    link: str
    link_with_play_button: Optional[str] = None


@dataclass_json
@dataclass
class Pictures:
    """Picture set attached to a video or user."""
    uri: str
    active: bool
    type: str
    base_link: str
    sizes: List[Size]
    resource_key: str
    default_picture: bool


@dataclass_json
@dataclass
class Embed:
    """Embed/player configuration as returned by the Vimeo API.

    NOTE: currently unused by VideoInfo (the 'embed' key is stripped before
    parsing — see video_model_load).
    """
    html: str
    badges: Dict[str, Any]
    interactive: bool
    buttons: Dict[str, bool]
    logos: Dict[str, Any]
    play_button: Dict[str, Any]
    title: Dict[str, Any]
    end_screen: List[Any]
    playbar: bool
    quality_selector: Optional[str]
    pip: bool
    autopip: bool
    volume: bool
    color: str
    colors: Dict[str, str]
    event_schedule: bool
    has_cards: bool
    outro_type: str
    show_timezone: bool
    cards: List[Any]
    airplay: bool
    audio_tracks: bool
    chapters: bool
    chromecast: bool
    closed_captions: bool
    transcript: bool
    ask_ai: bool
    uri: Optional[str]
    email_capture_form: Optional[str]
    speed: bool


@dataclass_json
@dataclass
class Uploader:
    """Uploader of a video (only the pictures are modeled)."""
    pictures: Pictures


@dataclass_json
@dataclass
class User:
    """Vimeo user profile.

    NOTE: currently unused by VideoInfo (the 'user' key is stripped before
    parsing — see video_model_load).
    """
    uri: str
    name: str
    link: str
    capabilities: Dict[str, bool]
    location: str
    gender: str
    bio: str
    short_bio: str
    created_time: str
    pictures: Pictures
    websites: List[Dict[str, Optional[str]]]
    # metadata: Dict[str, Any]
    location_details: Dict[str, Optional[Any]]
    skills: List[Any]
    available_for_hire: bool
    can_work_remotely: bool
    preferences: Dict[str, Any]
    content_filter: List[str]
    upload_quota: Dict[str, Any]
    resource_key: str
    account: str


@dataclass_json
@dataclass
class VideoInfo:
    """A single video as returned by /videos/{id}.

    Fields stripped before parsing (metadata, embed, user) are kept here as
    comments so the mapping to the raw API payload stays visible.
    """
    uri: str
    name: str
    description: Optional[str]
    type: str
    link: str
    player_embed_url: str
    duration: int
    width: int
    height: int
    # embed: Embed
    created_time: str
    modified_time: str
    release_time: str
    content_rating: List[str]
    content_rating_class: str
    rating_mod_locked: bool
    license: Optional[str]
    privacy: Dict[str, Any]
    pictures: Pictures
    tags: List[Any]
    stats: Dict[str, int]
    categories: List[Any]
    uploader: Uploader
    # metadata: Dict[str, Any]
    manage_link: str
    # user: Optional[User]
    last_user_action_event_date: Optional[str]
    parent_folder: Optional[Dict[str, Any]]
    review_page: Optional[Dict[str, Any]]
    files: Optional[List[Dict[str, Any]]]
    download: Optional[List[Dict[str, Any]]]
    app: Optional[Dict[str, str]]
    play: Optional[Dict[str, Any]]
    status: str
    resource_key: str
    upload: Optional[Dict[str, Optional[str]]]
    transcode: Dict[str, str]
    is_playable: bool
    has_audio: bool
def video_model_load(json_data: str, dojsonload: bool = True) -> VideoInfo:
    """Parse one video's JSON (string, or pre-parsed dict when
    dojsonload=False) into a VideoInfo.

    Keys the model does not carry (metadata, embed, user, websites) are
    stripped before deserialization.
    """
    json_dict = json.loads(json_data) if dojsonload else json_data
    for unused_key in ('metadata', 'embed', 'user', 'websites'):
        json_dict.pop(unused_key, None)
    return VideoInfo.from_json(json.dumps(json_dict))


def videos_model_load(json_data: str) -> List[VideoInfo]:
    """Parse a Vimeo list response ({"data": [...]}) into VideoInfo objects."""
    payload = json.loads(json_data)
    return [video_model_load(entry, dojsonload=False) for entry in payload["data"]]

View File

View File

@@ -0,0 +1,107 @@
import os
from pydub import AudioSegment
import whisper
import moviepy.editor as mp
import nltk
from nltk.tokenize import sent_tokenize, word_tokenize
# Download necessary NLTK data
nltk.download('punkt', quiet=True)
class Convertor:
    """Normalises media files to WAV and splits long text into parts.

    ``process``/``process_video``/``process_audio`` turn a video or audio
    file into a ``.wav`` file on disk and return its path.  ``split_text``
    chunks a transcription into pieces of at most ``max_chars_per_part``
    characters, cutting at sentence boundaries.
    """

    def __init__(self, max_chars_per_part=4000, context: str = "main"):
        # Character budget for each text part emitted by split_text().
        self.max_chars_per_part = max_chars_per_part
        # Free-form label for the processing context; not used internally.
        self.context = context

    @classmethod
    def new(cls, max_chars_per_part=4000, context: str = "main"):
        """Alternate constructor.

        Now also forwards ``context`` (previously it was silently dropped
        and always defaulted to "main").
        """
        return cls(max_chars_per_part, context)

    def process(self, path: str):
        """Dispatch *path* by extension to video or audio handling.

        Returns:
            The path of the resulting ``.wav`` file.

        Raises:
            ValueError: If the extension is not a supported media type.
        """
        if path.lower().endswith(('.mp4', '.avi', '.mov')):  # Video files
            return self.process_video(path)
        elif path.lower().endswith(('.mp3', '.wav', '.ogg')):  # Audio files
            return self.process_audio(path)
        else:
            raise ValueError("Unsupported file format")

    def process_video(self, video_path: str):
        """Extract the audio track of a video into a sibling ``.wav`` file.

        Returns:
            The path of the extracted audio file.
        """
        video = mp.VideoFileClip(video_path)
        audio_path = video_path.rsplit('.', 1)[0] + '.wav'
        video.audio.write_audiofile(audio_path)
        video.close()
        return audio_path

    def process_audio(self, audio_path: str):
        """Convert an audio file to WAV if it is not one already.

        Bug fix: the resulting path is now returned; previously the method
        computed it but implicitly returned None.

        Returns:
            The WAV file path (the input path itself when already ``.wav``).
        """
        wav_path = audio_path.rsplit('.', 1)[0] + '.wav'
        if not audio_path.lower().endswith('.wav'):
            audio = AudioSegment.from_file(audio_path)
            audio.export(wav_path, format='wav')
        else:
            wav_path = audio_path
        return wav_path

    def split_text(self, text):
        """Split *text* into parts shorter than ``max_chars_per_part``.

        Paragraphs (separated by blank lines) are re-tokenised into
        sentences; a part never cuts a sentence in half.
        """
        parts = []
        current_part = ""
        paragraphs = text.split('\n\n')
        for paragraph in paragraphs:
            sentences = sent_tokenize(paragraph)
            for sentence in sentences:
                if len(current_part) + len(sentence) < self.max_chars_per_part:
                    current_part += sentence + ' '
                else:
                    if current_part:
                        parts.append(current_part.strip())
                    current_part = sentence + ' '
            # Re-insert the paragraph break unless it would overflow the part.
            if len(current_part) + 2 < self.max_chars_per_part:
                current_part += '\n\n'
            else:
                parts.append(current_part.strip())
                current_part = '\n\n'
        if current_part:
            parts.append(current_part.strip())
        return parts

    def find_natural_pause(self, text):
        """Split *text* in two near the middle, preferring to cut just after
        sentence-ending punctuation at or beyond the midpoint."""
        words = word_tokenize(text)
        total_words = len(words)
        mid_point = total_words // 2
        # Look for sentence-ending punctuation from the midpoint onwards.
        for i in range(mid_point, total_words):
            if words[i] in '.!?':
                return ' '.join(words[:i+1]), ' '.join(words[i+1:])
        # No punctuation found: split at the midpoint itself.
        return ' '.join(words[:mid_point]), ' '.join(words[mid_point:])

    def write_to_file(self, parts, output_path):
        """Write numbered parts to *output_path*, separated by a rule line."""
        with open(output_path, 'w', encoding='utf-8') as f:
            for i, part in enumerate(parts, 1):
                f.write(f"Part {i}:\n\n")
                f.write(part)
                f.write("\n\n")
                if i < len(parts):
                    f.write("-" * 50 + "\n\n")
# Usage example:
if __name__ == "__main__":
    processor = Convertor.new()
    # Bug fix: output_file was previously undefined here, so the script died
    # with a NameError on the write_to_file call.
    output_file = "/Users/despiegk1/Documents/transcription.md"
    item = "/Users/despiegk1/Documents/Zoom/2024-07-16 16.42.50 Kristof De Spiegeleer's Personal Meeting Room/video1720369800.mp4"
    # NOTE(review): Convertor.process returns a .wav file path, not text
    # parts — presumably a transcription step is missing before splitting;
    # confirm against MediaProcessor, which does transcribe.
    transcription_parts = processor.process(item)
    processor.write_to_file(transcription_parts, output_file)
    print(f"Transcription split into {len(transcription_parts)} parts:")
    for i, part in enumerate(transcription_parts, 1):
        print(f"Part {i}:")
        print(part)
        print("-" * 50)

View File

@@ -0,0 +1,118 @@
import os
from pydub import AudioSegment
import whisper
import moviepy.editor as mp
import nltk
from nltk.tokenize import sent_tokenize, word_tokenize
# Download necessary NLTK data
nltk.download('punkt', quiet=True)
class MediaProcessor:
    """Turn a video or audio file into a transcription split into parts.

    Uses OpenAI Whisper for speech-to-text and NLTK sentence tokenisation
    to cut the transcript into chunks of at most ``max_chars_per_part``
    characters.
    """

    def __init__(self, max_chars_per_part=4000):
        # Whisper English base model; "medium.en" trades speed for accuracy.
        self.model = whisper.load_model("base.en")
        self.max_chars_per_part = max_chars_per_part

    @classmethod
    def new(cls, max_chars_per_part=4000):
        """Alternate constructor."""
        return cls(max_chars_per_part)

    def process(self, path: str):
        """Route *path* by extension to video or audio handling."""
        lowered = path.lower()
        if lowered.endswith(('.mp4', '.avi', '.mov')):
            return self.process_video(path)
        if lowered.endswith(('.mp3', '.wav', '.ogg')):
            return self.process_audio(path)
        raise ValueError("Unsupported file format")

    def process_video(self, video_path: str):
        """Extract the audio track to a .wav file, then transcribe it."""
        clip = mp.VideoFileClip(video_path)
        extracted = video_path.rsplit('.', 1)[0] + '.wav'
        clip.audio.write_audiofile(extracted)
        clip.close()
        return self.process_audio(extracted)

    def process_audio(self, audio_path: str):
        """Transcribe an audio file, converting it to WAV first if needed."""
        wav_path = audio_path.rsplit('.', 1)[0] + '.wav'
        if audio_path.lower().endswith('.wav'):
            wav_path = audio_path
        else:
            AudioSegment.from_file(audio_path).export(wav_path, format='wav')
        # Whisper returns a dict; "text" holds the full transcription.
        transcript = self.model.transcribe(wav_path)["text"]
        return self.split_text(transcript)

    def split_text(self, text):
        """Break *text* into chunks under ``max_chars_per_part`` characters,
        cutting at sentence boundaries and preserving paragraph breaks."""
        limit = self.max_chars_per_part
        chunks = []
        buffer = ""
        for paragraph in text.split('\n\n'):
            for sentence in sent_tokenize(paragraph):
                if len(buffer) + len(sentence) < limit:
                    buffer += sentence + ' '
                else:
                    if buffer:
                        chunks.append(buffer.strip())
                    buffer = sentence + ' '
            # Re-insert the paragraph break unless it would overflow the chunk.
            if len(buffer) + 2 < limit:
                buffer += '\n\n'
            else:
                chunks.append(buffer.strip())
                buffer = '\n\n'
        if buffer:
            chunks.append(buffer.strip())
        return chunks

    def find_natural_pause(self, text):
        """Split *text* roughly in half, preferring to cut just after
        sentence-ending punctuation at or beyond the midpoint."""
        tokens = word_tokenize(text)
        midpoint = len(tokens) // 2
        for idx in range(midpoint, len(tokens)):
            if tokens[idx] in '.!?':
                return ' '.join(tokens[:idx + 1]), ' '.join(tokens[idx + 1:])
        # No punctuation after the midpoint: split at the midpoint itself.
        return ' '.join(tokens[:midpoint]), ' '.join(tokens[midpoint:])

    def write_to_file(self, parts, output_path):
        """Write numbered parts to *output_path*, separated by rule lines."""
        with open(output_path, 'w', encoding='utf-8') as out:
            total = len(parts)
            for index, chunk in enumerate(parts, 1):
                out.write(f"Part {index}:\n\n")
                out.write(chunk)
                out.write("\n\n")
                if index < total:
                    out.write("-" * 50 + "\n\n")
# Usage example:
if __name__ == "__main__":
    processor = MediaProcessor.new(max_chars_per_part=10000)
    output_file = "/Users/despiegk1/Documents/transcription3.md"
    item = "/Users/despiegk1/Documents/Zoom/2024-07-16 16.42.50 Kristof De Spiegeleer's Personal Meeting Room/video1720369800.mp4"
    # Transcribe, chunk, and persist the result, then echo it to stdout.
    parts = processor.process(item)
    processor.write_to_file(parts, output_file)
    print(f"Transcription split into {len(parts)} parts:")
    for idx, chunk in enumerate(parts, 1):
        print(f"Part {idx}:")
        print(chunk)
        print("-" * 50)

View File

View File

@@ -0,0 +1,313 @@
import json
import os
import time
from dataclasses import dataclass
from datetime import datetime, timedelta
from enum import Enum
from typing import Any, Dict, List, Optional
import redis
import requests
# Wigle network-search endpoint used by search_networks().
API_URL = "https://api.wigle.net/api/v2/network/search"
# How long search results stay cached in Redis before expiring.
REDIS_CACHE_EXPIRY = timedelta(hours=1)
API_RATE_LIMIT = 30  # seconds between requests
# Module-level Redis connection used as a response cache
# (assumes a local Redis on the default port — TODO confirm in deployment).
redis_client = redis.Redis(host="localhost", port=6379, db=0, decode_responses=True)
# Track last API request time (initialized to allow immediate first request)
_last_request_time = time.time() - API_RATE_LIMIT
class WigleError(Exception):
    """Raised for any Wigle configuration or API request failure."""
class NetworkType(str, Enum):
    """Network types supported by the Wigle API."""

    WIFI = "WIFI"  # 802.11 WiFi access points
    BT = "BT"  # Bluetooth devices
    CELL = "CELL"  # Cellular networks
class Encryption(str, Enum):
    """WiFi encryption types accepted by the Wigle search API."""

    NONE = "None"  # open/unencrypted (the API expects the literal string "None")
    WEP = "WEP"
    WPA = "WPA"
    WPA2 = "WPA2"
    WPA3 = "WPA3"
    UNKNOWN = "unknown"
@dataclass
class Location:
    """Represents a wireless network location with all available Wigle API fields.

    Field-to-API mapping is done in parse_network_to_location().
    """

    ssid: str  # network name as reported by Wigle
    latitude: float  # trilaterated latitude (API field "trilat")
    longitude: float  # trilaterated longitude (API field "trilong")
    last_update: Optional[datetime]  # when Wigle last updated the record ("lastupdt")
    encryption: Optional[str] = None  # encryption type, if known
    network_type: Optional[str] = None  # API field "type" — presumably WIFI/BT/CELL
    channel: Optional[int] = None
    frequency: Optional[float] = None
    qos: Optional[int] = None  # Wigle quality score — TODO confirm semantics
    transid: Optional[str] = None
    firsttime: Optional[datetime] = None  # first observation timestamp
    lasttime: Optional[datetime] = None  # most recent observation timestamp
    country_code: Optional[str] = None  # API field "country"
    city: Optional[str] = None
    region: Optional[str] = None
    house_number: Optional[str] = None  # API field "housenumber"
    road: Optional[str] = None
    address: Optional[str] = None
def get_wigle_auth() -> str:
    """Return the Wigle API token read from the WIGLE environment variable.

    Raises:
        WigleError: If the variable is unset or empty.
    """
    token = os.getenv("WIGLE")
    if token:
        return token
    raise WigleError("WIGLE environment variable not set. Format should be: 'AIDxxx:yyy'")
def enforce_rate_limit():
    """Block until API_RATE_LIMIT seconds have passed since the last request,
    printing a live countdown, then record the new request time."""
    global _last_request_time
    elapsed = time.time() - _last_request_time
    if elapsed < API_RATE_LIMIT:
        wait = API_RATE_LIMIT - elapsed
        print(f"\nRate limit: waiting {wait:.0f} seconds", end="", flush=True)
        remaining = int(wait)
        while remaining > 0:
            time.sleep(1)
            print(f"\rRate limit: waiting {remaining:2d} seconds", end="", flush=True)
            remaining -= 1
        print("\rRate limit: continuing... ")  # overwrite the countdown line
    _last_request_time = time.time()
def search_networks(
    *,
    # Location filters
    latitude_north: Optional[float] = None,
    latitude_south: Optional[float] = None,
    longitude_east: Optional[float] = None,
    longitude_west: Optional[float] = None,
    # Network filters
    ssid: Optional[str] = None,
    ssidlike: Optional[str] = None,
    network_type: Optional[NetworkType] = None,
    encryption: Optional[Encryption] = None,
    # Time filters
    on_since: Optional[datetime] = None,
    last_update: Optional[datetime] = None,
    # Result control
    results_per_page: int = 100,
    search_after: Optional[str] = None,
    # Other filters
    freenet: Optional[bool] = None,
    paynet: Optional[bool] = None,
    show_query: bool = False,
) -> Dict[str, Any]:
    """
    Search for networks using the Wigle API with full parameter support and Redis caching.

    Rate limited to one request per minute (cache hits bypass the limit).

    Args:
        latitude_north: Northern boundary of search box
        latitude_south: Southern boundary of search box
        longitude_east: Eastern boundary of search box
        longitude_west: Western boundary of search box
        ssid: Exact SSID match
        ssidlike: SSID wildcard match
        network_type: Filter by network type (WIFI/BT/CELL)
        encryption: Filter by encryption type
        on_since: Only show networks seen on or after date
        last_update: Only show networks updated since date
        results_per_page: Number of results per page (max 100)
        search_after: Token for getting next batch of results
        freenet: Show only free networks
        paynet: Show only pay networks
        show_query: Return query bounds without results

    Returns:
        Dictionary containing search results and metadata including searchAfter token

    Raises:
        WigleError: If the WIGLE environment variable is not set or API request fails
    """
    # Example request:
    # https://api.wigle.net/api/v2/network/search?onlymine=false&encryption=None&freenet=false&paynet=false
    try:
        # Build cache key from all parameters.
        # NOTE: locals() runs as the first statement, so it snapshots exactly
        # the keyword parameters; introducing any local variable above this
        # line would silently change the cache key.
        params = locals()
        cache_key = f"wigle:search:{json.dumps(params, default=str, sort_keys=True)}"
        cached_result = redis_client.get(cache_key)
        if cached_result:
            # Cache hit: return without consuming rate-limit budget.
            return json.loads(cached_result)
        # Enforce rate limit before making request
        enforce_rate_limit()
        # Build API parameters
        api_params = {
            "onlymine": "false",
            "resultsPerPage": results_per_page,
        }
        # Add optional parameters if provided.
        # NOTE(review): only latitude_north is checked before sending all four
        # bounding-box values, so the other three may go out as None — confirm
        # callers always supply the full box.
        if latitude_north is not None:
            api_params["latrange1"] = latitude_south
            api_params["latrange2"] = latitude_north
            api_params["longrange1"] = longitude_west
            api_params["longrange2"] = longitude_east
        if ssid:
            api_params["ssid"] = ssid
        if ssidlike:
            api_params["ssidlike"] = ssidlike
        if network_type:
            # NOTE(review): "netid" looks like Wigle's BSSID filter rather than
            # a network-type filter — verify against the Wigle API reference.
            api_params["netid"] = network_type.value
        if encryption:
            api_params["encryption"] = encryption.value
        else:
            # Default: restrict results to unencrypted ("None") networks.
            api_params["encryption"] = "None"
        if on_since:
            api_params["onSince"] = on_since.strftime("%Y%m%d")
        if last_update:
            api_params["lastupdt"] = last_update.strftime("%Y%m%d")
        if freenet is not None:
            api_params["freenet"] = str(freenet).lower()
        if paynet is not None:
            api_params["paynet"] = str(paynet).lower()
        if search_after:
            api_params["searchAfter"] = search_after
        if show_query:
            api_params["showQuery"] = str(show_query).lower()
        # Make API request using HTTP Basic auth built from the WIGLE env var.
        wigle_auth = get_wigle_auth()
        headers = {"Authorization": f"Basic {wigle_auth}"}
        response = requests.get(API_URL, params=api_params, headers=headers)
        response.raise_for_status()
        result = response.json()
        print(result)
        # Cache the result for REDIS_CACHE_EXPIRY before returning it.
        redis_client.setex(cache_key, int(REDIS_CACHE_EXPIRY.total_seconds()), json.dumps(result))
        return result
    except requests.exceptions.RequestException as e:
        raise WigleError(f"API request failed: {str(e)}")
def _parse_wigle_datetime(value: Optional[str]) -> Optional[datetime]:
    """Parse a Wigle "%Y-%m-%d %H:%M:%S" timestamp; None on missing/bad input."""
    if not value:
        return None
    try:
        return datetime.strptime(value, "%Y-%m-%d %H:%M:%S")
    except ValueError:
        return None


def parse_network_to_location(network: Dict[str, Any]) -> Location:
    """Convert a network result from the Wigle API to a Location object.

    The three timestamp fields ("lastupdt", "firsttime", "lasttime") share the
    same format and failure handling, so parsing is delegated to
    _parse_wigle_datetime instead of three copy-pasted try/except blocks.

    Args:
        network: One entry from the API's "results" list.

    Returns:
        A populated Location; optional fields missing from the payload are None.
    """
    return Location(
        ssid=network["ssid"],
        latitude=float(network["trilat"]),
        longitude=float(network["trilong"]),
        last_update=_parse_wigle_datetime(network.get("lastupdt")),
        encryption=network.get("encryption"),
        network_type=network.get("type"),
        channel=network.get("channel"),
        frequency=network.get("frequency"),
        qos=network.get("qos"),
        transid=network.get("transid"),
        firsttime=_parse_wigle_datetime(network.get("firsttime")),
        lasttime=_parse_wigle_datetime(network.get("lasttime")),
        country_code=network.get("country"),
        city=network.get("city"),
        region=network.get("region"),
        house_number=network.get("housenumber"),
        road=network.get("road"),
        address=network.get("address"),
    )
def get_all() -> List[Location]:
    """Search Wigle for known OpenRoaming SSIDs and return their locations.

    Rate limited to one request per minute, including pagination requests.

    Returns:
        List[Location]: List of found network locations

    Raises:
        WigleError: If the WIGLE environment variable is not set or API request fails
    """
    target_ssids = [
        "Adentro OpenRoaming",
        "OpenRoaming",
        "Passpoint",
        "PasspointAruba",
        "Cellular Wi-Fi Passthrough",
        "WBA_OpenRoaming",
    ]
    found: List[Location] = []
    for name in target_ssids:
        try:
            token = None
            # Page through results until Wigle stops handing back a token.
            while True:
                page = search_networks(
                    ssid=name,
                    encryption=Encryption.NONE,
                    network_type=NetworkType.WIFI,
                    results_per_page=100,
                    search_after=token,
                )
                if not page or not page.get("results"):
                    break
                found.extend(parse_network_to_location(entry) for entry in page["results"])
                token = page.get("searchAfter")
                if not token:
                    break
        except WigleError as e:
            raise WigleError(f"Error searching for {name}: {str(e)}")
    print(f"Found {len(found)} OpenRoaming network locations")
    return found
if __name__ == "__main__":
    # Fetch every known OpenRoaming location and pretty-print each record.
    all_locations = get_all()
    for location in all_locations:
        print(f"SSID: {location.ssid}")
        print(f"Location: ({location.latitude}, {location.longitude})")
        print(f"Network Type: {location.network_type or 'N/A'}")
        print(f"Encryption: {location.encryption or 'N/A'}")
        print(f"Last Update: {location.last_update or 'N/A'}")
        if location.address:
            print(f"Address: {location.address}")
        print("-" * 50)