Compare commits

...

14 Commits

Author SHA1 Message Date
dea3b0ec7f ... 2025-09-13 05:49:07 +02:00
581fb0c0f0 ... 2025-08-25 07:06:50 +02:00
e8d09164ff ... 2025-08-22 13:18:50 +02:00
bc0d90d41a ... 2025-08-22 13:11:04 +02:00
d80b956ff7 ... 2025-08-20 13:12:47 +02:00
c151e0749a ... 2025-08-20 05:49:47 +02:00
155ca42954 ... 2025-08-20 05:41:03 +02:00
672c1878da ... 2025-08-20 05:36:39 +02:00
bdfcbb9ea1 ... 2025-08-20 05:06:52 +02:00
b3c389e666 ... 2025-08-20 05:06:05 +02:00
c9a45d3435 ... 2025-08-20 04:32:30 +02:00
e4bb201181 ... 2025-08-20 04:15:43 +02:00
6b9f0cf291 ... 2025-08-20 04:01:35 +02:00
749c89aefc ... 2025-08-08 09:38:37 +02:00
136 changed files with 18700 additions and 1459 deletions

46
.gitignore vendored Normal file
View File

@@ -0,0 +1,46 @@
# Byte-code files
*.pyc
*.pyo
__pycache__
.pytest_cache/
# Distribution / packaging
.Python
build/
dist/
*.egg-info/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Editors
.vscode/
.idea/
# Logs and temporary files
*.log
*.tmp
/tmp/
# OS generated files
.DS_Store
.Trashes
Thumbs.db
# Specific to this project (from the test output)
/tmp/testlogs/

View File

@@ -1,2 +1,61 @@
# herolib_python
see also ~/code/git.ourworld.tf/tfgrid_research/tfdev
has some useful stuff as well
## Installation
You can install `herolib` directly from the Git repository using `uv pip`:
```bash
uv pip install git+https://git.ourworld.tf/herocode/herolib_python.git
```
To install in editable mode for development:
```bash
uv pip install -e git+https://git.ourworld.tf/herocode/herolib_python.git#egg=herolib
```
## Usage
Once installed, you can import modules from the `herolib` package:
```python
import herolib.core.loghandler.mylogging
# Or import specific functions/classes
from herolib.core.loghandler.mylogging import MyLogger
```
## how to integrate python in other projects
see [Python Herolib Integration](pythonsetup/README.md)
## Version Control
This library follows standard Git version control practices. Releases can be managed by tagging specific commits in the Git repository. Users installing directly from the Git URL can specify a particular branch, tag, or commit hash to get a specific version. For example:
```bash
# Install from a specific branch
uv pip install git+https://git.ourworld.tf/herocode/herolib_python.git@main
# Install from a specific tag (e.g., v0.1.0)
uv pip install git+https://git.ourworld.tf/herocode/herolib_python.git@v0.1.0
# Install from a specific commit hash
uv pip install git+https://git.ourworld.tf/herocode/herolib_python.git@<commit_hash>
```
## Adding as a Dependency in `pyproject.toml`
To include `herolib` as a dependency in another Python project that uses `pyproject.toml`, you can add it to the `dependencies` section of your project's `pyproject.toml` file. This is particularly useful for development or when you need to pin to a specific version or branch.
Example `pyproject.toml` for another project:
```toml
[project]
name = "my-other-project"
version = "0.1.0"
dependencies = [
"herolib @ git+https://git.ourworld.tf/herocode/herolib_python.git",
]

1762
aiprompts/libtmux_python.md Normal file

File diff suppressed because it is too large Load Diff

14342
aiprompts/psutil_python.md Normal file

File diff suppressed because it is too large Load Diff

47
atest.py Normal file
View File

@@ -0,0 +1,47 @@
#!/usr/bin/env python3
"""Transcribe a local audio file via an LM Studio Whisper model.

Steps: list the models the server exposes, pick the first whose id contains
"whisper", then POST the audio file to the OpenAI-compatible
/audio/transcriptions endpoint and print the returned text.
"""
import requests
import sys
import os

# ---- Config ----
LMSTUDIO_URL = "http://172.22.22.210:1234/v1"
AUDIO_FILE = "/Users/despiegk/Downloads/harvard.wav"  # change to your input file
# Fix: the original requests had no timeout and would hang forever on an
# unreachable server; transcription of a long file can be slow, so be generous.
REQUEST_TIMEOUT = 300  # seconds

# ---- Step 1: List available models ----
models_resp = requests.get(f"{LMSTUDIO_URL}/models", timeout=REQUEST_TIMEOUT)
models_resp.raise_for_status()
models = [m["id"] for m in models_resp.json().get("data", [])]
print("Available models:", models)

# ---- Step 2: Find Whisper ----
whisper_model = None
for m in models:
    if "whisper" in m.lower():
        whisper_model = m
        break
if not whisper_model:
    print("❌ No Whisper model found in LM Studio. Please download/start one.")
    sys.exit(1)
print(f"✅ Found Whisper model: {whisper_model}")

# ---- Step 3: Transcribe ----
if not os.path.exists(AUDIO_FILE):
    print(f"❌ Audio file '{AUDIO_FILE}' not found.")
    sys.exit(1)
with open(AUDIO_FILE, "rb") as f:
    files = {"file": f}
    data = {"model": whisper_model}
    headers = {"Authorization": "Bearer no-key"}  # LM Studio ignores key
    resp = requests.post(f"{LMSTUDIO_URL}/audio/transcriptions",
                         headers=headers,
                         files=files,
                         data=data,
                         timeout=REQUEST_TIMEOUT)
resp.raise_for_status()
result = resp.json()
print("📝 Transcription result:")
print(result.get("text", result))

26
examples/install.sh Normal file
View File

@@ -0,0 +1,26 @@
#!/bin/bash
# Bootstrap a fresh GPU box: install uv, system tooling, and the vllm stack.
cd /root
curl -LsSf https://astral.sh/uv/install.sh | sh
uv venv
apt update
apt install mc curl htop git -y
apt install -y build-essential python3.12-dev
source /root/.local/bin/env
export PATH=/root/.venv/bin/:$PATH
# Fix: the bare assignment was never exported, so the build/install commands
# below never saw VLLM_ATTENTION_BACKEND and the setting was a no-op.
export VLLM_ATTENTION_BACKEND=FLASHINFER
uv pip install --upgrade pip setuptools wheel ninja
uv pip install --upgrade tiktoken ipython numpy psutil
# uv pip install --pre torch==2.9.0.dev20250804+cu128 --index-url https://download.pytorch.org/whl/nightly/cu128
uv pip install vllm --torch-backend=auto
uv pip install flash-attn --no-build-isolation
uv pip install triton
uv pip install xformers
git clone https://github.com/flashinfer-ai/flashinfer.git
cd flashinfer
# NOTE(review): plain `pip install .` bypasses the uv tooling used everywhere
# else in this script — presumably meant to be `uv pip install .`; confirm.
pip install .
# uv pip install --upgrade vllm --torch-backend=auto
# uv pip install --upgrade flash-attn --no-build-isolation

72
examples/readme.md Normal file
View File

@@ -0,0 +1,72 @@
[](https://docs.vllm.ai/projects/recipes/en/latest/Ernie/Ernie4.5.html)
vllm bench throughput --model baidu/ERNIE-4.5-21B-A3B-PT --dataset-name random --input-len 8000 --output-len 1000 --num-prompts 16
vllm bench throughput \
--model baidu/ERNIE-4.5-21B-A3B-PT \
--dataset-name random \
--input-len 9000 \
--output-len 4000 \
--num-prompts 5 \
--max-model-len 13000 \
--gpu-memory-utilization 0.7
vllm bench throughput \
--model Qwen/Qwen3-30B-A3B-FP8 \
--dataset-name random \
--input-len 9000 \
--output-len 6000 \
--num-prompts 4 \
--max-model-len 15000 \
--gpu-memory-utilization 0.7
vllm bench throughput \
--model Qwen/Qwen3-30B-A3B-FP8 \
--dataset-name random \
--input-len 9000 \
--output-len 6000 \
--num-prompts 10 \
--max-model-len 15000 \
--gpu-memory-utilization 0.7 \
--swap-space 256
vllm bench throughput \
--model nvidia/NVIDIA-Nemotron-Nano-9B-v2 \
--dataset-name random \
--input-len 9000 \
--output-len 6000 \
--num-prompts 2 \
--max-model-len 15000 \
--gpu-memory-utilization 0.7 \
--trust-remote-code \
--max-num-seqs 64 \
--mamba-ssm-cache-dtype float16
#IS A GOOD ONE:
vllm bench throughput \
--model Qwen/Qwen3-30B-A3B-FP8 \
--dataset-name random \
--input-len 12000 \
--output-len 6000 \
--num-prompts 12 \
--max-model-len 20000 \
--gpu-memory-utilization 0.7 \
--kv-cache-dtype fp8 \
--swap-space 128
//be careful with swap space, it can make it slower if too high, PCI bandwidth is limited
VLLM_ATTENTION_BACKEND=TRITON_ATTN_VLLM_V1
vllm bench throughput --model openai/gpt-oss-20b --dataset-name random --input-len 12000 --output-len 6000 --num-prompts 8 --max-model-len 20000 --gpu-memory-utilization 0.7 --kv-cache-dtype fp8

17
examples/tmuxrunner_start.py Executable file
View File

@@ -0,0 +1,17 @@
#!/usr/bin/env python3
"""CLI entry point: run a TaskOrchestrator over a directory of task files."""
import sys

from herolib.infra.tmuxrunner.task_runner_enhanced import TaskOrchestrator


def main():
    """Parse CLI args (tasks dir, optional API port) and run the orchestrator."""
    tasks_dir = sys.argv[1]
    # Second positional argument is the API port; default to 8000.
    api_port = int(sys.argv[2]) if len(sys.argv) > 2 else 8000
    orchestrator = TaskOrchestrator(tasks_dir, api_port)
    orchestrator.run()


if __name__ == "__main__":
    if len(sys.argv) < 2:
        # Fix: the usage line hard-coded a stale script name
        # ("enhanced_runner.py"); report the actual invocation name instead.
        print(f"Usage: {sys.argv[0]} <tasks_directory_path> [api_port]")
        sys.exit(1)
    main()

View File

@@ -0,0 +1,32 @@
# Common error handling setup
#
# Meant to be *sourced* by task scripts. On any failing command (via the ERR
# trap) it writes "<script>.error" containing the exit code, line number and
# command; scripts call mark_done on success to create "<script>.done".
# A supervisor can then poll for these marker files.
# set -euo pipefail
SCRIPT="${BASH_SOURCE[-1]}" # last sourced = the actual script file
ERROR_FILE="$SCRIPT.error"
DONE_FILE="$SCRIPT.done"
# Reset markers
rm -f "$ERROR_FILE" "$DONE_FILE"

# Write diagnostics of the failed command to ERROR_FILE.
# $1 = line number, $2 = failing command (both supplied by the ERR trap).
error_handler() {
    local exit_code=$?
    local line_no=$1
    local cmd="$2"
    {
        echo "EXIT_CODE=$exit_code"
        echo "LINE=$line_no"
        echo "COMMAND=$cmd"
    } > "$ERROR_FILE"
    # If we are inside a sourced script, don't kill the shell
    if [[ "${BASH_SOURCE[0]}" != "$0" ]]; then
        return $exit_code
    else
        exit $exit_code
    fi
}
trap 'error_handler ${LINENO} "$BASH_COMMAND"' ERR

# Create the success marker; call at the very end of a task script.
mark_done() {
    touch "$DONE_FILE"
}

View File

@@ -0,0 +1,12 @@
# Run python from the project-local virtualenv without polluting the
# caller's shell environment.
hpy() {
    local venv_activate=".venv/bin/activate"
    if [ ! -f "$venv_activate" ]; then
        echo "Error: .venv not found in current directory" >&2
        return 1
    fi
    # The subshell keeps the activation (PATH/VIRTUAL_ENV changes) local.
    (
        source "$venv_activate"
        python "$@"
    )
}

View File

@@ -0,0 +1,19 @@
# Resolve the name of the single running tmux session into SESSIONNAME
# (exported) and print it. Fails when zero or more than one session exists.
get_session() {
    local sessions
    sessions=$(tmux ls 2>/dev/null | cut -d: -f1)
    # Fix: `wc -l` reports 1 for an empty string (echo emits a newline), so
    # the original "-eq 0" branch could never fire and an empty SESSIONNAME
    # was exported. Test emptiness explicitly instead.
    if [ -z "$sessions" ]; then
        echo "Error: no tmux sessions found." >&2
        return 1
    fi
    local count
    count=$(echo "$sessions" | wc -l)
    if [ "$count" -gt 1 ]; then
        echo "Error: more than one tmux session found:" >&2
        echo "$sessions" >&2
        return 1
    fi
    export SESSIONNAME="$sessions"
    echo "$SESSIONNAME"
}

View File

@@ -0,0 +1,3 @@
# Remote host used by the example deployment scripts (sourced, not executed).
export SSH_SERVER=38.79.155.162
export SSH_PORT=61092

View File

@@ -0,0 +1,10 @@
#!/bin/bash
set -euo pipefail
# base.sh installs the ERR trap that writes "<script>.error" and provides
# mark_done. NOTE(review): the relative path assumes the script is executed
# from its own directory — confirm against the runner.
source ../../functions/base.sh
apt update
apt upgrade -y
# NOTE(review): Debian's "psutils" is the PostScript utilities package —
# confirm this wasn't meant to be the Python "psutil" (a pip package).
apt install -y tmux btop nvtop psutils htop
mark_done

View File

@@ -0,0 +1,57 @@
#!/usr/bin/env bash
set -euo pipefail
# Fix: the line read "source source ../../functions/base.sh" — the duplicated
# word made bash try to source a file literally named "source", which fails
# and aborts the script under `set -e`. base.sh provides the ERR trap and
# the mark_done marker helper.
source ../../functions/base.sh

# --- create ~/.tmux.conf ---
TMUX_CONF="$HOME/.tmux.conf"
cat > "$TMUX_CONF" <<'EOF'
# ~/.tmux.conf
# Enable mouse support (scroll, resize, select panes/windows)
set -g mouse on
# Use the mouse wheel to scroll in copy mode automatically
bind -T root WheelUpPane if-shell -F -t = "#{mouse_any_flag}" \
"send-keys -M" "if -Ft= '#{pane_in_mode}' 'send-keys -M' 'copy-mode -e'"
# Allow resizing panes by dragging borders
setw -g aggressive-resize on
# Easier navigation in copy mode
setw -g mode-keys vi
# Status bar improvements
set -g status-bg black
set -g status-fg green
set -g status-left-length 40
set -g status-left '#S '
set -g status-right '#(whoami)@#H %Y-%m-%d %H:%M'
# Pane borders more visible
set -g pane-border-style fg=cyan
set -g pane-active-border-style fg=yellow
# Reload config quickly
bind r source-file ~/.tmux.conf \; display-message "Reloaded tmux.conf"
# Use system clipboard on macOS
if-shell "command -v pbcopy >/dev/null 2>&1" \
"bind -T copy-mode-vi y send -X copy-pipe-and-cancel 'pbcopy'" \
"bind -T copy-mode-vi y send -X copy-pipe-and-cancel 'xclip -selection clipboard -in'"
EOF
echo "✅ Wrote $TMUX_CONF"

# --- apply config if tmux is running ---
if pgrep -x tmux >/dev/null 2>&1; then
    echo "🔄 Reloading tmux config..."
    tmux source-file "$TMUX_CONF"
else
    echo " tmux is not running yet. Config will apply on next start."
fi
mark_done

View File

@@ -0,0 +1,42 @@
#!/bin/bash
set -euo pipefail
# Fix: was "source source ../../functions/base.sh" — the duplicated word made
# bash source a file literally named "source", aborting under `set -e`.
source ../../functions/base.sh
# Script is currently disabled: mark it done and bail out before the install
# steps. Everything below is kept for when it is re-enabled.
mark_done
exit 0
URL="https://github.com/ollama/ollama/releases/download/v0.11.6/ollama-linux-amd64.tgz"
TGZ="/tmp/ollama.tgz"
INSTALL_PATH="/usr/bin/ollama"
echo "[*] Checking for running ollama serve..."
if pgrep -x "ollama" > /dev/null; then
    echo "[*] Stopping running ollama process..."
    pkill -9 ollama
    sleep 2
fi
echo "[*] Downloading ollama..."
curl -L "$URL" -o "$TGZ"
echo "[*] Extracting..."
tar -xzf "$TGZ" -C /tmp
echo "[*] Installing to $INSTALL_PATH..."
sudo mv /tmp/ollama "$INSTALL_PATH"
sudo chmod +x "$INSTALL_PATH"
pkill -9 ollama
SESSION=$(tmux display-message -p '#S')
echo "[*] Using tmux session: $SESSION"
echo "[*] Started ollama pulls in tmux windows."
ollama pull adhishtanaka/llama_3.2_1b-SQL
mark_done

View File

@@ -0,0 +1,17 @@
#!/bin/bash
set -euo pipefail
source source ../../functions/base.sh
curl -LsSf https://astral.sh/uv/install.sh | sh
source $HOME/.local/bin/env
# vllm serve openai/gpt-oss-20b
# vllm serve openai/gpt-oss-20b --tensor-parallel-size 8
# For 120B
# vllm serve openai/gpt-oss-120b --tensor-parallel-size 8
mark_done

View File

@@ -0,0 +1,47 @@
#!/bin/bash
set -euo pipefail
source source ../../functions/base.sh
mark_done
exit 0
# uv pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/cu121
# uv pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/cu128
touch "$0.done"
exit 0
cd /root
uv venv
source .venv/bin/activate
uv pip install --pre torch==2.9.0.dev20250804+cu129 \
--index-url https://download.pytorch.org/whl/nightly/cu129
uv pip install tiktoken ipython numpy psutil
# 4. Confirm it's correct
python -c "import torch; print(torch.__version__, torch.version.cuda)"
# 2.9.0.dev20250804+cu128 12.8
source .venv/bin/activate
uv pip install --upgrade pip setuptools wheel ninja
export MAX_JOBS=8
export TORCH_CUDA_ARCH_LIST="12.0"
export NCCL_P2P_DISABLE=0
export NCCL_DEBUG=INFO
export CUDA_DEVICE_MAX_CONNECTIONS=1
uv pip install vllm --torch-backend=auto
uv pip install flash-attn --no-build-isolation
# uv pip install --pre vllm==0.10.1+gptoss \
# --extra-index-url https://wheels.vllm.ai/gpt-oss/ \
# --extra-index-url https://download.pytorch.org/whl/nightly/cu128 \
# --index-strategy unsafe-best-match
mark_done

77
herolib.egg-info/PKG-INFO Normal file
View File

@@ -0,0 +1,77 @@
Metadata-Version: 2.4
Name: herolib
Version: 0.1.0
Summary: A Python library for HeroCode
Author-email: Kilo Code <kilo.code@example.com>
Requires-Python: >=3.12
Description-Content-Type: text/markdown
Requires-Dist: peewee
Requires-Dist: psutil>=5.9.0
Requires-Dist: fastapi>=0.100.0
Requires-Dist: uvicorn>=0.23.0
Requires-Dist: toml>=0.10.2
Requires-Dist: libtmux>=0.25.0
Requires-Dist: lmstudio
Requires-Dist: requests
# herolib_python
see also ~/code/git.ourworld.tf/tfgrid_research/tfdev
has some useful stuff as well
## Installation
You can install `herolib` directly from the Git repository using `uv pip`:
```bash
uv pip install git+https://git.ourworld.tf/herocode/herolib_python.git
```
To install in editable mode for development:
```bash
uv pip install -e git+https://git.ourworld.tf/herocode/herolib_python.git#egg=herolib
```
## Usage
Once installed, you can import modules from the `herolib` package:
```python
import herolib.core.loghandler.mylogging
# Or import specific functions/classes
from herolib.core.loghandler.mylogging import MyLogger
```
## how to integrate python in other projects
see [Python Herolib Integration](pythonsetup/README.md)
## Version Control
This library follows standard Git version control practices. Releases can be managed by tagging specific commits in the Git repository. Users installing directly from the Git URL can specify a particular branch, tag, or commit hash to get a specific version. For example:
```bash
# Install from a specific branch
uv pip install git+https://git.ourworld.tf/herocode/herolib_python.git@main
# Install from a specific tag (e.g., v0.1.0)
uv pip install git+https://git.ourworld.tf/herocode/herolib_python.git@v0.1.0
# Install from a specific commit hash
uv pip install git+https://git.ourworld.tf/herocode/herolib_python.git@<commit_hash>
```
## Adding as a Dependency in `pyproject.toml`
To include `herolib` as a dependency in another Python project that uses `pyproject.toml`, you can add it to the `dependencies` section of your project's `pyproject.toml` file. This is particularly useful for development or when you need to pin to a specific version or branch.
Example `pyproject.toml` for another project:
```toml
[project]
name = "my-other-project"
version = "0.1.0"
dependencies = [
"herolib @ git+https://git.ourworld.tf/herocode/herolib_python.git",
]

View File

@@ -0,0 +1,83 @@
README.md
pyproject.toml
herolib/__init__.py
herolib.egg-info/PKG-INFO
herolib.egg-info/SOURCES.txt
herolib.egg-info/dependency_links.txt
herolib.egg-info/requires.txt
herolib.egg-info/top_level.txt
herolib/clients/__init__.py
herolib/clients/assemblyai/__init__.py
herolib/clients/assemblyai/client.py
herolib/clients/stellar/__init__.py
herolib/clients/stellar/horizon.py
herolib/clients/stellar/model.py
herolib/clients/stellar/testnet.py
herolib/clients/telegram/__init__.py
herolib/clients/telegram/bot.py
herolib/clients/telegram/bot_audio.py
herolib/clients/telegram/bot_text.py
herolib/clients/telegram/errorqueue.py
herolib/clients/vimeo/__init__.py
herolib/clients/vimeo/client.py
herolib/clients/vimeo/model_video.py
herolib/clients/whisper/__init__.py
herolib/clients/whisper/convert.py
herolib/clients/whisper/whisper.py
herolib/clients/wireless/__init__.py
herolib/clients/wireless/wigle_net.py
herolib/core/__init__.py
herolib/core/heroscript/__init__.py
herolib/core/heroscript/heroaction.py
herolib/core/heroscript/heroscripts.py
herolib/core/heroscript/mixin.py
herolib/core/heroscript/tools.py
herolib/core/heroscript/examples/__init__.py
herolib/core/heroscript/examples/heroscript_example.py
herolib/core/heroscript/examples/heroscript_example2.py
herolib/core/heroscript/examples/wiki/__init__.py
herolib/core/heroscript/examples/wiki/sub/__init__.py
herolib/core/logger/__init__.py
herolib/core/logger/factory.py
herolib/core/logger/log.py
herolib/core/logger/log_test.py
herolib/core/logger/model.py
herolib/core/logger/search.py
herolib/core/loghandler/__init__.py
herolib/core/loghandler/mylogging.py
herolib/core/pathlib/__init__.py
herolib/core/pathlib/pathlib.py
herolib/core/texttools/__init__.py
herolib/core/texttools/texttools.py
herolib/crypt/__init__.py
herolib/crypt/box/__init__.py
herolib/crypt/box/box.py
herolib/crypt/box/box_api.py
herolib/data/__init__.py
herolib/data/ourtime/__init__.py
herolib/data/ourtime/ourtime.py
herolib/downloader/__init__.py
herolib/downloader/scrape_dynamic/dynamic_crawl.py
herolib/downloader/scrape_scapegraph/main.py
herolib/downloader/scrape_scapegraph/scrape.py
herolib/downloader/scrape_scapegraph/scrape_md.py
herolib/downloader/scrape_scapegraph/scrape_search.py
herolib/downloader/scrape_scapegraph/scrape_with_local_llm.py
herolib/downloader/scrape_scapegraph/scrape_with_local_llm_search.py
herolib/infra/tmuxrunner/model.py
herolib/infra/tmuxrunner/process_monitor.py
herolib/infra/tmuxrunner/task_runner.py
herolib/infra/tmuxrunner/task_runner_api.py
herolib/tools/__init__.py
herolib/tools/extensions.py
herolib/tools/gitscanner.py
herolib/tools/logger.py
herolib/tools/md5.py
herolib/tools/ourtime.py
herolib/tools/pathtools.py
herolib/tools/texttools.py
herolib/web/__init__.py
herolib/web/doctools/__init__.py
herolib/web/doctools/html_replacer.py
herolib/web/doctools/md_replacer.py
herolib/web/doctools/processor.py

View File

@@ -0,0 +1 @@

View File

@@ -0,0 +1,8 @@
peewee
psutil>=5.9.0
fastapi>=0.100.0
uvicorn>=0.23.0
toml>=0.10.2
libtmux>=0.25.0
lmstudio
requests

View File

@@ -0,0 +1 @@
herolib

Binary file not shown.

View File

0
herolib/core/__init__.py Normal file
View File

Binary file not shown.

View File

View File

Binary file not shown.

Binary file not shown.

View File

@@ -1,5 +1,5 @@
from lib.core.pathlib.pathlib import get_dir
from lib.core.logger.model import Logger
from herolib.core.pathlib.pathlib import get_dir
from herolib.core.logger.model import Logger
def new(path: str) -> Logger:
p = get_dir(path=path, create=True)

View File

@@ -1,10 +1,11 @@
import unittest
import os
import shutil
from lib.core.logger.factory import new
from lib.core.logger.model import LogItemArgs, LogType, Logger # Import Logger class
from lib.data.ourtime.ourtime import new as ourtime_new, now as ourtime_now
from lib.core.pathlib.pathlib import get_file, ls, rmdir_all
from herolib.core.logger.factory import new
from herolib.core.logger.model import LogItemArgs, LogType, Logger # Import Logger class
from herolib.data.ourtime.ourtime import new as ourtime_new, now as ourtime_now
from herolib.core.pathlib.pathlib import get_file, ls, rmdir_all
from herolib.core.logger.search import search, SearchArgs
class TestLogger(unittest.TestCase):
def setUp(self):
@@ -85,18 +86,18 @@ class TestLogger(unittest.TestCase):
self.assertEqual(len(files), 2) # Expecting two files: 2022-12-05-20.log and 2022-12-05-22.log
# Test search functionality
items_stdout = logger.search(
items_stdout = search(logger, SearchArgs(
timestamp_from=ourtime_new('2022-11-01 20:14:35'),
timestamp_to=ourtime_new('2025-11-01 20:14:35'),
logtype=LogType.STDOUT
)
))
self.assertEqual(len(items_stdout), 2)
items_error = logger.search(
items_error = search(logger, SearchArgs(
timestamp_from=ourtime_new('2022-11-01 20:14:35'),
timestamp_to=ourtime_new('2025-11-01 20:14:35'),
logtype=LogType.ERROR
)
))
self.assertEqual(len(items_error), 4)
# Test specific log content
@@ -115,34 +116,34 @@ class TestLogger(unittest.TestCase):
self.assertTrue(found_stdout_log, "Expected stdout log content not found")
# Test search by category
items_test_app = logger.search(
items_test_app = search(logger, SearchArgs(
timestamp_from=ourtime_new('2022-11-01 20:14:35'),
timestamp_to=ourtime_new('2025-11-01 20:14:35'),
cat='test-app'
)
))
self.assertEqual(len(items_test_app), 2)
items_error_test = logger.search(
items_error_test = search(logger, SearchArgs(
timestamp_from=ourtime_new('2022-11-01 20:14:35'),
timestamp_to=ourtime_new('2025-11-01 20:14:35'),
cat='error-test'
)
))
self.assertEqual(len(items_error_test), 4)
# Test search by log content
items_with_aaa = logger.search(
items_with_aaa = search(logger, SearchArgs(
timestamp_from=ourtime_new('2022-11-01 20:14:35'),
timestamp_to=ourtime_new('2025-11-01 20:14:35'),
log='aaa'
)
))
self.assertEqual(len(items_with_aaa), 2)
# Test search with timestamp range
items_specific_time = logger.search(
items_specific_time = search(logger, SearchArgs(
timestamp_from=ourtime_new('2022-12-05 22:00:00'),
timestamp_to=ourtime_new('2022-12-05 23:00:00'),
logtype=LogType.ERROR
)
))
self.assertEqual(len(items_specific_time), 2)

View File

@@ -1,7 +1,7 @@
from enum import Enum
from typing import Optional
from lib.data.ourtime.ourtime import OurTime
from lib.core.pathlib.pathlib import Path
from herolib.data.ourtime.ourtime import OurTime
from herolib.core.pathlib.pathlib import Path
class LogType(Enum):
STDOUT = "stdout"
@@ -15,8 +15,8 @@ class LogItemArgs:
self.logtype = logtype
import os
from lib.core.texttools.texttools import name_fix, expand, dedent
from lib.data.ourtime.ourtime import OurTime, now as ourtime_now
from herolib.core.texttools.texttools import name_fix, expand, dedent
from herolib.data.ourtime.ourtime import OurTime, now as ourtime_now
class Logger:
def __init__(self, path: Path, lastlog_time: int = 0):

View File

@@ -0,0 +1,102 @@
import os
from typing import Optional, List
from herolib.core.texttools.texttools import name_fix
from herolib.data.ourtime.ourtime import OurTime, new as ourtime_new
from herolib.core.logger.model import Logger, LogItem, LogType
class SearchArgs:
    """Filter criteria for searching log items.

    Attributes mirror the constructor arguments: an optional time window
    (``timestamp_from`` / ``timestamp_to``), a category name, a substring to
    match in the log text, an optional log type, and a cap on the number of
    returned items.
    """

    def __init__(self, timestamp_from: Optional[OurTime] = None,
                 timestamp_to: Optional[OurTime] = None,
                 cat: str = "", log: str = "", logtype: Optional[LogType] = None,
                 maxitems: int = 10000):
        # Result-count cap and type filter.
        self.maxitems = maxitems
        self.logtype = logtype
        # Text filters: category (exact, after normalization by the caller)
        # and case-insensitive substring on the log body.
        self.cat = cat
        self.log = log
        # Optional inclusive time window.
        self.timestamp_from = timestamp_from
        self.timestamp_to = timestamp_to
def search(l: Logger, args_: SearchArgs) -> List[LogItem]:
    """Search a Logger's on-disk hourly log files for matching items.

    Scans the ``*.log`` files under ``l.path.path`` (named ``YYYY-MM-DD-HH.log``,
    judging by the dayhour parse below), skips files whose hour window falls
    outside the requested time range, then parses each file line by line into
    LogItem records and keeps those matching the SearchArgs filters.

    Args:
        l: Logger whose directory is scanned.
        args_: Filters. NOTE(review): this object is mutated — ``cat`` is
            normalized in place via name_fix, so the caller's instance changes.

    Returns:
        Matching LogItems, at most ``args_.maxitems``.

    Raises:
        ValueError: category longer than 10 chars, or from_time > to_time.
    """
    args = args_  # alias, not a copy — see mutation note above
    args.cat = name_fix(args.cat)
    if len(args.cat) > 10:
        raise ValueError('category cannot be longer than 10 chars')
    # Default window: epoch 0 up to year 2100; +1 makes timestamp_to inclusive.
    from_time = args.timestamp_from.unix() if args.timestamp_from else 0
    to_time = args.timestamp_to.unix() + 1 if args.timestamp_to else ourtime_new('2100-01-01').unix()
    if from_time > to_time:
        raise ValueError(f'from_time cannot be after to_time: {from_time} > {to_time}')
    result: List[LogItem] = []
    if not os.path.exists(l.path.path):
        return []
    files = sorted(os.listdir(l.path.path))
    for file in files:
        if not file.endswith('.log'):
            continue
        # Filename minus ".log" is the day-hour stamp of the file.
        dayhour = file[:-4]
        try:
            file_time = ourtime_new(dayhour)
        except ValueError:
            continue
        # Each file covers one hour; skip files wholly outside the window.
        file_hour_start_unix = file_time.unix()
        file_hour_end_unix = file_hour_start_unix + 3599
        if file_hour_end_unix < from_time or file_hour_start_unix > to_time:
            continue
        try:
            with open(os.path.join(l.path.path, file), 'r') as f:
                content = f.read()
        except FileNotFoundError:
            # File vanished between listdir and open — ignore.
            continue
        # Parser state: the timestamp currently in effect and the item being
        # accumulated (items may span multiple continuation lines).
        current_time = None
        current_item = None
        for line in content.splitlines():
            if len(result) >= args.maxitems:
                break
            # Helper to add current_item to result if it matches criteria
            def _add_item_if_matches():
                nonlocal current_item
                if current_item:
                    if from_time <= current_item.timestamp.unix() <= to_time:
                        if (not args.cat or args.cat == current_item.cat) and \
                           (not args.log or args.log.lower() in current_item.log.lower()) and \
                           (args.logtype is None or args.logtype == current_item.logtype):
                            result.append(current_item)
                    current_item = None  # Reset after processing
            if not line.strip():  # Empty line, finalize previous item
                _add_item_if_matches()
                continue
            # Lines not indented and not starting with 'E' carry a bare time
            # (combined with the file's date); 'E' marks error entries below.
            if not line.startswith(' ') and not line.startswith('E'):  # New timestamp line
                _add_item_if_matches()  # Finalize previous item
                try:
                    current_time = ourtime_new(f"{file_time.day()} {line.strip()}")
                except ValueError:
                    current_time = None
                current_item = None  # Reset for new item
            elif current_time:
                # Fixed-width entry layout: cat in cols 2-11, '-' at col 13,
                # message from col 15 — presumably matching the writer's
                # format; confirm against Logger's write path.
                if len(line) > 14 and line[13] == '-':  # New log entry line
                    _add_item_if_matches()  # Finalize previous item
                    is_error = line.startswith('E')
                    logtype = LogType.ERROR if is_error else LogType.STDOUT
                    cat = line[2:12].strip()
                    log_content = line[15:]
                    current_item = LogItem(timestamp=current_time, cat=cat, log=log_content.strip(), logtype=logtype)
                elif current_item:  # Continuation line
                    current_item.log += "\n" + (line[15:] if len(line) >15 else line)
        _add_item_if_matches()  # Finalize the last item in the file
    return result

View File

View File

@@ -0,0 +1,214 @@
from peewee import *
import time
from datetime import datetime
from typing import Optional, List, Dict, Any, Iterable, Union
import os
import logging
import traceback
# Configure database path
DB_DIR = os.path.expanduser('~/hero/var/logdb/')
DB_FILE = os.path.join(DB_DIR, 'logs.db')
# Create directory if it doesn't exist
os.makedirs(DB_DIR, exist_ok=True)
# Initialize database; WAL journal mode lets readers proceed during writes.
database = SqliteDatabase(DB_FILE, pragmas={'journal_mode': 'wal'})
class BaseModel(Model):
    """Base model class for Peewee; binds all models to the shared SQLite DB."""

    class Meta:
        # Module-level SqliteDatabase configured above.
        database = database

    def to_dict(self) -> Dict[str, Any]:
        """Convert model instance to dictionary."""
        data = {}
        for field_name in self._meta.fields:
            field_value = getattr(self, field_name)
            if field_name in ('time', 'last_seen') and isinstance(field_value, int):
                # Convert epoch to a readable format for the frontend
                data[field_name] = datetime.fromtimestamp(field_value).strftime('%d-%m %H:%M')
            else:
                data[field_name] = field_value
        return data
class Log(BaseModel):
    """Model for INFO logs."""

    # Creation time as Unix epoch seconds.
    time = IntegerField(default=lambda: int(time.time()), index=True)
    # Optional address of the user the entry relates to.
    email = CharField(max_length=255, null=True)
    # The log message text.
    logmsg = TextField()
    # Numeric severity; get_logs() filters with `level <= requested`.
    level = IntegerField(default=100)
    # Category used for prefix-based filtering in get_logs().
    cat = CharField(max_length=100, index=True, default="general")
    # Optional free-form payload and a label describing it.
    payload = TextField(null=True)
    payload_cat = CharField(max_length=100, null=True)

    class Meta:
        table_name = 'logs'
class Error(BaseModel):
    """Model for ERROR logs; duplicate errors within 24h are deduplicated."""

    # First-seen time as Unix epoch seconds.
    time = IntegerField(default=lambda: int(time.time()), index=True)
    # Updated on every repeat occurrence (see log_error dedup logic).
    last_seen = IntegerField(default=lambda: int(time.time()), index=True)
    # Optional address of the user the error relates to.
    email = CharField(max_length=255, null=True)
    # The error message text.
    logmsg = TextField()
    # Formatted stack trace, if one was captured.
    stacktrace = TextField(null=True)
    # Number of occurrences folded into this row.
    count = IntegerField(default=1)
    # Category used for prefix-based filtering in get_errors().
    cat = CharField(max_length=100, index=True, default="general")
    # Optional free-form payload and a label describing it.
    payload = TextField(null=True)
    payload_cat = CharField(max_length=100, null=True)

    class Meta:
        table_name = 'errors'
def init_db_logging():
    """Create tables if they don't exist."""
    # `with database:` opens a connection for the DDL; safe=True makes the
    # CREATE TABLE statements no-ops when the tables already exist.
    with database:
        database.create_tables([Log, Error], safe=True)
class DatabaseLogHandler(logging.Handler):
    """A logging handler that writes logs to the Peewee database."""

    def emit(self, record):
        """Route a stdlib logging record to log_error or log_info."""
        exc_text = None
        if record.exc_info:
            exc_text = logging.Formatter().formatException(record.exc_info)
        # ERROR and above become Error rows; everything else an info row.
        if record.levelno >= logging.ERROR:
            log_error(
                msg=record.getMessage(),
                cat=record.name,
                stacktrace=exc_text,
            )
        else:
            log_info(
                msg=record.getMessage(),
                level=record.levelno,
                cat=record.name,
            )
def log_error(msg: str, cat: str = "general", email: Optional[str] = None, stacktrace: Optional[str] = None, payload: Optional[str] = None, payload_cat: Optional[str] = None):
    """Log an ERROR message to the database, handling duplicates.

    The message is also mirrored into the INFO log (best effort). If a row
    with the same message and email was seen within the last 24 hours, its
    counter is incremented and last_seen refreshed instead of inserting a
    new row.

    Args:
        msg: Error message text.
        cat: Category used for prefix-based filtering.
        email: Optional user address the error relates to.
        stacktrace: Pre-formatted stack trace; captured here when omitted.
        payload: Optional free-form payload.
        payload_cat: Label describing the payload.
    """
    try:
        # Mirror into the INFO log; a failure here must not block error logging.
        log_info(msg=msg, cat=cat, email=email, payload=payload, payload_cat=payload_cat)
    except Exception:
        pass
    try:
        if not stacktrace:
            # Capture the current stack trace if not provided
            stacktrace = "".join(traceback.format_stack())
        # Filter out irrelevant lines from the stack trace
        if stacktrace:
            lines = stacktrace.split('\n')
            filtered_lines = [
                line for line in lines
                if 'python3.13/logging' not in line and 'src/mylogging.py' not in line
            ]
            stacktrace = '\n'.join(filtered_lines)
        one_day_ago = int(time.time()) - (24 * 3600)
        # Look for a similar error in the last 24 hours from the same user
        existing_error = Error.select().where(
            (Error.logmsg == msg) &
            (Error.email == email) &
            (Error.last_seen >= one_day_ago)
        ).first()
        if existing_error:
            # If found, increment counter and update last_seen
            existing_error.count += 1
            existing_error.last_seen = int(time.time())
            existing_error.stacktrace = stacktrace
            existing_error.save()
            # Fix: removed a stray debug `print(existing_error)` that wrote
            # every deduplicated error to stdout.
        else:
            # Otherwise, create a new error record
            Error.create(
                logmsg=msg,
                cat=cat,
                email=email,
                stacktrace=stacktrace,
                payload=payload,
                payload_cat=payload_cat
            )
            logging.info(f"Successfully logged new error: {msg}")
    except Exception as e:
        logging.error(f"Failed to log error to {DB_FILE}: {e}")
def log_info(msg: str, level: int = 0, cat: str = "general", email: Optional[str] = None, payload: Optional[str] = None, payload_cat: Optional[str] = None):
    """Log an INFO message to the database."""
    try:
        Log.create(
            logmsg=msg,
            level=level,
            cat=cat,
            email=email,
            payload=payload,
            payload_cat=payload_cat,
        )
    except Exception as e:
        # Best effort: logging must never take the application down.
        print(f"Failed to log info to {DB_FILE}: {e}")
def get_errors(search: Optional[str] = None, cat: Optional[str] = None) -> List[Dict[str, Any]]:
    """Get errors from the database with optional filters. Category search is prefix-based."""
    # Newest-seen first; filters are applied only when provided.
    qry = Error.select().order_by(Error.last_seen.desc())
    if search:
        qry = qry.where(Error.logmsg.contains(search))
    if cat and cat.strip():
        qry = qry.where(Error.cat.startswith(cat.strip()))
    return [row.to_dict() for row in qry]
def get_logs(
    search: Optional[str] = None,
    cat: Optional[str] = None,
    level: Optional[int] = None,
    hours_ago: Optional[int] = None,
) -> List[Dict[str, Any]]:
    """Get logs from the database with optional filters. Category search is prefix-based."""
    # Newest first; each filter is applied only when a value was supplied.
    qry = Log.select().order_by(Log.time.desc())
    if search and search.strip():
        qry = qry.where(Log.logmsg.contains(search))
    if cat and cat.strip():
        qry = qry.where(Log.cat.startswith(cat.strip()))
    if level is not None:
        # At-most-this-severity semantics.
        qry = qry.where(Log.level <= level)
    if hours_ago is not None:
        cutoff = int(time.time()) - (hours_ago * 3600)
        qry = qry.where(Log.time >= cutoff)
    return [row.to_dict() for row in qry]
def get_log_by_id(log_id: int) -> Optional[Dict[str, Any]]:
    """Get a single log by its ID, or None when no such row exists."""
    try:
        return Log.get_by_id(log_id).to_dict()
    except Log.DoesNotExist:
        return None
def delete_logs_older_than(minutes: int):
    """Delete logs older than a specified number of minutes."""
    cutoff = int(time.time()) - minutes * 60
    Log.delete().where(Log.time < cutoff).execute()
def delete_errors_older_than(minutes: int):
    """Delete errors older than a specified number of minutes."""
    cutoff = int(time.time()) - minutes * 60
    Error.delete().where(Error.time < cutoff).execute()
def get_unique_log_categories() -> List[str]:
    """Get unique log categories from the database, sorted alphabetically."""
    rows = (Log.select(Log.cat)
               .where(Log.cat.is_null(False))
               .distinct()
               .order_by(Log.cat))
    return [row.cat for row in rows]
def get_unique_error_categories() -> List[str]:
    """Get unique error categories from the database, sorted alphabetically."""
    rows = (Error.select(Error.cat)
                 .where(Error.cat.is_null(False))
                 .distinct()
                 .order_by(Error.cat))
    return [row.cat for row in rows]

View File

View File

View File

@@ -1,4 +1,6 @@
import re
from datetime import datetime
import os
def name_fix(name: str) -> str:
# VLang's name_fix converts '-' to '_' and cleans up special chars.

View File

View File

0
herolib/data/__init__.py Normal file
View File

Binary file not shown.

View File

View File

@@ -93,6 +93,7 @@ def new(time_str: str) -> OurTime:
'%Y-%m-%d %H:%M:%S',
'%Y-%m-%d %H:%M',
'%Y-%m-%d %H',
'%Y-%m-%d-%H', # Add this format for dayhour parsing
'%Y-%m-%d',
'%d-%m-%Y %H:%M:%S',
'%d-%m-%Y %H:%M',

View File

View File

@@ -0,0 +1 @@
../../../../tfgrid_research/tfdev/research/scrape_dynamic

View File

@@ -0,0 +1 @@
../../../../tfgrid_research/tfdev/research/scrape_fast

View File

@@ -0,0 +1 @@
../../../../tfgrid_research/tfdev/research/scrape_scapegraph

Some files were not shown because too many files have changed in this diff Show More