This commit is contained in:
2025-08-20 04:15:43 +02:00
parent 6b9f0cf291
commit e4bb201181
95 changed files with 194 additions and 907 deletions

View File

View File

@@ -0,0 +1,38 @@
from pydantic import BaseModel, Field
from typing import Dict, Any, Type, TypeVar
from heroscript.heroscript import *
class User(BaseModel, HeroScriptMixin):
    """Example model: a user that can be rendered to and parsed from heroscript."""

    # Every field is required; only `name` carries extra constraints.
    oid: str
    name: str = Field(min_length=2, description="Chosen name by user", example="myname")
    city: str
    age: int
    description: str
# Example usage: build a User, render it as heroscript, then parse it back.
u1 = User(
    oid="abc123",
    name="John",
    age=30,
    city="New York",
    description="""
this is a multiline
we need to remove the
this will stay 4 chars in
end
""",
)

# Render the model as heroscript and show it.
myheroscript = u1.heroscript()
print(myheroscript)

# Parse the rendered heroscript back into a second User and show it.
u2 = User.from_heroscript(heroscript=myheroscript)
myprint(u2)

# Disabled: the same round trip for a Product model.
# p1 = Product(id=1, name="Phone", price=999.99, description="A smart phone")
# product_heroscript = p1.heroscript()
# print(product_heroscript)
# p2 = Product.from_heroscript(product_heroscript)
# print(p2)

View File

@@ -0,0 +1,78 @@
from pydantic import BaseModel, Field
from typing import Dict, Any, Type, TypeVar, List
from heroscript.heroscript import *
class Comment(BaseModel):
    """Free-text comment that can be attached to an object."""

    # Optional text; empty when not given.
    description: str = ""
class HeroBase(BaseModel, HeroScriptMixin):
    """Common base for heroscript-backed models: identifier, name and comments."""

    # `metadata` is passed through as an extra Field keyword.
    # NOTE(review): presumably consumed by the index/schema builder — confirm.
    oid: str = Field(default="",metadata={"unique": True})
    name: str = Field(min_length=2, description="Chosen name by user", example="myname",metadata={"unique": True})
    # Was required (`...`). Heroscript parsing only yields flat string
    # properties, so a required list of Comment objects could never be supplied
    # and every object built from heroscript failed validation. Default to an
    # empty list instead; callers that pass `comments=` are unaffected.
    comments: List[Comment] = Field(default_factory=list, description="Comment which can be attached to obj")
class User(HeroBase):
    """Example user model layered on HeroBase."""

    # `metadata` marks the fields the example wants indexed.
    city: str = Field(metadata={"index": True})
    age: int = Field(metadata={"index": True})
    # Optional free text; empty when not given.
    description: str = ""
class Product(BaseModel, HeroScriptMixin):
    """Example product model that can be rendered to and parsed from heroscript."""

    # The default was the empty string, which does not match the declared int
    # type (pydantic does not validate defaults unless asked to), so a Product
    # built without `id` carried a str. Use an int default instead.
    id: int = Field(default=0,metadata={"unique": True})
    name: str = Field(metadata={"unique": True})
    # Required fields without extra constraints.
    price: float = Field()
    description: str = Field()
# Inline sample document; only used by the disabled `content=` variant below.
myheroscript = """
```hero
!!user.define
oid:abc123
name:John
description:'
this is a multiline
we need to remove the
this will stay 4 chars in
end
'
age:30
city:'New York'
!!product.define
id:33
name:aproduct
description:'
this is a multiline
we need to remove the
this will stay 4 chars in
end
'
price:10.0
```
"""

# hs=HeroScripts(class_types={"user":User,"product":Product},content=myheroscript)

# Load every heroscript found under the example directory.
mypath = "~/code/git.threefold.info/tfgrid/hero_research/hero/osis/heroscript/example"
hs = HeroScripts(
    class_types={"user": User, "product": Product},
    path=mypath,
)

# Show the model objects that could be built from the loaded scripts.
objs = hs.get_objects()
for o in objs:
    myprint(o)

# Show the raw heroscript entries.
for item in hs.heroscripts:
    print(item)

# Search the User index and print each hit.
query = "john*"
results = hs.search(User, query)
for r in results:
    # print(f"User: {r["path"]}")
    print(r)

View File

@@ -0,0 +1 @@
{"/Users/despiegk/code/git.threefold.info/tfgrid/hero_research/hero/osis/heroscript/example/testFile.md": "f6e8b6a32349c262cb9afbea771c5add", "/Users/despiegk/code/git.threefold.info/tfgrid/hero_research/hero/osis/heroscript/example/sub/test file 2.md": "0ecc29046b6ef743481358e4c5630a6d"}

View File

@@ -0,0 +1,15 @@
# header
!!product.define
id:33
name:aproduct
description:'
this is a multiline
we need to remove the
this will stay 4 chars in
end
'
price:10.0
something else

View File

@@ -0,0 +1,22 @@
!!user.define
oid:abc123
name:John
description:'
this is a multiline
we need to remove the
this will stay 4 chars in
end
'
age:30
city:'New York'
```heroscript
!!user.define
oid:4nd
name:John2
age:40
city:bxl
```

View File

@@ -0,0 +1,207 @@
from herotools.texttools import dedent
from typing import List, Dict, Tuple
import re
from heroscript.tools import action_blocks,format_multiline_text,heroscript_repr
import textwrap
class HeroActions:
    """Ordered collection of HeroAction objects parsed from a path or from text."""

    def __init__(self, path: str = "", content:str = ""):
        """Split the input into action blocks and wrap each one in a HeroAction.

        Args:
            path: forwarded to `action_blocks` as `path`.
            content: forwarded to `action_blocks` as `content`.
        """
        self.actions : List[HeroAction] = []
        self.actions.extend(
            HeroAction(raw) for raw in action_blocks(path=path, content=content)
        )

    def __repr__(self):
        """Return the repr of every action, each followed by a newline."""
        return "".join(action.__repr__() + "\n" for action in self.actions)
class HeroAction:
    """A single `!!name` action together with its parsed parameters."""

    def __init__(self, content: str):
        """Parse exactly one action out of `content`.

        Raises:
            ValueError: when `content` holds no action or more than one.
        """
        found = action_blocks(content=content)
        count = len(found)
        if count == 0:
            raise ValueError(f"don't find actions in {content}")
        if count > 1:
            raise ValueError(f"Found more than one action in {content}")
        # First line carries the action name, the remainder the parameters.
        self.name, remainder = _name_paramstr(found[0])
        self.params = Params(remainder)

    def __str__(self):
        """Return the action header followed by the indented parameter lines."""
        body = textwrap.indent(self.params.__str__(), " ")
        return f"!!{self.name}\n{body}"

    def __repr__(self):
        """Return the coloured rendering of the string form."""
        return heroscript_repr(self.__str__())
class Params:
    """Typed accessors over the key/value pairs of one heroscript action."""

    def __init__(self, content: str):
        """Parse `content` with `params_parse` and keep the resulting dict."""
        self.__params = params_parse(content)

    def __str__(self):
        """Render the parameters as sorted `key: value` lines.

        Values containing a single quote are emitted untouched, multi-line
        values go through `format_multiline_text`, values containing a space
        are wrapped in single quotes, everything else is emitted as-is.
        """
        rendered = []
        for key, value in sorted(self.__params.items()):
            if "'" in value:
                shown = value
            elif "\n" in value:
                shown = format_multiline_text(value)
            elif " " in value:
                shown = f"'{value}'"
            else:
                shown = value
            rendered.append(f"{key}: {shown}\n")
        return "".join(rendered)

    def get_int(self, key: str, defval: int = 99999999) -> int:
        """Return the value of `key` as int.

        `defval` is returned when the key is absent; leaving it at the
        99999999 sentinel means "no default" and raises KeyError instead.
        """
        if key not in self.__params:
            if defval == 99999999:
                raise KeyError(f"Key '{key}' must exist in parameters")
            return defval
        return int(self.__params[key])

    def get_float(self, key: str, defval: float = 99999999.0) -> float:
        """Return the value of `key` as float.

        `defval` is returned when the key is absent; leaving it at the
        99999999.0 sentinel means "no default" and raises KeyError instead.
        """
        if key not in self.__params:
            if defval == 99999999.0:
                raise KeyError(f"Key '{key}' must exist in parameters")
            return defval
        return float(self.__params[key])

    def get(self, key: str, defval: str = "99999999") -> str:
        """Return the raw string value of `key`.

        `defval` is returned when the key is absent; leaving it at the
        "99999999" sentinel means "no default" and raises KeyError instead.
        """
        if key not in self.__params:
            if defval == "99999999":
                raise KeyError(f"Key '{key}' must exist in parameters")
            return defval
        return self.__params[key]

    def _get_list(self, key, defval, needtoexist, convert):
        """Shared implementation of the comma-separated list getters.

        Raises KeyError when the key is absent and `needtoexist` is true.
        Otherwise an absent key yields a *copy* of `defval`, so the shared
        default list object of the public signatures is never handed out
        (and therefore never mutated by a caller).
        """
        if key not in self.__params:
            if needtoexist:
                raise KeyError(f"Key '{key}' must exist in parameters")
            return [] if defval is None else list(defval)
        return [convert(item) for item in self.__params[key].split(",")]

    def get_list(self, key: str, defval: List[str] = [], needtoexist: bool = True) -> List[str]:
        """Return the comma-separated value of `key` as stripped, unquoted strings."""
        return self._get_list(
            key, defval, needtoexist, lambda item: item.strip().strip("'").strip()
        )

    def get_list_int(self, key: str, defval: List[int] = [], needtoexist: bool = True) -> List[int]:
        """Return the comma-separated value of `key` as a list of ints."""
        return self._get_list(key, defval, needtoexist, lambda item: int(item.strip()))

    def get_list_float(self, key: str, defval: List[float] = [], needtoexist: bool = True) -> List[float]:
        """Return the comma-separated value of `key` as a list of floats."""
        return self._get_list(key, defval, needtoexist, lambda item: float(item.strip()))

    def get_all(self) -> Dict[str, str]:
        """Return the underlying parameter dict (not a copy)."""
        return self.__params
def _name_paramstr(heroscript: str) -> Tuple[str, str]:
    """Split one action into its lower-cased name and its dedented parameter text.

    Raises:
        ValueError: when the input is not a string or its first line has no `!!`.
    """
    if not isinstance(heroscript, str):
        raise ValueError("Input must be a string")

    rows = dedent(heroscript).strip().split("\n")
    header = rows[0] if rows else None
    if header is None or "!!" not in header:
        raise ValueError("The first line must contain '!!' to indicate the class name")

    try:
        pieces = header.split("!!")
        class_name = pieces[1].lower().strip()
    except IndexError:
        raise ValueError("Invalid format for class name extraction")

    # Everything after the header line is the parameter section.
    return class_name, dedent("\n".join(rows[1:]))
def params_parse(content: str) -> Dict[str, str]:
    """Parse `key:value` lines into a dict of strings.

    A value consisting of a lone single quote opens a multi-line value that
    runs until a line holding only a single quote; the collected lines are
    dedented. A value wrapped in single quotes with no quote inside loses the
    wrapping quotes. Lines without a colon are ignored.
    """
    parsed = {}
    open_key = None
    collected : List[str] = list()

    for row in dedent(content).strip().split("\n"):
        if open_key:
            if row.strip() != "'":
                collected.append(row)
                continue
            # Closing quote: store the gathered text and leave multi-line mode.
            parsed[open_key] = dedent("\n".join(collected))
            open_key = None
            collected = []
            continue

        if ":" not in row:
            continue

        key, _, val = row.partition(":")
        key = key.strip()
        val = val.strip()

        if val == "'":
            open_key = key
            continue

        if val.startswith("'") and val.endswith("'"):
            inner = val[1:-1]
            # Only unwrap when the quotes really enclose the whole value.
            if "'" not in inner:
                val = inner
        parsed[key] = val

    return parsed
if __name__ == "__main__":
    # Self-check / usage example for HeroActions and Params.
    #
    # Parameter lines are indented under their `!!` header: the block splitter
    # closes an action at the first non-blank, non-indented line, so without
    # indentation both actions would come out with no parameters and the
    # asserts below could not hold.
    text = """
    !!obj1.define
        myname: 'mymama'
        mylist: '20,200'
        mylist2: 20,'a bbb'
        mylist3: 20,200
        myint:2
    !!obj2.color
        mother: 'mymama'
        name:'aurelie'
        length:60
        description:'
            multiline is supported
            now for aurelie
            '
        color:green
    """
    hero_actions = HeroActions(content=text)
    print(hero_actions)

    # Second action: scalar and list getters on existing keys.
    a2=hero_actions.actions[1]
    assert a2.params.get_list(key="color")==["green"]
    assert a2.params.get_list(key="mother")==["mymama"]
    assert a2.params.get(key="color")=="green"
    assert a2.params.get_int(key="length")==60
    assert a2.params.get_list_int(key="length")==[60]

    # Missing keys fall back to the supplied default.
    assert a2.params.get_int(key="lengtha",defval=3)==3
    assert a2.params.get(key="lengtha",defval="3")=="3"

    # First action: comma-separated lists, quoted and unquoted.
    a1=hero_actions.actions[0]
    #print(a1.params.get_list(key="mylist2"))
    assert a1.params.get_list(key="mylist")==["20","200"]
    assert a1.params.get_list_int(key="mylist")==[20,200]
    assert a1.params.get_list(key="mylist2")==["20","a bbb"]

View File

@@ -0,0 +1,129 @@
from pydantic import BaseModel, Field
from typing import Any, Type, TypeVar
import re
import hashlib
import json
import os
from types import List,Dict
T = TypeVar("T", bound=BaseModel)
class HeroScripts:
    """Load heroscript blocks from markdown files and/or inline text, build
    model objects from them and keep a per-class search index.

    NOTE(review): `HeroScript`, `index`, `Schema`, `STORED`, `ID`, `NUMERIC`,
    `TEXT` and `QueryParser` are used below but are not imported by the visible
    module (the index names look like Whoosh — TODO confirm and import them).
    The module-level `from types import List,Dict` cannot succeed either, since
    the stdlib `types` module exports neither name; this class no longer uses
    them.
    NOTE(review): statement nesting was reconstructed from a flattened listing;
    in particular, indexing is assumed to run only when `path` is given.
    """

    def __init__(self, class_types: dict, path:str = "", content:str = "", indexpath: str = ""):
        """Collect heroscripts from `path` (recursively) and from `content`.

        Args:
            class_types: maps the lower-case class name found after `!!` to
                the model class used to build objects.
            path: directory scanned for `.md` files; `~` is expanded.
            content: inline markdown text scanned for heroscript fences.
            indexpath: directory under which one index per class is created.
        """
        self.class_types = class_types
        # Was `List(HeroScript)`, which cannot be instantiated.
        self.heroscripts = []
        self.path = os.path.expanduser(path)
        self.indexpath = os.path.expanduser(indexpath)
        # file path -> md5 hex digest of the content already processed.
        # Was `self.done = Dict[str,str] = {}`, a chained assignment that
        # tries to assign into `Dict` and fails at runtime.
        self.done = {}
        # self.done_load()
        if self.path:
            try:
                # self.done_load()
                self.load(self.path)
                self.done_save()
            except FileNotFoundError as e:
                print(f"Directory not found: {self.path}")
                print(f"Error: {str(e)}")
            self.create_indexes()
            self.index_objects()
        if content:
            blocks = self.extract_heroscript_blocks(content)
            self.heroscripts.extend(HeroScript(block) for block in blocks)

    @staticmethod
    def extract_heroscript_blocks(content):
        """Return the bodies of all fenced blocks opened by a line starting
        with three backticks followed by `hero`.

        A block ends at the next line starting with three backticks; an
        unterminated block is dropped. Added because `load` called this method
        although it did not exist on the class.
        """
        blocks = []
        current = []
        inside = False
        for line in content.split("\n"):
            if line.startswith("```hero"):
                inside = True
                current = []
            elif inside and line.startswith("```"):
                inside = False
                blocks.append("\n".join(current))
            elif inside:
                current.append(line)
        return blocks

    def done_load(self):
        """Read `done.json` from the scanned directory into `self.done`, if present."""
        if self.path:
            done_file = os.path.join(self.path, "done.json")
            if os.path.exists(done_file):
                with open(done_file, "r") as f:
                    self.done = json.load(f)

    def done_save(self):
        """Write `self.done` to `done.json` inside the scanned directory."""
        if self.path:
            done_file = os.path.join(self.path, "done.json")
            with open(done_file, "w") as f:
                json.dump(self.done, f)

    def load(self, path):
        """Walk `path` and collect heroscript blocks from new or changed `.md` files.

        A file is skipped when its md5 digest equals the one recorded in
        `self.done`. Each collected block is stored with the path of the file
        it came from. (The original rebound `path` on every file, so the
        recorded path grew with each iteration and ignored sub-directories.)
        """
        for root, _, files in os.walk(path):
            for filename in files:
                filepath = os.path.join(root, filename)
                print(f" - load {filepath}")
                if not filename.endswith(".md"):
                    continue
                with open(filepath, "r") as file:
                    content = file.read()
                md5hash = hashlib.md5(content.encode()).hexdigest()
                if self.done.get(filepath) == md5hash:
                    continue
                blocks = self.extract_heroscript_blocks(content)
                self.heroscripts.extend(HeroScript(block, filepath) for block in blocks)
                self.done[filepath] = md5hash

    def _object_from(self, heroscript):
        """Build the model object for one heroscript entry.

        Returns None (after printing a message where the original did) when
        the entry is empty, malformed, of an unregistered class, or rejected
        by the model's `from_heroscript`.
        """
        if not heroscript.content:
            return None
        try:
            first_line = heroscript.content.split("\n")[0]
            class_name = first_line.split("!!")[1].split(".")[0].lower()
        except (IndexError, ValueError):
            print(f"Invalid HeroScript format: {heroscript.content}")
            return None
        if class_name not in self.class_types:
            return None
        class_type = self.class_types[class_name]
        try:
            return class_type.from_heroscript(heroscript.content)
        except Exception as e:
            # Best effort by design: one bad script must not stop the others.
            print(f"Error parsing HeroScript: {e}")
            return None

    def get_objects(self):
        """Return the model objects that could be built from the loaded heroscripts.

        This was decorated with `@staticmethod` while taking `self`, so
        `instance.get_objects()` failed with a missing-argument error.
        """
        objects = []
        for heroscript in self.heroscripts:
            obj = self._object_from(heroscript)
            if obj is not None:
                objects.append(obj)
        return objects

    def create_indexes(self):
        """Create one index directory per registered class under `indexpath`."""
        for class_type in self.class_types.values():
            schema = self.create_schema(class_type)
            index_dir = os.path.join(self.indexpath, class_type.__name__.lower())
            os.makedirs(index_dir, exist_ok=True)
            index.create_in(index_dir, schema)

    def create_schema(self, class_type):
        """Derive an index schema from the field metadata of `class_type`.

        Only fields whose metadata is flagged `unique`, `index` or `indexed`
        are included. The models in this package declare `index`, which the
        original check for `indexed` alone never matched.
        NOTE(review): assumes the `metadata=` keyword of `Field` ends up under
        `json_schema_extra["metadata"]` — TODO confirm for the pydantic
        version in use.
        """
        schema_fields = {"path": STORED()}
        for field_name, field in class_type.__fields__.items():
            json_schema_extra = getattr(field, "json_schema_extra", None)
            if json_schema_extra is None:
                continue
            metadata = json_schema_extra.get("metadata", {})
            if isinstance(metadata, list):
                metadata = {item: True for item in metadata}
            wanted = (
                metadata.get("unique")
                or metadata.get("index")
                or metadata.get("indexed")
            )
            if not wanted:
                continue
            if field.annotation is str:
                schema_fields[field_name] = ID(stored=True, unique=metadata.get("unique", False))
            elif field.annotation in (int, float):
                schema_fields[field_name] = NUMERIC(stored=True, unique=metadata.get("unique", False))
            else:
                schema_fields[field_name] = TEXT(stored=True,lowercase=True)
        return Schema(**schema_fields)

    def index_objects(self):
        """Add each buildable object to the index of its class.

        Every object is stored with the path of the heroscript it was built
        from. The original looped over all objects for every heroscript, so
        each object was indexed once per script and under unrelated paths.
        """
        for heroscript in self.heroscripts:
            obj = self._object_from(heroscript)
            if obj is None:
                continue
            index_dir = os.path.join(self.indexpath, type(obj).__name__.lower())
            ix = index.open_dir(index_dir)
            writer = ix.writer()
            writer.add_document(path=heroscript.path, **{k: str(v).lower() for k, v in obj.dict().items() if k in ix.schema.names()})
            writer.commit()

    def search(self, class_type, query):
        """Search the `name` field of the index for `class_type`.

        Returns:
            The stored `path` of every hit.
        """
        index_dir = os.path.join(self.indexpath, class_type.__name__.lower())
        ix = index.open_dir(index_dir)
        qp = QueryParser("name", schema=ix.schema)
        q = qp.parse(query)
        with ix.searcher() as searcher:
            results = searcher.search(q)
            # Read the hits while the searcher is still open.
            return [result["path"] for result in results]

View File

@@ -0,0 +1,82 @@
from pydantic import BaseModel, Field
from typing import Dict, Any, Type, TypeVar
import re
from colorama import Fore, Style
import hashlib
import json
import os
from types import List
from heroscript.heroaction import HeroAction
from heroscript.tools import format_multiline_text
class HeroScriptMixin:
    """Mixin adding heroscript rendering and parsing to a model class.

    The host class must expose its field names through `__fields__` and accept
    the parsed properties as keyword arguments.
    """

    def heroscript(self) -> "HeroAction":
        """Render this object as a `!!<classname>.define` action.

        Well-known properties (id, oid, name, title, description, content)
        come first in that order; the remaining ones follow alphabetically.
        Multi-line strings are formatted with `format_multiline_text`, strings
        containing whitespace are wrapped in single quotes.
        """
        class_name = self.__class__.__name__.lower()
        prop_order = ["id", "oid", "name", "title", "description", "content"]

        props = list(self.__fields__.keys())
        ordered_props = [prop for prop in prop_order if prop in props]
        remaining_props = sorted(prop for prop in props if prop not in prop_order)

        lines = [f"!!{class_name}.define"]
        for prop in ordered_props + remaining_props:
            val = getattr(self, prop)
            if isinstance(val, str):
                if "\n" in val:
                    val = format_multiline_text(text=val)
                elif any(c.isspace() for c in val):
                    val = f"'{val}'"
            lines.append(f" {prop}:{val}")

        return HeroAction(content="\n".join(lines))

    @classmethod
    def from_heroscript(cls, heroscript: str):
        """Build an instance from the text of one `!!name` action.

        Args:
            heroscript: the action text. Any object whose `str()` yields that
                text is accepted too, so the value returned by `heroscript()`
                can be passed straight back in.

        Raises:
            ValueError: when the first line carries no `!!` marker.
        """
        lines = str(heroscript).strip().split("\n")
        if "!!" not in lines[0]:
            raise ValueError("The first line must contain '!!' to indicate the class name")

        props = {}
        multiline_prop = None
        # Was `List(str)`, which cannot be instantiated.
        multiline_value = []

        for line in lines[1:]:
            if multiline_prop:
                if line.strip() == "'":
                    # End of multi-line text: strip the common indentation.
                    # `default=0` covers a value made only of blank lines.
                    min_indent = min(
                        (len(ml) - len(ml.lstrip()) for ml in multiline_value if ml.strip()),
                        default=0,
                    )
                    props[multiline_prop] = "\n".join(ml[min_indent:] for ml in multiline_value)
                    multiline_prop = None
                    multiline_value = []
                else:
                    multiline_value.append(line)
            elif ":" in line:
                prop, value = line.split(":", 1)
                prop = prop.strip()
                value = value.strip()
                if value == "'":
                    # Start of multi-line text.
                    multiline_prop = prop
                else:
                    if value.startswith("'") and value.endswith("'"):
                        value = value[1:-1]
                    props[prop] = value

        return cls(**props)

View File

@@ -0,0 +1,4 @@
## heroscript
> not to be used yet

View File

@@ -0,0 +1,145 @@
from typing import List
import os
from colorama import Fore, Style
from herotools.texttools import dedent
import textwrap
# Load the heroscript blocks of every markdown file found under a directory.
def heroscript_blocks(path: str) -> List[str]:
    """Return the heroscript blocks of all `.md` files below `path` (recursive)."""
    collected = []
    for folder, _subdirs, names in os.walk(path):
        for name in names:
            if not name.endswith(".md"):
                continue
            with open(os.path.join(folder, name), "r") as handle:
                text = handle.read()
            collected.extend(_extract_heroscript_blocks(text))
    return collected
def _extract_heroscript_blocks(content: str):
    """Return the bodies of the fenced blocks whose opening line starts with ```hero.

    A block is closed by the next line starting with ```; a block that is
    never closed is not returned.
    """
    found = []
    current : List[str] = list()
    inside = False
    for row in dedent(content).split("\n"):
        if row.startswith("```hero"):
            # Opening fence (also restarts collection if already inside).
            inside, current = True, []
        elif inside and row.startswith("```"):
            inside = False
            found.append("\n".join(current))
        elif inside:
            current.append(row)
    return found
def action_blocks(path: str = "", content:str = "") -> List[str]:
    """Return the individual `!!` action blocks of `content`, or of every
    heroscript block found under `path` when `content` is empty."""
    if content != "":
        return __action_blocks_get(content)

    actions : List[str] = list()
    for script in heroscript_blocks(path):
        actions.extend(__action_blocks_get(script))
    return actions
def __action_blocks_get(content: str) -> List[str]:
    """Split dedented text into one string per `!!` action.

    An action starts at a line beginning with `!!` and runs until the next
    such line or the first non-blank line that is not indented with a space
    or a tab. Text outside any action is ignored.
    """
    result = []
    current : List[str] = list()
    in_action = False

    for row in dedent(content).split("\n"):
        if row.startswith("!!"):
            in_action = True
            if current:
                # A previous action was still open: close it first.
                result.append("\n".join(current))
                current = []
            current.append(row)
            continue

        indented = row.startswith((" ", "\t"))
        if row.strip() and not indented and current:
            # Unindented text terminates the running action.
            result.append("\n".join(current))
            current = []
            in_action = False
        elif in_action:
            current.append(row)

    if current:
        result.append("\n".join(current))
    return result
def myprint(obj):
    """Print `obj` as `ClassName(field='value', ...)` with colours.

    Only fields listed in `obj.__fields__` that are also present in
    `obj.__dict__` are shown.
    """
    label = f"{Fore.YELLOW}{obj.__class__.__name__}{Style.RESET_ALL}"
    parts = []
    for field in obj.__fields__:
        if field not in obj.__dict__:
            continue
        parts.append(
            f"{Fore.LIGHTBLACK_EX}{field}{Style.RESET_ALL}={Fore.GREEN}'{getattr(obj, field)}'{Style.RESET_ALL}"
        )
    attributes = ', '.join(parts)
    print( f"{label}({attributes})" )
# Format text so it can be used as a multi-line value in heroscript.
def format_multiline_text(text: str) -> str:
    """Dedent `text`, indent every line by one space and wrap it in quote lines."""
    body = textwrap.indent(dedent(text), " ")
    # Opening quote on its own line, closing quote on an indented line.
    return "'\n" + body + "\n '"
# Coloured representation of heroscript text.
def heroscript_repr(content:str) ->str:
    """Return `content` with action headers, keys and values coloured.

    Lines starting with `!!` are red; `key:value` lines get a cyan key and a
    green (quoted) or yellow (unquoted) value; all other lines are unchanged.
    """
    def paint(line):
        if line.startswith("!!"):
            return f"{Fore.RED}{line}{Style.RESET_ALL}"
        if ":" not in line:
            return line
        prop, value = line.split(":", 1)
        prop = prop.strip()
        value = value.strip()
        quoted = value.startswith("'") and value.endswith("'")
        colour = Fore.GREEN if quoted else Fore.YELLOW
        value = f" {colour}{value}{Style.RESET_ALL}"
        return f" {Fore.CYAN}{prop}{Style.RESET_ALL}:{value}"

    return "\n".join(paint(line) for line in content.split("\n"))
def heroscript_print(content:str):
    """Print the coloured representation of `content`."""
    print(heroscript_repr(content))
if __name__ == "__main__":
    # Quick manual check: show the sample dedented, then formatted as a
    # multi-line heroscript value.
    t = " something\n a\n\n bbbb"
    dedented = dedent(t)
    print(dedented)
    formatted = format_multiline_text(t)
    print(formatted)