Source code for genro_treestore.builders.html

# Copyright 2025 Softwell S.r.l. - Genropy Team
# SPDX-License-Identifier: Apache-2.0

"""HtmlBuilder - HTML5 element builder with W3C schema validation.

This module provides builders for generating HTML5 documents. The schema
is loaded from a pre-compiled MessagePack file generated from W3C Validator
RELAX NG schema files.

Example:
    Creating an HTML document::

        from genro_treestore import TreeStore
        from genro_treestore.builders import HtmlBuilder

        store = TreeStore(builder=HtmlBuilder())
        body = store.body()
        div = body.div(id='main', class_='container')
        div.h1(value='Welcome')
        div.p(value='Hello, World!')
        ul = div.ul()
        ul.li(value='Item 1')
        ul.li(value='Item 2')

References:
    - W3C Validator: https://github.com/validator/validator
    - WHATWG HTML Standard: https://html.spec.whatwg.org/
"""

from __future__ import annotations

from pathlib import Path
from typing import TYPE_CHECKING, Any, Callable

from .base import BuilderBase

if TYPE_CHECKING:
    from ..store import TreeStore
    from ..store import TreeStoreNode


# Module-level memo: filled on the first successful load, reused afterwards.
_schema_cache: dict | None = None


def _load_html5_schema() -> dict:
    """Return the HTML5 schema data, loading it from disk on first use.

    The schema is read from ``schemas/html5_schema.msgpack`` next to this
    module and decoded with ``TreeStore.from_tytx``. The result is memoized
    in the module-level ``_schema_cache`` so the file is read at most once
    per process.

    Returns:
        Dict with two keys, ``'elements'`` and ``'void_elements'``, each
        mapped to a frozenset built from the value of the ``_elements`` /
        ``_void_elements`` schema node (an empty frozenset when the node
        lookup yields a falsy result).

    Raises:
        FileNotFoundError: If the pre-compiled schema file does not exist.
    """
    global _schema_cache

    if _schema_cache is None:
        # Imported lazily, only when the schema actually has to be decoded.
        from ..store import TreeStore

        schema_path = Path(__file__).parent / "schemas" / "html5_schema.msgpack"

        if not schema_path.exists():
            raise FileNotFoundError(
                f"HTML5 schema not found: {schema_path}\n"
                "Run: python scripts/build_html5_schema.py"
            )

        decoded = TreeStore.from_tytx(schema_path.read_bytes(), transport="msgpack")

        # Look both nodes up first, then freeze their values.
        found = {
            key: decoded.get_node(node_path)
            for key, node_path in (
                ("elements", "_elements"),
                ("void_elements", "_void_elements"),
            )
        }
        _schema_cache = {
            key: frozenset(node.value) if node else frozenset()
            for key, node in found.items()
        }

    return _schema_cache


class HtmlBuilder(BuilderBase):
    """Builder exposing every HTML5 element as a dynamically created method.

    Tag methods are not defined statically: ``__getattr__`` resolves any
    attribute whose name is listed in the loaded schema's element set and
    returns a callable that creates a child with that tag. Void elements
    (meta, br, img, etc.) receive an empty-string value when the caller
    does not supply one.

    The schema comes from a pre-compiled MessagePack file generated from
    the W3C Validator RELAX NG schema files.

    Usage:
        >>> store = TreeStore(builder=HtmlBuilder())
        >>> store.div(id='main').p(value='Hello')
        >>> store.ul().li(value='Item 1')

    Attributes:
        VOID_ELEMENTS: Frozenset of void (self-closing) element names.
        ALL_TAGS: Frozenset of all valid HTML5 element names.
    """

    def __init__(self):
        """Attach the (module-cached) W3C HTML5 schema data to this builder."""
        self._schema_data = _load_html5_schema()

    @property
    def VOID_ELEMENTS(self) -> frozenset[str]:
        """Names of void elements (self-closing, no content)."""
        return self._schema_data["void_elements"]

    @property
    def ALL_TAGS(self) -> frozenset[str]:
        """Names of all valid HTML5 elements."""
        return self._schema_data["elements"]

    def __getattr__(self, name: str) -> Callable[..., TreeStore | TreeStoreNode]:
        """Resolve an unknown attribute as an HTML tag method.

        Args:
            name: Tag name (e.g., 'div', 'span', 'meta').

        Returns:
            Callable that creates a child with that tag.

        Raises:
            AttributeError: If ``name`` starts with an underscore or is not
                an element name in the schema.
        """
        # Underscore names are never tags. Rejecting them first also keeps a
        # missing ``_schema_data`` from re-entering this method endlessly.
        if name.startswith("_"):
            raise AttributeError(f"'{type(self).__name__}' has no attribute '{name}'")

        if name not in self._schema_data["elements"]:
            raise AttributeError(f"'{name}' is not a valid HTML tag")

        return self._make_tag_method(name)

    def _make_tag_method(self, name: str) -> Callable[..., TreeStore | TreeStoreNode]:
        """Build the callable that creates a child element tagged ``name``."""
        void_tag = name in self._schema_data["void_elements"]

        def tag_method(
            target: TreeStore, tag: str = name, value: Any = None, **attr: Any
        ) -> TreeStore | TreeStoreNode:
            # A void element without an explicit value gets "" (self-closing).
            effective_value = "" if (void_tag and value is None) else value
            return self.child(target, tag, value=effective_value, **attr)

        return tag_method
class HtmlHeadBuilder(HtmlBuilder):
    """Builder for the HTML head section.

    Allows all HTML tags but is semantically intended for head content
    (meta, title, link, style, script, etc.).
    """


class HtmlBodyBuilder(HtmlBuilder):
    """Builder for the HTML body section.

    Allows all HTML tags for body content generation.
    """


class HtmlPage:
    """HTML page with separate head and body TreeStores.

    Creates a complete HTML document structure with:
    - html root TreeStore
    - head TreeStore with HtmlHeadBuilder
    - body TreeStore with HtmlBodyBuilder

    NOTE: attribute values and text content are written to the output
    verbatim; no HTML escaping is applied. Callers that render untrusted
    data must escape it before storing it in the tree.

    Usage:
        >>> page = HtmlPage()
        >>> page.head.title(value='My Page')
        >>> page.head.meta(charset='utf-8')
        >>> page.body.div(id='main').p(value='Hello World')
        >>> html = page.to_html()
    """

    def __init__(self):
        """Initialize the page with an html root holding head and body stores."""
        from ..store import TreeStore

        self.html = TreeStore()
        self.head = TreeStore(builder=HtmlHeadBuilder())
        self.body = TreeStore(builder=HtmlBodyBuilder())
        self.html.set_item("head", self.head)
        self.html.set_item("body", self.body)

    @staticmethod
    def _format_attrs(node: TreeStoreNode) -> str:
        """Return the node's public attributes as space-separated ``k="v"`` pairs."""
        # Attributes whose name starts with "_" are internal and not rendered.
        return " ".join(
            f'{k}="{v}"' for k, v in node.attr.items() if not k.startswith("_")
        )

    def _node_to_html(self, node: TreeStoreNode, indent: int = 0) -> str:
        """Recursively convert a node to HTML.

        Args:
            node: Node to render; its tag (or label, when no tag is set)
                becomes the element name.
            indent: Nesting depth, used to prefix the line with spaces.

        Returns:
            The HTML for the node and, for branch nodes, all its descendants.
        """
        tag = node.tag or node.label
        attrs = self._format_attrs(node)
        attrs_str = f" {attrs}" if attrs else ""
        spaces = " " * indent

        if node.is_leaf:
            value = node.value
            # Only real void elements may omit the closing tag. An empty
            # string on any other element (e.g. div) must still be closed,
            # otherwise the rest of the document nests inside it.
            if value == "" and tag in _load_html5_schema()["void_elements"]:
                return f"{spaces}<{tag}{attrs_str}>"
            # A missing value means "no content", not the literal text "None".
            text = "" if value is None else value
            return f"{spaces}<{tag}{attrs_str}>{text}</{tag}>"

        lines = [f"{spaces}<{tag}{attrs_str}>"]
        lines.extend(
            self._node_to_html(child, indent + 1) for child in node.value.nodes()
        )
        lines.append(f"{spaces}</{tag}>")
        return "\n".join(lines)

    def _store_to_html(self, store: TreeStore, tag: str, indent: int = 0) -> str:
        """Convert a TreeStore to HTML with a wrapper tag.

        Args:
            store: Store whose nodes become the wrapper's children.
            tag: Name of the wrapping element (e.g. 'head', 'body').
            indent: Nesting depth of the wrapper element.

        Returns:
            The wrapper element containing every rendered node of the store.
        """
        spaces = " " * indent
        lines = [f"{spaces}<{tag}>"]
        lines.extend(self._node_to_html(node, indent + 1) for node in store.nodes())
        lines.append(f"{spaces}</{tag}>")
        return "\n".join(lines)

    def to_html(self, filename: str | None = None, output_dir: str | None = None) -> str:
        """Generate complete HTML.

        Args:
            filename: If provided, save to output_dir/filename.
            output_dir: Directory to save to (default: current directory).
                It is created, including missing parents, when needed.

        Returns:
            HTML string, or the path of the written file (as a string) if
            filename was provided.
        """
        html_content = "\n".join(
            [
                "<!DOCTYPE html>",
                "<html>",
                self._store_to_html(self.head, "head", indent=0),
                self._store_to_html(self.body, "body", indent=0),
                "</html>",
            ]
        )

        if not filename:
            return html_content

        if output_dir is None:
            target_dir = Path.cwd()
        else:
            target_dir = Path(output_dir)
            # parents=True so a nested output directory does not fail.
            target_dir.mkdir(parents=True, exist_ok=True)

        output_path = target_dir / filename
        # Write UTF-8 explicitly instead of the platform's locale encoding,
        # which may be unable to encode the content.
        output_path.write_text(html_content, encoding="utf-8")
        return str(output_path)

    def _print_nodes(
        self,
        store: TreeStore,
        *,
        show_attrs: bool,
        max_value_len: int | None,
    ) -> None:
        """Print one line per node of ``store``, indented by path depth.

        Args:
            store: Store to walk.
            show_attrs: Whether to append the node's public attributes.
            max_value_len: Leaf values longer than this are cut and suffixed
                with "..."; None disables truncation.
        """
        for path, node in store.walk():
            indent_level = " " * path.count(".")
            tag = node.tag or node.label

            value_str = ""
            if node.is_leaf and node.value:
                text = str(node.value)
                if max_value_len is not None and len(text) > max_value_len:
                    text = f"{text[:max_value_len]}..."
                value_str = f': "{text}"'

            attrs_str = ""
            if show_attrs:
                attrs = self._format_attrs(node)
                attrs_str = f" [{attrs}]" if attrs else ""

            print(f"{indent_level}<{tag}{attrs_str}>{value_str}")

    def print_tree(self):
        """Print the head and body tree structures for debugging.

        The HEAD section lists tags with leaf values truncated to 30
        characters; the BODY section lists tags with their attributes and
        full leaf values.
        """
        rule = "=" * 60

        print(rule)
        print("HEAD")
        print(rule)
        self._print_nodes(self.head, show_attrs=False, max_value_len=30)

        print("\n" + rule)
        print("BODY")
        print(rule)
        self._print_nodes(self.body, show_attrs=True, max_value_len=None)