# Source code for genro_treestore.resolvers.directory

# Copyright 2025 Softwell S.r.l. - Genropy Team
# SPDX-License-Identifier: Apache-2.0

"""Directory resolver for lazy filesystem traversal.

This module provides resolvers for lazy loading of filesystem hierarchies,
compatible with Genropy's DirectoryResolver from gnrbag.py.

The DirectoryResolver enables lazy traversal of directory structures:
- Directories become branch nodes with their own DirectoryResolver
- Files become leaf nodes with optional content resolvers
- File metadata (mtime, size, etc.) stored in node attributes

Example:
    Basic usage::

        from genro_treestore import TreeStore
        from genro_treestore.resolvers import DirectoryResolver

        store = TreeStore()
        store.set_item('docs')
        store.set_resolver('docs', DirectoryResolver('/path/to/docs'))

        # Lazy traversal - directories resolved on access
        store['docs.subdir.readme_txt']

        # Access file metadata
        node = store.get_node('docs.subdir.readme_txt')
        print(node.attr['abs_path'])  # /path/to/docs/subdir/readme.txt
        print(node.attr['mtime'])     # datetime of last modification

    With filtering::

        # Only include Python files, exclude __pycache__
        resolver = DirectoryResolver(
            '/path/to/project',
            include='*.py',
            exclude='__pycache__'
        )

    Custom file processors::

        def process_json(path):
            with open(path) as f:
                return json.load(f)

        resolver = DirectoryResolver(
            '/path/to/data',
            ext='json',
            processors={'json': process_json}
        )
"""

from __future__ import annotations

import fnmatch
import os
import re
from datetime import datetime
from typing import Any, Callable

from genro_toolbox import smartasync

from .base import TreeStoreResolver
from ..store import TreeStore


class DirectoryResolver(TreeStoreResolver):
    """Resolver for lazy loading of filesystem directory contents.

    Loads directory contents on demand, creating a TreeStore where:

    - Subdirectories become branch nodes with their own DirectoryResolver
    - Files become leaf nodes (value=None by default, or processed content)
    - File metadata is stored in node attributes

    Compatible with Genropy's DirectoryResolver API.

    Attributes:
        path: Absolute path to the directory.
        relocate: Relative path prefix for the ``rel_path`` attribute.
        invisible: If True, include hidden files (starting with '.').
        ext: Comma-separated list of extensions to process (e.g. 'xml,json').
            Format: 'ext' or 'ext:processor_name'. Extension names are
            matched case-insensitively.
        include: Comma-separated glob patterns for files to include
            (e.g. '*.py').
        exclude: Comma-separated glob patterns for files/dirs to exclude
            (e.g. '__pycache__').
        callback: Optional callback(nodeattr) -> bool to filter nodes.
        dropext: If True, don't include the extension in node labels.
        processors: Dict mapping processor name to processor function.

    Example:
        Basic directory listing::

            resolver = DirectoryResolver('/home/user/docs')
            store.set_item('docs')
            store.set_resolver('docs', resolver)

            # Access triggers lazy load
            for label in store['docs'].keys():
                node = store.get_node(f'docs.{label}')
                print(f"{label}: {node.attr['file_ext']}")

        With XML processing::

            resolver = DirectoryResolver(
                '/path/to/config',
                ext='xml',
                processors={'xml': lambda p: parse_xml(p)}
            )
    """

    __slots__ = (
        "path",
        "relocate",
        "invisible",
        "ext",
        "include",
        "exclude",
        "callback",
        "dropext",
        "processors",
    )

    def __init__(
        self,
        path: str,
        relocate: str = "",
        *,
        cache_time: int = 500,
        read_only: bool = True,
        invisible: bool = False,
        ext: str = "",
        include: str = "",
        exclude: str = "",
        callback: Callable[[dict], bool | None] | None = None,
        dropext: bool = False,
        processors: dict[str, Callable[[str], Any] | bool] | None = None,
        **kwargs: Any,
    ) -> None:
        """Initialize the directory resolver.

        Args:
            path: Absolute path to the directory to resolve.
            relocate: Relative path prefix for the ``rel_path`` attribute.
                Used to maintain relative paths when nested.
            cache_time: Cache duration in seconds. Default 500 (like Bag).
            read_only: If True, resolved value not stored in node._value.
            invisible: If True, include hidden files (starting with '.').
            ext: Comma-separated extensions to process.
                Format: 'xml' or 'xml:processor_name,json:processor_name'.
            include: Glob pattern(s) for files to include (e.g. '*.py,*.txt').
            exclude: Glob pattern(s) for files/dirs to exclude.
            callback: Function called with the nodeattr dict for each entry.
                Return False to skip the entry, None/True to include.
            dropext: If True, don't include the extension in node labels.
            processors: Dict mapping extension/processor_name to:
                - Callable[[str], Any]: Function to process file, returns value
                - False: Skip files with this extension
                - None/True: Use the built-in processor for that name
                  (``processor_<name>`` method if defined, else the default)
            **kwargs: Additional arguments for TreeStoreResolver.
        """
        super().__init__(cache_time=cache_time, read_only=read_only, **kwargs)
        self.path = path
        self.relocate = relocate
        self.invisible = invisible
        self.ext = ext
        self.include = include
        self.exclude = exclude
        self.callback = callback
        self.dropext = dropext
        self.processors = processors or {}

        # Store args for serialization
        self._init_args = (path, relocate)
        self._init_kwargs = {
            "invisible": invisible,
            "ext": ext,
            "include": include,
            "exclude": exclude,
            "dropext": dropext,
            # Note: callback and processors not serializable
        }

    @property
    def instance_kwargs(self) -> dict[str, Any]:
        """Get kwargs for creating child DirectoryResolvers.

        The ``processors`` dict and ``callback`` are passed by reference, so
        children share them with this resolver.
        """
        return {
            "cache_time": self.cache_time,
            "read_only": self.read_only,
            "invisible": self.invisible,
            "ext": self.ext,
            "include": self.include,
            "exclude": self.exclude,
            "callback": self.callback,
            "dropext": self.dropext,
            "processors": self.processors,
        }

    @smartasync
    async def load(self) -> TreeStore:
        """Load directory contents into a TreeStore.

        Entries are visited in sorted name order. Editor backup/journal files
        (``#name``, ``name#``, ``name~``) are always skipped; hidden entries
        are skipped unless ``invisible`` is set. An unreadable or missing
        directory yields an empty store rather than raising.

        Returns:
            TreeStore containing directory entries as nodes.
            Subdirectories have DirectoryResolver attached.
            Files have metadata in attributes.
        """
        extensions = self._parse_extensions()
        result = TreeStore()

        try:
            entries = sorted(os.listdir(self.path))
        except OSError:
            # Deliberate best-effort: a vanished/unreadable directory is empty.
            entries = []

        if not self.invisible:
            entries = [name for name in entries if not name.startswith(".")]

        for entry in entries:
            # Skip editor backup/journal files
            if entry.startswith("#") or entry.endswith(("#", "~")):
                continue

            fullpath = os.path.join(self.path, entry)
            relpath = os.path.join(self.relocate, entry)
            is_dir = os.path.isdir(fullpath)

            if is_dir:
                # Directories are only subject to the exclude filter, so an
                # include pattern such as '*.py' does not prune the tree.
                fname, ext = entry, "directory"
                add_it = not self.exclude or self._filter_match(
                    entry, exclude=self.exclude
                )
            else:
                add_it = True
                if self.include or self.exclude:
                    add_it = self._filter_match(
                        entry, include=self.include, exclude=self.exclude
                    )
                fname, ext = os.path.splitext(entry)
                ext = ext[1:]  # Remove leading dot

            if not add_it:
                continue

            label = self._make_label(fname, ext, is_dir=is_dir)

            handler = self._resolve_handler(ext, is_dir, extensions)
            if handler is False:
                continue  # Extension explicitly disabled via processors

            mtime, atime, ctime, size = self._stat_attrs(fullpath)

            nodeattr = {
                "file_name": fname,
                "file_ext": ext,
                "rel_path": relpath,
                "abs_path": fullpath,
                "mtime": mtime,
                "atime": atime,
                "ctime": ctime,
                "nodecaption": entry,
                "caption": self._make_caption(fname),
                "size": size,
            }

            # Apply callback filter: only an explicit False rejects the entry.
            if self.callback and self.callback(nodeattr) is False:
                continue

            # Process and add item
            value = handler(fullpath)
            result.set_item(label, _attributes=nodeattr)

            # If handler returned a resolver, attach it; otherwise store the
            # processed value (None means "leaf with metadata only").
            if isinstance(value, TreeStoreResolver):
                result.set_resolver(label, value)
            elif value is not None:
                result.get_node(label)._value = value

        return result

    def _parse_extensions(self) -> dict[str, str]:
        """Parse ``self.ext`` into a ``{extension: processor_name}`` mapping.

        Keys are stripped of whitespace and a leading dot and lower-cased,
        because lookups are done with the lower-cased file extension.
        Processor names keep the spelling given by the caller. Empty specs
        (e.g. from a trailing comma) are ignored.
        """
        extensions: dict[str, str] = {}
        if not self.ext:
            return extensions
        for ext_spec in self.ext.split(","):
            parts = [part.strip() for part in ext_spec.split(":")]
            ext_name = parts[0].lstrip(".")
            if not ext_name:
                continue
            processor_name = parts[1] if len(parts) > 1 and parts[1] else ext_name
            extensions[ext_name.lower()] = processor_name
        return extensions

    def _resolve_handler(
        self,
        ext: str,
        is_dir: bool,
        extensions: dict[str, str],
    ) -> Callable[[str], Any] | bool:
        """Pick the processor for an entry, or False to skip it.

        Resolution order: ``processors[name]`` if callable, then a
        ``processor_<name>`` method on this instance, then
        ``processor_default``. Directories always use the name "directory";
        files only get a name when their extension was listed in ``ext``,
        so a regular file called ``x.directory`` is not treated as a folder.
        """
        processor_name = "directory" if is_dir else extensions.get(ext.lower())
        if not processor_name:
            return self.processor_default

        handler = self.processors.get(processor_name)
        if handler is False:
            return False
        if not callable(handler):
            # None/True (or any non-callable) means "use the built-in one".
            handler = getattr(self, f"processor_{processor_name}", None)
        return handler if handler is not None else self.processor_default

    @staticmethod
    def _stat_attrs(
        fullpath: str,
    ) -> tuple[datetime | None, datetime | None, datetime | None, int | None]:
        """Return (mtime, atime, ctime, size) for a path, or all None on failure."""
        try:
            stat = os.stat(fullpath)
            return (
                datetime.fromtimestamp(stat.st_mtime),
                datetime.fromtimestamp(stat.st_atime),
                datetime.fromtimestamp(stat.st_ctime),
                stat.st_size,
            )
        except (OSError, OverflowError, ValueError):
            # OSError: broken symlink / entry removed since listdir.
            # OverflowError/ValueError: timestamp outside the platform range.
            return None, None, None, None

    @staticmethod
    def _make_caption(name: str) -> str:
        """Build a human-readable caption from a file name (like Bag).

        Underscores become spaces and the text is capitalized. A leading
        number followed by a space (e.g. ``01_intro``) yields
        ``"!!1 Intro"``.
        """
        caption = name.replace("_", " ").strip()
        numbered = re.match(r"(\d+) (.*)", caption)
        if numbered:
            return f"!!{int(numbered.group(1))} {numbered.group(2).capitalize()}"
        return caption.capitalize()

    def _make_label(self, name: str, ext: str, is_dir: bool | None = None) -> str:
        """Create node label from filename and extension.

        Args:
            name: Filename without extension.
            ext: File extension (without dot), or "directory".
            is_dir: Whether the entry is a directory. When None (the
                default), this is inferred from ``ext == "directory"``.

        Returns:
            Label safe for use as TreeStore key (dots replaced by '_').
        """
        if is_dir is None:
            is_dir = ext == "directory"
        if not is_dir and not self.dropext:
            name = f"{name}_{ext}"
        return name.replace(".", "_")

    def _filter_match(
        self,
        name: str,
        include: str = "",
        exclude: str = "",
    ) -> bool:
        """Check if filename matches include/exclude patterns.

        Exclusion wins over inclusion. With no include filter, anything not
        excluded passes.

        Args:
            name: Filename to check.
            include: Comma-separated glob patterns to include.
            exclude: Comma-separated glob patterns to exclude.

        Returns:
            True if file should be included, False otherwise.
        """

        def matches_any(patterns: str) -> bool:
            return any(
                fnmatch.fnmatch(name, pattern.strip())
                for pattern in patterns.split(",")
            )

        if exclude and matches_any(exclude):
            return False
        if include:
            return matches_any(include)
        return True

    def processor_directory(self, path: str) -> DirectoryResolver:
        """Process a subdirectory by creating a new DirectoryResolver.

        Args:
            path: Absolute path to the subdirectory.

        Returns:
            DirectoryResolver for the subdirectory.
        """
        # NOTE(review): children are always plain DirectoryResolver, so
        # processor_<name> methods defined on a subclass are not inherited
        # by nested directories unless this method is overridden.
        new_relocate = os.path.join(self.relocate, os.path.basename(path))
        return DirectoryResolver(path, new_relocate, **self.instance_kwargs)

    def processor_default(self, path: str) -> None:
        """Default processor for files - returns None.

        Args:
            path: Absolute path to the file.

        Returns:
            None (file path stored in attributes).
        """
        return None

    def __repr__(self) -> str:
        return f"DirectoryResolver({self.path!r}, cache_time={self.cache_time})"
class TxtDocResolver(TreeStoreResolver):
    """Resolver that reads a file's raw contents on demand.

    Compatible with Genropy's TxtDocResolver.

    Attributes:
        path: Absolute path to the text file.

    Example:
        >>> resolver = TxtDocResolver('/path/to/file.txt')
        >>> node.resolver = resolver
        >>> content = node.value  # Reads file contents
    """

    __slots__ = ("path",)

    def __init__(
        self,
        path: str,
        *,
        cache_time: int = 500,
        read_only: bool = True,
        **kwargs: Any,
    ) -> None:
        """Set up the resolver for a single file.

        Args:
            path: Absolute path to the text file.
            cache_time: Cache duration in seconds. Default 500.
            read_only: If True, resolved value not stored in node._value.
            **kwargs: Additional arguments for TreeStoreResolver.
        """
        super().__init__(cache_time=cache_time, read_only=read_only, **kwargs)
        self.path = path
        # Positional args kept for serialization.
        self._init_args = (path,)

    @smartasync
    async def load(self) -> bytes:
        """Read the whole file in binary mode.

        Returns:
            File contents as bytes (no decoding is applied).
        """
        with open(self.path, mode="rb") as stream:
            contents = stream.read()
        return contents

    def __repr__(self) -> str:
        return "TxtDocResolver({!r})".format(self.path)