Source code for jamb.storage.discovery

"""Filesystem discovery for jamb document trees."""

import logging
import os
from pathlib import Path

import yaml

from jamb.storage.document_config import load_document_config
from jamb.storage.document_dag import DocumentDAG

logger = logging.getLogger("jamb")


def discover_documents(root: Path | None = None) -> DocumentDAG:
    """Build a document DAG from the ``.jamb.yml`` files found under a directory.

    Every config file returned by ``_find_config_files`` is loaded with
    ``load_document_config``. Files that fail to load are logged at
    warning level and skipped; every other config is registered in the
    DAG under its prefix, together with the directory that contains it.

    Args:
        root: Directory to search. When ``None``, the current working
            directory is used. The path is resolved before searching.

    Returns:
        A DocumentDAG holding every successfully loaded document.

    Raises:
        FileNotFoundError: If the resolved root is not a directory.
        ValueError: If two config files declare the same document prefix.
    """
    root = (Path.cwd() if root is None else root).resolve()
    if not root.is_dir():
        raise FileNotFoundError(f"Root directory not found: {root}")

    dag = DocumentDAG()
    for config_path in _find_config_files(root):
        try:
            config = load_document_config(config_path)
        except (ValueError, yaml.YAMLError, OSError) as exc:
            # One unreadable or invalid config must not abort discovery.
            logger.warning("Skipping %s: %s", config_path, exc)
        else:
            # Runs outside the try's protection, so the duplicate-prefix
            # ValueError below propagates to the caller instead of being
            # swallowed by the handler above.
            if config.prefix in dag.documents:
                existing_path = dag.document_paths[config.prefix]
                raise ValueError(
                    f"Duplicate document prefix '{config.prefix}' found at {existing_path} and {config_path.parent}"
                )
            dag.documents[config.prefix] = config
            dag.document_paths[config.prefix] = config_path.parent
    return dag


def _find_config_files(root: Path) -> list[Path]: """Find all .jamb.yml files under root. Args: root: The directory to search recursively. Returns: A list of config file paths sorted alphabetically. Raises: PermissionError: If root is not readable. """ # Check read permission before traversal to fail fast if not os.access(root, os.R_OK): raise PermissionError(f"Cannot read directory: {root}") # Use os.walk with followlinks=False to avoid symlink cycles config_files = [] for dirpath, _dirnames, filenames in os.walk(root, followlinks=False): if ".jamb.yml" in filenames: config_files.append(Path(dirpath) / ".jamb.yml") return sorted(config_files)