Commit 90403586 authored by Jan Reimes
Browse files

fix(ai): resolve nested .ai folder, add verbose flag, auto-crawl specs

- Fix _get_ai_directory() to avoid .ai/.ai nesting when file is already inside .ai/
- Add --verbose/-v flag to workspace add-members to show/hide INFO logs
- Auto-crawl spec metadata in checkout_spec_to_workspace when not in database
- Add set_verbosity import to cli.py
- Add build_default_spec_sources import to workspaces.py
parent 9bb2a8cd
Loading
Loading
Loading
Loading
+7 −1
Original line number Diff line number Diff line
@@ -19,7 +19,7 @@ from rich.progress import MofNCompleteColumn, Progress, SpinnerColumn, TextColum
from tdoc_crawler.cli.formatting import TableColumnSpec, print_structured_output
from tdoc_crawler.config import CacheManager, resolve_cache_manager
from tdoc_crawler.database import TDocDatabase
from tdoc_crawler.logging import get_console, get_logger
from tdoc_crawler.logging import get_console, get_logger, set_verbosity
from tdoc_crawler.models.base import OutputFormat, SortOrder
from tdoc_crawler.tdocs.models import TDocQueryConfig
from tdoc_crawler.utils.date_parser import parse_partial_date
@@ -851,7 +851,13 @@ def workspace_add_members(
    agenda: AgendaPatternOption = None,
    agenda_ex: AgendaPatternExcludeOption = None,
    limit: WorkspaceLimitOption = None,
    verbose: bool = typer.Option(False, "--verbose", "-v", help="Enable verbose output (INFO level logging)"),
) -> None:
    # Set log level based on verbosity
    if verbose:
        set_verbosity("INFO")
    else:
        set_verbosity("WARNING")
    workspace_name = _resolve_workspace_name(workspace)
    kind_normalized = kind.lower().rstrip("s")
    source_kind = SourceKind(kind_normalized) if kind_normalized in {entry.value for entry in SourceKind} else SourceKind.OTHER
+6 −0
Original line number Diff line number Diff line
@@ -117,6 +117,8 @@ def _get_ai_directory(file_path: Path) -> Path:
    """Get the .ai directory for a file.

    Resolves the path of the .ai folder located next to the file; this
    function only computes the path and does not create the folder.
    If the file is already inside a .ai directory (e.g., converted PDF),
    that existing .ai directory is returned to avoid nested .ai/.ai paths.

    Args:
        file_path: Path to source document.
@@ -124,6 +126,10 @@ def _get_ai_directory(file_path: Path) -> Path:
    Returns:
        Path to .ai directory.
    """
    # If the file already lives inside a .ai folder, resolve to that same
    # folder (parent.parent / ".ai") instead of a nested .ai/.ai structure
    if file_path.parent.name == ".ai":
        return file_path.parent.parent / ".ai"
    return file_path.parent / ".ai"


+15 −4
Original line number Diff line number Diff line
@@ -13,14 +13,13 @@ from tdoc_crawler.config import resolve_cache_manager
from tdoc_crawler.database import TDocDatabase
from tdoc_crawler.database.specs import SpecDatabase
from tdoc_crawler.logging import get_logger
from tdoc_crawler.specs.operations.checkout import checkout_specs_async
from tdoc_crawler.specs.operations.checkout import build_default_spec_sources, checkout_specs_async
from tdoc_crawler.tdocs.models import TDocMetadata
from tdoc_crawler.tdocs.operations.checkout import checkout_tdoc
from tdoc_crawler.tdocs.sources.base import TDocSourceConfig
from tdoc_crawler.tdocs.sources.portal import PortalSource
from tdoc_crawler.tdocs.sources.whatthespec import resolve_via_whatthespec
from tdoc_crawler.tdocs.utils import normalize_tdoc_id
from tdoc_crawler.utils.normalization import normalize_spec_number, resolve_release_to_full_version
from tdoc_crawler.utils.normalization import normalize_spec_number, normalize_tdoc_id, resolve_release_to_full_version

from threegpp_ai.models import WorkspaceNotFoundError
from threegpp_ai.operations.workspace_names import DEFAULT_WORKSPACE, is_default_workspace, normalize_workspace_name
@@ -581,9 +580,21 @@ async def checkout_spec_to_workspace(
            _logger.debug("Spec %s (release %s) already checked out at %s", spec_number, requested_release, spec_dir)
            return spec_dir

    # Need to checkout the spec - reuse DB connection from resolve_spec_release_from_db
    # Need to checkout the spec - auto-crawl if not in database
    try:
        async with SpecDatabase(db_path) as db:
            # Check if spec is in database, if not, crawl it first
            versions = await db.get_spec_versions(normalized)
            if not versions:
                _logger.info("Spec %s not found in database, auto-crawling...", normalized)
                sources = build_default_spec_sources()
                await db.crawl_specs([normalized], resolved_release, sources)
                # Re-check after crawl
                versions = await db.get_spec_versions(normalized)
                if not versions:
                    _logger.warning("Spec %s not found after crawl - spec may not exist", normalized)
                    return None

            checkout_paths = await checkout_specs_async(
                spec_numbers=[spec_number],
                checkout_dir=checkout_base,