Commit ff0a9919 authored by Jan Reimes
Browse files

refactor(workspace): update workspace directory handling

* Change workspace deletion and processing to use workspaces_dir instead of checkout_dir.
* Add workspaces_dir property to CacheManager for better directory management.
* Add the TDC_WORKSPACES_DIR environment variable mapping for the workspaces directory.
* Modify settings to include workspaces directory configuration.
parent af64c006
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -132,7 +132,7 @@ def workspace_delete(
    if delete_llm_wiki:
        try:
            manager = resolve_cache_manager()
            llm_wiki_dir = manager.checkout_dir / normalized / "wiki"
            llm_wiki_dir = manager.workspaces_dir / normalized / "wiki"
            if llm_wiki_dir.exists():
                shutil.rmtree(llm_wiki_dir)
                console.print(f"[green]Deleted .llm-wiki folder for '{normalized}'.[/green]")
@@ -226,7 +226,7 @@ def workspace_process(
        raise typer.Exit(1)

    manager = resolve_cache_manager()
    wiki_source_dir_base = manager.checkout_dir / normalized / "sources"
    wiki_source_dir_base = manager.workspaces_dir / normalized / "sources"

    processed = 0
    failed = 0
+6 −0
Original line number Diff line number Diff line
@@ -28,6 +28,7 @@ from typing import ClassVar
DEFAULT_DATABASE_FILENAME = "3gpp_crawler.db"
DEFAULT_HTTP_CACHE_FILENAME = "http-cache.sqlite3"
# Subdirectory of the cache directory that holds checked-out documents.
DEFAULT_CHECKOUT_DIRNAME = "checkout"
# Subdirectory of the cache directory that holds workspace data; kept
# separate from the document checkout directory.
DEFAULT_WORKSPACES_DIRNAME = "workspaces"
# File name of the workspace registry JSON file.
WORKSPACE_REGISTRY_FILENAME = "workspaces.json"


@@ -97,6 +98,11 @@ class CacheManager:
        """Path to checkout directory for documents."""
        return self._cache_dir / DEFAULT_CHECKOUT_DIRNAME

    @property
    def workspaces_dir(self) -> Path:
        """Return the directory that holds workspace data.

        This is the ``DEFAULT_WORKSPACES_DIRNAME`` subdirectory of the cache
        directory, i.e. a location distinct from the document checkout
        directory.
        """
        return self._cache_dir.joinpath(DEFAULT_WORKSPACES_DIRNAME)

    @property
    def workspace_registry_file(self) -> Path:
        """Path to workspace registry JSON file."""
+1 −0
Original line number Diff line number Diff line
@@ -65,6 +65,7 @@ class ConfigEnvVar(StrEnum):
    TDC_AGENDA_PATTERN_EXCLUDE = "crawl.agenda_pattern_exclude"
    TDC_CHECKOUT = "crawl.checkout"
    TDC_CHECKOUT_DIR = "path.checkout_dir"
    TDC_WORKSPACES_DIR = "path.workspaces_dirname"
    TDC_VERBOSITY = "verbosity"
    TDC_USE_WHATTHESPEC = "http.use_whatthespec"

+11 −0
Original line number Diff line number Diff line
@@ -26,6 +26,7 @@ _DEFAULT_CACHE_DIR_STR = "~/.3gpp-crawler"
_DEFAULT_DATABASE_FILENAME = "3gpp_crawler.db"
_DEFAULT_HTTP_CACHE_FILENAME = "http-cache.sqlite3"
# Default subdirectory name (under the cache directory) for checked-out documents.
_DEFAULT_CHECKOUT_DIRNAME = "checkout"
# Default subdirectory name for workspace data (sources, wiki).
_DEFAULT_WORKSPACES_DIRNAME = "workspaces"


class PathConfig(BaseSettings):
@@ -50,6 +51,10 @@ class PathConfig(BaseSettings):
        default=_DEFAULT_CHECKOUT_DIRNAME,
        description="Subdirectory name for checked-out documents",
    )
    workspaces_dirname: str = Field(
        default=_DEFAULT_WORKSPACES_DIRNAME,
        description="Subdirectory name for workspace data (sources, wiki)",
    )

    @property
    def db_file(self) -> Path:
@@ -66,6 +71,11 @@ class PathConfig(BaseSettings):
        """Path to the checkout directory for documents."""
        return self.cache_dir / _DEFAULT_CHECKOUT_DIRNAME

    @property
    def workspaces_dir(self) -> Path:
        """Path to the workspaces directory (separate from document checkouts).

        The path is ``cache_dir`` joined with the configurable
        ``workspaces_dirname`` field, so a subdirectory name supplied through
        settings is honoured. The field defaults to
        ``_DEFAULT_WORKSPACES_DIRNAME``, which yields ``<cache_dir>/workspaces``
        when nothing is overridden.

        Returns:
            The workspaces directory path; it is not created here.
        """
        # Use the configured field rather than the module constant; otherwise
        # the ``workspaces_dirname`` setting would have no effect.
        return self.cache_dir / self.workspaces_dirname

    @field_validator("cache_dir", mode="before")
    @classmethod
    def _resolve_cache_dir(cls, value: str | Path | None) -> Path:
@@ -279,6 +289,7 @@ class ThreeGPPConfig(BaseSettings):
        """
        self.path.cache_dir.mkdir(parents=True, exist_ok=True)
        self.path.checkout_dir.mkdir(parents=True, exist_ok=True)
        self.path.workspaces_dir.mkdir(parents=True, exist_ok=True)

    @classmethod
    def from_settings(
+2 −2
Original line number Diff line number Diff line
@@ -57,7 +57,7 @@ def create_workspace(workspace: str | None, auto_build: bool = False, descriptio
    # Create workspace directory structure
    try:
        manager = resolve_cache_manager()
        ws_dir = manager.checkout_dir / normalized_workspace
        ws_dir = manager.workspaces_dir / normalized_workspace
        ws_dir.mkdir(parents=True, exist_ok=True)

        sources_dir = ws_dir / "sources"
@@ -113,7 +113,7 @@ def delete_workspace(workspace: str | None, *, delete_artifacts: bool = False) -
        _logger.info("Deleting artifacts for %d members", len(members))
        try:
            manager = resolve_cache_manager()
            sources_base = manager.checkout_dir / normalized_workspace / "sources"
            sources_base = manager.workspaces_dir / normalized_workspace / "sources"
            for member in members:
                member_dir = sources_base / member.source_item_id
                delete_artifact_folder(member_dir)