Commit 74d4a1bd authored by Jan Reimes's avatar Jan Reimes
Browse files

feat(ai): add active workspace concept

- Store active workspace in .ai/active_workspace file
- Add ai workspace activate <name> command
- Add ai workspace deactivate command
- Make --workspace optional for all workspace commands
- Show active workspace (*) in workspace list output
- Update documentation with new workflow
parent 0e59a813
Loading
Loading
Loading
Loading
+51 −10
Original line number Diff line number Diff line
@@ -93,15 +93,19 @@ ______________________________________________________________________

The AI module follows a workspace-based workflow for organizing and querying your document collection:

### 1. Create Workspace
### 1. Create and Activate Workspace

```bash
# Create a new workspace for your project
tdoc-crawler ai workspace create my-project --auto-build
tdoc-crawler ai workspace create my-project

# Activate it so you don't need --workspace for other commands
tdoc-crawler ai workspace activate my-project
```

### 2. Process Documents (Generate Embeddings)
Once activated, all workspace commands use the active workspace by default. No need to pass `-w` every time.

### 2. Add TDocs and Process

After adding TDocs to your workspace, process them to generate RAG/GraphRAG embeddings:

@@ -120,11 +124,16 @@ Note: If you created the workspace with `--auto-build`, documents are processed
Once you have a workspace with documents, query using semantic search and knowledge graph (RAG + GraphRAG):

```bash
tdoc-crawler ai query --workspace my-project "your query here"
# Query the active workspace
tdoc-crawler ai query --query "your query here"

# Or specify a workspace explicitly
tdoc-crawler ai query -w my-project --query "your query here"
```

Note: `--workspace` is required. This command uses both vector embeddings (RAG) and the knowledge graph (GraphRAG) to provide comprehensive results.
Note: Uses active workspace if `-w` is not provided. Results combine vector embeddings (RAG) and knowledge graph (GraphRAG).

### 4. Single TDoc Operations
### 3. Single TDoc Operations

For individual TDocs, use the `summarize` and `convert` commands:
@@ -154,25 +163,34 @@ Options:
- `--auto-build`: Automatically process documents when added to workspace

# List all workspaces
# Shows (*) next to the active workspace
tdoc-crawler ai workspace list

# Activate a workspace (sets as default for workspace commands)
tdoc-crawler ai workspace activate <name>

# Deactivate the active workspace
tdoc-crawler ai workspace deactivate

# Get workspace details
tdoc-crawler ai workspace get <name>

# Delete a workspace
tdoc-crawler ai workspace delete <name>
```

### Querying

Query the knowledge graph using semantic embeddings and knowledge graph (RAG + GraphRAG).
Query the knowledge base using semantic embeddings and knowledge graph (RAG + GraphRAG).

```bash
# Query the active workspace
tdoc-crawler ai query --query "your query here"

# Query a specific workspace
tdoc-crawler ai query --workspace <workspace_name> --query "your query here"
```

Note: `--workspace` is required. This command uses both vector embeddings (RAG) and the knowledge graph (GraphRAG) to provide comprehensive results.

Note: If `-w`/`--workspace` is not provided, the active workspace is used. The query combines vector embeddings (RAG) with the knowledge graph (GraphRAG).

### Single TDoc Operations

#### Summarize a TDoc
@@ -204,10 +222,33 @@ Options:
- `--output FILE.md`: Write output to file (prints to stdout if not specified)
- `--json-output`: Output raw JSON

### Workspace Members and Processing

Add TDocs to workspaces and process them to generate embeddings and knowledge graph.

```bash
# Add members to the active workspace
tdoc-crawler ai workspace add-members S4-251971 S4-251972

# Add members to a specific workspace
tdoc-crawler ai workspace add-members -w my-project S4-251971 S4-251972

# List members in the active workspace
tdoc-crawler ai workspace list-members

# Process all TDocs in the active workspace
tdoc-crawler ai workspace process

# Process with options
tdoc-crawler ai workspace process -w my-project --force
```

______________________________________________________________________

## Model Providers

### Supported LLM Providers

| Provider | Example Model | API Key Env Var | Notes |
+101 −8
Original line number Diff line number Diff line
@@ -32,6 +32,49 @@ ai_app = typer.Typer(help="AI document processing commands")
console = Console()


def _get_ai_dir() -> Path:
    """Return the ``.ai`` directory under the registered cache root.

    The directory (and any missing parents) is created on demand, so callers
    can write into the returned path without further checks.
    """
    cache_root = CacheManager().register().root
    target = cache_root / ".ai"
    # exist_ok makes this safe to call repeatedly.
    target.mkdir(parents=True, exist_ok=True)
    return target


def get_active_workspace() -> str | None:
    """Return the name of the active workspace, or None when none is set.

    The name is read from the ``active_workspace`` file inside the ``.ai``
    directory. A missing file and a file that contains only whitespace are
    both reported as "no active workspace".

    Returns:
        The stored workspace name with surrounding whitespace stripped, or
        None if nothing is stored.
    """
    active_file = _get_ai_dir() / "active_workspace"
    try:
        stored_name = active_file.read_text().strip()
    except FileNotFoundError:
        # Read directly rather than checking exists() first: the file may be
        # removed (e.g. by a concurrent deactivate) between check and read.
        return None
    return stored_name or None


def set_active_workspace(name: str) -> None:
    """Persist *name* as the active workspace.

    The name is passed through ``normalize_workspace_name`` before it is
    written to the ``active_workspace`` file in the ``.ai`` directory,
    replacing any previously stored value.
    """
    target = _get_ai_dir() / "active_workspace"
    normalized = normalize_workspace_name(name)
    target.write_text(normalized)


def clear_active_workspace() -> None:
    """Remove the stored active workspace, if any.

    Deletes the ``active_workspace`` file inside the ``.ai`` directory.
    Calling this when no workspace is active is a no-op.
    """
    active_file = _get_ai_dir() / "active_workspace"
    # missing_ok avoids the exists()/unlink() race: if the file is already gone
    # (or removed concurrently) there is simply nothing left to clear.
    active_file.unlink(missing_ok=True)


def resolve_workspace(workspace: str | None) -> str:
    """Pick the workspace a command should operate on.

    An explicitly supplied (non-empty) *workspace* wins and is returned in
    normalized form. Otherwise the stored active workspace is returned as-is.

    Raises:
        typer.Exit: with code 1, after printing an error, when neither an
            explicit nor an active workspace is available.
    """
    if workspace:
        return normalize_workspace_name(workspace)

    fallback = get_active_workspace()
    if not fallback:
        console.print("[red]No workspace specified and no active workspace set. Use -w <name> or 'ai workspace activate <name>'[/red]")
        raise typer.Exit(1)
    return fallback

# NOTE(review): `ai_app` and `console` appear to be assigned a second time here;
# an identical `console = Console()` (and presumably `ai_app = ...`) already
# precedes the helper functions above. Confirm and keep only one pair.
ai_app = typer.Typer(help="AI document processing commands")
console = Console()


@ai_app.command("summarize")
def ai_summarize(
    tdoc_id: Annotated[str, typer.Argument(..., help="TDoc ID to summarize")],
@@ -89,14 +132,14 @@ def ai_convert(
@ai_app.command("query")
def ai_query(
    query: Annotated[str, typer.Option(..., "--query", help="Semantic search query")],
    workspace: Annotated[str, typer.Option(..., "--workspace", help="Workspace name")],
    workspace: Annotated[str | None, typer.Option("--workspace", "-w", help="Workspace name")] = None,
    top_k: Annotated[int, typer.Option("--top-k", help="Number of embedding results to return")] = 5,
    json_output: Annotated[bool, typer.Option("--json", help="Output as JSON")] = False,
) -> None:
    """Search TDocs using semantic embeddings and knowledge graph (RAG + GraphRAG)."""
    workspace = resolve_workspace(workspace)
    embedding_results = query_embeddings(query, top_k=top_k, workspace=workspace)
    graph_result = query_graph(query)

    payload = {
        "query": query,
        "workspace": workspace,
@@ -173,13 +216,15 @@ def workspace_list(
    else:
        table = Table(title="Workspaces")
        table.add_column("Name", style="cyan")
        table.add_column("Default", style="green")
        table.add_column("Active", style="green")
        table.add_column("Status", style="yellow")
        table.add_column("Created", style="magenta")
        active_workspace = get_active_workspace()
        for ws in workspaces:
            is_active = ws.workspace_name == active_workspace
            table.add_row(
                ws.workspace_name,
                "Yes" if ws.is_default else "No",
                "*" if is_active else "",
                ws.status.value,
                ws.created_at.isoformat() if ws.created_at else "",
            )
@@ -218,14 +263,58 @@ def workspace_get(
        console.print(table)



@_workspace_app.command("activate")
def workspace_activate(
    name: Annotated[str, typer.Argument(..., help="Workspace name to activate")],
    json_output: Annotated[bool, typer.Option("--json", help="Output as JSON")] = False,
) -> None:
    """Mark an existing workspace as the active one.

    The workspace is looked up in storage first; if it is not found, an error
    is reported (JSON or coloured text, depending on ``--json``) and the
    command exits with code 1. On success the normalized name is stored and
    echoed back.
    """
    cache = CacheManager().register()
    store = AiStorage(cache.root / ".ai" / "lancedb")

    # Refuse to activate something that storage does not know about.
    if store.get_workspace(name) is None:
        if json_output:
            typer.echo(json.dumps({"error": f"Workspace '{name}' not found"}))
        else:
            console.print(f"[red]Workspace '{name}' not found[/red]")
        raise typer.Exit(1)

    set_active_workspace(name)

    # Report the normalized form of the name.
    activated = normalize_workspace_name(name)
    if json_output:
        typer.echo(json.dumps({"active_workspace": activated}))
    else:
        console.print(f"[green]Activated workspace: {activated}[/green]")


@_workspace_app.command("deactivate")
def workspace_deactivate(
    json_output: Annotated[bool, typer.Option("--json", help="Output as JSON")] = False,
) -> None:
    """Unset the active workspace and report what was deactivated.

    When no workspace is active, nothing is changed and a notice is printed
    instead. Output is JSON when ``--json`` is given, coloured text otherwise.
    """
    previous = get_active_workspace()

    if previous is not None:
        clear_active_workspace()
        if json_output:
            typer.echo(json.dumps({"active_workspace": None, "deactivated": previous}))
        else:
            console.print(f"[green]Deactivated workspace: {previous}[/green]")
        return

    # Nothing was active: just say so.
    if json_output:
        typer.echo(json.dumps({"active_workspace": None, "message": "No active workspace set"}))
    else:
        console.print("[yellow]No active workspace set[/yellow]")



@_workspace_app.command("add-members")
def workspace_add_members(
    workspace: Annotated[str, typer.Option("--workspace", "-w", help="Workspace name")],
    items: Annotated[list[str], typer.Argument(..., help="Source item IDs to add")],
    workspace: Annotated[str | None, typer.Option("--workspace", "-w", help="Workspace name")] = None,
    items: Annotated[list[str], typer.Argument(..., help="Source item IDs to add")] = None,  # type: ignore[assignment]
    kind: Annotated[str, typer.Option("--kind", help="Source kind (tdoc, spec, other)")] = "tdoc",
    json_output: Annotated[bool, typer.Option("--json", help="Output as JSON")] = False,
) -> None:
    """Add source items to a workspace."""
    workspace = resolve_workspace(workspace)
    manager = CacheManager().register()

    storage = AiStorage(manager.root / ".ai" / "lancedb")
@@ -242,11 +331,12 @@ def workspace_add_members(

@_workspace_app.command("list-members")
def workspace_list_members(
    workspace: Annotated[str, typer.Option("--workspace", "-w", help="Workspace name")],
    workspace: Annotated[str | None, typer.Option("--workspace", "-w", help="Workspace name")] = None,
    include_inactive: Annotated[bool, typer.Option("--include-inactive", help="Include inactive members")] = False,
    json_output: Annotated[bool, typer.Option("--json", help="Output as JSON")] = False,
) -> None:
    """List members of a workspace."""
    workspace = resolve_workspace(workspace)
    manager = CacheManager().register()

    storage = AiStorage(manager.root / ".ai" / "lancedb")
@@ -270,12 +360,15 @@ def workspace_list_members(

@_workspace_app.command("process")
def workspace_process(
    workspace: Annotated[str, typer.Option("--workspace", "-w", help="Workspace name")],
    workspace: Annotated[str | None, typer.Option("--workspace", "-w", help="Workspace name")] = None,
    new_only: Annotated[bool, typer.Option("--new-only", help="Process only TDocs not already completed")] = True,
    force_rerun: Annotated[bool, typer.Option("--force", help="Force reprocessing of all TDocs")] = False,
    json_output: Annotated[bool, typer.Option("--json", help="Output as JSON")] = False,
) -> None:
    """Process all TDoc members in a workspace through the AI pipeline."""
    workspace = resolve_workspace(workspace)
    manager = CacheManager().register()
    storage = AiStorage(manager.root / ".ai" / "lancedb")
    manager = CacheManager().register()
    storage = AiStorage(manager.root / ".ai" / "lancedb")