Commit 5fa40da8 authored by Jan Reimes
Browse files

feat(graph): make max_words parameter configurable in _synthesize_with_llm

📝 docs(summarize): update parameter name from words to max_words for consistency
parent 9b1ae9cb
Loading
Loading
Loading
Loading
+4 −4
Original line number Diff line number Diff line
@@ -670,7 +670,7 @@ def query_graph(
    # Generate answer based on query level and context availability
    if embedding_chunks and query_level in ("advanced", "medium"):
        # Use LLM synthesis with RAG + GraphRAG context
-        answer = _synthesize_with_llm(query, embedding_chunks, matching_nodes, edges, max_words=300)
+        answer = _synthesize_with_llm(query, embedding_chunks, matching_nodes, edges, max_words=max_words)
    else:
        # Use simple/medium keyword-based answer
        answer = _generate_answer(query, matching_nodes, edges, query_level)
@@ -699,7 +699,7 @@ Instructions:
1. Answer the question based ONLY on the provided context
2. If the context doesn't contain enough information, say so
3. Cite specific document IDs and sections when possible
-4. Be concise but comprehensive (target: ~{max_words} words)
+4. Be concise but comprehensive (target: less than {max_words} words)

Question: {query}

@@ -762,10 +762,10 @@ def _synthesize_with_llm(
    )

    # Call LLM - calculate roughly 1.3 tokens per word
-    int(max_words * 1.3)
+    max_tokens = max(4096, int(max_words * 1.3))
    try:
        llm_client = _get_llm_client()
-        answer = llm_client.complete(prompt, max_tokens=4096)
+        answer = llm_client.complete(prompt, max_tokens=max_tokens)
        return answer
    except Exception as e:
        logger.error(f"LLM synthesis failed: {e}")
+5 −8
Original line number Diff line number Diff line
@@ -390,7 +390,7 @@ def _extract_spec_references(markdown: str) -> list[str]:


# Prompt for concise summary with word count constraint
CONCISE_SUMMARY_PROMPT = """Generate a summary of approximately {target_words} words for this document:
CONCISE_SUMMARY_PROMPT = """Generate a summary of approximately {max_words} words or less for this document:

{content}

@@ -407,16 +407,14 @@ Example output format: ["keyword1", "keyword2", "keyword3"]"""

def summarize_tdoc(
    document_id: str,
-    words: int = 200,
-    format: str = "markdown",
+    max_words: int = 200,
    fetch_remote: bool = True,
) -> SummarizeResult:
    """Summarize a TDoc by ID.

    Args:
        document_id: Document identifier (e.g., "R1-2400001").
-        words: Target word count for summary (default: 200).
-        format: Output format - "markdown", "json", or "yaml" (default: "markdown").
+        max_words: Maximum word count for summary (default: 200).
        fetch_remote: If True, fetch TDoc metadata from WhatTheSpec if not local.

    Returns:
@@ -427,7 +425,6 @@ def summarize_tdoc(
        ValueError: If format is not supported.
    """
    # Normalize TDoc ID
-
    normalized_id = document_id.strip().upper()

    # Try to get metadata from WhatTheSpec
@@ -477,11 +474,11 @@ def summarize_tdoc(
    # Generate summary
    truncated_for_summary = _truncate_text(content, SUMMARY_INPUT_LIMIT)
    summary_prompt = CONCISE_SUMMARY_PROMPT.format(
-        target_words=words,
+        max_words=max_words,
        content=truncated_for_summary,
    )
    try:
-        summary = client.complete(summary_prompt, model=config.llm_model, max_tokens=words * 4)
+        summary = client.complete(summary_prompt, model=config.llm_model, max_tokens=max_words * 4)
    except Exception as exc:
        msg = f"LLM endpoint is unreachable or misconfigured: {exc}"
        raise LlmConfigError(msg) from exc