Commit 1a6e73bc authored by Jan Reimes's avatar Jan Reimes
Browse files

♻️ refactor(summarize): increase max tokens and remove input limits for document processing

parent 13779145
Loading
Loading
Loading
Loading
+2 −1
Original line number | Diff line number | Diff line
@@ -289,7 +289,8 @@ def _run_summarize_stage(
    except LlmConfigError as exc:
        logger.warning(f"Falling back to extractive summary for {document_id}: {exc}")
        words = markdown.split()
        abstract = " ".join(words[:220]).strip() or markdown[:1000]
        # TODO: limit number of words (220?) and/or markdown characters (1000?) to ensure we don't exceed storage limits for summary fields
        abstract = " ".join(words).strip() or markdown
        key_points = [line.strip(" -\t") for line in markdown.splitlines() if line.strip() and not line.lstrip().startswith("#")][:5]
        affected_specs = sorted(set(re.findall(r"(?:TS|TR)\s*\d{2}\.\d{3,4}", markdown)))
        storage.save_summary(
+5 −4
Original line number | Diff line number | Diff line
@@ -98,7 +98,7 @@ class LiteLLMClient:
        self,
        prompt: str,
        system_prompt: str = SUMMARY_SYSTEM_PROMPT,
        max_tokens: int = 2000,
        max_tokens: int = 256000,
        model: str | None = None,
    ) -> str:
        """Generate completion from prompt.
@@ -231,16 +231,17 @@ def summarize_document(
    client = _get_llm_client()

    # Generate abstract
    abstract_prompt = ABSTRACT_PROMPT.format(content=markdown[:5000])  # Limit input
    abstract_prompt = ABSTRACT_PROMPT.format(content=markdown)  # TODO: Limit input (5000?)
    try:
        abstract = client.complete(abstract_prompt, model=config.llm_model)
    except Exception as exc:
        msg = f"LLM endpoint is unreachable or misconfigured: {exc}"
        raise LlmConfigError(msg) from exc

    # TODO: Limit input (5000?)
    structured_prompt = STRUCTURED_SUMMARY_PROMPT.format(
        abstract=abstract,
        content=markdown[:5000],
        content=markdown,
    )
    try:
        structured_payload = client.complete(structured_prompt)
@@ -461,7 +462,7 @@ def summarize_tdoc(
    # Generate summary
    summary_prompt = CONCISE_SUMMARY_PROMPT.format(
        target_words=words,
        content=content[:8000],  # Limit input size
        content=content,  # TODO: Limit input size (8000)?
    )
    try:
        summary = client.complete(summary_prompt, model=config.llm_model, max_tokens=words * 4)