Spaces:

brickfrog
/

ankigen

Running

App Files Files Community

ankigen / ankigen_core /context7.py

brickfrog

Upload folder using huggingface_hub

2ec553e verified 19 days ago

raw

history blame contribute delete

12.4 kB

	"""Context7 integration for library documentation"""

	import asyncio
	import subprocess
	import json
	from typing import Optional, Dict, Any
	from tenacity import (
	retry,
	stop_after_attempt,
	wait_exponential,
	retry_if_exception_type,
	)
	from ankigen_core.logging import logger
	from ankigen_core.exceptions import (
	ValidationError,
	)

	# Maximum accepted length for a library identifier; longer values are rejected
	# by validation before anything is sent to Context7.
	MAX_STRING_LENGTH = 200 # Prevent excessively long inputs
	# Seconds a single Context7 subprocess round trip may take before the process
	# is killed and the call is treated as timed out.
	SUBPROCESS_TIMEOUT = 60.0 # 60 second timeout for Context7 calls


	class Context7Client:
	    """Context7 MCP client for fetching library documentation.

	    Every tool call launches a fresh ``npx @upstash/context7-mcp`` subprocess
	    and exchanges newline-delimited JSON-RPC messages with it over
	    stdin/stdout, so the client itself holds no state between calls.
	    """

	    # JSON-RPC ids used to pair responses with the two requests sent per call.
	    _INIT_REQUEST_ID = 0
	    _TOOL_REQUEST_ID = 1
	    # Seconds to wait for a killed subprocess to be reaped.
	    _CLEANUP_TIMEOUT = 5.0

	    def __init__(self):
	        pass  # No state needed - each call creates fresh subprocess

	    async def call_context7_tool(
	        self, tool_name: str, args: Dict[str, Any]
	    ) -> Optional[Dict[str, Any]]:
	        """Call a Context7 tool via direct JSONRPC with retry logic.

	        Timeouts and connection errors are retried (up to three attempts with
	        exponential backoff) by the private helper. Retrying lives there, and
	        not on this method, because this method converts every exception into
	        an error dict; a retry decorator placed here would never observe one.

	        Args:
	            tool_name: Name of the MCP tool to invoke.
	            args: Arguments forwarded to the tool.

	        Returns:
	            ``{"text": ..., "success": True}`` on success, otherwise
	            ``{"error": ..., "success": False}``. Exceptions are logged and
	            reported through the error dict, never propagated.
	        """
	        try:
	            return await self._call_context7_tool_with_retry(tool_name, args)
	        except Exception as e:
	            logger.error(f"Error calling Context7 tool {tool_name}: {e}")
	            return {"error": str(e), "success": False}

	    @retry(
	        stop=stop_after_attempt(3),
	        wait=wait_exponential(multiplier=1, min=2, max=10),
	        retry=retry_if_exception_type((TimeoutError, ConnectionError)),
	        reraise=True,
	    )
	    async def _call_context7_tool_with_retry(
	        self, tool_name: str, args: Dict[str, Any]
	    ) -> Dict[str, Any]:
	        """Run one Context7 subprocess round trip; raises so tenacity can retry.

	        Raises:
	            TimeoutError: The subprocess did not finish within
	                ``SUBPROCESS_TIMEOUT`` seconds (it is killed first).
	        """
	        init_request = {
	            "jsonrpc": "2.0",
	            "id": self._INIT_REQUEST_ID,
	            "method": "initialize",
	            "params": {
	                "protocolVersion": "2025-06-18",
	                "capabilities": {},
	                "clientInfo": {"name": "ankigen", "version": "1.0.0"},
	            },
	        }
	        tool_request = {
	            "jsonrpc": "2.0",
	            "id": self._TOOL_REQUEST_ID,
	            "method": "tools/call",
	            "params": {"name": tool_name, "arguments": args},
	        }
	        # One JSON message per line; the trailing "" yields the final newline.
	        payload = "\n".join(
	            [json.dumps(init_request), json.dumps(tool_request), ""]
	        ).encode()

	        process = await asyncio.create_subprocess_exec(
	            "npx",
	            "@upstash/context7-mcp",
	            stdin=subprocess.PIPE,
	            stdout=subprocess.PIPE,
	            stderr=subprocess.PIPE,
	        )

	        try:
	            stdout, _stderr = await asyncio.wait_for(
	                process.communicate(input=payload),
	                timeout=SUBPROCESS_TIMEOUT,
	            )
	        except asyncio.TimeoutError as exc:
	            await self._kill_process(process)
	            raise TimeoutError(
	                f"Context7 subprocess timed out after {SUBPROCESS_TIMEOUT}s"
	            ) from exc
	        except BaseException:
	            # BaseException so a cancelled task (CancelledError) does not
	            # leave the npx child process running.
	            await self._kill_process(process)
	            raise

	        tool_response = self._find_response(
	            stdout.decode(errors="replace"), self._TOOL_REQUEST_ID
	        )
	        if tool_response is None:
	            return {"error": "Invalid response format", "success": False}
	        if "result" in tool_response:
	            return self._extract_text(tool_response["result"])
	        if "error" in tool_response:
	            return {"error": tool_response["error"], "success": False}
	        return {"error": "Invalid response format", "success": False}

	    async def _kill_process(self, process: Any) -> None:
	        """Best-effort kill and reap of a subprocess that is still running."""
	        try:
	            if process.returncode is None:  # Process still running
	                process.kill()
	                # Wait for the process to actually terminate
	                await asyncio.wait_for(
	                    process.wait(), timeout=self._CLEANUP_TIMEOUT
	                )
	        except Exception as cleanup_error:
	            logger.error(f"Error during process cleanup: {cleanup_error}")

	    @staticmethod
	    def _find_response(raw_output: str, request_id: int) -> Optional[Dict[str, Any]]:
	        """Return the JSON-RPC message whose ``id`` equals ``request_id``.

	        Matching on id instead of line position keeps parsing correct when
	        stdout also carries notifications or non-JSON noise.
	        """
	        for raw_line in raw_output.splitlines():
	            candidate = raw_line.strip()
	            if not candidate:
	                continue
	            try:
	                message = json.loads(candidate)
	            except json.JSONDecodeError:
	                continue  # Not a JSON-RPC message; ignore it
	            if isinstance(message, dict) and message.get("id") == request_id:
	                return message
	        return None

	    @staticmethod
	    def _extract_text(result: Any) -> Dict[str, Any]:
	        """Convert a ``tools/call`` result into the client's success/error dict."""
	        no_content = {"error": "No content in response", "success": False}
	        if not isinstance(result, dict):
	            return no_content
	        content = result.get("content")
	        if not content:
	            return no_content
	        first_item = content[0]
	        if not isinstance(first_item, dict):
	            return no_content

	        if result.get("isError"):
	            # The tool itself failed; its content describes the error and must
	            # not be handed to callers as documentation text.
	            return {
	                "error": first_item.get("text") or "Context7 tool reported an error",
	                "success": False,
	            }
	        if "text" in first_item:
	            return {"text": first_item["text"], "success": True}
	        if first_item.get("type") == "text":
	            return {"text": "", "success": True}
	        return no_content

	    def _parse_library_response(self, text: str) -> list[Dict[str, Any]]:
	        """Parse Context7 response text into list of library dicts.

	        A new entry starts at each ``- Title:`` line; entries that never
	        received a library ID are dropped.

	        Args:
	            text: Raw text response from Context7

	        Returns:
	            List of library dicts with keys: title, id, snippets, trust
	            (snippets/trust are present only when they parsed as numbers)
	        """
	        title_prefix = "- Title:"
	        id_prefix = "- Context7-compatible library ID:"
	        # (line prefix, dict key, converter) for the optional numeric fields.
	        numeric_fields = (
	            ("- Code Snippets:", "snippets", int),
	            ("- Trust Score:", "trust", float),
	        )

	        libraries: list[Dict[str, Any]] = []
	        current_lib: Dict[str, Any] = {}

	        for raw_line in text.split("\n"):
	            line = raw_line.strip()

	            if line.startswith(title_prefix):
	                if current_lib.get("id"):
	                    libraries.append(current_lib)
	                # removeprefix strips only the leading marker, so a title that
	                # happens to contain the marker text again is left intact.
	                current_lib = {
	                    "title": line.removeprefix(title_prefix).strip().lower()
	                }
	                continue

	            if line.startswith(id_prefix):
	                current_lib["id"] = line.removeprefix(id_prefix).strip()
	                continue

	            for prefix, key, convert in numeric_fields:
	                if line.startswith(prefix):
	                    raw_value = line.removeprefix(prefix).strip()
	                    try:
	                        current_lib[key] = convert(raw_value)
	                    except ValueError:
	                        # Field is optional; scoring falls back to 0.
	                        logger.debug(
	                            f"Ignoring unparsable {key} value: {raw_value!r}"
	                        )
	                    break

	        if current_lib.get("id"):
	            libraries.append(current_lib)

	        return libraries

	    def _score_library(self, lib: Dict[str, Any], search_term: str) -> float:
	        """Score a library based on how well it matches the search term.

	        Args:
	            lib: Library dict with title, id, snippets, trust
	            search_term: Lowercase search term

	        Returns:
	            Score (higher is better match)
	        """
	        score = 0.0
	        lib_title = lib.get("title", "")
	        lib_id = lib["id"].lower()

	        # Name match: only the first (strongest) matching tier applies.
	        if lib_title == search_term:
	            score += 10000
	        elif lib_id == f"/{search_term}-dev/{search_term}":
	            score += 5000
	        elif f"/{search_term}/" in lib_id or lib_id.endswith(f"/{search_term}"):
	            score += 2000
	        elif search_term in lib_title:
	            # An exact title match was already handled by the first tier.
	            score += 200 if lib_title.startswith(search_term) else 50

	        # Bonus for code snippets (indicates main library)
	        score += lib.get("snippets", 0) / 10

	        # Bonus for trust score (high trust = official/authoritative)
	        score += lib.get("trust", 0) * 100

	        return score

	    def _select_best_library(
	        self, libraries: list[Dict[str, Any]], search_term: str
	    ) -> Optional[Dict[str, Any]]:
	        """Select the highest-scoring library from a list.

	        Args:
	            libraries: List of library dicts
	            search_term: Lowercase search term

	        Returns:
	            The library with the highest score (first one wins ties), or None
	            when no library scores above -1 (e.g. the list is empty)
	        """
	        best_lib = None
	        best_score = -1.0

	        for lib in libraries:
	            score = self._score_library(lib, search_term)

	            # Only log candidates that mention the term, to keep debug output short.
	            if search_term in lib.get("title", "") or search_term in lib["id"].lower():
	                logger.debug(
	                    f"Scoring {lib['id']}: title='{lib.get('title', '')}', "
	                    f"snippets={lib.get('snippets', 0)}, trust={lib.get('trust', 0)}, score={score:.2f}"
	                )

	            if score > best_score:
	                best_score = score
	                best_lib = lib

	        if best_lib:
	            logger.info(
	                f"Selected library: {best_lib['id']} (title: {best_lib.get('title', 'unknown')}, "
	                f"snippets: {best_lib.get('snippets', 0)}, trust: {best_lib.get('trust', 0)}, "
	                f"score: {best_score:.2f})"
	            )

	        return best_lib

	    async def resolve_library_id(self, library_name: str) -> Optional[str]:
	        """Resolve a library name to a Context7-compatible ID.

	        Args:
	            library_name: Human-readable library name, e.g. ``"react"``.

	        Returns:
	            The best-matching library ID, or None when the name is empty or
	            longer than ``MAX_STRING_LENGTH``, the tool call fails, or the
	            response lists no libraries.
	        """
	        name = library_name.strip() if isinstance(library_name, str) else ""
	        if not name or len(name) > MAX_STRING_LENGTH:
	            # Same input bound get_library_docs applies; reported as None
	            # rather than raised so existing callers see no new exception.
	            logger.warning("Invalid library name (empty or too long)")
	            return None

	        logger.info(f"Resolving library ID for: {name}")

	        result = await self.call_context7_tool(
	            "resolve-library-id", {"libraryName": name}
	        )

	        if not (result and result.get("success") and result.get("text")):
	            logger.warning(f"Could not resolve library ID for '{name}'")
	            return None

	        libraries = self._parse_library_response(result["text"])
	        if not libraries:
	            logger.warning(f"Could not resolve library ID for '{name}'")
	            return None

	        best_lib = self._select_best_library(libraries, name.lower())
	        if best_lib:
	            logger.info(f"Resolved '{name}' to ID: {best_lib['id']}")
	            return best_lib["id"]

	        logger.warning(f"Could not resolve library ID for '{name}'")
	        return None

	    async def get_library_docs(
	        self, library_id: str, topic: Optional[str] = None, tokens: int = 5000
	    ) -> Optional[str]:
	        """Get documentation for a library.

	        Args:
	            library_id: Context7-compatible ID; must start with ``/``.
	            topic: Optional topic forwarded to the tool.
	            tokens: Value sent as the tool's ``tokens`` argument.

	        Returns:
	            The documentation text, or None if the call failed or was empty.

	        Raises:
	            ValidationError: ``library_id`` is empty, does not start with
	                ``/``, or is longer than ``MAX_STRING_LENGTH``.
	        """
	        # Security: Validate library_id (should start with /)
	        if (
	            not library_id
	            or not library_id.startswith("/")
	            or len(library_id) > MAX_STRING_LENGTH
	        ):
	            logger.error(f"Invalid library ID format (security): '{library_id}'")
	            raise ValidationError("Invalid library ID format")

	        logger.info(
	            f"Fetching docs for: {library_id}" + (f" (topic: {topic})" if topic else "")
	        )

	        args: Dict[str, Any] = {
	            "context7CompatibleLibraryID": library_id,
	            "tokens": tokens,
	        }
	        if topic:
	            args["topic"] = topic

	        result = await self.call_context7_tool("get-library-docs", args)

	        if result and result.get("success") and result.get("text"):
	            docs = result["text"]
	            logger.info(f"Retrieved {len(docs)} characters of documentation")
	            return docs

	        logger.warning(f"Could not fetch docs for '{library_id}'")
	        return None

	    async def fetch_library_documentation(
	        self, library_name: str, topic: Optional[str] = None, tokens: int = 5000
	    ) -> Optional[str]:
	        """Convenience method to resolve and fetch docs in one call.

	        Returns None when the name cannot be resolved or no docs come back.
	        """
	        library_id = await self.resolve_library_id(library_name)
	        if not library_id:
	            return None

	        return await self.get_library_docs(library_id, topic, tokens)


	async def test_context7() -> None:
	    """Manual smoke test: resolve "react", then fetch its "hooks" docs.

	    Progress and results are printed to stdout; nothing is returned.
	    """
	    ctx = Context7Client()

	    print("Testing Context7 integration...")

	    # Step 1: name -> Context7 library ID
	    resolved_id = await ctx.resolve_library_id("react")
	    if not resolved_id:
	        print("✗ Failed to resolve library ID")
	        return

	    print(f"✓ Resolved 'react' to ID: {resolved_id}")

	    # Step 2: library ID -> documentation text
	    documentation = await ctx.get_library_docs(
	        resolved_id, topic="hooks", tokens=2000
	    )
	    if not documentation:
	        print("✗ Failed to fetch documentation")
	        return

	    print(f"✓ Fetched {len(documentation)} characters of documentation")
	    print(f"Preview: {documentation[:300]}...")


	# Script entry point: run the manual Context7 smoke test when this file is
	# executed directly rather than imported.
	if __name__ == "__main__":
	asyncio.run(test_context7())