"""Context7 integration for library documentation""" import asyncio import subprocess import json from typing import Optional, Dict, Any from tenacity import ( retry, stop_after_attempt, wait_exponential, retry_if_exception_type, ) from ankigen_core.logging import logger from ankigen_core.exceptions import ( ValidationError, ) MAX_STRING_LENGTH = 200 # Prevent excessively long inputs SUBPROCESS_TIMEOUT = 60.0 # 60 second timeout for Context7 calls class Context7Client: """Context7 MCP client for fetching library documentation""" def __init__(self): pass # No state needed - each call creates fresh subprocess @retry( stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=2, max=10), retry=retry_if_exception_type((TimeoutError, ConnectionError)), reraise=True, ) async def call_context7_tool( self, tool_name: str, args: Dict[str, Any] ) -> Optional[Dict[str, Any]]: """Call a Context7 tool via direct JSONRPC with retry logic""" try: # Build the JSONRPC request request = { "jsonrpc": "2.0", "id": 1, "method": "tools/call", "params": {"name": tool_name, "arguments": args}, } # Call the Context7 server process = await asyncio.create_subprocess_exec( "npx", "@upstash/context7-mcp", stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, ) # Send initialization first init_request = { "jsonrpc": "2.0", "id": 0, "method": "initialize", "params": { "protocolVersion": "2025-06-18", "capabilities": {}, "clientInfo": {"name": "ankigen", "version": "1.0.0"}, }, } # Send both requests with timeout protection # Optimize: Use list join for string concatenation input_data = "\n".join([json.dumps(init_request), json.dumps(request), ""]) try: stdout, stderr = await asyncio.wait_for( process.communicate(input=input_data.encode()), timeout=SUBPROCESS_TIMEOUT, ) except asyncio.TimeoutError: # Proper process cleanup on timeout try: if process.returncode is None: # Process still running process.kill() # Wait for process to actually terminate await asyncio.wait_for(process.wait(), timeout=5.0) except Exception as cleanup_error: logger.error(f"Error during process cleanup: {cleanup_error}") raise TimeoutError( f"Context7 subprocess timed out after {SUBPROCESS_TIMEOUT}s" ) except Exception: # Clean up process on any other error try: if process.returncode is None: process.kill() await asyncio.wait_for(process.wait(), timeout=5.0) except Exception: pass # Best effort cleanup raise # Parse responses responses = stdout.decode().strip().split("\n") if len(responses) >= 2: # Skip init response, get tool response tool_response = json.loads(responses[1]) if "result" in tool_response: result = tool_response["result"] # Extract content from the result if "content" in result and result["content"]: content_item = result["content"][0] if "text" in content_item: return {"text": content_item["text"], "success": True} elif "type" in content_item and content_item["type"] == "text": return { "text": content_item.get("text", ""), "success": True, } return {"error": "No content in response", "success": False} elif "error" in tool_response: return {"error": tool_response["error"], "success": False} return {"error": "Invalid response format", "success": False} except Exception as e: logger.error(f"Error calling Context7 tool {tool_name}: {e}") return {"error": str(e), "success": False} def _parse_library_response(self, text: str) -> list[Dict[str, Any]]: """Parse Context7 response text into list of library dicts. Args: text: Raw text response from Context7 Returns: List of library dicts with keys: title, id, snippets, trust """ libraries = [] lines = text.split("\n") current_lib: Dict[str, Any] = {} for line in lines: line = line.strip() if line.startswith("- Title:"): if current_lib and current_lib.get("id"): libraries.append(current_lib) current_lib = {"title": line.replace("- Title:", "").strip().lower()} elif line.startswith("- Context7-compatible library ID:"): lib_id = line.replace("- Context7-compatible library ID:", "").strip() if current_lib is not None: current_lib["id"] = lib_id elif line.startswith("- Code Snippets:"): snippets_str = line.replace("- Code Snippets:", "").strip() try: if current_lib is not None: current_lib["snippets"] = int(snippets_str) except ValueError: pass elif line.startswith("- Trust Score:"): score_str = line.replace("- Trust Score:", "").strip() try: if current_lib is not None: current_lib["trust"] = float(score_str) except ValueError: pass if current_lib and current_lib.get("id"): libraries.append(current_lib) return libraries def _score_library(self, lib: Dict[str, Any], search_term: str) -> float: """Score a library based on how well it matches the search term. Args: lib: Library dict with title, id, snippets, trust search_term: Lowercase search term Returns: Score (higher is better match) """ score = 0.0 lib_title = lib.get("title", "") lib_id = lib["id"].lower() # Exact title match gets highest priority if lib_title == search_term: score += 10000 elif lib_id == f"/{search_term}-dev/{search_term}": score += 5000 elif f"/{search_term}/" in lib_id or lib_id.endswith(f"/{search_term}"): score += 2000 elif search_term in lib_title: if lib_title == search_term: score += 1000 elif lib_title.startswith(search_term): score += 200 else: score += 50 # Bonus for code snippets (indicates main library) snippets = lib.get("snippets", 0) score += snippets / 10 # Bonus for trust score (high trust = official/authoritative) trust = lib.get("trust", 0) score += trust * 100 return score def _select_best_library( self, libraries: list[Dict[str, Any]], search_term: str ) -> Optional[Dict[str, Any]]: """Select the best matching library from a list. Args: libraries: List of library dicts search_term: Lowercase search term Returns: Best matching library dict, or None if no match """ best_lib = None best_score = -1.0 for lib in libraries: score = self._score_library(lib, search_term) if search_term in lib.get("title", "") or search_term in lib["id"].lower(): logger.debug( f"Scoring {lib['id']}: title='{lib.get('title', '')}', " f"snippets={lib.get('snippets', 0)}, trust={lib.get('trust', 0)}, score={score:.2f}" ) if score > best_score: best_score = score best_lib = lib if best_lib: logger.info( f"Selected library: {best_lib['id']} (title: {best_lib.get('title', 'unknown')}, " f"snippets: {best_lib.get('snippets', 0)}, trust: {best_lib.get('trust', 0)}, " f"score: {best_score:.2f})" ) return best_lib async def resolve_library_id(self, library_name: str) -> Optional[str]: """Resolve a library name to a Context7-compatible ID""" logger.info(f"Resolving library ID for: {library_name}") result = await self.call_context7_tool( "resolve-library-id", {"libraryName": library_name} ) if not (result and result.get("success") and result.get("text")): logger.warning(f"Could not resolve library ID for '{library_name}'") return None libraries = self._parse_library_response(result["text"]) if not libraries: logger.warning(f"Could not resolve library ID for '{library_name}'") return None best_lib = self._select_best_library(libraries, library_name.lower()) if best_lib: logger.info(f"Resolved '{library_name}' to ID: {best_lib['id']}") return best_lib["id"] logger.warning(f"Could not resolve library ID for '{library_name}'") return None async def get_library_docs( self, library_id: str, topic: Optional[str] = None, tokens: int = 5000 ) -> Optional[str]: """Get documentation for a library""" # Security: Validate library_id (should start with /) if ( not library_id or not library_id.startswith("/") or len(library_id) > MAX_STRING_LENGTH ): logger.error(f"Invalid library ID format (security): '{library_id}'") raise ValidationError("Invalid library ID format") logger.info( f"Fetching docs for: {library_id}" + (f" (topic: {topic})" if topic else "") ) args = {"context7CompatibleLibraryID": library_id, "tokens": tokens} if topic: args["topic"] = topic result = await self.call_context7_tool("get-library-docs", args) if result and result.get("success") and result.get("text"): docs = result["text"] logger.info(f"Retrieved {len(docs)} characters of documentation") return docs logger.warning(f"Could not fetch docs for '{library_id}'") return None async def fetch_library_documentation( self, library_name: str, topic: Optional[str] = None, tokens: int = 5000 ) -> Optional[str]: """Convenience method to resolve and fetch docs in one call""" library_id = await self.resolve_library_id(library_name) if not library_id: return None return await self.get_library_docs(library_id, topic, tokens) async def test_context7() -> None: """Test the Context7 integration""" client = Context7Client() print("Testing Context7 integration...") # Test resolving a library library_id = await client.resolve_library_id("react") if library_id: print(f"✓ Resolved 'react' to ID: {library_id}") # Test fetching docs docs = await client.get_library_docs(library_id, topic="hooks", tokens=2000) if docs: print(f"✓ Fetched {len(docs)} characters of documentation") print(f"Preview: {docs[:300]}...") else: print("✗ Failed to fetch documentation") else: print("✗ Failed to resolve library ID") if __name__ == "__main__": asyncio.run(test_context7())