# ankigen/ankigen_core/context7.py
# Uploaded by brickfrog ("Upload folder using huggingface_hub"),
# commit 2ec553e (verified).
"""Context7 integration for library documentation"""
import asyncio
import subprocess
import json
from typing import Optional, Dict, Any
from tenacity import (
retry,
stop_after_attempt,
wait_exponential,
retry_if_exception_type,
)
from ankigen_core.logging import logger
from ankigen_core.exceptions import (
ValidationError,
)
# Upper bound on the length of a library ID accepted by
# Context7Client.get_library_docs; prevents excessively long inputs.
MAX_STRING_LENGTH = 200
# Seconds to wait for the Context7 subprocess to finish before it is killed
# and the call is treated as timed out.
SUBPROCESS_TIMEOUT = 60.0
class Context7Client:
    """Context7 MCP client for fetching library documentation.

    The client is stateless: every tool call starts a fresh
    ``npx @upstash/context7-mcp`` subprocess, writes an ``initialize`` request
    and a ``tools/call`` request to its stdin as newline-separated JSON in a
    single batch, and reads the JSON-RPC responses from its stdout.
    """

    # JSON-RPC ids of the two requests sent per call; the tool response is
    # picked out of stdout by its id rather than by line position.
    _INIT_REQUEST_ID = 0
    _TOOL_REQUEST_ID = 1

    # Seconds to wait for a killed subprocess to actually exit.
    _KILL_WAIT_TIMEOUT = 5.0

    # Line prefixes recognised in the ``resolve-library-id`` text response.
    _TITLE_PREFIX = "- Title:"
    _ID_PREFIX = "- Context7-compatible library ID:"
    _SNIPPETS_PREFIX = "- Code Snippets:"
    _TRUST_PREFIX = "- Trust Score:"

    def __init__(self):
        pass  # No state needed - each call creates fresh subprocess

    async def call_context7_tool(
        self, tool_name: str, args: Dict[str, Any]
    ) -> Optional[Dict[str, Any]]:
        """Call a Context7 tool via direct JSONRPC with retry logic.

        Args:
            tool_name: Name of the MCP tool to invoke
            args: Arguments passed to the tool

        Returns:
            ``{"text": ..., "success": True}`` on success, otherwise
            ``{"error": ..., "success": False}``. Failures, including a
            timeout that persists through every retry attempt, are logged and
            reported through the returned dict instead of being raised.
        """
        try:
            # The retry decorator lives on the helper: if it wrapped this
            # method, the except clause below would swallow the exception
            # before tenacity could see it and no retry would ever happen.
            return await self._run_tool_call(tool_name, args)
        except Exception as e:
            logger.error(f"Error calling Context7 tool {tool_name}: {e}")
            return {"error": str(e), "success": False}

    @retry(
        stop=stop_after_attempt(3),
        wait=wait_exponential(multiplier=1, min=2, max=10),
        retry=retry_if_exception_type((TimeoutError, ConnectionError)),
        reraise=True,
    )
    async def _run_tool_call(
        self, tool_name: str, args: Dict[str, Any]
    ) -> Dict[str, Any]:
        """Run one subprocess round-trip for a tool call.

        Raises:
            TimeoutError: If the subprocess does not finish within
                SUBPROCESS_TIMEOUT seconds (this triggers a retry).
        """
        init_request = {
            "jsonrpc": "2.0",
            "id": self._INIT_REQUEST_ID,
            "method": "initialize",
            "params": {
                "protocolVersion": "2025-06-18",
                "capabilities": {},
                "clientInfo": {"name": "ankigen", "version": "1.0.0"},
            },
        }
        tool_request = {
            "jsonrpc": "2.0",
            "id": self._TOOL_REQUEST_ID,
            "method": "tools/call",
            "params": {"name": tool_name, "arguments": args},
        }
        # NOTE(review): the MCP lifecycle also defines a
        # ``notifications/initialized`` message after ``initialize``; it is
        # not sent here - confirm the server does not require it.
        # The trailing empty string terminates the last request with "\n".
        payload = "\n".join(
            [json.dumps(init_request), json.dumps(tool_request), ""]
        ).encode()

        process = await asyncio.create_subprocess_exec(
            "npx",
            "@upstash/context7-mcp",
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )

        try:
            stdout, stderr = await asyncio.wait_for(
                process.communicate(input=payload),
                timeout=SUBPROCESS_TIMEOUT,
            )
        except asyncio.TimeoutError:
            await self._kill_process(process)
            raise TimeoutError(
                f"Context7 subprocess timed out after {SUBPROCESS_TIMEOUT}s"
            )
        except (Exception, asyncio.CancelledError):
            # Also covers task cancellation so the child process is not left
            # running when the caller gives up.
            await self._kill_process(process)
            raise

        outcome = self._extract_tool_result(stdout.decode(), self._TOOL_REQUEST_ID)
        if not outcome["success"] and stderr:
            logger.debug(
                f"Context7 stderr: {stderr.decode(errors='replace')[:500]}"
            )
        return outcome

    async def _kill_process(self, process: "asyncio.subprocess.Process") -> None:
        """Kill the subprocess if it is still running and wait for it to exit.

        Best effort: cleanup errors are logged, never raised.
        """
        try:
            if process.returncode is None:  # Process still running
                process.kill()
                await asyncio.wait_for(
                    process.wait(), timeout=self._KILL_WAIT_TIMEOUT
                )
        except Exception as cleanup_error:
            logger.error(f"Error during process cleanup: {cleanup_error}")

    @staticmethod
    def _extract_tool_result(raw_output: str, request_id: int) -> Dict[str, Any]:
        """Find the response for ``request_id`` in stdout and normalise it.

        Args:
            raw_output: Decoded stdout of the subprocess, one JSON message
                per line
            request_id: JSON-RPC id of the ``tools/call`` request

        Returns:
            Dict with ``success`` plus either ``text`` or ``error``
        """
        for raw_line in raw_output.splitlines():
            line = raw_line.strip()
            if not line:
                continue
            try:
                message = json.loads(line)
            except json.JSONDecodeError:
                continue  # Not a JSON-RPC message; ignore
            if not isinstance(message, dict) or message.get("id") != request_id:
                continue  # Init response, notification, or unrelated output

            if "result" in message:
                result = message["result"]
                if not isinstance(result, dict):
                    break
                content = result.get("content") or []
                text = next(
                    (
                        item["text"]
                        for item in content
                        if isinstance(item, dict) and "text" in item
                    ),
                    None,
                )
                # MCP reports tool execution failures inside the result with
                # ``isError`` set; do not pass that text off as documentation.
                if result.get("isError"):
                    return {
                        "error": text or "Tool call reported an error",
                        "success": False,
                    }
                if text is not None:
                    return {"text": text, "success": True}
                return {"error": "No content in response", "success": False}
            if "error" in message:
                return {"error": message["error"], "success": False}
            break

        return {"error": "Invalid response format", "success": False}

    def _parse_library_response(self, text: str) -> list[Dict[str, Any]]:
        """Parse Context7 response text into list of library dicts.

        A new entry starts at every ``- Title:`` line. Entries that never
        receive a library ID are dropped; snippet counts and trust scores
        that are not numeric are ignored.

        Args:
            text: Raw text response from Context7

        Returns:
            List of library dicts with keys: title (lowercased), id,
            snippets, trust
        """
        libraries: list[Dict[str, Any]] = []
        current_lib: Dict[str, Any] = {}

        for raw_line in text.split("\n"):
            line = raw_line.strip()
            if line.startswith(self._TITLE_PREFIX):
                if current_lib.get("id"):
                    libraries.append(current_lib)
                title = line.removeprefix(self._TITLE_PREFIX).strip()
                current_lib = {"title": title.lower()}
            elif line.startswith(self._ID_PREFIX):
                current_lib["id"] = line.removeprefix(self._ID_PREFIX).strip()
            elif line.startswith(self._SNIPPETS_PREFIX):
                try:
                    current_lib["snippets"] = int(
                        line.removeprefix(self._SNIPPETS_PREFIX).strip()
                    )
                except ValueError:
                    pass  # Non-numeric count: leave the key unset
            elif line.startswith(self._TRUST_PREFIX):
                try:
                    current_lib["trust"] = float(
                        line.removeprefix(self._TRUST_PREFIX).strip()
                    )
                except ValueError:
                    pass  # Non-numeric score: leave the key unset

        # Flush the last entry, which has no following title line.
        if current_lib.get("id"):
            libraries.append(current_lib)
        return libraries

    def _score_library(self, lib: Dict[str, Any], search_term: str) -> float:
        """Score a library based on how well it matches the search term.

        Args:
            lib: Library dict with title, id, snippets, trust
            search_term: Lowercase search term

        Returns:
            Score (higher is better match)
        """
        lib_title = lib.get("title", "")
        lib_id = lib["id"].lower()

        # Name-match tiers, strongest first; only the first hit counts.
        if lib_title == search_term:
            score = 10000.0
        elif lib_id == f"/{search_term}-dev/{search_term}":
            score = 5000.0
        elif f"/{search_term}/" in lib_id or lib_id.endswith(f"/{search_term}"):
            score = 2000.0
        elif lib_title.startswith(search_term):
            score = 200.0
        elif search_term in lib_title:
            score = 50.0
        else:
            score = 0.0

        # Bonus for code snippets (indicates main library)
        score += lib.get("snippets", 0) / 10
        # Bonus for trust score (high trust = official/authoritative)
        score += lib.get("trust", 0) * 100
        return score

    def _select_best_library(
        self, libraries: list[Dict[str, Any]], search_term: str
    ) -> Optional[Dict[str, Any]]:
        """Select the best matching library from a list.

        On equal scores the earliest library in the list wins.

        Args:
            libraries: List of library dicts
            search_term: Lowercase search term

        Returns:
            Best matching library dict, or None if no match
        """
        best_lib: Optional[Dict[str, Any]] = None
        best_score = -1.0

        for lib in libraries:
            score = self._score_library(lib, search_term)
            # Only log candidates that mention the term, to keep debug
            # output readable.
            if search_term in lib.get("title", "") or search_term in lib["id"].lower():
                logger.debug(
                    f"Scoring {lib['id']}: title='{lib.get('title', '')}', "
                    f"snippets={lib.get('snippets', 0)}, trust={lib.get('trust', 0)}, score={score:.2f}"
                )
            if score > best_score:
                best_score = score
                best_lib = lib

        if best_lib:
            logger.info(
                f"Selected library: {best_lib['id']} (title: {best_lib.get('title', 'unknown')}, "
                f"snippets: {best_lib.get('snippets', 0)}, trust: {best_lib.get('trust', 0)}, "
                f"score: {best_score:.2f})"
            )
        return best_lib

    async def resolve_library_id(self, library_name: str) -> Optional[str]:
        """Resolve a library name to a Context7-compatible ID.

        Args:
            library_name: Human-readable library name to look up

        Returns:
            The ID of the best-scoring library, or None if the name is empty,
            the tool call fails, or no library could be parsed
        """
        if not library_name or not library_name.strip():
            # Nothing to look up; avoid spawning a subprocess for it.
            logger.warning("Cannot resolve an empty library name")
            return None

        logger.info(f"Resolving library ID for: {library_name}")
        result = await self.call_context7_tool(
            "resolve-library-id", {"libraryName": library_name}
        )

        best_lib: Optional[Dict[str, Any]] = None
        if result and result.get("success") and result.get("text"):
            libraries = self._parse_library_response(result["text"])
            if libraries:
                best_lib = self._select_best_library(libraries, library_name.lower())

        if not best_lib:
            logger.warning(f"Could not resolve library ID for '{library_name}'")
            return None

        logger.info(f"Resolved '{library_name}' to ID: {best_lib['id']}")
        return best_lib["id"]

    async def get_library_docs(
        self, library_id: str, topic: Optional[str] = None, tokens: int = 5000
    ) -> Optional[str]:
        """Get documentation for a library.

        Args:
            library_id: Context7-compatible library ID; must start with "/"
                and be at most MAX_STRING_LENGTH characters
            topic: Optional topic forwarded to the tool
            tokens: Token budget forwarded to the tool

        Returns:
            Documentation text, or None if the call failed or returned no text

        Raises:
            ValidationError: If library_id is empty, does not start with "/",
                or is too long
        """
        # Security: Validate library_id (should start with /)
        if (
            not library_id
            or not library_id.startswith("/")
            or len(library_id) > MAX_STRING_LENGTH
        ):
            logger.error(f"Invalid library ID format (security): '{library_id}'")
            raise ValidationError("Invalid library ID format")

        logger.info(
            f"Fetching docs for: {library_id}" + (f" (topic: {topic})" if topic else "")
        )

        args: Dict[str, Any] = {
            "context7CompatibleLibraryID": library_id,
            "tokens": tokens,
        }
        if topic:
            args["topic"] = topic

        result = await self.call_context7_tool("get-library-docs", args)
        if result and result.get("success") and result.get("text"):
            docs = result["text"]
            logger.info(f"Retrieved {len(docs)} characters of documentation")
            return docs

        logger.warning(f"Could not fetch docs for '{library_id}'")
        return None

    async def fetch_library_documentation(
        self, library_name: str, topic: Optional[str] = None, tokens: int = 5000
    ) -> Optional[str]:
        """Convenience method to resolve and fetch docs in one call.

        Args:
            library_name: Library name passed to resolve_library_id
            topic: Optional topic forwarded to get_library_docs
            tokens: Token budget forwarded to get_library_docs

        Returns:
            Documentation text, or None if the name could not be resolved or
            no documentation was returned
        """
        library_id = await self.resolve_library_id(library_name)
        if not library_id:
            return None
        return await self.get_library_docs(library_id, topic, tokens)
async def test_context7() -> None:
    """Manually exercise the Context7 integration and print the outcome.

    Resolves the library name "react" and, if that succeeds, fetches its
    documentation for the "hooks" topic. Progress is reported with ``print``;
    nothing is returned or asserted.
    """
    client = Context7Client()
    print("Testing Context7 integration...")

    # Test resolving a library
    library_id = await client.resolve_library_id("react")
    if not library_id:
        print("✗ Failed to resolve library ID")
        return
    print(f"✓ Resolved 'react' to ID: {library_id}")

    # Test fetching docs
    docs = await client.get_library_docs(library_id, topic="hooks", tokens=2000)
    if docs:
        print(f"✓ Fetched {len(docs)} characters of documentation")
        print(f"Preview: {docs[:300]}...")
    else:
        print("✗ Failed to fetch documentation")
# Run the manual integration check when this module is executed as a script.
if __name__ == "__main__":
    asyncio.run(test_context7())