|
|
"""Context7 integration for library documentation""" |
|
|
|
|
|
import asyncio |
|
|
import subprocess |
|
|
import json |
|
|
from typing import Optional, Dict, Any |
|
|
from tenacity import ( |
|
|
retry, |
|
|
stop_after_attempt, |
|
|
wait_exponential, |
|
|
retry_if_exception_type, |
|
|
) |
|
|
from ankigen_core.logging import logger |
|
|
from ankigen_core.exceptions import ( |
|
|
ValidationError, |
|
|
) |
|
|
|
|
|
# Upper bound on the length of a library ID accepted by
# Context7Client.get_library_docs; longer IDs are rejected as invalid.
MAX_STRING_LENGTH = 200

# Seconds to wait for the Context7 subprocess to answer before it is killed.
SUBPROCESS_TIMEOUT = 60.0
|
|
|
|
|
|
|
|
class Context7Client:
    """Context7 MCP client for fetching library documentation.

    Every tool call spawns a short-lived ``npx @upstash/context7-mcp``
    subprocess and exchanges newline-delimited JSON-RPC messages with it over
    stdin/stdout. The client itself keeps no state between calls.
    """

    # JSON-RPC ids; the tool id is used to pick the tool reply out of stdout.
    _INIT_REQUEST_ID = 0
    _TOOL_REQUEST_ID = 1

    # Seconds to wait for a killed subprocess to be reaped.
    _CLEANUP_TIMEOUT = 5.0

    # Line labels emitted by the ``resolve-library-id`` tool.
    _TITLE_PREFIX = "- Title:"
    _ID_PREFIX = "- Context7-compatible library ID:"
    _SNIPPETS_PREFIX = "- Code Snippets:"
    _TRUST_PREFIX = "- Trust Score:"

    def __init__(self):
        """Create a client; there is nothing to configure."""

    async def call_context7_tool(
        self, tool_name: str, args: Dict[str, Any]
    ) -> Optional[Dict[str, Any]]:
        """Call a Context7 tool via direct JSONRPC with retry logic.

        Timeouts and connection errors are retried (up to three attempts with
        exponential backoff) by ``_call_context7_tool_with_retry``. Any error
        that survives the retries, or is not retryable, is logged and turned
        into an error dict, so this method never raises.

        Args:
            tool_name: Name of the Context7 MCP tool to invoke.
            args: JSON-serializable arguments for the tool.

        Returns:
            ``{"text": ..., "success": True}`` on success, otherwise
            ``{"error": ..., "success": False}``.
        """
        try:
            return await self._call_context7_tool_with_retry(tool_name, args)
        except Exception as e:
            logger.error(f"Error calling Context7 tool {tool_name}: {e}")
            return {"error": str(e), "success": False}

    # The retry decorator lives on this helper rather than on the public
    # method: the public method converts every exception into an error dict,
    # which would hide the exceptions tenacity needs to see in order to retry.
    @retry(
        stop=stop_after_attempt(3),
        wait=wait_exponential(multiplier=1, min=2, max=10),
        retry=retry_if_exception_type((TimeoutError, ConnectionError)),
        reraise=True,
    )
    async def _call_context7_tool_with_retry(
        self, tool_name: str, args: Dict[str, Any]
    ) -> Dict[str, Any]:
        """Run one tool call in a fresh subprocess and parse its reply.

        Raises:
            TimeoutError: The subprocess did not finish within
                ``SUBPROCESS_TIMEOUT`` seconds (retried by the decorator).
            Exception: Anything else raised while spawning or talking to the
                subprocess, or while serializing ``args``.
        """
        init_request = {
            "jsonrpc": "2.0",
            "id": self._INIT_REQUEST_ID,
            "method": "initialize",
            "params": {
                "protocolVersion": "2025-06-18",
                "capabilities": {},
                "clientInfo": {"name": "ankigen", "version": "1.0.0"},
            },
        }
        request = {
            "jsonrpc": "2.0",
            "id": self._TOOL_REQUEST_ID,
            "method": "tools/call",
            "params": {"name": tool_name, "arguments": args},
        }

        # Serialize before spawning so that non-serializable args cannot
        # leave an orphaned subprocess behind.
        payload = "\n".join(
            [json.dumps(init_request), json.dumps(request), ""]
        ).encode()

        process = await asyncio.create_subprocess_exec(
            "npx",
            "@upstash/context7-mcp",
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )

        try:
            stdout, _stderr = await asyncio.wait_for(
                process.communicate(input=payload),
                timeout=SUBPROCESS_TIMEOUT,
            )
        except asyncio.TimeoutError as timeout_error:
            await self._terminate_process(process)
            # Re-raise as the builtin TimeoutError, which is what the retry
            # policy matches on (the two types differ before Python 3.11).
            raise TimeoutError(
                f"Context7 subprocess timed out after {SUBPROCESS_TIMEOUT}s"
            ) from timeout_error
        except Exception:
            await self._terminate_process(process)
            raise

        return self._extract_tool_result(
            stdout.decode(errors="replace"), self._TOOL_REQUEST_ID
        )

    @classmethod
    async def _terminate_process(cls, process: "asyncio.subprocess.Process") -> None:
        """Kill *process* if it is still running and wait briefly for it to exit.

        Cleanup is best-effort: failures are logged, never raised, so they
        cannot mask the error that triggered the cleanup.
        """
        try:
            if process.returncode is None:
                process.kill()
            await asyncio.wait_for(process.wait(), timeout=cls._CLEANUP_TIMEOUT)
        except Exception as cleanup_error:
            logger.error(f"Error during process cleanup: {cleanup_error}")

    @staticmethod
    def _extract_tool_result(raw_stdout: str, request_id: int) -> Dict[str, Any]:
        """Find the JSON-RPC reply with id *request_id* and convert it to a result dict.

        Lines that are not JSON objects, or that belong to another request
        (the ``initialize`` reply, notifications), are skipped.
        """
        for raw_line in raw_stdout.splitlines():
            line = raw_line.strip()
            if not line:
                continue
            try:
                message = json.loads(line)
            except json.JSONDecodeError:
                continue
            if not isinstance(message, dict) or message.get("id") != request_id:
                continue

            if "result" in message:
                result = message["result"]
                content = result.get("content") if isinstance(result, dict) else None
                if content:
                    first_item = content[0]
                    if isinstance(first_item, dict) and (
                        "text" in first_item or first_item.get("type") == "text"
                    ):
                        return {"text": first_item.get("text", ""), "success": True}
                return {"error": "No content in response", "success": False}

            if "error" in message:
                return {"error": message["error"], "success": False}

            # Matching id but neither result nor error: malformed reply.
            break

        return {"error": "Invalid response format", "success": False}

    def _parse_library_response(self, text: str) -> list[Dict[str, Any]]:
        """Parse Context7 response text into list of library dicts.

        A new entry starts at each ``- Title:`` line; the labelled lines that
        follow fill in its fields. Entries that never receive an ID are
        dropped.

        Args:
            text: Raw text response from Context7

        Returns:
            List of library dicts with keys: title (lowercased), id, and,
            when present and numeric, snippets (int) and trust (float)
        """
        libraries: list[Dict[str, Any]] = []
        current_lib: Dict[str, Any] = {}

        for raw_line in text.split("\n"):
            line = raw_line.strip()

            if line.startswith(self._TITLE_PREFIX):
                if current_lib.get("id"):
                    libraries.append(current_lib)
                title = line.removeprefix(self._TITLE_PREFIX).strip().lower()
                current_lib = {"title": title}

            elif line.startswith(self._ID_PREFIX):
                current_lib["id"] = line.removeprefix(self._ID_PREFIX).strip()

            elif line.startswith(self._SNIPPETS_PREFIX):
                try:
                    current_lib["snippets"] = int(
                        line.removeprefix(self._SNIPPETS_PREFIX).strip()
                    )
                except ValueError:
                    # Non-numeric count: leave the field unset on purpose.
                    pass

            elif line.startswith(self._TRUST_PREFIX):
                try:
                    current_lib["trust"] = float(
                        line.removeprefix(self._TRUST_PREFIX).strip()
                    )
                except ValueError:
                    # Non-numeric score: leave the field unset on purpose.
                    pass

        # Flush the last entry, which has no following title line to close it.
        if current_lib.get("id"):
            libraries.append(current_lib)

        return libraries

    def _score_library(self, lib: Dict[str, Any], search_term: str) -> float:
        """Score a library based on how well it matches the search term.

        The name-match tier dominates; snippet count and trust score act as
        tie-breakers within a tier.

        Args:
            lib: Library dict with title, id, snippets, trust
            search_term: Lowercase search term

        Returns:
            Score (higher is better match)
        """
        lib_title = lib.get("title", "")
        lib_id = lib["id"].lower()

        # Only the first matching tier applies.
        if lib_title == search_term:
            score = 10000.0
        elif lib_id == f"/{search_term}-dev/{search_term}":
            score = 5000.0
        elif f"/{search_term}/" in lib_id or lib_id.endswith(f"/{search_term}"):
            score = 2000.0
        elif lib_title.startswith(search_term):
            score = 200.0
        elif search_term in lib_title:
            score = 50.0
        else:
            score = 0.0

        score += lib.get("snippets", 0) / 10
        score += lib.get("trust", 0) * 100
        return score

    def _select_best_library(
        self, libraries: list[Dict[str, Any]], search_term: str
    ) -> Optional[Dict[str, Any]]:
        """Select the best matching library from a list.

        Ties go to the library that appears first in ``libraries``.

        Args:
            libraries: List of library dicts
            search_term: Lowercase search term

        Returns:
            Best matching library dict, or None if ``libraries`` is empty
        """
        best_lib: Optional[Dict[str, Any]] = None
        # -inf rather than a fixed floor so a negative score can still win.
        best_score = float("-inf")

        for lib in libraries:
            score = self._score_library(lib, search_term)

            # Only log candidates that mention the search term at all.
            if search_term in lib.get("title", "") or search_term in lib["id"].lower():
                logger.debug(
                    f"Scoring {lib['id']}: title='{lib.get('title', '')}', "
                    f"snippets={lib.get('snippets', 0)}, trust={lib.get('trust', 0)}, score={score:.2f}"
                )

            if score > best_score:
                best_score = score
                best_lib = lib

        if best_lib is not None:
            logger.info(
                f"Selected library: {best_lib['id']} (title: {best_lib.get('title', 'unknown')}, "
                f"snippets: {best_lib.get('snippets', 0)}, trust: {best_lib.get('trust', 0)}, "
                f"score: {best_score:.2f})"
            )

        return best_lib

    async def resolve_library_id(self, library_name: str) -> Optional[str]:
        """Resolve a library name to a Context7-compatible ID.

        Args:
            library_name: Human-readable library name, e.g. ``"react"``.

        Returns:
            The ID of the best-scoring match, or None when the name is blank,
            the tool call fails, or the response lists no libraries.
        """
        logger.info(f"Resolving library ID for: {library_name}")

        # Do not spawn a subprocess for a name that cannot match anything.
        if not library_name or not library_name.strip():
            logger.warning(f"Could not resolve library ID for '{library_name}'")
            return None

        result = await self.call_context7_tool(
            "resolve-library-id", {"libraryName": library_name}
        )

        if not (result and result.get("success") and result.get("text")):
            logger.warning(f"Could not resolve library ID for '{library_name}'")
            return None

        libraries = self._parse_library_response(result["text"])
        if not libraries:
            logger.warning(f"Could not resolve library ID for '{library_name}'")
            return None

        best_lib = self._select_best_library(libraries, library_name.strip().lower())
        if best_lib:
            logger.info(f"Resolved '{library_name}' to ID: {best_lib['id']}")
            return best_lib["id"]

        logger.warning(f"Could not resolve library ID for '{library_name}'")
        return None

    async def get_library_docs(
        self, library_id: str, topic: Optional[str] = None, tokens: int = 5000
    ) -> Optional[str]:
        """Get documentation for a library.

        Args:
            library_id: Context7-compatible ID; must start with ``/`` and be
                at most ``MAX_STRING_LENGTH`` characters long.
            topic: Optional topic to focus the documentation on.
            tokens: Value forwarded as the tool's ``tokens`` argument.

        Returns:
            The documentation text, or None if the tool returned none.

        Raises:
            ValidationError: If ``library_id`` is empty, does not start with
                ``/``, or is too long.
        """
        if (
            not library_id
            or not library_id.startswith("/")
            or len(library_id) > MAX_STRING_LENGTH
        ):
            logger.error(f"Invalid library ID format (security): '{library_id}'")
            raise ValidationError("Invalid library ID format")

        logger.info(
            f"Fetching docs for: {library_id}" + (f" (topic: {topic})" if topic else "")
        )

        args: Dict[str, Any] = {
            "context7CompatibleLibraryID": library_id,
            "tokens": tokens,
        }
        if topic:
            args["topic"] = topic

        result = await self.call_context7_tool("get-library-docs", args)

        if result and result.get("success") and result.get("text"):
            docs = result["text"]
            logger.info(f"Retrieved {len(docs)} characters of documentation")
            return docs

        logger.warning(f"Could not fetch docs for '{library_id}'")
        return None

    async def fetch_library_documentation(
        self, library_name: str, topic: Optional[str] = None, tokens: int = 5000
    ) -> Optional[str]:
        """Convenience method to resolve and fetch docs in one call.

        Returns:
            The documentation text, or None if the name could not be resolved
            or no documentation was returned.
        """
        library_id = await self.resolve_library_id(library_name)
        if not library_id:
            return None

        return await self.get_library_docs(library_id, topic, tokens)
|
|
|
|
|
|
|
|
async def test_context7() -> None:
    """Test the Context7 integration.

    Manual smoke test: resolves the ``react`` library, fetches its ``hooks``
    documentation, and prints the outcome of each step. Stops at the first
    step that fails.
    """
    client = Context7Client()

    print("Testing Context7 integration...")

    library_id = await client.resolve_library_id("react")
    if not library_id:
        print("✗ Failed to resolve library ID")
        return
    print(f"✓ Resolved 'react' to ID: {library_id}")

    docs = await client.get_library_docs(library_id, topic="hooks", tokens=2000)
    if not docs:
        print("✗ Failed to fetch documentation")
        return
    print(f"✓ Fetched {len(docs)} characters of documentation")
    print(f"Preview: {docs[:300]}...")
|
|
|
|
|
|
|
|
# Run the manual smoke test when this module is executed as a script.
if __name__ == "__main__":
    asyncio.run(test_context7())
|
|
|