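# Gradio app that exposes HuggingFace documentation lookup as MCP tools,
# backed by a Milvus vector index of the docs that is built at startup.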
import gradio as gr
import os
import json
import subprocess
import dotenv
from string import Template
from pymilvus import MilvusClient, model

_ = dotenv.load_dotenv()  # Load environment variables (e.g. OPENAI_API_KEY) from a local .env file

# Build the documentation corpus and the Milvus RAG index before serving;
# check=True surfaces failures from the helper scripts instead of ignoring them.
subprocess.run(["python3", "make_docs.py"], check=True)
subprocess.run(["python3", "make_rag_db.py"], check=True)

# Each retrieved documentation file is wrapped in a small header so the
# caller can see which file the content came from.
template = Template("""\
---
File: $file_path
---

$file_content""")

# Connect to the local Milvus database and configure the OpenAI embedding
# function used to embed search queries.
client = MilvusClient("milvus.db")
embedding_fn = model.dense.OpenAIEmbeddingFunction(
    model_name='text-embedding-3-small',  # Must match the model used to build the index
    api_key=os.environ.get('OPENAI_API_KEY'),
    dimensions=1536,  # Embedding dimensionality of text-embedding-3-small
)


def list_huggingface_resources_names() -> list[str]:
    """List all the names of the libraries, services, and other resources available within the HuggingFace ecosystem.
    
    Returns:
        A list of libraries, services, and other resources available within the HuggingFace ecosystem
    """
    with open('repos_config.json', 'r') as f:
        repos = json.load(f)

    titles = [repo['title'] for repo in repos]
    print(titles)  # Debug logging
    return titles


def get_huggingface_documentation(topic: str, resource_names: list[str] = []) -> str:
    """Get the documentation for the given topic and resource names.
    
    Args:
        topic: Focus the docs on a specific topic (e.g. "Anthropic Provider Chat UI", "LoRA methods PEFT" or "TGI on Intel GPUs")
        resource_names: A list of relevant resource names to the topic
        
    Returns:
        A string of documentation for the given topic and resource names
    """
    print(resource_names)
    # Embed the query and retrieve the three most relevant documentation chunks.
    query_vectors = embedding_fn.encode_queries([topic])
    res = client.search(collection_name="hf_docs", data=query_vectors, limit=3, output_fields=["text", "file_path"])
    print(res)

    # MilvusClient.search nests output_fields under each hit's 'entity' key.
    docs_paths = [hit['entity']['file_path'] for hit in res[0]]
    print(docs_paths)

    documentation = ""
    for path in docs_paths:
        with open(path, 'r') as f:
            content = f.read()
            documentation += template.substitute(file_path=path.replace('docs/', ''), file_content=content) + "\n\n"

    documentation = documentation.strip()
    print(documentation)
    return documentation

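# Wrap each tool in a Gradio Interface so it gets a web UI and, via
# mcp_server=True below, is also exposed as an MCP tool.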
list_resources_demo = gr.Interface(
    fn=list_huggingface_resources_names,
    inputs=[],
    outputs="json",
    title="HuggingFace Ecosystem Explorer",
    description="Explore the names of the libraries, services, and other resources available within the HuggingFace ecosystem"
)

get_docs_demo = gr.Interface(
    fn=get_huggingface_documentation,
    inputs=["text", "json"],  # topic (text) and resource_names (JSON list)
    outputs="text",
)

# Create tabbed interface
demo = gr.TabbedInterface(
    [list_resources_demo, get_docs_demo],
    ["List Resources", "Get Documentation"],
    title="HuggingFace Ecosystem Documentation Explorer",
)

# mcp_server=True serves the wrapped functions as MCP tools alongside the web UI.
demo.launch(mcp_server=True)