Spaces:
Running
Running
import gradio as gr | |
import os | |
import json | |
import subprocess | |
import tempfile | |
import dotenv | |
import shutil | |
from pathlib import Path | |
from string import Template | |
from pymilvus import MilvusClient, model | |
# Load environment variables through python-dotenv before anything reads
# os.environ (the OpenAI API key is looked up further down in this file).
_ = dotenv.load_dotenv()

# Run the preparation scripts in order at import time. Presumably they
# generate the documentation files and the vector database used below
# (inferred from their names only -- confirm against the scripts).
# NOTE: the exit status is not checked, so startup continues even if a
# script fails.
for _script in ("make_docs.py", "make_rag_db.py"):
    subprocess.run(["python3", _script])
# Layout of one documentation section: a "---"-fenced header naming the file,
# followed by the file's content. Placeholders: $file_path, $file_content.
template = Template(
    "---\n"
    "File: $file_path\n"
    "---\n"
    "$file_content"
)
# Milvus client backed by the "milvus.db" path (relative to the working
# directory).
client = MilvusClient("milvus.db")

# OpenAI API key taken from the environment; None when the variable is unset.
_openai_api_key = os.environ.get('OPENAI_API_KEY')

# Embedding function used to turn query text into vectors for the search.
embedding_fn = model.dense.OpenAIEmbeddingFunction(
    model_name='text-embedding-3-small',  # OpenAI embedding model name
    api_key=_openai_api_key,
    dimensions=1536,  # embedding dimensionality
)
def list_huggingface_resources_names() -> list[str]:
    """List all the names of the libraries, services, and other resources available within the HuggingFace ecosystem.

    Returns:
        A list of libraries, services, and other resources available within the HuggingFace ecosystem
    """
    # The docstring text above is kept verbatim: the app is launched with
    # mcp_server=True, and Gradio derives the tool description from it.
    #
    # Reads 'repos_config.json' from the current working directory. The file
    # is expected to hold a JSON array of objects that each have a 'title' key.
    # The encoding is pinned to UTF-8 so the result does not depend on the
    # platform's locale default.
    with open('repos_config.json', 'r', encoding='utf-8') as f:
        repos = json.load(f)

    # Build the list once and reuse it for both the log line and the result.
    titles = [repo['title'] for repo in repos]
    print(titles)
    return titles
def get_huggingface_documentation(topic: str, resource_names: list[str] = []) -> str:
    """Get the documentation for the given topic and resource names.

    Args:
        topic: Focus the docs on a specific topic (e.g. "Anthropic Provider Chat UI", "LoRA methods PEFT" or "TGI on Intel GPUs")
        resource_names: A list of relevant resource names to the topic

    Returns:
        A string of documentation for the given topic and resource names
    """
    # The docstring text above is kept verbatim: the app is launched with
    # mcp_server=True, and Gradio derives the tool description from it.
    #
    # NOTE: `resource_names` is only logged; it does not filter the search.
    # The list default is never mutated here, so sharing it across calls is
    # harmless, and it is left as-is to keep the signature unchanged.
    print(resource_names)

    # Embed the topic and fetch the 3 nearest entries from the "hf_docs"
    # collection.
    query_vectors = embedding_fn.encode_queries([topic])
    res = client.search(collection_name="hf_docs", data=query_vectors, limit=3, output_fields=["text", "file_path"])
    print(res)

    # One query vector was sent, so res[0] holds the hits for that query.
    docs_paths = [hit['file_path'] for hit in res[0]]
    print(docs_paths)

    sections = []
    for path in docs_paths:
        # Read the whole file the hit points to (not just the matched text).
        # UTF-8 is pinned so non-ASCII documentation decodes the same way on
        # every platform instead of following the locale default.
        with open(path, 'r', encoding='utf-8') as f:
            content = f.read()
        sections.append(
            template.substitute(file_path=path.replace('docs/', ''), file_content=content)
        )

    # Sections are separated by a blank line; outer whitespace is trimmed.
    documentation = "\n\n".join(sections).strip()
    print(documentation)
    return documentation
# Tab 1: no inputs; shows the list of resource names as JSON.
_explorer_description = "Explore the names of the libraries, services, and other resources available within the HuggingFace ecosystem"
list_resources_demo = gr.Interface(
    fn=list_huggingface_resources_names,
    inputs=[],
    outputs="json",
    title="HuggingFace Ecosystem Explorer",
    description=_explorer_description,
)

# Tab 2: a text topic plus a JSON list of resource names in, plain text out.
get_docs_demo = gr.Interface(
    fn=get_huggingface_documentation,
    inputs=["text", "json"],
    outputs="text",
)

# Create tabbed interface: tab label -> interface, in display order.
_tabs = {
    "List Resources": list_resources_demo,
    "Get Documentation": get_docs_demo,
}
demo = gr.TabbedInterface(
    list(_tabs.values()),
    list(_tabs),
    title="HuggingFace Ecosystem Documentation Explorer",
)

# Start the web app with Gradio's MCP server enabled.
demo.launch(mcp_server=True)