hfcontext7 / app.py
Abdullah Meda
minor edits
92dd823
raw
history blame
2.89 kB
import gradio as gr
import os
import json
import subprocess
import tempfile
import dotenv
import shutil
from pathlib import Path
from string import Template
from pymilvus import MilvusClient, model
# Load environment variables (OPENAI_API_KEY is read further down) from a
# .env file when one is present.
_ = dotenv.load_dotenv()

# Run the two preparation scripts, in this order, before the database is
# opened. Their exit codes are not inspected, so a failing script does not
# stop start-up. NOTE(review): presumably these build the docs tree and the
# vector database -- confirm against the scripts themselves.
for _script in ("make_docs.py", "make_rag_db.py"):
    subprocess.run(["python3", _script])

# Wrapper applied to every document handed back to the caller: a small
# header naming the file, followed by the file's content.
template = Template("""\
---
File: $file_path
---
$file_content""")

# Handle on the vector store stored at "milvus.db".
client = MilvusClient("milvus.db")

# Embedding function used to turn search queries into vectors.
embedding_fn = model.dense.OpenAIEmbeddingFunction(
    dimensions=1536,
    api_key=os.environ.get('OPENAI_API_KEY'),
    model_name='text-embedding-3-small',
)
def list_huggingface_resources_names() -> list[str]:
    """List all the names of the libraries, services, and other resources available within the HuggingFace ecosystem.

    Returns:
        A list of libraries, services, and other resources available within the HuggingFace ecosystem
    """
    # NOTE: the docstring wording is kept as-is on purpose. The app is
    # launched with mcp_server=True, and Gradio is expected to surface this
    # text to MCP clients as the tool description.

    # JSON is UTF-8 by specification; pin the encoding so non-ASCII titles
    # decode the same way regardless of the host's locale.
    with open('repos_config.json', 'r', encoding='utf-8') as f:
        repos = json.load(f)

    # Build the list once and reuse it for both the log line and the result.
    titles = [repo['title'] for repo in repos]
    print(titles)
    return titles
def get_huggingface_documentation(topic: str, resource_names: list[str] = []) -> str:
    """Get the documentation for the given topic and resource names.

    Args:
        topic: Focus the docs on a specific topic (e.g. "Anthropic Provider Chat UI", "LoRA methods PEFT" or "TGI on Intel GPUs")
        resource_names: A list of relevant resource names to the topic

    Returns:
        A string of documentation for the given topic and resource names
    """
    # NOTE: the docstring wording is kept as-is on purpose. The app is
    # launched with mcp_server=True, and Gradio is expected to surface this
    # text to MCP clients as the tool/parameter descriptions.
    #
    # NOTE(review): `resource_names` is only logged; the search below is not
    # filtered by it. The list default is never mutated here, so it is left
    # unchanged to keep the signature identical for callers.
    print(resource_names)

    # Embed the topic and fetch the three nearest entries from the collection.
    query_vectors = embedding_fn.encode_queries([topic])
    res = client.search(collection_name="hf_docs", data=query_vectors, limit=3, output_fields=["text", "file_path"])
    print(res)

    # A single query was sent, so only the first result list is relevant.
    # An empty response yields no hits instead of raising IndexError.
    hits = res[0] if res else []

    # Several hits may point at the same file; keep each path once, in the
    # order returned, so the same document is not emitted repeatedly.
    docs_paths = list(dict.fromkeys(hit['file_path'] for hit in hits))
    print(docs_paths)

    sections = []
    for path in docs_paths:
        # Explicit encoding so reading does not depend on the host's locale.
        with open(path, 'r', encoding='utf-8') as f:
            content = f.read()
        # Drop only the first "docs/" so that a nested "docs/" directory
        # further down the path stays visible in the header.
        shown_path = path.replace('docs/', '', 1)
        sections.append(template.substitute(file_path=shown_path, file_content=content))

    # Sections are separated by a blank line; surrounding whitespace is trimmed.
    documentation = "\n\n".join(sections).strip()
    print(documentation)
    return documentation
# Interface for the resource listing: takes no inputs and shows the result
# as JSON.
list_resources_demo = gr.Interface(
    title="HuggingFace Ecosystem Explorer",
    description="Explore the names of the libraries, services, and other resources available within the HuggingFace ecosystem",
    fn=list_huggingface_resources_names,
    inputs=[],
    outputs="json",
)

# Interface for the documentation lookup: a text box for the topic and a
# JSON input for the resource names; the result is shown as text.
get_docs_demo = gr.Interface(
    fn=get_huggingface_documentation,
    outputs="text",
    inputs=["text", "json"],
)

# Tab label -> interface, in display order.
_tabs = {
    "List Resources": list_resources_demo,
    "Get Documentation": get_docs_demo,
}

# Combine both interfaces into one tabbed app.
demo = gr.TabbedInterface(
    list(_tabs.values()),
    list(_tabs),
    title="HuggingFace Ecosystem Documentation Explorer",
)

# Start the app with the MCP server option switched on.
demo.launch(mcp_server=True)