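# Gradio app that exposes HuggingFace documentation lookup as MCP tools,
# backed by a Milvus vector index of the docs that is built at startup.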
import gradio as gr
import os
import json
import subprocess
import dotenv
from string import Template
from pymilvus import MilvusClient, model

_ = dotenv.load_dotenv()  # Load environment variables (e.g. OPENAI_API_KEY) from a local .env file

# Build the documentation corpus and the Milvus RAG index before serving;
# check=True surfaces failures from the helper scripts instead of ignoring them.
subprocess.run(["python3", "make_docs.py"], check=True)
subprocess.run(["python3", "make_rag_db.py"], check=True)

# Each retrieved documentation file is wrapped in a small header so the
# caller can see which file the content came from.
template = Template("""\
---
File: $file_path
---

$file_content""")

# Connect to the local Milvus database and configure the OpenAI embedding
# function used to embed search queries.
client = MilvusClient("milvus.db")
embedding_fn = model.dense.OpenAIEmbeddingFunction(
    model_name='text-embedding-3-small',  # Must match the model used to build the index
    api_key=os.environ.get('OPENAI_API_KEY'),
    dimensions=1536,  # Embedding dimensionality of text-embedding-3-small
)


def list_huggingface_resources_names() -> list[str]:
    """List all the names of the libraries, services, and other resources available within the HuggingFace ecosystem.
    
    Returns:
        A list of libraries, services, and other resources available within the HuggingFace ecosystem
    """
    with open('repos_config.json', 'r') as f:
        repos = json.load(f)

    titles = [repo['title'] for repo in repos]
    print(titles)  # Debug logging
    return titles


def get_huggingface_documentation(topic: str, resource_names: list[str] = []) -> str:
    """Get the documentation for the given topic and resource names.
    
    Args:
        topic: Focus the docs on a specific topic (e.g. "Anthropic Provider Chat UI", "LoRA methods PEFT" or "TGI on Intel GPUs")
        resource_names: A list of relevant resource names to the topic
        
    Returns:
        A string of documentation for the given topic and resource names
    """
    print(resource_names)
    # Embed the query and retrieve the three most relevant documentation chunks.
    query_vectors = embedding_fn.encode_queries([topic])
    res = client.search(collection_name="hf_docs", data=query_vectors, limit=3, output_fields=["text", "file_path"])
    print(res)

    # MilvusClient.search nests output_fields under each hit's 'entity' key.
    docs_paths = [hit['entity']['file_path'] for hit in res[0]]
    print(docs_paths)

    documentation = ""
    for path in docs_paths:
        with open(path, 'r') as f:
            content = f.read()
            documentation += template.substitute(file_path=path.replace('docs/', ''), file_content=content) + "\n\n"

    documentation = documentation.strip()
    print(documentation)
    return documentation

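# Wrap each tool in a Gradio Interface so it gets a web UI and, via
# mcp_server=True below, is also exposed as an MCP tool.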
list_resources_demo = gr.Interface(
    fn=list_huggingface_resources_names,
    inputs=[],
    outputs="json",
    title="HuggingFace Ecosystem Explorer",
    description="Explore the names of the libraries, services, and other resources available within the HuggingFace ecosystem"
)

get_docs_demo = gr.Interface(
    fn=get_huggingface_documentation,
    inputs=["text", "json"],  # topic (text) and resource_names (JSON list)
    outputs="text",
)

# Create tabbed interface
demo = gr.TabbedInterface(
    [list_resources_demo, get_docs_demo],
    ["List Resources", "Get Documentation"],
    title="HuggingFace Ecosystem Documentation Explorer",
)

# mcp_server=True serves the wrapped functions as MCP tools alongside the web UI.
demo.launch(mcp_server=True)