Spaces:
Sleeping
Sleeping
Upload folder using huggingface_hub
Browse files- .gitattributes +1 -0
- .github/workflows/update_space.yml +28 -0
- .gitignore +104 -0
- .python-version +1 -0
- README.md +2 -8
- app.py +39 -0
- chroma_db/chroma.sqlite3 +3 -0
- pyproject.toml +9 -0
- query_interface.py +210 -0
- requirements.txt +6 -0
- uv.lock +0 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
chroma_db/chroma.sqlite3 filter=lfs diff=lfs merge=lfs -text
|
.github/workflows/update_space.yml
ADDED
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Deploys the repository to its Hugging Face Space on every push to main.
name: Run Python script

on:
  push:
    branches:
      - main

jobs:
  build:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          # chroma_db/chroma.sqlite3 is tracked with Git LFS (see .gitattributes);
          # without this the deploy would upload the LFS pointer file, not the database.
          lfs: true

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          # Keep in step with .python-version and requires-python in pyproject.toml.
          python-version: '3.12'

      - name: Install Gradio
        run: python -m pip install gradio

      - name: Log in to Hugging Face
        # Pass the secret through the environment instead of splicing it into the script text.
        env:
          HF_TOKEN: ${{ secrets.hf_token }}
        run: |
          python -c 'import os, huggingface_hub; huggingface_hub.login(token=os.environ["HF_TOKEN"])'

      - name: Deploy to Spaces
        run: gradio deploy
|
.gitignore
ADDED
@@ -0,0 +1,104 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Byte-compiled / optimized / DLL files
|
2 |
+
__pycache__/
|
3 |
+
*.pyc
|
4 |
+
*.pyo
|
5 |
+
*.pyd
|
6 |
+
|
7 |
+
# C extensions
|
8 |
+
*.so
|
9 |
+
|
10 |
+
# Distribution / packaging
|
11 |
+
.Python
|
12 |
+
build/
|
13 |
+
develop-eggs/
|
14 |
+
dist/
|
15 |
+
downloads/
|
16 |
+
eggs/
|
17 |
+
.eggs/
|
18 |
+
lib/
|
19 |
+
lib64/
|
20 |
+
parts/
|
21 |
+
sdist/
|
22 |
+
var/
|
23 |
+
wheels/
|
24 |
+
pip-wheel-metadata/
|
25 |
+
share/python-wheels/
|
26 |
+
*.egg-info/
|
27 |
+
.installed.cfg
|
28 |
+
*.egg
|
29 |
+
MANIFEST
|
30 |
+
|
31 |
+
# PyInstaller
|
32 |
+
# Usually these files are written by a python script from a template
|
33 |
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
34 |
+
*.manifest
|
35 |
+
*.spec
|
36 |
+
|
37 |
+
# Installer logs
|
38 |
+
pip-log.txt
|
39 |
+
pip-delete-this-directory.txt
|
40 |
+
|
41 |
+
# Unit test / coverage reports
|
42 |
+
htmlcov/
|
43 |
+
.tox/
|
44 |
+
.nox/
|
45 |
+
.coverage
|
46 |
+
.coverage.*
|
47 |
+
.cache
|
48 |
+
nosetests.xml
|
49 |
+
coverage.xml
|
50 |
+
*.cover
|
51 |
+
*.py,cover
|
52 |
+
.hypothesis/
|
53 |
+
.pytest_cache/
|
54 |
+
|
55 |
+
# Environments
|
56 |
+
.env
|
57 |
+
.venv
|
58 |
+
env/
|
59 |
+
venv/
|
60 |
+
ENV/
|
61 |
+
env.bak/
|
62 |
+
venv.bak/
|
63 |
+
|
64 |
+
# IDEs and editors
|
65 |
+
.idea/
|
66 |
+
.vscode/
|
67 |
+
*.swp
|
68 |
+
*~
|
69 |
+
|
70 |
+
# OS generated files
|
71 |
+
.DS_Store
|
72 |
+
Thumbs.db
|
73 |
+
|
74 |
+
# Ansible
|
75 |
+
*.retry
|
76 |
+
.ansible/
|
77 |
+
.ansible-lint
|
78 |
+
vault.yml
|
79 |
+
vault.yaml
|
80 |
+
vault_pass.txt
|
81 |
+
*.log
|
82 |
+
|
83 |
+
# Ansible AVD generated files
|
84 |
+
intended/
|
85 |
+
documentation/
|
86 |
+
reports/
|
87 |
+
fabric-documentation/
|
88 |
+
|
89 |
+
# Ansible Collections and Roles
|
90 |
+
# These should be managed via requirements.yml
|
91 |
+
ansible_collections/
|
92 |
+
collections/
|
93 |
+
roles/
|
94 |
+
|
95 |
+
# Credentials
|
96 |
+
credentials.yml
|
97 |
+
credentials.yaml
|
98 |
+
*.pem
|
99 |
+
*.key
|
100 |
+
|
101 |
+
# ChromaDB - Keep SQLite but ignore vector index files
|
102 |
+
chroma_db/*/
|
103 |
+
!chroma_db/
|
104 |
+
!chroma_db/chroma.sqlite3
|
.python-version
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
3.12
|
README.md
CHANGED
@@ -1,12 +1,6 @@
|
|
1 |
---
|
2 |
-
title:
|
3 |
-
|
4 |
-
colorFrom: red
|
5 |
-
colorTo: gray
|
6 |
sdk: gradio
|
7 |
sdk_version: 5.38.2
|
8 |
-
app_file: app.py
|
9 |
-
pinned: false
|
10 |
---
|
11 |
-
|
12 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
1 |
---
|
2 |
+
title: mcp-network-doc-dem
|
3 |
+
app_file: app.py
|
|
|
|
|
4 |
sdk: gradio
|
5 |
sdk_version: 5.38.2
|
|
|
|
|
6 |
---
|
|
|
|
app.py
ADDED
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# /// script
|
2 |
+
# dependencies = [
|
3 |
+
# "gradio[mcp]",
|
4 |
+
# "langchain_community",
|
5 |
+
# "chromadb",
|
6 |
+
# "huggingface_hub",
|
7 |
+
# "langchain_community",
|
8 |
+
# "sentence_transformers",
|
9 |
+
# ]
|
10 |
+
# ///
|
11 |
+
|
12 |
+
import gradio as gr
|
13 |
+
from query_interface import AristaDocumentQuery
|
14 |
+
|
15 |
+
# Shared query interface, created lazily so the embedding model and vector
# store are loaded once per process instead of once per request.
_query_interface = None


def _get_query_interface():
    """Return the shared AristaDocumentQuery, building it on first use."""
    global _query_interface
    if _query_interface is None:
        _query_interface = AristaDocumentQuery()
    return _query_interface


def search_docs(query: str, k: int = 5) -> str:
    """
    Search the Arista AVD documentation vector database.

    Args:
        query (str): The search query.
        k (int): Number of results to return.

    Returns:
        str: Formatted string of search results.
    """
    # The UI's number box may hand over a float, or None when left empty;
    # normalise to a positive int and fall back to the documented default.
    try:
        top_k = 5 if k is None else int(k)
    except (TypeError, ValueError, OverflowError):
        top_k = 5
    top_k = max(1, top_k)

    if not query or not query.strip():
        return "Please enter a search query."

    query_interface = _get_query_interface()
    results = query_interface.similarity_search(query, k=top_k)
    if not results:
        # format_results() yields an empty string for no hits; say so explicitly.
        return "No results found."
    return query_interface.format_results(results)
|
27 |
+
|
28 |
+
# Create a standard Gradio interface
|
29 |
+
# Explicit components (rather than the "textbox"/"number" shortcuts) so the
# result-count box starts at 5 and yields whole numbers, matching the
# `k: int = 5` signature of search_docs.
demo = gr.Interface(
    fn=search_docs,
    inputs=[
        gr.Textbox(label="query"),
        gr.Number(label="k", value=5, precision=0, minimum=1),
    ],
    outputs="text",
    title="Document Search",
    description="Enter a search query and the number of results to return.",
)

# Launch both the Gradio web interface and the MCP server
if __name__ == "__main__":
    demo.launch(mcp_server=True)
|
chroma_db/chroma.sqlite3
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:579d6b92c38712c65f3d6982a9ff9869602348c201875a9ec529f5b99826abfd
|
3 |
+
size 3698688
|
pyproject.toml
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[project]
|
2 |
+
name = "mcp-arista-avd"
|
3 |
+
version = "0.1.0"
|
4 |
+
description = "Gradio and MCP search interface for the Arista AVD documentation vector database"
|
5 |
+
readme = "README.md"
|
6 |
+
requires-python = ">=3.12"
|
7 |
+
dependencies = [
|
8 |
+
"gradio>=5.38.2",
|
9 |
+
]
|
query_interface.py
ADDED
@@ -0,0 +1,210 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env python3
|
2 |
+
# /// script
|
3 |
+
# dependencies = [
|
4 |
+
# "langchain_community",
|
5 |
+
# "chromadb",
|
6 |
+
# "huggingface_hub",
|
7 |
+
# "langchain_community",
|
8 |
+
# "sentence_transformers",
|
9 |
+
# "pydantic"
|
10 |
+
# ]
|
11 |
+
# ///
|
12 |
+
#!/usr/bin/env python3
|
13 |
+
"""
|
14 |
+
Query interface for Arista AVD documentation vector database.
|
15 |
+
Provides search and retrieval capabilities.
|
16 |
+
"""
|
17 |
+
|
18 |
+
import argparse
|
19 |
+
import json
|
20 |
+
from typing import List, Dict, Any, Optional
|
21 |
+
from pathlib import Path
|
22 |
+
import logging
|
23 |
+
from pydantic import BaseModel, Field
|
24 |
+
from langchain_community.embeddings import HuggingFaceEmbeddings
|
25 |
+
from langchain_community.vectorstores import Chroma
|
26 |
+
from langchain.schema import Document
|
27 |
+
|
28 |
+
# Root logging is configured when this module is imported: INFO level,
# records formatted as "<time> - <level> - <message>".
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Module-scoped logger used by the classes and CLI below.
logger = logging.getLogger(__name__)
|
30 |
+
|
31 |
+
|
32 |
+
class EmbeddingConfig(BaseModel):
    """Configuration for embeddings.

    Holds the settings that AristaDocumentQuery forwards to
    HuggingFaceEmbeddings: the model name, the device, and whether the
    produced vectors are normalized.
    """

    # Name of the embedding model to load.
    model_name: str = Field(
        default="all-MiniLM-L6-v2",
        description="The name of the HuggingFace model to use",
    )
    # Passed through as the 'device' entry of model_kwargs.
    device: str = Field(
        default="cpu",
        description="Device to use for embedding generation (cpu or cuda)",
    )
    # Passed through as the 'normalize_embeddings' entry of encode_kwargs.
    normalize_embeddings: bool = Field(
        default=True,
        description="Whether to normalize embeddings",
    )
|
37 |
+
|
38 |
+
class AristaDocumentQuery(BaseModel):
    """Query interface for Arista AVD documentation.

    On construction the embedding model described by ``embedding_config`` is
    created and the Chroma vector store under ``persist_directory`` is opened.
    The search methods return LangChain ``Document`` objects;
    ``format_results`` and ``export_results`` turn those into text or JSON.
    """
    persist_directory: str = Field(default="./chroma_db", description="Directory containing the vector store")
    embedding_config: EmbeddingConfig = Field(default_factory=EmbeddingConfig, description="Configuration for embeddings")

    # These will be initialized in __init__
    embeddings: Any = Field(default=None, exclude=True)
    vector_store: Any = Field(default=None, exclude=True)

    class Config:
        arbitrary_types_allowed = True

    def __init__(self, **data):
        """Validate the fields, then build the embeddings and open the store."""
        super().__init__(**data)
        self.embeddings = HuggingFaceEmbeddings(
            model_name=self.embedding_config.model_name,
            model_kwargs={'device': self.embedding_config.device},
            encode_kwargs={'normalize_embeddings': self.embedding_config.normalize_embeddings}
        )
        self.vector_store = self._load_vector_store()

    def _load_vector_store(self) -> Chroma:
        """Load the existing vector store.

        Returns:
            The Chroma store opened on ``persist_directory``.

        Raises:
            Exception: whatever Chroma raises while opening the store; the
                error is logged and re-raised unchanged.
        """
        # Surface a likely misconfiguration early instead of only seeing
        # empty search results later.
        if not Path(self.persist_directory).is_dir():
            logger.warning(f"Vector store directory not found: {self.persist_directory}")
        try:
            vector_store = Chroma(
                persist_directory=self.persist_directory,
                embedding_function=self.embeddings
            )
            logger.info(f"Loaded vector store from {self.persist_directory}")
            return vector_store
        except Exception as e:
            logger.error(f"Error loading vector store: {e}")
            raise

    def similarity_search(self, query: str, k: int = 5, filter_dict: Optional[Dict] = None) -> List[Document]:
        """Perform similarity search on the vector store.

        Args:
            query: Free-text search query.
            k: Number of results to return. Integral floats (as delivered by
                UI number inputs) are accepted and converted to ``int``.
            filter_dict: Optional metadata filter forwarded to the store;
                an empty or ``None`` filter means "no filtering".

        Returns:
            The matching documents, or an empty list when ``k`` is invalid or
            the underlying search fails (the failure is logged).
        """
        try:
            top_k = int(k)
        except (TypeError, ValueError, OverflowError):
            logger.error(f"Invalid number of results requested: {k!r}")
            return []
        if top_k < 1:
            logger.error(f"Invalid number of results requested: {k!r}")
            return []

        try:
            return self.vector_store.similarity_search(
                query=query,
                k=top_k,
                filter=filter_dict or None,
            )
        except Exception as e:
            # Best-effort API: callers get an empty list, the log keeps the traceback.
            logger.exception("Error during similarity search: %s", e)
            return []

    def search_by_category(self, query: str, category: str, k: int = 5) -> List[Document]:
        """Search documents within a specific category."""
        return self.similarity_search(query, k=k, filter_dict={"category": category})

    def search_by_type(self, query: str, doc_type: str, k: int = 5) -> List[Document]:
        """Search documents of a specific type (markdown/csv)."""
        return self.similarity_search(query, k=k, filter_dict={"type": doc_type})

    def get_categories(self) -> List[str]:
        """Get all available categories in the vector store."""
        # This is a simplified version - in a real implementation,
        # you might want to query the metadata directly from ChromaDB
        return [
            'device_configuration',
            'fabric_documentation',
            'testing',
            'netbox_integration',
            'arista_cloud_test',
            'avd_design',
            'api_usage',
            'workflow',
            'infoblox_integration',
            'network_testing',
            'general_documentation',
            'project_documentation',
        ]

    def format_results(self, results: List[Document], verbose: bool = False) -> str:
        """Format search results for display.

        Args:
            results: Documents to render.
            verbose: When true, the full page content is appended after the
                500-character preview of each document.

        Returns:
            One newline-joined string; empty when ``results`` is empty.
        """
        output = []

        for i, doc in enumerate(results, 1):
            metadata = doc.metadata
            output.append(f"\n{'='*80}")
            output.append(f"Result {i}:")
            output.append(f"Source: {metadata.get('source', 'Unknown')}")
            output.append(f"Category: {metadata.get('category', 'Unknown')}")
            output.append(f"Type: {metadata.get('type', 'Unknown')}")

            # CSV documents carry extra shape information in their metadata.
            if metadata.get('type') == 'csv':
                output.append(f"Columns: {metadata.get('columns', 'Unknown')}")
                output.append(f"Rows: {metadata.get('rows', 'Unknown')}")

            content = doc.page_content
            output.append("\nContent Preview:")
            output.append(content[:500] + "..." if len(content) > 500 else content)

            if verbose:
                output.append("\nFull Content:")
                output.append(content)

        return "\n".join(output)

    def export_results(self, results: List[Document], output_file: str) -> None:
        """Export search results to a JSON file.

        Args:
            results: Documents to serialise (content plus metadata).
            output_file: Path of the JSON file to write; it is overwritten.
        """
        data = [
            {'content': doc.page_content, 'metadata': doc.metadata}
            for doc in results
        ]

        # Explicit encoding so the output does not depend on the platform default.
        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=2)

        logger.info(f"Results exported to {output_file}")
|
159 |
+
|
160 |
+
|
161 |
+
def main():
    """Main function for command-line interface.

    Parses the command line, then either lists the known categories or runs
    a similarity search (optionally filtered by category and/or document
    type), prints the formatted hits and, if requested, exports them to JSON.
    """
    parser = argparse.ArgumentParser(description="Query Arista AVD documentation vector database")
    parser.add_argument("query", nargs="?", help="Search query")
    parser.add_argument("-k", "--top-k", type=int, default=5, help="Number of results to return (default: 5)")
    parser.add_argument("-c", "--category", help="Filter by category")
    parser.add_argument("-t", "--type", choices=['markdown', 'csv'], help="Filter by document type")
    parser.add_argument("-v", "--verbose", action="store_true", help="Show full content")
    parser.add_argument("-e", "--export", help="Export results to JSON file")
    parser.add_argument("--list-categories", action="store_true", help="List available categories")

    args = parser.parse_args()

    # Validate before constructing AristaDocumentQuery: its __init__ builds the
    # embeddings and opens the vector store, which is wasted work on bad input.
    if not args.list_categories:
        if not args.query:
            parser.error("Query is required unless using --list-categories")
        if args.top_k < 1:
            parser.error("--top-k must be a positive integer")

    # Initialize query interface
    query_interface = AristaDocumentQuery()

    # List categories if requested
    if args.list_categories:
        print("Available categories:")
        for cat in query_interface.get_categories():
            print(f"  - {cat}")
        return

    # Build the metadata filter. When both --category and --type are given they
    # are combined with "$and" rather than silently dropping --type.
    conditions = []
    if args.category:
        conditions.append({"category": args.category})
    if args.type:
        conditions.append({"type": args.type})

    if len(conditions) > 1:
        filter_dict = {"$and": conditions}
    elif conditions:
        filter_dict = conditions[0]
    else:
        filter_dict = None

    # Perform search
    results = query_interface.similarity_search(args.query, k=args.top_k, filter_dict=filter_dict)

    # Display results
    if not results:
        print("No results found.")
        return

    print(query_interface.format_results(results, verbose=args.verbose))

    # Export if requested
    if args.export:
        query_interface.export_results(results, args.export)


if __name__ == "__main__":
    main()
|
requirements.txt
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
gradio[mcp]
|
2 |
+
langchain_community
|
3 |
+
chromadb
|
4 |
+
huggingface_hub
|
5 |
+
sentence_transformers
|
6 |
+
pydantic
|
uv.lock
ADDED
The diff for this file is too large to render.
See raw diff
|
|