rogerscuall committed · Commit 63f7ae3 · verified · 1 Parent(s): 1d3d5b9

Upload folder using huggingface_hub

.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ chroma_db/chroma.sqlite3 filter=lfs diff=lfs merge=lfs -text
.github/workflows/update_space.yml ADDED
@@ -0,0 +1,28 @@
+ name: Run Python script
+
+ on:
+   push:
+     branches:
+       - main
+
+ jobs:
+   build:
+     runs-on: ubuntu-latest
+
+     steps:
+       - name: Checkout
+         uses: actions/checkout@v2
+
+       - name: Set up Python
+         uses: actions/setup-python@v2
+         with:
+           python-version: '3.9'
+
+       - name: Install Gradio
+         run: python -m pip install gradio
+
+       - name: Log in to Hugging Face
+         run: python -c 'import huggingface_hub; huggingface_hub.login(token="${{ secrets.hf_token }}")'
+
+       - name: Deploy to Spaces
+         run: gradio deploy
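The login step runs `huggingface_hub.login()` non-interactively with a repository secret named `hf_token`, and `gradio deploy` then reuses the cached credentials when pushing to the Space. A minimal local sketch of the same call, assuming a token exported as `HF_TOKEN` (the variable name here is illustrative):

```python
# Local sketch of the workflow's login step; HF_TOKEN is an assumed env var.
import os
from huggingface_hub import login, whoami

login(token=os.environ["HF_TOKEN"])  # the same call the workflow issues via python -c
print(whoami()["name"])              # confirms the token resolves to an account
```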
.gitignore ADDED
@@ -0,0 +1,104 @@
+ # Byte-compiled / optimized / DLL files
+ __pycache__/
+ *.pyc
+ *.pyo
+ *.pyd
+
+ # C extensions
+ *.so
+
+ # Distribution / packaging
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ pip-wheel-metadata/
+ share/python-wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+ MANIFEST
+
+ # PyInstaller
+ # Usually these files are written by a python script from a template
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
+ *.manifest
+ *.spec
+
+ # Installer logs
+ pip-log.txt
+ pip-delete-this-directory.txt
+
+ # Unit test / coverage reports
+ htmlcov/
+ .tox/
+ .nox/
+ .coverage
+ .coverage.*
+ .cache
+ nosetests.xml
+ coverage.xml
+ *.cover
+ *.py,cover
+ .hypothesis/
+ .pytest_cache/
+
+ # Environments
+ .env
+ .venv
+ env/
+ venv/
+ ENV/
+ env.bak/
+ venv.bak/
+
+ # IDEs and editors
+ .idea/
+ .vscode/
+ *.swp
+ *~
+
+ # OS generated files
+ .DS_Store
+ Thumbs.db
+
+ # Ansible
+ *.retry
+ .ansible/
+ .ansible-lint
+ vault.yml
+ vault.yaml
+ vault_pass.txt
+ *.log
+
+ # Ansible AVD generated files
+ intended/
+ documentation/
+ reports/
+ fabric-documentation/
+
+ # Ansible Collections and Roles
+ # These should be managed via requirements.yml
+ ansible_collections/
+ collections/
+ roles/
+
+ # Credentials
+ credentials.yml
+ credentials.yaml
+ *.pem
+ *.key
+
+ # ChromaDB - Keep SQLite but ignore vector index files
+ chroma_db/*/
+ !chroma_db/
+ !chroma_db/chroma.sqlite3
.python-version ADDED
@@ -0,0 +1 @@
+ 3.12
README.md CHANGED
@@ -1,12 +1,6 @@
  ---
- title: Mcp Network Doc Dem
- emoji: 🐨
- colorFrom: red
- colorTo: gray
+ title: mcp-network-doc-dem
+ app_file: app.py
  sdk: gradio
  sdk_version: 5.38.2
- app_file: app.py
- pinned: false
  ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,39 @@
+ # /// script
+ # dependencies = [
+ #     "gradio[mcp]",
+ #     "langchain_community",
+ #     "chromadb",
+ #     "huggingface_hub",
+ #     "sentence_transformers",
+ # ]
+ # ///
+
+ import gradio as gr
+ from query_interface import AristaDocumentQuery
+
+ def search_docs(query: str, k: int = 5) -> str:
+     """
+     Search the Arista AVD documentation vector database.
+     Args:
+         query (str): The search query.
+         k (int): Number of results to return.
+     Returns:
+         str: Formatted string of search results.
+     """
+     k = int(k)  # Gradio's "number" input delivers a float
+     query_interface = AristaDocumentQuery()
+     results = query_interface.similarity_search(query, k=k)
+     return query_interface.format_results(results)
+
+ # Create a standard Gradio interface
+ demo = gr.Interface(
+     fn=search_docs,
+     inputs=["textbox", "number"],
+     outputs="text",
+     title="Document Search",
+     description="Enter a search query and the number of results to return."
+ )
+
+ # Launch both the Gradio web interface and the MCP server
+ if __name__ == "__main__":
+     demo.launch(mcp_server=True)
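With `mcp_server=True`, the same `search_docs` function is exposed both through the web UI and as an MCP tool. One quick way to exercise the HTTP endpoint once the app is running is the Gradio Python client — a sketch, assuming the default local address and the auto-generated `/predict` endpoint name:

```python
# Sketch using gradio_client against a locally running instance;
# the URL and api_name below are assumptions based on Gradio defaults.
from gradio_client import Client

client = Client("http://127.0.0.1:7860/")
result = client.predict(
    "How do I generate fabric documentation with AVD?",  # query
    5,                                                   # k, number of results
    api_name="/predict",
)
print(result)
```

MCP clients would instead point at the server's MCP route; Gradio prints the exact URL at launch.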
chroma_db/chroma.sqlite3 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:579d6b92c38712c65f3d6982a9ff9869602348c201875a9ec529f5b99826abfd
+ size 3698688
pyproject.toml ADDED
@@ -0,0 +1,9 @@
+ [project]
+ name = "mcp-arista-avd"
+ version = "0.1.0"
+ description = "Add your description here"
+ readme = "README.md"
+ requires-python = ">=3.12"
+ dependencies = [
+     "gradio>=5.38.2",
+ ]
query_interface.py ADDED
@@ -0,0 +1,208 @@
+ #!/usr/bin/env python3
+ # /// script
+ # dependencies = [
+ #     "langchain_community",
+ #     "chromadb",
+ #     "huggingface_hub",
+ #     "sentence_transformers",
+ #     "pydantic"
+ # ]
+ # ///
+ """
+ Query interface for Arista AVD documentation vector database.
+ Provides search and retrieval capabilities.
+ """
+
+ import argparse
+ import json
+ from typing import List, Dict, Any, Optional
+ from pathlib import Path
+ import logging
+ from pydantic import BaseModel, Field
+ from langchain_community.embeddings import HuggingFaceEmbeddings
+ from langchain_community.vectorstores import Chroma
+ from langchain_core.documents import Document  # langchain_core is installed with langchain_community
+
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+ logger = logging.getLogger(__name__)
+
+
+ class EmbeddingConfig(BaseModel):
+     """Configuration for embeddings."""
+     model_name: str = Field(default="all-MiniLM-L6-v2", description="The name of the HuggingFace model to use")
+     device: str = Field(default="cpu", description="Device to use for embedding generation (cpu or cuda)")
+     normalize_embeddings: bool = Field(default=True, description="Whether to normalize embeddings")
+
+ class AristaDocumentQuery(BaseModel):
+     """Query interface for Arista AVD documentation."""
+     persist_directory: str = Field(default="./chroma_db", description="Directory containing the vector store")
+     embedding_config: EmbeddingConfig = Field(default_factory=EmbeddingConfig, description="Configuration for embeddings")
+
+     # These will be initialized in __init__
+     embeddings: Any = Field(default=None, exclude=True)
+     vector_store: Any = Field(default=None, exclude=True)
+
+     class Config:
+         arbitrary_types_allowed = True
+
+     def __init__(self, **data):
+         super().__init__(**data)
+         self.embeddings = HuggingFaceEmbeddings(
+             model_name=self.embedding_config.model_name,
+             model_kwargs={'device': self.embedding_config.device},
+             encode_kwargs={'normalize_embeddings': self.embedding_config.normalize_embeddings}
+         )
+         self.vector_store = self._load_vector_store()
+
+     def _load_vector_store(self) -> Chroma:
+         """Load the existing vector store."""
+         try:
+             vector_store = Chroma(
+                 persist_directory=self.persist_directory,
+                 embedding_function=self.embeddings
+             )
+             logger.info(f"Loaded vector store from {self.persist_directory}")
+             return vector_store
+         except Exception as e:
+             logger.error(f"Error loading vector store: {e}")
+             raise
+
+     def similarity_search(self, query: str, k: int = 5, filter_dict: Optional[Dict] = None) -> List[Document]:
+         """Perform similarity search on the vector store."""
+         try:
+             if filter_dict:
+                 results = self.vector_store.similarity_search(
+                     query=query,
+                     k=k,
+                     filter=filter_dict
+                 )
+             else:
+                 results = self.vector_store.similarity_search(
+                     query=query,
+                     k=k
+                 )
+             return results
+         except Exception as e:
+             logger.error(f"Error during similarity search: {e}")
+             return []
+
+     def search_by_category(self, query: str, category: str, k: int = 5) -> List[Document]:
+         """Search documents within a specific category."""
+         filter_dict = {"category": category}
+         return self.similarity_search(query, k=k, filter_dict=filter_dict)
+
+     def search_by_type(self, query: str, doc_type: str, k: int = 5) -> List[Document]:
+         """Search documents of a specific type (markdown/csv)."""
+         filter_dict = {"type": doc_type}
+         return self.similarity_search(query, k=k, filter_dict=filter_dict)
+
+     def get_categories(self) -> List[str]:
+         """Get all available categories in the vector store."""
+         # This is a simplified version - in a real implementation,
+         # you might want to query the metadata directly from ChromaDB
+         categories = [
+             'device_configuration',
+             'fabric_documentation',
+             'testing',
+             'netbox_integration',
+             'arista_cloud_test',
+             'avd_design',
+             'api_usage',
+             'workflow',
+             'infoblox_integration',
+             'network_testing',
+             'general_documentation',
+             'project_documentation'
+         ]
+         return categories
+
+     def format_results(self, results: List[Document], verbose: bool = False) -> str:
+         """Format search results for display."""
+         output = []
+
+         for i, doc in enumerate(results, 1):
+             output.append(f"\n{'='*80}")
+             output.append(f"Result {i}:")
+             output.append(f"Source: {doc.metadata.get('source', 'Unknown')}")
+             output.append(f"Category: {doc.metadata.get('category', 'Unknown')}")
+             output.append(f"Type: {doc.metadata.get('type', 'Unknown')}")
+
+             if doc.metadata.get('type') == 'csv':
+                 output.append(f"Columns: {doc.metadata.get('columns', 'Unknown')}")
+                 output.append(f"Rows: {doc.metadata.get('rows', 'Unknown')}")
+
+             output.append("\nContent Preview:")
+             content_preview = doc.page_content[:500] + "..." if len(doc.page_content) > 500 else doc.page_content
+             output.append(content_preview)
+
+             if verbose:
+                 output.append("\nFull Content:")
+                 output.append(doc.page_content)
+
+         return "\n".join(output)
+
+     def export_results(self, results: List[Document], output_file: str) -> None:
+         """Export search results to a JSON file."""
+         data = []
+         for doc in results:
+             data.append({
+                 'content': doc.page_content,
+                 'metadata': doc.metadata
+             })
+
+         with open(output_file, 'w') as f:
+             json.dump(data, f, indent=2)
+
+         logger.info(f"Results exported to {output_file}")
+
+
+ def main():
+     """Main function for command-line interface."""
+     parser = argparse.ArgumentParser(description="Query Arista AVD documentation vector database")
+     parser.add_argument("query", nargs="?", help="Search query")
+     parser.add_argument("-k", "--top-k", type=int, default=5, help="Number of results to return (default: 5)")
+     parser.add_argument("-c", "--category", help="Filter by category")
+     parser.add_argument("-t", "--type", choices=['markdown', 'csv'], help="Filter by document type")
+     parser.add_argument("-v", "--verbose", action="store_true", help="Show full content")
+     parser.add_argument("-e", "--export", help="Export results to JSON file")
+     parser.add_argument("--list-categories", action="store_true", help="List available categories")
+
+     args = parser.parse_args()
+
+     # Initialize query interface
+     query_interface = AristaDocumentQuery()
+
+     # List categories if requested
+     if args.list_categories:
+         categories = query_interface.get_categories()
+         print("Available categories:")
+         for cat in categories:
+             print(f"  - {cat}")
+         return
+
+     # Ensure query is provided if not listing categories
+     if not args.query:
+         parser.error("Query is required unless using --list-categories")
+
+     # Perform search
+     if args.category:
+         results = query_interface.search_by_category(args.query, args.category, k=args.top_k)
+     elif args.type:
+         results = query_interface.search_by_type(args.query, args.type, k=args.top_k)
+     else:
+         results = query_interface.similarity_search(args.query, k=args.top_k)
+
+     # Display results
+     if results:
+         formatted_results = query_interface.format_results(results, verbose=args.verbose)
+         print(formatted_results)
+
+         # Export if requested
+         if args.export:
+             query_interface.export_results(results, args.export)
+     else:
+         print("No results found.")
+
+
+ if __name__ == "__main__":
+     main()
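Besides the CLI (for example `python query_interface.py "evpn overlay" -k 3 -c avd_design`), the class can be driven directly from Python — a short sketch, assuming the committed `chroma_db/` directory sits in the working directory; the queries are placeholders:

```python
# Programmatic sketch of query_interface.py; queries are illustrative.
from query_interface import AristaDocumentQuery

qi = AristaDocumentQuery()  # loads embeddings and ./chroma_db on construction

# Plain similarity search, then a metadata-filtered one.
docs = qi.similarity_search("BGP underlay peering", k=3)
md_docs = qi.search_by_type("fabric documentation structure", "markdown", k=3)

print(qi.format_results(docs))
qi.export_results(md_docs, "results.json")  # writes content + metadata as JSON
```

Note that every `AristaDocumentQuery()` call re-loads the sentence-transformers model, so long-lived callers (like `app.py`) may want to construct it once and reuse it.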
requirements.txt ADDED
@@ -0,0 +1,6 @@
+ gradio[mcp]
+ langchain_community
+ chromadb
+ huggingface_hub
+ sentence_transformers
+ pydantic
uv.lock ADDED
The diff for this file is too large to render. See raw diff