ABDALLALSWAITI committed on
Commit
d00c422
·
verified ·
1 Parent(s): 38304ad

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +593 -1
app.py CHANGED
@@ -30,8 +30,600 @@ class HuggingFaceInfoServer:
30
  })
31
  self.cache = {}
32
  self.cache_ttl = 3600 # 1 hour cache TTL
33
-
34
  def _is_cache_valid(self, cache_key: str) -> bool:
35
  if cache_key not in self.cache:
36
  return False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  )
 
30
  })
31
  self.cache = {}
32
  self.cache_ttl = 3600 # 1 hour cache TTL
33
+
34
  def _is_cache_valid(self, cache_key: str) -> bool:
35
  if cache_key not in self.cache:
36
  return False
37
+ cache_time = self.cache[cache_key].get('timestamp', 0)
38
+ return time.time() - cache_time < self.cache_ttl
39
+
40
+ def _get_from_cache(self, cache_key: str) -> Optional[str]:
41
+ if self._is_cache_valid(cache_key):
42
+ return self.cache[cache_key]['content']
43
+ return None
44
+
45
+ def _store_in_cache(self, cache_key: str, content: str):
46
+ self.cache[cache_key] = {
47
+ 'content': content,
48
+ 'timestamp': time.time()
49
+ }
50
+
51
def _fetch_with_retry(self, url: str, max_retries: int = 3) -> Optional[str]:
    """Fetch ``url`` as text, using the in-memory cache and retrying on failure.

    Args:
        url: Address to GET through ``self.session``.
        max_retries: Maximum number of attempts. Between failed attempts the
            method sleeps ``2 ** attempt`` seconds.

    Returns:
        The response body, or None when every attempt raised a
        ``requests`` exception (or ``max_retries`` is not positive).
    """
    # Key on the URL itself: hash(url) could map two URLs to the same entry.
    cache_key = f"url_{url}"
    cached_content = self._get_from_cache(cache_key)
    # Compare against None so a cached empty body still counts as a hit.
    if cached_content is not None:
        logger.info(f"Cache hit for {url}")
        return cached_content
    for attempt in range(max_retries):
        try:
            logger.info(f"Fetching {url} (attempt {attempt + 1})")
            response = self.session.get(url, timeout=20)
            response.raise_for_status()
        except requests.exceptions.RequestException as e:
            logger.warning(f"Attempt {attempt + 1} failed for {url}: {e}")
            if attempt < max_retries - 1:
                time.sleep(2 ** attempt)
            else:
                logger.error(f"All attempts failed for {url}")
        else:
            content = response.text
            self._store_in_cache(cache_key, content)
            return content
    return None
73
+
74
def _extract_code_examples(self, soup: BeautifulSoup) -> List[Dict[str, str]]:
    """Collect up to five distinct code snippets from a parsed page.

    Args:
        soup: Parsed document (or sub-tree) to scan.

    Returns:
        A list of dicts with keys ``'code'``, ``'language'`` and ``'type'``
        (``'usage'`` or ``'example'``), de-duplicated on the first 100
        characters of the code.
    """
    known_languages = ['python', 'bash', 'javascript', 'json']
    code_keywords = ['import', 'from', 'def', 'class', 'pip install', 'transformers']
    usage_words = ['import', 'load', 'pipeline']

    code_blocks = []
    for code_elem in soup.find_all(['code', 'pre']):
        language = 'python'
        for cls in code_elem.get('class', []):
            cls_name = str(cls)
            if 'language-' in cls_name:
                language = cls_name.replace('language-', '')
                break
            if any(lang in cls_name.lower() for lang in known_languages):
                language = cls_name.lower()
                break
        # get_text(strip=True) strips every text node and concatenates them,
        # which removes newlines/indentation and glues highlighted tokens
        # together. Keep the inner whitespace and trim only the ends.
        code_text = code_elem.get_text().strip()
        lowered = code_text.lower()
        if len(code_text) > 20 and any(keyword in lowered for keyword in code_keywords):
            code_blocks.append({
                'code': code_text,
                'language': language,
                'type': 'usage' if any(word in lowered for word in usage_words) else 'example',
            })

    for block in soup.find_all('div', class_=re.compile(r'highlight|code-block|language')):
        code_text = block.get_text().strip()
        if len(code_text) > 20:
            code_blocks.append({'code': code_text, 'language': 'python', 'type': 'example'})

    seen = set()
    unique_blocks = []
    for block in code_blocks:
        fingerprint = block['code'][:100]
        if fingerprint in seen:
            continue
        seen.add(fingerprint)
        unique_blocks.append(block)
        if len(unique_blocks) >= 5:
            break
    return unique_blocks
105
+
106
def _extract_practical_content(self, soup: BeautifulSoup, topic: str) -> Dict[str, Any]:
    """Pull overview, code, installation, usage and parameter data from a docs page.

    Args:
        soup: Parsed documentation page.
        topic: Topic the page was fetched for (not used by the extraction).

    Returns:
        A dict with keys ``'overview'``, ``'code_examples'``,
        ``'usage_instructions'``, ``'parameters'``, ``'methods'``,
        ``'installation'`` and ``'quickstart'``. All values are empty when
        no main content container is found.
    """
    heading_tags = ['h1', 'h2', 'h3', 'h4']
    content = {'overview': '', 'code_examples': [], 'usage_instructions': [], 'parameters': [], 'methods': [], 'installation': '', 'quickstart': ''}
    main_content = soup.find('main') or soup.find('article') or soup.find('div', class_=re.compile(r'content|docs|prose'))
    if not main_content:
        return content

    # Overview: first few substantial paragraphs. The ' ' separator keeps
    # words apart across inline tags such as <code> and <a>.
    overview_texts = []
    for p in main_content.find_all('p', limit=5):
        text = p.get_text(' ', strip=True)
        if len(text) > 30 and not text.startswith('Table of contents'):
            overview_texts.append(text)
    if overview_texts:
        overview = ' '.join(overview_texts)
        content['overview'] = overview[:1000] + "..." if len(overview) > 1000 else overview

    content['code_examples'] = self._extract_code_examples(main_content)

    # Installation: up to three text/code siblings after the first matching
    # heading that yields any text.
    install_headings = main_content.find_all(heading_tags, string=re.compile(r'install|setup|getting started', re.IGNORECASE))
    for heading in install_headings:
        next_elem = heading.find_next_sibling()
        install_text = []
        while next_elem and next_elem.name not in heading_tags and len(install_text) < 3:
            if next_elem.name in ['p', 'pre', 'code']:
                if next_elem.name == 'p':
                    text = next_elem.get_text(' ', strip=True)
                else:
                    # Preserve whitespace inside commands/snippets.
                    text = next_elem.get_text().strip()
                if text and len(text) > 10:
                    install_text.append(text)
            next_elem = next_elem.find_next_sibling()
        if install_text:
            content['installation'] = ' '.join(install_text)
            break

    # Usage: at most five text siblings under each usage-like heading.
    for heading in main_content.find_all(heading_tags):
        heading_text = heading.get_text(strip=True).lower()
        if not any(keyword in heading_text for keyword in ['usage', 'example', 'how to', 'quickstart', 'getting started']):
            continue
        next_elem = heading.find_next_sibling()
        instruction_parts = []
        while next_elem and next_elem.name not in heading_tags:
            if next_elem.name in ['p', 'li', 'div', 'ol', 'ul']:
                text = next_elem.get_text(' ', strip=True)
                if text and len(text) > 15:
                    instruction_parts.append(text)
            next_elem = next_elem.find_next_sibling()
            if len(instruction_parts) >= 5:
                break
        if instruction_parts:
            content['usage_instructions'].extend(instruction_parts)

    # Parameters: first eight body rows of tables whose headers look like a
    # parameter listing.
    for table in main_content.find_all('table'):
        headers = [th.get_text(' ', strip=True).lower() for th in table.find_all('th')]
        header_line = ' '.join(headers)
        if not any(keyword in header_line for keyword in ['parameter', 'argument', 'option', 'attribute', 'name', 'type']):
            continue
        for row in table.find_all('tr')[1:9]:
            cells = [td.get_text(' ', strip=True) for td in row.find_all('td')]
            if len(cells) >= 2:
                content['parameters'].append({
                    'name': cells[0],
                    'description': cells[1],
                    'type': cells[2] if len(cells) > 2 else '',
                    'default': cells[3] if len(cells) > 3 else '',
                })
    return content
161
+
162
def search_documentation(self, query: str, max_results: int = 3) -> str:
    """
    Searches the official Hugging Face documentation for a specific topic and returns a summary.
    This tool is useful for finding how-to guides, explanations of concepts like 'pipeline' or 'tokenizer', and usage examples.
    Args:
        query (str): The topic or keyword to search for in the documentation (e.g., 'fine-tuning', 'peft', 'datasets').
        max_results (int): The maximum number of documentation pages to retrieve and summarize. Defaults to 3.
    Returns:
        str: A Markdown report with one section per matched page, or a short message when the query is empty, nothing matches, or an error occurs.
    """
    try:
        # Callers may hand over an int, a float or a string; a float would
        # break list slicing and a non-positive value would hide every
        # match, so coerce and clamp to 1..5.
        try:
            max_results = int(float(max_results))
        except (TypeError, ValueError, OverflowError):
            max_results = 3
        max_results = min(max(max_results, 1), 5)

        query_lower = (query or '').lower().strip()
        if not query_lower:
            return "Please provide a search query."

        doc_sections = {
            'transformers': {
                'base_url': 'https://huggingface.co/docs/transformers',
                'topics': {
                    'pipeline': '/main_classes/pipelines',
                    'tokenizer': '/main_classes/tokenizer',
                    'trainer': '/main_classes/trainer',
                    'model': '/main_classes/model',
                    'quicktour': '/quicktour',
                    'installation': '/installation',
                    'fine-tuning': '/training',
                    'training': '/training',
                    'inference': '/main_classes/pipelines',
                    'preprocessing': '/preprocessing',
                    'tutorial': '/tutorials',
                    'configuration': '/main_classes/configuration',
                    'peft': '/peft',
                    'lora': '/peft',
                    'quantization': '/main_classes/quantization',
                    'generation': '/main_classes/text_generation',
                    'optimization': '/perf_train_gpu_one',
                    'deployment': '/deployment',
                    'custom': '/custom_models',
                },
            },
            'datasets': {
                'base_url': 'https://huggingface.co/docs/datasets',
                'topics': {
                    'loading': '/load_hub',
                    'load': '/load_hub',
                    'processing': '/process',
                    'streaming': '/stream',
                    'audio': '/audio_process',
                    'image': '/image_process',
                    'text': '/nlp_process',
                    'arrow': '/about_arrow',
                    'cache': '/cache',
                    'upload': '/upload_dataset',
                    'custom': '/dataset_script',
                },
            },
            'diffusers': {
                'base_url': 'https://huggingface.co/docs/diffusers',
                'topics': {
                    'pipeline': '/using-diffusers/loading',
                    'stable diffusion': '/using-diffusers/stable_diffusion',
                    'controlnet': '/using-diffusers/controlnet',
                    'inpainting': '/using-diffusers/inpaint',
                    'training': '/training/overview',
                    'optimization': '/optimization/fp16',
                    'schedulers': '/using-diffusers/schedulers',
                },
            },
            'hub': {
                'base_url': 'https://huggingface.co/docs/hub',
                'topics': {
                    'repositories': '/repositories',
                    'git': '/repositories-getting-started',
                    'spaces': '/spaces',
                    'models': '/models',
                    'datasets': '/datasets',
                },
            },
        }

        # Score every known topic against the query.
        query_words = query_lower.split()
        relevant_urls = []
        for section_name, section_data in doc_sections.items():
            base_url = section_data['base_url']
            for topic, path in section_data['topics'].items():
                topic_lower = topic.lower()
                if query_lower == topic_lower:
                    relevance = 1.0
                elif query_lower in topic_lower:
                    relevance = 0.9
                elif any(word in topic_lower for word in query_words):
                    relevance = 0.7
                elif any(word in query_lower for word in topic_lower.split()):
                    relevance = 0.6
                else:
                    continue
                relevant_urls.append({'url': base_url + path, 'topic': topic, 'section': section_name, 'relevance': relevance})

        relevant_urls.sort(key=lambda x: x['relevance'], reverse=True)
        # Several topics alias the same page (e.g. 'peft' and 'lora'); keep
        # only the best-ranked hit per URL so a page is not shown twice.
        seen_urls = set()
        unique_urls = []
        for info in relevant_urls:
            if info['url'] not in seen_urls:
                seen_urls.add(info['url'])
                unique_urls.append(info)
        relevant_urls = unique_urls[:max_results]

        if not relevant_urls:
            return f"❌ No documentation found for '{query}'. Try: pipeline, tokenizer, trainer, model, fine-tuning, datasets, diffusers, or peft."

        section_emojis = {'transformers': 'πŸ€–', 'datasets': 'πŸ“Š', 'diffusers': '🎨', 'hub': '🌐'}
        result = f"# πŸ“š Hugging Face Documentation: {query}\n\n"
        for i, url_info in enumerate(relevant_urls, 1):
            section_emoji = section_emojis.get(url_info['section'], 'πŸ“„')
            result += f"## {i}. {section_emoji} {url_info['topic'].title()} ({url_info['section'].title()})\n\n"
            content = self._fetch_with_retry(url_info['url'])
            if content:
                soup = BeautifulSoup(content, 'html.parser')
                practical_content = self._extract_practical_content(soup, url_info['topic'])
                if practical_content['overview']:
                    result += f"**πŸ“– Overview:**\n{practical_content['overview']}\n\n"
                if practical_content['installation']:
                    result += f"**βš™οΈ Installation:**\n{practical_content['installation']}\n\n"
                if practical_content['code_examples']:
                    result += "**πŸ’» Code Examples:**\n\n"
                    for j, code_block in enumerate(practical_content['code_examples'][:3], 1):
                        lang = code_block.get('language', 'python')
                        code_type = code_block.get('type', 'example')
                        result += f"*{code_type.title()} {j}:*\n```{lang}\n{code_block['code']}\n```\n\n"
                if practical_content['usage_instructions']:
                    result += "**πŸ› οΈ Usage Instructions:**\n"
                    for idx, instruction in enumerate(practical_content['usage_instructions'][:4], 1):
                        result += f"{idx}. {instruction}\n"
                    result += "\n"
                if practical_content['parameters']:
                    result += "**βš™οΈ Parameters:**\n"
                    for param in practical_content['parameters'][:6]:
                        param_type = f" (`{param['type']}`)" if param.get('type') else ""
                        default_val = f" *Default: {param['default']}*" if param.get('default') else ""
                        result += f"β€’ **{param['name']}**{param_type}: {param['description']}{default_val}\n"
                    result += "\n"
                result += f"**πŸ”— Full Documentation:** {url_info['url']}\n\n"
            else:
                result += f"⚠️ Could not fetch content. Visit directly: {url_info['url']}\n\n"
            result += "---\n\n"
        return result
    except Exception as e:
        # Tool boundary: report the failure as text instead of raising.
        logger.error(f"Error in search_documentation: {e}")
        return f"❌ Error searching documentation: {str(e)}\n\nTry a simpler search term or check your internet connection."
235
+
236
def get_model_info(self, model_name: str) -> str:
    """
    Fetches comprehensive information about a specific model from the Hugging Face Hub.
    Provides statistics like downloads and likes, a description, usage examples, and a quick-start code snippet.
    Args:
        model_name (str): The full identifier of the model on the Hub, such as 'bert-base-uncased' or 'meta-llama/Llama-2-7b-hf'.
    Returns:
        str: A Markdown report about the model, or a short message when the name is empty, the model is not found, or an error occurs.
    """
    try:
        model_name = (model_name or '').strip()
        if not model_name:
            return "Please provide a model name."
        api_url = f"{self.api_url}/models/{model_name}"
        response = self.session.get(api_url, timeout=15)
        if response.status_code == 404:
            return f"❌ Model '{model_name}' not found. Please check the model name."
        if response.status_code != 200:
            return f"❌ Error fetching model info (Status: {response.status_code})"
        model_data = response.json()

        result = f"# πŸ€– Model: {model_name}\n\n"
        # `or` fallbacks: a key that is present but null would otherwise
        # break the ',' number format and the [:10] date slice.
        downloads = model_data.get('downloads') or 0
        likes = model_data.get('likes') or 0
        task = model_data.get('pipeline_tag', 'N/A')
        library = model_data.get('library_name', 'N/A')
        created = str(model_data.get('createdAt') or 'N/A')[:10]
        updated = str(model_data.get('lastModified') or 'N/A')[:10]
        result += f"**πŸ“Š Statistics:**\nβ€’ **Downloads:** {downloads:,}\nβ€’ **Likes:** {likes:,}\nβ€’ **Task:** {task}\nβ€’ **Library:** {library}\nβ€’ **Created:** {created}\nβ€’ **Updated:** {updated}\n\n"
        if 'tags' in model_data and model_data['tags']:
            result += f"**🏷️ Tags:** {', '.join(model_data['tags'][:10])}\n\n"

        model_url = f"{self.base_url}/{model_name}"
        page_content = self._fetch_with_retry(model_url)
        if page_content:
            soup = BeautifulSoup(page_content, 'html.parser')
            readme_content = soup.find('div', class_=re.compile(r'prose|readme|model-card'))
            if readme_content:
                description_parts = []
                for p in readme_content.find_all('p')[:3]:
                    # ' ' separator keeps words apart across inline tags.
                    text = p.get_text(' ', strip=True)
                    if len(text) > 30 and not any(skip in text.lower() for skip in ['table of contents', 'toc']):
                        description_parts.append(text)
                if description_parts:
                    description = ' '.join(description_parts)
                    result += f"**πŸ“ Description:**\n{description[:800]}{'...' if len(description) > 800 else ''}\n\n"
            code_examples = self._extract_code_examples(soup)
            if code_examples:
                result += "**πŸ’» Usage Examples:**\n\n"
                for i, code_block in enumerate(code_examples[:3], 1):
                    lang = code_block.get('language', 'python')
                    result += f"*Example {i}:*\n```{lang}\n{code_block['code']}\n```\n\n"

        if task and task != 'N/A':
            result += f"**πŸš€ Quick Start Template:**\n"
            if library == 'transformers':
                # The call is commented out in the template, so the print is
                # commented out too; otherwise the snippet raises NameError.
                result += f"```python\nfrom transformers import pipeline\n\n# Load the model\nmodel = pipeline('{task}', model='{model_name}')\n\n# Use the model\n# result = model(your_input_here)\n# print(result)\n```\n\n"
            else:
                result += f"```python\n# Load and use {model_name}\n# Refer to the documentation for specific usage\n```\n\n"

        if 'siblings' in model_data:
            # Skip malformed entries rather than failing the whole report.
            files = [f['rfilename'] for f in (model_data['siblings'] or [])[:10] if 'rfilename' in f]
            if files:
                result += f"**πŸ“ Model Files:** {', '.join(files)}\n\n"
        result += f"**πŸ”— Model Page:** {model_url}\n"
        return result
    except requests.exceptions.RequestException as e:
        return f"❌ Network error: {str(e)}"
    except Exception as e:
        # Tool boundary: report the failure as text instead of raising.
        logger.error(f"Error in get_model_info: {e}")
        return f"❌ Error fetching model info: {str(e)}"
295
+
296
def get_dataset_info(self, dataset_name: str) -> str:
    """
    Retrieves detailed information about a specific dataset from the Hugging Face Hub.
    Includes statistics, a description, and a quick-start code snippet showing how to load the dataset.
    Args:
        dataset_name (str): The full identifier of the dataset on the Hub, for example 'squad' or 'imdb'.
    Returns:
        str: A Markdown report about the dataset, or a short message when the name is empty, the dataset is not found, or an error occurs.
    """
    try:
        dataset_name = (dataset_name or '').strip()
        if not dataset_name:
            return "Please provide a dataset name."
        api_url = f"{self.api_url}/datasets/{dataset_name}"
        response = self.session.get(api_url, timeout=15)
        if response.status_code == 404:
            return f"❌ Dataset '{dataset_name}' not found. Please check the dataset name."
        if response.status_code != 200:
            return f"❌ Error fetching dataset info (Status: {response.status_code})"
        dataset_data = response.json()

        result = f"# πŸ“Š Dataset: {dataset_name}\n\n"
        # `or` fallbacks: a key that is present but null would otherwise
        # break the ',' number format and the [:10] date slice.
        downloads = dataset_data.get('downloads') or 0
        likes = dataset_data.get('likes') or 0
        created = str(dataset_data.get('createdAt') or 'N/A')[:10]
        updated = str(dataset_data.get('lastModified') or 'N/A')[:10]
        result += f"**πŸ“ˆ Statistics:**\nβ€’ **Downloads:** {downloads:,}\nβ€’ **Likes:** {likes:,}\nβ€’ **Created:** {created}\nβ€’ **Updated:** {updated}\n\n"
        if 'tags' in dataset_data and dataset_data['tags']:
            result += f"**🏷️ Tags:** {', '.join(dataset_data['tags'][:10])}\n\n"

        dataset_url = f"{self.base_url}/datasets/{dataset_name}"
        page_content = self._fetch_with_retry(dataset_url)
        if page_content:
            soup = BeautifulSoup(page_content, 'html.parser')
            readme_content = soup.find('div', class_=re.compile(r'prose|readme|dataset-card'))
            if readme_content:
                description_parts = []
                for p in readme_content.find_all('p')[:3]:
                    # ' ' separator keeps words apart across inline tags.
                    text = p.get_text(' ', strip=True)
                    if len(text) > 30:
                        description_parts.append(text)
                if description_parts:
                    description = ' '.join(description_parts)
                    result += f"**πŸ“ Description:**\n{description[:800]}{'...' if len(description) > 800 else ''}\n\n"
            code_examples = self._extract_code_examples(soup)
            if code_examples:
                result += "**πŸ’» Usage Examples:**\n\n"
                for i, code_block in enumerate(code_examples[:3], 1):
                    lang = code_block.get('language', 'python')
                    result += f"*Example {i}:*\n```{lang}\n{code_block['code']}\n```\n\n"

        result += f"**πŸš€ Quick Start Template:**\n"
        result += f"```python\nfrom datasets import load_dataset\n\n# Load the dataset\ndataset = load_dataset('{dataset_name}')\n\n# Explore the dataset\nprint(dataset)\nprint(f\"Dataset keys: {{list(dataset.keys())}}\")\n\n# Access first example\nif 'train' in dataset:\n print(\"First example:\")\n print(dataset['train'][0])\n```\n\n"
        result += f"**πŸ”— Dataset Page:** {dataset_url}\n"
        return result
    except requests.exceptions.RequestException as e:
        return f"❌ Network error: {str(e)}"
    except Exception as e:
        # Tool boundary: report the failure as text instead of raising.
        logger.error(f"Error in get_dataset_info: {e}")
        return f"❌ Error fetching dataset info: {str(e)}"
344
+
345
def search_models(self, task: str, limit: str = "5") -> str:
    """
    Searches the Hugging Face Hub for models based on a specified task or keyword and returns a list of top models.
    Each result includes statistics and a quick usage example.
    Args:
        task (str): The task to search for, such as 'text-classification', 'image-generation', or 'question-answering'.
        limit (str): The maximum number of models to return. Defaults to '5'.
    Returns:
        str: A Markdown list of matching models, or a short message when the task is empty, nothing matches, or an error occurs.
    """
    try:
        task = (task or '').strip()
        if not task:
            return "Please provide a search task or keyword."
        # Accept ints, floats and numeric strings alike. Previously any
        # non-string value was silently replaced by the default of 5.
        try:
            limit = int(float(limit))
        except (TypeError, ValueError, OverflowError):
            limit = 5
        limit = min(max(limit, 1), 10)

        # Over-fetch so low-signal entries can be filtered out below.
        params = {'search': task, 'limit': limit * 3, 'sort': 'downloads', 'direction': -1}
        response = self.session.get(f"{self.api_url}/models", params=params, timeout=20)
        response.raise_for_status()
        models = response.json()
        if not models:
            return f"❌ No models found for task: '{task}'. Try different keywords."

        filtered_models = []
        for model in models:
            if (model.get('downloads') or 0) > 0 or (model.get('likes') or 0) > 0 or 'pipeline_tag' in model:
                filtered_models.append(model)
            if len(filtered_models) >= limit:
                break
        if not filtered_models:
            filtered_models = models[:limit]

        result = f"# πŸ” Top {len(filtered_models)} Models for '{task}'\n\n"
        for i, model in enumerate(filtered_models, 1):
            model_id = model.get('id', 'Unknown')
            # `or 0` guards against keys that are present but null.
            downloads = model.get('downloads') or 0
            likes = model.get('likes') or 0
            task_type = model.get('pipeline_tag', 'N/A')
            library = model.get('library_name', 'N/A')
            quality_score = ""
            if downloads > 10000:
                quality_score = "⭐ Popular"
            elif downloads > 1000:
                quality_score = "πŸ”₯ Active"
            elif likes > 10:
                quality_score = "πŸ‘ Liked"
            result += f"## {i}. {model_id} {quality_score}\n\n"
            result += f"**πŸ“Š Stats:**\nβ€’ **Downloads:** {downloads:,}\nβ€’ **Likes:** {likes}\nβ€’ **Task:** {task_type}\nβ€’ **Library:** {library}\n\n"
            if task_type and task_type != 'N/A':
                result += f"**πŸš€ Quick Usage:**\n"
                if library == 'transformers':
                    result += f"```python\nfrom transformers import pipeline\n\n# Load model\nmodel = pipeline('{task_type}', model='{model_id}')\n\n# Use model\nresult = model(\"Your input here\")\nprint(result)\n```\n\n"
                else:
                    result += f"```python\n# Load and use {model_id}\n# Check model page for specific usage instructions\n```\n\n"
            result += f"**πŸ”— Model Page:** {self.base_url}/{model_id}\n\n---\n\n"
        return result
    except requests.exceptions.RequestException as e:
        return f"❌ Network error: {str(e)}"
    except Exception as e:
        # Tool boundary: report the failure as text instead of raising.
        logger.error(f"Error in search_models: {e}")
        return f"❌ Error searching models: {str(e)}"
394
+
395
def get_transformers_docs(self, topic: str) -> str:
    """
    Fetches detailed documentation specifically for the Hugging Face Transformers library on a given topic.
    This provides in-depth explanations, code examples, and parameter descriptions for core library components.
    Args:
        topic (str): The Transformers library topic to look up, such as 'pipeline', 'tokenizer', 'trainer', or 'generation'.
    Returns:
        str: A Markdown report for the matched documentation page, or a short message when the topic is empty, the page cannot be fetched, or an error occurs.
    """
    try:
        topic = (topic or '').strip().lower()
        if not topic:
            return "Please provide a topic to search for."
        docs_url = "https://huggingface.co/docs/transformers"
        # Insertion order matters: it decides which key wins the fuzzy
        # match below and the order of the related-topic suggestions.
        topic_map = {
            'pipeline': f"{docs_url}/main_classes/pipelines",
            'pipelines': f"{docs_url}/main_classes/pipelines",
            'tokenizer': f"{docs_url}/main_classes/tokenizer",
            'tokenizers': f"{docs_url}/main_classes/tokenizer",
            'trainer': f"{docs_url}/main_classes/trainer",
            'training': f"{docs_url}/training",
            'model': f"{docs_url}/main_classes/model",
            'models': f"{docs_url}/main_classes/model",
            'configuration': f"{docs_url}/main_classes/configuration",
            'config': f"{docs_url}/main_classes/configuration",
            'quicktour': f"{docs_url}/quicktour",
            'quick': f"{docs_url}/quicktour",
            'installation': f"{docs_url}/installation",
            'install': f"{docs_url}/installation",
            'tutorial': f"{docs_url}/tutorials",
            'tutorials': f"{docs_url}/tutorials",
            'generation': f"{docs_url}/main_classes/text_generation",
            'text_generation': f"{docs_url}/main_classes/text_generation",
            'preprocessing': f"{docs_url}/preprocessing",
            'preprocess': f"{docs_url}/preprocessing",
            'peft': f"{docs_url}/peft",
            'lora': f"{docs_url}/peft",
            'quantization': f"{docs_url}/main_classes/quantization",
            'optimization': f"{docs_url}/perf_train_gpu_one",
            'performance': f"{docs_url}/perf_train_gpu_one",
            'deployment': f"{docs_url}/deployment",
            'custom': f"{docs_url}/custom_models",
            'fine-tuning': f"{docs_url}/training",
            'finetuning': f"{docs_url}/training",
        }

        url = topic_map.get(topic)
        if not url:
            # Fuzzy match: first key that contains, or is contained in, the topic.
            for key, value in topic_map.items():
                if topic in key or key in topic:
                    url = value
                    topic = key
                    break
        if not url:
            url = f"{docs_url}/quicktour"
            topic = "quicktour"

        content = self._fetch_with_retry(url)
        if not content:
            return f"❌ Could not fetch documentation for '{topic}'. Please try again or visit: {url}"
        soup = BeautifulSoup(content, 'html.parser')
        practical_content = self._extract_practical_content(soup, topic)

        result = f"# πŸ“š Transformers Documentation: {topic.replace('_', ' ').title()}\n\n"
        if practical_content['overview']:
            result += f"**πŸ“– Overview:**\n{practical_content['overview']}\n\n"
        if practical_content['installation']:
            result += f"**βš™οΈ Installation:**\n{practical_content['installation']}\n\n"
        if practical_content['code_examples']:
            result += "**πŸ’» Code Examples:**\n\n"
            for i, code_block in enumerate(practical_content['code_examples'][:4], 1):
                lang = code_block.get('language', 'python')
                code_type = code_block.get('type', 'example')
                result += f"### {code_type.title()} {i}:\n```{lang}\n{code_block['code']}\n```\n\n"
        if practical_content['usage_instructions']:
            result += "**πŸ› οΈ Step-by-Step Usage:**\n"
            for i, instruction in enumerate(practical_content['usage_instructions'][:6], 1):
                result += f"{i}. {instruction}\n"
            result += "\n"
        if practical_content['parameters']:
            result += "**βš™οΈ Key Parameters:**\n"
            for param in practical_content['parameters'][:10]:
                param_type = f" (`{param['type']}`)" if param.get('type') else ""
                default_val = f" *Default: `{param['default']}`*" if param.get('default') else ""
                result += f"β€’ **`{param['name']}`**{param_type}: {param['description']}{default_val}\n"
            result += "\n"

        # Suggest up to five topics that lead to *other* pages: skip aliases
        # of the current page and list each remaining page only once.
        related_topics = []
        seen_urls = {url}
        for key, value in topic_map.items():
            if value in seen_urls:
                continue
            seen_urls.add(value)
            related_topics.append(key)
            if len(related_topics) >= 5:
                break
        if related_topics:
            result += f"**πŸ”— Related Topics:** {', '.join(related_topics)}\n\n"
        result += f"**πŸ“„ Full Documentation:** {url}\n"
        return result
    except Exception as e:
        # Tool boundary: report the failure as text instead of raising.
        logger.error(f"Error in get_transformers_docs: {e}")
        return f"❌ Error fetching Transformers documentation: {str(e)}"
449
+
450
def get_trending_models(self, limit: str = "10") -> str:
    """
    Fetches a list of the most downloaded models currently trending on the Hugging Face Hub.
    This is useful for discovering popular and widely-used models.
    Args:
        limit (str): The number of trending models to return. Defaults to '10'.
    Returns:
        str: A Markdown list of models sorted by downloads, or a short message when nothing is returned or an error occurs.
    """
    try:
        # Accept ints, floats and numeric strings alike. Previously any
        # non-string value was silently replaced by the default of 10.
        try:
            limit = int(float(limit))
        except (TypeError, ValueError, OverflowError):
            limit = 10
        limit = min(max(limit, 1), 20)

        params = {'sort': 'downloads', 'direction': -1, 'limit': limit}
        response = self.session.get(f"{self.api_url}/models", params=params, timeout=20)
        response.raise_for_status()
        models = response.json()
        if not models:
            return "❌ Could not fetch trending models."

        result = f"# πŸ”₯ Trending Models (Top {len(models)})\n\n"
        for i, model in enumerate(models, 1):
            model_id = model.get('id', 'Unknown')
            # `or 0` guards against keys that are present but null.
            downloads = model.get('downloads') or 0
            likes = model.get('likes') or 0
            task = model.get('pipeline_tag', 'N/A')
            if downloads > 1000000:
                trend = "πŸš€ Mega Popular"
            elif downloads > 100000:
                trend = "πŸ”₯ Very Popular"
            elif downloads > 10000:
                trend = "⭐ Popular"
            else:
                trend = "πŸ“ˆ Trending"
            result += f"## {i}. {model_id} {trend}\n"
            result += f"β€’ **Downloads:** {downloads:,} | **Likes:** {likes} | **Task:** {task}\n"
            result += f"β€’ **Link:** {self.base_url}/{model_id}\n\n"
        return result
    except Exception as e:
        # Tool boundary: report the failure as text instead of raising.
        logger.error(f"Error in get_trending_models: {e}")
        return f"❌ Error fetching trending models: {str(e)}"
482
+
483
+ # Initialize the server
484
+ hf_server = HuggingFaceInfoServer()
485
+
486
+ # Create Gradio interface
487
+ with gr.Blocks(
488
+ title="πŸ€— Hugging Face Information Server",
489
+ theme=gr.themes.Soft(),
490
+ css="""
491
+ .gradio-container {
492
+ font-family: 'Inter', sans-serif;
493
+ }
494
+ .main-header {
495
+ text-align: center;
496
+ padding: 20px;
497
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
498
+ color: white;
499
+ border-radius: 10px;
500
+ margin-bottom: 20px;
501
+ }
502
+ """) as demo:
503
+ # Header
504
+ with gr.Row():
505
+ gr.HTML("""
506
+ <div class="main-header">
507
+ <h1>πŸ€— Hugging Face Information Server</h1>
508
+ <p>Get comprehensive documentation with <strong>real code examples</strong>, <strong>usage instructions</strong>, and <strong>practical content</strong></p>
509
+ </div>
510
+ """)
511
+
512
+ with gr.Tab("πŸ“š Documentation Search", elem_id="docs"):
513
+ gr.Markdown("### Search for documentation with **comprehensive code examples** and **step-by-step instructions**")
514
+ with gr.Row():
515
+ with gr.Column(scale=3):
516
+ doc_query = gr.Textbox(label="πŸ” Search Query", placeholder="e.g., tokenizer, pipeline, fine-tuning, peft, trainer, quantization")
517
+ with gr.Column(scale=1):
518
+ doc_max_results = gr.Number(label="Max Results", value=2, minimum=1, maximum=5)
519
+ doc_output = gr.Textbox(label="πŸ“– Documentation with Examples", lines=25, max_lines=30)
520
+ with gr.Row():
521
+ doc_btn = gr.Button("πŸ” Search Documentation", variant="primary", size="lg")
522
+ doc_clear = gr.Button("πŸ—‘οΈ Clear", variant="secondary")
523
+ gr.Markdown("**Quick Examples:**")
524
+ with gr.Row():
525
+ gr.Button("Pipeline", size="sm").click(lambda: "pipeline", outputs=doc_query)
526
+ gr.Button("Tokenizer", size="sm").click(lambda: "tokenizer", outputs=doc_query)
527
+ gr.Button("Fine-tuning", size="sm").click(lambda: "fine-tuning", outputs=doc_query)
528
+ gr.Button("PEFT", size="sm").click(lambda: "peft", outputs=doc_query)
529
+ doc_btn.click(lambda q, m: hf_server.search_documentation(q, int(m) if str(m).isdigit() else 2), inputs=[doc_query, doc_max_results], outputs=doc_output)
530
+ doc_clear.click(lambda: "", outputs=doc_output)
531
+
532
# Tab: look up a single model by name and show its details with usage examples.
with gr.Tab("πŸ€– Model Information", elem_id="models"):
    gr.Markdown("### Get detailed model information with **usage examples** and **code snippets**")
    model_name = gr.Textbox(
        label="πŸ€– Model Name",
        placeholder="e.g., bert-base-uncased, gpt2, microsoft/DialoGPT-medium, meta-llama/Llama-2-7b-hf",
    )
    model_output = gr.Textbox(
        label="πŸ“Š Model Information + Usage Examples",
        lines=25,
        max_lines=30,
    )
    with gr.Row():
        model_btn = gr.Button("πŸ“Š Get Model Info", variant="primary", size="lg")
        model_clear = gr.Button("πŸ—‘οΈ Clear", variant="secondary")
    gr.Markdown("**Popular Models:**")
    with gr.Row():
        # Preset buttons: each one writes a fixed model id into the name box.
        bert_preset = gr.Button("BERT", size="sm")
        bert_preset.click(fn=lambda: "bert-base-uncased", outputs=model_name)
        gpt2_preset = gr.Button("GPT-2", size="sm")
        gpt2_preset.click(fn=lambda: "gpt2", outputs=model_name)
        t5_preset = gr.Button("T5", size="sm")
        t5_preset.click(fn=lambda: "t5-small", outputs=model_name)
        distilbert_preset = gr.Button("DistilBERT", size="sm")
        distilbert_preset.click(fn=lambda: "distilbert-base-uncased", outputs=model_name)
    # Main action: pass the typed name to the server and display its reply.
    model_btn.click(fn=hf_server.get_model_info, inputs=model_name, outputs=model_output)
    # Clear empties only the output box; the typed name is kept.
    model_clear.click(fn=lambda: "", outputs=model_output)
547
+
548
# Tab: look up a single dataset by name and show loading/usage examples.
with gr.Tab("πŸ“Š Dataset Information", elem_id="datasets"):
    gr.Markdown("### Get dataset information with **loading examples** and **usage code**")
    dataset_name = gr.Textbox(
        label="πŸ“Š Dataset Name",
        placeholder="e.g., squad, imdb, glue, common_voice, wikitext",
    )
    dataset_output = gr.Textbox(
        label="πŸ“ˆ Dataset Information + Usage Examples",
        lines=25,
        max_lines=30,
    )
    with gr.Row():
        dataset_btn = gr.Button("πŸ“ˆ Get Dataset Info", variant="primary", size="lg")
        dataset_clear = gr.Button("πŸ—‘οΈ Clear", variant="secondary")
    gr.Markdown("**Popular Datasets:**")
    with gr.Row():
        # Preset buttons: each one writes a fixed dataset id into the name box.
        squad_preset = gr.Button("SQuAD", size="sm")
        squad_preset.click(fn=lambda: "squad", outputs=dataset_name)
        imdb_preset = gr.Button("IMDB", size="sm")
        imdb_preset.click(fn=lambda: "imdb", outputs=dataset_name)
        glue_preset = gr.Button("GLUE", size="sm")
        glue_preset.click(fn=lambda: "glue", outputs=dataset_name)
        common_voice_preset = gr.Button("Common Voice", size="sm")
        common_voice_preset.click(fn=lambda: "common_voice", outputs=dataset_name)
    # Main action: pass the typed name to the server and display its reply.
    dataset_btn.click(fn=hf_server.get_dataset_info, inputs=dataset_name, outputs=dataset_output)
    # Clear empties only the output box; the typed name is kept.
    dataset_clear.click(fn=lambda: "", outputs=dataset_output)
563
+
564
# Tab: search models by task or keyword and list them with quick usage examples.
with gr.Tab("πŸ” Model Search", elem_id="search"):
    gr.Markdown("### Search models with **quick usage examples** and **quality indicators**")
    with gr.Row():
        with gr.Column(scale=3):
            search_task = gr.Textbox(
                label="πŸ” Task or Keyword",
                placeholder="e.g., text-classification, image-generation, question-answering, sentiment-analysis",
            )
        with gr.Column(scale=1):
            # precision=0 makes the component deliver an int instead of a
            # possibly-float number.
            search_limit = gr.Number(label="Max Results", value=5, minimum=1, maximum=10, precision=0)
    search_output = gr.Textbox(label="πŸš€ Models with Usage Examples", lines=25, max_lines=30)
    with gr.Row():
        search_btn = gr.Button("πŸš€ Search Models", variant="primary", size="lg")
        search_clear = gr.Button("πŸ—‘οΈ Clear", variant="secondary")
    gr.Markdown("**Popular Tasks:**")
    with gr.Row():
        # Each shortcut only fills the task box; the user still presses Search.
        gr.Button("Text Classification", size="sm").click(lambda: "text-classification", outputs=search_task)
        gr.Button("Question Answering", size="sm").click(lambda: "question-answering", outputs=search_task)
        gr.Button("Text Generation", size="sm").click(lambda: "text-generation", outputs=search_task)
        gr.Button("Image Classification", size="sm").click(lambda: "image-classification", outputs=search_task)

    def _search_result_count(raw, fallback=5):
        """Convert the "Max Results" widget value to an int in the range 1..10.

        A ``str(raw).isdigit()`` test rejects float values such as ``5.0`` and
        would silently replace the user's choice with the default; numeric
        coercion accepts ints, floats and digit strings alike.

        Args:
            raw: Value received from the ``search_limit`` component.
            fallback: Count used when ``raw`` is missing or not numeric.

        Returns:
            The requested count clamped to the widget's declared 1..10 range,
            or ``fallback`` when ``raw`` cannot be converted.
        """
        try:
            count = int(raw)
        except (TypeError, ValueError, OverflowError):
            # None (empty field), non-numeric text, NaN or infinity.
            return fallback
        return max(1, min(count, 10))

    # The handler stays a lambda so the endpoint name Gradio derives from it
    # is the same as before.
    search_btn.click(
        lambda task, limit: hf_server.search_models(task, _search_result_count(limit)),
        inputs=[search_task, search_limit],
        outputs=search_output,
    )
    search_clear.click(lambda: "", outputs=search_output)
583
+
584
# Tab: fetch Transformers documentation for a single topic.
with gr.Tab("⚑ Transformers Docs", elem_id="transformers"):
    gr.Markdown("### Get comprehensive Transformers documentation with **detailed examples** and **parameters**")
    transformers_topic = gr.Textbox(
        label="πŸ“š Topic",
        placeholder="e.g., pipeline, tokenizer, trainer, model, peft, generation, quantization",
    )
    transformers_output = gr.Textbox(
        label="πŸ“– Comprehensive Documentation",
        lines=25,
        max_lines=30,
    )
    with gr.Row():
        transformers_btn = gr.Button("πŸ“– Get Documentation", variant="primary", size="lg")
        transformers_clear = gr.Button("πŸ—‘οΈ Clear", variant="secondary")
    gr.Markdown("**Core Topics:**")
    with gr.Row():
        # Preset buttons: each one writes a fixed topic keyword into the topic box.
        pipeline_preset = gr.Button("Pipeline", size="sm")
        pipeline_preset.click(fn=lambda: "pipeline", outputs=transformers_topic)
        tokenizer_preset = gr.Button("Tokenizer", size="sm")
        tokenizer_preset.click(fn=lambda: "tokenizer", outputs=transformers_topic)
        trainer_preset = gr.Button("Trainer", size="sm")
        trainer_preset.click(fn=lambda: "trainer", outputs=transformers_topic)
        generation_preset = gr.Button("Generation", size="sm")
        generation_preset.click(fn=lambda: "generation", outputs=transformers_topic)
    # Main action: pass the topic to the server and display its reply.
    transformers_btn.click(
        fn=hf_server.get_transformers_docs,
        inputs=transformers_topic,
        outputs=transformers_output,
    )
    # Clear empties only the output box; the typed topic is kept.
    transformers_clear.click(fn=lambda: "", outputs=transformers_output)
599
+
600
# Tab: list the currently trending models, up to a user-chosen count.
with gr.Tab("πŸ”₯ Trending Models", elem_id="trending"):
    gr.Markdown("### Discover the most popular and trending models")
    # precision=0 makes the component deliver an int instead of a
    # possibly-float number.
    trending_limit = gr.Number(label="Number of Models", value=10, minimum=1, maximum=20, precision=0)
    trending_output = gr.Textbox(label="πŸ”₯ Trending Models", lines=20, max_lines=25)
    with gr.Row():
        trending_btn = gr.Button("πŸ”₯ Get Trending Models", variant="primary", size="lg")
        trending_clear = gr.Button("πŸ—‘οΈ Clear", variant="secondary")

    def _trending_count(raw, fallback=10):
        """Convert the "Number of Models" widget value to an int in 1..20.

        A ``str(raw).isdigit()`` test rejects float values such as ``10.0``
        and would silently replace the user's choice with the default;
        numeric coercion accepts ints, floats and digit strings alike.

        Args:
            raw: Value received from the ``trending_limit`` component.
            fallback: Count used when ``raw`` is missing or not numeric.

        Returns:
            The requested count clamped to the widget's declared 1..20 range,
            or ``fallback`` when ``raw`` cannot be converted.
        """
        try:
            count = int(raw)
        except (TypeError, ValueError, OverflowError):
            # None (empty field), non-numeric text, NaN or infinity.
            return fallback
        return max(1, min(count, 20))

    # The handler stays a lambda so the endpoint name Gradio derives from it
    # is the same as before.
    trending_btn.click(
        lambda limit: hf_server.get_trending_models(_trending_count(limit)),
        inputs=trending_limit,
        outputs=trending_output,
    )
    trending_clear.click(lambda: "", outputs=trending_output)
609
+
610
# Footer: a static HTML panel rendered below the tabs. It lists the app's
# feature highlights and links to the README of the hosting Space. The markup
# is passed to gr.HTML as-is; nothing in it is computed at runtime.
with gr.Row():
    gr.HTML("""
    <div style="text-align: center; padding: 20px; color: #666;">
    <h3>πŸ’‘ Features</h3>
    <p><strong>βœ… Real code examples</strong> β€’ <strong>βœ… Step-by-step instructions</strong> β€’ <strong>βœ… Parameter documentation</strong> β€’ <strong>βœ… Quality indicators</strong></p>
    <p><em>Get practical, actionable information, directly from the source.</em></p>
    <p><a href="https://huggingface.co/spaces/Agents-MCP-Hackathon/HuggingFaceDoc/blob/main/README.md" target="_blank" style="text-decoration: none; color: #4a90e2;">πŸ“– Read the Guide on Hugging Face Spaces</a></p>
    </div>
    """)
620
+
621
# Script entry point: announce startup on stdout, then serve the Gradio app.
if __name__ == "__main__":
    for startup_line in (
        "πŸš€ Starting Hugging Face Information Server...",
        "πŸ“Š Features: Code examples, usage instructions, comprehensive documentation",
    ):
        print(startup_line)

    launch_options = {
        "server_name": "0.0.0.0",  # listen on every network interface
        "server_port": 7860,
        "show_error": True,
        "share": True,  # also request a public share link from Gradio
    }
    demo.launch(**launch_options)