raktimhugging committed
Commit 02d23e2 · verified · 1 Parent(s): 1c69645

Upload app.py

Files changed (1)
  app.py  +193 -126
app.py CHANGED
@@ -6,6 +6,8 @@ import torch
 import os
 from typing import List, Dict, Any
 import time
+import requests
+import re
 
 # Configure device
 device = "cuda" if torch.cuda.is_available() else "cpu"
@@ -17,7 +19,7 @@ class RAGtimBot:
         self.knowledge_base = []
         self.embeddings = []
         self.initialize_models()
-        self.load_knowledge_base()
+        self.load_markdown_knowledge_base()
 
     def initialize_models(self):
         """Initialize the embedding model"""
@@ -33,84 +35,35 @@ class RAGtimBot:
             print(f"❌ Error loading embedding model: {e}")
             raise e
 
-    def load_knowledge_base(self):
-        """Load and process the knowledge base"""
-        print("Loading knowledge base...")
+    def load_markdown_knowledge_base(self):
+        """Load knowledge base from markdown files"""
+        print("Loading knowledge base from markdown files...")
 
-        # Comprehensive knowledge base about Raktim Mondol
-        self.knowledge_base = [
-            {
-                "id": "about_1",
-                "content": "Raktim Mondol is a PhD candidate in Computer Science & Engineering at UNSW Sydney. He is a researcher, data scientist, bioinformatician, biostatistician, LLM engineer, and father. His research focuses on deep learning-based prognosis and explainability for breast cancer. He is located at the School of Computer Science and Engineering, Building K17, UNSW Sydney, NSW 2052. Contact: [email protected], Phone: +61 412 936 237.",
-                "metadata": {"type": "about", "priority": 10}
-            },
-            {
-                "id": "education_1",
-                "content": "Raktim Mondol is pursuing a PhD in Computer Science & Engineering at UNSW Sydney (2021-2025 Expected). His thesis is on 'Deep Learning Based Prognosis and Explainability for Breast Cancer'. He completed his MS by Research in Computer Science & Bioinformatics at RMIT University (2017-2019) with High Distinction. His master's thesis was 'Deep learning in classifying cancer subtypes, extracting relevant genes and identifying novel mutations'.",
-                "metadata": {"type": "education", "priority": 9}
-            },
-            {
-                "id": "research_llm",
-                "content": "Raktim's research focuses on Large Language Models (LLMs) including training, fine-tuning, and evaluating LLMs using parameter-efficient techniques like LoRA and QLoRA, with applications in retrieval-augmented generation, summarisation, and multi-hop reasoning. He works on Agentic AI & Multi-Agent Systems, designing autonomous, tool-using agents for reasoning, planning, and collaboration using frameworks like the Agent Development Kit.",
-                "metadata": {"type": "research", "priority": 9}
-            },
-            {
-                "id": "research_rag",
-                "content": "His expertise includes Retrieval-Augmented Generation (RAG), building hybrid search and generation pipelines integrating semantic and keyword-based retrieval using technologies like FAISS, BM25, ChromaDB, Weaviate, and Milvus for vector search and retrieval systems.",
-                "metadata": {"type": "research", "priority": 9}
-            },
-            {
-                "id": "skills_ai",
-                "content": "Raktim has expertise in Generative AI & LLM Toolkits including Hugging Face Transformers, LoRA/QLoRA (PEFT), LangChain, OpenAI API/Gemini Pro, GPTQ/GGUF, Prompt Engineering, Agent Development Kit, and RAG Pipelines. He is skilled in Multimodal & CV + NLP including CLIP/BLIP/LLaVA, Segment Anything (SAM), Visual Question Answering, and Multimodal Transformers.",
-                "metadata": {"type": "skills", "priority": 7}
-            },
-            {
-                "id": "skills_programming",
-                "content": "Programming languages: Python, R, SQL, LaTeX. Deep Learning Frameworks: PyTorch, TensorFlow. Cloud Computing: AWS, GCP, Galaxy. Development tools: Git, Jupyter Notebook, RStudio, Spyder. Statistical Analysis: Stata, SPSS, SAS, NCSS.",
-                "metadata": {"type": "skills", "priority": 7}
-            },
-            {
-                "id": "experience_current",
-                "content": "Raktim Mondol has been working as a Casual Academic at UNSW since July 2021, conducting laboratory and tutorial classes for Computer Vision, Neural Networks and Deep Learning, and Artificial Intelligence courses. He provides guidance to students and assists in course material development.",
-                "metadata": {"type": "experience", "priority": 8}
-            },
-            {
-                "id": "experience_rmit",
-                "content": "Previously, he was a Teaching Assistant at RMIT University (July 2017 - Oct 2019), conducting laboratory classes for Electronics, Software Engineering Design, Engineering Computing, and Introduction to Embedded Systems.",
-                "metadata": {"type": "experience", "priority": 8}
-            },
-            {
-                "id": "experience_lecturer",
-                "content": "He worked as a full-time Lecturer at World University of Bangladesh (Sep 2013 - Dec 2016), teaching Electrical Circuit I & II, Engineering Materials, Electronics I & II, Digital Logic Design, and supervising student projects and thesis.",
-                "metadata": {"type": "experience", "priority": 8}
-            },
-            {
-                "id": "publication_biofusion",
-                "content": "BioFusionNet: Deep Learning-Based Survival Risk Stratification in ER+ Breast Cancer Through Multifeature and Multimodal Data Fusion published in IEEE Journal of Biomedical and Health Informatics (2024). This work demonstrates novel multimodal fusion architecture combining histopathology, genomics, and clinical data with attention-based feature selection for interpretability.",
-                "metadata": {"type": "publications", "priority": 8}
-            },
-            {
-                "id": "publication_hist2rna",
-                "content": "hist2RNA: An Efficient Deep Learning Architecture to Predict Gene Expression from Breast Cancer Histopathology Images published in Cancers journal (2023). This enables gene expression profiling without expensive molecular assays, making personalized medicine more accessible.",
-                "metadata": {"type": "publications", "priority": 8}
-            },
-            {
-                "id": "publication_afexnet",
-                "content": "AFExNet: An Adversarial Autoencoder for Differentiating Breast Cancer Sub-types and Extracting Biologically Relevant Genes published in IEEE/ACM Transactions on Computational Biology and Bioinformatics (2021). Provides insights into cancer biology while achieving high classification accuracy.",
-                "metadata": {"type": "publications", "priority": 8}
-            },
-            {
-                "id": "statistics_expertise",
-                "content": "Raktim demonstrates exceptional proficiency in advanced statistical methods including survival analysis with weighted Cox proportional hazards models, multivariate regression analysis, hypothesis testing, correlation analysis with multiple-testing control, and comprehensive biostatistical applications. His BioFusionNet work achieved mean concordance index of 0.77 and time-dependent AUC of 0.84.",
-                "metadata": {"type": "statistics", "priority": 9}
-            },
-            {
-                "id": "awards",
-                "content": "Awards include: Doctoral Research Scholarship from UNSW Sydney (2021), Masters by Research with High Distinction from RMIT University (2019), RMIT Research Scholarships (2017), B.Sc. with High Distinction from BRAC University (2013), Vice Chancellor Award from BRAC University (2013), and Dean Awards (2010-2011).",
-                "metadata": {"type": "awards", "priority": 6}
-            }
+        # Reset knowledge base
+        self.knowledge_base = []
+
+        # Load all markdown files
+        markdown_files = [
+            'about.md',
+            'research_details.md',
+            'publications_detailed.md',
+            'skills_expertise.md',
+            'experience_detailed.md',
+            'statistics.md'
         ]
 
+        for filename in markdown_files:
+            try:
+                if os.path.exists(filename):
+                    with open(filename, 'r', encoding='utf-8') as f:
+                        content = f.read()
+                    self.process_markdown_file(content, filename)
+                    print(f"✅ Loaded {filename}")
+                else:
+                    print(f"⚠️ File not found: {filename}")
+            except Exception as e:
+                print(f"❌ Error loading {filename}: {e}")
+
         # Generate embeddings for knowledge base
         print("Generating embeddings for knowledge base...")
         self.embeddings = []
@@ -127,6 +80,66 @@ class RAGtimBot:
 
         print(f"✅ Knowledge base loaded with {len(self.knowledge_base)} documents")
 
+    def process_markdown_file(self, content: str, filename: str):
+        """Process a markdown file and extract sections"""
+        # Determine file type and priority
+        file_type_map = {
+            'about.md': ('about', 10),
+            'research_details.md': ('research', 9),
+            'publications_detailed.md': ('publications', 8),
+            'skills_expertise.md': ('skills', 7),
+            'experience_detailed.md': ('experience', 8),
+            'statistics.md': ('statistics', 9)
+        }
+
+        file_type, priority = file_type_map.get(filename, ('general', 5))
+
+        # Split content into sections
+        sections = self.split_markdown_into_sections(content)
+
+        for section in sections:
+            if len(section['content'].strip()) > 100:  # Only process substantial content
+                doc = {
+                    "id": f"{filename}_{section['title']}_{len(self.knowledge_base)}",
+                    "content": section['content'],
+                    "metadata": {
+                        "type": file_type,
+                        "priority": priority,
+                        "section": section['title'],
+                        "source": filename
+                    }
+                }
+                self.knowledge_base.append(doc)
+
+    def split_markdown_into_sections(self, content: str) -> List[Dict[str, str]]:
+        """Split markdown content into sections based on headers"""
+        sections = []
+        lines = content.split('\n')
+        current_section = {'title': 'Introduction', 'content': ''}
+
+        for line in lines:
+            # Check if line is a header
+            if line.startswith('#'):
+                # Save previous section if it has content
+                if current_section['content'].strip():
+                    sections.append(current_section.copy())
+
+                # Start new section
+                header_level = len(line) - len(line.lstrip('#'))
+                title = line.lstrip('#').strip()
+                current_section = {
+                    'title': title,
+                    'content': line + '\n'
+                }
+            else:
+                current_section['content'] += line + '\n'
+
+        # Add the last section
+        if current_section['content'].strip():
+            sections.append(current_section)
+
+        return sections
+
     def cosine_similarity(self, a, b):
         """Calculate cosine similarity between two vectors"""
         return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))
@@ -150,8 +163,8 @@ class RAGtimBot:
                         "index": i
                     })
 
-            # Sort by similarity and return top_k
-            similarities.sort(key=lambda x: x["score"], reverse=True)
+            # Sort by similarity and priority
+            similarities.sort(key=lambda x: (x["score"], x["metadata"]["priority"]), reverse=True)
             return similarities[:top_k]
 
         except Exception as e:
@@ -168,12 +181,16 @@ class RAGtimBot:
             content_lower = doc["content"].lower()
             score = sum(content_lower.count(term) for term in query_terms)
 
+            # Add priority boost
+            priority_boost = doc["metadata"]["priority"] / 10
+            final_score = score + priority_boost
+
             if score > 0:
                 results.append({
                     "id": doc["id"],
                     "content": doc["content"],
                     "metadata": doc["metadata"],
-                    "score": score,
+                    "score": final_score,
                     "index": i
                 })
 
@@ -181,7 +198,7 @@ class RAGtimBot:
         return results[:top_k]
 
 # Initialize the bot
-print("Initializing RAGtim Bot...")
+print("Initializing RAGtim Bot with markdown knowledge base...")
 bot = RAGtimBot()
 
 def search_only_api(query, top_k=5):
@@ -192,7 +209,8 @@ def search_only_api(query, top_k=5):
             "results": results,
             "query": query,
             "top_k": top_k,
-            "search_type": "semantic"
+            "search_type": "semantic",
+            "total_documents": len(bot.knowledge_base)
         }
     except Exception as e:
         print(f"Error in search API: {e}")
@@ -200,29 +218,63 @@ def search_only_api(query, top_k=5):
 
 def get_stats_api():
     """API endpoint for knowledge base statistics"""
+    # Calculate document distribution by type
+    doc_types = {}
+    sections_by_file = {}
+
+    for doc in bot.knowledge_base:
+        doc_type = doc["metadata"]["type"]
+        source_file = doc["metadata"]["source"]
+
+        doc_types[doc_type] = doc_types.get(doc_type, 0) + 1
+        sections_by_file[source_file] = sections_by_file.get(source_file, 0) + 1
+
     return {
         "total_documents": len(bot.knowledge_base),
+        "document_types": doc_types,
+        "sections_by_file": sections_by_file,
         "model_name": "sentence-transformers/all-MiniLM-L6-v2",
        "embedding_dimension": 384,
-        "search_capabilities": ["Semantic Search", "GPU Accelerated", "Transformer Embeddings"],
-        "backend_type": "Hugging Face Space"
+        "search_capabilities": ["Semantic Search", "GPU Accelerated", "Transformer Embeddings", "Markdown Knowledge Base"],
+        "backend_type": "Hugging Face Space",
+        "knowledge_sources": list(sections_by_file.keys())
    }
 
 def chat_interface(message, history):
-    """Main chat interface function - now just for demo purposes"""
+    """Chat interface with markdown knowledge base"""
     if not message.strip():
-        return "Please ask me something about Raktim Mondol!"
+        return "Please ask me something about Raktim Mondol! I have comprehensive information loaded from his complete portfolio markdown files."
 
     try:
         # Search knowledge base
-        search_results = bot.search_knowledge_base(message, top_k=5)
+        search_results = bot.search_knowledge_base(message, top_k=6)
 
-        # Simple response for demo (in hybrid mode, DeepSeek will handle this)
         if search_results:
+            # Build comprehensive response
+            response_parts = []
+            response_parts.append(f"Based on my markdown knowledge base (found {len(search_results)} relevant sections):\n")
+
+            # Use the best match as primary response
             best_match = search_results[0]
-            return f"Based on the search results (similarity: {best_match['score']:.2f}):\n\n{best_match['content']}\n\n[Note: In hybrid mode, DeepSeek LLM will generate more natural responses using this context]"
+            response_parts.append(f"**Primary Answer** (Relevance: {best_match['score']:.2f}):")
+            response_parts.append(f"Source: {best_match['metadata']['source']} - {best_match['metadata']['section']}")
+            response_parts.append(f"{best_match['content']}\n")
+
+            # Add additional context if available
+            if len(search_results) > 1:
+                response_parts.append("**Additional Context:**")
+                for i, result in enumerate(search_results[1:3], 1):  # Show up to 2 additional results
+                    section_info = f"{result['metadata']['source']} - {result['metadata']['section']}"
+                    response_parts.append(f"{i}. {section_info} (Relevance: {result['score']:.2f})")
+                    # Add a brief excerpt
+                    excerpt = result['content'][:200] + "..." if len(result['content']) > 200 else result['content']
+                    response_parts.append(f" {excerpt}\n")
+
+            response_parts.append("\n[Note: This response is generated from your complete markdown knowledge base. In hybrid mode, DeepSeek LLM would generate more natural responses using this context.]")
+
+            return "\n".join(response_parts)
        else:
-            return "I don't have specific information about that topic. Could you please ask something else about Raktim Mondol?"
+            return "I don't have specific information about that topic in my markdown knowledge base. Could you please ask something else about Raktim Mondol?"
 
     except Exception as e:
         print(f"Error in chat interface: {e}")
@@ -246,39 +298,53 @@ css = """
 # Create the main chat interface
 iface = gr.ChatInterface(
     fn=chat_interface,
-    title="🤖 RAGtim Bot - Hybrid Search Engine",
-    description="""
-    **Hybrid RAG System**: This Hugging Face Space provides GPU-accelerated semantic search that can be combined with external LLMs like DeepSeek for response generation.
+    title="🤖 RAGtim Bot - Markdown Knowledge Base",
+    description=f"""
+    **Complete Markdown Knowledge Base**: This Hugging Face Space loads all markdown files from Raktim Mondol's portfolio with **{len(bot.knowledge_base)} knowledge sections**.
+
+    **Loaded Markdown Files:**
+    - 📄 **about.md** - Personal information, contact details, professional summary
+    - 🔬 **research_details.md** - Detailed research projects, methodologies, current work
+    - 📚 **publications_detailed.md** - Complete publication details, technical contributions
+    - 💻 **skills_expertise.md** - Comprehensive technical skills, tools, frameworks
+    - 💼 **experience_detailed.md** - Professional experience, teaching, research roles
+    - 📊 **statistics.md** - Statistical methods, biostatistics expertise, methodologies
 
     **Search Capabilities:**
     - 🔍 Semantic similarity search using transformers
-    - 🚀 GPU-accelerated embeddings
-    - 📊 Relevance scoring and ranking
-    - 🎯 Context-aware retrieval
+    - 🚀 GPU-accelerated embeddings with priority ranking
+    - 📊 Relevance scoring across all markdown content
+    - 🎯 Section-level granular search within each file
 
     **API Endpoints:**
-    - `/api/search` - Search-only functionality
-    - `/api/stats` - Knowledge base statistics
+    - `/api/search` - Search across complete markdown knowledge base
+    - `/api/stats` - Detailed statistics about loaded content
 
-    **Ask me about Raktim Mondol:**
-    - 🔬 Research in LLMs, RAG, and AI for healthcare
-    - 📚 Publications and academic work
-    - 💻 Technical skills and programming expertise
-    - 🎓 Education and academic background
-    - 👨‍🏫 Teaching and professional experience
-    - 📊 Statistical methods and biostatistics
+    **Ask me anything about Raktim Mondol:**
+    - Research projects, methodologies, and innovations
+    - Publications with technical details and impact
+    - Technical skills, programming expertise, and tools
+    - Educational background and academic achievements
+    - Professional experience and teaching roles
+    - Statistical methods and biostatistics applications
+    - Awards, recognition, and professional development
+    - Contact information and collaboration opportunities
 
-    **Note**: This demo shows search results. In hybrid mode, these results are passed to DeepSeek LLM for natural response generation.
+    **Note**: This demo shows search results from the complete markdown knowledge base. In hybrid mode, these results are passed to DeepSeek LLM for natural response generation.
     """,
     examples=[
         "What is Raktim's research about?",
-        "Tell me about his publications",
-        "What programming languages does he know?",
-        "What is his educational background?",
-        "How can I contact Raktim?",
-        "What is BioFusionNet?",
-        "Tell me about his LLM research",
-        "What statistical methods does he use?"
+        "Tell me about BioFusionNet in detail",
+        "What are his LLM and RAG expertise?",
+        "Describe his statistical methods and biostatistics work",
+        "What programming languages and frameworks does he use?",
+        "Tell me about his educational background",
+        "What is his current position at UNSW?",
+        "How can I contact Raktim for collaboration?",
+        "What awards and recognition has he received?",
+        "Explain his multimodal AI research",
+        "What is hist2RNA and its impact?",
+        "Tell me about his teaching experience"
     ],
     css=css,
     theme=gr.themes.Soft(
@@ -287,17 +353,17 @@ iface = gr.ChatInterface(
         neutral_hue="slate"
     ),
     chatbot=gr.Chatbot(
-        height=500,
+        height=600,
         show_label=False,
         container=True,
         bubble_full_width=False
     ),
     textbox=gr.Textbox(
-        placeholder="Ask me anything about Raktim Mondol...",
+        placeholder="Ask me anything about Raktim Mondol's research, skills, experience, publications...",
         container=False,
         scale=7
     ),
-    submit_btn="Search",
+    submit_btn="Search Knowledge Base",
     retry_btn="🔄 Retry",
     undo_btn="↩️ Undo",
     clear_btn="🗑️ Clear"
@@ -307,31 +373,32 @@ iface = gr.ChatInterface(
 search_api = gr.Interface(
     fn=search_only_api,
     inputs=[
-        gr.Textbox(label="Search Query", placeholder="Enter your search query..."),
-        gr.Slider(minimum=1, maximum=10, value=5, step=1, label="Top K Results")
+        gr.Textbox(label="Search Query", placeholder="Enter your search query about Raktim Mondol..."),
+        gr.Slider(minimum=1, maximum=15, value=5, step=1, label="Top K Results")
     ],
-    outputs=gr.JSON(label="Search Results"),
-    title="🔍 Search API",
-    description="Direct access to the semantic search functionality"
+    outputs=gr.JSON(label="Markdown Knowledge Base Search Results"),
+    title="🔍 Markdown Knowledge Base Search API",
+    description="Direct access to semantic search across all loaded markdown files"
 )
 
 stats_api = gr.Interface(
     fn=get_stats_api,
     inputs=[],
-    outputs=gr.JSON(label="Knowledge Base Statistics"),
-    title="📊 Stats API",
-    description="Knowledge base statistics and capabilities"
+    outputs=gr.JSON(label="Markdown Knowledge Base Statistics"),
+    title="📊 Knowledge Base Stats",
+    description="Detailed statistics about the loaded markdown knowledge base"
 )
 
 # Combine interfaces
 demo = gr.TabbedInterface(
     [iface, search_api, stats_api],
-    ["💬 Chat Demo", "🔍 Search API", "📊 Stats API"],
-    title="🤖 RAGtim Bot - Hybrid Search System"
+    ["💬 Markdown Chat", "🔍 Search API", "📊 Stats API"],
+    title="🤖 RAGtim Bot - Complete Markdown Knowledge Base"
)
 
 if __name__ == "__main__":
-    print("🚀 Launching RAGtim Bot Hybrid Search System...")
+    print("🚀 Launching RAGtim Bot with Markdown Knowledge Base...")
+    print(f"📚 Loaded {len(bot.knowledge_base)} sections from markdown files")
     demo.launch(
         server_name="0.0.0.0",
         server_port=7860,
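
Note: the heart of this commit is the header-based splitter that replaces the hard-coded knowledge base. A minimal standalone sketch of the committed logic (the same behaviour as split_markdown_into_sections above, trimmed of the class wrapper) makes it easy to sanity-check on a toy document:

def split_markdown_into_sections(content):
    sections = []
    current = {'title': 'Introduction', 'content': ''}
    for line in content.split('\n'):
        if line.startswith('#'):
            # A header closes the previous section and opens a new one
            if current['content'].strip():
                sections.append(current.copy())
            current = {'title': line.lstrip('#').strip(), 'content': line + '\n'}
        else:
            current['content'] += line + '\n'
    if current['content'].strip():
        sections.append(current)
    return sections

sample = "# About\nRaktim Mondol is a PhD candidate at UNSW Sydney.\n## Contact\nUNSW Sydney, NSW 2052."
for s in split_markdown_into_sections(sample):
    print(s['title'])   # prints: About, then Contact

Every "#" header starts a new section regardless of level, and process_markdown_file then keeps only sections whose content exceeds 100 characters, so short stubs never reach the embedding step.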
 
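Both search paths now fold section priority into the ranking: the semantic path sorts on the (score, priority) tuple, and the keyword path adds priority/10 to the raw term count. A worked example of the keyword arithmetic from the diff (the doc values are illustrative, mirroring what process_markdown_file builds):

doc = {
    "content": "BioFusionNet performs multimodal data fusion for ER+ breast cancer.",
    "metadata": {"priority": 8},
}
query_terms = "biofusionnet fusion".lower().split()

content_lower = doc["content"].lower()
score = sum(content_lower.count(term) for term in query_terms)
# str.count matches substrings, so "fusion" also hits inside "biofusionnet":
# score = 1 ("biofusionnet") + 2 ("fusion") = 3
priority_boost = doc["metadata"]["priority"] / 10   # 0.8
final_score = score + priority_boost                # 3.8

Since a document is only appended when the raw score is positive (if score > 0), the boost reorders genuine matches rather than surfacing documents that match no query term.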
  print(f"Error in chat interface: {e}")
 
298
  # Create the main chat interface
299
  iface = gr.ChatInterface(
300
  fn=chat_interface,
301
+ title="πŸ€– RAGtim Bot - Markdown Knowledge Base",
302
+ description=f"""
303
+ **Complete Markdown Knowledge Base**: This Hugging Face Space loads all markdown files from Raktim Mondol's portfolio with **{len(bot.knowledge_base)} knowledge sections**.
304
+
305
+ **Loaded Markdown Files:**
306
+ - πŸ“„ **about.md** - Personal information, contact details, professional summary
307
+ - πŸ”¬ **research_details.md** - Detailed research projects, methodologies, current work
308
+ - πŸ“š **publications_detailed.md** - Complete publication details, technical contributions
309
+ - πŸ’» **skills_expertise.md** - Comprehensive technical skills, tools, frameworks
310
+ - πŸ’Ό **experience_detailed.md** - Professional experience, teaching, research roles
311
+ - πŸ“Š **statistics.md** - Statistical methods, biostatistics expertise, methodologies
312
 
313
  **Search Capabilities:**
314
  - πŸ” Semantic similarity search using transformers
315
+ - πŸš€ GPU-accelerated embeddings with priority ranking
316
+ - πŸ“Š Relevance scoring across all markdown content
317
+ - 🎯 Section-level granular search within each file
318
 
319
  **API Endpoints:**
320
+ - `/api/search` - Search across complete markdown knowledge base
321
+ - `/api/stats` - Detailed statistics about loaded content
322
 
323
+ **Ask me anything about Raktim Mondol:**
324
+ - Research projects, methodologies, and innovations
325
+ - Publications with technical details and impact
326
+ - Technical skills, programming expertise, and tools
327
+ - Educational background and academic achievements
328
+ - Professional experience and teaching roles
329
+ - Statistical methods and biostatistics applications
330
+ - Awards, recognition, and professional development
331
+ - Contact information and collaboration opportunities
332
 
333
+ **Note**: This demo shows search results from the complete markdown knowledge base. In hybrid mode, these results are passed to DeepSeek LLM for natural response generation.
334
  """,
335
  examples=[
336
  "What is Raktim's research about?",
337
+ "Tell me about BioFusionNet in detail",
338
+ "What are his LLM and RAG expertise?",
339
+ "Describe his statistical methods and biostatistics work",
340
+ "What programming languages and frameworks does he use?",
341
+ "Tell me about his educational background",
342
+ "What is his current position at UNSW?",
343
+ "How can I contact Raktim for collaboration?",
344
+ "What awards and recognition has he received?",
345
+ "Explain his multimodal AI research",
346
+ "What is hist2RNA and its impact?",
347
+ "Tell me about his teaching experience"
348
  ],
349
  css=css,
350
  theme=gr.themes.Soft(
 
353
  neutral_hue="slate"
354
  ),
355
  chatbot=gr.Chatbot(
356
+ height=600,
357
  show_label=False,
358
  container=True,
359
  bubble_full_width=False
360
  ),
361
  textbox=gr.Textbox(
362
+ placeholder="Ask me anything about Raktim Mondol's research, skills, experience, publications...",
363
  container=False,
364
  scale=7
365
  ),
366
+ submit_btn="Search Knowledge Base",
367
  retry_btn="πŸ”„ Retry",
368
  undo_btn="↩️ Undo",
369
  clear_btn="πŸ—‘οΈ Clear"
 
373
  search_api = gr.Interface(
374
  fn=search_only_api,
375
  inputs=[
376
+ gr.Textbox(label="Search Query", placeholder="Enter your search query about Raktim Mondol..."),
377
+ gr.Slider(minimum=1, maximum=15, value=5, step=1, label="Top K Results")
378
  ],
379
+ outputs=gr.JSON(label="Markdown Knowledge Base Search Results"),
380
+ title="πŸ” Markdown Knowledge Base Search API",
381
+ description="Direct access to semantic search across all loaded markdown files"
382
  )
383
 
384
  stats_api = gr.Interface(
385
  fn=get_stats_api,
386
  inputs=[],
387
+ outputs=gr.JSON(label="Markdown Knowledge Base Statistics"),
388
+ title="πŸ“Š Knowledge Base Stats",
389
+ description="Detailed statistics about the loaded markdown knowledge base"
390
  )
391
 
392
  # Combine interfaces
393
  demo = gr.TabbedInterface(
394
  [iface, search_api, stats_api],
395
+ ["πŸ’¬ Markdown Chat", "πŸ” Search API", "πŸ“Š Stats API"],
396
+ title="πŸ€– RAGtim Bot - Complete Markdown Knowledge Base"
397
  )
398
 
399
  if __name__ == "__main__":
400
+ print("πŸš€ Launching RAGtim Bot with Markdown Knowledge Base...")
401
+ print(f"πŸ“š Loaded {len(bot.knowledge_base)} sections from markdown files")
402
  demo.launch(
403
  server_name="0.0.0.0",
404
  server_port=7860,
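To exercise the Space's search tab from outside, something like the following should work with gradio_client. The Space id and api_name below are assumptions, not taken from this commit; the `/api/search` and `/api/stats` strings in the description are informal labels rather than the auto-generated Gradio endpoint names, so list the real endpoints with view_api() first:

from gradio_client import Client

client = Client("raktimhugging/ragtim-bot")   # hypothetical Space id
client.view_api()                             # prints the actual endpoint names

# search_only_api(query, top_k) backs the second tab; with auto-named
# endpoints that is typically exposed as "/predict_1"
result = client.predict("What is BioFusionNet?", 5, api_name="/predict_1")
print(result["total_documents"], result["results"][0]["id"])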