milwright committed on
Commit
10dec08
·
verified ·
1 Parent(s): bd579a3

update URL grounding, add language field

Browse files
Files changed (3) hide show
  1. README.md +2 -2
  2. app.py +41 -22
  3. config.json +4 -7
README.md CHANGED
@@ -8,12 +8,12 @@ sdk_version: 5.39.0
8
  app_file: app.py
9
  pinned: false
10
  license: mit
11
- short_description: Italian language learning partner for beginner Italian conve
12
  ---
13
 
14
  # Language Learning Partner
15
 
16
- Italian language learning partner for beginner Italian conversation practice
17
 
18
  ## Quick Setup
19
 
 
8
  app_file: app.py
9
  pinned: false
10
  license: mit
11
+ short_description: Italian conversation partner for beginners
12
  ---
13
 
14
  # Language Learning Partner
15
 
16
+ Italian conversation partner for beginners
17
 
18
  ## Quick Setup
19
 
app.py CHANGED
@@ -13,19 +13,19 @@ from typing import List, Dict, Optional, Any, Tuple
13
 
14
  # Configuration
15
  SPACE_NAME = 'Language Learning Partner'
16
- SPACE_DESCRIPTION = 'Italian language learning partner for beginner Italian conve'
17
 
18
  # Default configuration values
19
  DEFAULT_CONFIG = {
20
  'name': SPACE_NAME,
21
  'description': SPACE_DESCRIPTION,
22
- 'system_prompt': "You are Domenico from Sicily, a Juventus football fan, native Italian speaker serving as a conversational partner for university students in an Italian 101 class. Students will interact and converse with you in Italian, and you must respond EXCLUSIVELY IN ITALIAN without providing English translations, using vocabulary appropriate for beginner-level Italian 101 students. Focus your responses on topics suitable for beginners such as sports, daily life, routines, food, numbers, and hobbies. When students make errors, model the correct forms naturally in your response without explicitly pointing out mistakes, allowing them to learn through exposure to proper usage. Recognize when students demonstrate more advanced abilities and adjust your language complexity accordingly, while ensuring your Italian remains error-free. Keep all responses between 5-50 words, making sure sentences are grammatically complete. Expect to finish every response in under 300 tokens. --- KNOWLEDGE CUTOFF WARNINGS: 1. Limit all verb conjugations to the present tense only, avoiding all other verb forms and tenses, 2. Favor vocabulary words and grammar from the fetched URL page. 3. Address students using the informal second-person singular 'tu' form.",
23
  'temperature': 0.5,
24
  'max_tokens': 250,
25
  'model': 'google/gemma-3-27b-it',
26
  'api_key_var': 'API_KEY',
27
  'theme': 'Default',
28
- 'grounding_urls': ["https://www.pnac.org/wp-content/uploads/Italian-Study-Guide.pdf"],
29
  'enable_dynamic_urls': True,
30
  'enable_file_upload': True,
31
  'examples': ['Ciao! Come stai oggi?', 'Mi piace giocare a calcio. E tu?', 'Cosa mangi di solito a colazione?', 'A che ora ti svegli la mattina?', 'Qual è il tuo sport preferito?'],
@@ -176,7 +176,7 @@ def validate_url_domain(url: str) -> bool:
176
  return False
177
 
178
 
179
- def fetch_url_content(url: str) -> str:
180
  """Fetch and convert URL content to text"""
181
  try:
182
  if not validate_url_domain(url):
@@ -205,16 +205,16 @@ def fetch_url_content(url: str) -> str:
205
  text = ' '.join(text.split())
206
 
207
  # Limit content length
208
- if len(text) > 3000:
209
- text = text[:3000] + "... [truncated]"
210
 
211
- return f"📄 Content from {url}:\n{text}\n"
212
 
213
  elif any(ct in content_type for ct in ['text/plain', 'application/json']):
214
  text = response.text
215
- if len(text) > 3000:
216
- text = text[:3000] + "... [truncated]"
217
- return f"📄 Content from {url}:\n{text}\n"
218
 
219
  else:
220
  return f"⚠️ Unsupported content type at {url}: {content_type}"
@@ -294,19 +294,37 @@ def get_grounding_context() -> str:
294
  if not urls:
295
  return ""
296
 
297
- context_parts = ["📚 **Reference Context:**\n"]
298
 
299
- for i, url in enumerate(urls[:2], 1): # Primary URLs only
300
- if url in _url_content_cache:
301
- content = _url_content_cache[url]
302
- else:
303
- content = fetch_url_content(url)
304
- _url_content_cache[url] = content
305
-
306
- if not content.startswith("❌") and not content.startswith("⏱️"):
307
- context_parts.append(f"\n**Source {i}:** {content}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
308
 
309
- if len(context_parts) > 1:
310
  return "\n".join(context_parts)
311
  return ""
312
 
@@ -405,6 +423,7 @@ Get your API key at: https://openrouter.ai/keys"""
405
  system_content += f"\n\nIMPORTANT: You must respond EXCLUSIVELY in {LANGUAGE}. All your responses should be written entirely in {LANGUAGE}, even when user input is in a different language, particularly English."
406
 
407
  if grounding_context:
 
408
  system_content = f"{system_content}\n\n{grounding_context}"
409
  if file_context:
410
  system_content = f"{system_content}\n\n{file_context}"
@@ -820,7 +839,7 @@ def create_interface():
820
  placeholder="https://example.com/docs\nhttps://example.com/api",
821
  value='\n'.join(config.get('grounding_urls', [])),
822
  lines=5,
823
- info="Add URLs to provide context. First 2 URLs are primary sources."
824
  )
825
 
826
  with gr.Row():
 
13
 
14
  # Configuration
15
  SPACE_NAME = 'Language Learning Partner'
16
+ SPACE_DESCRIPTION = 'Italian conversation partner for beginners'
17
 
18
  # Default configuration values
19
  DEFAULT_CONFIG = {
20
  'name': SPACE_NAME,
21
  'description': SPACE_DESCRIPTION,
22
+ 'system_prompt': "You are Domenico from Sicily, a Juventus football fan, native Italian speaker serving as a conversational partner for university students in an Italian 101 class. Students will interact and converse with you in Italian, and you must respond EXCLUSIVELY IN ITALIAN without providing English translations, using vocabulary appropriate for beginner-level Italian 101 students. Focus your responses on topics suitable for beginners such as sports, daily life, routines, food, numbers, and hobbies. When students make errors, model the correct forms naturally in your response without explicitly pointing out mistakes, allowing them to learn through exposure to proper usage. Recognize when students demonstrate more advanced abilities and adjust your language complexity accordingly, while ensuring your Italian remains error-free. Keep all responses between 5-50 words, making sure sentences are grammatically complete. \n\nKNOWLEDGE CUTOFF WARNINGS: 1. Limit all verb conjugations to the present tense only, avoiding all other verb forms and tenses; 2. Address students using the informal second-person singular 'tu' form; [n2s: add more here]",
23
  'temperature': 0.5,
24
  'max_tokens': 250,
25
  'model': 'google/gemma-3-27b-it',
26
  'api_key_var': 'API_KEY',
27
  'theme': 'Default',
28
+ 'grounding_urls': [],
29
  'enable_dynamic_urls': True,
30
  'enable_file_upload': True,
31
  'examples': ['Ciao! Come stai oggi?', 'Mi piace giocare a calcio. E tu?', 'Cosa mangi di solito a colazione?', 'A che ora ti svegli la mattina?', 'Qual è il tuo sport preferito?'],
 
176
  return False
177
 
178
 
179
+ def fetch_url_content(url: str, max_length: int = 3000) -> str:
180
  """Fetch and convert URL content to text"""
181
  try:
182
  if not validate_url_domain(url):
 
205
  text = ' '.join(text.split())
206
 
207
  # Limit content length
208
+ if len(text) > max_length:
209
+ text = text[:max_length] + "... [truncated]"
210
 
211
+ return f"📄 **Content from:** {url}\n\n{text}\n"
212
 
213
  elif any(ct in content_type for ct in ['text/plain', 'application/json']):
214
  text = response.text
215
+ if len(text) > max_length:
216
+ text = text[:max_length] + "... [truncated]"
217
+ return f"📄 **Content from:** {url}\n\n{text}\n"
218
 
219
  else:
220
  return f"⚠️ Unsupported content type at {url}: {content_type}"
 
294
  if not urls:
295
  return ""
296
 
297
+ context_parts = []
298
 
299
+ # Process primary sources (first 2 URLs with 8000 char limit)
300
+ primary_urls = urls[:2]
301
+ if primary_urls:
302
+ context_parts.append("📚 **PRIMARY SOURCES:**\n")
303
+ for i, url in enumerate(primary_urls, 1):
304
+ if url in _url_content_cache:
305
+ content = _url_content_cache[url]
306
+ else:
307
+ content = fetch_url_content(url, max_length=8000)
308
+ _url_content_cache[url] = content
309
+
310
+ if not content.startswith("❌") and not content.startswith("⏱️"):
311
+ context_parts.append(f"\n**Primary Source {i} - {url}:**\n{content}")
312
+
313
+ # Process secondary sources (URLs 3+ with 2500 char limit)
314
+ secondary_urls = urls[2:]
315
+ if secondary_urls:
316
+ context_parts.append("\n\n📎 **SECONDARY SOURCES:**\n")
317
+ for i, url in enumerate(secondary_urls, 1):
318
+ if url in _url_content_cache:
319
+ content = _url_content_cache[url]
320
+ else:
321
+ content = fetch_url_content(url, max_length=2500)
322
+ _url_content_cache[url] = content
323
+
324
+ if not content.startswith("❌") and not content.startswith("⏱️"):
325
+ context_parts.append(f"\n**Secondary Source {i} - {url}:**\n{content}")
326
 
327
+ if len(context_parts) > 0:
328
  return "\n".join(context_parts)
329
  return ""
330
 
 
423
  system_content += f"\n\nIMPORTANT: You must respond EXCLUSIVELY in {LANGUAGE}. All your responses should be written entirely in {LANGUAGE}, even when user input is in a different language, particularly English."
424
 
425
  if grounding_context:
426
+ system_content += "\n\nIMPORTANT: When providing information from the reference sources below, please cite the specific URL(s) where the information can be found."
427
  system_content = f"{system_content}\n\n{grounding_context}"
428
  if file_context:
429
  system_content = f"{system_content}\n\n{file_context}"
 
839
  placeholder="https://example.com/docs\nhttps://example.com/api",
840
  value='\n'.join(config.get('grounding_urls', [])),
841
  lines=5,
842
+ info="First 2 URLs: Primary sources (8000 chars). URLs 3+: Secondary sources (2500 chars)."
843
  )
844
 
845
  with gr.Row():
config.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "name": "Language Learning Partner",
3
- "tagline": "Italian language learning partner for beginner Italian conve",
4
  "description": "Italian conversation partner for beginners",
5
- "system_prompt": "You are Domenico from Sicily, a Juventus football fan, native Italian speaker serving as a conversational partner for university students in an Italian 101 class. Students will interact and converse with you in Italian, using vocabulary appropriate for beginner-level Italian 101 students. Focus your responses on topics suitable for beginners such as sports, daily life, routines, food, numbers, and hobbies. When students make errors, model the correct forms naturally in your response without explicitly pointing out mistakes, allowing them to learn through exposure to proper usage. Recognize when students demonstrate more advanced abilities and adjust your language complexity accordingly, while ensuring your Italian remains error-free. Keep all responses between 5-50 words, making sure sentences are grammatically complete. Expect to finish every response in under 300 tokens. --- KNOWLEDGE CUTOFF WARNINGS: 1. Limit all verb conjugations to the present tense only, avoiding all other verb forms and tenses, 2. Favor vocabulary words and grammar from the fetched URL page. 3. Address students using the informal second-person singular 'tu' form.",
6
  "model": "google/gemma-3-27b-it",
7
  "language": "Italian",
8
  "api_key_var": "API_KEY",
@@ -15,11 +15,8 @@
15
  "A che ora ti svegli la mattina?",
16
  "Qual \u00e8 il tuo sport preferito?"
17
  ],
18
- "grounding_urls": [
19
- "https://www.pnac.org/wp-content/uploads/Italian-Study-Guide.pdf"
20
- ],
21
  "enable_dynamic_urls": true,
22
  "enable_file_upload": true,
23
- "theme": "Default",
24
- "locked": false
25
  }
 
1
  {
2
  "name": "Language Learning Partner",
3
+ "tagline": "Italian conversation partner for beginners",
4
  "description": "Italian conversation partner for beginners",
5
+ "system_prompt": "You are Domenico from Sicily, a Juventus football fan, native Italian speaker serving as a conversational partner for university students in an Italian 101 class. Students will interact and converse with you in Italian, and you must respond EXCLUSIVELY IN ITALIAN without providing English translations, using vocabulary appropriate for beginner-level Italian 101 students. Focus your responses on topics suitable for beginners such as sports, daily life, routines, food, numbers, and hobbies. When students make errors, model the correct forms naturally in your response without explicitly pointing out mistakes, allowing them to learn through exposure to proper usage. Recognize when students demonstrate more advanced abilities and adjust your language complexity accordingly, while ensuring your Italian remains error-free. Keep all responses between 5-50 words, making sure sentences are grammatically complete. \n\nKNOWLEDGE CUTOFF WARNINGS: 1. Limit all verb conjugations to the present tense only, avoiding all other verb forms and tenses; 2. Address students using the informal second-person singular 'tu' form; [n2s: add more here]",
6
  "model": "google/gemma-3-27b-it",
7
  "language": "Italian",
8
  "api_key_var": "API_KEY",
 
15
  "A che ora ti svegli la mattina?",
16
  "Qual \u00e8 il tuo sport preferito?"
17
  ],
18
+ "grounding_urls": [],
 
 
19
  "enable_dynamic_urls": true,
20
  "enable_file_upload": true,
21
+ "theme": "Default"
 
22
  }