Spaces: Sleeping
update URL grounding, add language field
Browse files
- README.md +2 -2
- app.py +41 -22
- config.json +4 -7
README.md
CHANGED
@@ -8,12 +8,12 @@ sdk_version: 5.39.0
|
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
license: mit
|
11 |
-
short_description: Italian
|
12 |
---
|
13 |
|
14 |
# Language Learning Partner
|
15 |
|
16 |
-
Italian
|
17 |
|
18 |
## Quick Setup
|
19 |
|
|
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
license: mit
|
11 |
+
short_description: Italian conversation partner for beginners
|
12 |
---
|
13 |
|
14 |
# Language Learning Partner
|
15 |
|
16 |
+
Italian conversation partner for beginners
|
17 |
|
18 |
## Quick Setup
|
19 |
|
app.py
CHANGED
@@ -13,19 +13,19 @@ from typing import List, Dict, Optional, Any, Tuple
|
|
13 |
|
14 |
# Configuration
|
15 |
SPACE_NAME = 'Language Learning Partner'
|
16 |
-
SPACE_DESCRIPTION = 'Italian
|
17 |
|
18 |
# Default configuration values
|
19 |
DEFAULT_CONFIG = {
|
20 |
'name': SPACE_NAME,
|
21 |
'description': SPACE_DESCRIPTION,
|
22 |
-
'system_prompt': "You are Domenico from Sicily, a Juventus football fan, native Italian speaker serving as a conversational partner for university students in an Italian 101 class. Students will interact and converse with you in Italian, and you must respond EXCLUSIVELY IN ITALIAN without providing English translations, using vocabulary appropriate for beginner-level Italian 101 students. Focus your responses on topics suitable for beginners such as sports, daily life, routines, food, numbers, and hobbies. When students make errors, model the correct forms naturally in your response without explicitly pointing out mistakes, allowing them to learn through exposure to proper usage. Recognize when students demonstrate more advanced abilities and adjust your language complexity accordingly, while ensuring your Italian remains error-free. Keep all responses between 5-50 words, making sure sentences are grammatically complete.
|
23 |
'temperature': 0.5,
|
24 |
'max_tokens': 250,
|
25 |
'model': 'google/gemma-3-27b-it',
|
26 |
'api_key_var': 'API_KEY',
|
27 |
'theme': 'Default',
|
28 |
-
'grounding_urls': [
|
29 |
'enable_dynamic_urls': True,
|
30 |
'enable_file_upload': True,
|
31 |
'examples': ['Ciao! Come stai oggi?', 'Mi piace giocare a calcio. E tu?', 'Cosa mangi di solito a colazione?', 'A che ora ti svegli la mattina?', 'Qual è il tuo sport preferito?'],
|
@@ -176,7 +176,7 @@ def validate_url_domain(url: str) -> bool:
|
|
176 |
return False
|
177 |
|
178 |
|
179 |
-
def fetch_url_content(url: str) -> str:
|
180 |
"""Fetch and convert URL content to text"""
|
181 |
try:
|
182 |
if not validate_url_domain(url):
|
@@ -205,16 +205,16 @@ def fetch_url_content(url: str) -> str:
|
|
205 |
text = ' '.join(text.split())
|
206 |
|
207 |
# Limit content length
|
208 |
-
if len(text) >
|
209 |
-
text = text[:
|
210 |
|
211 |
-
return f"📄 Content from {url}
|
212 |
|
213 |
elif any(ct in content_type for ct in ['text/plain', 'application/json']):
|
214 |
text = response.text
|
215 |
-
if len(text) >
|
216 |
-
text = text[:
|
217 |
-
return f"📄 Content from {url}
|
218 |
|
219 |
else:
|
220 |
return f"⚠️ Unsupported content type at {url}: {content_type}"
|
@@ -294,19 +294,37 @@ def get_grounding_context() -> str:
|
|
294 |
if not urls:
|
295 |
return ""
|
296 |
|
297 |
-
context_parts = [
|
298 |
|
299 |
-
|
300 |
-
|
301 |
-
|
302 |
-
|
303 |
-
|
304 |
-
|
305 |
-
|
306 |
-
|
307 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
308 |
|
309 |
-
if len(context_parts) >
|
310 |
return "\n".join(context_parts)
|
311 |
return ""
|
312 |
|
@@ -405,6 +423,7 @@ Get your API key at: https://openrouter.ai/keys"""
|
|
405 |
system_content += f"\n\nIMPORTANT: You must respond EXCLUSIVELY in {LANGUAGE}. All your responses should be written entirely in {LANGUAGE}, even when user input is in a different language, particularly English."
|
406 |
|
407 |
if grounding_context:
|
|
|
408 |
system_content = f"{system_content}\n\n{grounding_context}"
|
409 |
if file_context:
|
410 |
system_content = f"{system_content}\n\n{file_context}"
|
@@ -820,7 +839,7 @@ def create_interface():
|
|
820 |
placeholder="https://example.com/docs\nhttps://example.com/api",
|
821 |
value='\n'.join(config.get('grounding_urls', [])),
|
822 |
lines=5,
|
823 |
-
info="
|
824 |
)
|
825 |
|
826 |
with gr.Row():
|
|
|
13 |
|
14 |
# Configuration
|
15 |
SPACE_NAME = 'Language Learning Partner'
|
16 |
+
SPACE_DESCRIPTION = 'Italian conversation partner for beginners'
|
17 |
|
18 |
# Default configuration values
|
19 |
DEFAULT_CONFIG = {
|
20 |
'name': SPACE_NAME,
|
21 |
'description': SPACE_DESCRIPTION,
|
22 |
+
'system_prompt': "You are Domenico from Sicily, a Juventus football fan, native Italian speaker serving as a conversational partner for university students in an Italian 101 class. Students will interact and converse with you in Italian, and you must respond EXCLUSIVELY IN ITALIAN without providing English translations, using vocabulary appropriate for beginner-level Italian 101 students. Focus your responses on topics suitable for beginners such as sports, daily life, routines, food, numbers, and hobbies. When students make errors, model the correct forms naturally in your response without explicitly pointing out mistakes, allowing them to learn through exposure to proper usage. Recognize when students demonstrate more advanced abilities and adjust your language complexity accordingly, while ensuring your Italian remains error-free. Keep all responses between 5-50 words, making sure sentences are grammatically complete. \n\nKNOWLEDGE CUTOFF WARNINGS: 1. Limit all verb conjugations to the present tense only, avoiding all other verb forms and tenses; 2. Address students using the informal second-person singular 'tu' form; [n2s: add more here]",
|
23 |
'temperature': 0.5,
|
24 |
'max_tokens': 250,
|
25 |
'model': 'google/gemma-3-27b-it',
|
26 |
'api_key_var': 'API_KEY',
|
27 |
'theme': 'Default',
|
28 |
+
'grounding_urls': [],
|
29 |
'enable_dynamic_urls': True,
|
30 |
'enable_file_upload': True,
|
31 |
'examples': ['Ciao! Come stai oggi?', 'Mi piace giocare a calcio. E tu?', 'Cosa mangi di solito a colazione?', 'A che ora ti svegli la mattina?', 'Qual è il tuo sport preferito?'],
|
|
|
176 |
return False
|
177 |
|
178 |
|
179 |
+
def fetch_url_content(url: str, max_length: int = 3000) -> str:
|
180 |
"""Fetch and convert URL content to text"""
|
181 |
try:
|
182 |
if not validate_url_domain(url):
|
|
|
205 |
text = ' '.join(text.split())
|
206 |
|
207 |
# Limit content length
|
208 |
+
if len(text) > max_length:
|
209 |
+
text = text[:max_length] + "... [truncated]"
|
210 |
|
211 |
+
return f"📄 **Content from:** {url}\n\n{text}\n"
|
212 |
|
213 |
elif any(ct in content_type for ct in ['text/plain', 'application/json']):
|
214 |
text = response.text
|
215 |
+
if len(text) > max_length:
|
216 |
+
text = text[:max_length] + "... [truncated]"
|
217 |
+
return f"📄 **Content from:** {url}\n\n{text}\n"
|
218 |
|
219 |
else:
|
220 |
return f"⚠️ Unsupported content type at {url}: {content_type}"
|
|
|
294 |
if not urls:
|
295 |
return ""
|
296 |
|
297 |
+
context_parts = []
|
298 |
|
299 |
+
# Process primary sources (first 2 URLs with 8000 char limit)
|
300 |
+
primary_urls = urls[:2]
|
301 |
+
if primary_urls:
|
302 |
+
context_parts.append("📚 **PRIMARY SOURCES:**\n")
|
303 |
+
for i, url in enumerate(primary_urls, 1):
|
304 |
+
if url in _url_content_cache:
|
305 |
+
content = _url_content_cache[url]
|
306 |
+
else:
|
307 |
+
content = fetch_url_content(url, max_length=8000)
|
308 |
+
_url_content_cache[url] = content
|
309 |
+
|
310 |
+
if not content.startswith("❌") and not content.startswith("⏱️"):
|
311 |
+
context_parts.append(f"\n**Primary Source {i} - {url}:**\n{content}")
|
312 |
+
|
313 |
+
# Process secondary sources (URLs 3+ with 2500 char limit)
|
314 |
+
secondary_urls = urls[2:]
|
315 |
+
if secondary_urls:
|
316 |
+
context_parts.append("\n\n📎 **SECONDARY SOURCES:**\n")
|
317 |
+
for i, url in enumerate(secondary_urls, 1):
|
318 |
+
if url in _url_content_cache:
|
319 |
+
content = _url_content_cache[url]
|
320 |
+
else:
|
321 |
+
content = fetch_url_content(url, max_length=2500)
|
322 |
+
_url_content_cache[url] = content
|
323 |
+
|
324 |
+
if not content.startswith("❌") and not content.startswith("⏱️"):
|
325 |
+
context_parts.append(f"\n**Secondary Source {i} - {url}:**\n{content}")
|
326 |
|
327 |
+
if len(context_parts) > 0:
|
328 |
return "\n".join(context_parts)
|
329 |
return ""
|
330 |
|
|
|
423 |
system_content += f"\n\nIMPORTANT: You must respond EXCLUSIVELY in {LANGUAGE}. All your responses should be written entirely in {LANGUAGE}, even when user input is in a different language, particularly English."
|
424 |
|
425 |
if grounding_context:
|
426 |
+
system_content += "\n\nIMPORTANT: When providing information from the reference sources below, please cite the specific URL(s) where the information can be found."
|
427 |
system_content = f"{system_content}\n\n{grounding_context}"
|
428 |
if file_context:
|
429 |
system_content = f"{system_content}\n\n{file_context}"
|
|
|
839 |
placeholder="https://example.com/docs\nhttps://example.com/api",
|
840 |
value='\n'.join(config.get('grounding_urls', [])),
|
841 |
lines=5,
|
842 |
+
info="First 2 URLs: Primary sources (8000 chars). URLs 3+: Secondary sources (2500 chars)."
|
843 |
)
|
844 |
|
845 |
with gr.Row():
|
config.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"name": "Language Learning Partner",
|
3 |
-
"tagline": "Italian
|
4 |
"description": "Italian conversation partner for beginners",
|
5 |
-
"system_prompt": "You are Domenico from Sicily, a Juventus football fan, native Italian speaker serving as a conversational partner for university students in an Italian 101 class. Students will interact and converse with you in Italian, using vocabulary appropriate for beginner-level Italian 101 students. Focus your responses on topics suitable for beginners such as sports, daily life, routines, food, numbers, and hobbies. When students make errors, model the correct forms naturally in your response without explicitly pointing out mistakes, allowing them to learn through exposure to proper usage. Recognize when students demonstrate more advanced abilities and adjust your language complexity accordingly, while ensuring your Italian remains error-free. Keep all responses between 5-50 words, making sure sentences are grammatically complete.
|
6 |
"model": "google/gemma-3-27b-it",
|
7 |
"language": "Italian",
|
8 |
"api_key_var": "API_KEY",
|
@@ -15,11 +15,8 @@
|
|
15 |
"A che ora ti svegli la mattina?",
|
16 |
"Qual \u00e8 il tuo sport preferito?"
|
17 |
],
|
18 |
-
"grounding_urls": [
|
19 |
-
"https://www.pnac.org/wp-content/uploads/Italian-Study-Guide.pdf"
|
20 |
-
],
|
21 |
"enable_dynamic_urls": true,
|
22 |
"enable_file_upload": true,
|
23 |
-
"theme": "Default"
|
24 |
-
"locked": false
|
25 |
}
|
|
|
1 |
{
|
2 |
"name": "Language Learning Partner",
|
3 |
+
"tagline": "Italian conversation partner for beginners",
|
4 |
"description": "Italian conversation partner for beginners",
|
5 |
+
"system_prompt": "You are Domenico from Sicily, a Juventus football fan, native Italian speaker serving as a conversational partner for university students in an Italian 101 class. Students will interact and converse with you in Italian, and you must respond EXCLUSIVELY IN ITALIAN without providing English translations, using vocabulary appropriate for beginner-level Italian 101 students. Focus your responses on topics suitable for beginners such as sports, daily life, routines, food, numbers, and hobbies. When students make errors, model the correct forms naturally in your response without explicitly pointing out mistakes, allowing them to learn through exposure to proper usage. Recognize when students demonstrate more advanced abilities and adjust your language complexity accordingly, while ensuring your Italian remains error-free. Keep all responses between 5-50 words, making sure sentences are grammatically complete. \n\nKNOWLEDGE CUTOFF WARNINGS: 1. Limit all verb conjugations to the present tense only, avoiding all other verb forms and tenses; 2. Address students using the informal second-person singular 'tu' form; [n2s: add more here]",
|
6 |
"model": "google/gemma-3-27b-it",
|
7 |
"language": "Italian",
|
8 |
"api_key_var": "API_KEY",
|
|
|
15 |
"A che ora ti svegli la mattina?",
|
16 |
"Qual \u00e8 il tuo sport preferito?"
|
17 |
],
|
18 |
+
"grounding_urls": [],
|
|
|
|
|
19 |
"enable_dynamic_urls": true,
|
20 |
"enable_file_upload": true,
|
21 |
+
"theme": "Default"
|
|
|
22 |
}
|