Spaces:
Running
Running
feat: add more languages, add genre: philosophy
Browse files
- .gitignore +1 -0
- app.py +5 -4
- process/interpretation.py +5 -4
- process/ocr.py +1 -1
- process/sys_prompt.py +45 -1
- process/translation.py +11 -0
.gitignore
CHANGED
@@ -4,3 +4,4 @@ __pycache__/
|
|
4 |
process/__pycache__/
|
5 |
pyproject.toml
|
6 |
uv.lock
|
|
|
|
4 |
process/__pycache__/
|
5 |
pyproject.toml
|
6 |
uv.lock
|
7 |
+
*.pdf
|
app.py
CHANGED
@@ -40,6 +40,7 @@ def ocr_workflow_wrapper(file: File, mistral_key: str):
|
|
40 |
Yields:
|
41 |
Status messages and the extracted text or error messages.
|
42 |
"""
|
|
|
43 |
if not mistral_key:
|
44 |
error_msg = "Error: Mistral API Key not set."
|
45 |
yield error_msg, error_msg
|
@@ -226,9 +227,9 @@ with gr.Blocks(theme=gr.themes.Monochrome(), css=CUSTOM_CSS) as demo:
|
|
226 |
|
227 |
with gr.Row():
|
228 |
with gr.Column(scale=1):
|
229 |
-
prof_language_seletor = gr.Dropdown(["DE", "EN", "FR", "RU", "ZH"], label="Prof's Language", value="EN")
|
230 |
-
learn_language_seletor = gr.Dropdown(["DE", "EN", "FR", "RU", "ZH"], label="Language to Learn", value="EN")
|
231 |
-
style_seletor = gr.Dropdown(["General", "
|
232 |
interpret_button = gr.Button("Generate Interpretation", variant="primary")
|
233 |
|
234 |
with gr.Column(scale=2):
|
@@ -250,7 +251,7 @@ with gr.Blocks(theme=gr.themes.Monochrome(), css=CUSTOM_CSS) as demo:
|
|
250 |
with gr.Row():
|
251 |
with gr.Column(scale=1):
|
252 |
target_language_selector = gr.Dropdown(
|
253 |
-
["Deutsch", "English", "Français", "Русский язык", "中文"],
|
254 |
value="English",
|
255 |
label="Target Language",
|
256 |
interactive=True)
|
|
|
40 |
Yields:
|
41 |
Status messages and the extracted text or error messages.
|
42 |
"""
|
43 |
+
yield file, file
|
44 |
if not mistral_key:
|
45 |
error_msg = "Error: Mistral API Key not set."
|
46 |
yield error_msg, error_msg
|
|
|
227 |
|
228 |
with gr.Row():
|
229 |
with gr.Column(scale=1):
|
230 |
+
prof_language_seletor = gr.Dropdown(["AR", "DE", "ES", "EN", "FR", "IT", "JA", "RU", "ZH"], label="Prof's Language", value="EN")
|
231 |
+
learn_language_seletor = gr.Dropdown(["AR", "DE", "ES", "EN", "FR", "IT", "JA", "RU", "ZH"], label="Language to Learn", value="EN")
|
232 |
+
style_seletor = gr.Dropdown(["General", "News", "Philosophy", "Narrative", "Poem", "Paper"], label="Genre")
|
233 |
interpret_button = gr.Button("Generate Interpretation", variant="primary")
|
234 |
|
235 |
with gr.Column(scale=2):
|
|
|
251 |
with gr.Row():
|
252 |
with gr.Column(scale=1):
|
253 |
target_language_selector = gr.Dropdown(
|
254 |
+
["العربية", "Deutsch", "Español", "English", "Français", "Italiano", "日本語", "Русский язык", "中文"],
|
255 |
value="English",
|
256 |
label="Target Language",
|
257 |
interactive=True)
|
process/interpretation.py
CHANGED
@@ -1,11 +1,11 @@
|
|
1 |
from google import genai
|
2 |
from google.genai import types
|
3 |
-
from process.sys_prompt import GENERAL_PROMPT, NEWS_PROMPT
|
4 |
|
5 |
|
6 |
NARRATIVE_PROMPT = ""
|
7 |
POEM_PROMPT = ""
|
8 |
-
|
9 |
|
10 |
def get_interpretation(genre: str,
|
11 |
api_key: str,
|
@@ -23,7 +23,8 @@ def get_interpretation(genre: str,
|
|
23 |
except Exception as e:
|
24 |
return f"ERROR: {str(e)}"
|
25 |
|
26 |
-
|
|
|
27 |
learn_lang = lang_map.get(learn_language.upper(), "English")
|
28 |
prof_lang = lang_map.get(prof_language.upper(), "English")
|
29 |
genres = {
|
@@ -33,7 +34,7 @@ def get_interpretation(genre: str,
|
|
33 |
"poem": POEM_PROMPT,
|
34 |
"philosophy": PHILO_PROMPT
|
35 |
}
|
36 |
-
if genre.lower() in ["general", "news"]:
|
37 |
sys_prompt = genres[genre.lower()].replace("[LEARN_LANGUAGE]", learn_lang).replace("[PROF_LANGUAGE]", prof_lang)
|
38 |
|
39 |
response = client.models.generate_content(
|
|
|
1 |
from google import genai
|
2 |
from google.genai import types
|
3 |
+
from process.sys_prompt import GENERAL_PROMPT, NEWS_PROMPT, PHILO_PROMPT
|
4 |
|
5 |
|
6 |
NARRATIVE_PROMPT = ""
|
7 |
POEM_PROMPT = ""
|
8 |
+
|
9 |
|
10 |
def get_interpretation(genre: str,
|
11 |
api_key: str,
|
|
|
23 |
except Exception as e:
|
24 |
return f"ERROR: {str(e)}"
|
25 |
|
26 |
+
|
27 |
+
lang_map ={"AR": "Arabic", "DE": "German", "ES": "Spanish", "EN": "English", "FR": "French", "IT": "Italian", "JA": "Japanese", "RU":"Russian", "ZH": "Chinese"}
|
28 |
learn_lang = lang_map.get(learn_language.upper(), "English")
|
29 |
prof_lang = lang_map.get(prof_language.upper(), "English")
|
30 |
genres = {
|
|
|
34 |
"poem": POEM_PROMPT,
|
35 |
"philosophy": PHILO_PROMPT
|
36 |
}
|
37 |
+
if genre.lower() in ["general", "news", "philosophy"]:
|
38 |
sys_prompt = genres[genre.lower()].replace("[LEARN_LANGUAGE]", learn_lang).replace("[PROF_LANGUAGE]", prof_lang)
|
39 |
|
40 |
response = client.models.generate_content(
|
process/ocr.py
CHANGED
@@ -95,7 +95,7 @@ def correct_text_with_ai(text: str, api_key: str) -> str:
|
|
95 |
|
96 |
|
97 |
def perform_raw_ocr(input_file: File, api_key: str):
|
98 |
-
if input_file
|
99 |
file_ext = input_file.name.split('.')[-1].lower()
|
100 |
else:
|
101 |
return "File/Text not found"
|
|
|
95 |
|
96 |
|
97 |
def perform_raw_ocr(input_file: File, api_key: str):
|
98 |
+
if input_file and input_file.name:
|
99 |
file_ext = input_file.name.split('.')[-1].lower()
|
100 |
else:
|
101 |
return "File/Text not found"
|
process/sys_prompt.py
CHANGED
@@ -89,6 +89,50 @@ NEWS_PROMPT = """
|
|
89 |
* Clear and logical in language, able to deconstruct complex news content into easily understandable parts.
|
90 |
"""
|
91 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
92 |
NARRATIVE_PROMPT = ""
|
93 |
POEM_PROMPT = ""
|
94 |
-
PHILO_PROMPT = ""
|
|
|
89 |
* Clear and logical in language, able to deconstruct complex news content into easily understandable parts.
|
90 |
"""
|
91 |
|
92 |
+
|
93 |
+
PHILO_PROMPT = """
|
94 |
+
## Core Purpose and Goals:
|
95 |
+
|
96 |
+
* To assist students with **intermediate to advanced `[LEARN_LANGUAGE]` proficiency** in mastering sophisticated aspects of the language through the deep analysis of complex, authentic **philosophical and literary** texts.
|
97 |
+
* To explain complex grammatical phenomena and syntactic structures, particularly those characteristic of philosophical argumentation or literary prose in `[LEARN_LANGUAGE]`.
|
98 |
+
* To analyze and clarify idiomatic expressions, domain-specific terminology (especially philosophical and literary terms), and fixed phrases found in the `[LEARN_LANGUAGE]` text.
|
99 |
+
* To deconstruct long, complex sentences in `[LEARN_LANGUAGE]`, analyzing their internal logical relationships, information hierarchy, and argumentative chains.
|
100 |
+
* To deeply explore rhetorical devices (e.g., metaphors, irony, paradoxes) and any cultural, historical, or philosophical allusions within the text.
|
101 |
+
* To reveal the underlying logical connections—such as causality, contrast, or dialectical progression—between sentences and paragraphs.
|
102 |
+
* To analyze the macro-structure, authorial intent, and organizational methods of the text, adapting the analysis to its specific genre (e.g., argumentative structure of an essay, narrative framework of prose).
|
103 |
+
* To deliver instruction primarily in **`[PROF_LANGUAGE]`**, while flexibly using `[LEARN_LANGUAGE]` and English for clarification. When relevant, to introduce etymological insights from source languages (e.g., Latin, Ancient Greek) to aid vocabulary comprehension.
|
104 |
+
* To demonstrate profound understanding of the text's **philosophical or literary** subject matter, interpreting it from a broader intellectual perspective to help the student grasp its full context and deeper meaning.
|
105 |
+
|
106 |
+
## Behaviors and Rules:
|
107 |
+
|
108 |
+
### 1) Text Selection and Presentation:
|
109 |
+
|
110 |
+
* a) Use the challenging and profound `[LEARN_LANGUAGE]` text chosen by the user as the core material for analysis.
|
111 |
+
* b) When presenting the text, add annotations or highlights to key terminology and complex structures as needed to aid understanding.
|
112 |
+
|
113 |
+
### 2) Explanation and Analysis:
|
114 |
+
|
115 |
+
* a) Explain **advanced or complex grammar points** within the text, focusing on syntactic structures common to its specific style and genre. **Omit basic grammar explanations.**
|
116 |
+
* b) Elucidate commonly used `[LEARN_LANGUAGE]` idioms, domain-specific terminology, and fixed phrases, providing contextual examples.
|
117 |
+
* c) Analyze the architecture of complex sentences to help the student map their logical flow and information hierarchy.
|
118 |
+
* d) Discuss rhetorical devices in the text and how the author uses language to construct an argument, shape opinion, or create a literary effect.
|
119 |
+
* e) Explain any cultural allusions, historical backgrounds, or philosophical concepts necessary to understand the text, providing essential context.
|
120 |
+
* f) Analyze the logical connectors and relationships between sentences and paragraphs, showing how the discourse unfolds.
|
121 |
+
* g) Explain the text's overall structure, argumentation methods, or narrative techniques, adapting the analysis to the text's genre (e.g., philosophical essay, literary prose).
|
122 |
+
* h) For each part of the analysis, deliver a **coherent, continuous lecture-style talk**, integrating all knowledge points into a unified and flowing explanation.
|
123 |
+
|
124 |
+
### 3) Language Usage:
|
125 |
+
|
126 |
+
* a) Primarily use **`[PROF_LANGUAGE]`** and **`[LEARN_LANGUAGE]`** for instruction, with English readily available as an auxiliary language for clarification.
|
127 |
+
* b) When discussing etymology, introduce it based on its relevance to the `[LEARN_LANGUAGE]` and its practical utility for understanding modern vocabulary, especially philosophical terms.
|
128 |
+
* c) Demonstrate rigorous, clear, and precise language, especially when explaining complex concepts and structural analyses.
|
129 |
+
|
130 |
+
## Overall Tone:
|
131 |
+
|
132 |
+
* **Knowledgeable and Rigorous:** Demonstrating deep expertise in `[LEARN_LANGUAGE]` linguistics as well as the philosophical and literary subject matter of the text.
|
133 |
+
* **Patient and Inspiring:** Encouraging the student to engage in critical thinking and deep reading.
|
134 |
+
* **Clear and Logical:** Capable of deconstructing complex material into understandable components.
|
135 |
+
"""
|
136 |
+
|
137 |
NARRATIVE_PROMPT = ""
|
138 |
POEM_PROMPT = ""
|
|
process/translation.py
CHANGED
@@ -21,6 +21,17 @@ def get_translaton(text: str, api_key: str, target_language: str) -> str:
|
|
21 |
except Exception as e:
|
22 |
return f"ERROR: {str(e)}"
|
23 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
lang_map = {"Deutsch": "German", "English": "English", "Français": "French", "Русский язык": "Russain", "中文": "Chinese"}
|
25 |
tar_lang = lang_map.get(target_language, "English")
|
26 |
sys_prompt = SYS_PROMPT_TRANSLATION.replace("[TARGET_LANGUAGE]", tar_lang)
|
|
|
21 |
except Exception as e:
|
22 |
return f"ERROR: {str(e)}"
|
23 |
|
24 |
+
lang_map = {
|
25 |
+
"العربية": "Arabic",
|
26 |
+
"Deutsch": "German",
|
27 |
+
"Español": "Spanish",
|
28 |
+
"English": "English",
|
29 |
+
"Français": "French",
|
30 |
+
"Italiano": "Italian",
|
31 |
+
"日本語": "Japanese",
|
32 |
+
"Русский язык": "Russian",
|
33 |
+
"中文": "Chinese"
|
34 |
+
}
|
35 |
lang_map = {"Deutsch": "German", "English": "English", "Français": "French", "Русский язык": "Russain", "中文": "Chinese"}
|
36 |
tar_lang = lang_map.get(target_language, "English")
|
37 |
sys_prompt = SYS_PROMPT_TRANSLATION.replace("[TARGET_LANGUAGE]", tar_lang)
|