IvanMiao committed on
Commit
8dbd2a0
·
1 Parent(s): 676dc22

feat: add more languages, add genre: philosophy

Browse files
.gitignore CHANGED
@@ -4,3 +4,4 @@ __pycache__/
4
  process/__pycache__/
5
  pyproject.toml
6
  uv.lock
 
 
4
  process/__pycache__/
5
  pyproject.toml
6
  uv.lock
7
+ *.pdf
app.py CHANGED
@@ -40,6 +40,7 @@ def ocr_workflow_wrapper(file: File, mistral_key: str):
40
  Yields:
41
  Status messages and the extracted text or error messages.
42
  """
 
43
  if not mistral_key:
44
  error_msg = "Error: Mistral API Key not set."
45
  yield error_msg, error_msg
@@ -226,9 +227,9 @@ with gr.Blocks(theme=gr.themes.Monochrome(), css=CUSTOM_CSS) as demo:
226
 
227
  with gr.Row():
228
  with gr.Column(scale=1):
229
- prof_language_seletor = gr.Dropdown(["DE", "EN", "FR", "RU", "ZH"], label="Prof's Language", value="EN")
230
- learn_language_seletor = gr.Dropdown(["DE", "EN", "FR", "RU", "ZH"], label="Language to Learn", value="EN")
231
- style_seletor = gr.Dropdown(["General", "Paper", "News", "Narrative", "Poem", "Philosophy"], label="Genre")
232
  interpret_button = gr.Button("Generate Interpretation", variant="primary")
233
 
234
  with gr.Column(scale=2):
@@ -250,7 +251,7 @@ with gr.Blocks(theme=gr.themes.Monochrome(), css=CUSTOM_CSS) as demo:
250
  with gr.Row():
251
  with gr.Column(scale=1):
252
  target_language_selector = gr.Dropdown(
253
- ["Deutsch", "English", "Français", "Русский язык", "中文"],
254
  value="English",
255
  label="Target Language",
256
  interactive=True)
 
40
  Yields:
41
  Status messages and the extracted text or error messages.
42
  """
43
+ yield file, file
44
  if not mistral_key:
45
  error_msg = "Error: Mistral API Key not set."
46
  yield error_msg, error_msg
 
227
 
228
  with gr.Row():
229
  with gr.Column(scale=1):
230
+ prof_language_seletor = gr.Dropdown(["AR", "DE", "ES", "EN", "FR", "IT", "JA", "RU", "ZH"], label="Prof's Language", value="EN")
231
+ learn_language_seletor = gr.Dropdown(["AR", "DE", "ES", "EN", "FR", "IT", "JA", "RU", "ZH"], label="Language to Learn", value="EN")
232
+ style_seletor = gr.Dropdown(["General", "News", "Philosophy", "Narrative", "Poem", "Paper"], label="Genre")
233
  interpret_button = gr.Button("Generate Interpretation", variant="primary")
234
 
235
  with gr.Column(scale=2):
 
251
  with gr.Row():
252
  with gr.Column(scale=1):
253
  target_language_selector = gr.Dropdown(
254
+ ["العربية", "Deutsch", "Español", "English", "Français", "Italiano", "日本語", "Русский язык", "中文"],
255
  value="English",
256
  label="Target Language",
257
  interactive=True)
process/interpretation.py CHANGED
@@ -1,11 +1,11 @@
1
  from google import genai
2
  from google.genai import types
3
- from process.sys_prompt import GENERAL_PROMPT, NEWS_PROMPT
4
 
5
 
6
  NARRATIVE_PROMPT = ""
7
  POEM_PROMPT = ""
8
- PHILO_PROMPT = ""
9
 
10
  def get_interpretation(genre: str,
11
  api_key: str,
@@ -23,7 +23,8 @@ def get_interpretation(genre: str,
23
  except Exception as e:
24
  return f"ERROR: {str(e)}"
25
 
26
- lang_map ={"DE": "German", "EN": "English", "FR": "French", "RU":"Russian", "ZH": "Chinese"}
 
27
  learn_lang = lang_map.get(learn_language.upper(), "English")
28
  prof_lang = lang_map.get(prof_language.upper(), "English")
29
  genres = {
@@ -33,7 +34,7 @@ def get_interpretation(genre: str,
33
  "poem": POEM_PROMPT,
34
  "philosophy": PHILO_PROMPT
35
  }
36
- if genre.lower() in ["general", "news"]:
37
  sys_prompt = genres[genre.lower()].replace("[LEARN_LANGUAGE]", learn_lang).replace("[PROF_LANGUAGE]", prof_lang)
38
 
39
  response = client.models.generate_content(
 
1
  from google import genai
2
  from google.genai import types
3
+ from process.sys_prompt import GENERAL_PROMPT, NEWS_PROMPT, PHILO_PROMPT
4
 
5
 
6
  NARRATIVE_PROMPT = ""
7
  POEM_PROMPT = ""
8
+
9
 
10
  def get_interpretation(genre: str,
11
  api_key: str,
 
23
  except Exception as e:
24
  return f"ERROR: {str(e)}"
25
 
26
+
27
+ lang_map ={"AR": "Arabic", "DE": "German", "ES": "Spanish", "EN": "English", "FR": "French", "IT": "Italian", "JA": "Japanese", "RU":"Russian", "ZH": "Chinese"}
28
  learn_lang = lang_map.get(learn_language.upper(), "English")
29
  prof_lang = lang_map.get(prof_language.upper(), "English")
30
  genres = {
 
34
  "poem": POEM_PROMPT,
35
  "philosophy": PHILO_PROMPT
36
  }
37
+ if genre.lower() in ["general", "news", "philosophy"]:
38
  sys_prompt = genres[genre.lower()].replace("[LEARN_LANGUAGE]", learn_lang).replace("[PROF_LANGUAGE]", prof_lang)
39
 
40
  response = client.models.generate_content(
process/ocr.py CHANGED
@@ -95,7 +95,7 @@ def correct_text_with_ai(text: str, api_key: str) -> str:
95
 
96
 
97
  def perform_raw_ocr(input_file: File, api_key: str):
98
- if input_file != None:
99
  file_ext = input_file.name.split('.')[-1].lower()
100
  else:
101
  return "File/Text not found"
 
95
 
96
 
97
  def perform_raw_ocr(input_file: File, api_key: str):
98
+ if input_file and input_file.name:
99
  file_ext = input_file.name.split('.')[-1].lower()
100
  else:
101
  return "File/Text not found"
process/sys_prompt.py CHANGED
@@ -89,6 +89,50 @@ NEWS_PROMPT = """
89
  * Clear and logical in language, able to deconstruct complex news content into easily understandable parts.
90
  """
91
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
  NARRATIVE_PROMPT = ""
93
  POEM_PROMPT = ""
94
- PHILO_PROMPT = ""
 
89
  * Clear and logical in language, able to deconstruct complex news content into easily understandable parts.
90
  """
91
 
92
+
93
+ PHILO_PROMPT = """
94
+ ## Core Purpose and Goals:
95
+
96
+ * To assist students with **intermediate to advanced `[LEARN_LANGUAGE]` proficiency** in mastering sophisticated aspects of the language through the deep analysis of complex, authentic **philosophical and literary** texts.
97
+ * To explain complex grammatical phenomena and syntactic structures, particularly those characteristic of philosophical argumentation or literary prose in `[LEARN_LANGUAGE]`.
98
+ * To analyze and clarify idiomatic expressions, domain-specific terminology (especially philosophical and literary terms), and fixed phrases found in the `[LEARN_LANGUAGE]` text.
99
+ * To deconstruct long, complex sentences in `[LEARN_LANGUAGE]`, analyzing their internal logical relationships, information hierarchy, and argumentative chains.
100
+ * To deeply explore rhetorical devices (e.g., metaphors, irony, paradoxes) and any cultural, historical, or philosophical allusions within the text.
101
+ * To reveal the underlying logical connections—such as causality, contrast, or dialectical progression—between sentences and paragraphs.
102
+ * To analyze the macro-structure, authorial intent, and organizational methods of the text, adapting the analysis to its specific genre (e.g., argumentative structure of an essay, narrative framework of prose).
103
+ * To deliver instruction primarily in **`[PROF_LANGUAGE]`**, while flexibly using `[LEARN_LANGUAGE]` and English for clarification. When relevant, to introduce etymological insights from source languages (e.g., Latin, Ancient Greek) to aid vocabulary comprehension.
104
+ * To demonstrate profound understanding of the text's **philosophical or literary** subject matter, interpreting it from a broader intellectual perspective to help the student grasp its full context and deeper meaning.
105
+
106
+ ## Behaviors and Rules:
107
+
108
+ ### 1) Text Selection and Presentation:
109
+
110
+ * a) Use the challenging and profound `[LEARN_LANGUAGE]` text chosen by the user as the core material for analysis.
111
+ * b) When presenting the text, add annotations or highlights to key terminology and complex structures as needed to aid understanding.
112
+
113
+ ### 2) Explanation and Analysis:
114
+
115
+ * a) Explain **advanced or complex grammar points** within the text, focusing on syntactic structures common to its specific style and genre. **Omit basic grammar explanations.**
116
+ * b) Elucidate commonly used `[LEARN_LANGUAGE]` idioms, domain-specific terminology, and fixed phrases, providing contextual examples.
117
+ * c) Analyze the architecture of complex sentences to help the student map their logical flow and information hierarchy.
118
+ * d) Discuss rhetorical devices in the text and how the author uses language to construct an argument, shape opinion, or create a literary effect.
119
+ * e) Explain any cultural allusions, historical backgrounds, or philosophical concepts necessary to understand the text, providing essential context.
120
+ * f) Analyze the logical connectors and relationships between sentences and paragraphs, showing how the discourse unfolds.
121
+ * g) Explain the text's overall structure, argumentation methods, or narrative techniques, adapting the analysis to the text's genre (e.g., philosophical essay, literary prose).
122
+ * h) For each part of the analysis, deliver a **coherent, continuous lecture-style talk**, integrating all knowledge points into a unified and flowing explanation.
123
+
124
+ ### 3) Language Usage:
125
+
126
+ * a) Primarily use **`[PROF_LANGUAGE]`** and **`[LEARN_LANGUAGE]`** for instruction, with English readily available as an auxiliary language for clarification.
127
+ * b) When discussing etymology, introduce it based on its relevance to the `[LEARN_LANGUAGE]` and its practical utility for understanding modern vocabulary, especially philosophical terms.
128
+ * c) Demonstrate rigorous, clear, and precise language, especially when explaining complex concepts and structural analyses.
129
+
130
+ ## Overall Tone:
131
+
132
+ * **Knowledgeable and Rigorous:** Demonstrating deep expertise in `[LEARN_LANGUAGE]` linguistics as well as the philosophical and literary subject matter of the text.
133
+ * **Patient and Inspiring:** Encouraging the student to engage in critical thinking and deep reading.
134
+ * **Clear and Logical:** Capable of deconstructing complex material into understandable components.
135
+ """
136
+
137
  NARRATIVE_PROMPT = ""
138
  POEM_PROMPT = ""
 
process/translation.py CHANGED
@@ -21,6 +21,17 @@ def get_translaton(text: str, api_key: str, target_language: str) -> str:
21
  except Exception as e:
22
  return f"ERROR: {str(e)}"
23
 
 
 
 
 
 
 
 
 
 
 
 
24
  lang_map = {"Deutsch": "German", "English": "English", "Français": "French", "Русский язык": "Russain", "中文": "Chinese"}
25
  tar_lang = lang_map.get(target_language, "English")
26
  sys_prompt = SYS_PROMPT_TRANSLATION.replace("[TARGET_LANGUAGE]", tar_lang)
 
21
  except Exception as e:
22
  return f"ERROR: {str(e)}"
23
 
24
+ lang_map = {
25
+ "العربية": "Arabic",
26
+ "Deutsch": "German",
27
+ "Español": "Spanish",
28
+ "English": "English",
29
+ "Français": "French",
30
+ "Italiano": "Italian",
31
+ "日本語": "Japanese",
32
+ "Русский язык": "Russian",
33
+ "中文": "Chinese"
34
+ }
35
  lang_map = {"Deutsch": "German", "English": "English", "Français": "French", "Русский язык": "Russain", "中文": "Chinese"}
36
  tar_lang = lang_map.get(target_language, "English")
37
  sys_prompt = SYS_PROMPT_TRANSLATION.replace("[TARGET_LANGUAGE]", tar_lang)