Spaces:
Running
Running
feat: add app
Browse files- .gitignore +6 -0
- README.md +4 -2
- __init__.py +0 -0
- app.py +275 -0
- process/__init__.py +0 -0
- process/gradio_css.py +18 -0
- process/interpretation.py +44 -0
- process/ocr.py +103 -0
- process/sys_prompt.py +94 -0
- process/translation.py +32 -0
- requirements.txt +74 -0
.gitignore
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
.gradio/
|
2 |
+
.env
|
3 |
+
__pycache__/
|
4 |
+
process/__pycache__/
|
5 |
+
pyproject.toml
|
6 |
+
uv.lock
|
README.md
CHANGED
@@ -1,13 +1,15 @@
|
|
1 |
---
|
2 |
title: LogosAI
|
3 |
-
emoji:
|
4 |
-
colorFrom:
|
5 |
colorTo: indigo
|
|
|
6 |
sdk: gradio
|
7 |
sdk_version: 5.33.0
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
license: mit
|
|
|
11 |
---
|
12 |
|
13 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
1 |
---
|
2 |
title: LogosAI
|
3 |
+
emoji: 📚
|
4 |
+
colorFrom: purple
|
5 |
colorTo: indigo
|
6 |
+
python_version: 3.13
|
7 |
sdk: gradio
|
8 |
sdk_version: 5.33.0
|
9 |
app_file: app.py
|
10 |
pinned: false
|
11 |
license: mit
|
12 |
+
short_description: Deep reading in any language, from news to philosophy
|
13 |
---
|
14 |
|
15 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
__init__.py
ADDED
File without changes
|
app.py
ADDED
@@ -0,0 +1,275 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
from process.ocr import perform_raw_ocr, correct_text_with_ai
|
3 |
+
from process.interpretation import get_interpretation
|
4 |
+
from process.translation import get_tranlaton
|
5 |
+
from process.gradio_css import CUSTOM_CSS
|
6 |
+
|
7 |
+
|
8 |
+
# Process-wide copies of the most recently saved API keys.
# NOTE(review): nothing else in app.py reads these; the workflows receive the
# keys straight from the textboxes instead.
MISTRAL_API_KEY, GEMINI_API_KEY = "", ""


def update_api_keys(mistral_key, gemini_key):
    """
    Remember both API keys in the module-level globals.

    Args:
        mistral_key: The Mistral API key.
        gemini_key: The Gemini API key.

    Returns:
        The confirmation string "API keys saved".
    """
    global MISTRAL_API_KEY, GEMINI_API_KEY
    MISTRAL_API_KEY, GEMINI_API_KEY = mistral_key, gemini_key
    confirmation = "API keys saved"
    return confirmation
|
29 |
+
|
30 |
+
|
31 |
+
def ocr_workflow_wrapper(file, mistral_key):
    """
    Drive the OCR step for an uploaded file, streaming UI updates.

    Every yielded value is a pair: the first item goes to the raw-text box,
    the second to the formatted (Markdown) view.

    Args:
        file: The uploaded file to process (image, PDF, or text).
        mistral_key: The Mistral API key used for OCR.

    Yields:
        A validation error, or a "processing" notice followed by either the
        extracted text or an error message.
    """
    problem = None
    if not mistral_key:
        problem = "Error: Mistral API Key not set."
    elif not file:
        problem = "Error: File/Text not found."

    if problem is not None:
        yield problem, f"{problem}\n\n"
        return

    yield "Processing...", "⏳ Processing, please wait...\n\n"

    try:
        extracted = perform_raw_ocr(file, mistral_key)
        yield extracted, f"\n{extracted}\n"
    except Exception as exc:
        # Boundary handler: show the failure in the UI instead of raising.
        failure = f"An error occurred during processing: {str(exc)}"
        yield failure, f"{failure}\n\n"
|
59 |
+
|
60 |
+
|
61 |
+
def ai_correct(current_text: str, mistral_key: str):
    """
    Run the AI proofreading pass over the text currently in the editor.

    Every yielded value is a pair: the first item goes to the raw-text box,
    the second to the formatted (Markdown) view.

    Args:
        current_text: The text to be corrected.
        mistral_key: The Mistral API key used for the correction call.

    Yields:
        A validation error, or a "please wait" notice followed by either the
        corrected text or an error message.
    """
    if not mistral_key:
        missing_key = "Error: Mistral API Key not set."
        yield missing_key, f"{missing_key}\n\n"
        return

    if not (current_text and current_text.strip()):
        nothing_to_do = "*No text to correct. Upload a file, or paste text into 'Raw Text' box first*"
        yield nothing_to_do, nothing_to_do
        return

    yield "⏳ AI Correcting text...", "⏳ AI Correcting text...\n\n*Please wait...*"

    try:
        corrected = correct_text_with_ai(current_text, mistral_key)
        yield corrected, corrected
    except Exception as exc:
        # Boundary handler: show the failure in the UI instead of raising.
        failure = f"Error : {exc}"
        yield failure, f"{failure}\n\n"
|
88 |
+
|
89 |
+
|
90 |
+
def interpretation_workflow(text: str, genre: str, learn_language: str, target_language: str, gemini_key: str):
    """
    Generate an interpretation of the text for the chosen genre and languages.

    Each validation failure yields exactly one message and stops, so a failed
    check can never fall through to the API call.

    Args:
        text: The text to interpret.
        genre: The genre of the text (e.g., "General", "News"); case-insensitive.
        learn_language: The language being learned.
        target_language: The language for the interpretation output.
        gemini_key: The Gemini API key for interpretation.

    Yields:
        A single error / "not implemented yet" message, or a progress message
        followed by the generated interpretation (or an error message if the
        interpretation call raised).
    """
    if not gemini_key:
        yield "Error: Gemini api key not found."
        return
    if not text or not text.strip():
        yield "Error: Text is empty"
        return
    # Both languages are required, so each one is tested for emptiness.
    if not learn_language or not target_language:
        yield "Error: Language not selected"
        return
    if not genre:
        yield "Error: Genre not selected"
        return

    genre_key = genre.lower()
    if genre_key not in ("general", "news"):
        yield "not implemented yet"
        return

    yield f"⏳ Generating interpretation for genre: {genre} ... (10s - 2min)"
    try:
        result = get_interpretation(genre_key, gemini_key, text, learn_language, target_language)
    except Exception as exc:
        # Boundary handler, matching the OCR wrappers: report API failures
        # in the output panel instead of raising.
        yield f"Error: {exc}"
        return
    yield result
|
119 |
+
|
120 |
+
|
121 |
+
def translation_workflow(text: str, target_language: str, gemini_key):
    """
    Translate the provided text into the target language.

    Each validation failure yields exactly one message and stops.

    Args:
        text: The text to translate.
        target_language: The language to translate the text into, given as
            one of the labels offered by the UI dropdown.
        gemini_key: The Gemini API key for translation.

    Yields:
        A single error / "not implemented yet" message, or a progress message
        followed by the translated text (or an error message if the
        translation call raised).
    """
    if not gemini_key:
        yield "Error: Gemini api key not found."
        return
    if not text or not text.strip():
        yield "Error: Text is empty"
        return
    if not target_language:
        yield "Error: Language not selected"
        return

    if target_language not in ("Deutsch", "English", "Français", "Русский язык", "中文"):
        yield "not implemented yet"
        return

    yield f"⏳ Generating translation for target language: {target_language} ..."
    try:
        result = get_tranlaton(text, gemini_key, target_language)
    except Exception as exc:
        # Boundary handler, matching the OCR wrappers: report API failures
        # in the output panel instead of raising.
        yield f"Error: {exc}"
        return
    yield result
|
148 |
+
|
149 |
+
|
150 |
+
# Gradio UI: an API-key accordion followed by three tabs (text extraction,
# interpretation, translation). Every event handler is registered inside the
# Blocks context so it is attached to `demo`.
with gr.Blocks(theme=gr.themes.Monochrome(), css=CUSTOM_CSS) as demo:
    gr.Markdown("# 📚 LogosAI - Intensive Reading in Any Language", elem_classes=["section-header"])

    # --- API Key ---
    with gr.Accordion("API Configuration", open=True):
        with gr.Row():
            with gr.Column(scale=2):
                mistral_api = gr.Textbox(
                    label="Mistral API Key",
                    type="password",
                    placeholder="Enter your key",
                    info="OCR recognition & text processing"
                )
            with gr.Column(scale=2):
                gemini_api = gr.Textbox(
                    label="Gemini API Key",
                    type="password",
                    placeholder="Enter your key",
                    info="text interpretation"
                )
            with gr.Column(scale=1):
                update_keys_button = gr.Button("Save keys")

        api_key_status_output = gr.Markdown()

        update_keys_button.click(
            fn=update_api_keys,
            inputs=[mistral_api, gemini_api],
            outputs=api_key_status_output
        )

    # --- Text Processing ---
    gr.Markdown("---")
    with gr.Tab("Text"):

        with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown("### Upload documents")
                file_input = gr.File(
                    label="Upload Image/PDF/text",
                    file_types=["image", ".pdf", ".txt"]
                )
                process_button = gr.Button("1. File Process (OCR/Read)", variant="primary")
                ai_correct_button = gr.Button("2. AI Correct", variant="primary")
            with gr.Column(scale=2):
                gr.Markdown("### Processed result")
                with gr.Tabs():
                    with gr.Tab("Raw Text"):
                        # Editable: this box is the text source for the
                        # correction, interpretation and translation handlers.
                        text_display = gr.Textbox(
                            label="Raw Text(editable)",
                            lines=15,
                            max_lines=20,
                            show_copy_button=True,
                            value="",
                            interactive=True
                        )
                    with gr.Tab("Formatted Text"):
                        text_markdown = gr.Markdown(
                            value="*Processed text will appear here...*\n\n",
                        )

        # Hook the OCR button to its click event
        process_button.click(
            fn=ocr_workflow_wrapper,
            inputs=[file_input, mistral_api],
            outputs=[text_display, text_markdown]
        )

        # Hook the AI correction button to its click event
        ai_correct_button.click(
            fn=ai_correct,
            inputs=[text_display, mistral_api],
            outputs=[text_display, text_markdown]
        )

    # --- Text Interpretation ---
    with gr.Tab("🎓 Interpretation"):
        gr.Markdown("### Configure Interpretation Settings")

        with gr.Row():
            with gr.Column(scale=1):
                prof_language_selector = gr.Dropdown(["DE", "EN", "FR", "RU", "ZH"], label="Prof's Language", value="EN")
                learn_language_selector = gr.Dropdown(["DE", "EN", "FR", "RU", "ZH"], label="Language to Learn", value="EN")
                style_selector = gr.Dropdown(["General", "Paper", "News", "Narrative", "Poem", "Philosophy"], label="Genre")
                interpret_button = gr.Button("Generate Interpretation", variant="primary")

            with gr.Column(scale=2):
                gr.Markdown("### COURSE")
                interpretation_output = gr.Markdown(
                    value="*Interpretation will appear here after processing...*\n\n",
                    show_copy_button=True
                )

        # Input order follows interpretation_workflow's signature:
        # (text, genre, learn_language, target_language, gemini_key).
        interpret_button.click(
            fn=interpretation_workflow,
            inputs=[text_display, style_selector, learn_language_selector, prof_language_selector, gemini_api],
            outputs=interpretation_output
        )

    # --- Translation ---
    with gr.Tab("Translation"):
        gr.Markdown("### Configure Translation Settings")
        with gr.Row():
            with gr.Column(scale=1):
                target_language_selector = gr.Dropdown(
                    ["Deutsch", "English", "Français", "Русский язык", "中文"],
                    value="English",
                    label="Target Language",
                    interactive=True)
                translation_button = gr.Button("Translate!", variant="primary")

            with gr.Column(scale=2):
                # Own name for this panel so the interpretation panel's
                # variable is not rebound.
                translation_output = gr.Markdown(
                    value="*Translation will appear here ...*\n\n",
                    show_copy_button=True
                )

        translation_button.click(
            fn=translation_workflow,
            inputs=[text_display, target_language_selector, gemini_api],
            outputs=translation_output
        )


if __name__ == "__main__":
    demo.launch(mcp_server=True)
|
process/__init__.py
ADDED
File without changes
|
process/gradio_css.py
ADDED
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Stylesheet passed to gr.Blocks(css=...) in app.py; styles elements carrying
# the "section-header" class with gradient-filled bold text.
CUSTOM_CSS = """
.section-header {
background: linear-gradient(90deg, #667eea 0%, #764ba2 100%);
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
font-weight: bold;
font-size: 1.2em;
margin: 20px 0 10px 0;
}
"""


# NOTE(review): `rest` is not imported by app.py, so this container-width rule
# looks unused at the moment; confirm before removing or merging it.
rest = """
.gradio-container {
max-width: 1200px !important;
margin: auto !important;
}
"""
|
process/interpretation.py
ADDED
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from google import genai
|
2 |
+
from google.genai import types
|
3 |
+
from process.sys_prompt import GENERAL_PROMPT, NEWS_PROMPT
|
4 |
+
|
5 |
+
|
6 |
+
# Placeholder system prompts for genres that have no written prompt yet;
# all three are empty strings.
NARRATIVE_PROMPT = ""
POEM_PROMPT = ""
PHILO_PROMPT = ""
|
9 |
+
|
10 |
+
def get_interpretation(genre: str,
                       api_key: str,
                       text: str,
                       learn_language: str,
                       prof_language: str) -> str:
    """
    Ask Gemini for a lecture-style interpretation of `text`.

    Args:
        genre: Genre of the text; only "general" and "news" (any case) have
            a system prompt.
        api_key: Gemini API key.
        text: The text to interpret.
        learn_language: Two-letter code of the language being learned
            (DE/EN/FR/RU/ZH); unknown codes fall back to English.
        prof_language: Two-letter code of the instruction language;
            unknown codes fall back to English.

    Returns:
        The model's answer, or a string starting with "Error:" when the
        inputs are invalid, the genre is unsupported, or the model returned
        no text.
    """
    if not api_key:
        return "Error: Gemini API Key not found."
    if not text:
        return "Error: text not found."

    # Reject unsupported genres before any client is created, so no genre
    # can reach the API call without a system prompt.
    genre_key = (genre or "").lower()
    if genre_key not in ("general", "news"):
        return f"Error: genre '{genre}' is not implemented yet."

    lang_map = {"DE": "German", "EN": "English", "FR": "French", "RU": "Russian", "ZH": "Chinese"}
    learn_lang = lang_map.get((learn_language or "").upper(), "English")
    prof_lang = lang_map.get((prof_language or "").upper(), "English")
    genres = {
        "general": GENERAL_PROMPT,
        "news": NEWS_PROMPT,
        "narrative": NARRATIVE_PROMPT,
        "poem": POEM_PROMPT,
        "philosophy": PHILO_PROMPT
    }
    # Fill the language placeholders of the genre's prompt template.
    sys_prompt = (
        genres[genre_key]
        .replace("[LEARN_LANGUAGE]", learn_lang)
        .replace("[PROF_LANGUAGE]", prof_lang)
    )

    client = genai.Client(api_key=api_key)
    response = client.models.generate_content(
        model="gemini-2.5-flash-preview-05-20",
        config=types.GenerateContentConfig(
            system_instruction=sys_prompt,
            temperature=0.3,
        ),
        contents=[text]
    )
    # Keep the declared `-> str` contract even when the response has no text.
    return response.text or "Error: Gemini returned an empty response."
|
process/ocr.py
ADDED
@@ -0,0 +1,103 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from mistralai import Mistral
|
2 |
+
from mistralai.models import OCRResponse
|
3 |
+
|
4 |
+
|
5 |
+
# Mistral model identifiers: OCR_MODEL is passed to client.ocr.process,
# CHAT_MODEL to client.chat.complete.
OCR_MODEL = "mistral-ocr-latest"
CHAT_MODEL = "mistral-large-latest"
|
7 |
+
|
8 |
+
|
9 |
+
def ocr_from_file(file_path, api_key, mode="image"):
    """
    Upload a local file to Mistral and run OCR on it.

    Args:
        file_path: Path of the image or PDF to process.
        api_key: Mistral API key.
        mode: "image" to submit the upload as an image URL, "pdf" to submit
            it as a document URL.

    Returns:
        The response object returned by `client.ocr.process`.

    Raises:
        ValueError: If `api_key` is empty or `mode` is neither "image"
            nor "pdf".
    """
    if not api_key:
        raise ValueError("Mistral API Key is required.")
    # Validate the mode before uploading so an unknown mode never costs an
    # upload or ends without a response to return.
    if mode not in ("image", "pdf"):
        raise ValueError(f"Unsupported OCR mode: {mode!r} (expected 'image' or 'pdf').")

    client = Mistral(api_key=api_key)

    # The context manager closes the handle even if the upload fails.
    with open(file_path, "rb") as source:
        uploaded_file = client.files.upload(
            file={
                "file_name": file_path,
                "content": source,
            },
            purpose="ocr"
        )
    signed_url = client.files.get_signed_url(file_id=uploaded_file.id)

    if mode == "image":
        document = {
            "type": "image_url",
            "image_url": signed_url.url,
        }
    else:
        document = {
            "type": "document_url",
            "document_url": signed_url.url,
        }

    return client.ocr.process(
        model=OCR_MODEL,
        document=document,
        include_image_base64=True
    )
|
43 |
+
|
44 |
+
|
45 |
+
def get_combined_markdown(ocr_response: OCRResponse) -> str:
    """Return the `markdown` of every page in `ocr_response.pages`, in order,
    joined by a blank line."""
    page_separator = "\n\n"
    return page_separator.join(page.markdown for page in ocr_response.pages)
|
52 |
+
|
53 |
+
|
54 |
+
def correct_text_with_ai(text: str, api_key: str):
    """
    Send OCR-produced Markdown to the Mistral chat model for proofreading.

    Args:
        text: The Markdown text to correct; sent as the user message.
        api_key: Mistral API key.

    Returns:
        The content of the first choice in the chat completion.

    Raises:
        ValueError: If `api_key` is empty.
    """
    if not api_key:
        raise ValueError("Mistral API Key is required.")
    client = Mistral(api_key=api_key)

    # System prompt: limits the model to OCR clean-up and forbids changes
    # to the meaning of the text.
    proofreader_instructions = """You are an expert proofreader specializing in Markdown formatting and OCR error correction. Your task is to meticulously review provided Markdown text that has been generated via OCR.
Your primary goal is to identify and correct **typographical errors, spelling mistakes, and redundant symbols** that are clearly a result of the OCR process.
Additionally, you must correct any illogical or jumbled line breaks to ensure proper Markdown paragraph formatting.

**Crucially, you must NOT alter the original meaning or content of the text.** Your corrections should be limited to:
* Obvious OCR-induced spelling errors
* Erroneous or redundant symbols
* Markdown formatting errors
* Jumbled or incorrect line breaks for proper paragraphing

After your thorough review, output the carefully corrected Markdown text. JUST the text."""

    conversation = [
        {"role": "system", "content": proofreader_instructions},
        {"role": "user", "content": text},
    ]
    completion = client.chat.complete(
        model=CHAT_MODEL,
        messages=conversation,
        temperature=0.1,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
|
86 |
+
|
87 |
+
|
88 |
+
def perform_raw_ocr(input_file, api_key):
    """
    Extract text from an uploaded file.

    `.txt` files are read directly as UTF-8; `.pdf` files are OCR'd in
    "pdf" mode; every other extension is OCR'd in "image" mode.

    Args:
        input_file: The upload, either a plain path string or an object
            exposing the path as `.name`. `None` means nothing was uploaded.
        api_key: Mistral API key, forwarded to the OCR call.

    Returns:
        The file's text, the combined Markdown of the OCR pages, or the
        string "File/Text not found" when `input_file` is None.
    """
    import os

    if input_file is None:
        return "File/Text not found"

    # Accept both a bare path and an object carrying the path in `.name`.
    # presumably Gradio passes a str subclass with `.name`; verify against caller
    file_path = getattr(input_file, "name", input_file)
    # splitext inspects only the final path component, so a dot in a
    # directory name is never mistaken for an extension.
    file_ext = os.path.splitext(str(file_path))[1].lstrip(".").lower()

    if file_ext == "txt":
        with open(file_path, "r", encoding="utf-8") as f:
            return f.read()

    file_type = "pdf" if file_ext == "pdf" else "image"
    response = ocr_from_file(file_path, api_key, file_type)
    return get_combined_markdown(response)
|
process/sys_prompt.py
ADDED
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
GENERAL_PROMPT ="""
|
2 |
+
## Core Purpose and Goals:
|
3 |
+
|
4 |
+
* To assist students with **intermediate to advanced `[LEARN_LANGUAGE]` proficiency** in mastering sophisticated aspects of the language through the deep analysis of complex, authentic texts.
|
5 |
+
* To explain complex grammatical phenomena and syntactic structures, particularly those characteristic of the specific genre or domain of the provided text.
|
6 |
+
* To analyze and clarify idiomatic expressions, domain-specific terminology, and fixed phrases found in the `[LEARN_LANGUAGE]` text.
|
7 |
+
* To deconstruct long, complex sentences in `[LEARN_LANGUAGE]`, analyzing their internal logical relationships, information hierarchy, and argumentative chains.
|
8 |
+
* To deeply explore rhetorical devices (e.g., metaphors, irony, euphemisms), and any cultural, historical, or political allusions within the text.
|
9 |
+
* To reveal the underlying logical connections—such as causality, contrast, or progression—between sentences and paragraphs.
|
10 |
+
* To analyze the macro-structure, authorial intent, and organizational methods of the text, adapting the analysis to its specific genre (e.g., argumentative structure, narrative framework).
|
11 |
+
* To deliver instruction primarily in **`[PROF_LANGUAGE]`**, while flexibly using `[LEARN_LANGUAGE]` and English for clarification. When relevant, to introduce etymological insights from source languages (e.g., Latin, Ancient Greek, as relevant to the `[LEARN_LANGUAGE]`) to aid vocabulary comprehension.
|
12 |
+
* To demonstrate profound understanding of the text's subject matter, interpreting it from a broader perspective to help the student grasp its full context and deeper meaning.
|
13 |
+
|
14 |
+
## Behaviors and Rules:
|
15 |
+
|
16 |
+
### 1) Text Selection and Presentation:
|
17 |
+
|
18 |
+
* a) Use the challenging and profound `[LEARN_LANGUAGE]` text chosen by the user as the core material for analysis.
|
19 |
+
* b) When presenting the text, add annotations or highlights to key terminology and complex structures as needed to aid understanding.
|
20 |
+
|
21 |
+
### 2) Explanation and Analysis:
|
22 |
+
|
23 |
+
* a) Explain **advanced or complex grammar points** within the text, focusing on syntactic structures common to its specific style and genre. **Omit basic grammar explanations.**
|
24 |
+
* b) Elucidate commonly used `[LEARN_LANGUAGE]` idioms, domain-specific terminology, and fixed phrases, providing contextual examples.
|
25 |
+
* c) Analyze the architecture of complex sentences to help the student map their logical flow and information hierarchy.
|
26 |
+
* d) Discuss rhetorical devices in the text and how the author uses language to shape opinion or construct a narrative.
|
27 |
+
* e) Explain any cultural allusions, historical backgrounds, or socio-political phenomena necessary to understand the text, providing essential context.
|
28 |
+
* f) Analyze the logical connectors and relationships between sentences and paragraphs, showing how the discourse unfolds.
|
29 |
+
* g) Explain the text's overall structure, argumentation methods, information layers, adapting the analysis to the text's genre (e.g., news report, philosophical essay, literary prose).
|
30 |
+
* h) For each part of the analysis, deliver a **coherent, continuous lecture-style talk**, integrating all knowledge points into a unified and flowing explanation.
|
31 |
+
|
32 |
+
### 3) Language Usage:
|
33 |
+
|
34 |
+
* a) Primarily use **[PROF_LANGUAGE]** and **[LEARN_LANGUAGE]** for instruction, with English readily available as an auxiliary language for clarification.
|
35 |
+
* b) When discussing etymology, introduce it based on its relevance to the `[LEARN_LANGUAGE]` and its practical utility for understanding modern vocabulary.
|
36 |
+
* c) Demonstrate rigorous, clear, and precise language, especially when explaining complex concepts and structural analyses.
|
37 |
+
|
38 |
+
## Overall Tone:
|
39 |
+
|
40 |
+
* Knowledgeable and Rigorous: Demonstrating deep expertise in the subject matter of the text provided by the user.
|
41 |
+
* Patient and Inspiring: Encouraging the student to engage in critical thinking and deep reading.
|
42 |
+
* Clear and Logical: Capable of deconstructing complex material into understandable components.
|
43 |
+
"""
|
44 |
+
|
45 |
+
|
46 |
+
NEWS_PROMPT = """
|
47 |
+
## Purpose and Goals:
|
48 |
+
|
49 |
+
* To assist foreign students with **intermediate to advanced [LEARN_LANGUAGE] proficiency** in mastering more advanced aspects of the language, with a special focus on **understanding and analyzing contemporary [LEARN_LANGUAGE] news and newspaper articles**.
|
50 |
+
* To explain complex grammatical phenomena and structures, particularly those common in journalistic style, using authentic **[LEARN_LANGUAGE] news articles, editorials, and in-depth reports**.
|
51 |
+
* To analyze and clarify common contemporary idiomatic expressions, political and economic terminology, and media-specific fixed phrases found in [LEARN_LANGUAGE] news.
|
52 |
+
* To deconstruct long [LEARN_LANGUAGE] sentences, analyzing their internal logical relationships, paying special attention to the information transmission layers and chains of argumentation.
|
53 |
+
* To deeply explore rhetorical devices (such as figurative language, euphemisms, implications), potential biases or stances, and allusions or background events that may appear in news texts.
|
54 |
+
* To reveal the deeper connections and logical development of argumentation, narration, causality, or contrast between sentences and paragraphs.
|
55 |
+
* To analyze the overall architecture, writing purpose, and organizational methods of news articles (e.g., inverted pyramid structure, arrangement of arguments and evidence).
|
56 |
+
* In the teaching process, the primary language of instruction will be **[PROF_LANGUAGE]**, while flexibly employing other specified auxiliary languages (e.g., French, Chinese, English) for explanations. When necessary, relevant etymological knowledge from source languages (e.g.,Ancient Greek, Old Latin, Latinate languages) will be mentioned to aid in understanding vocabulary evolution.
|
57 |
+
* To demonstrate a profound understanding of sociology, history, political science, and economics, interpreting news texts from a broader perspective to help students grasp their context and deeper meanings.
|
58 |
+
|
59 |
+
## Behaviors and Rules:
|
60 |
+
|
61 |
+
### 1) Text Selection and Presentation:
|
62 |
+
|
63 |
+
* a) Use contemporary [LEARN_LANGUAGE] news articles, editorials, commentaries, or in-depth reports chosen by the user, which are both challenging and profound, prioritizing articles that are topical, argumentative, or analytical.
|
64 |
+
* b) When presenting the text, appropriate markings or annotations can be added based on the student's level and needs, especially for key terminology and complex sentence structures.
|
65 |
+
|
66 |
+
### 2) Explanation and Analysis:
|
67 |
+
|
68 |
+
* a) Explain complex grammar points within the news text, focusing on syntactic structures common in journalistic style, tense usage (e.g., choice of past tenses), subjunctive mood, and passive voice. Basic grammar points should be omitted.
|
69 |
+
* b) Elucidate commonly used **contemporary [LEARN_LANGUAGE] idiomatic expressions, political and economic terminology, and media-specific fixed phrases**, providing example sentences and their precise meaning within context.
|
70 |
+
* c) Analyze the structure of complex long sentences to help students understand their inherent logical relationships, information hierarchy, and the author's narrative intent.
|
71 |
+
* d) Discuss potential rhetorical devices (e.g., hyperbole, irony, similes, metaphors) in news texts, and how authors use language to guide reader opinions or construct narratives.
|
72 |
+
* e) Explain any cultural allusions, historical backgrounds, political events, social trends, or economic phenomena that might be involved in the text, providing necessary background knowledge.
|
73 |
+
* f) Analyze the logical connectives and relationships (e.g., cause and effect, contrast, parallelism, progression, exemplification) between sentences and paragraphs, and how information unfolds step-by-step.
|
74 |
+
* g) Explain the news text's narrative structure, argumentation methods (e.g., inductive, deductive), information presentation layers, author's stance, and potential biases.
|
75 |
+
* h) For each section of explanation, provide a coherent, continuous lecture/talk, integrating all knowledge points within a single block, simulating a classroom instruction flow.
|
76 |
+
|
77 |
+
### 3) Language Usage:
|
78 |
+
|
79 |
+
* a) Primarily use **[PROF_LANGUAGE] and [LEARN_LANGUAGE]** for instruction, with other specified auxiliary languages (e.g., English) available for explanations, especially when clarifying specific concepts or terms.
|
80 |
+
* b) When discussing etymology from source languages (e.g., Latinate languages, Ancient Greek, Old Latin), introduce them according to the student's comprehension ability, emphasizing their utility for understanding modern [LEARN_LANGUAGE] vocabulary.
|
81 |
+
* c) Demonstrate rigorous and clear language expression abilities, especially when elucidating complex concepts and analyzing article structures.
|
82 |
+
|
83 |
+
---
|
84 |
+
|
85 |
+
## Overall Tone:
|
86 |
+
|
87 |
+
* Knowledgeable and rigorous, particularly in the domain of contemporary [LEARN_LANGUAGE] society, politics, and culture.
|
88 |
+
* Patient and inspiring, encouraging students to engage in critical thinking and deep reading.
|
89 |
+
* Clear and logical in language, able to deconstruct complex news content into easily understandable parts.
|
90 |
+
"""
|
91 |
+
|
92 |
+
NARRATIVE_PROMPT = ""
|
93 |
+
POEM_PROMPT = ""
|
94 |
+
PHILO_PROMPT = ""
|
process/translation.py
ADDED
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from google import genai
|
2 |
+
from google.genai import types
|
3 |
+
|
4 |
+
|
5 |
+
SYS_PROMPT_TRANSLATION = """
|
6 |
+
You are an expert translator.
|
7 |
+
Your sole purpose is to accurately and faithfully translate the provided text into the [TARGET_LANGUAGE].
|
8 |
+
Do not add any extra information, explanations, or stylistic changes.
|
9 |
+
Maintain the original meaning and tone as closely as possible.
|
10 |
+
"""
|
11 |
+
|
12 |
+
def get_tranlaton(text: str, api_key: str, target_language: str) -> str:
    """
    Translate `text` into the target language with Gemini.

    Args:
        text: The text to translate.
        api_key: Gemini API key.
        target_language: Language label as shown in the UI ("Deutsch",
            "English", "Français", "Русский язык", "中文"); any other value
            falls back to English.

    Returns:
        The translated text, or a string starting with "Error:" when the
        inputs are invalid or the model returned no text.
    """
    if not api_key:
        return "Error: Gemini API Key not found."
    if not text:
        return "Error: text not found."

    client = genai.Client(api_key=api_key)

    # UI label -> English language name inserted into the system prompt.
    lang_map = {
        "Deutsch": "German",
        "English": "English",
        "Français": "French",
        "Русский язык": "Russian",
        "中文": "Chinese",
    }
    tar_lang = lang_map.get(target_language, "English")
    sys_prompt = SYS_PROMPT_TRANSLATION.replace("[TARGET_LANGUAGE]", tar_lang)
    response = client.models.generate_content(
        model="gemini-2.5-flash-preview-05-20",
        config=types.GenerateContentConfig(
            system_instruction=sys_prompt,
            temperature=0.1,
        ),
        contents=[text]
    )
    # Keep the declared `-> str` contract even when the response has no text.
    return response.text or "Error: Gemini returned an empty response."
|
requirements.txt
ADDED
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
aiofiles==24.1.0
|
2 |
+
annotated-types==0.7.0
|
3 |
+
anyio==4.9.0
|
4 |
+
audioop-lts==0.2.1
|
5 |
+
beautifulsoup4==4.13.4
|
6 |
+
cachetools==5.5.2
|
7 |
+
certifi==2025.4.26
|
8 |
+
charset-normalizer==3.4.2
|
9 |
+
click==8.2.1
|
10 |
+
distro==1.9.0
|
11 |
+
eval-type-backport==0.2.2
|
12 |
+
fastapi==0.115.12
|
13 |
+
ffmpy==0.6.0
|
14 |
+
filelock==3.18.0
|
15 |
+
fsspec==2025.5.1
|
16 |
+
google-auth==2.40.3
|
17 |
+
google-genai==1.19.0
|
18 |
+
gradio==5.33.0
|
19 |
+
gradio-client==1.10.2
|
20 |
+
groovy==0.1.2
|
21 |
+
h11==0.16.0
|
22 |
+
hf-xet==1.1.3
|
23 |
+
httpcore==1.0.9
|
24 |
+
httpx==0.28.1
|
25 |
+
httpx-sse==0.4.0
|
26 |
+
huggingface-hub==0.32.4
|
27 |
+
idna==3.10
|
28 |
+
jinja2==3.1.6
|
29 |
+
jiter==0.10.0
|
30 |
+
markdown-it-py==3.0.0
|
31 |
+
markupsafe==3.0.2
|
32 |
+
mcp==1.9.0
|
33 |
+
mdurl==0.1.2
|
34 |
+
mistralai==1.8.1
|
35 |
+
numpy==2.2.6
|
36 |
+
openai==1.84.0
|
37 |
+
orjson==3.10.18
|
38 |
+
packaging==25.0
|
39 |
+
pandas==2.3.0
|
40 |
+
pillow==11.2.1
|
41 |
+
pyasn1==0.6.1
|
42 |
+
pyasn1-modules==0.4.2
|
43 |
+
pydantic==2.11.5
|
44 |
+
pydantic-core==2.33.2
|
45 |
+
pydantic-settings==2.9.1
|
46 |
+
pydub==0.25.1
|
47 |
+
pygments==2.19.1
|
48 |
+
python-dateutil==2.9.0.post0
|
49 |
+
python-dotenv==1.1.0
|
50 |
+
python-multipart==0.0.20
|
51 |
+
pytz==2025.2
|
52 |
+
pyyaml==6.0.2
|
53 |
+
requests==2.32.3
|
54 |
+
rich==14.0.0
|
55 |
+
rsa==4.9.1
|
56 |
+
ruff==0.11.12
|
57 |
+
safehttpx==0.1.6
|
58 |
+
semantic-version==2.10.0
|
59 |
+
shellingham==1.5.4
|
60 |
+
six==1.17.0
|
61 |
+
sniffio==1.3.1
|
62 |
+
soupsieve==2.7
|
63 |
+
sse-starlette==2.3.6
|
64 |
+
starlette==0.46.2
|
65 |
+
tomlkit==0.13.3
|
66 |
+
tqdm==4.67.1
|
67 |
+
typer==0.16.0
|
68 |
+
typing-extensions==4.14.0
|
69 |
+
typing-inspection==0.4.1
|
70 |
+
tzdata==2025.2
|
71 |
+
urllib3==2.4.0
|
72 |
+
uvicorn==0.34.3
|
73 |
+
websockets==15.0.1
|
74 |
+
wikipedia==1.4.0
|