Spaces:
Running
Running
enable chunking
Browse files
app.py
CHANGED
@@ -28,7 +28,7 @@ LANGUAGES = {
|
|
28 |
debug_display = None
|
29 |
debug_header = None
|
30 |
|
31 |
-
def extract_wikipedia_content(wiki_url, api_key, model_id, base_url, target_lang, custom_lang, content_format):
|
32 |
"""
|
33 |
Function to extract content from Wikipedia URL (placeholder for now)
|
34 |
"""
|
@@ -42,10 +42,16 @@ def extract_wikipedia_content(wiki_url, api_key, model_id, base_url, target_lang
|
|
42 |
|
43 |
# Get the details of the Wikipedia article
|
44 |
wiki_details = get_wiki_details(wiki_id)
|
45 |
-
|
46 |
-
|
|
|
|
|
|
|
|
|
|
|
47 |
else:
|
48 |
-
|
|
|
49 |
|
50 |
return (
|
51 |
"Extraction complete! Sections: " + str(len(content_sections)),
|
@@ -273,7 +279,9 @@ def update_ui_with_sections(sections):
|
|
273 |
return results
|
274 |
|
275 |
# Create Gradio app
|
276 |
-
with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
|
|
|
|
|
277 |
gr.Markdown("# Wikipedia Translator")
|
278 |
|
279 |
# State variables
|
@@ -343,6 +351,13 @@ with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
|
|
343 |
outputs=[custom_language]
|
344 |
)
|
345 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
346 |
content_format = gr.Radio(
|
347 |
choices=["Text", "XML"],
|
348 |
value="XML",
|
@@ -433,11 +448,20 @@ with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
|
|
433 |
with gr.Column() as sections_container:
|
434 |
section_components = []
|
435 |
for i in range(100): # Support up to 100 sections
|
436 |
-
with gr.
|
|
|
437 |
section_textbox = gr.Textbox(visible=False, lines=4, show_copy_button=True)
|
438 |
-
|
|
|
|
|
|
|
|
|
|
|
439 |
translation_output = gr.Textbox(visible=False, lines=4, show_copy_button=True)
|
440 |
-
|
|
|
|
|
|
|
441 |
section_components.extend([section_textbox, translate_btn, translation_output, debug_btn])
|
442 |
|
443 |
# Connect the translate button to the translation function
|
@@ -472,7 +496,7 @@ with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
|
|
472 |
# Connect the extract button to the function
|
473 |
extract_button.click(
|
474 |
fn=extract_wikipedia_content,
|
475 |
-
inputs=[wiki_url, api_key, model_id, base_url, target_language, custom_language, content_format],
|
476 |
outputs=[
|
477 |
output,
|
478 |
article_pageid,
|
|
|
28 |
debug_display = None
|
29 |
debug_header = None
|
30 |
|
31 |
+
def extract_wikipedia_content(wiki_url, api_key, model_id, base_url, target_lang, custom_lang, content_format, chunking):
|
32 |
"""
|
33 |
Function to extract content from Wikipedia URL (placeholder for now)
|
34 |
"""
|
|
|
42 |
|
43 |
# Get the details of the Wikipedia article
|
44 |
wiki_details = get_wiki_details(wiki_id)
|
45 |
+
|
46 |
+
if chunking:
|
47 |
+
# Split content into sections when chunking is enabled
|
48 |
+
if content_format == "XML":
|
49 |
+
content_sections = split_content_into_sections(wiki_details['wiki_xml'], content_format)
|
50 |
+
else:
|
51 |
+
content_sections = split_content_into_sections(wiki_details['content'], content_format)
|
52 |
else:
|
53 |
+
# Use entire content as a single section when chunking is disabled
|
54 |
+
content_sections = {"Full Article": wiki_details['content'] if content_format == "Text" else wiki_details['wiki_xml']}
|
55 |
|
56 |
return (
|
57 |
"Extraction complete! Sections: " + str(len(content_sections)),
|
|
|
279 |
return results
|
280 |
|
281 |
# Create Gradio app
|
282 |
+
with gr.Blocks(theme=gr.themes.Monochrome(), css="""
|
283 |
+
.odd-section { background-color: rgb(228 213 213); padding: 15px; border-radius: 8px; margin: 10px 0; }
|
284 |
+
""") as demo:
|
285 |
gr.Markdown("# Wikipedia Translator")
|
286 |
|
287 |
# State variables
|
|
|
351 |
outputs=[custom_language]
|
352 |
)
|
353 |
|
354 |
+
# Add chunking control before content format
|
355 |
+
chunking = gr.Checkbox(
|
356 |
+
label="Enable Content Chunking",
|
357 |
+
value=False,
|
358 |
+
info="Split content into sections for individual translation"
|
359 |
+
)
|
360 |
+
|
361 |
content_format = gr.Radio(
|
362 |
choices=["Text", "XML"],
|
363 |
value="XML",
|
|
|
448 |
with gr.Column() as sections_container:
|
449 |
section_components = []
|
450 |
for i in range(100): # Support up to 100 sections
|
451 |
+
with gr.Column(elem_classes=["odd-section"] if i % 2 == 0 else []) as section: # Add class for odd sections
|
452 |
+
# Section content
|
453 |
section_textbox = gr.Textbox(visible=False, lines=4, show_copy_button=True)
|
454 |
+
|
455 |
+
with gr.Row(): # Controls row
|
456 |
+
translate_btn = gr.Button("Translate", visible=False)
|
457 |
+
debug_btn = gr.Button("View Debug Info", visible=False)
|
458 |
+
|
459 |
+
# Translation output
|
460 |
translation_output = gr.Textbox(visible=False, lines=4, show_copy_button=True)
|
461 |
+
|
462 |
+
# Add separator
|
463 |
+
gr.Markdown("---", visible=False)
|
464 |
+
|
465 |
section_components.extend([section_textbox, translate_btn, translation_output, debug_btn])
|
466 |
|
467 |
# Connect the translate button to the translation function
|
|
|
496 |
# Connect the extract button to the function
|
497 |
extract_button.click(
|
498 |
fn=extract_wikipedia_content,
|
499 |
+
inputs=[wiki_url, api_key, model_id, base_url, target_language, custom_language, content_format, chunking],
|
500 |
outputs=[
|
501 |
output,
|
502 |
article_pageid,
|