bakrianoo committed on
Commit
4257826
·
1 Parent(s): d7a887c

enable chunking

Browse files
Files changed (1) hide show
  1. app.py +33 -9
app.py CHANGED
@@ -28,7 +28,7 @@ LANGUAGES = {
28
  debug_display = None
29
  debug_header = None
30
 
31
- def extract_wikipedia_content(wiki_url, api_key, model_id, base_url, target_lang, custom_lang, content_format):
32
  """
33
  Function to extract content from Wikipedia URL (placeholder for now)
34
  """
@@ -42,10 +42,16 @@ def extract_wikipedia_content(wiki_url, api_key, model_id, base_url, target_lang
42
 
43
  # Get the details of the Wikipedia article
44
  wiki_details = get_wiki_details(wiki_id)
45
- if content_format == "XML":
46
- content_sections = split_content_into_sections(wiki_details['wiki_xml'], content_format)
 
 
 
 
 
47
  else:
48
- content_sections = split_content_into_sections(wiki_details['content'], content_format)
 
49
 
50
  return (
51
  "Extraction complete! Sections: " + str(len(content_sections)),
@@ -273,7 +279,9 @@ def update_ui_with_sections(sections):
273
  return results
274
 
275
  # Create Gradio app
276
- with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
 
 
277
  gr.Markdown("# Wikipedia Translator")
278
 
279
  # State variables
@@ -343,6 +351,13 @@ with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
343
  outputs=[custom_language]
344
  )
345
 
 
 
 
 
 
 
 
346
  content_format = gr.Radio(
347
  choices=["Text", "XML"],
348
  value="XML",
@@ -433,11 +448,20 @@ with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
433
  with gr.Column() as sections_container:
434
  section_components = []
435
  for i in range(100): # Support up to 100 sections
436
- with gr.Row():
 
437
  section_textbox = gr.Textbox(visible=False, lines=4, show_copy_button=True)
438
- translate_btn = gr.Button("Translate", visible=False)
 
 
 
 
 
439
  translation_output = gr.Textbox(visible=False, lines=4, show_copy_button=True)
440
- debug_btn = gr.Button("View Debug Info", visible=False)
 
 
 
441
  section_components.extend([section_textbox, translate_btn, translation_output, debug_btn])
442
 
443
  # Connect the translate button to the translation function
@@ -472,7 +496,7 @@ with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
472
  # Connect the extract button to the function
473
  extract_button.click(
474
  fn=extract_wikipedia_content,
475
- inputs=[wiki_url, api_key, model_id, base_url, target_language, custom_language, content_format],
476
  outputs=[
477
  output,
478
  article_pageid,
 
28
  debug_display = None
29
  debug_header = None
30
 
31
+ def extract_wikipedia_content(wiki_url, api_key, model_id, base_url, target_lang, custom_lang, content_format, chunking):
32
  """
33
  Function to extract content from Wikipedia URL (placeholder for now)
34
  """
 
42
 
43
  # Get the details of the Wikipedia article
44
  wiki_details = get_wiki_details(wiki_id)
45
+
46
+ if chunking:
47
+ # Split content into sections when chunking is enabled
48
+ if content_format == "XML":
49
+ content_sections = split_content_into_sections(wiki_details['wiki_xml'], content_format)
50
+ else:
51
+ content_sections = split_content_into_sections(wiki_details['content'], content_format)
52
  else:
53
+ # Use entire content as a single section when chunking is disabled
54
+ content_sections = {"Full Article": wiki_details['content'] if content_format == "Text" else wiki_details['wiki_xml']}
55
 
56
  return (
57
  "Extraction complete! Sections: " + str(len(content_sections)),
 
279
  return results
280
 
281
  # Create Gradio app
282
+ with gr.Blocks(theme=gr.themes.Monochrome(), css="""
283
+ .odd-section { background-color: rgb(228 213 213); padding: 15px; border-radius: 8px; margin: 10px 0; }
284
+ """) as demo:
285
  gr.Markdown("# Wikipedia Translator")
286
 
287
  # State variables
 
351
  outputs=[custom_language]
352
  )
353
 
354
+ # Add chunking control before content format
355
+ chunking = gr.Checkbox(
356
+ label="Enable Content Chunking",
357
+ value=False,
358
+ info="Split content into sections for individual translation"
359
+ )
360
+
361
  content_format = gr.Radio(
362
  choices=["Text", "XML"],
363
  value="XML",
 
448
  with gr.Column() as sections_container:
449
  section_components = []
450
  for i in range(100): # Support up to 100 sections
451
+ with gr.Column(elem_classes=["odd-section"] if i % 2 == 0 else []) as section: # Add class for odd sections
452
+ # Section content
453
  section_textbox = gr.Textbox(visible=False, lines=4, show_copy_button=True)
454
+
455
+ with gr.Row(): # Controls row
456
+ translate_btn = gr.Button("Translate", visible=False)
457
+ debug_btn = gr.Button("View Debug Info", visible=False)
458
+
459
+ # Translation output
460
  translation_output = gr.Textbox(visible=False, lines=4, show_copy_button=True)
461
+
462
+ # Add separator
463
+ gr.Markdown("---", visible=False)
464
+
465
  section_components.extend([section_textbox, translate_btn, translation_output, debug_btn])
466
 
467
  # Connect the translate button to the translation function
 
496
  # Connect the extract button to the function
497
  extract_button.click(
498
  fn=extract_wikipedia_content,
499
+ inputs=[wiki_url, api_key, model_id, base_url, target_language, custom_language, content_format, chunking],
500
  outputs=[
501
  output,
502
  article_pageid,