|
import os

# Point the XDG cache directory at /tmp/.cache. This assignment is kept ahead
# of the third-party imports below on purpose.
# NOTE(review): presumably the imported libraries resolve their cache location
# from XDG_CACHE_HOME and the default location is not writable on the
# deployment host -- confirm before moving or removing this line.
os.environ["XDG_CACHE_HOME"] = "/tmp/.cache"
|
|
|
# These imports intentionally come after the XDG_CACHE_HOME assignment above,
# hence the E402 (module-level import not at top of file) suppressions.
import gradio as gr  # noqa: E402

from impresso_pipelines.solrnormalization import SolrNormalizationPipeline  # noqa: E402
|
|
|
# Single pipeline instance, built once at import time and reused by every
# call to `normalize`.
pipeline = SolrNormalizationPipeline()
|
|
|
# Entries offered in the language dropdown. "Auto-detect" is a UI-only
# sentinel: `normalize` translates it to `lang=None`; every other entry is
# passed to the pipeline unchanged.
LANGUAGES = [
    "Auto-detect",
    "de",
    "fr",
    "es",
    "it",
    "pt",
    "nl",
    "en",
    "general",
]
|
|
|
def normalize(text: str, lang_choice: str) -> str:
    """Run the normalization pipeline on *text* and format the result for display.

    Args:
        text: Raw text entered by the user.
        lang_choice: The selected dropdown entry. ``"Auto-detect"`` is
            translated to ``lang=None``; any other value is forwarded to the
            pipeline as-is.

    Returns:
        A plain-text report listing the language, the tokens and the detected
        stopwords reported by the pipeline. If the input is empty or the
        pipeline fails, an ``"Error: ..."`` message is returned instead of
        raising, so the UI always has something to show.
    """
    # Nothing to analyse: answer directly instead of pushing blank text
    # through the pipeline.
    if not text or not text.strip():
        return "Error: please enter some text to normalize."

    lang = None if lang_choice == "Auto-detect" else lang_choice

    try:
        result = pipeline(text, lang=lang, diagnostics=True)
        # Formatting stays inside the try so a missing key in the result is
        # reported the same way as a pipeline failure.
        return (
            f"Language: {result['language']}\n\n"
            f"Tokens:\n{result['tokens']}\n\n"
            f"Detected stopwords:\n{result['stopwords_detected']}"
        )
    except Exception as e:
        # UI boundary: log the failure and surface it to the user rather than
        # letting the exception escape the callback.
        print("❌ Pipeline error:", e)
        return f"Error: {e}"
|
|
|
# Gradio UI: a text box plus a language dropdown feeding `normalize`, with the
# formatted report shown in a single output text box.
demo = gr.Interface(
    fn=normalize,
    inputs=[
        gr.Textbox(label="Enter Text"),
        gr.Dropdown(choices=LANGUAGES, value="Auto-detect", label="Language"),
    ],
    outputs=gr.Textbox(label="Normalized Output"),
    title="Solr Normalization Pipeline",
    description="Text normalization using Lucene analyzers. Language auto-detected if not selected.",
    # NOTE(review): newer Gradio releases appear to rename this option to
    # `flagging_mode` -- confirm against the pinned Gradio version.
    allow_flagging="never",
)
|
|
|
# Start the web server on all network interfaces (0.0.0.0), port 7860.
demo.launch(server_name="0.0.0.0", server_port=7860)
|
|