import os

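# Route XDG-aware caches to /tmp/.cache, which is typically writable in hosted environments.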
os.environ["XDG_CACHE_HOME"] = "/tmp/.cache"

import gradio as gr
from impresso_pipelines.solrnormalization import SolrNormalizationPipeline

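# Instantiate the pipeline once at startup; every request below reuses this instance.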
pipeline = SolrNormalizationPipeline()

# Language options passed to the pipeline; "Auto-detect" is offered separately in the dropdown.
LANGUAGES = ["de", "fr", "es", "it", "pt", "nl", "en", "general"]

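# Run the normalization pipeline on the input text and format its diagnostics for display.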
def normalize(text, lang_choice):
    try:
        lang = None if lang_choice == "Auto-detect" else lang_choice
        result = pipeline(text, lang=lang, diagnostics=True)

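        # Summarise each analyzer step (type and name) reported in the diagnostics.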
        analyzer_steps = []
        if 'analyzer_pipeline' in result and result['analyzer_pipeline']:
            for i, step in enumerate(result['analyzer_pipeline'], 1):
                step_type = step.get('type', 'unknown')
                step_name = step.get('name', 'unnamed')
                analyzer_steps.append(f" {i}. {step_type}: {step_name}")

        analyzer_display = "\n".join(analyzer_steps) if analyzer_steps else " No analyzer steps found"

        return (
            f"🌍 Language: {result['language']}\n\n"
            f"🔤 Tokens:\n{result['tokens']}\n\n"
            f"🚫 Detected stopwords:\n{result['stopwords_detected']}\n\n"
            f"⚙️ Analyzer pipeline:\n{analyzer_display}"
        )
    except Exception as e:
        print("❌ Pipeline error:", e)
        return f"Error: {e}"

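# Minimal Gradio UI: a text box and a language dropdown in, the formatted summary out.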
demo = gr.Interface(
    fn=normalize,
    inputs=[
        gr.Textbox(label="Enter Text"),
        # Include "Auto-detect" so the default value is a valid dropdown choice.
        gr.Dropdown(choices=["Auto-detect"] + LANGUAGES, value="Auto-detect", label="Language"),
    ],
    outputs=gr.Textbox(label="Normalized Output"),
    title="Solr Normalization Pipeline",
    description="Text normalization replicating Solr functionality.",
    allow_flagging="never",
)

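# Listen on all interfaces on port 7860 (Gradio's default) so the app is reachable when containerised.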
demo.launch(server_name="0.0.0.0", server_port=7860)