Gleb Gleb
adjusted displaying
527919e
import os
# Redirect cache to a writable path inside the container.
# NOTE(review): this assignment sits before the gradio / impresso_pipelines
# imports below, presumably so that anything resolving XDG_CACHE_HOME at
# import time sees the override — keep this ordering unless confirmed
# otherwise.
os.environ["XDG_CACHE_HOME"] = "/tmp/.cache"
import gradio as gr
from impresso_pipelines.solrnormalization import SolrNormalizationPipeline
# One pipeline instance, created at import time and called by normalize().
pipeline = SolrNormalizationPipeline()
# Language codes offered as choices in the UI language dropdown.
LANGUAGES = ["de", "fr", "es", "it", "pt", "nl", "en", "general"]
def normalize(text, lang_choice):
    """Run the normalization pipeline on *text* and format the result for display.

    Args:
        text: Input text handed to the module-level ``pipeline``.
        lang_choice: Language selected in the UI. The literal
            ``"Auto-detect"`` is translated to ``lang=None``; any other
            value is passed to the pipeline unchanged.

    Returns:
        A multi-line string with the language, the tokens, the detected
        stopwords and a numbered list of analyzer steps. If anything in the
        call or the formatting raises, the exception is printed and
        ``"Error: <message>"`` is returned instead of propagating, so the
        UI always receives a string.
    """
    try:
        lang = None if lang_choice == "Auto-detect" else lang_choice
        result = pipeline(text, lang=lang, diagnostics=True)

        # Format analyzer pipeline for better readability: one numbered
        # "type: name" line per step, with placeholders for missing keys.
        analyzer_steps = []
        if "analyzer_pipeline" in result and result["analyzer_pipeline"]:
            analyzer_steps = [
                f" {i}. {step.get('type', 'unknown')}: {step.get('name', 'unnamed')}"
                for i, step in enumerate(result["analyzer_pipeline"], 1)
            ]
        analyzer_display = (
            "\n".join(analyzer_steps) if analyzer_steps else " No analyzer steps found"
        )

        # The "Tokens" and "Analyzer pipeline" headings previously contained
        # mis-decoded UTF-8 bytes (mojibake); they now carry the intended emoji.
        sections = [
            f"🌍 Language: {result['language']}",
            f"🔀 Tokens:\n{result['tokens']}",
            f"🚫 Detected stopwords:\n{result['stopwords_detected']}",
            f"⚙️ Analyzer pipeline:\n{analyzer_display}",
        ]
        return "\n\n".join(sections)
    except Exception as e:
        # UI boundary: report the failure in the output box rather than
        # letting the exception escape the handler.
        print("❌ Pipeline error:", e)
        return f"Error: {e}"
# Web UI: a text box plus a language selector feeding normalize(), with the
# formatted result shown in a single output text box.
demo = gr.Interface(
    fn=normalize,
    inputs=[
        gr.Textbox(label="Enter Text"),
        # "Auto-detect" is the default value, so it must also be a listed
        # choice; normalize() translates it to lang=None. Previously the
        # default was not among the choices.
        gr.Dropdown(
            choices=["Auto-detect", *LANGUAGES],
            value="Auto-detect",
            label="Language",
        ),
    ],
    outputs=gr.Textbox(label="Normalized Output"),
    title="Solr Normalization Pipeline",
    description="Text normalization replicating Solr functionality.",
    allow_flagging="never",
)
# Listen on all interfaces, port 7860.
demo.launch(server_name="0.0.0.0", server_port=7860)