File size: 1,751 Bytes
327bd85 b09d94b 42c4e1a 8c7a402 42c4e1a e36aaa8 527919e e36aaa8 42c4e1a 8c7a402 93c2b81 42c4e1a b09d94b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 |
import os

# Redirect the user cache to a path that is writable inside the container.
# NOTE: presumably gradio reads XDG_CACHE_HOME at import time — keep this
# assignment BEFORE the gradio import; do not reorder.
os.environ["XDG_CACHE_HOME"] = "/tmp/.cache"

import gradio as gr
from impresso_pipelines.solrnormalization import SolrNormalizationPipeline

# Single shared pipeline instance, constructed once at startup and reused
# for every request handled by normalize().
pipeline = SolrNormalizationPipeline()

# Language codes offered for explicit selection in the UI dropdown.
# NOTE(review): "general" presumably selects a language-agnostic analyzer —
# TODO confirm against the pipeline's documentation.
LANGUAGES = ["de", "fr", "es", "it", "pt", "nl", "en", "general"]
def normalize(text, lang_choice):
    """Run the Solr normalization pipeline on *text* and format the result.

    lang_choice is a language code, or "Auto-detect" to let the pipeline
    pick the language itself (passed through as lang=None).
    Returns a human-readable multi-section string; on any pipeline failure
    the error is printed and returned as an "Error: ..." string instead.
    """
    try:
        selected = lang_choice if lang_choice != "Auto-detect" else None
        result = pipeline(text, lang=selected, diagnostics=True)

        # Render each analyzer step as a numbered "type: name" line.
        steps = [
            f"  {idx}. {entry.get('type', 'unknown')}: {entry.get('name', 'unnamed')}"
            for idx, entry in enumerate(result.get('analyzer_pipeline') or [], 1)
        ]
        analyzer_display = "\n".join(steps) if steps else "  No analyzer steps found"

        return (
            f"🌍 Language: {result['language']}\n\n"
            f"🔤 Tokens:\n{result['tokens']}\n\n"
            f"🚫 Detected stopwords:\n{result['stopwords_detected']}\n\n"
            f"⚙️ Analyzer pipeline:\n{analyzer_display}"
        )
    except Exception as e:
        print("❌ Pipeline error:", e)
        return f"Error: {e}"
# Build the Gradio UI. The dropdown exposes explicit language codes plus an
# "Auto-detect" option, which normalize() maps to lang=None.
demo = gr.Interface(
    fn=normalize,
    inputs=[
        gr.Textbox(label="Enter Text"),
        # Bug fix: "Auto-detect" was set as the default value but was missing
        # from the choices list (LANGUAGES has no such entry), so the default
        # was invalid and the auto-detect path was unselectable — prepend it.
        gr.Dropdown(
            choices=["Auto-detect"] + LANGUAGES,
            value="Auto-detect",
            label="Language",
        ),
    ],
    outputs=gr.Textbox(label="Normalized Output"),
    title="Solr Normalization Pipeline",
    description="Text normalization replicating Solr functionality.",
    allow_flagging="never",  # NOTE(review): deprecated in Gradio 4+ in favor of flagging_mode — confirm installed version
)

# Bind to all interfaces so the app is reachable from outside the container;
# 7860 is Gradio's conventional port.
demo.launch(server_name="0.0.0.0", server_port=7860)
|