Spaces:

impresso-project
/

solr-normalization-demo

Running

File size: 1,187 Bytes

327bd85
 
 
 
 
b09d94b
 
42c4e1a
 
 
cc09a85
42c4e1a
 
e36aaa8
 
 
 
 
 
 
 
42c4e1a
 
 
 
 
 
 
 
6a65bcb
93c2b81
42c4e1a
 
b09d94b

import os

# Redirect cache to a writable path inside container
# NOTE(review): this is set before the third-party imports below — presumably
# so those libraries see the variable at import time; confirm before
# reordering these lines.
os.environ["XDG_CACHE_HOME"] = "/tmp/.cache"

import gradio as gr
from impresso_pipelines.solrnormalization import SolrNormalizationPipeline

# A single pipeline instance, constructed once when the module is loaded and
# reused by every normalize() call.
pipeline = SolrNormalizationPipeline()

# Choices offered in the language dropdown. "Auto-detect" is a UI-only label
# that normalize() turns into lang=None; every other entry is handed to the
# pipeline unchanged as the `lang` argument.
LANGUAGES = ["Auto-detect", "de", "fr", "es", "it", "pt", "nl", "en", "general"]

def normalize(text, lang_choice):
    """Run the normalization pipeline on *text* and format its diagnostics.

    Args:
        text: Input text taken from the UI textbox.
        lang_choice: Selected dropdown entry. "Auto-detect" is forwarded to
            the pipeline as ``lang=None``; any other value is passed as-is.

    Returns:
        A multi-line string listing the language, tokens and detected
        stopwords reported by the pipeline, or ``"Error: <message>"`` if
        anything raised while running the pipeline or formatting its result.
    """
    try:
        # The UI label is not a language code; the pipeline gets None instead.
        if lang_choice == "Auto-detect":
            requested_lang = None
        else:
            requested_lang = lang_choice

        report = pipeline(text, lang=requested_lang, diagnostics=True)
        summary = f"Language: {report['language']}\n\nTokens:\n{report['tokens']}\n\nDetected stopwords:\n{report['stopwords_detected']}"
        return summary
    except Exception as exc:
        # UI boundary: log to stdout and show the message instead of crashing.
        print("❌ Pipeline error:", exc)
        return f"Error: {exc}"

# Widgets are created in the same order as before: the two inputs (matching
# normalize's positional parameters: text, then language choice), then the
# single output box.
text_input = gr.Textbox(label="Enter Text")
language_input = gr.Dropdown(
    label="Language",
    choices=LANGUAGES,
    value="Auto-detect",
)
normalized_output = gr.Textbox(label="Normalized Output")

demo = gr.Interface(
    title="Solr Normalization Pipeline",
    description="Text normalization using Lucene analyzers. Language auto-detected if not selected.",
    fn=normalize,
    inputs=[text_input, language_input],
    outputs=normalized_output,
    allow_flagging="never",
)

# Serve on all network interfaces at port 7860.
demo.launch(server_port=7860, server_name="0.0.0.0")