File size: 894 Bytes
42c4e1a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
import gradio as gr
from impresso_pipelines.solrnormalization import SolrNormalizationPipeline

# Single pipeline instance, created once at import time and reused by every
# call to the request handler.
pipeline = SolrNormalizationPipeline()

# Entries offered in the language dropdown. "Auto-detect" is the sentinel the
# handler translates into lang=None; the other entries are passed through
# unchanged as the language argument.
LANGUAGES = [
    "Auto-detect",
    "de",
    "fr",
    "el",
    "ru",
]

def normalize(text, lang_choice):
    """Run the Solr normalization pipeline on *text* and format the result.

    Args:
        text: Raw text typed into the input box.
        lang_choice: Entry picked in the language dropdown. "Auto-detect"
            means no language is forced and ``lang=None`` is handed to the
            pipeline; any other value is passed through as ``lang``.

    Returns:
        A plain-text report built from the 'language', 'tokens' and
        'stopwords_detected' entries of the pipeline result, or a short
        prompt when *text* is empty, whitespace-only or None.
    """
    # Blank input has nothing to normalize or to detect a language from,
    # so answer directly instead of invoking the pipeline.
    if not text or not text.strip():
        return "Please enter some text to normalize."

    lang = None if lang_choice == "Auto-detect" else lang_choice
    # NOTE(review): diagnostics=True is presumably what makes
    # 'stopwords_detected' available in the result - confirm against the
    # pipeline documentation.
    result = pipeline(text, lang=lang, diagnostics=True)
    return (
        f"Language: {result['language']}\n\n"
        f"Tokens:\n{result['tokens']}\n\n"
        f"Detected stopwords:\n{result['stopwords_detected']}"
    )

# Web UI definition: a text box and a language dropdown feed `normalize`,
# whose formatted report is shown in a single output text box.
demo = gr.Interface(
    title="Solr Normalization Pipeline",
    description="Text normalization using Lucene analyzers. Language auto-detected if not selected.",
    fn=normalize,
    # Input order must match the positional parameters of `normalize`:
    # (text, lang_choice).
    inputs=[
        gr.Textbox(label="Enter Text"),
        gr.Dropdown(
            choices=LANGUAGES,
            value="Auto-detect",
            label="Language",
        ),
    ],
    outputs=gr.Textbox(label="Normalized Output"),
)

# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    demo.launch()