|
import gradio as gr |
|
from impresso_pipelines.solrnormalization import SolrNormalizationPipeline |
|
|
|
# Single pipeline instance, built once at import time and shared by every
# call to normalize().
pipeline = SolrNormalizationPipeline()
|
|
|
# Choices offered in the language dropdown. "Auto-detect" is the sentinel that
# normalize() turns into lang=None; every other entry is forwarded to the
# pipeline unchanged as the language code.
LANGUAGES = ["Auto-detect", "de", "fr", "el", "ru"]
|
|
|
def normalize(text, lang_choice):
    """Run the normalization pipeline on *text* and format the result.

    Args:
        text: Raw text entered by the user.
        lang_choice: Entry selected in the language dropdown. The value
            "Auto-detect" is translated to ``lang=None`` for the pipeline;
            any other value is forwarded unchanged as the language code.

    Returns:
        A multi-line string showing the ``language``, ``tokens`` and
        ``stopwords_detected`` entries of the pipeline result, or a short
        prompt when *text* is empty or contains only whitespace.
    """
    # Blank input carries nothing to detect or analyse, so answer directly
    # instead of handing it to the pipeline.
    if not text or not text.strip():
        return "Please enter some text to normalize."

    lang = None if lang_choice == "Auto-detect" else lang_choice
    result = pipeline(text, lang=lang, diagnostics=True)

    return (
        f"Language: {result['language']}\n\n"
        f"Tokens:\n{result['tokens']}\n\n"
        f"Detected stopwords:\n{result['stopwords_detected']}"
    )
|
|
|
# Gradio UI: a free-text box plus a language dropdown feed normalize(), and
# the formatted report it returns is shown in a single output textbox.
demo = gr.Interface(
    fn=normalize,
    inputs=[
        gr.Textbox(label="Enter Text"),
        # Defaults to the "Auto-detect" entry, which normalize() maps to
        # lang=None.
        gr.Dropdown(choices=LANGUAGES, value="Auto-detect", label="Language"),
    ],
    outputs=gr.Textbox(label="Normalized Output"),
    title="Solr Normalization Pipeline",
    description="Text normalization using Lucene analyzers. Language auto-detected if not selected.",
)
|
|
|
# Start the Gradio server only when this file is executed as a script, not
# when it is imported.
if __name__ == "__main__":
    demo.launch()