Gleb Gleb committed on
Commit
42c4e1a
·
1 Parent(s): 4796552

initial version

Browse files
Files changed (3) hide show
  1. Dockerfile +10 -0
  2. app.py +25 -0
  3. requirements.txt +3 -0
Dockerfile ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
# Slim Python base image. It does not ship a Java runtime, so one is installed
# below for the JPype bridge listed in requirements.txt.
FROM python:3.10-slim

# JPype needs a JVM at runtime; install a headless JRE and drop the apt lists
# afterwards to keep the layer small.
RUN apt-get update \
    && apt-get install -y --no-install-recommends default-jre-headless \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Install dependencies before copying the sources so this layer stays cached
# until requirements.txt changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY . .

# Gradio binds to 127.0.0.1 unless told otherwise, which is unreachable from
# outside the container; listen on all interfaces on Gradio's default port.
ENV GRADIO_SERVER_NAME="0.0.0.0"
EXPOSE 7860

CMD ["python", "app.py"]
app.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from impresso_pipelines.solrnormalization import SolrNormalizationPipeline
3
+
4
# Build the normalization pipeline once at import time; normalize() reuses
# this single instance for every call.
pipeline = SolrNormalizationPipeline()

# Entries offered in the language dropdown: the "Auto-detect" sentinel first,
# followed by the explicit language codes.
LANGUAGES = [
    "Auto-detect",
    "de",
    "fr",
    "el",
    "ru",
]
7
+
8
def normalize(text, lang_choice):
    """Normalize *text* with the Solr pipeline and format its diagnostics.

    Args:
        text: Raw input from the text box.
        lang_choice: Entry picked in the language dropdown. "Auto-detect"
            is forwarded to the pipeline as ``lang=None``; any other value
            is passed through unchanged.

    Returns:
        A plain-text report containing the language, tokens and detected
        stopwords returned by the pipeline, or a short prompt when *text*
        is missing, empty or whitespace-only.
    """
    # Nothing to analyse: answer directly instead of handing the pipeline
    # empty input.
    if not (text and text.strip()):
        return "Please enter some text to normalize."

    lang = None if lang_choice == "Auto-detect" else lang_choice
    result = pipeline(text, lang=lang, diagnostics=True)
    return (
        f"Language: {result['language']}\n\n"
        f"Tokens:\n{result['tokens']}\n\n"
        f"Detected stopwords:\n{result['stopwords_detected']}"
    )
12
+
13
# Web UI: a free-text box and a language selector feed normalize(); the
# formatted report it returns is shown in a single output text box.
demo = gr.Interface(
    title="Solr Normalization Pipeline",
    description="Text normalization using Lucene analyzers. Language auto-detected if not selected.",
    fn=normalize,
    inputs=[
        gr.Textbox(label="Enter Text"),
        gr.Dropdown(label="Language", choices=LANGUAGES, value="Auto-detect"),
    ],
    outputs=gr.Textbox(label="Normalized Output"),
)

# Start the server only when executed as a script, not when imported.
if __name__ == "__main__":
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ impresso_pipelines[solrnormalization]
2
+ gradio
3
+ jpype1