"""Gradio demo for the Impresso Solr normalization pipeline.

Exposes a single text box + language dropdown; output shows the detected
language, token stream, removed stopwords, and the Solr analyzer chain.
"""
import os

# Redirect cache to a writable path inside the container: the pipeline
# downloads models on first use, and the default XDG cache dir may be
# read-only in a containerized deployment. Must be set BEFORE importing
# the pipeline package, which reads it at import time.
os.environ["XDG_CACHE_HOME"] = "/tmp/.cache"

import gradio as gr
from impresso_pipelines.solrnormalization import SolrNormalizationPipeline

pipeline = SolrNormalizationPipeline()

# Languages supported by the pipeline; "Auto-detect" is a UI-only sentinel
# mapped to lang=None below.
AUTO_DETECT = "Auto-detect"
LANGUAGES = ["de", "fr", "es", "it", "pt", "nl", "en", "general"]


def normalize(text, lang_choice):
    """Run the Solr normalization pipeline and format its diagnostics.

    Args:
        text: Raw input text to normalize.
        lang_choice: Language code from the dropdown, or the
            "Auto-detect" sentinel to let the pipeline pick the language.

    Returns:
        A human-readable multi-line summary (language, tokens, stopwords,
        analyzer steps), or an ``"Error: ..."`` string if the pipeline
        raises.
    """
    try:
        # None triggers automatic language detection inside the pipeline.
        lang = None if lang_choice == AUTO_DETECT else lang_choice
        result = pipeline(text, lang=lang, diagnostics=True)

        # Format the analyzer chain as a numbered list for readability.
        analyzer_steps = []
        if 'analyzer_pipeline' in result and result['analyzer_pipeline']:
            for i, step in enumerate(result['analyzer_pipeline'], 1):
                step_type = step.get('type', 'unknown')
                step_name = step.get('name', 'unnamed')
                analyzer_steps.append(f"  {i}. {step_type}: {step_name}")

        analyzer_display = "\n".join(analyzer_steps) if analyzer_steps else "  No analyzer steps found"

        return f"šŸŒ Language: {result['language']}\n\nšŸ”¤ Tokens:\n{result['tokens']}\n\n🚫 Detected stopwords:\n{result['stopwords_detected']}\n\nāš™ļø Analyzer pipeline:\n{analyzer_display}"
    except Exception as e:
        # Surface the failure to the UI instead of a Gradio stack trace.
        print("āŒ Pipeline error:", e)
        return f"Error: {e}"


demo = gr.Interface(
    fn=normalize,
    inputs=[
        gr.Textbox(label="Enter Text"),
        # BUG FIX: the default value "Auto-detect" was not among the
        # choices, so the dropdown default was invalid and users could
        # never switch back to auto-detection. Prepend the sentinel.
        gr.Dropdown(choices=[AUTO_DETECT] + LANGUAGES, value=AUTO_DETECT, label="Language")
    ],
    outputs=gr.Textbox(label="Normalized Output"),
    title="Solr Normalization Pipeline",
    description="Text normalization replicating Solr functionality.",
    allow_flagging="never"
)

# Guard the launch so importing this module (e.g. for tests) does not
# start a server; 0.0.0.0 binds all interfaces for container use.
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)