import os

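# Route XDG-aware caches to /tmp/.cache, which is typically writable in hosted environments.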
os.environ["XDG_CACHE_HOME"] = "/tmp/.cache"

import gradio as gr
from impresso_pipelines.solrnormalization import SolrNormalizationPipeline

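# Instantiate the pipeline once at startup; every request below reuses this instance.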
pipeline = SolrNormalizationPipeline()

# Language options passed to the pipeline; "Auto-detect" is offered separately in the dropdown.
LANGUAGES = ["de", "fr", "es", "it", "pt", "nl", "en", "general"]

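# Run the normalization pipeline on the input text and format its diagnostics for display.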
def normalize(text, lang_choice):
    try:
        lang = None if lang_choice == "Auto-detect" else lang_choice
        result = pipeline(text, lang=lang, diagnostics=True)

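        # Summarise each analyzer step (type and name) reported in the diagnostics.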
        analyzer_steps = []
        if 'analyzer_pipeline' in result and result['analyzer_pipeline']:
            for i, step in enumerate(result['analyzer_pipeline'], 1):
                step_type = step.get('type', 'unknown')
                step_name = step.get('name', 'unnamed')
                analyzer_steps.append(f" {i}. {step_type}: {step_name}")

        analyzer_display = "\n".join(analyzer_steps) if analyzer_steps else " No analyzer steps found"

        return (
            f"🌍 Language: {result['language']}\n\n"
            f"🔤 Tokens:\n{result['tokens']}\n\n"
            f"🚫 Detected stopwords:\n{result['stopwords_detected']}\n\n"
            f"⚙️ Analyzer pipeline:\n{analyzer_display}"
        )
    except Exception as e:
        print("❌ Pipeline error:", e)
        return f"Error: {e}"

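# Minimal Gradio UI: a text box and a language dropdown in, the formatted summary out.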
demo = gr.Interface(
    fn=normalize,
    inputs=[
        gr.Textbox(label="Enter Text"),
        # Include "Auto-detect" so the default value is a valid dropdown choice.
        gr.Dropdown(choices=["Auto-detect"] + LANGUAGES, value="Auto-detect", label="Language"),
    ],
    outputs=gr.Textbox(label="Normalized Output"),
    title="Solr Normalization Pipeline",
    description="Text normalization replicating Solr functionality.",
    allow_flagging="never",
)

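# Listen on all interfaces on port 7860 (Gradio's default) so the app is reachable when containerised.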
demo.launch(server_name="0.0.0.0", server_port=7860)