Spaces:

mgbam
/

CraAssitant

Runtime error

App Files Files Community

CraAssitant / app.py

mgbam

Update app.py

50a3ce2 verified 7 months ago

raw

history blame

30.8 kB

	import os
	import io
	import json
	import csv
	import asyncio
	import xml.etree.ElementTree as ET
	from typing import Any, Dict, Optional, Tuple, Union, List

	import httpx
	import gradio as gr
	import torch
	from dotenv import load_dotenv
	from loguru import logger
	from huggingface_hub import login
	from openai import OpenAI
	from reportlab.pdfgen import canvas
	from transformers import (
	AutoTokenizer,
	AutoModelForSequenceClassification,
	MarianMTModel,
	MarianTokenizer,
	)
	import pandas as pd
	import altair as alt
	import spacy
	import spacy.cli
	import PyPDF2

	# ---------------------------------------------------------------------
	# Import-time bootstrap. The statements below run in order when the
	# module is loaded: spaCy pipeline, log sink, credentials, API clients,
	# device selection, model weights, and finally shared constants.
	# ---------------------------------------------------------------------

	# Ensure the spaCy English pipeline is available; download it once if
	# loading fails with OSError, then load again.
	try:
	    nlp = spacy.load("en_core_web_sm")
	except OSError:
	    logger.info("Downloading SpaCy 'en_core_web_sm' model...")
	    spacy.cli.download("en_core_web_sm")
	    nlp = spacy.load("en_core_web_sm")

	# Logging: additionally write ERROR-level records to a rotating file.
	logger.add("error_logs.log", rotation="1 MB", level="ERROR")

	# Load environment variables and read the credentials this app needs.
	load_dotenv()
	HUGGINGFACE_TOKEN = os.getenv("HF_TOKEN")
	OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
	ENTREZ_EMAIL = os.getenv("ENTREZ_EMAIL")  # optional; forwarded to NCBI E-utilities requests

	# Fail fast: neither the Hub login nor the OpenAI client can work without these.
	if not HUGGINGFACE_TOKEN or not OPENAI_API_KEY:
	    logger.error("Missing Hugging Face or OpenAI credentials.")
	    raise ValueError("Missing credentials for Hugging Face or OpenAI.")

	# Hugging Face & OpenAI clients
	login(HUGGINGFACE_TOKEN)
	client = OpenAI(api_key=OPENAI_API_KEY)

	device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
	logger.info(f"Using device: {device}")

	# Model: Classification
	# NOTE: `token=` replaces the deprecated `use_auth_token=` keyword of
	# `from_pretrained`; the value passed is unchanged.
	MODEL_NAME = "mgbam/bert-base-finetuned-mgbam"
	try:
	    model = AutoModelForSequenceClassification.from_pretrained(
	        MODEL_NAME, token=HUGGINGFACE_TOKEN
	    ).to(device)
	    tokenizer = AutoTokenizer.from_pretrained(
	        MODEL_NAME, token=HUGGINGFACE_TOKEN
	    )
	except Exception as e:
	    # Log for the error file, then re-raise: the app cannot serve predictions without it.
	    logger.error(f"Model load error: {e}")
	    raise

	# Model: Translation (English -> French Marian checkpoint)
	try:
	    translation_model_name = "Helsinki-NLP/opus-mt-en-fr"
	    translation_model = MarianMTModel.from_pretrained(
	        translation_model_name, token=HUGGINGFACE_TOKEN
	    ).to(device)
	    translation_tokenizer = MarianTokenizer.from_pretrained(
	        translation_model_name, token=HUGGINGFACE_TOKEN
	    )
	except Exception as e:
	    logger.error(f"Translation model load error: {e}")
	    raise

	# UI label -> (source language code, target language code)
	LANGUAGE_MAP: Dict[str, Tuple[str, str]] = {
	    "English to French": ("en", "fr"),
	    "French to English": ("fr", "en"),
	}

	# API endpoints
	PUBMED_SEARCH_URL = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
	PUBMED_FETCH_URL = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
	EUROPE_PMC_BASE_URL = "https://www.ebi.ac.uk/europepmc/webservices/rest/search"

	##########################################################
	# HELPER FUNCTIONS #
	##########################################################

	def safe_json_parse(text: str) -> Union[Dict, None]:
	    """Decode *text* as JSON.

	    Returns the decoded value, or ``None`` after logging the problem when
	    *text* is not valid JSON.
	    """
	    try:
	        decoded = json.loads(text)
	    except json.JSONDecodeError as exc:
	        logger.error(f"JSON parsing error: {exc}")
	        return None
	    else:
	        return decoded

	def parse_pubmed_xml(xml_data: str) -> List[Dict[str, Any]]:
	    """Parse PubMed XML and return one dict per ``PubmedArticle`` element.

	    Each dict has the keys ``PMID``, ``Title``, ``Abstract``, ``Journal`` and
	    ``PublicationDate``; a value is ``None`` when the corresponding element
	    is absent or empty.

	    * Title and abstract text is collected across inline child elements
	      (e.g. ``<i>``), so markup inside a title no longer truncates it.
	    * All ``AbstractText`` sections are joined with a space; a section's
	      ``Label`` attribute, when present, is kept as a ``"LABEL: "`` prefix.
	    * ``PublicationDate`` is ``Year[-Month[-Day]]`` using whichever parts are
	      present, or the ``MedlineDate`` text when there is no ``Year``.

	    Raises:
	        xml.etree.ElementTree.ParseError: if *xml_data* is not well-formed XML.
	    """

	    def _text_of(node: Optional[ET.Element]) -> Optional[str]:
	        # Full text of a node including nested inline elements; None if empty/missing.
	        if node is None:
	            return None
	        flattened = "".join(node.itertext()).strip()
	        return flattened or None

	    def _abstract_of(article: ET.Element) -> Optional[str]:
	        # Prefer the primary <Abstract>; fall back to any AbstractText in the record.
	        sections = article.findall(".//Abstract/AbstractText") or article.findall(".//AbstractText")
	        rendered = []
	        for section in sections:
	            body = _text_of(section)
	            if not body:
	                continue
	            label = section.get("Label")
	            rendered.append(f"{label}: {body}" if label else body)
	        return " ".join(rendered) or None

	    def _publication_date_of(article: ET.Element) -> Optional[str]:
	        pub_date_elem = article.find(".//JournalIssue/PubDate")
	        if pub_date_elem is None:
	            return None
	        year = pub_date_elem.findtext("Year")
	        if not year:
	            # Some records carry only a free-form <MedlineDate>.
	            return _text_of(pub_date_elem.find("MedlineDate"))
	        parts = [year]
	        month = pub_date_elem.findtext("Month")
	        if month:
	            parts.append(month)
	            day = pub_date_elem.findtext("Day")
	            if day:
	                parts.append(day)
	        return "-".join(parts)

	    root = ET.fromstring(xml_data)
	    return [
	        {
	            "PMID": _text_of(article.find(".//PMID")),
	            "Title": _text_of(article.find(".//ArticleTitle")),
	            "Abstract": _abstract_of(article),
	            "Journal": _text_of(article.find(".//Journal/Title")),
	            "PublicationDate": _publication_date_of(article),
	        }
	        for article in root.findall(".//PubmedArticle")
	    ]

	##########################################################
	# ASYNC FETCH FUNCTIONS #
	##########################################################

	async def fetch_articles_by_nct_id(nct_id: str) -> Dict[str, Any]:
	    """Query the Europe PMC search endpoint using *nct_id* as the query string.

	    Returns the decoded JSON response, or ``{"error": <message>}`` when the
	    request, the status check, or the JSON decoding raises.
	    """
	    request_args = dict(query=nct_id, format="json")
	    async with httpx.AsyncClient() as session:
	        try:
	            reply = await session.get(EUROPE_PMC_BASE_URL, params=request_args)
	            reply.raise_for_status()
	            payload = reply.json()
	        except Exception as exc:
	            logger.error(f"Error fetching articles for {nct_id}: {exc}")
	            payload = {"error": str(exc)}
	        return payload

	async def fetch_articles_by_query(query_params: str) -> Dict[str, Any]:
	    """Search Europe PMC with criteria supplied as a JSON object string.

	    Every ``key: value`` pair of the decoded object becomes a ``key:value``
	    clause, and the clauses are joined with ``" AND "``.

	    Returns the decoded JSON response, ``{"error": "Invalid JSON."}`` when
	    *query_params* does not decode to a non-empty dict, or
	    ``{"error": <message>}`` when the request fails.
	    """
	    criteria = safe_json_parse(query_params)
	    if not (criteria and isinstance(criteria, dict)):
	        return {"error": "Invalid JSON."}

	    clauses = [f"{field}:{value}" for field, value in criteria.items()]
	    request_args = {"query": " AND ".join(clauses), "format": "json"}

	    async with httpx.AsyncClient() as session:
	        try:
	            reply = await session.get(EUROPE_PMC_BASE_URL, params=request_args)
	            reply.raise_for_status()
	            payload = reply.json()
	        except Exception as exc:
	            logger.error(f"Error fetching articles: {exc}")
	            payload = {"error": str(exc)}
	        return payload

	async def fetch_pubmed_by_query(query_params: str) -> Dict[str, Any]:
	    """Run a two-step PubMed lookup: ``esearch`` for IDs, then ``efetch`` for XML.

	    *query_params* is a JSON object string; its ``term`` (default ``""``) and
	    ``retmax`` (default ``"10"``) entries are forwarded to the search call.

	    Returns ``{"result": <xml text>}`` on success, ``{"result": ""}`` when the
	    search yields no IDs, and ``{"error": <message>}`` for invalid JSON or any
	    exception raised while talking to the service.
	    """
	    options = safe_json_parse(query_params)
	    if not (options and isinstance(options, dict)):
	        return {"error": "Invalid JSON for PubMed."}

	    esearch_args = {
	        "db": "pubmed",
	        "retmode": "json",
	        "email": ENTREZ_EMAIL,
	        "retmax": options.get("retmax", "10"),
	        "term": options.get("term", ""),
	    }

	    async with httpx.AsyncClient() as session:
	        try:
	            # Step 1: resolve the search term to a list of PubMed IDs.
	            found = await session.get(PUBMED_SEARCH_URL, params=esearch_args)
	            found.raise_for_status()
	            pmids = found.json().get("esearchresult", {}).get("idlist", [])
	            if not pmids:
	                return {"result": ""}

	            # Step 2: fetch the full records for those IDs as XML.
	            efetch_args = dict(
	                db="pubmed",
	                id=",".join(pmids),
	                retmode="xml",
	                email=ENTREZ_EMAIL,
	            )
	            fetched = await session.get(PUBMED_FETCH_URL, params=efetch_args)
	            fetched.raise_for_status()
	            return {"result": fetched.text}
	        except Exception as exc:
	            logger.error(f"Error fetching PubMed articles: {exc}")
	            return {"error": str(exc)}

	async def fetch_crossref_by_query(query_params: str) -> Dict[str, Any]:
	    """Query the Crossref ``/works`` endpoint.

	    The decoded JSON object in *query_params* is passed through unchanged as
	    the request's query parameters.

	    Returns the decoded JSON response, or ``{"error": <message>}`` for
	    invalid JSON input or a failed request.
	    """
	    CROSSREF_API_URL = "https://api.crossref.org/works"

	    request_args = safe_json_parse(query_params)
	    if not (request_args and isinstance(request_args, dict)):
	        return {"error": "Invalid JSON for Crossref."}

	    async with httpx.AsyncClient() as session:
	        try:
	            reply = await session.get(CROSSREF_API_URL, params=request_args)
	            reply.raise_for_status()
	            payload = reply.json()
	        except Exception as exc:
	            logger.error(f"Error fetching Crossref data: {exc}")
	            payload = {"error": str(exc)}
	        return payload

	##########################################################
	# CORE FUNCTIONS #
	##########################################################

	def summarize_text(text: str) -> str:
	    """Ask the OpenAI chat model for a summary of *text*.

	    Returns the stripped model reply, a fixed notice when *text* is blank,
	    or ``"Summarization failed."`` (after logging) when the call raises.
	    """
	    if text.strip() == "":
	        return "No text provided for summarization."

	    prompt = f"Summarize the following clinical data:\n{text}"
	    try:
	        completion = client.chat.completions.create(
	            model="gpt-3.5-turbo",
	            messages=[{"role": "user", "content": prompt}],
	            max_tokens=200,
	            temperature=0.7,
	        )
	        first_choice = completion.choices[0]
	        return first_choice.message.content.strip()
	    except Exception as exc:
	        logger.error(f"Summarization Error: {exc}")
	        return "Summarization failed."

	def predict_outcome(text: str) -> Union[Dict[str, float], str]:
	    """Classify *text* with the sequence-classification model.

	    Returns a mapping ``"Label <k>" -> probability`` (k starting at 1, from
	    a softmax over the logits of the first batch item), a fixed notice when
	    *text* is blank, or ``"Prediction failed."`` (after logging) on error.
	    """
	    if text.strip() == "":
	        return "No text provided for prediction."

	    try:
	        encoded = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
	        on_device = {name: tensor.to(device) for name, tensor in encoded.items()}
	        with torch.no_grad():
	            result = model(**on_device)
	        first_row = torch.nn.functional.softmax(result.logits, dim=-1)[0]

	        scores = {}
	        for position, prob in enumerate(first_row, start=1):
	            scores[f"Label {position}"] = float(prob.item())
	        return scores
	    except Exception as exc:
	        logger.error(f"Prediction Error: {exc}")
	        return "Prediction failed."

	def generate_report(text: str, filename: str = "clinical_report.pdf") -> Optional[str]:
	    """Render *text* into a simple PDF and return the path that was written.

	    Args:
	        text: Report body; each input line is drawn on its own row and long
	            lines are wrapped so they are not drawn past the page edge.
	        filename: Requested output name. Because it comes from a user text
	            box, only its final path component is used and a ``.pdf``
	            suffix is enforced, so the report is always written into the
	            working directory and cannot overwrite non-PDF files such as
	            the application source.

	    Returns:
	        The path of the generated file, or ``None`` (after logging) when
	        generation fails.
	    """
	    # Local import: only this function needs textwrap.
	    import textwrap

	    default_name = "clinical_report.pdf"
	    # Rough characters-per-row budget for the default font starting at x=100.
	    max_chars_per_row = 75

	    try:
	        if not text.strip():
	            # Deliberately not an error: an empty report (title only) is still produced.
	            logger.warning("No text provided for the report.")

	        safe_name = os.path.basename((filename or "").strip()) or default_name
	        if not safe_name.lower().endswith(".pdf"):
	            safe_name += ".pdf"

	        c = canvas.Canvas(safe_name)
	        c.drawString(100, 750, "Clinical Research Report")
	        y = 730
	        for raw_line in text.split("\n"):
	            # wrap() returns [] for blank input; keep the blank row in that case.
	            for row in textwrap.wrap(raw_line, width=max_chars_per_row) or [""]:
	                if y < 50:
	                    # Out of vertical space: start a new page at the top.
	                    c.showPage()
	                    y = 750
	                c.drawString(100, y, row)
	                y -= 15
	        c.save()
	        logger.info(f"Report generated: {safe_name}")
	        return safe_name
	    except Exception as e:
	        logger.error(f"Report Generation Error: {e}")
	        return None

	def visualize_predictions(predictions: Dict[str, float]) -> Optional[alt.Chart]:
	    """Build a bar chart of label probabilities.

	    Returns the Altair chart, or ``None`` (after logging) when building it
	    raises.
	    """
	    try:
	        frame = pd.DataFrame(list(predictions.items()), columns=["Label", "Probability"])

	        bars = alt.Chart(frame).mark_bar()
	        # sort=None keeps the labels in the order they appear in the data.
	        encoded = bars.encode(
	            x=alt.X("Label:N", sort=None),
	            y="Probability:Q",
	            tooltip=["Label", "Probability"],
	        )
	        return encoded.properties(title="Prediction Probabilities", width=500, height=300)
	    except Exception as exc:
	        logger.error(f"Visualization Error: {exc}")
	        return None

	def translate_text(text: str, translation_option: str) -> str:
	    """Translate *text* in the direction named by *translation_option*.

	    *translation_option* must be a key of ``LANGUAGE_MAP``. Its
	    ``(source, target)`` pair selects the matching
	    ``Helsinki-NLP/opus-mt-<source>-<target>`` checkpoint, so "French to
	    English" uses a fr->en model instead of the preloaded en->fr one.
	    Input longer than the tokenizer's limit is truncated rather than
	    failing.

	    Returns the translated text, a fixed notice for blank input or an
	    unknown option, or ``"Translation failed."`` (after logging) on error.
	    """
	    if not text.strip():
	        return "No text provided for translation."
	    try:
	        if translation_option not in LANGUAGE_MAP:
	            return "Unsupported translation option."
	        source_lang, target_lang = LANGUAGE_MAP[translation_option]
	        active_tokenizer, active_model = _get_translation_backend(source_lang, target_lang)
	        inputs = active_tokenizer(
	            text, return_tensors="pt", padding=True, truncation=True
	        ).to(device)
	        translated_tokens = active_model.generate(**inputs)
	        return active_tokenizer.decode(translated_tokens[0], skip_special_tokens=True)
	    except Exception as e:
	        logger.error(f"Translation Error: {e}")
	        return "Translation failed."


	# (source, target) -> (tokenizer, model) for checkpoints loaded on demand.
	_TRANSLATION_BACKENDS: Dict[Tuple[str, str], Tuple[Any, Any]] = {}


	def _get_translation_backend(source_lang: str, target_lang: str) -> Tuple[Any, Any]:
	    """Return ``(tokenizer, model)`` for a language pair, loading it on first use."""
	    checkpoint = f"Helsinki-NLP/opus-mt-{source_lang}-{target_lang}"
	    if checkpoint == translation_model_name:
	        # Reuse the pair that was already loaded at startup.
	        return translation_tokenizer, translation_model

	    pair = (source_lang, target_lang)
	    if pair not in _TRANSLATION_BACKENDS:
	        logger.info(f"Loading translation model: {checkpoint}")
	        loaded_tokenizer = MarianTokenizer.from_pretrained(checkpoint)
	        loaded_model = MarianMTModel.from_pretrained(checkpoint).to(device)
	        _TRANSLATION_BACKENDS[pair] = (loaded_tokenizer, loaded_model)
	    return _TRANSLATION_BACKENDS[pair]

	def perform_named_entity_recognition(text: str) -> str:
	    """Run the spaCy pipeline over *text* and list the entities it finds.

	    Returns one ``"<entity text> -> <label>"`` line per entity, a fixed
	    notice when *text* is blank or nothing is found, or
	    ``"Named Entity Recognition failed."`` (after logging) on error.
	    """
	    if text.strip() == "":
	        return "No text provided for NER."

	    try:
	        rendered = [f"{span.text} -> {span.label_}" for span in nlp(text).ents]
	        if rendered:
	            return "\n".join(rendered)
	        return "No named entities found."
	    except Exception as exc:
	        logger.error(f"NER Error: {exc}")
	        return "Named Entity Recognition failed."

	##########################################################
	# ENHANCED EDA FUNCTIONS #
	##########################################################

	def perform_enhanced_eda(df: pd.DataFrame) -> Tuple[str, Optional[alt.Chart], Optional[alt.Chart]]:
	    """Summarise a DataFrame as text plus up to two Altair charts.

	    Returns ``(summary, correlation_heatmap, distribution_histograms)``:

	    * ``summary`` lists the columns, the shape and ``describe(include="all")``.
	    * The heatmap is built only when at least two numeric columns exist.
	    * The histograms are built only when at least one numeric column exists.

	    On any exception the error is logged and
	    ``("Enhanced EDA failed: <error>", None, None)`` is returned.
	    """
	    try:
	        column_names = list(df.columns)
	        row_count, column_count = df.shape[0], df.shape[1]

	        # Widen pandas' display limits so the describe() table is not elided.
	        with pd.option_context("display.max_colwidth", 200, "display.max_rows", None):
	            stats_table = df.describe(include="all").to_string()

	        summary = (
	            "--- Enhanced EDA Summary ---\n"
	            f"Columns: {column_names}\n"
	            f"Shape: {row_count} rows x {column_count} columns\n\n"
	            f"Summary Statistics:\n{stats_table}\n"
	        )

	        numeric = df.select_dtypes(include="number")
	        numeric_count = numeric.shape[1]
	        heatmap = None
	        histograms = None

	        if numeric_count >= 2:
	            # Pairwise correlations reshaped to long form: one row per feature pair.
	            pairs = numeric.corr().reset_index().melt(id_vars="index")
	            pairs.columns = ["Feature1", "Feature2", "Correlation"]
	            heatmap = (
	                alt.Chart(pairs)
	                .mark_rect()
	                .encode(
	                    x="Feature1:O",
	                    y="Feature2:O",
	                    color="Correlation:Q",
	                    tooltip=["Feature1", "Feature2", "Correlation"]
	                )
	                .properties(width=400, height=400, title="Correlation Heatmap")
	            )

	        if numeric_count >= 1:
	            # Long form (Column, Value) so every numeric column gets its own facet.
	            stacked = numeric.melt(var_name='Column', value_name='Value')
	            histograms = (
	                alt.Chart(stacked)
	                .mark_bar()
	                .encode(
	                    alt.X("Value:Q", bin=alt.Bin(maxbins=30)),
	                    alt.Y('count()'),
	                    alt.Facet('Column:N', columns=2),
	                    tooltip=["Value"]
	                )
	                .properties(
	                    title='Distribution of Numeric Columns',
	                    width=300,
	                    height=200
	                )
	                .interactive()
	            )

	        return summary, heatmap, histograms

	    except Exception as exc:
	        logger.error(f"Enhanced EDA Error: {exc}")
	        return f"Enhanced EDA failed: {exc}", None, None

	##########################################################
	# PARSING FILES WITH MULTI-ENCODING CSV #
	##########################################################

	def parse_csv_file_to_df(file_up: gr.File) -> pd.DataFrame:
	    """Parse an uploaded CSV into a DataFrame, trying several encodings.

	    1) If ``file_up.name`` is a file on disk, read it from that path.
	    2) Otherwise read the bytes from ``file_up.file`` and decode in memory.

	    Both paths try the same encodings in order. ``utf-8-sig`` comes first
	    because it accepts UTF-8 with or without a byte-order mark; ``latin1``
	    is the last resort because it can decode any byte sequence. The
	    in-memory path decodes strictly, so a wrong encoding actually falls
	    through to the next candidate instead of silently producing
	    replacement characters.

	    Raises:
	        ValueError: if no encoding yields a parseable CSV, or if the upload
	            has neither a file on disk nor a ``.file`` attribute. The last
	            underlying error is attached as ``__cause__``.
	    """
	    encodings = ("utf-8-sig", "latin1")
	    path = file_up.name
	    last_error: Optional[Exception] = None

	    # 1) If the file exists on disk, read it from that path.
	    if os.path.isfile(path):
	        for enc in encodings:
	            try:
	                return pd.read_csv(path, encoding=enc)
	            except UnicodeDecodeError as e:
	                last_error = e
	                logger.warning(f"CSV parse failed with encoding={enc}. Trying next...")
	            except Exception as e:
	                last_error = e
	                logger.warning(f"Unexpected CSV read error with encoding={enc}: {e}")
	        raise ValueError("Could not parse CSV with any tried encodings (disk).") from last_error

	    # 2) Fallback: read from the in-memory file object.
	    if not hasattr(file_up, "file"):
	        raise ValueError("Gradio file object has no .file attribute. Cannot parse CSV.")
	    raw_bytes = file_up.file.read()

	    for enc in encodings:
	        try:
	            # Strict decoding: a mismatch must raise so the next encoding is tried.
	            text_decoded = raw_bytes.decode(enc)
	            return pd.read_csv(io.StringIO(text_decoded))
	        except UnicodeDecodeError as e:
	            last_error = e
	            logger.warning(f"In-memory CSV parse failed with encoding={enc}. Trying next...")
	        except Exception as e:
	            last_error = e
	            logger.warning(f"Unexpected in-memory CSV error (enc={enc}): {e}")
	    raise ValueError("Could not parse CSV with any tried encodings (in-memory).") from last_error

	def parse_excel_file_to_df(file_up: gr.File) -> pd.DataFrame:
	    """Parse an uploaded ``.xls`` / ``.xlsx`` workbook into a DataFrame.

	    1) If ``file_up.name`` is a file on disk, read it from that path.
	    2) Otherwise read the bytes from ``file_up.file`` in memory.

	    ``.xlsx`` files are read with openpyxl, as before. openpyxl cannot read
	    the legacy ``.xls`` format, so for those files the engine is left to
	    pandas to choose.

	    Raises:
	        ValueError: if the upload has no file on disk and no ``.file``
	            attribute, or if the in-memory read fails.
	        Exception: whatever ``pandas.read_excel`` raises on the disk path.
	    """
	    excel_path = file_up.name
	    engine = None if excel_path.lower().endswith(".xls") else "openpyxl"

	    if os.path.isfile(excel_path):
	        return pd.read_excel(excel_path, engine=engine)

	    if not hasattr(file_up, "file"):
	        raise ValueError("Gradio file object has no .file attribute. Cannot parse Excel.")
	    try:
	        excel_bytes = file_up.file.read()
	        return pd.read_excel(io.BytesIO(excel_bytes), engine=engine)
	    except Exception as e:
	        raise ValueError(f"Excel parse error: {e}") from e

	def parse_pdf_file_as_str(file_up: gr.File) -> str:
	    """Extract the text of every page of an uploaded PDF with PyPDF2.

	    Reads from ``file_up.name`` when that path is a file on disk, otherwise
	    from the bytes of ``file_up.file``. Page texts are joined with newlines;
	    a page with no extractable text contributes an empty string.

	    Raises:
	        ValueError: if the in-memory fallback is needed but ``file_up`` has
	            no ``.file`` attribute, or if the in-memory read/parse fails.
	    """
	    pdf_path = file_up.name

	    if not os.path.isfile(pdf_path):
	        # No file on disk: fall back to the in-memory file object.
	        if not hasattr(file_up, "file"):
	            raise ValueError("Gradio file object has no .file attribute. Cannot parse PDF.")
	        try:
	            buffered = io.BytesIO(file_up.file.read())
	            in_memory_reader = PyPDF2.PdfReader(buffered)
	            return "\n".join(page.extract_text() or "" for page in in_memory_reader.pages)
	        except Exception as exc:
	            raise ValueError(f"PDF parse error: {exc}")

	    with open(pdf_path, "rb") as handle:
	        disk_reader = PyPDF2.PdfReader(handle)
	        return "\n".join(page.extract_text() or "" for page in disk_reader.pages)

	def parse_text_file_as_str(file_up: gr.File) -> str:
	    """Return the contents of an uploaded text file decoded as UTF-8.

	    Reads from ``file_up.name`` when that path is a file on disk, otherwise
	    from ``file_up.file``. Undecodable bytes are replaced rather than
	    raising.

	    Raises:
	        ValueError: if neither a file on disk nor a ``.file`` attribute is
	            available.
	    """
	    path = file_up.name
	    if os.path.isfile(path):
	        with open(path, "rb") as handle:
	            payload = handle.read()
	    elif hasattr(file_up, "file"):
	        payload = file_up.file.read()
	    else:
	        raise ValueError("Gradio file object has no .file attribute. Cannot parse txt.")
	    return payload.decode("utf-8", errors="replace")

	##########################################################
	# GRADIO APP SETUP #
	##########################################################

	with gr.Blocks() as demo:
	    # Page header and feature overview.
	    gr.Markdown("# 🩺 Enhanced Clinical Research Assistant with EDA")
	    gr.Markdown("""
	- Summarize text (GPT-3.5)
	- Predict outcomes (fine-tuned model)
	- Translate (English ↔ French)
	- Named Entity Recognition (spaCy)
	- Fetch from PubMed, Crossref, Europe PMC
	- Generate PDF reports
	- Enhanced EDA on CSV/Excel (with fallback encodings)
	""")

	    # Primary inputs: free text next to an optional file upload.
	    _accepted_upload_types = [".txt", ".csv", ".xls", ".xlsx", ".pdf"]
	    with gr.Row():
	        text_input = gr.Textbox(label="Input Text", lines=5)
	        file_input = gr.File(
	            label="Upload File (txt/csv/xls/xlsx/pdf)",
	            file_types=_accepted_upload_types,
	        )

	    # Action selector; these labels are the strings the action handler compares against.
	    _action_choices = [
	        "Summarize",
	        "Predict Outcome",
	        "Generate Report",
	        "Translate",
	        "Perform Named Entity Recognition",
	        "Perform Enhanced EDA",
	        "Fetch Clinical Studies",
	        "Fetch PubMed Articles (Legacy)",
	        "Fetch PubMed by Query",
	        "Fetch Crossref by Query",
	    ]
	    action = gr.Radio(_action_choices, label="Select an Action")

	    # Per-action options.
	    translation_option = gr.Dropdown(
	        label="Translation Option",
	        choices=list(LANGUAGE_MAP.keys()),
	        value="English to French",
	    )
	    query_params_input = gr.Textbox(label="Query Params (JSON)", placeholder='{"term": "cancer"}')
	    nct_id_input = gr.Textbox(label="NCT ID")
	    report_filename_input = gr.Textbox(label="Report Filename", value="clinical_report.pdf")
	    export_format = gr.Dropdown(label="Export Format", choices=["None", "CSV", "JSON"])

	    # Outputs: text, two chart slots side by side, and a downloadable file.
	    output_text = gr.Textbox(label="Output", lines=8)
	    with gr.Row():
	        output_chart = gr.Plot(label="Chart 1")
	        output_chart2 = gr.Plot(label="Chart 2")
	    output_file = gr.File(label="Generated File")

	    submit_btn = gr.Button("Submit")

	################################################################
	# MAIN ACTION HANDLER #
	################################################################
	async def handle_action(
	    action: str,
	    txt: str,
	    file_up: gr.File,
	    translation_opt: str,
	    query_str: str,
	    nct_id: str,
	    report_fn: str,
	    exp_fmt: str
	) -> Tuple[Optional[str], Optional[Any], Optional[Any], Optional[str]]:
	    """Dispatch the selected UI action and return values for the four outputs.

	    Args:
	        action: Label chosen in the action selector.
	        txt: Free text from the input box.
	        file_up: Optional upload. ``.txt``/``.pdf`` contents are always
	            appended to the text; CSV/Excel contents are appended (as CSV
	            text) for the text-based actions only.
	        translation_opt: Key of ``LANGUAGE_MAP`` used by "Translate".
	        query_str: JSON query parameters for the fetch actions.
	        nct_id: Trial identifier for "Fetch Clinical Studies".
	        report_fn: Requested file name for "Generate Report".
	        exp_fmt: Export format selection; not used by this handler.

	    Returns:
	        ``(text, chart_1, chart_2, file_path)``; slots that do not apply to
	        the action are ``None``. Failures are reported as a message in the
	        text slot. Fetch failures now surface the underlying error instead
	        of being reported as "no articles found".
	    """
	    combined_text = (txt or "").strip()

	    # If a file is uploaded, fold .txt / .pdf contents into the working text.
	    if file_up is not None:
	        file_ext = os.path.splitext(file_up.name)[1].lower()
	        try:
	            if file_ext == ".txt":
	                combined_text += "\n" + parse_text_file_as_str(file_up)
	            elif file_ext == ".pdf":
	                combined_text += "\n" + parse_pdf_file_as_str(file_up)
	            # CSV/Excel are handled per action below, because EDA needs the
	            # DataFrame while the text actions need a textual rendering.
	        except Exception as e:
	            return f"File parse error: {e}", None, None, None

	    # Text-based actions share the same CSV/Excel -> text ingestion step.
	    error_label = _TABULAR_ERROR_LABELS.get(action)
	    if error_label is not None:
	        combined_text, parse_error = _append_tabular_text(combined_text, file_up, error_label)
	        if parse_error:
	            return parse_error, None, None, None

	    if action == "Summarize":
	        return summarize_text(combined_text), None, None, None

	    if action == "Predict Outcome":
	        predictions = predict_outcome(combined_text)
	        if isinstance(predictions, dict):
	            chart = visualize_predictions(predictions)
	            return json.dumps(predictions, indent=2), chart, None, None
	        # A string here is the notice / failure message from predict_outcome.
	        return predictions, None, None, None

	    if action == "Generate Report":
	        fp = generate_report(combined_text, report_fn)
	        msg = f"Report generated: {fp}" if fp else "Report generation failed."
	        return msg, None, None, fp

	    if action == "Translate":
	        return translate_text(combined_text, translation_opt), None, None, None

	    if action == "Perform Named Entity Recognition":
	        return perform_named_entity_recognition(combined_text), None, None, None

	    if action == "Perform Enhanced EDA":
	        return await _action_eda(file_up, txt or "")

	    if action == "Fetch Clinical Studies":
	        if nct_id:
	            result = await fetch_articles_by_nct_id(nct_id)
	        elif query_str:
	            result = await fetch_articles_by_query(query_str)
	        else:
	            return "Provide either an NCT ID or valid query parameters.", None, None, None

	        if "error" in result:
	            return f"Error fetching clinical studies: {result['error']}", None, None, None

	        articles = result.get("resultList", {}).get("result", [])
	        if not articles:
	            return "No articles found.", None, None, None

	        formatted_results = "\n\n".join(
	            f"Title: {a.get('title')}\nJournal: {a.get('journalTitle')} ({a.get('pubYear')})"
	            for a in articles
	        )
	        return formatted_results, None, None, None

	    if action in ("Fetch PubMed Articles (Legacy)", "Fetch PubMed by Query"):
	        pubmed_result = await fetch_pubmed_by_query(query_str)
	        if "error" in pubmed_result:
	            return f"Error fetching PubMed articles: {pubmed_result['error']}", None, None, None

	        xml_data = pubmed_result.get("result")
	        if not xml_data:
	            return "No articles found or error fetching data.", None, None, None
	        try:
	            articles = parse_pubmed_xml(xml_data)
	        except ET.ParseError as e:
	            return f"PubMed XML parse error: {e}", None, None, None

	        formatted = "\n\n".join(
	            f"{a['Title']} - {a['Journal']} ({a['PublicationDate']})"
	            for a in articles if a['Title']
	        )
	        return formatted if formatted else "No articles found.", None, None, None

	    if action == "Fetch Crossref by Query":
	        crossref_result = await fetch_crossref_by_query(query_str)
	        if "error" in crossref_result:
	            return f"Error fetching Crossref data: {crossref_result['error']}", None, None, None

	        items = crossref_result.get("message", {}).get("items", [])
	        if not items:
	            return "No results found.", None, None, None
	        # `title` can be missing or an empty list; `or` covers both cases.
	        formatted = "\n\n".join(
	            f"Title: {(item.get('title') or ['No title'])[0]}, DOI: {item.get('DOI')}"
	            for item in items
	        )
	        return formatted, None, None, None

	    return "Invalid action.", None, None, None


	# Action label -> short name used in CSV/Excel parse error messages.
	_TABULAR_ERROR_LABELS = {
	    "Summarize": "Summarize",
	    "Predict Outcome": "Predict",
	    "Generate Report": "Report",
	    "Translate": "Translate",
	    "Perform Named Entity Recognition": "NER",
	}


	def _append_tabular_text(base_text: str, file_up: Any, error_label: str) -> Tuple[str, Optional[str]]:
	    """Append an uploaded CSV/Excel file, rendered as CSV text, to *base_text*.

	    Returns ``(text, error_message)``. ``error_message`` is ``None`` unless
	    parsing failed; the text is returned unchanged when there is no upload
	    or the upload is not a CSV/Excel file.
	    """
	    if not file_up:
	        return base_text, None

	    lowered_name = file_up.name.lower()
	    if lowered_name.endswith(".csv"):
	        kind, parser = "CSV", parse_csv_file_to_df
	    elif lowered_name.endswith((".xls", ".xlsx")):
	        kind, parser = "Excel", parse_excel_file_to_df
	    else:
	        return base_text, None

	    try:
	        frame = parser(file_up)
	        return base_text + "\n" + frame.to_csv(index=False), None
	    except Exception as e:
	        return base_text, f"{kind} parse error for {error_label}: {e}"

	async def _action_eda(file_up: Optional[gr.File], raw_text: str) -> Tuple[Optional[str], Optional[Any], Optional[Any], Optional[str]]:
	    """Run the enhanced EDA on an uploaded CSV/Excel file or on pasted CSV text.

	    Returns ``(summary_or_message, correlation_chart, distribution_chart,
	    None)``. When there is nothing usable to analyse, or parsing fails, the
	    first slot carries the message and both chart slots are ``None``.
	    """
	    if file_up is None and not raw_text.strip():
	        return "No data provided for EDA.", None, None, None

	    if not file_up:
	        # No upload: treat the pasted text as CSV, but only if it contains a comma.
	        if "," not in raw_text:
	            return "No valid CSV/Excel data found for EDA.", None, None, None
	        try:
	            frame = pd.read_csv(io.StringIO(raw_text))
	            summary, corr, dist = perform_enhanced_eda(frame)
	            return summary, corr, dist, None
	        except Exception as exc:
	            return f"Text-based CSV parse error: {exc}", None, None, None

	    # Upload present: choose the parser (and error prefix) from the extension.
	    suffix = os.path.splitext(file_up.name)[1].lower()
	    if suffix == ".csv":
	        loader, failure_prefix = parse_csv_file_to_df, "CSV EDA failed"
	    elif suffix in (".xls", ".xlsx"):
	        loader, failure_prefix = parse_excel_file_to_df, "Excel EDA failed"
	    else:
	        return "No valid CSV/Excel data for EDA.", None, None, None

	    try:
	        frame = loader(file_up)
	        summary, corr, dist = perform_enhanced_eda(frame)
	        return summary, corr, dist, None
	    except Exception as exc:
	        return f"{failure_prefix}: {exc}", None, None, None

	# Event listeners can only be attached while a Blocks context is active.
	# Re-enter `demo` explicitly so the wiring is valid at module level, after
	# the handler functions have been defined.
	with demo:
	    submit_btn.click(
	        fn=handle_action,
	        # Order must match handle_action's positional parameters.
	        inputs=[
	            action,
	            text_input,
	            file_input,
	            translation_option,
	            query_params_input,
	            nct_id_input,
	            report_filename_input,
	            export_format,
	        ],
	        # Order must match the 4-tuple returned by handle_action.
	        outputs=[output_text, output_chart, output_chart2, output_file],
	    )

	# Serve on all interfaces at port 7860.
	demo.launch(server_name="0.0.0.0", server_port=7860, share=True)