Spaces:

mgbam
/

CraAssitant

Runtime error

App Files Files Community

mgbam committed on Jan 20

Commit

a791ee6

verified ·

1 Parent(s): 4290ea7

Update app.py

Browse files

Files changed (1) hide show

app.py +92 -519

app.py CHANGED Viewed

@@ -12,7 +12,7 @@ import torch
 from dotenv import load_dotenv
 from loguru import logger
 from huggingface_hub import login
-import openai
 from reportlab.pdfgen import canvas
 from transformers import (
     AutoTokenizer,
@@ -30,17 +30,24 @@ import PyPDF2
 #                          1) ENVIRONMENT & LOGGING                           #
 ###############################################################################
-# Initialize Logging
 logger.add("error_logs.log", rotation="1 MB", level="ERROR")
 # Load environment variables
 load_dotenv()
 HUGGINGFACE_TOKEN = os.getenv("HF_TOKEN")
 OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
-BIOPORTAL_API_KEY = os.getenv("BIOPORTAL_API_KEY")
 ENTREZ_EMAIL = os.getenv("ENTREZ_EMAIL")
-# Validate API Keys
 if not HUGGINGFACE_TOKEN or not OPENAI_API_KEY:
     logger.error("Missing Hugging Face or OpenAI credentials.")
     raise ValueError("Missing credentials for Hugging Face or OpenAI.")
@@ -52,52 +59,42 @@ if not BIOPORTAL_API_KEY:
 # Hugging Face login
 login(HUGGINGFACE_TOKEN)
-# OpenAI Initialization
-openai.api_key = OPENAI_API_KEY
-# Device Configuration
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 logger.info(f"Using device: {device}")
-# Ensure spaCy model is downloaded (English Core Web)
-try:
-    nlp = spacy.load("en_core_web_sm")
-except OSError:
-    logger.info("Downloading SpaCy 'en_core_web_sm' model...")
-    spacy.cli.download("en_core_web_sm")
-    nlp = spacy.load("en_core_web_sm")
 ###############################################################################
 #                 2) HUGGING FACE & TRANSLATION MODEL SETUP                   #
 ###############################################################################
-# Outcome Prediction Model (Fine-Tuned BERT)
-OUTCOME_MODEL_NAME = "mgbam/bert-base-finetuned-mgbam"
 try:
-    outcome_model = AutoModelForSequenceClassification.from_pretrained(
-        OUTCOME_MODEL_NAME, use_auth_token=HUGGINGFACE_TOKEN
     ).to(device)
-    outcome_tokenizer = AutoTokenizer.from_pretrained(
-        OUTCOME_MODEL_NAME, use_auth_token=HUGGINGFACE_TOKEN
     )
 except Exception as e:
-    logger.error(f"Outcome Model load error: {e}")
     raise
-# Translation Model (English ↔ French)
-TRANSLATION_MODEL_NAME = "Helsinki-NLP/opus-mt-en-fr"
 try:
     translation_model = MarianMTModel.from_pretrained(
-        TRANSLATION_MODEL_NAME, use_auth_token=HUGGINGFACE_TOKEN
     ).to(device)
     translation_tokenizer = MarianTokenizer.from_pretrained(
-        TRANSLATION_MODEL_NAME, use_auth_token=HUGGINGFACE_TOKEN
     )
 except Exception as e:
     logger.error(f"Translation model load error: {e}")
     raise
-# Language Mapping for Translation
 LANGUAGE_MAP: Dict[str, Tuple[str, str]] = {
     "English to French": ("en", "fr"),
     "French to English": ("fr", "en"),
@@ -153,130 +150,33 @@ def parse_pubmed_xml(xml_data: str) -> List[Dict[str, Any]]:
         })
     return articles
-###############################################################################
-#                          5) ASYNC FETCH FUNCTIONS                           #
-###############################################################################
-async def fetch_articles_by_nct_id(nct_id: str) -> Dict[str, Any]:
-    """Fetch articles from Europe PMC using NCT ID."""
-    params = {"query": nct_id, "format": "json"}
-    async with httpx.AsyncClient() as client_http:
-        try:
-            resp = await client_http.get(EUROPE_PMC_BASE_URL, params=params)
-            resp.raise_for_status()
-            return resp.json()
-        except Exception as e:
-            logger.error(f"Error fetching articles for {nct_id}: {e}")
-            return {"error": str(e)}
-async def fetch_articles_by_query(query_params: str) -> Dict[str, Any]:
-    """Fetch articles from Europe PMC based on query parameters."""
-    parsed_params = safe_json_parse(query_params)
-    if not parsed_params or not isinstance(parsed_params, dict):
-        return {"error": "Invalid JSON."}
-    query_string = " AND ".join(f"{k}:{v}" for k, v in parsed_params.items())
-    req_params = {"query": query_string, "format": "json"}
-    async with httpx.AsyncClient() as client_http:
-        try:
-            resp = await client_http.get(EUROPE_PMC_BASE_URL, params=req_params)
-            resp.raise_for_status()
-            return resp.json()
-        except Exception as e:
-            logger.error(f"Error fetching Europe PMC articles: {e}")
-            return {"error": str(e)}
-async def fetch_pubmed_by_query(query_params: str) -> Dict[str, Any]:
-    """Fetch articles from PubMed based on query parameters."""
-    parsed_params = safe_json_parse(query_params)
-    if not parsed_params or not isinstance(parsed_params, dict):
-        return {"error": "Invalid JSON for PubMed."}
-    search_params = {
-        "db": "pubmed",
-        "retmode": "json",
-        "email": ENTREZ_EMAIL,
-        "retmax": parsed_params.get("retmax", "10"),
-        "term": parsed_params.get("term", ""),
-    }
-    async with httpx.AsyncClient() as client_http:
-        try:
-            # Search PubMed
-            search_resp = await client_http.get(PUBMED_SEARCH_URL, params=search_params)
-            search_resp.raise_for_status()
-            data = search_resp.json()
-            id_list = data.get("esearchresult", {}).get("idlist", [])
-            if not id_list:
-                return {"result": ""}
-            # Fetch PubMed Articles
-            fetch_params = {
-                "db": "pubmed",
-                "id": ",".join(id_list),
-                "retmode": "xml",
-                "email": ENTREZ_EMAIL,
-            }
-            fetch_resp = await client_http.get(PUBMED_FETCH_URL, params=fetch_params)
-            fetch_resp.raise_for_status()
-            return {"result": fetch_resp.text}
-        except Exception as e:
-            logger.error(f"Error fetching PubMed articles: {e}")
-            return {"error": str(e)}
-async def fetch_crossref_by_query(query_params: str) -> Dict[str, Any]:
-    """Fetch articles from Crossref based on query parameters."""
-    parsed_params = safe_json_parse(query_params)
-    if not parsed_params or not isinstance(parsed_params, dict):
-        return {"error": "Invalid JSON for Crossref."}
-    async with httpx.AsyncClient() as client_http:
-        try:
-            resp = await client_http.get(CROSSREF_API_URL, params=parsed_params)
-            resp.raise_for_status()
-            return resp.json()
-        except Exception as e:
-            logger.error(f"Error fetching Crossref data: {e}")
-            return {"error": str(e)}
-async def fetch_bioportal_by_query(query_params: str) -> Dict[str, Any]:
-    """
-    Fetch ontology data from BioPortal based on query parameters.
-    Expects JSON like: {"q": "cancer"}
-    """
-    if not BIOPORTAL_API_KEY:
-        return {"error": "No BioPortal API Key set."}
-    parsed_params = safe_json_parse(query_params)
-    if not parsed_params or not isinstance(parsed_params, dict):
-        return {"error": "Invalid JSON for BioPortal."}
-    search_term = parsed_params.get("q", "")
-    if not search_term:
-        return {"error": "No 'q' found in JSON. Provide a search term."}
-    url = f"{BIOPORTAL_API_BASE}/search"
-    headers = {"Authorization": f"apikey token={BIOPORTAL_API_KEY}"}
-    req_params = {"q": search_term}
-    async with httpx.AsyncClient() as client_http:
-        try:
-            resp = await client_http.get(url, params=req_params, headers=headers)
-            resp.raise_for_status()
-            return resp.json()
-        except Exception as e:
-            logger.error(f"Error fetching BioPortal data: {e}")
-            return {"error": str(e)}
 ###############################################################################
 #                           6) CORE FUNCTIONS                                 #
 ###############################################################################
 def summarize_text(text: str) -> str:
-    """Summarize clinical text using OpenAI GPT-3.5."""
     if not text.strip():
         return "No text provided for summarization."
     try:
-        response = openai.ChatCompletion.create(
             model="gpt-3.5-turbo",
             messages=[{"role": "user", "content": f"Summarize this clinical data:\n{text}"}],
-            max_tokens=500,
             temperature=0.7,
         )
         return response.choices[0].message.content.strip()
@@ -284,67 +184,19 @@ def summarize_text(text: str) -> str:
         logger.error(f"Summarization error: {e}")
         return "Summarization failed."
-def predict_outcome(text: str) -> Union[Dict[str, float], str]:
-    """Predict outcomes using a fine-tuned Hugging Face BERT model."""
-    if not text.strip():
-        return "No text provided for prediction."
-    try:
-        inputs = outcome_tokenizer(text, return_tensors="pt", truncation=True, padding=True)
-        inputs = {k: v.to(device) for k, v in inputs.items()}
-        with torch.no_grad():
-            outputs = outcome_model(**inputs)
-        probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)[0]
-        labels = outcome_model.config.id2label
-        return {labels[i]: float(prob.item()) for i, prob in enumerate(probabilities)}
-    except Exception as e:
-        logger.error(f"Prediction error: {e}")
-        return "Prediction failed."
-def translate_text(text: str, translation_option: str) -> str:
-    """Translate text between English and French using MarianMT."""
-    if not text.strip():
-        return "No text provided for translation."
-    try:
-        if translation_option not in LANGUAGE_MAP:
-            return "Unsupported translation option."
-        inputs = translation_tokenizer(text, return_tensors="pt", padding=True).to(device)
-        translated_tokens = translation_model.generate(**inputs)
-        translated_text = translation_tokenizer.decode(translated_tokens[0], skip_special_tokens=True)
-        return translated_text
-    except Exception as e:
-        logger.error(f"Translation error: {e}")
-        return "Translation failed."
-def perform_named_entity_recognition(text: str) -> str:
-    """Perform Named Entity Recognition using spaCy."""
-    if not text.strip():
-        return "No text provided for NER."
-    try:
-        doc = nlp(text)
-        entities = [(ent.text, ent.label_) for ent in doc.ents]
-        if not entities:
-            return "No named entities found."
-        return "\n".join(f"{t} -> {lbl}" for t, lbl in entities)
-    except Exception as e:
-        logger.error(f"NER error: {e}")
-        return "NER failed."
 def generate_report(text: str, filename: str = "clinical_report.pdf") -> Optional[str]:
-    """Generate a professional PDF report from the text using ReportLab."""
     try:
         if not text.strip():
             logger.warning("No text provided for the report.")
         c = canvas.Canvas(filename)
-        c.setFont("Helvetica-Bold", 16)
-        c.drawString(100, 800, "Clinical Research Report")
-        c.setFont("Helvetica", 12)
         lines = text.split("\n")
-        y = 780
         for line in lines:
             if y < 50:
                 c.showPage()
-                c.setFont("Helvetica", 12)
-                y = 800
             c.drawString(100, y, line)
             y -= 15
         c.save()
@@ -355,7 +207,7 @@ def generate_report(text: str, filename: str = "clinical_report.pdf") -> Optiona
         return None
 def visualize_predictions(predictions: Dict[str, float]) -> alt.Chart:
-    """Visualize prediction probabilities using Altair."""
     data = pd.DataFrame(list(predictions.items()), columns=["Label", "Probability"])
     chart = (
         alt.Chart(data)
@@ -369,345 +221,66 @@ def visualize_predictions(predictions: Dict[str, float]) -> alt.Chart:
     )
     return chart
-def fetch_web_search(query: str) -> str:
-    """Use OpenAI to perform a web search and provide explanations."""
-    if not query.strip():
-        return "No query provided for web search."
-    try:
-        response = openai.ChatCompletion.create(
-            model="gpt-3.5-turbo",
-            messages=[
-                {"role": "system", "content": "You are a helpful assistant that provides detailed explanations based on the latest research."},
-                {"role": "user", "content": f"Explain the following query using the latest research: {query}"},
-            ],
-            max_tokens=700,
-            temperature=0.7,
-        )
-        return response.choices[0].message.content.strip()
-    except Exception as e:
-        logger.error(f"Web search error: {e}")
-        return "Web search failed."
-###############################################################################
-#                     7) FILE PARSING (TXT, PDF, CSV, XLS)                    #
-###############################################################################
-def parse_pdf_file_as_str(file_up: gr.File) -> str:
-    """Extract text from a PDF file using PyPDF2."""
-    try:
-        pdf_bytes = file_up.read()
-        reader = PyPDF2.PdfReader(io.BytesIO(pdf_bytes))
-        return "\n".join(page.extract_text() or "" for page in reader.pages)
-    except Exception as e:
-        logger.error(f"PDF parse error: {e}")
-        return "Failed to extract text from PDF."
-def parse_text_file_as_str(file_up: gr.File) -> str:
-    """Extract text from a TXT file."""
-    try:
-        return file_up.read().decode("utf-8", errors="replace")
-    except Exception as e:
-        logger.error(f"TXT parse error: {e}")
-        return "Failed to extract text from TXT file."
-def parse_csv_file_to_df(file_up: gr.File) -> pd.DataFrame:
-    """Parse CSV file into a pandas DataFrame with multiple encoding attempts."""
-    try:
-        return pd.read_csv(io.StringIO(file_up.read().decode("utf-8", errors="replace")))
-    except UnicodeDecodeError:
-        try:
-            return pd.read_csv(io.StringIO(file_up.read().decode("latin1", errors="replace")))
-        except Exception as e:
-            logger.error(f"CSV parse error: {e}")
-            return pd.DataFrame()
-    except Exception as e:
-        logger.error(f"CSV parse error: {e}")
-        return pd.DataFrame()
-def parse_excel_file_to_df(file_up: gr.File) -> pd.DataFrame:
-    """Parse Excel file into a pandas DataFrame."""
-    try:
-        return pd.read_excel(io.BytesIO(file_up.read()), engine="openpyxl")
-    except Exception as e:
-        logger.error(f"Excel parse error: {e}")
-        return pd.DataFrame()
 ###############################################################################
-#                        8) BUILDING THE GRADIO APP                           #
 ###############################################################################
-def format_articles(articles: List[Dict[str, Any]]) -> str:
-    """Format fetched articles into a readable string."""
-    formatted = ""
-    for article in articles:
-        title = article.get("title", "No Title")
-        journal = article.get("journalTitle", "No Journal")
-        pub_year = article.get("pubYear", "No Year")
-        formatted += f"Title: {title}\nJournal: {journal} ({pub_year})\n\n"
-    return formatted.strip()
-def format_bioportal_results(collection: List[Dict[str, Any]]) -> str:
-    """Format BioPortal results into a readable string."""
-    formatted = ""
-    for col in collection:
-        label = col.get("prefLabel", "No Label")
-        ontology = col.get("ontology", {}).get("name", "No Ontology")
-        formatted += f"Label: {label}\nOntology: {ontology}\n\n"
-    return formatted.strip()
-async def handle_action(
-    action: str,
-    txt: Optional[str],
-    file_up: Optional[gr.File],
-    translation_opt: Optional[str],
-    query_str: Optional[str],
-    nct_id: Optional[str],
-    report_fn: Optional[str],
-    exp_fmt: Optional[str]
-) -> Tuple[Optional[str], Optional[Any], Optional[Any], Optional[str]]:
-    """
-    Master function to handle user actions.
-    Returns a 4-tuple mapped to (output_text, output_chart, output_chart2, output_file).
-    """
-    try:
-        combined_text = txt.strip() if txt else ""
-        # 1) If user uploaded a file, parse text from it
-        if file_up is not None:
-            ext = os.path.splitext(file_up.name)[1].lower()
-            if ext == ".txt":
-                parsed_text = parse_text_file_as_str(file_up)
-                combined_text += "\n" + parsed_text
-            elif ext == ".pdf":
-                parsed_text = parse_pdf_file_as_str(file_up)
-                combined_text += "\n" + parsed_text
-            elif ext == ".csv":
-                df_csv = parse_csv_file_to_df(file_up)
-                combined_text += "\n" + df_csv.to_csv(index=False)
-            elif ext in [".xls", ".xlsx"]:
-                df_xl = parse_excel_file_to_df(file_up)
-                combined_text += "\n" + df_xl.to_csv(index=False)
-            else:
-                return "Unsupported file format.", None, None, None
-        # 2) Branch by action
-        if action == "Summarize":
-            summary = summarize_text(combined_text)
-            return summary, None, None, None
-        elif action == "Predict Outcome":
-            preds = predict_outcome(combined_text)
-            if isinstance(preds, dict):
-                chart = visualize_predictions(preds)
-                return json.dumps(preds, indent=2), chart, None, None
-            return preds, None, None, None
-        elif action == "Generate Report":
-            path = generate_report(combined_text, report_fn or "clinical_report.pdf")
-            msg = f"Report generated: {path}" if path else "Report generation failed."
-            return msg, None, None, path
-        elif action == "Translate":
-            translated = translate_text(combined_text, translation_opt or "English to French")
-            return translated, None, None, None
-        elif action == "Perform Named Entity Recognition":
-            ner_result = perform_named_entity_recognition(combined_text)
-            return ner_result, None, None, None
-        elif action == "Fetch Clinical Studies":
-            if nct_id:
-                result = await fetch_articles_by_nct_id(nct_id)
-            elif query_str:
-                result = await fetch_articles_by_query(query_str)
-            else:
-                return "Provide either an NCT ID or valid query parameters.", None, None, None
-            articles = result.get("resultList", {}).get("result", [])
-            if not articles:
-                return "No articles found.", None, None, None
-            formatted = format_articles(articles)
-            return formatted, None, None, None
-        elif action in ["Fetch PubMed Articles (Legacy)", "Fetch PubMed by Query"]:
-            pubmed_result = await fetch_pubmed_by_query(query_str or "")
-            xml_data = pubmed_result.get("result")
-            if xml_data:
-                articles = parse_pubmed_xml(xml_data)
-                if not articles:
-                    return "No articles found.", None, None, None
-                formatted = "\n\n".join(
-                    f"{a['Title']} - {a['Journal']} ({a['PublicationDate']})"
-                    for a in articles if a['Title']
-                )
-                return formatted if formatted else "No articles found.", None, None, None
-            return "No articles found or error in fetching PubMed data.", None, None, None
-        elif action == "Fetch Crossref by Query":
-            crossref_result = await fetch_crossref_by_query(query_str or "")
-            items = crossref_result.get("message", {}).get("items", [])
-            if not items:
-                return "No results found.", None, None, None
-            crossref_formatted = "\n\n".join(
-                f"Title: {it.get('title', ['No title'])[0]}, DOI: {it.get('DOI')}"
-                for it in items
-            )
-            return crossref_formatted, None, None, None
-        elif action == "Fetch BioPortal by Query":
-            bp_result = await fetch_bioportal_by_query(query_str or "")
-            collection = bp_result.get("collection", [])
-            if not collection:
-                return "No BioPortal results found.", None, None, None
-            formatted = format_bioportal_results(collection)
-            return formatted, None, None, None
-        elif action == "Web Search Explanation":
-            explanation = fetch_web_search(combined_text)
-            return explanation, None, None, None
-        else:
-            return "Invalid action selected.", None, None, None
-    except Exception as ex:
-        # Catch all exceptions, log, and return traceback to 'output_text'
-        tb_str = traceback.format_exc()
-        logger.error(f"Exception in handle_action:\n{tb_str}")
-        return f"Traceback:\n{tb_str}", None, None, None
-###############################################################################
-#                     9) BUILDING THE GRADIO APP                           #
-###############################################################################
-with gr.Blocks(css="""
-    .gradio-container {
-        background-color: #f5f5f5;
-    }
-    .gr-button-primary {
-        background-color: #4CAF50;
-    }
-    .gradio-tabs {
-        background-color: #ffffff;
-    }
-""") as demo:
-    gr.Markdown("# 🏥 **AI-Driven Clinical Assistant**")
     gr.Markdown("""
-    **Highlights**:
-    - **Summarize** clinical text (OpenAI GPT-3.5)
-    - **Predict** outcomes (Hugging Face fine-tuned model)
-    - **Translate** (English ↔ French)
-    - **Named Entity Recognition** (spaCy)
-    - **Fetch** from PubMed, Crossref, Europe PMC, and **BioPortal**
-    - **Generate** professional PDF reports
-    - **Web Search Explanations** (OpenAI)
-    *Disclaimer*: This is a research demo, **not** a medical device.
-    """)
-    with gr.Row():
-        text_input = gr.Textbox(
-            label="Input Clinical Text",
-            lines=5,
-            placeholder="Enter clinical text, research notes, or queries...",
-            interactive=True
-        )
-        file_input = gr.File(
-            label="Upload File",
-            file_types=[".txt", ".csv", ".xls", ".xlsx", ".pdf"],
-            interactive=True
-        )
     action = gr.Radio(
         [
             "Summarize",
-            "Predict Outcome",
             "Generate Report",
-            "Translate",
-            "Perform Named Entity Recognition",
-            "Fetch Clinical Studies",
-            "Fetch PubMed Articles (Legacy)",
-            "Fetch PubMed by Query",
-            "Fetch Crossref by Query",
-            "Fetch BioPortal by Query",
-            "Web Search Explanation"
         ],
         label="Select an Action",
-        interactive=True
-    )
-    translation_option = gr.Dropdown(
-        choices=list(LANGUAGE_MAP.keys()),
-        label="Translation Option",
-        value="English to French",
-        interactive=True
-    )
-    query_params_input = gr.Textbox(
-        label="Query Parameters (JSON)",
-        placeholder='{"term": "cancer"} or {"q": "cancer"} for BioPortal',
-        interactive=True
     )
-    nct_id_input = gr.Textbox(
-        label="NCT ID",
-        placeholder="Enter NCT ID (e.g., NCT00000000)",
-        interactive=True
-    )
-    report_filename_input = gr.Textbox(
-        label="Report Filename",
-        value="clinical_report.pdf",
-        interactive=True
-    )
-    exp_fmt = gr.Dropdown(
-        choices=["None", "CSV", "JSON"],
-        label="Export Format",
-        value="None",
-        interactive=True
-    )
-    # Outputs
-    output_text = gr.Textbox(
-        label="Output",
-        lines=20,
-        interactive=False
-    )
-    with gr.Row():
-        output_chart = gr.Plot(label="Prediction Probabilities")
-        output_chart2 = gr.Plot(label="Additional Visualization")  # Placeholder for future use
-    output_file = gr.File(label="Generated File", interactive=False)
-    submit_btn = gr.Button("Submit", variant="primary")
-    gr.Markdown("""
-    ---
-    ### **Important Disclaimers**
-    - **Not a Medical Device**: This tool is not intended to provide clinical diagnoses or final medical decisions. Always consult qualified healthcare professionals for clinical decisions.
-    - **AI/ML Limitations**: GPT-based summaries and classification models offer powerful insights but may generate incomplete or inaccurate results. Always verify AI-generated content.
-    - **Credential Security**: Ensure the security of your API keys (`OPENAI_API_KEY`, `HF_TOKEN`, `BIOPORTAL_API_KEY`) to safely access external services.
-    - **Data Privacy**: If handling real patient data, ensure compliance with applicable data protection regulations (e.g., HIPAA, GDPR).
-    ---
-    """)
-    # Connect the submit button to the action handler
     submit_btn.click(
-        fn=lambda action, txt, file_up, trans_opt, query, nct_id, report_fn, exp_fm: asyncio.run(
-            handle_action(action, txt, file_up, trans_opt, query, nct_id, report_fn, exp_fm)
-        ),
-        inputs=[action, text_input, file_input, translation_option, query_params_input, nct_id_input, report_filename_input, exp_fmt],
-        outputs=[output_text, output_chart, output_chart2, output_file],
     )
-###############################################################################
-#                    10) LAUNCHING THE GRADIO APP                            #
-###############################################################################
 # Launch the Gradio interface
 demo.launch(server_name="0.0.0.0", server_port=7860, share=True)

 from dotenv import load_dotenv
 from loguru import logger
 from huggingface_hub import login
+from openai import OpenAI
 from reportlab.pdfgen import canvas
 from transformers import (
     AutoTokenizer,
 #                          1) ENVIRONMENT & LOGGING                           #
 ###############################################################################
+# Ensure spaCy model is downloaded (English Core Web)
+try:
+    nlp = spacy.load("en_core_web_sm")
+except OSError:
+    logger.info("Downloading SpaCy 'en_core_web_sm' model...")
+    spacy.cli.download("en_core_web_sm")
+    nlp = spacy.load("en_core_web_sm")
+# Logging
 logger.add("error_logs.log", rotation="1 MB", level="ERROR")
 # Load environment variables
 load_dotenv()
 HUGGINGFACE_TOKEN = os.getenv("HF_TOKEN")
 OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
+BIOPORTAL_API_KEY = os.getenv("BIOPORTAL_API_KEY")  # For BioPortal integration
 ENTREZ_EMAIL = os.getenv("ENTREZ_EMAIL")
 if not HUGGINGFACE_TOKEN or not OPENAI_API_KEY:
     logger.error("Missing Hugging Face or OpenAI credentials.")
     raise ValueError("Missing credentials for Hugging Face or OpenAI.")
 # Hugging Face login
 login(HUGGINGFACE_TOKEN)
+# OpenAI
+client = OpenAI(api_key=OPENAI_API_KEY)
+# Device: CPU or GPU
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 logger.info(f"Using device: {device}")
 ###############################################################################
 #                 2) HUGGING FACE & TRANSLATION MODEL SETUP                   #
 ###############################################################################
+MODEL_NAME = "mgbam/bert-base-finetuned-mgbam"
 try:
+    model = AutoModelForSequenceClassification.from_pretrained(
+        MODEL_NAME, use_auth_token=HUGGINGFACE_TOKEN
     ).to(device)
+    tokenizer = AutoTokenizer.from_pretrained(
+        MODEL_NAME, use_auth_token=HUGGINGFACE_TOKEN
     )
 except Exception as e:
+    logger.error(f"Model load error: {e}")
     raise
 try:
+    translation_model_name = "Helsinki-NLP/opus-mt-en-fr"
     translation_model = MarianMTModel.from_pretrained(
+        translation_model_name, use_auth_token=HUGGINGFACE_TOKEN
     ).to(device)
     translation_tokenizer = MarianTokenizer.from_pretrained(
+        translation_model_name, use_auth_token=HUGGINGFACE_TOKEN
     )
 except Exception as e:
     logger.error(f"Translation model load error: {e}")
     raise
+# Language map for translation
 LANGUAGE_MAP: Dict[str, Tuple[str, str]] = {
     "English to French": ("en", "fr"),
     "French to English": ("fr", "en"),
         })
     return articles
+def explain_clinical_results(results: str) -> str:
+    """Generate a clinical explanation from raw results.
+
+    Sends ``results`` to the OpenAI chat-completions endpoint through the
+    module-level ``client`` and returns the model's reply, stripped of
+    surrounding whitespace.
+
+    Args:
+        results: Raw clinical test results or trial outcomes as text.
+
+    Returns:
+        The explanation text; a fixed notice when ``results`` is empty or
+        whitespace-only; or "Failed to generate explanation." when the API
+        call raises.
+    """
+    # Mirror summarize_text: skip the API round trip when there is no input.
+    if not results or not results.strip():
+        return "No results provided for explanation."
+    try:
+        response = client.chat.completions.create(
+            model="gpt-3.5-turbo",
+            messages=[{"role": "user", "content": f"Explain the clinical test results:\n{results}"}],
+            max_tokens=500,
+            temperature=0.7,
+        )
+        return response.choices[0].message.content.strip()
+    except Exception as e:
+        # Best-effort UI helper: log the failure and return a user-facing message.
+        logger.error(f"Explanation error: {e}")
+        return "Failed to generate explanation."
 ###############################################################################
 #                           6) CORE FUNCTIONS                                 #
 ###############################################################################
 def summarize_text(text: str) -> str:
+    """OpenAI GPT-3.5 summarization."""
     if not text.strip():
         return "No text provided for summarization."
     try:
+        response = client.chat.completions.create(
             model="gpt-3.5-turbo",
             messages=[{"role": "user", "content": f"Summarize this clinical data:\n{text}"}],
+            max_tokens=200,
             temperature=0.7,
         )
         return response.choices[0].message.content.strip()
         logger.error(f"Summarization error: {e}")
         return "Summarization failed."
 def generate_report(text: str, filename: str = "clinical_report.pdf") -> Optional[str]:
+    """Generate a professional PDF report from the text."""
     try:
         if not text.strip():
             logger.warning("No text provided for the report.")
         c = canvas.Canvas(filename)
+        c.drawString(100, 750, "Clinical Research Report")
         lines = text.split("\n")
+        y = 730
         for line in lines:
             if y < 50:
                 c.showPage()
+                y = 750
             c.drawString(100, y, line)
             y -= 15
         c.save()
         return None
 def visualize_predictions(predictions: Dict[str, float]) -> alt.Chart:
+    """Simple Altair bar chart to visualize classification probabilities."""
     data = pd.DataFrame(list(predictions.items()), columns=["Label", "Probability"])
     chart = (
         alt.Chart(data)
     )
     return chart
 ###############################################################################
+#                     7) BUILDING THE GRADIO APP                           #
 ###############################################################################
+with gr.Blocks() as demo:
+    gr.Markdown("# 🏥 AI-Driven Clinical Assistant")
     gr.Markdown("""
+**Highlights**:
+- **Summarize** clinical text (OpenAI GPT-3.5)
+- **Explain** clinical test results and trial outcomes
+- **Generate** professional PDF reports
+""")
+    text_input = gr.Textbox(label="Input Text", lines=5, placeholder="Enter clinical text or test results...")
     action = gr.Radio(
         [
             "Summarize",
+            "Explain Clinical Results",
             "Generate Report",
         ],
         label="Select an Action",
     )
+    # The click handler below lists this component as an input, so it must be
+    # defined here; without it the module fails at import with a NameError.
+    report_filename_input = gr.Textbox(
+        label="Report Filename",
+        value="clinical_report.pdf",
+    )
+    output_text = gr.Textbox(label="Output", lines=8)
+    output_file = gr.File(label="Generated File")
+    submit_btn = gr.Button("Submit")
+    async def handle_action(
+        action: str,
+        txt: str,
+        report_fn: str
+    ) -> Tuple[Optional[str], Optional[str]]:
+        """Handle clinical actions based on the user's selection.
+
+        Args:
+            action: Label of the selected radio option.
+            txt: Text entered in the input textbox.
+            report_fn: Filename to use for the generated PDF report.
+
+        Returns:
+            A 2-tuple mapped to (output_text, output_file). The second item is
+            the report path for "Generate Report" and None otherwise.
+        """
+        try:
+            # Tolerate a missing textbox value instead of raising on .strip().
+            combined_text = (txt or "").strip()
+            if action == "Summarize":
+                summary = summarize_text(combined_text)
+                return summary, None
+            elif action == "Explain Clinical Results":
+                explanation = explain_clinical_results(combined_text)
+                return explanation, None
+            elif action == "Generate Report":
+                # Fall back to the default name when the filename box is blank.
+                path = generate_report(combined_text, report_fn or "clinical_report.pdf")
+                msg = f"Report generated: {path}" if path else "Report generation failed."
+                return msg, path
+            return "Invalid action.", None
+        except Exception as e:
+            logger.error(f"Exception: {e}")
+            return f"Error: {str(e)}", None
     submit_btn.click(
+        fn=handle_action,
+        inputs=[action, text_input, report_filename_input],
+        outputs=[output_text, output_file],
     )
 # Launch the Gradio interface
 demo.launch(server_name="0.0.0.0", server_port=7860, share=True)