import os import re import random import subprocess import requests import streamlit as st import spacy # for additional NLP processing from langchain_huggingface import HuggingFaceEndpoint from langchain_core.prompts import PromptTemplate from langchain_core.output_parsers import StrOutputParser from transformers import pipeline # Use environment variables for keys HF_TOKEN = os.getenv("HF_TOKEN") if HF_TOKEN is None: raise ValueError("HF_TOKEN environment variable not set. Please set it in your Hugging Face Space settings.") NASA_API_KEY = os.getenv("NASA_API_KEY") if NASA_API_KEY is None: raise ValueError("NASA_API_KEY environment variable not set. Please set it in your Hugging Face Space settings.") # Must be the first Streamlit command! st.set_page_config(page_title="HAL - NASA ChatBot", page_icon="🚀") # --- Initialize Session State Variables --- if "chat_history" not in st.session_state: st.session_state.chat_history = [{"role": "assistant", "content": "Hello! How can I assist you today?"}] if "response_ready" not in st.session_state: st.session_state.response_ready = False if "follow_up" not in st.session_state: st.session_state.follow_up = "" # --- Set Up Model & API Functions --- model_id = "mistralai/Mistral-7B-Instruct-v0.3" sentiment_analyzer = pipeline( "sentiment-analysis", model="distilbert/distilbert-base-uncased-finetuned-sst-2-english", revision="714eb0f" ) # --- Helper to load spaCy model with fallback --- def load_spacy_model(): try: return spacy.load("en_core_web_sm") except OSError: subprocess.run(["python", "-m", "spacy", "download", "en_core_web_sm"], check=True) return spacy.load("en_core_web_sm") nlp_spacy = load_spacy_model() def get_llm_hf_inference(model_id=model_id, max_new_tokens=128, temperature=0.7): return HuggingFaceEndpoint( repo_id=model_id, max_new_tokens=max_new_tokens, temperature=temperature, token=os.getenv("HF_TOKEN"), task="text-generation" ) def get_nasa_apod(): url = f"https://api.nasa.gov/planetary/apod?api_key={os.getenv('NASA_API_KEY')}" response = requests.get(url) if response.status_code == 200: data = response.json() return data.get("url", ""), data.get("title", ""), data.get("explanation", "") else: return "", "NASA Data Unavailable", "I couldn't fetch data from NASA right now. Please try again later." def analyze_sentiment(user_text): result = sentiment_analyzer(user_text)[0] return result['label'] def predict_action(user_text): if "nasa" in user_text.lower() or "space" in user_text.lower(): return "nasa_info" return "general_query" def extract_context(text): """ Uses spaCy to extract named entities for additional context. """ doc = nlp_spacy(text) entities = [ent.text for ent in doc.ents] return ", ".join(entities) if entities else "" def is_apod_query(user_text): """ Checks if the user's question contains keywords indicating they are asking for the Astronomy Picture of the Day. """ keywords = ["apod", "image", "picture", "photo", "astronomy picture"] return any(keyword in user_text.lower() for keyword in keywords) def generate_follow_up(user_text): """ Generates two variant follow-up questions and randomly selects one. """ prompt_text = ( f"Based on the user's question: '{user_text}', generate two concise, friendly follow-up questions " "that invite further discussion. For example, one might be 'Would you like to know more about the six types of quarks?' " "and another 'Would you like to explore another aspect of quantum physics?'. Do not include extra commentary. " "Answer exclusively in English." ) hf = get_llm_hf_inference(max_new_tokens=80, temperature=0.9) output = hf.invoke(input=prompt_text).strip() variants = re.split(r"\n|[;]+", output) cleaned = [v.strip(' "\'') for v in variants if v.strip()] if not cleaned: cleaned = ["Would you like to explore this topic further?"] return random.choice(cleaned) def get_response(system_message, chat_history, user_text, max_new_tokens=1024): """ Generates HAL's detailed, in-depth answer and a follow-up question. Incorporates sentiment analysis, additional NLP context, and style instructions. """ sentiment = analyze_sentiment(user_text) action = predict_action(user_text) # If the user's NASA-related query is specifically an APOD query, handle it specially. if action == "nasa_info" and is_apod_query(user_text): nasa_url, nasa_title, nasa_explanation = get_nasa_apod() response = f"**{nasa_title}**\n\n{nasa_explanation}" chat_history.append({'role': 'user', 'content': user_text}) chat_history.append({'role': 'assistant', 'content': response}) follow_up = generate_follow_up(user_text) chat_history.append({'role': 'assistant', 'content': follow_up}) return response, follow_up, chat_history, nasa_url # Otherwise, treat NASA-related queries as general queries. hf = get_llm_hf_inference(max_new_tokens=max_new_tokens, temperature=0.9) filtered_history = "" for message in chat_history: if message["role"] == "assistant" and message["content"].strip() == "Hello! How can I assist you today?": continue filtered_history += f"{message['role']}: {message['content']}\n" # Extract style instructions if provided. style_instruction = "" lower_text = user_text.lower() if "in the voice of" in lower_text or "speaking as" in lower_text: match = re.search(r"(in the voice of|speaking as)(.*)", lower_text) if match: style_instruction = match.group(2).strip().capitalize() style_instruction = f" Please respond in the voice of {style_instruction}." context_info = extract_context(user_text) context_clause = f" The key topics here are: {context_info}." if context_info else "" language_clause = " Answer exclusively in English." style_clause = style_instruction if style_instruction else "" prompt = PromptTemplate.from_template( ( "[INST] {system_message}\n\nCurrent Conversation:\n{chat_history}\n\n" "User: {user_text}.\n [/INST]\n" "AI: Please provide a detailed, in-depth answer in a friendly, conversational tone that thoroughly covers the topic." + style_clause + context_clause + language_clause + "\nHAL:" ) ) chat = prompt | hf.bind(skip_prompt=True) | StrOutputParser(output_key='content') raw_output = chat.invoke(input=dict(system_message=system_message, user_text=user_text, chat_history=filtered_history)) response = raw_output.split("HAL:")[-1].strip() if not response: response = "Certainly, here is an in-depth explanation: [Fallback explanation]." chat_history.append({'role': 'user', 'content': user_text}) chat_history.append({'role': 'assistant', 'content': response}) if sentiment == "NEGATIVE" and not user_text.strip().endswith("?"): response = "I'm sorry you're feeling this way. I'm here to help. What can I do to assist you further?" chat_history[-1]['content'] = response follow_up = generate_follow_up(user_text) chat_history.append({'role': 'assistant', 'content': follow_up}) return response, follow_up, chat_history, None # --- Chat UI --- st.title("🚀 HAL - Your NASA AI Assistant") st.markdown("🌌 *Ask me about space, NASA, and beyond!*") if st.sidebar.button("Reset Chat"): st.session_state.chat_history = [{"role": "assistant", "content": "Hello! How can I assist you today?"}] st.session_state.response_ready = False st.session_state.follow_up = "" st.experimental_rerun() # --- Appearance CSS --- st.markdown(""" """, unsafe_allow_html=True) st.markdown("
", unsafe_allow_html=True) for message in st.session_state.chat_history: if message["role"] == "user": st.markdown(f"
You: {message['content']}
", unsafe_allow_html=True) else: st.markdown(f"
HAL: {message['content']}
", unsafe_allow_html=True) st.markdown("
", unsafe_allow_html=True) user_input = st.chat_input("Type your message here...") if user_input: response, follow_up, st.session_state.chat_history, image_url = get_response( system_message="You are a helpful AI assistant.", user_text=user_input, chat_history=st.session_state.chat_history ) if image_url: st.image(image_url, caption="NASA Image of the Day") st.session_state.follow_up = follow_up st.session_state.response_ready = True