import streamlit as st from chatbot import setup_db, fetch_reddit_data, get_chatbot_response, get_db_conn import json import re from urllib.parse import urlparse # Helper function to remove image URLs from text. def remove_image_urls(text): # Regex pattern to remove URLs that end with typical image extensions. image_url_pattern = r'https?://\S+\.(?:png|jpg|jpeg|webp)\S*' return re.sub(image_url_pattern, '', text) # Initialize DB and configure Streamlit setup_db() st.set_page_config(layout="wide") st.title("🚀 Reddit Intelligent Chatbot") # Use session state to store selected post ID for chat context if 'selected_post_id' not in st.session_state: st.session_state['selected_post_id'] = None # Sidebar: Enter keyword and fetch data with st.sidebar: st.header("🔍 Fetch Reddit Data") keyword = st.text_input("Enter keyword/topic:") days = st.slider("Days range:", 1, 90, 7) if st.button("Fetch Data"): fetch_reddit_data(keyword, days=days) st.success("Data fetched!") st.session_state['selected_post_id'] = None # Reset selected post on new fetch # Create two columns: Chat area (left) and Posts display (right) chat_col, posts_col = st.columns([3, 2]) # RIGHT SIDE: Display fetched posts with resource-gather style card layout. with posts_col: st.subheader("📋 Reddit Posts") if keyword.strip(): conn = get_db_conn() cur = conn.cursor() cur.execute(""" SELECT reddit_id, title, post_text, comments, metadata, created_at FROM reddit_posts WHERE keyword = ? ORDER BY datetime(created_at) DESC; """, (keyword,)) posts = cur.fetchall() cur.close() conn.close() if posts: # Set up custom CSS for the card layout. st.markdown( """ """, unsafe_allow_html=True) for post in posts: reddit_id, title, post_text, comments, metadata, created_at = post try: comments_list = json.loads(comments) if isinstance(comments, str) else comments except Exception: comments_list = comments # Convert metadata from JSON string to dict try: metadata = json.loads(metadata) if isinstance(metadata, str) else metadata except Exception as e: metadata = {} post_url = metadata.get('url', "#") subreddit = metadata.get('subreddit', 'N/A') created_str = created_at.strftime('%Y-%m-%d %H:%M:%S') # Remove image URLs from post_text so that they don't show up in the snippet. cleaned_text = remove_image_urls(post_text) snippet = cleaned_text[:200] + ("..." if len(cleaned_text) > 200 else "") # Build the card using HTML. The title itself is a clickable link. card_html = f"""