"""Smart Bubble Protocol.

A small Streamlit app that lets a user collect pieces of text, "like" some of
them, and then scores every collected item by its TF-IDF cosine similarity to
the liked items.
"""

from collections import Counter

import numpy as np  # noqa: F401  (kept: file-level import, unused in this script body)
import pandas as pd
import streamlit as st
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Maximum number of characters shown in the "Content Preview" column.
PREVIEW_LENGTH = 100


class SmartBubble:
    """Accumulates liked content and scores new content against it."""

    def __init__(self):
        # Frequency of whitespace-separated, lower-cased words over liked content.
        self.keywords = Counter()
        # Raw text of every liked item, in the order it was liked.
        self.liked_content = []
        # Refitted on every similarity query, so it holds no long-lived state.
        self.vectorizer = TfidfVectorizer(stop_words='english')

    def absorb_content(self, content, is_liked=True):
        """Record *content* as liked and add its words to the keyword counts.

        Args:
            content: Text to absorb.
            is_liked: When false the call is a no-op.
        """
        if not is_liked:
            return
        self.keywords.update(content.lower().split())
        self.liked_content.append(content)

    def get_similarity_score(self, new_content):
        """Return the mean cosine similarity of *new_content* to liked content.

        Args:
            new_content: Text to compare against everything liked so far.

        Returns:
            A float; ``0.0`` when nothing has been liked yet or when no
            document yields any usable term.
        """
        if not self.liked_content:
            return 0.0

        # The candidate goes last so it can be sliced off the matrix below.
        documents = self.liked_content + [new_content]

        try:
            tfidf_matrix = self.vectorizer.fit_transform(documents)
        except ValueError:
            # scikit-learn raises ValueError ("empty vocabulary") when every
            # document consists solely of stop words / non-token characters.
            return 0.0

        similarities = cosine_similarity(
            tfidf_matrix[-1:],  # New content vector
            tfidf_matrix[:-1],  # Liked content vectors
        )
        return float(similarities.mean())


def _preview(content, limit=PREVIEW_LENGTH):
    """Return *content* cut to *limit* characters, with "..." only if truncated."""
    if len(content) <= limit:
        return content
    return content[:limit] + "..."


def _build_analysis_frame(bubble, contents):
    """Build the analysis table for *contents*, highest similarity first.

    Sorting is done on the numeric score; the percentage string is only a
    display format (sorting the strings would rank "9.00%" above "10.00%").
    """
    rows = []
    for content in contents:
        top_words = Counter(content.lower().split()).most_common(5)
        rows.append({
            'Content Preview': _preview(content),
            'Similarity Score': bubble.get_similarity_score(content),
            'Top Keywords': " ".join(word for word, _ in top_words),
        })

    df = pd.DataFrame(rows)
    # mergesort is stable, so ties keep their insertion order.
    df = df.sort_values('Similarity Score', ascending=False, kind='mergesort')
    df['Similarity Score'] = df['Similarity Score'].map(lambda s: f"{s:.2%}")
    return df


# Streamlit app
st.title("Smart Bubble Protocol")

# Initialize session state
if 'smart_bubble' not in st.session_state:
    st.session_state.smart_bubble = SmartBubble()
if 'content_list' not in st.session_state:
    st.session_state.content_list = []

# Input section
st.header("Add New Content")
new_content = st.text_area("Enter content to analyze:")
if st.button("Add to Content List"):
    # Ignore empty and whitespace-only submissions.
    if new_content.strip():
        st.session_state.content_list.append(new_content)
        st.success("Content added!")

# Like/Training section
st.header("Train Your Bubble")
for i, content in enumerate(st.session_state.content_list):
    col1, col2 = st.columns([4, 1])
    with col1:
        st.text_area(f"Content {i+1}", content, height=100, key=f"content_{i}")
    with col2:
        if st.button("Like", key=f"like_{i}"):
            st.session_state.smart_bubble.absorb_content(content)
            st.success("Content absorbed into bubble!")

# Analysis section
st.header("Smart Bubble Analysis")
if st.session_state.content_list:
    # Create and display analysis DataFrame
    st.dataframe(
        _build_analysis_frame(
            st.session_state.smart_bubble,
            st.session_state.content_list,
        )
    )

# Bubble insights
st.header("Bubble Insights")
if st.session_state.smart_bubble.keywords:
    top_keywords = st.session_state.smart_bubble.keywords.most_common(10)
    st.write("Top keywords in your bubble:")
    for word, count in top_keywords:
        st.write(f"- {word}: {count}")