# File size: 3,303 bytes (revision 7a5cd47)
import streamlit as st
import pandas as pd
from collections import Counter
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
class SmartBubble:
    """Accumulate liked content and score new content against it.

    Instance fields:
        keywords: ``Counter`` of lower-cased, whitespace-separated tokens
            taken from every liked piece of content.
        liked_content: the liked texts, in the order they were absorbed.
        vectorizer: a ``TfidfVectorizer`` with English stop words; it is
            refit on every call to ``get_similarity_score``.
    """

    def __init__(self):
        self.keywords = Counter()
        self.liked_content = []
        self.vectorizer = TfidfVectorizer(stop_words='english')

    def absorb_content(self, content: str, is_liked: bool = True) -> None:
        """Record *content* in the bubble when it is liked.

        Args:
            content: raw text to absorb.
            is_liked: when false the call is a no-op.
        """
        if not is_liked:
            return
        # Tokenise only when the result is used.
        self.keywords.update(content.lower().split())
        self.liked_content.append(content)

    def get_similarity_score(self, new_content: str) -> float:
        """Return the mean cosine similarity of *new_content* to liked content.

        The liked texts plus *new_content* are vectorised together with
        TF-IDF, and the new text's vector is compared with each liked one.

        Args:
            new_content: text to score.

        Returns:
            The mean similarity as a float; ``0.0`` when nothing has been
            liked yet or when no usable vocabulary can be built.
        """
        if not self.liked_content:
            return 0.0

        corpus = self.liked_content + [new_content]
        try:
            tfidf_matrix = self.vectorizer.fit_transform(corpus)
        except ValueError:
            # scikit-learn raises ValueError ("empty vocabulary") when no
            # document contributes a token, e.g. blank or stop-word-only
            # text. With no shared vocabulary there is nothing to compare.
            return 0.0

        similarities = cosine_similarity(
            tfidf_matrix[-1:],  # new content vector
            tfidf_matrix[:-1],  # liked content vectors
        )
        return float(similarities.mean())
# Streamlit app
st.title("Smart Bubble Protocol")

# Initialize session state: create each per-session object only if it is
# not already present, so existing values are kept.
for state_key, make_default in (
    ("smart_bubble", SmartBubble),
    ("content_list", list),
):
    if state_key not in st.session_state:
        st.session_state[state_key] = make_default()
# Input section
st.header("Add New Content")
new_content = st.text_area("Enter content to analyze:")
if st.button("Add to Content List"):
    # Reject blank or whitespace-only input: it would add an empty document
    # to the content list and to any later similarity scoring.
    if new_content.strip():
        st.session_state.content_list.append(new_content)
        st.success("Content added!")
# Like/Training section
st.header("Train Your Bubble")
for idx, item in enumerate(st.session_state.content_list):
    # Wide column shows the text, narrow column holds the Like button.
    text_col, like_col = st.columns([4, 1])
    text_col.text_area(
        f"Content {idx + 1}",
        item,
        height=100,
        key=f"content_{idx}",
    )
    liked = like_col.button("Like", key=f"like_{idx}")
    if liked:
        # Feed the liked text into the bubble and confirm next to the button.
        st.session_state.smart_bubble.absorb_content(item)
        like_col.success("Content absorbed into bubble!")
# Analysis section
st.header("Smart Bubble Analysis")
if st.session_state.content_list:
    bubble = st.session_state.smart_bubble
    analysis_data = []
    for content in st.session_state.content_list:
        score = bubble.get_similarity_score(content)
        # Five most frequent whitespace-separated tokens of this content.
        keywords = " ".join(
            word for word, _ in Counter(content.lower().split()).most_common(5)
        )
        # Add the ellipsis only when the preview was actually cut off.
        preview = content if len(content) <= 100 else content[:100] + "..."
        analysis_data.append({
            'Content Preview': preview,
            'Similarity Score': f"{score:.2%}",
            'Top Keywords': keywords,
            # Raw numeric score, kept only for sorting and dropped below.
            '_score': score,
        })

    # Create and display analysis DataFrame.
    # Sort on the numeric score: the formatted percentage is a string and
    # would order lexicographically ("9.00%" above "10.00%").
    df = pd.DataFrame(analysis_data)
    df = df.sort_values('_score', ascending=False, kind='stable')
    df = df.drop(columns='_score')
    st.dataframe(df)
# Bubble insights
st.header("Bubble Insights")
bubble_keywords = st.session_state.smart_bubble.keywords
if bubble_keywords:
    # List the ten most frequent tokens collected from liked content.
    st.write("Top keywords in your bubble:")
    for word, count in bubble_keywords.most_common(10):
        st.write(f"- {word}: {count}")