|
import streamlit as st |
|
import pandas as pd |
|
from collections import Counter |
|
import numpy as np |
|
from sklearn.feature_extraction.text import TfidfVectorizer |
|
from sklearn.metrics.pairwise import cosine_similarity |
|
|
|
class SmartBubble:
    """Remember liked content and score new content against it.

    The bubble keeps a running word count of everything that was liked and
    the raw liked texts themselves. New content is scored by its mean TF-IDF
    cosine similarity to the liked texts.
    """

    def __init__(self):
        # Word frequencies accumulated from every liked piece of content.
        self.keywords = Counter()
        # Raw text of each liked piece, in the order it was absorbed.
        self.liked_content = []
        # Re-fitted on every similarity query (see get_similarity_score).
        self.vectorizer = TfidfVectorizer(stop_words='english')

    def absorb_content(self, content, is_liked=True):
        """Absorb content and extract its key features.

        Args:
            content: Text to learn from. It is lower-cased and split on
                whitespace to update the keyword counts.
            is_liked: When false the content is ignored entirely.
        """
        if not is_liked:
            return
        self.keywords.update(content.lower().split())
        self.liked_content.append(content)

    def get_similarity_score(self, new_content):
        """Calculate similarity between new content and liked content.

        Args:
            new_content: Text to compare against everything liked so far.

        Returns:
            The mean cosine similarity (as a float) between the TF-IDF vector
            of ``new_content`` and the vectors of the liked texts. Returns
            0.0 when nothing has been liked yet, or when no document
            contains any usable term.
        """
        if not self.liked_content:
            return 0.0

        # The new text goes last so it can be sliced off as the final row.
        corpus = self.liked_content + [new_content]

        try:
            tfidf_matrix = self.vectorizer.fit_transform(corpus)
        except ValueError as err:
            # scikit-learn refuses to fit when the vocabulary is empty, e.g.
            # when every document holds only stop words or whitespace. There
            # is nothing to compare in that case, so report zero similarity.
            if "empty vocabulary" not in str(err):
                raise
            return 0.0

        similarities = cosine_similarity(tfidf_matrix[-1:], tfidf_matrix[:-1])
        return float(similarities.mean())
|
|
|
|
|
# Page title shown at the top of the app.
st.title("Smart Bubble Protocol")

# Seed the per-session objects only when they are missing, so an existing
# bubble and content list are reused instead of being replaced.
for _state_key, _factory in (("smart_bubble", SmartBubble), ("content_list", list)):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _factory()
|
|
|
|
|
# --- Section: let the user queue a new piece of content for analysis ---
st.header("Add New Content")

new_content = st.text_area("Enter content to analyze:")

if st.button("Add to Content List"):
    # Whitespace-only text carries no words to analyze, so it is rejected
    # the same way as empty input instead of being stored as a blank entry.
    if new_content and new_content.strip():
        st.session_state.content_list.append(new_content)
        st.success("Content added!")
    else:
        st.warning("Please enter some content before adding it.")
|
|
|
|
|
# --- Section: show every queued item with a "Like" button beside it ---
st.header("Train Your Bubble")

for idx, item_text in enumerate(st.session_state.content_list):
    # Wide column for the text, narrow column for the action button.
    text_col, action_col = st.columns([4, 1])

    text_col.text_area(
        f"Content {idx + 1}",
        item_text,
        height=100,
        key=f"content_{idx}",
    )

    liked = action_col.button("Like", key=f"like_{idx}")
    if liked:
        # Feed the liked text into the session's bubble.
        st.session_state.smart_bubble.absorb_content(item_text)
        action_col.success("Content absorbed into bubble!")
|
|
|
|
|
# --- Section: score every queued item against the bubble and tabulate ---
st.header("Smart Bubble Analysis")

if st.session_state.content_list:
    analysis_data = []

    for content in st.session_state.content_list:
        score = st.session_state.smart_bubble.get_similarity_score(content)

        # Five most frequent whitespace-separated, lower-cased words.
        keywords = " ".join(
            word for word, _ in Counter(content.lower().split()).most_common(5)
        )

        # Only add an ellipsis when the preview actually cuts the text short.
        preview = content if len(content) <= 100 else content[:100] + "..."

        analysis_data.append({
            'Content Preview': preview,
            # Kept numeric for now so the table can be ordered correctly.
            'Similarity Score': float(score),
            'Top Keywords': keywords,
        })

    df = pd.DataFrame(analysis_data)

    # Sort on the numeric score: sorting the formatted percentage strings
    # would order them lexicographically ("5.00%" above "10.00%").
    df = df.sort_values('Similarity Score', ascending=False)

    # Format for display only after the ordering has been settled.
    df['Similarity Score'] = df['Similarity Score'].map("{:.2%}".format)

    st.dataframe(df)
|
|
|
|
|
# --- Section: list the most frequent words among liked content ---
st.header("Bubble Insights")

bubble_keywords = st.session_state.smart_bubble.keywords

# An empty counter means nothing has been liked yet, so show nothing.
if bubble_keywords:
    ranked = bubble_keywords.most_common(10)
    st.write("Top keywords in your bubble:")
    for term, occurrences in ranked:
        st.write(f"- {term}: {occurrences}")