Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,93 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
from collections import Counter
|
4 |
+
import numpy as np
|
5 |
+
from sklearn.feature_extraction.text import TfidfVectorizer
|
6 |
+
from sklearn.metrics.pairwise import cosine_similarity
|
7 |
+
|
8 |
+
class SmartBubble:
    """Remember liked content and score new content against it.

    State kept on the instance:

    * ``keywords`` - counts of lower-cased, whitespace-separated tokens
      taken from liked content.
    * ``liked_content`` - the liked texts, in the order they were absorbed.
    * ``vectorizer`` - TF-IDF vectorizer (English stop words removed) that
      is refitted on every call to :meth:`get_similarity_score`.
    """

    def __init__(self):
        self.keywords = Counter()
        self.liked_content = []
        self.vectorizer = TfidfVectorizer(stop_words='english')

    def absorb_content(self, content, is_liked=True):
        """Absorb *content* into the bubble when it is liked.

        Args:
            content: Text to absorb.
            is_liked: When false the call is a no-op.
        """
        if not is_liked:
            # Nothing is recorded for content that was not liked, so skip
            # the tokenization work entirely.
            return
        self.keywords.update(content.lower().split())
        self.liked_content.append(content)

    def get_similarity_score(self, new_content):
        """Return the mean cosine similarity of *new_content* to liked content.

        The liked texts plus *new_content* are vectorized together with
        TF-IDF; the score is the average cosine similarity between the new
        text's vector and each liked text's vector.

        Args:
            new_content: Text to compare against everything liked so far.

        Returns:
            A float. ``0.0`` when nothing has been liked yet, or when the
            vectorizer cannot build a vocabulary from the texts.
        """
        if not self.liked_content:
            return 0.0

        # The new text goes last so it can be sliced off as the final row.
        corpus = self.liked_content + [new_content]

        try:
            tfidf_matrix = self.vectorizer.fit_transform(corpus)
        except ValueError:
            # scikit-learn raises ValueError ("empty vocabulary") when no
            # document has any token left after tokenization and stop-word
            # removal. With no shared terms to compare, report no
            # similarity rather than crash the caller.
            return 0.0

        similarities = cosine_similarity(
            tfidf_matrix[-1:],  # New content vector
            tfidf_matrix[:-1],  # Liked content vectors
        )
        return float(similarities.mean())
|
39 |
+
|
40 |
+
# Streamlit app
st.title("Smart Bubble Protocol")

# Initialize session state: create each entry only if it is not already
# present, so existing values are never overwritten.
for state_key, make_default in (
    ("smart_bubble", SmartBubble),
    ("content_list", list),
):
    if state_key not in st.session_state:
        st.session_state[state_key] = make_default()
|
48 |
+
|
49 |
+
# Input section
st.header("Add New Content")
new_content = st.text_area("Enter content to analyze:")
if st.button("Add to Content List"):
    # Blank or whitespace-only text has no words to analyze, so reject it
    # with visible feedback instead of silently storing or ignoring it.
    if new_content and new_content.strip():
        st.session_state.content_list.append(new_content)
        st.success("Content added!")
    else:
        st.warning("Please enter some content before adding it.")
|
56 |
+
|
57 |
+
# Like/Training section
st.header("Train Your Bubble")
for idx, item in enumerate(st.session_state.content_list):
    # Wide column shows the text, narrow column holds its Like button.
    preview_col, action_col = st.columns([4, 1])
    preview_col.text_area(
        f"Content {idx+1}", item, height=100, key=f"content_{idx}"
    )
    liked = action_col.button("Like", key=f"like_{idx}")
    if liked:
        st.session_state.smart_bubble.absorb_content(item)
        action_col.success("Content absorbed into bubble!")
|
67 |
+
|
68 |
+
# Analysis section
st.header("Smart Bubble Analysis")
if st.session_state.content_list:
    bubble = st.session_state.smart_bubble
    analysis_data = []
    for content in st.session_state.content_list:
        # The five most frequent lower-cased, whitespace-separated tokens.
        top_words = Counter(content.lower().split()).most_common(5)
        # Only mark the preview with an ellipsis when text was actually cut.
        preview = content if len(content) <= 100 else content[:100] + "..."
        analysis_data.append({
            'Content Preview': preview,
            # Keep the score numeric here so the sort below is numeric.
            'Similarity Score': bubble.get_similarity_score(content),
            'Top Keywords': " ".join(word for word, _ in top_words),
        })

    # Create and display analysis DataFrame
    df = pd.DataFrame(analysis_data)
    # Sort on the numeric score first; sorting the formatted strings would
    # order them lexicographically (e.g. "9.00%" above "10.00%").
    df = df.sort_values('Similarity Score', ascending=False)
    df['Similarity Score'] = df['Similarity Score'].map("{:.2%}".format)
    st.dataframe(df)
|
86 |
+
|
87 |
+
# Bubble insights
st.header("Bubble Insights")
bubble_keywords = st.session_state.smart_bubble.keywords
if bubble_keywords:
    # List the ten most frequent tokens from liked content with their counts.
    st.write("Top keywords in your bubble:")
    for term, freq in bubble_keywords.most_common(10):
        st.write(f"- {term}: {freq}")
|