awacke1 committed on
Commit
7a5cd47
·
verified ·
1 Parent(s): f71ba9d

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +93 -0
app.py ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ from collections import Counter
4
+ import numpy as np
5
+ from sklearn.feature_extraction.text import TfidfVectorizer
6
+ from sklearn.metrics.pairwise import cosine_similarity
7
+
8
class SmartBubble:
    """Collect liked content and score new content against it.

    Instance state:
      * ``keywords`` -- ``Counter`` of lower-cased, whitespace-split words
        taken from every liked piece of content.
      * ``liked_content`` -- the liked content strings, in absorption order.
      * ``vectorizer`` -- a ``TfidfVectorizer`` (English stop words) that is
        refit on every call to :meth:`get_similarity_score`.
    """

    def __init__(self):
        self.keywords = Counter()
        self.liked_content = []
        self.vectorizer = TfidfVectorizer(stop_words='english')

    def absorb_content(self, content, is_liked=True):
        """Record *content* in the bubble when it is liked.

        Args:
            content: Text to absorb.
            is_liked: When false the call is a no-op; nothing is stored.
        """
        if not is_liked:
            return
        # Keywords are a naive bag of words: lower-case, split on whitespace.
        self.keywords.update(content.lower().split())
        self.liked_content.append(content)

    def get_similarity_score(self, new_content):
        """Return the mean TF-IDF cosine similarity to all liked content.

        Args:
            new_content: Text to compare against the liked content.

        Returns:
            A float. ``0.0`` when nothing has been liked yet, when
            *new_content* is blank, or when no usable vocabulary can be
            built (for example every text consists only of stop words).
        """
        # A blank document has an all-zero TF-IDF vector, so its similarity
        # is 0 by definition; skip the refit entirely.
        if not self.liked_content or not new_content.strip():
            return 0.0

        # The new content goes last so it can be sliced off as the final row.
        all_content = self.liked_content + [new_content]

        try:
            tfidf_matrix = self.vectorizer.fit_transform(all_content)
        except ValueError:
            # scikit-learn raises ValueError for an empty vocabulary;
            # treat "nothing to compare" as zero similarity rather than
            # crashing the app.
            return 0.0

        similarities = cosine_similarity(
            tfidf_matrix[-1:],  # new content vector
            tfidf_matrix[:-1],  # liked content vectors
        )
        return float(similarities.mean())
39
+
40
# Streamlit app entry point: page title first.
st.title("Smart Bubble Protocol")

# Seed per-session state once; later reruns of the script keep the
# existing objects because the keys are already present.
for _state_key, _state_factory in (
    ('smart_bubble', SmartBubble),
    ('content_list', list),
):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _state_factory()
48
+
49
# Input section: free-text box plus a button that queues the text.
st.header("Add New Content")
new_content = st.text_area("Enter content to analyze:")
# The button is always rendered; empty text is ignored on click.
if st.button("Add to Content List") and new_content:
    st.session_state.content_list.append(new_content)
    st.success("Content added!")
56
+
57
# Like/Training section: one row per queued item, text on the left and a
# "Like" button on the right.
st.header("Train Your Bubble")
for index, item in enumerate(st.session_state.content_list):
    text_column, action_column = st.columns([4, 1])
    with text_column:
        st.text_area(
            f"Content {index + 1}",
            item,
            height=100,
            key=f"content_{index}",
        )
    with action_column:
        # Widget keys are index-based so each row gets its own button.
        was_liked = st.button("Like", key=f"like_{index}")
        if was_liked:
            st.session_state.smart_bubble.absorb_content(item)
            st.success("Content absorbed into bubble!")
67
+
68
# Analysis section: score every queued item against the bubble and show
# the results ranked from most to least similar.
st.header("Smart Bubble Analysis")
if st.session_state.content_list:
    analysis_data = []
    for content in st.session_state.content_list:
        score = st.session_state.smart_bubble.get_similarity_score(content)
        keywords = " ".join(
            word
            for word, _ in Counter(content.lower().split()).most_common(5)
        )
        # Only add an ellipsis when the preview actually truncates the text.
        preview = content[:100] + ("..." if len(content) > 100 else "")
        analysis_data.append({
            'Content Preview': preview,
            # Keep the score numeric here; it is formatted after sorting.
            'Similarity Score': score,
            'Top Keywords': keywords,
        })

    # Create and display analysis DataFrame.
    df = pd.DataFrame(analysis_data)
    # Sort on the numeric score: sorting the formatted percent strings
    # would order them lexicographically ("9.00%" above "10.00%").
    # A stable sort keeps insertion order among equal scores.
    df = df.sort_values('Similarity Score', ascending=False, kind='stable')
    df['Similarity Score'] = df['Similarity Score'].map("{:.2%}".format)
    st.dataframe(df)
86
+
87
# Bubble insights: list the ten most frequent words across liked content.
st.header("Bubble Insights")
if st.session_state.smart_bubble.keywords:
    top_keywords = st.session_state.smart_bubble.keywords.most_common(10)
    st.write("Top keywords in your bubble:")
    for entry in top_keywords:
        word, count = entry
        st.write(f"- {word}: {count}")