thecodemasster committed
Commit 6f296ca · verified · 1 Parent(s): 16026bd

Update app.py

Files changed (1)
  1. app.py +1 -179
app.py CHANGED
@@ -1,179 +1 @@
- import streamlit as st
- import re
- from langdetect import detect
- from transformers import pipeline
- import nltk
- from docx import Document
- import io
-
- # Download required NLTK resources
- nltk.download('punkt')
-
- # Load AI models once to optimize performance
- try:
-     tone_model = pipeline("zero-shot-classification", model="cross-encoder/nli-deberta-v3-large")
- except OSError:
-     st.error("Failed to load tone analysis model. Please check internet connection or model availability.")
-
- try:
-     frame_model = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
- except OSError:
-     st.error("Failed to load frame classification model. Please check internet connection or model availability.")
-
- # Updated tone categories
- tone_categories = [
-     "Emotional & Urgent", "Harsh & Critical", "Negative & Somber",
-     "Empowering & Motivational", "Neutral & Informative", "Hopeful & Positive"
- ]
-
- # Updated frame categories
- frame_categories = [
-     "Human Rights & Justice", "Political & State Accountability", "Gender & Patriarchy",
-     "Religious Freedom & Persecution", "Grassroots Mobilization", "Environmental Crisis & Activism",
-     "Anti-Extremism & Anti-Violence", "Social Inequality & Economic Disparities"
- ]
-
- # Detect language
- def detect_language(text):
-     try:
-         return detect(text)
-     except Exception:
-         return "unknown"
-
- # Analyze tone using DeBERTa model
- def analyze_tone(text):
-     try:
-         model_result = tone_model(text, candidate_labels=tone_categories)
-         return model_result["labels"][:2]  # Top 2 tone labels
-     except Exception as e:
-         st.error(f"Error analyzing tone: {e}")
-         return ["Unknown"]
-
- # Extract frames using BART model
- def extract_frames(text):
-     try:
-         model_result = frame_model(text, candidate_labels=frame_categories)
-         return model_result["labels"][:2]  # Top 2 frame labels
-     except Exception as e:
-         st.error(f"Error extracting frames: {e}")
-         return ["Unknown"]
-
- # Extract hashtags
- def extract_hashtags(text):
-     return re.findall(r"#\w+", text)
-
- # Extract captions from DOCX file
- def extract_captions_from_docx(docx_file):
-     doc = Document(docx_file)
-     captions = {}
-     current_post = None
-     for para in doc.paragraphs:
-         text = para.text.strip()
-         if re.match(r"Post \d+", text, re.IGNORECASE):
-             current_post = text
-             captions[current_post] = []
-         elif current_post:
-             captions[current_post].append(text)
-
-     return {post: " ".join(lines) for post, lines in captions.items() if lines}
-
- # Generate a DOCX file in-memory
- def generate_docx(output_data):
-     doc = Document()
-     doc.add_heading('Activism Message Analysis', 0)
-
-     for index, (caption, result) in enumerate(output_data.items(), start=1):
-         doc.add_heading(f"{index}. {caption}", level=1)
-         doc.add_paragraph("Full Caption:")
-         doc.add_paragraph(result['Full Caption'], style="Quote")
-
-         doc.add_paragraph(f"Language: {result['Language']}")
-         doc.add_paragraph(f"Tone of Caption: {', '.join(result['Tone of Caption'])}")
-         doc.add_paragraph(f"Number of Hashtags: {result['Hashtag Count']}")
-         doc.add_paragraph(f"Hashtags Found: {', '.join(result['Hashtags'])}")
-
-         doc.add_heading('Frames:', level=2)
-         for frame in result['Frames']:
-             doc.add_paragraph(frame)
-
-     doc_io = io.BytesIO()
-     doc.save(doc_io)
-     doc_io.seek(0)
-
-     return doc_io
-
- # Streamlit app UI
- st.title('AI-Powered Activism Message Analyzer')
-
- st.write("Enter the text to analyze or upload a DOCX file containing captions:")
-
- # Text Input
- input_text = st.text_area("Input Text", height=200)
-
- # File Upload
- uploaded_file = st.file_uploader("Upload a DOCX file", type=["docx"])
-
- # Initialize output dictionary
- output_data = {}
-
- if input_text:
-     language = detect_language(input_text)
-     tone = analyze_tone(input_text)
-     hashtags = extract_hashtags(input_text)
-     frames = extract_frames(input_text)
-
-     output_data["Manual Input"] = {
-         'Full Caption': input_text,
-         'Language': language,
-         'Tone of Caption': tone,
-         'Hashtags': hashtags,
-         'Hashtag Count': len(hashtags),
-         'Frames': frames
-     }
-
-     st.success("Analysis completed for text input.")
-
- if uploaded_file:
-     captions = extract_captions_from_docx(uploaded_file)
-     for caption, text in captions.items():
-         language = detect_language(text)
-         tone = analyze_tone(text)
-         hashtags = extract_hashtags(text)
-         frames = extract_frames(text)
-
-         output_data[caption] = {
-             'Full Caption': text,
-             'Language': language,
-             'Tone of Caption': tone,
-             'Hashtags': hashtags,
-             'Hashtag Count': len(hashtags),
-             'Frames': frames
-         }
-
-     st.success(f"Analysis completed for {len(captions)} posts from the DOCX file.")
-
- # Display results
- if output_data:
-     with st.expander("Generated Output"):
-         st.subheader("Analysis Results")
-         for index, (caption, result) in enumerate(output_data.items(), start=1):
-             st.write(f"### {index}. {caption}")
-             st.write("**Full Caption:**")
-             st.write(f"> {result['Full Caption']}")
-             st.write(f"**Language**: {result['Language']}")
-             st.write(f"**Tone of Caption**: {', '.join(result['Tone of Caption'])}")
-             st.write(f"**Number of Hashtags**: {result['Hashtag Count']}")
-             st.write(f"**Hashtags Found:** {', '.join(result['Hashtags'])}")
-             st.write("**Frames**:")
-             for frame in result['Frames']:
-                 st.write(f"- {frame}")
-
-     docx_file = generate_docx(output_data)
-
-     if docx_file:
-         st.download_button(
-             label="Download Analysis as DOCX",
-             data=docx_file,
-             file_name="activism_message_analysis.docx",
-             mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document"
-         )
+ print("hello world")
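For context, the 179 removed lines built the analyzer around Hugging Face zero-shot classification pipelines. A minimal standalone sketch of that call pattern, using the facebook/bart-large-mnli model and frame labels taken from the removed code (the sample caption is hypothetical):

    from transformers import pipeline

    # Zero-shot classifier, same model the removed extract_frames() used
    frame_model = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")

    frame_categories = [
        "Human Rights & Justice", "Political & State Accountability", "Gender & Patriarchy"
    ]

    # Hypothetical sample caption for illustration
    caption = "Protesters gathered downtown demanding accountability. #justice"

    result = frame_model(caption, candidate_labels=frame_categories)
    print(result["labels"][:2])  # top-2 frames, as the removed code reported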