similarity-checker

Sleeping

App Files Files Community

thecodemasster committed on Jun 11

Commit

6f296ca

verified ·

1 Parent(s): 16026bd

Update app.py

Browse files

Files changed (1) hide show

app.py +1 -179

app.py CHANGED Viewed

@@ -1,179 +1 @@
-import streamlit as st
-import re
-from langdetect import detect
-from transformers import pipeline
-import nltk
-from docx import Document
-import io
-# Download required NLTK resources
-nltk.download('punkt')
-# Load AI models once to optimize performance
-try:
-    tone_model = pipeline("zero-shot-classification", model="cross-encoder/nli-deberta-v3-large")
-except OSError:
-    st.error("Failed to load tone analysis model. Please check internet connection or model availability.")
-try:
-    frame_model = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
-except OSError:
-    st.error("Failed to load frame classification model. Please check internet connection or model availability.")
-# Updated tone categories
-tone_categories = [
-    "Emotional & Urgent", "Harsh & Critical", "Negative & Somber",
-    "Empowering & Motivational", "Neutral & Informative", "Hopeful & Positive"
-]
-# Updated frame categories
-frame_categories = [
-    "Human Rights & Justice", "Political & State Accountability", "Gender & Patriarchy",
-    "Religious Freedom & Persecution", "Grassroots Mobilization", "Environmental Crisis & Activism",
-    "Anti-Extremism & Anti-Violence", "Social Inequality & Economic Disparities"
-]
-# Detect language
-def detect_language(text):
-    try:
-        return detect(text)
-    except Exception:
-        return "unknown"
-# Analyze tone using DeBERTa model
-def analyze_tone(text):
-    try:
-        model_result = tone_model(text, candidate_labels=tone_categories)
-        return model_result["labels"][:2]  # Top 2 tone labels
-    except Exception as e:
-        st.error(f"Error analyzing tone: {e}")
-        return ["Unknown"]
-# Extract frames using BART model
-def extract_frames(text):
-    try:
-        model_result = frame_model(text, candidate_labels=frame_categories)
-        return model_result["labels"][:2]  # Top 2 frame labels
-    except Exception as e:
-        st.error(f"Error extracting frames: {e}")
-        return ["Unknown"]
-# Extract hashtags
-def extract_hashtags(text):
-    return re.findall(r"#\w+", text)
-# Extract captions from DOCX file
-def extract_captions_from_docx(docx_file):
-    doc = Document(docx_file)
-    captions = {}
-    current_post = None
-    for para in doc.paragraphs:
-        text = para.text.strip()
-        if re.match(r"Post \d+", text, re.IGNORECASE):
-            current_post = text
-            captions[current_post] = []
-        elif current_post:
-            captions[current_post].append(text)
-    return {post: " ".join(lines) for post, lines in captions.items() if lines}
-# Generate a DOCX file in-memory
-def generate_docx(output_data):
-    doc = Document()
-    doc.add_heading('Activism Message Analysis', 0)
-    for index, (caption, result) in enumerate(output_data.items(), start=1):
-        doc.add_heading(f"{index}. {caption}", level=1)
-        doc.add_paragraph("Full Caption:")
-        doc.add_paragraph(result['Full Caption'], style="Quote")
-        doc.add_paragraph(f"Language: {result['Language']}")
-        doc.add_paragraph(f"Tone of Caption: {', '.join(result['Tone of Caption'])}")
-        doc.add_paragraph(f"Number of Hashtags: {result['Hashtag Count']}")
-        doc.add_paragraph(f"Hashtags Found: {', '.join(result['Hashtags'])}")
-        doc.add_heading('Frames:', level=2)
-        for frame in result['Frames']:
-            doc.add_paragraph(frame)
-    doc_io = io.BytesIO()
-    doc.save(doc_io)
-    doc_io.seek(0)
-    return doc_io
-# Streamlit app UI
-st.title('AI-Powered Activism Message Analyzer')
-st.write("Enter the text to analyze or upload a DOCX file containing captions:")
-# Text Input
-input_text = st.text_area("Input Text", height=200)
-# File Upload
-uploaded_file = st.file_uploader("Upload a DOCX file", type=["docx"])
-# Initialize output dictionary
-output_data = {}
-if input_text:
-    language = detect_language(input_text)
-    tone = analyze_tone(input_text)
-    hashtags = extract_hashtags(input_text)
-    frames = extract_frames(input_text)
-    output_data["Manual Input"] = {
-        'Full Caption': input_text,
-        'Language': language,
-        'Tone of Caption': tone,
-        'Hashtags': hashtags,
-        'Hashtag Count': len(hashtags),
-        'Frames': frames
-    }
-    st.success("Analysis completed for text input.")
-if uploaded_file:
-    captions = extract_captions_from_docx(uploaded_file)
-    for caption, text in captions.items():
-        language = detect_language(text)
-        tone = analyze_tone(text)
-        hashtags = extract_hashtags(text)
-        frames = extract_frames(text)
-        output_data[caption] = {
-            'Full Caption': text,
-            'Language': language,
-            'Tone of Caption': tone,
-            'Hashtags': hashtags,
-            'Hashtag Count': len(hashtags),
-            'Frames': frames
-        }
-    st.success(f"Analysis completed for {len(captions)} posts from the DOCX file.")
-# Display results
-if output_data:
-    with st.expander("Generated Output"):
-        st.subheader("Analysis Results")
-        for index, (caption, result) in enumerate(output_data.items(), start=1):
-            st.write(f"### {index}. {caption}")
-            st.write("**Full Caption:**")
-            st.write(f"> {result['Full Caption']}")
-            st.write(f"**Language**: {result['Language']}")
-            st.write(f"**Tone of Caption**: {', '.join(result['Tone of Caption'])}")
-            st.write(f"**Number of Hashtags**: {result['Hashtag Count']}")
-            st.write(f"**Hashtags Found:** {', '.join(result['Hashtags'])}")
-            st.write("**Frames**:")
-            for frame in result['Frames']:
-                st.write(f"- {frame}")
-    docx_file = generate_docx(output_data)
-    if docx_file:
-        st.download_button(
-            label="Download Analysis as DOCX",
-            data=docx_file,
-            file_name="activism_message_analysis.docx",
-            mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document"
-        )


+print("hello world")