# Streamlit app: side-by-side sentiment analysis, summarization and keyword
# extraction using Hugging Face Transformers pipelines vs Llama 3 (HF Inference API).
# Standard library
import os

# Third-party
import streamlit as st
import requests
import nltk
import openai
from transformers import pipeline
from rake_nltk import Rake
from nltk.corpus import stopwords
from fuzzywuzzy import fuzz
from dotenv import load_dotenv

# Load environment variables (e.g. the HF API token 'HFSecret') from a .env file.
load_dotenv()
# Title of the app
st.title("Sentiment Analysis Comparison: Transformers vs Llama 3")

# Preset texts the user can analyze; each (except 'None') maps to a remote .txt file.
options = ['None', 'Appreciation Letter', 'Regret Letter', 'Kindness Tale', 'Lost Melody Tale', 'Twitter Example 1', 'Twitter Example 2']

# Dropdown menu to pick one of the presets.
selected_option = st.selectbox("Select a preset option", options)

# Raw-content URLs for each preset ('None' is intentionally absent).
urls = {
    'Appreciation Letter': "https://raw.githubusercontent.com/peteciank/public_files/main/Transformers/Appreciation_Letter.txt",
    'Regret Letter': "https://raw.githubusercontent.com/peteciank/public_files/main/Transformers/Regret_Letter.txt",
    'Kindness Tale': "https://raw.githubusercontent.com/peteciank/public_files/main/Transformers/Kindness_Tale.txt",
    'Lost Melody Tale': "https://raw.githubusercontent.com/peteciank/public_files/main/Transformers/Lost_Melody_Tale.txt",
    'Twitter Example 1': "https://raw.githubusercontent.com/peteciank/public_files/main/Transformers/Twitter_Example_1.txt",
    'Twitter Example 2': "https://raw.githubusercontent.com/peteciank/public_files/main/Transformers/Twitter_Example_2.txt",
}
# Function to fetch text content based on selected option
def fetch_text_content(selected_option):
    """Return the remote preset text for *selected_option*, or '' if unavailable.

    Unknown options (e.g. 'None') and network failures both yield an empty
    string so the app degrades gracefully instead of crashing on a rerun.
    """
    if selected_option not in urls:
        return ""
    try:
        # Timeout so a slow GitHub fetch cannot hang the Streamlit rerun.
        return requests.get(urls[selected_option], timeout=10).text
    except requests.RequestException:
        return ""
# Fetch the preset text (may be empty) and let the user view/edit it.
text = fetch_text_content(selected_option)
text = st.text_area('Enter the text to analyze', text)

# Download NLTK resources needed by RAKE and the stopword filter.
# quiet=True keeps download progress out of the logs on every rerun.
nltk.download('punkt', quiet=True)
nltk.download('stopwords', quiet=True)
# Initialize Transformers pipelines: default model for sentiment, BART for summaries.
pipe_sent = pipeline('sentiment-analysis')
pipe_summ = pipeline("summarization", model="facebook/bart-large-cnn")

# Llama 3 via the Hugging Face Inference API.
llama_api_key = os.getenv('HFSecret')
# NOTE(review): the raw "inputs"-payload Inference API is served at
# /models/{repo} on this host *without* a /v1 prefix; the previous
# ".../v1" base produced a nonexistent /v1/models/... URL.
llama_base_url = "https://api-inference.huggingface.co"
llama_repo_id = "meta-llama/Meta-Llama-3-8B-Instruct"
# Function to query Llama 3 through the HF Inference API
def analyze_with_llama(text):
    """POST *text* to the Llama 3 model on the HF Inference API.

    Returns the decoded JSON body. NOTE(review): for this endpoint the
    body is typically a list of {'generated_text': ...} dicts, or a dict
    with an 'error' key while the model is loading — callers must not
    assume a plain dict.
    """
    headers = {
        "Authorization": f"Bearer {llama_api_key}"
    }
    data = {
        "inputs": text,
        "parameters": {
            "max_new_tokens": 200
        }
    }
    # Timeout so a cold model (still loading on HF's side) cannot hang the UI forever.
    response = requests.post(
        f"{llama_base_url}/models/{llama_repo_id}",
        headers=headers,
        json=data,
        timeout=60,
    )
    return response.json()
# Function to extract keywords using RAKE and remove near-duplicates
def extract_keywords(text):
    """Return up to 10 (score, phrase) keyword pairs extracted from *text*.

    RAKE ranks candidate phrases; candidates that are themselves a single
    stopword are dropped, and a phrase that fuzzy-matches an already-seen
    phrase (ratio > 70) is merged by keeping the longest variant.
    """
    r = Rake()
    r.extract_keywords_from_text(text)
    phrases_with_scores = r.get_ranked_phrases_with_scores()
    stop_words = set(stopwords.words('english'))
    # Filter out candidates that are exactly one English stopword.
    keywords = [(score, phrase) for score, phrase in phrases_with_scores
                if phrase.lower() not in stop_words]
    keywords.sort(key=lambda x: x[0], reverse=True)
    unique_keywords = []
    seen_phrases = set()
    for score, phrase in keywords:
        if phrase not in seen_phrases:
            # Collect previously-seen phrases that are fuzzy-similar to this one.
            similar_phrases = [seen for seen in seen_phrases
                               if fuzz.ratio(phrase, seen) > 70]
            if similar_phrases:
                # Keep the longest of the similar variants as the representative.
                merged_phrase = max([phrase] + similar_phrases, key=len)
                unique_keywords.append((score, merged_phrase))
            else:
                unique_keywords.append((score, phrase))
            seen_phrases.add(phrase)
    return unique_keywords[:10]
# Two side-by-side result columns.
col1, col2 = st.columns(2)

# Transformer-based analysis in the first column
with col1:
    st.header("Transformer-based Analysis")
    if st.button("Analyze with Transformers"):
        with st.spinner("Analyzing with Transformers..."):
            # Sentiment analysis: pipeline returns [{'label': ..., 'score': ...}]
            out_sentiment = pipe_sent(text)
            sentiment_score = out_sentiment[0]['score']
            sentiment_label = out_sentiment[0]['label']
            sentiment_emoji = '😊' if sentiment_label == 'POSITIVE' else '😞'
            sentiment_text = f"Sentiment Score: {sentiment_score}, Sentiment Label: {sentiment_label.capitalize()} {sentiment_emoji}"
            with st.expander("Sentiment Analysis (Transformers)"):
                st.write(sentiment_text)
            # Summarization via BART
            out_summ = pipe_summ(text)
            summarized_text = out_summ[0]['summary_text']
            with st.expander("Summarization (Transformers)"):
                st.write(summarized_text)
            # Keyword extraction via RAKE (phrases only, scores dropped)
            keywords = extract_keywords(text)
            keyword_list = [keyword[1] for keyword in keywords]
            with st.expander("Keywords (Transformers)"):
                st.write(keyword_list)
# Llama 3-based analysis in the second column
with col2:
    st.header("Llama 3-based Analysis")
    if st.button("Analyze with Llama 3"):
        with st.spinner("Analyzing with Llama 3..."):
            llama_response = analyze_with_llama(text)
            if llama_response:
                # NOTE(review): the HF Inference API usually returns a *list*
                # of {'generated_text': ...} dicts (or an {'error': ...} dict),
                # so .get() is only safe behind an isinstance guard — the raw
                # response is shown as a fallback for non-dict payloads.
                if isinstance(llama_response, dict):
                    sentiment_text = llama_response.get('sentiment_analysis', 'No sentiment detected')
                    summarized_text = llama_response.get('summarization', 'No summary available')
                    keywords = llama_response.get('keywords', 'No keywords available')
                else:
                    sentiment_text = 'No sentiment detected'
                    summarized_text = 'No summary available'
                    keywords = llama_response
                with st.expander("Sentiment Analysis (Llama 3)"):
                    st.write(sentiment_text)
                with st.expander("Summarization (Llama 3)"):
                    st.write(summarized_text)
                with st.expander("Keywords (Llama 3)"):
                    st.write(keywords)