File size: 3,743 Bytes
243879b a7328ec 243879b a7328ec 243879b db87bba 243879b bd7368b 243879b a7328ec b44978e 243879b 9e7ff90 243879b 211e5c4 9e7ff90 b824c83 243879b a7328ec b824c83 a7328ec b824c83 a7328ec b824c83 b44978e 9e7ff90 b824c83 b44978e b824c83 b44978e b824c83 b44978e b824c83 b44978e b824c83 b44978e b824c83 b44978e b824c83 b44978e b824c83 b44978e b824c83 b44978e a7328ec b824c83 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 |
import base64
import os
from collections import Counter
from io import BytesIO

# Fix permission issues: point the Hugging Face and Matplotlib caches at a
# writable directory.  This has to happen BEFORE `transformers` and
# `matplotlib` are imported, because those libraries read these variables
# while they are being imported; assigning them afterwards has no effect.
os.environ["HF_HOME"] = "/tmp"
os.environ["TRANSFORMERS_CACHE"] = "/tmp"
os.environ["MPLCONFIGDIR"] = "/tmp"

# Create the directories if they don't exist.
for _cache_var in ("HF_HOME", "TRANSFORMERS_CACHE", "MPLCONFIGDIR"):
    os.makedirs(os.environ[_cache_var], exist_ok=True)

import matplotlib  # noqa: E402  (must follow the cache setup above)
matplotlib.use('Agg')  # Prevents GUI issues for Matplotlib
import matplotlib.pyplot as plt  # noqa: E402
import pandas as pd  # noqa: E402
import torch  # noqa: E402
from flask import Flask, request, render_template  # noqa: E402
from transformers import BertTokenizer, BertForSequenceClassification  # noqa: E402

app = Flask(__name__)

# Load the fine-tuned classifier from the Hugging Face Hub.  The tokenizer is
# taken from the base checkpoint, exactly as the original code did.
MODEL_NAME = "philipobiorah/bert-imdb-model"
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
model = BertForSequenceClassification.from_pretrained(MODEL_NAME)
model.eval()  # inference only: switch off dropout etc.
# Function to Predict Sentiment
def predict_sentiment(text):
    """Classify ``text`` as ``'Positive'`` or ``'Negative'``.

    The text is tokenized once and cut into windows that fit the 512-token
    model input *including* the special tokens the tokenizer adds, so no
    token is dropped by truncation.  Every window is classified on its own
    and the final label is the majority vote across windows (on a tie the
    class that was seen first wins, which is how ``Counter.most_common``
    orders equal counts).

    Args:
        text: The review text to classify.  An empty string is classified
            as a single window containing only the special tokens.

    Returns:
        ``'Positive'`` when the winning class index is 1, otherwise
        ``'Negative'``.
    """
    max_length = 512
    # Leave room for the special tokens wrapped around each window.
    window = max_length - tokenizer.num_special_tokens_to_add(pair=False)

    token_ids = tokenizer.encode(text, add_special_tokens=False)
    # `or [[]]` keeps one (empty) window for empty input so that the vote
    # below always has at least one entry.
    chunks = [
        token_ids[start:start + window]
        for start in range(0, len(token_ids), window)
    ] or [[]]

    # Predict sentiment for each chunk.
    sentiments = []
    for chunk in chunks:
        # Feed ids directly instead of decoding back to text and tokenizing
        # again: the round trip could re-split word pieces at chunk borders.
        input_ids = torch.tensor(
            [tokenizer.build_inputs_with_special_tokens(chunk)]
        )
        with torch.no_grad():
            outputs = model(input_ids=input_ids)
        sentiments.append(outputs.logits.argmax(dim=1).item())

    # Aggregate the predictions (majority voting).
    majority_sentiment = Counter(sentiments).most_common(1)[0][0]
    return 'Positive' if majority_sentiment == 1 else 'Negative'
@app.route('/')
def upload_file():
    """Render the ``upload.html`` template for the site root."""
    landing_page = render_template('upload.html')
    return landing_page
@app.route('/analyze_text', methods=['POST'])
def analyze_text():
    """Classify the text submitted in the ``text`` form field.

    The route only accepts POST, so no method check is needed here.  A
    request without a ``text`` field is rejected by Flask with a 400, as
    before.

    Returns:
        The rendered ``upload.html`` template.  ``sentiment`` is passed to
        the template only when there was non-blank text to classify.
    """
    text = request.form['text']
    if not text.strip():
        # Nothing to classify: show the form again instead of reporting a
        # label for an empty input.
        return render_template('upload.html')
    sentiment = predict_sentiment(text)
    return render_template('upload.html', sentiment=sentiment)
def _sentiment_bar_chart(sentiment_counts):
    """Draw the sentiment counts as a bar chart and return it as base64 PNG text."""
    # Fixed label order and colours so a given sentiment always looks the
    # same, regardless of which one happens to be more frequent.
    labels = ['Positive', 'Negative']
    colors = {'Positive': 'blue', 'Negative': 'red'}

    fig, ax = plt.subplots()
    try:
        ax.bar(
            labels,
            [sentiment_counts.get(label, 0) for label in labels],
            color=[colors[label] for label in labels],
        )
        ax.set_ylabel('Counts')
        ax.set_title('Sentiment Analysis Summary')
        # Save this specific figure (not pyplot's "current" one) to memory.
        img = BytesIO()
        fig.savefig(img, format='png', bbox_inches='tight')
    finally:
        # Always release the figure, even if drawing or saving failed.
        plt.close(fig)
    return base64.b64encode(img.getvalue()).decode('utf8')


@app.route('/uploader', methods=['GET', 'POST'])
def upload_file_post():
    """Classify every row of an uploaded CSV and render the results page.

    The CSV is read from the ``file`` upload field and must contain a
    ``review`` column.  A ``sentiment`` column is added with the prediction
    for each row, and ``result.html`` is rendered with the table, a textual
    summary and a base64-encoded bar chart.

    Returns:
        * GET: the rendered ``upload.html`` template.
        * POST with a usable CSV: the rendered ``result.html`` template.
        * POST with a missing/unparsable file or no ``review`` column: a
          short message with HTTP status 400.
    """
    if request.method != 'POST':
        # A view must return a response; without this a GET request fell
        # through and produced a server error.
        return render_template('upload.html')

    f = request.files.get('file')
    if f is None or not f.filename:
        return "No CSV file was uploaded.", 400

    try:
        data = pd.read_csv(f)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError):
        return "The uploaded file could not be parsed as CSV.", 400

    if 'review' not in data.columns:
        return "The CSV file must contain a 'review' column.", 400

    # Predict sentiment for each review.  Blank cells are read as NaN (a
    # float), so coerce everything to text before it reaches the tokenizer.
    reviews = data['review'].fillna('').astype(str)
    data['sentiment'] = reviews.apply(predict_sentiment)

    # Sentiment Analysis Summary
    sentiment_counts = data['sentiment'].value_counts().to_dict()
    summary = f"Total Reviews: {len(data)}<br>" \
              f"Positive: {sentiment_counts.get('Positive', 0)}<br>" \
              f"Negative: {sentiment_counts.get('Negative', 0)}<br>"

    plot_url = _sentiment_bar_chart(sentiment_counts)

    return render_template(
        'result.html',
        tables=[data.to_html(classes='data')],
        titles=data.columns.values,
        summary=summary,
        plot_url=plot_url,
    )
if __name__ == '__main__':
    # The server listens on every interface, so the interactive debugger
    # (which lets a browser execute arbitrary Python) must not be on by
    # default.  Opt in explicitly with FLASK_DEBUG=1 for local development.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in {
        "1", "true", "yes", "on",
    }
    app.run(host='0.0.0.0', port=7860, debug=debug_enabled)
|