billusanda007 committed on
Commit
95fddff
·
1 Parent(s): 7230f29

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +104 -0
app.py ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import joblib
3
+ import numpy as np
4
+ from sklearn.feature_extraction.text import TfidfVectorizer
5
+ # Import necessary libraries
6
+ import re
7
+ from urllib.parse import urlparse
8
+ from nltk.tokenize import word_tokenize
9
+ from nltk.corpus import stopwords
10
+ from nltk.stem import WordNetLemmatizer
11
+
12
# Initialize NLTK resources
def _ensure_nltk_data():
    """Download any NLTK data package this module needs that is missing.

    Each resource is probed by making the same kind of call the app makes
    later. NLTK raises ``LookupError`` when the backing data is not
    installed; only in that case are the candidate packages downloaded, so
    an environment that already works sees no network access.
    """
    import nltk  # local import: only the downloader is needed, and only here

    probes = (
        (lambda: stopwords.words("english"), ("stopwords",)),
        # Depending on the NLTK release, WordNet may also need "omw-1.4".
        (lambda: WordNetLemmatizer().lemmatize("probe"), ("wordnet", "omw-1.4")),
        # Depending on the NLTK release, word_tokenize loads either "punkt"
        # or "punkt_tab".
        (lambda: word_tokenize("probe"), ("punkt", "punkt_tab")),
    )
    for probe, packages in probes:
        try:
            probe()
        except LookupError:
            for package in packages:
                nltk.download(package, quiet=True)


# Without this, a missing tokenizer or WordNet corpus only surfaces inside
# textProcess, where the broad exception handler turns it into an empty string.
_ensure_nltk_data()

stop_words = set(stopwords.words("english"))  # Create a set of English stopwords
lemmatizer = WordNetLemmatizer()  # Initialize the WordNet Lemmatizer
15
+
16
+ # Define a function for text processing
17
def textProcess(sent):
    """Normalise raw text into a space-separated string of lemmatised tokens.

    Steps, in order: replace square brackets and parentheses with spaces,
    drop whitespace-delimited words that ``urlparse`` assigns a scheme to,
    strip ``@username`` mentions and HTML tags, replace every remaining
    non-alphanumeric character with a space, lowercase, tokenise with
    ``word_tokenize``, remove English stop words, and lemmatise each token.

    Args:
        sent: Text to clean. ``None`` is accepted.

    Returns:
        The cleaned text. ``""`` is returned when ``sent`` is ``None`` or when
        any step raises; in the latter case the input and the error are
        printed rather than propagated.
    """
    if sent is None:
        return ""

    try:
        # Brackets and parentheses become spaces so adjoining words stay apart.
        text = re.sub('[][)(]', ' ', sent)

        # Drop URL-like words, i.e. anything urlparse reports a scheme for.
        # NOTE(review): this also drops plain words shaped like "label:rest";
        # kept as-is because the persisted vectorizer was presumably fitted on
        # text cleaned the same way -- confirm before changing.
        text = ' '.join(
            word for word in text.split() if not urlparse(word).scheme
        )

        # Remove Twitter usernames (words starting with @)
        text = re.sub(r'\@\w+', '', text)

        # Remove HTML tags
        text = re.sub("<.*?>", '', text)

        # Keep only letters and digits; everything else becomes a space
        text = re.sub("[^A-Za-z0-9]", ' ', text)

        # Lowercase and collapse runs of whitespace to single spaces
        text = ' '.join(text.lower().split())

        # Tokenise and filter stop words in one pass (set lookup per token)
        tokens = [
            token for token in word_tokenize(text) if token not in stop_words
        ]

        # Lemmatise the surviving tokens and rebuild the sentence
        return ' '.join(lemmatizer.lemmatize(token) for token in tokens)

    except Exception as ex:
        # Best-effort cleaning: report the offending input and fall back to "".
        print(sent, "\n")
        print("Error ", ex)
        return ""  # Return an empty string in case of an error
68
+
69
+ # Rest of your code...
70
+
71
# Load the pre-trained model and the TF-IDF vectorizer used during training
def _load_artifacts():
    """Load and return ``(model, tfidf_vectorizer)`` from their joblib files."""
    return (
        joblib.load('Stress identification NLP'),
        joblib.load('tfidf_vectorizer.joblib'),
    )


# Streamlit re-executes this script on every widget interaction. Caching keeps
# a single deserialised copy instead of re-reading both files on each rerun.
# Older Streamlit releases do not provide st.cache_resource, so fall back to
# loading uncached there.
if hasattr(st, "cache_resource"):
    _load_artifacts = st.cache_resource(show_spinner=False)(_load_artifacts)

model, tfidf_vectorizer = _load_artifacts()
76
+
77
+ # Define the Streamlit web app
78
def main():
    """Render the stress-prediction page and show a prediction on request.

    The page has a text area and a "Predict" button. When the button is
    pressed, the text is cleaned with ``textProcess``, vectorised with
    ``tfidf_vectorizer`` and classified by ``model``; a prediction equal to
    ``1`` is reported as stress, anything else as no stress.
    """
    st.title("Stress Predictor Web App")
    st.write("Enter some text to predict if the person is in stress or not.")

    # Input text box
    user_input = st.text_area("Enter text here:")

    if not st.button("Predict"):
        return

    # Give feedback instead of silently doing nothing on empty or blank input.
    if not user_input or not user_input.strip():
        st.warning("Please enter some text before pressing Predict.")
        return

    # Process the input text
    processed_text = textProcess(user_input)

    # An empty result means nothing usable survived cleaning (or cleaning
    # failed); classifying it would report a verdict based on no input.
    if not processed_text:
        st.warning(
            "The text contains no usable words after cleaning. "
            "Please enter a longer or more descriptive text."
        )
        return

    # Use the same TF-IDF vectorizer to transform the input text
    tfidf_text = tfidf_vectorizer.transform([processed_text])

    # Make predictions using the loaded model
    prediction = model.predict(tfidf_text)[0]

    if prediction == 1:
        result = "This person is in stress."
    else:
        result = "This person is not in stress."

    st.write(result)


if __name__ == '__main__':
    main()