Commit 2d9e425 · Parent(s): 2c58f4e
Ryan committed
update
Browse files
- .DS_Store +0 -0
- README.md +6 -0
- processors/bow_analysis.py +9 -5
- processors/ngram_analysis.py +5 -2
- processors/roberta_analysis.py +1 -1
.DS_Store CHANGED
Binary files a/.DS_Store and b/.DS_Store differ
README.md CHANGED
@@ -125,6 +125,8 @@ The RoBERTa sentiment analysis classifier will output which model is more positi
+The results are shown below.
+
@@ -133,6 +135,8 @@ The summary tab provides a summary of two of the prompts: the Trump and Harris p
+Below is the summary area filled in after clicking the button with the YOUR DATASET RESULTS selected.
+
@@ -142,6 +146,8 @@ This is a hard-coded tab that displays some basic graphs. The first one is a ba
+Below is the chart.
+
processors/bow_analysis.py CHANGED
@@ -1,16 +1,20 @@
 """
-Updated bow_analysis.py to include similarity metrics
+Updated bow_analysis.py to include similarity metrics.
+Preprocessing here is more advanced than in the n-gram version:
+lowercasing, tokenization, stopword removal, non-alphabetic character removal, short-word removal, and lemmatization.
 """
 from sklearn.feature_extraction.text import CountVectorizer
-import numpy as np
-from collections import Counter
-import re
-import nltk
 from nltk.corpus import stopwords
 from nltk.stem import WordNetLemmatizer
 from nltk.tokenize import word_tokenize
 from processors.metrics import calculate_similarity
 
+# Currently unused imports, left in case they are needed again.
+import numpy as np
+from collections import Counter
+import re
+import nltk
+
 # Define the compare_bow_across_texts function directly in this file
 def compare_bow_across_texts(texts, model_names, top_n=25):
     """
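For context, a minimal sketch of the preprocessing pipeline the new docstring describes. The helper name preprocess_for_bow and the min_len threshold are hypothetical; the actual implementation inside compare_bow_across_texts is not shown in this diff.

import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize

# One-time downloads of the NLTK data these steps rely on.
nltk.download("punkt", quiet=True)
nltk.download("stopwords", quiet=True)
nltk.download("wordnet", quiet=True)

def preprocess_for_bow(text, min_len=3):
    # Lowercase and tokenize.
    tokens = word_tokenize(text.lower())
    # Keep alphabetic tokens; drop stopwords and short words.
    stops = set(stopwords.words("english"))
    kept = [t for t in tokens if t.isalpha() and len(t) >= min_len and t not in stops]
    # Lemmatize what remains.
    lemmatizer = WordNetLemmatizer()
    return [lemmatizer.lemmatize(t) for t in kept]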
processors/ngram_analysis.py CHANGED
@@ -1,9 +1,12 @@
 """
-N-gram analysis for comparing text responses
+N-gram analysis for comparing text responses.
+Minimal preprocessing is done here: essentially just stop-word removal and tokenization, which my research suggests is a good combination for n-gram analysis.
 """
 from sklearn.feature_extraction.text import CountVectorizer
-
+
+# These imports aren't currently used; they supported test versions whose code has since been removed, but they are left in case they are needed again.
 from collections import Counter
+import numpy as np
 import nltk
 from nltk.util import ngrams
 from nltk.tokenize import word_tokenize
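A sketch of what this minimal preprocessing plus n-gram counting might look like with the imports above. The helper name top_ngrams is hypothetical, and the punctuation filter (isalpha) is my addition, not necessarily what the module does.

from collections import Counter
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.util import ngrams

def top_ngrams(text, n=2, top_n=10):
    # Minimal preprocessing: tokenize, then drop stopwords (and punctuation tokens).
    stops = set(stopwords.words("english"))
    tokens = [t for t in word_tokenize(text.lower()) if t.isalpha() and t not in stops]
    # Count and rank the most frequent n-grams.
    return Counter(ngrams(tokens, n)).most_common(top_n)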
processors/roberta_analysis.py CHANGED
@@ -2,7 +2,7 @@
 RoBERTa-based sentiment analysis for comparing LLM responses
 """
 import torch
-import numpy as np
+import numpy as np  # Ended up unused, but left in case it is needed later.
 from transformers import RobertaTokenizer, RobertaForSequenceClassification
 import nltk
 from nltk.tokenize import sent_tokenize
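A sketch of sentence-level sentiment scoring with these imports. The checkpoint name is an assumption (a common 3-class RoBERTa sentiment model); the diff does not show which model the app actually loads.

import torch
from transformers import RobertaTokenizer, RobertaForSequenceClassification
from nltk.tokenize import sent_tokenize

# Assumed checkpoint with negative/neutral/positive labels; swap in the app's actual model.
MODEL = "cardiffnlp/twitter-roberta-base-sentiment"
tokenizer = RobertaTokenizer.from_pretrained(MODEL)
model = RobertaForSequenceClassification.from_pretrained(MODEL)

def sentence_sentiment(text):
    # Score each sentence; returns (sentence, [p_negative, p_neutral, p_positive]) pairs.
    results = []
    for sent in sent_tokenize(text):
        inputs = tokenizer(sent, return_tensors="pt", truncation=True)
        with torch.no_grad():
            logits = model(**inputs).logits
        probs = torch.softmax(logits, dim=-1).squeeze(0).tolist()
        results.append((sent, probs))
    return results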