Ryan committed
Commit 2d9e425 · 1 Parent(s): 2c58f4e
.DS_Store CHANGED
Binary files a/.DS_Store and b/.DS_Store differ
 
README.md CHANGED
@@ -125,6 +125,8 @@ The RoBERTa sentiment analysis classifier will output which model is more positi
 
 ![alt text](_images/03-roberta.png "Landing Page")
 
+The results are shown below.
+
 ![alt text](_images/03a-roberta.png "Landing Page")
 
 ## Summary
@@ -133,6 +135,8 @@ The summary tab provides a summary of two of the prompts: the Trump and Harris p
 
 ![alt text](_images/04-summary.png "Landing Page")
 
+Below is the summary area filled in after clicking the button with YOUR DATASET RESULTS selected.
+
 ![alt text](_images/04a-summary.png "Landing Page")
 
 
@@ -142,6 +146,8 @@ This is a hard-coded tab that displays some basic graphs. The first one is a ba
 
 ![alt text](_images/05-visuals.png "Landing Page")
 
+Below is the resulting chart.
+
 ![alt text](_images/05a-visuals.png "Landing Page")
 
 
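For context on the Visuals hunk above (a hard-coded tab whose first graph is a bar chart), a minimal matplotlib sketch of that kind of chart might look like the following. The model names and scores are made up for illustration; the app's actual plotting code is not part of this commit.

```python
import matplotlib.pyplot as plt

# Hypothetical model names and sentiment scores, for illustration only;
# the app's real chart is hard-coded with its own data.
models = ["model_a", "model_b"]
scores = [0.62, 0.48]

plt.bar(models, scores)
plt.ylabel("Positivity score")
plt.title("Sentiment by model")
plt.savefig("visuals_bar_chart.png")
```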
processors/bow_analysis.py CHANGED
@@ -1,16 +1,20 @@
 """
-Updated bow_analysis.py to include similarity metrics
+Updated bow_analysis.py to include similarity metrics.
+Preprocessing here is more advanced than in the n-gram version:
+lowercasing, tokenization, stopword removal, removal of non-alphabetic characters and short words, and lemmatization.
 """
 from sklearn.feature_extraction.text import CountVectorizer
-import numpy as np
-from collections import Counter
-import re
-import nltk
 from nltk.corpus import stopwords
 from nltk.stem import WordNetLemmatizer
 from nltk.tokenize import word_tokenize
 from processors.metrics import calculate_similarity
 
+# Currently unused imports, left in case they are needed again
+import numpy as np
+from collections import Counter
+import re
+import nltk
+
 # Define the compare_bow_across_texts function directly in this file
 def compare_bow_across_texts(texts, model_names, top_n=25):
     """
processors/ngram_analysis.py CHANGED
@@ -1,9 +1,12 @@
 """
-N-gram analysis for comparing text responses
+N-gram analysis for comparing text responses.
+Minimal preprocessing is done here, basically just tokenization and stop-word removal; from my research this is a good combination for n-gram analysis.
 """
 from sklearn.feature_extraction.text import CountVectorizer
-import numpy as np
+
+# Currently unused imports from earlier test versions; that code is removed, but these are left in case they are needed again.
 from collections import Counter
+import numpy as np
 import nltk
 from nltk.util import ngrams
 from nltk.tokenize import word_tokenize
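A minimal sketch of the n-gram counting this module's docstring describes, using only the tokenization and stop-word removal it mentions; the sample sentence and the `top_n` default are illustrative, and the module's actual implementation may differ.

```python
from collections import Counter

from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.util import ngrams

def top_ngrams(text, n=2, top_n=5):
    """Tokenize, drop stop words, and count the most common n-grams."""
    stop_words = set(stopwords.words("english"))
    tokens = [t for t in word_tokenize(text) if t.lower() not in stop_words]
    return Counter(ngrams(tokens, n)).most_common(top_n)

print(top_ngrams("The quick brown fox jumps over the lazy dog", n=2))
```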
processors/roberta_analysis.py CHANGED
@@ -2,7 +2,7 @@
 RoBERTa-based sentiment analysis for comparing LLM responses
 """
 import torch
-import numpy as np
+import numpy as np  # ended up not using this, but left in case I need it later
 from transformers import RobertaTokenizer, RobertaForSequenceClassification
 import nltk
 from nltk.tokenize import sent_tokenize
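Given the imports in this file (torch, RobertaTokenizer/RobertaForSequenceClassification, and sent_tokenize), a hedged sketch of sentence-level positivity scoring might look like this. The checkpoint name, the 3-class label order, and the averaging scheme are all assumptions; the actual roberta_analysis.py may do this differently.

```python
import torch
from nltk.tokenize import sent_tokenize
from transformers import RobertaTokenizer, RobertaForSequenceClassification

# Assumed checkpoint: a 3-class (negative/neutral/positive) sentiment model.
# The model actually used by roberta_analysis.py may differ.
MODEL = "cardiffnlp/twitter-roberta-base-sentiment"
tokenizer = RobertaTokenizer.from_pretrained(MODEL)
model = RobertaForSequenceClassification.from_pretrained(MODEL)
model.eval()

def positivity(text):
    """Average probability of the assumed 'positive' class (index 2) over sentences."""
    scores = []
    for sentence in sent_tokenize(text):
        inputs = tokenizer(sentence, return_tensors="pt", truncation=True)
        with torch.no_grad():
            logits = model(**inputs).logits
        scores.append(torch.softmax(logits, dim=-1)[0, 2].item())
    return sum(scores) / len(scores) if scores else 0.0

# Illustrative responses only
responses = {"model_a": "Great news all around!", "model_b": "This is a disaster."}
winner = max(responses, key=lambda name: positivity(responses[name]))
print(f"{winner} is more positive")
```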