import gradio as gr
from ui.dataset_input import create_dataset_input, load_example_dataset
from ui.analysis_screen import create_analysis_screen, process_analysis_request
from ui.roberta_screen import create_roberta_screen, process_roberta_request
from visualization.bow_visualizer import process_and_visualize_analysis
from visualization.roberta_visualizer import process_and_visualize_sentiment_analysis
import nltk
import os
import json
import matplotlib.pyplot as plt
import io
import base64
import datetime
from PIL import Image

# Download necessary NLTK resources
def download_nltk_resources():
    """Download required NLTK resources if not already downloaded."""
    try:
        # Create nltk_data directory in the user's home directory if it doesn't exist
        nltk_data_path = os.path.expanduser("~/nltk_data")
        os.makedirs(nltk_data_path, exist_ok=True)
        
        # Add this path to NLTK's data path
        nltk.data.path.append(nltk_data_path)
        
        # Download required resources
        resources = ['punkt', 'wordnet', 'stopwords', 'punkt_tab']
        for resource in resources:
            try:
                # Different resources can be in different directories in NLTK
                locations = [
                    f'tokenizers/{resource}',
                    f'corpora/{resource}',
                    f'taggers/{resource}',
                    f'{resource}'
                ]
                
                found = False
                for location in locations:
                    try:
                        nltk.data.find(location)
                        print(f"Resource {resource} already downloaded")
                        found = True
                        break
                    except LookupError:
                        continue
                
                if not found:
                    print(f"Downloading {resource}...")
                    nltk.download(resource, quiet=True)
            except Exception as e:
                print(f"Error with resource {resource}: {e}")
        
        print("NLTK resources check completed")
    except Exception as e:
        print(f"Error downloading NLTK resources: {e}")

def create_app():
    """
    Create a streamlined Gradio app for dataset input and analysis. 
    
    Returns:
        gr.Blocks: The Gradio application
    """
    with gr.Blocks(title="LLM Response Comparator") as app:
        # Application state to share data between tabs
        dataset_state = gr.State({})
        analysis_results_state = gr.State({})
        roberta_results_state = gr.State({})
        
        # Add a state for storing user dataset analysis results
        user_analysis_log = gr.State({})
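        # Note: gr.State values are per-session; Gradio passes the current value
        # into each handler listed in `inputs` and stores whatever the handler
        # returns for the matching entry in `outputs`.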
        
        # Dataset Input Tab
        with gr.Tab("Dataset Input"):
            # Exclude summary files (those named "summary-*") when listing available datasets
            dataset_files = [f for f in os.listdir("dataset")
                             if not f.startswith("summary-") and os.path.isfile(os.path.join("dataset", f))]
            dataset_inputs, example_dropdown, load_example_btn, create_btn, prompt, response1, model1, response2, model2 = create_dataset_input()
            
            # Add status indicator to show when dataset is created
            dataset_status = gr.Markdown("*No dataset loaded*")
            
            # Load example dataset
            load_example_btn.click(
                fn=load_example_dataset,
                inputs=[example_dropdown],
                outputs=[prompt, response1, model1, response2, model2]  # Update all field values
            )

            # Save dataset to state and update status
            def create_dataset(p, r1, m1, r2, m2):
                if not p or not r1 or not r2:
                    return {}, "❌ **Error:** Please fill in at least the prompt and both responses"
                
                dataset = {
                    "entries": [
                        {"prompt": p, "response": r1, "model": m1 or "Model 1"},
                        {"prompt": p, "response": r2, "model": m2 or "Model 2"}
                    ]
                }
                return dataset, "✅ **Dataset created successfully!** You can now go to the Analysis tab"
                
            create_btn.click(
                fn=create_dataset,
                inputs=[prompt, response1, model1, response2, model2],
                outputs=[dataset_state, dataset_status]
            )
        
        # Analysis Tab
        with gr.Tab("Analysis"):
            # Use create_analysis_screen to get UI components including visualization container
            analysis_options, analysis_params, run_analysis_btn, analysis_output, ngram_n, topic_count = create_analysis_screen()
            
            # Pre-create visualization components (initially hidden)
            visualization_area_visible = gr.Checkbox(value=False, visible=False, label="Visualization Visible")
            analysis_title = gr.Markdown("## Analysis Results", visible=False)
            prompt_title = gr.Markdown(visible=False)
            models_compared = gr.Markdown(visible=False)
            
            # Container for model 1 words
            model1_title = gr.Markdown(visible=False)
            model1_words = gr.Markdown(visible=False)
            
            # Container for model 2 words
            model2_title = gr.Markdown(visible=False)
            model2_words = gr.Markdown(visible=False)
            
            # Similarity metrics
            similarity_metrics_title = gr.Markdown("### Similarity Metrics", visible=False)
            similarity_metrics = gr.Markdown(visible=False)
            
            # Status or error message area
            status_message_visible = gr.Checkbox(value=False, visible=False, label="Status Message Visible")
            status_message = gr.Markdown(visible=False)
            
            # Define a helper function to extract parameter values and run the analysis
            def run_analysis(dataset, selected_analysis, ngram_n, topic_count, user_analysis_log, *args):
                """
                Run the analysis with the selected parameters
                
                Args:
                    dataset (dict): The dataset state
                    selected_analysis (str): The selected analysis type
                    ngram_n (str or int): N value for n-gram analysis
                    topic_count (str or int): Number of topics for topic modeling
                    user_analysis_log (dict): Log of user analysis results
                    *args: Additional arguments that might be passed by Gradio
                    
                Returns:
                    tuple: Analysis results and UI component updates
                """
                try:
                    if not dataset or "entries" not in dataset or not dataset["entries"]:
                        return (
                            {},  # analysis_results_state
                            user_analysis_log,  # user_analysis_log (unchanged)
                            False,  # analysis_output visibility
                            False,  # visualization_area_visible
                            gr.update(visible=False),  # analysis_title
                            gr.update(visible=False),  # prompt_title
                            gr.update(visible=False),  # models_compared
                            gr.update(visible=False),  # model1_title
                            gr.update(visible=False),  # model1_words
                            gr.update(visible=False),  # model2_title
                            gr.update(visible=False),  # model2_words
                            gr.update(visible=False),  # similarity_metrics_title
                            gr.update(visible=False),  # similarity_metrics
                            True,  # status_message_visible
                            gr.update(visible=True, value="**Error:** No dataset loaded. Please create or load a dataset first.")  # status_message
                        )
                    
                    parameters = {
                        "bow_top": 25,  # Default fixed value for Bag of Words
                        "ngram_n": ngram_n,
                        "ngram_top": 10,  # Default fixed value for N-gram analysis
                        "topic_count": topic_count,
                        "bias_methods": ["partisan"]  # Default to partisan leaning only
                    }
                    print(f"Running analysis with selected type: {selected_analysis}")
                    print("Parameters:", parameters)
                    
                    # Process the analysis request - passing selected_analysis as a string
                    analysis_results, _ = process_analysis_request(dataset, selected_analysis, parameters)
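                    # Shape assumed for analysis_results, inferred from the checks below:
                    # {"analyses": {prompt_text: {"bag_of_words": {...}, "ngram_analysis": {...},
                    #                             "topic_modeling": {...}, "classifier": {...},
                    #                             "bias_detection": {...}}}}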
                    
                    # If there's an error or no results
                    if not analysis_results or "analyses" not in analysis_results or not analysis_results["analyses"]:
                        return (
                            analysis_results,
                            user_analysis_log,  # user_analysis_log (unchanged)
                            False,
                            False,
                            gr.update(visible=False),
                            gr.update(visible=False),
                            gr.update(visible=False),
                            gr.update(visible=False),
                            gr.update(visible=False),
                            gr.update(visible=False),
                            gr.update(visible=False),
                            gr.update(visible=False),
                            gr.update(visible=False),
                            True,
                            gr.update(visible=True, value="**No results found.** Try a different analysis option.")
                        )
                    
                    # Extract information to display in components
                    prompt = list(analysis_results["analyses"].keys())[0]
                    analyses = analysis_results["analyses"][prompt]
                    
                    # Initialize visualization components visibilities and contents
                    visualization_area_visible = False
                    prompt_title_visible = False
                    prompt_title_value = ""
                    models_compared_visible = False
                    models_compared_value = ""
                    
                    model1_title_visible = False
                    model1_title_value = ""
                    model1_words_visible = False
                    model1_words_value = ""
                    
                    model2_title_visible = False
                    model2_title_value = ""
                    model2_words_visible = False
                    model2_words_value = ""
                    
                    similarity_title_visible = False
                    similarity_metrics_visible = False
                    similarity_metrics_value = ""
                    
                    # Update the user analysis log with the new results
                    updated_log = user_analysis_log.copy() if user_analysis_log else {}
                    
                    # Initialize this prompt in the log if it doesn't exist
                    if prompt not in updated_log:
                        updated_log[prompt] = {}
                    
                    # Store the analysis results in the log, mapping the UI label to the
                    # key used in the analyses dict (note "N-gram Analysis" maps to
                    # "ngram_analysis", not a naive lowercase/underscore conversion)
                    analysis_keys = {
                        "Bag of Words": "bag_of_words",
                        "N-gram Analysis": "ngram_analysis",
                        "Classifier": "classifier",
                        "Bias Detection": "bias_detection",
                        "Topic Modeling": "topic_modeling"
                    }
                    if selected_analysis in analysis_keys:
                        key = analysis_keys[selected_analysis]
                        if key in analyses:
                            updated_log[prompt][selected_analysis] = {
                                "timestamp": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                                "result": analyses[key]
                            }
                    
                    # Check for messages from placeholder analyses
                    if "message" in analyses:
                        return (
                            analysis_results,
                            updated_log,  # Return updated log
                            False,
                            False,
                            gr.update(visible=False),
                            gr.update(visible=False),
                            gr.update(visible=False),
                            gr.update(visible=False),
                            gr.update(visible=False),
                            gr.update(visible=False),
                            gr.update(visible=False),
                            gr.update(visible=False),
                            gr.update(visible=False),
                            True,
                            gr.update(visible=True, value=f"**{analyses['message']}**")  # status_message
                        )
                    
                    # Process based on the selected analysis type
                    if selected_analysis == "Bag of Words" and "bag_of_words" in analyses:
                        visualization_area_visible = True
                        bow_results = analyses["bag_of_words"]
                        models = bow_results.get("models", [])
                        
                        if len(models) >= 2:
                            prompt_title_visible = True
                            prompt_title_value = f"## Analysis of Prompt: \"{prompt[:100]}...\""
                            
                            models_compared_visible = True
                            models_compared_value = f"### Comparing responses from {models[0]} and {models[1]}"
                            
                            # Extract and format information for display
                            model1_name = models[0]
                            model2_name = models[1]
                            
                            # Format important words for each model
                            important_words = bow_results.get("important_words", {})
                            
                            if model1_name in important_words:
                                model1_title_visible = True
                                model1_title_value = f"#### Top Words Used by {model1_name}"
                                
                                word_list = [f"**{item['word']}** ({item['count']})" for item in important_words[model1_name][:10]]
                                model1_words_visible = True
                                model1_words_value = ", ".join(word_list)
                            
                            if model2_name in important_words:
                                model2_title_visible = True
                                model2_title_value = f"#### Top Words Used by {model2_name}"
                                
                                word_list = [f"**{item['word']}** ({item['count']})" for item in important_words[model2_name][:10]]
                                model2_words_visible = True
                                model2_words_value = ", ".join(word_list)
                            
                            # Format similarity metrics
                            comparisons = bow_results.get("comparisons", {})
                            comparison_key = f"{model1_name} vs {model2_name}"

                            if comparison_key in comparisons:
                                metrics = comparisons[comparison_key]
                                cosine = metrics.get("cosine_similarity", 0)
                                jaccard = metrics.get("jaccard_similarity", 0)
                                semantic = metrics.get("semantic_similarity", 0)
                                common_words = metrics.get("common_word_count", 0)
                                
                                similarity_title_visible = True
                                similarity_metrics_visible = True
                                similarity_metrics_value = f"""
                                - **Cosine Similarity**: {cosine:.2f} (higher means more similar word frequency patterns)
                                - **Jaccard Similarity**: {jaccard:.2f} (higher means more word overlap)
                                - **Semantic Similarity**: {semantic:.2f} (higher means more similar meaning)
                                - **Common Words**: {common_words} words appear in both responses
                                """
                                        
                    # Check for N-gram analysis
                    elif selected_analysis == "N-gram Analysis" and "ngram_analysis" in analyses:
                        visualization_area_visible = True
                        ngram_results = analyses["ngram_analysis"]
                        models = ngram_results.get("models", [])
                        ngram_size = ngram_results.get("ngram_size", 2)
                        size_name = "Unigrams" if ngram_size == 1 else f"{ngram_size}-grams"
                        
                        if len(models) >= 2:
                            prompt_title_visible = True
                            prompt_title_value = f"## Analysis of Prompt: \"{prompt[:100]}...\""
                            
                            models_compared_visible = True
                            models_compared_value = f"### {size_name} Analysis: Comparing responses from {models[0]} and {models[1]}"
                            
                            # Extract and format information for display
                            model1_name = models[0]
                            model2_name = models[1]
                            
                            # Format important n-grams for each model
                            important_ngrams = ngram_results.get("important_ngrams", {})
                            
                            if model1_name in important_ngrams:
                                model1_title_visible = True
                                model1_title_value = f"#### Top {size_name} Used by {model1_name}"
                                
                                # Create a better formatted list of n-grams 
                                ngram_list = []
                                for item in important_ngrams[model1_name][:10]:
                                    ngram_text = item['ngram']
                                    ngram_count = item['count']
                                    ngram_list.append(f"**{ngram_text}** ({ngram_count})")
                                
                                model1_words_visible = True
                                model1_words_value = ", ".join(ngram_list)
                            
                            if model2_name in important_ngrams:
                                model2_title_visible = True
                                model2_title_value = f"#### Top {size_name} Used by {model2_name}"
                                
                                # Create a better formatted list of n-grams
                                ngram_list = []
                                for item in important_ngrams[model2_name][:10]:
                                    ngram_text = item['ngram']
                                    ngram_count = item['count']
                                    ngram_list.append(f"**{ngram_text}** ({ngram_count})")
                                
                                model2_words_visible = True
                                model2_words_value = ", ".join(ngram_list)
                            
                            # Format similarity metrics if available
                            if "comparisons" in ngram_results:
                                comparison_key = f"{model1_name} vs {model2_name}"
                                
                                if comparison_key in ngram_results["comparisons"]:
                                    metrics = ngram_results["comparisons"][comparison_key]
                                    common_count = metrics.get("common_ngram_count", 0)
                                    
                                    similarity_title_visible = True
                                    similarity_metrics_visible = True
                                    similarity_metrics_value = f"""
                                    - **Common {size_name}**: {common_count} {size_name.lower()} appear in both responses
                                    """
                            
                            # Create a new function to generate N-gram visualizations
                            def generate_ngram_visualization(important_ngrams, model1_name, model2_name):
                                plt.figure(figsize=(12, 6))
                                
                                # Process data for model 1
                                model1_data = {}
                                if model1_name in important_ngrams:
                                    for item in important_ngrams[model1_name][:10]:
                                        model1_data[item['ngram']] = item['count']
                                
                                # Process data for model 2
                                model2_data = {}
                                if model2_name in important_ngrams:
                                    for item in important_ngrams[model2_name][:10]:
                                        model2_data[item['ngram']] = item['count']
                                
                                # Plot for the first model
                                plt.subplot(1, 2, 1)
                                sorted_data1 = sorted(model1_data.items(), key=lambda x: x[1], reverse=True)[:10]
                                terms1, counts1 = zip(*sorted_data1) if sorted_data1 else ([], [])
                                
                                # Create horizontal bar chart
                                plt.barh([t[:20] + '...' if len(t) > 20 else t for t in terms1[::-1]], counts1[::-1])
                                plt.xlabel('Frequency')
                                plt.title(f'Top {size_name} Used by {model1_name}')
                                plt.tight_layout()
                                
                                # Plot for the second model
                                plt.subplot(1, 2, 2)
                                sorted_data2 = sorted(model2_data.items(), key=lambda x: x[1], reverse=True)[:10]
                                terms2, counts2 = zip(*sorted_data2) if sorted_data2 else ([], [])
                                
                                # Create horizontal bar chart
                                plt.barh([t[:20] + '...' if len(t) > 20 else t for t in terms2[::-1]], counts2[::-1])
                                plt.xlabel('Frequency')
                                plt.title(f'Top {size_name} Used by {model2_name}')
                                plt.tight_layout()
                                
                                # Save the plot to a bytes buffer
                                buf = io.BytesIO()
                                plt.savefig(buf, format='png', dpi=100)
                                buf.seek(0)
                                plt.close()  # free the figure so repeated runs don't accumulate open figures
                                
                                # Convert to PIL Image
                                image = Image.open(buf)
                                return image
                            
                            # Create the visualization
                            try:
                                viz_image = generate_ngram_visualization(important_ngrams, model1_name, model2_name)
                                
                                # Convert the image to a base64 string for embedding
                                buffered = io.BytesIO()
                                viz_image.save(buffered, format="PNG")
                                img_str = base64.b64encode(buffered.getvalue()).decode()
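                                # Embedding the chart as a base64 data URI keeps it self-contained
                                # inside the Markdown component, with no temp file written to disk.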
                                
                                # Append the image to the metrics_value
                                similarity_metrics_value += f"""
                                <div style="margin-top: 20px;">
                                <img src="data:image/png;base64,{img_str}" alt="N-gram visualization" style="max-width: 100%;">
                                </div>
                                """
                                similarity_metrics_visible = True
                            except Exception as viz_error:
                                print(f"Visualization error: {viz_error}")
                                # Handle the error gracefully - continue without the visualization
                    
                    # Check for Topic Modeling analysis
                    elif selected_analysis == "Topic Modeling" and "topic_modeling" in analyses:
                        visualization_area_visible = True
                        topic_results = analyses["topic_modeling"]
                        models = topic_results.get("models", [])
                        method = topic_results.get("method", "lda").upper()
                        n_topics = topic_results.get("n_topics", 3)
                        
                        if len(models) >= 2:
                            prompt_title_visible = True
                            prompt_title_value = f"## Analysis of Prompt: \"{prompt[:100]}...\""
                            
                            models_compared_visible = True
                            models_compared_value = f"### Topic Modeling Analysis ({method}, {n_topics} topics)"
                            
                            # Extract and format topic information
                            topics = topic_results.get("topics", [])
                            
                            if topics:
                                # Format topic info for display
                                topic_info = []
                                for topic in topics[:3]:  # Show first 3 topics
                                    topic_id = topic.get("id", 0)
                                    words = topic.get("words", [])[:5]  # Top 5 words per topic
                                    
                                    if words:
                                        topic_info.append(f"**Topic {topic_id+1}**: {', '.join(words)}")
                                
                                if topic_info:
                                    model1_title_visible = True
                                    model1_title_value = "#### Discovered Topics"
                                    model1_words_visible = True
                                    model1_words_value = "\n".join(topic_info)
                            
                            # Get topic distributions for models
                            model_topics = topic_results.get("model_topics", {})
                            # Define the model names up front so the comparisons block
                            # below works even when model_topics is empty
                            model1_name = models[0]
                            model2_name = models[1]
                            
                            if model_topics:
                                
                                # Format topic distribution info
                                if model1_name in model_topics and model2_name in model_topics:
                                    model2_title_visible = True
                                    model2_title_value = "#### Topic Distribution"
                                    model2_words_visible = True
                                    
                                    # Simple distribution display
                                    dist1 = model_topics[model1_name]
                                    dist2 = model_topics[model2_name]
                                    
                                    model2_words_value = f"""
                                    **{model1_name}**: {', '.join([f"Topic {i+1}: {v:.2f}" for i, v in enumerate(dist1[:3])])}
                                    
                                    **{model2_name}**: {', '.join([f"Topic {i+1}: {v:.2f}" for i, v in enumerate(dist2[:3])])}
                                    """
                                        
                            # Add similarity metrics if available
                            comparisons = topic_results.get("comparisons", {})
                            if comparisons:
                                comparison_key = f"{model1_name} vs {model2_name}"
                                
                                if comparison_key in comparisons:
                                    metrics = comparisons[comparison_key]
                                    js_div = metrics.get("js_divergence", 0)
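                                    # Presumably the Jensen-Shannon divergence between the two topic
                                    # distributions: 0 means identical, larger means more dissimilar.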
                                    
                                    similarity_title_visible = True
                                    similarity_metrics_visible = True
                                    similarity_metrics_value = f"""
                                    - **Topic Distribution Divergence**: {js_div:.4f} (lower means more similar topic distributions)
                                    """
                    
                    # Check for Classifier analysis
                    elif selected_analysis == "Classifier" and "classifier" in analyses:
                        visualization_area_visible = True
                        classifier_results = analyses["classifier"]
                        models = classifier_results.get("models", [])
                        
                        if len(models) >= 2:
                            prompt_title_visible = True
                            prompt_title_value = f"## Analysis of Prompt: \"{prompt[:100]}...\""
                            
                            models_compared_visible = True
                            models_compared_value = f"### Classifier Analysis for {models[0]} and {models[1]}"
                            
                            # Extract and format classifier information
                            model1_name = models[0]
                            model2_name = models[1]
                            
                            # Display classifications for each model
                            classifications = classifier_results.get("classifications", {})
                            
                            if classifications:
                                model1_title_visible = True
                                model1_title_value = f"#### Classification Results"
                                model1_words_visible = True
                                
                                model1_results = classifications.get(model1_name, {})
                                model2_results = classifications.get(model2_name, {})
                                
                                model1_words_value = f"""
                                **{model1_name}**:
                                - Formality: {model1_results.get('formality', 'N/A')}
                                - Sentiment: {model1_results.get('sentiment', 'N/A')}
                                - Complexity: {model1_results.get('complexity', 'N/A')}
                                
                                **{model2_name}**:
                                - Formality: {model2_results.get('formality', 'N/A')}
                                - Sentiment: {model2_results.get('sentiment', 'N/A')}
                                - Complexity: {model2_results.get('complexity', 'N/A')}
                                """
                                
                                # Show comparison
                                model2_title_visible = True
                                model2_title_value = f"#### Classification Comparison"
                                model2_words_visible = True
                                
                                differences = classifier_results.get("differences", {})
                                model2_words_value = "\n".join([
                                    f"- **{category}**: {diff}" 
                                    for category, diff in differences.items()
                                ])
                                
                                # Create a grouped bar chart comparing the classifications with matplotlib
                                try:
                                    # Define metrics and mappings
                                    metrics = ['Formality', 'Sentiment', 'Complexity']
                                    mapping = {
                                        'Formality': {'Informal': 1, 'Neutral': 2, 'Formal': 3},
                                        'Sentiment': {'Negative': 1, 'Neutral': 2, 'Positive': 3},
                                        'Complexity': {'Simple': 1, 'Average': 2, 'Complex': 3}
                                    }
                                    
                                    # Get values for each model; missing or unknown labels
                                    # default to the middle level (2)
                                    defaults = {'Formality': 'Neutral', 'Sentiment': 'Neutral', 'Complexity': 'Average'}
                                    model1_vals = []
                                    model2_vals = []
                                    for metric in metrics:
                                        for results, vals in ((model1_results, model1_vals), (model2_results, model2_vals)):
                                            label = results.get(metric.lower(), defaults[metric])
                                            vals.append(mapping[metric].get(label, 2))
                                    
                                    # Plot grouped bar chart
                                    plt.figure(figsize=(10, 6))
                                    x = range(len(metrics))
                                    width = 0.35
                                    plt.bar([p - width/2 for p in x], model1_vals, width=width, label=model1_name)
                                    plt.bar([p + width/2 for p in x], model2_vals, width=width, label=model2_name)
                                    plt.xticks(x, metrics)
                                    plt.yticks([1, 2, 3], ['Low', 'Medium', 'High'])
                                    plt.ylim(0, 3.5)
                                    plt.ylabel('Level')
                                    plt.title('Comparison of Model Characteristics')
                                    plt.legend()
                                    plt.tight_layout()
                                    
                                    # Save the plot to a bytes buffer
                                    buf = io.BytesIO()
                                    plt.savefig(buf, format='png', dpi=100)
                                    buf.seek(0)
                                    plt.close()  # free the figure so repeated runs don't accumulate open figures
                                    
                                    # Convert to PIL Image
                                    viz_image = Image.open(buf)
                                    
                                    # Convert the image to a base64 string for embedding
                                    buffered = io.BytesIO()
                                    viz_image.save(buffered, format="PNG")
                                    img_str = base64.b64encode(buffered.getvalue()).decode()
                                    
                                    # Append the image to the metrics_value
                                    similarity_title_visible = True
                                    similarity_metrics_visible = True
                                    similarity_metrics_value = f"""
                                    <div style="margin-top: 20px;">
                                    <img src="data:image/png;base64,{img_str}" alt="Classifier visualization" style="max-width: 100%;">
                                    </div>
                                    """
                                except Exception as viz_error:
                                    print(f"Classifier visualization error: {viz_error}")

                    # Check for Bias Detection analysis
                    elif selected_analysis == "Bias Detection" and "bias_detection" in analyses:
                        visualization_area_visible = True
                        bias_results = analyses["bias_detection"]
                        models = bias_results.get("models", [])
                        
                        if len(models) >= 2:
                            prompt_title_visible = True
                            prompt_title_value = f"## Analysis of Prompt: \"{prompt[:100]}...\""
                            
                            models_compared_visible = True
                            models_compared_value = f"### Bias Analysis: Comparing responses from {models[0]} and {models[1]}"
                            
                            # Display comparative bias results
                            model1_name = models[0]
                            model2_name = models[1]
                            
                            if "comparative" in bias_results:
                                comparative = bias_results["comparative"]
                                
                                # Format summary for display
                                model1_title_visible = True
                                model1_title_value = "#### Bias Detection Summary"
                                model1_words_visible = True
                                
                                summary_parts = []
                                
                                # Add partisan comparison (focus on partisan leaning)
                                if "partisan" in comparative:
                                    part = comparative["partisan"]
                                    is_significant = part.get("significant", False)
                                    summary_parts.append(
                                        f"**Partisan Leaning**: {model1_name} appears {part.get(model1_name, 'N/A')}, " +
                                        f"while {model2_name} appears {part.get(model2_name, 'N/A')}. " +
                                        f"({'Significant' if is_significant else 'Minor'} difference)"
                                    )
                                
                                # Add overall assessment
                                if "overall" in comparative:
                                    overall = comparative["overall"]
                                    significant = overall.get("significant_bias_difference", False)
                                    summary_parts.append(
                                        f"**Overall Assessment**: " +
                                        f"Analysis shows a {overall.get('difference', 0):.2f}/1.0 difference in bias patterns. " +
                                        f"({'Significant' if significant else 'Minor'} overall bias difference)"
                                    )
                                
                                # Combine all parts
                                model1_words_value = "\n\n".join(summary_parts)
                                
                                # Format detailed term analysis
                                if (model1_name in bias_results and "partisan" in bias_results[model1_name] and
                                    model2_name in bias_results and "partisan" in bias_results[model2_name]):
                                    
                                    model2_title_visible = True
                                    model2_title_value = "#### Partisan Term Analysis"
                                    model2_words_visible = True
                                    
                                    m1_lib = bias_results[model1_name]["partisan"].get("liberal_terms", [])
                                    m1_con = bias_results[model1_name]["partisan"].get("conservative_terms", [])
                                    m2_lib = bias_results[model2_name]["partisan"].get("liberal_terms", [])
                                    m2_con = bias_results[model2_name]["partisan"].get("conservative_terms", [])
                                    
                                    model2_words_value = f"""
                                    **{model1_name}**:
                                    - Liberal terms: {', '.join(m1_lib) if m1_lib else 'None detected'}
                                    - Conservative terms: {', '.join(m1_con) if m1_con else 'None detected'}
                                    
                                    **{model2_name}**:
                                    - Liberal terms: {', '.join(m2_lib) if m2_lib else 'None detected'}
                                    - Conservative terms: {', '.join(m2_con) if m2_con else 'None detected'}
                                    """
                    
                    # If we don't have visualization data from any analysis
                    if not visualization_area_visible:
                        return (
                            analysis_results,
                            updated_log,  # Return updated log
                            False,
                            False,
                            gr.update(visible=False),  # analysis_title
                            gr.update(visible=False),  # prompt_title
                            gr.update(visible=False),  # models_compared
                            gr.update(visible=False),  # model1_title
                            gr.update(visible=False),  # model1_words
                            gr.update(visible=False),  # model2_title
                            gr.update(visible=False),  # model2_words
                            gr.update(visible=False),  # similarity_metrics_title
                            gr.update(visible=False),  # similarity_metrics
                            True,  # status_message_visible
                            gr.update(visible=True, value="**No visualization data found.** Make sure to select a valid analysis option.")
                        )

                    # Return all updated component values
                    return (
                        analysis_results,  # analysis_results_state
                        updated_log,  # user_analysis_log (updated with new results)
                        False,  # analysis_output visibility
                        True,   # visualization_area_visible
                        gr.update(visible=True),  # analysis_title
                        gr.update(visible=prompt_title_visible, value=prompt_title_value),  # prompt_title
                        gr.update(visible=models_compared_visible, value=models_compared_value),  # models_compared
                        gr.update(visible=model1_title_visible, value=model1_title_value),  # model1_title
                        gr.update(visible=model1_words_visible, value=model1_words_value),  # model1_words
                        gr.update(visible=model2_title_visible, value=model2_title_value),  # model2_title
                        gr.update(visible=model2_words_visible, value=model2_words_value),  # model2_words
                        gr.update(visible=similarity_title_visible),  # similarity_metrics_title
                        gr.update(visible=similarity_metrics_visible, value=similarity_metrics_value),  # similarity_metrics
                        False,  # status_message_visible
                        gr.update(visible=False)  # status_message
                    )
                
                except Exception as e:
                    import traceback
                    error_msg = f"Error in analysis: {str(e)}\n{traceback.format_exc()}"
                    print(error_msg)
                    
                    return (
                        {"error": error_msg},  # analysis_results_state
                        user_analysis_log,  # Return unchanged log
                        True,  # analysis_output visibility (show raw JSON for debugging)
                        False,  # visualization_area_visible
                        gr.update(visible=False),  # analysis_title
                        gr.update(visible=False),  # prompt_title
                        gr.update(visible=False),  # models_compared
                        gr.update(visible=False),  # model1_title
                        gr.update(visible=False),  # model1_words
                        gr.update(visible=False),  # model2_title
                        gr.update(visible=False),  # model2_words
                        gr.update(visible=False),  # similarity_metrics_title
                        gr.update(visible=False),  # similarity_metrics
                        True,  # status_message_visible
                        gr.update(visible=True, value=f"**Error during analysis:**\n\n```\n{str(e)}\n```")  # status_message
                    )
        
        # RoBERTa Sentiment Analysis Tab
        with gr.Tab("RoBERTa Sentiment"):
            # Create the RoBERTa analysis UI components
            run_roberta_btn, roberta_output, visualization_container, roberta_status = create_roberta_screen()
            
            # Create a container for visualization results
            with gr.Column() as roberta_viz_container:
                # Placeholder components that the RoBERTa analysis callback updates
                roberta_viz_title = gr.Markdown("## RoBERTa Sentiment Analysis Results", visible=False)
                roberta_viz_content = gr.HTML("", visible=False)
            
            # Function to run RoBERTa sentiment analysis
            def run_roberta_analysis(dataset, existing_log):
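                """Run RoBERTa sentiment analysis on the loaded dataset and build an
                HTML summary of the results.

                Returns a 6-tuple matching the click handler's outputs:
                (roberta_results_state, user_analysis_log, roberta_status,
                roberta_output, roberta_viz_title, roberta_viz_content).
                """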
                try:
                    print("Starting run_roberta_analysis function")
                    if not dataset or "entries" not in dataset or not dataset["entries"]:
                        return (
                            {},  # roberta_results_state
                            existing_log,  # no change to user_analysis_log
                            gr.update(visible=True, value="**Error:** No dataset loaded. Please create or load a dataset first."),  # roberta_status
                            gr.update(visible=False),  # roberta_output
                            gr.update(visible=False),  # roberta_viz_title
                            gr.update(visible=False)   # roberta_viz_content
                        )
                    
                    print(f"Running RoBERTa sentiment analysis with sentence-level, style=")
                    
                    # Process the analysis request
                    roberta_results = process_roberta_request(dataset)

                    print(f"RoBERTa results obtained. Size: {len(str(roberta_results))} characters")
                    
                    # Update the user analysis log with the RoBERTa results
                    # (copy first so the existing log state is not mutated in place)
                    updated_log = existing_log.copy() if existing_log else {}
                    
                    # Get the prompt text
                    prompt_text = None
                    if "analyses" in roberta_results:
                        prompt_text = list(roberta_results["analyses"].keys())[0] if roberta_results["analyses"] else None
                    
                    if prompt_text:
                        # Initialize this prompt in the log if it doesn't exist
                        if prompt_text not in updated_log:
                            updated_log[prompt_text] = {}
                        
                        # Store the RoBERTa results
                        if "analyses" in roberta_results and prompt_text in roberta_results["analyses"]:
                            if "roberta_sentiment" in roberta_results["analyses"][prompt_text]:
                                updated_log[prompt_text]["RoBERTa Sentiment"] = {
                                    "timestamp": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                                    "result": roberta_results["analyses"][prompt_text]["roberta_sentiment"]
                                }
                    
                    # Check if we have results
                    if "error" in roberta_results:
                        return (
                            roberta_results,  # Store in state anyway for debugging
                            updated_log,  # Return updated log
                            gr.update(visible=True, value=f"**Error:** {roberta_results['error']}"),  # roberta_status
                            gr.update(visible=False),  # Hide raw output
                            gr.update(visible=False),  # roberta_viz_title
                            gr.update(visible=False)   # roberta_viz_content
                        )
                    
                    print("About to process visualization components")
                    viz_components = process_and_visualize_sentiment_analysis(roberta_results)
                    print(f"Visualization components generated: {len(viz_components)}")
                    
                    print("Starting HTML conversion of visualization components")

                    # Convert the visualization components to HTML - OPTIMIZED VERSION 
                    print("Starting HTML conversion of visualization components")
                    html_content = "<div class='sentiment-visualization'>"
                    html_content += "<h3>Sentiment Analysis Results</h3>"
                    
                    if "analyses" in roberta_results:
                        for prompt, analyses in roberta_results["analyses"].items():
                            if "roberta_sentiment" in analyses:
                                sentiment_result = analyses["roberta_sentiment"]
                                models = sentiment_result.get("models", [])
                                
                                if len(models) >= 2:
                                    # Add overall comparison
                                    if "comparison" in sentiment_result:
                                        comparison = sentiment_result["comparison"]
                                        html_content += f"<div class='comparison-section'>"
                                        html_content += f"<p><strong>{comparison.get('difference_direction', 'Models have different sentiment patterns')}</strong></p>"
                                        html_content += f"</div>"
                                    
                                    # Add individual model results
                                    sentiment_analysis = sentiment_result.get("sentiment_analysis", {})
                                    for model in models:
                                        if model in sentiment_analysis:
                                            model_result = sentiment_analysis[model]
                                            score = model_result.get("sentiment_score", 0)
                                            label = model_result.get("label", "neutral")
                                            
                                            html_content += f"<div class='model-result'>"
                                            html_content += f"<h4>{model}</h4>"
                                            html_content += f"<p>Sentiment: <strong>{label}</strong> (Score: {score:.2f})</p>"
                                            html_content += f"</div>"

                    html_content += "</div>"
                    print("HTML conversion completed")
                    
                    # Return updated values
                    return (
                        roberta_results,  # roberta_results_state
                        updated_log,  # Return updated log
                        gr.update(visible=False),  # roberta_status (hide status message)
                        gr.update(visible=False),  # roberta_output (hide raw output)
                        gr.update(visible=True),   # roberta_viz_title (show title)
                        gr.update(visible=True, value=html_content)  # roberta_viz_content (show content)
                    )
                            
                except Exception as e:
                    import traceback
                    error_msg = f"Error in RoBERTa analysis: {str(e)}\n{traceback.format_exc()}"
                    print(error_msg)
                    
                    return (
                        {"error": error_msg},  # roberta_results_state
                        existing_log,  # Return unchanged log
                        gr.update(visible=True, value=f"**Error during RoBERTa analysis:**\n\n```\n{str(e)}\n```"),  # roberta_status
                        gr.update(visible=False),  # Hide raw output
                        gr.update(visible=False),  # roberta_viz_title
                        gr.update(visible=False)   # roberta_viz_content
                    )
            
            # Connect the run button to the analysis function
            run_roberta_btn.click(
                fn=run_roberta_analysis,
                inputs=[dataset_state, user_analysis_log],
                outputs=[
                    roberta_results_state,
                    user_analysis_log,
                    roberta_status,
                    roberta_output,
                    roberta_viz_title,
                    roberta_viz_content
                ]
            )
        
        # Add a Summary tab
        with gr.Tab("Summary"):
            gr.Markdown("## Analysis Summaries")
            
            with gr.Row():
                with gr.Column(scale=1):
                    # Get summary files from the dataset directory (empty if it doesn't exist)
                    summary_files = [f for f in os.listdir("dataset") if f.startswith("summary-") and f.endswith(".txt")] if os.path.isdir("dataset") else []
                    
                    # Dropdown for selecting summary file
                    summary_dropdown = gr.Dropdown(
                        choices=["YOUR DATASET RESULTS"] + summary_files,
                        label="Select Summary",
                        info="Choose a summary to display",
                        value="YOUR DATASET RESULTS"
                    )
                    
                    load_summary_btn = gr.Button("Load Summary", variant="primary")

                    summary_assistant_prompt = gr.Textbox(
                        value="Attached are the results from various NLP based comparisons between two LLM responses on the same prompt. Give your interpretation of the results.",
                        label="Analysis Assistant Prompt",
                        lines=3,
                        interactive=True,
                    )
                
                with gr.Column(scale=3):
                    summary_content = gr.Textbox(
                        label="Summary Content",
                        lines=25,
                        max_lines=50,
                        interactive=False
                    )
                    
                    summary_status = gr.Markdown("*No summary loaded*")
            
            # Function to load summary content from file or user analysis
            def load_summary_content(file_name, user_log):
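                """Load summary text for display in the Summary tab.

                The special "YOUR DATASET RESULTS" option formats the user's
                in-session analysis log; any other selection is read from a
                summary-*.txt file in the dataset directory. Returns a
                (content, status_markdown) pair.
                """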
                if not file_name:
                    return "", "*No summary selected*"
                
                # Handle the special "YOUR DATASET RESULTS" option
                if file_name == "YOUR DATASET RESULTS":
                    if not user_log or not any(user_log.values()):
                        return "", "**No analysis results available.** Run some analyses in the Analysis tab first."
                    
                    # Format the user analysis log as text
                    content = "# YOUR DATASET ANALYSIS RESULTS\n\n"
                    
                    for prompt, analyses in user_log.items():
                        content += f"## Analysis of Prompt: \"{prompt[:100]}{'...' if len(prompt) > 100 else ''}\"\n\n"
                        
                        if not analyses:
                            content += "_No analyses run for this prompt._\n\n"
                            continue
                        
                        # Order the analyses in a specific sequence
                        analysis_order = ["Bag of Words", "N-gram Analysis", "Classifier", "Bias Detection", "RoBERTa Sentiment"]
                        
                        for analysis_type in analysis_order:
                            if analysis_type in analyses:
                                analysis_data = analyses[analysis_type]
                                timestamp = analysis_data.get("timestamp", "")
                                result = analysis_data.get("result", {})
                                
                                content += f"### {analysis_type} ({timestamp})\n\n"
                                
                                # Format based on analysis type
                                if analysis_type == "Bag of Words":
                                    models = result.get("models", [])
                                    if len(models) >= 2:
                                        content += f"Comparing responses from {models[0]} and {models[1]}\n\n"
                                        
                                        # Add important words for each model
                                        important_words = result.get("important_words", {})
                                        for model_name in models:
                                            if model_name in important_words:
                                                content += f"Top Words Used by {model_name}\n"
                                                word_list = [f"{item['word']} ({item['count']})" for item in important_words[model_name][:10]]
                                                content += ", ".join(word_list) + "\n\n"
                                        
                                        # Add similarity metrics
                                        comparisons = result.get("comparisons", {})
                                        comparison_key = f"{models[0]} vs {models[1]}"
                                        if comparison_key in comparisons:
                                            metrics = comparisons[comparison_key]
                                            content += "Similarity Metrics\n"
                                            content += f"Cosine Similarity: {metrics.get('cosine_similarity', 0):.2f} (higher means more similar word frequency patterns)\n"
                                            content += f"Jaccard Similarity: {metrics.get('jaccard_similarity', 0):.2f} (higher means more word overlap)\n"
                                            content += f"Semantic Similarity: {metrics.get('semantic_similarity', 0):.2f} (higher means more similar meaning)\n"
                                            content += f"Common Words: {metrics.get('common_word_count', 0)} words appear in both responses\n\n"
                                
                                elif analysis_type == "N-gram Analysis":
                                    models = result.get("models", [])
                                    ngram_size = result.get("ngram_size", 2)
                                    size_name = "Unigrams" if ngram_size == 1 else f"{ngram_size}-grams"
                                    
                                    if len(models) >= 2:
                                        content += f"{size_name} Analysis: Comparing responses from {models[0]} and {models[1]}\n\n"
                                        
                                        # Add important n-grams for each model
                                        important_ngrams = result.get("important_ngrams", {})
                                        for model_name in models:
                                            if model_name in important_ngrams:
                                                content += f"Top {size_name} Used by {model_name}\n"
                                                ngram_list = [f"{item['ngram']} ({item['count']})" for item in important_ngrams[model_name][:10]]
                                                content += ", ".join(ngram_list) + "\n\n"
                                        
                                        # Add similarity metrics
                                        if "comparisons" in result:
                                            comparison_key = f"{models[0]} vs {models[1]}"
                                            if comparison_key in result["comparisons"]:
                                                metrics = result["comparisons"][comparison_key]
                                                content += "Similarity Metrics\n"
                                                content += f"Common {size_name}: {metrics.get('common_ngram_count', 0)} {size_name.lower()} appear in both responses\n\n"
                                
                                elif analysis_type == "Classifier":
                                    models = result.get("models", [])
                                    if len(models) >= 2:
                                        content += f"Classifier Analysis for {models[0]} and {models[1]}\n\n"
                                        
                                        # Add classification results
                                        classifications = result.get("classifications", {})
                                        if classifications:
                                            content += "Classification Results\n"
                                            for model_name in models:
                                                if model_name in classifications:
                                                    model_results = classifications[model_name]
                                                    content += f"{model_name}:\n"
                                                    content += f"- Formality: {model_results.get('formality', 'N/A')}\n"
                                                    content += f"- Sentiment: {model_results.get('sentiment', 'N/A')}\n"
                                                    content += f"- Complexity: {model_results.get('complexity', 'N/A')}\n\n"
                                            
                                            # Add differences
                                            differences = result.get("differences", {})
                                            if differences:
                                                content += "Classification Comparison\n"
                                                for category, diff in differences.items():
                                                    content += f"- {category}: {diff}\n"
                                                content += "\n"
                                
                                elif analysis_type == "Bias Detection":
                                    models = result.get("models", [])
                                    if len(models) >= 2:
                                        content += f"Bias Analysis: Comparing responses from {models[0]} and {models[1]}\n\n"
                                        
                                        # Add comparative results
                                        if "comparative" in result:
                                            comparative = result["comparative"]
                                            content += "Bias Detection Summary\n"
                                            
                                            if "partisan" in comparative:
                                                part = comparative["partisan"]
                                                is_significant = part.get("significant", False)
                                                content += f"Partisan Leaning: {models[0]} appears {part.get(models[0], 'N/A')}, "
                                                content += f"while {models[1]} appears {part.get(models[1], 'N/A')}. "
                                                content += f"({'Significant' if is_significant else 'Minor'} difference)\n\n"
                                            
                                            if "overall" in comparative:
                                                overall = comparative["overall"]
                                                significant = overall.get("significant_bias_difference", False)
                                                content += f"Overall Assessment: "
                                                content += f"Analysis shows a {overall.get('difference', 0):.2f}/1.0 difference in bias patterns. "
                                                content += f"({'Significant' if significant else 'Minor'} overall bias difference)\n\n"
                                            
                                            # Add partisan terms
                                            content += "Partisan Term Analysis\n"
                                            for model_name in models:
                                                if model_name in result and "partisan" in result[model_name]:
                                                    partisan = result[model_name]["partisan"]
                                                    content += f"{model_name}:\n"
                                                    
                                                    lib_terms = partisan.get("liberal_terms", [])
                                                    con_terms = partisan.get("conservative_terms", [])
                                                    
                                                    content += f"- Liberal terms: {', '.join(lib_terms) if lib_terms else 'None detected'}\n"
                                                    content += f"- Conservative terms: {', '.join(con_terms) if con_terms else 'None detected'}\n\n"
                                
                                elif analysis_type == "RoBERTa Sentiment":
                                    models = result.get("models", [])
                                    if len(models) >= 2:
                                        content += "Sentiment Analysis Results\n"
                                        
                                        # Add comparison info
                                        if "comparison" in result:
                                            comparison = result["comparison"]
                                            if "difference_direction" in comparison:
                                                content += f"{comparison['difference_direction']}\n\n"
                                        
                                        # Add individual model results
                                        sentiment_analysis = result.get("sentiment_analysis", {})
                                        for model_name in models:
                                            if model_name in sentiment_analysis:
                                                model_result = sentiment_analysis[model_name]
                                                score = model_result.get("sentiment_score", 0)
                                                label = model_result.get("label", "neutral")
                                                
                                                content += f"{model_name}\n"
                                                content += f"Sentiment: {label} (Score: {score:.2f})\n\n"
                    
                    return content, f"**Loaded user analysis results**"
                    
                # Regular file loading for built-in summaries
                file_path = os.path.join("dataset", file_name)
                if os.path.exists(file_path):
                    try:
                        with open(file_path, 'r', encoding='utf-8') as f:
                            content = f.read()
                        return content, f"**Loaded summary**: {file_name}"
                    except Exception as e:
                        return "", f"**Error loading summary**: {str(e)}"
                else:
                    return "", f"**File not found**: {file_path}"
                
            def update_summary_dropdown(user_log):
                """Update summary dropdown options based on user log state"""
                choices = ["YOUR DATASET RESULTS"]
                if os.path.isdir("dataset"):
                    choices.extend(f for f in os.listdir("dataset") if f.startswith("summary-") and f.endswith(".txt"))
                return gr.update(choices=choices, value="YOUR DATASET RESULTS")
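            # NOTE: update_summary_dropdown appears to be referenced only by the
            # disabled app.load block at the end of create_app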
            
            # Connect the load button to the function
            load_summary_btn.click(
                fn=load_summary_content,
                inputs=[summary_dropdown, user_analysis_log],
                outputs=[summary_content, summary_status]
            )
            
            # Also load summary when dropdown changes
            summary_dropdown.change(
                fn=load_summary_content,
                inputs=[summary_dropdown, user_analysis_log],
                outputs=[summary_content, summary_status]
            )
        # Add a Visuals tab for plotting graphs
        with gr.Tab("Visuals"):
            gr.Markdown("## Visualization Graphs")
            
            with gr.Row():
                with gr.Column(scale=1):
                    # Dropdown for selecting visualization type
                    viz_type = gr.Dropdown(
                        choices=["N-gram Comparison", "Word Frequency", "Sentiment Analysis"],
                        label="Visualization Type",
                        info="Select the type of visualization to display",
                        value="N-gram Comparison"
                    )
                    
                    # Button to generate visualization
                    generate_viz_btn = gr.Button("Generate Visualization", variant="primary")
                    
                with gr.Column(scale=3):
                    # Image component to display the plot
                    viz_output = gr.Image(
                        label="Visualization",
                        type="pil",
                        height=500
                    )
                    
                    viz_status = gr.Markdown("*No visualization generated*")
            
            # Function to generate and display visualizations
            def generate_visualization(viz_type, dataset, analysis_results):
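                """Render the selected chart type with matplotlib.

                Uses n-gram, word-frequency, or sentiment data from analysis_results
                when present, falling back to bundled example data otherwise.
                Returns a (PIL.Image or None, status_markdown) pair.
                """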
                try:
                    if not dataset or "entries" not in dataset or not dataset["entries"]:
                        return None, "❌ **Error:** No dataset loaded. Please create or load a dataset first."
                    
                    # Example data (fallback when no real data is available)
                    ex_data = {
                        'attorney general': 3,
                        'social justice': 3,
                        'centrist approach': 2,
                        'climate change': 2,
                        'criminal justice': 2,
                        'gun control': 2,
                        'human rights': 2,
                        'justice issues': 2,
                        'measures like': 2,
                        'middle class': 2
                    }

                    gran_data = {
                        'political views': 3,
                        'vice president': 3,
                        'criminal justice': 2,
                        'democratic party': 2,
                        'foreign policy': 2,
                        'harris advocated': 2,
                        'lgbtq rights': 2,
                        'president harris': 2,
                        'social issues': 2,
                        '2019 proposed': 1
                    }
                    
                    # Use real data if available in analysis_results
                    model1_data = {}
                    model2_data = {}
                    model1_name = "Model 1"
                    model2_name = "Model 2"
                    
                    # Extract actual model names from dataset
                    if dataset and "entries" in dataset and len(dataset["entries"]) >= 2:
                        model1_name = dataset["entries"][0].get("model", "Model 1")
                        model2_name = dataset["entries"][1].get("model", "Model 2")
                    
                    # Try to get real data from analysis_results
                    if analysis_results and "analyses" in analysis_results:
                        for prompt, analyses in analysis_results["analyses"].items():
                            if viz_type == "N-gram Comparison" and "ngram_analysis" in analyses:
                                ngram_results = analyses["ngram_analysis"]
                                important_ngrams = ngram_results.get("important_ngrams", {})
                                
                                if model1_name in important_ngrams:
                                    model1_data = {item["ngram"]: item["count"] for item in important_ngrams[model1_name]}
                                    
                                if model2_name in important_ngrams:
                                    model2_data = {item["ngram"]: item["count"] for item in important_ngrams[model2_name]}
                                    
                            elif viz_type == "Word Frequency" and "bag_of_words" in analyses:
                                bow_results = analyses["bag_of_words"]
                                important_words = bow_results.get("important_words", {})
                                
                                if model1_name in important_words:
                                    model1_data = {item["word"]: item["count"] for item in important_words[model1_name]}
                                    
                                if model2_name in important_words:
                                    model2_data = {item["word"]: item["count"] for item in important_words[model2_name]}
                    
                    # If we couldn't get real data, use example data
                    if not model1_data:
                        model1_data = ex_data
                    if not model2_data:
                        model2_data = gran_data
                    
                    # Create the visualization
                    plt.figure(figsize=(10, 6))
                    
                    if viz_type == "N-gram Comparison" or viz_type == "Word Frequency":
                        # Plot for the first model
                        plt.subplot(1, 2, 1)
                        sorted_data1 = sorted(model1_data.items(), key=lambda x: x[1], reverse=True)[:10]  # Top 10
                        terms1, counts1 = zip(*sorted_data1) if sorted_data1 else ([], [])
                        
                        # Create horizontal bar chart
                        plt.barh([t[:20] + '...' if len(t) > 20 else t for t in terms1[::-1]], counts1[::-1])
                        plt.xlabel('Frequency')
                        plt.title(f'Top {viz_type.split()[0]}s Used by {model1_name}')
                        plt.tight_layout()
                        
                        # Plot for the second model
                        plt.subplot(1, 2, 2)
                        sorted_data2 = sorted(model2_data.items(), key=lambda x: x[1], reverse=True)[:10]  # Top 10
                        terms2, counts2 = zip(*sorted_data2) if sorted_data2 else ([], [])
                        
                        # Create horizontal bar chart
                        plt.barh([t[:20] + '...' if len(t) > 20 else t for t in terms2[::-1]], counts2[::-1])
                        plt.xlabel('Frequency')
                        plt.title(f'Top {viz_type.split()[0]}s Used by {model2_name}')
                        plt.tight_layout()
                    
                    elif viz_type == "Sentiment Analysis":
                        # Generate sentiment comparison visualization
                        # This would be populated with real data when available
                        sentiment_scores = {
                            model1_name: 0.75,  # Example score
                            model2_name: 0.25   # Example score
                        }
                        
                        # Extract real sentiment scores if available (expects RoBERTa results
                        # to have been merged into analysis_results under "roberta_results_state";
                        # otherwise the example scores above are used)
                        if "roberta_results_state" in analysis_results:
                            roberta_results = analysis_results["roberta_results_state"]
                            if "analyses" in roberta_results:
                                for prompt, analyses in roberta_results["analyses"].items():
                                    if "roberta_sentiment" in analyses:
                                        sentiment_result = analyses["roberta_sentiment"]
                                        sentiment_analysis = sentiment_result.get("sentiment_analysis", {})
                                        
                                        if model1_name in sentiment_analysis:
                                            sentiment_scores[model1_name] = sentiment_analysis[model1_name].get("sentiment_score", 0)
                                            
                                        if model2_name in sentiment_analysis:
                                            sentiment_scores[model2_name] = sentiment_analysis[model2_name].get("sentiment_score", 0)
                        
                        # Create sentiment bar chart
                        plt.bar(list(sentiment_scores.keys()), list(sentiment_scores.values()))
                        plt.ylim(-1, 1)
                        plt.ylabel('Sentiment Score (-1 to 1)')
                        plt.title('Sentiment Analysis Comparison')
                        plt.axhline(y=0, color='r', linestyle='-', alpha=0.3)  # Add a zero line
                    
                    # Save the plot to a bytes buffer, then close the figure so
                    # repeated runs don't accumulate open matplotlib figures
                    buf = io.BytesIO()
                    plt.savefig(buf, format='png')
                    plt.close()
                    buf.seek(0)
                    
                    # Convert plot to PIL Image
                    from PIL import Image
                    image = Image.open(buf)
                    
                    return image, f"**Generated {viz_type} visualization**"
                    
                except Exception as e:
                    import traceback
                    error_msg = f"Error generating visualization: {str(e)}\n{traceback.format_exc()}"
                    print(error_msg)
                    return None, f"**Error:** {str(e)}"
            
            # Connect the generate button to the function
            generate_viz_btn.click(
                fn=generate_visualization,
                inputs=[viz_type, dataset_state, analysis_results_state],
                outputs=[viz_output, viz_status]
            )

        # Run analysis with proper parameters
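        # The outputs list below must stay in the same order as the 15-element
        # tuple returned by run_analysis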
        run_analysis_btn.click(
            fn=run_analysis,
            inputs=[dataset_state, analysis_options, ngram_n, topic_count, user_analysis_log],
            outputs=[
                analysis_results_state,
                user_analysis_log,
                analysis_output,
                visualization_area_visible,
                analysis_title,
                prompt_title,
                models_compared,
                model1_title,
                model1_words,
                model2_title,
                model2_words,
                similarity_metrics_title,
                similarity_metrics,
                status_message_visible,
                status_message
            ]
        )

        # Disabled: the lambda below returns a nested tuple that does not match
        # the three declared outputs, so this auto-load is commented out
        '''
        app.load(
            fn=lambda log: (
                update_summary_dropdown(log),
                load_summary_content("YOUR DATASET RESULTS", log)
            ),
            inputs=[user_analysis_log],
            outputs=[summary_dropdown, summary_content, summary_status]
        )
        '''

    return app

if __name__ == "__main__":
    # Download required NLTK resources before launching the app
    download_nltk_resources()
    
    app = create_app()
    app.launch()