kovacsvi committed on
Commit
72be8df
·
1 Parent(s): ad7818c

use codebooks database for label_names, num_dicts

Browse files
Files changed (1) hide show
  1. label_dicts.py +50 -1086
label_dicts.py CHANGED
@@ -1,1086 +1,50 @@
1
- CAP_MEDIA2_LABEL_NAMES = {
2
- 1: 'Macroeconomics',
3
- 2: 'Civil Rights',
4
- 3: 'Health',
5
- 4: 'Agriculture',
6
- 5: 'Labor',
7
- 6: 'Education',
8
- 7: 'Environment',
9
- 8: 'Energy',
10
- 9: 'Immigration',
11
- 10: 'Transportation',
12
- 12: 'Law and Crime',
13
- 13: 'Social Welfare',
14
- 14: 'Housing',
15
- 15: 'Banking, Finance, and Domestic Commerce',
16
- 16: 'Defense',
17
- 17: 'Technology',
18
- 18: 'Foreign Trade',
19
- 19: 'International Affairs',
20
- 20: 'Government Operations',
21
- 21: 'Public Lands',
22
- 23: 'Culture',
23
- 24: 'State and Local Government Administration',
24
- 25: 'Weather',
25
- 26: 'Fires, emergencies and natural disasters',
26
- 27: 'Crime and trials',
27
- 28: 'Arts, culture, entertainment and history',
28
- 29: 'Style and fashion',
29
- 30: 'Food',
30
- 31: 'Travel',
31
- 32: 'Wellbeing and learning',
32
- 33: 'Personal finance and real estate',
33
- 34: 'Personal technology and popular science',
34
- 35: 'Churches and Religion',
35
- 36: 'Celebrities and human interest',
36
- 37: 'Obituaries and death notices',
37
- 38: 'Sports',
38
- 39: 'Crosswords, puzzles, comics',
39
- 40: 'Media production/internal, letters',
40
- 41: 'Advertisements',
41
- 998: 'No Policy and No Media Content'
42
- }
43
-
44
- CAP_MEDIA2_NUM_DICT = {
45
- 0: 1,
46
- 1: 2,
47
- 2: 3,
48
- 3: 4,
49
- 4: 5,
50
- 5: 6,
51
- 6: 7,
52
- 7: 8,
53
- 8: 9,
54
- 9: 10,
55
- 10: 12,
56
- 11: 13,
57
- 12: 14,
58
- 13: 15,
59
- 14: 16,
60
- 15: 17,
61
- 16: 18,
62
- 17: 19,
63
- 18: 20,
64
- 19: 21,
65
- 20: 23,
66
- 21: 24,
67
- 22: 25,
68
- 23: 26,
69
- 24: 27,
70
- 25: 28,
71
- 26: 29,
72
- 27: 30,
73
- 28: 31,
74
- 29: 32,
75
- 30: 33,
76
- 31: 34,
77
- 32: 35,
78
- 33: 36,
79
- 34: 37,
80
- 35: 38,
81
- 36: 39,
82
- 37: 40,
83
- 38: 41,
84
- 39: 998
85
- }
86
-
87
- CAP_NUM_DICT = {
88
- 0: 1,
89
- 1: 2,
90
- 2: 3,
91
- 3: 4,
92
- 4: 5,
93
- 5: 6,
94
- 6: 7,
95
- 7: 8,
96
- 8: 9,
97
- 9: 10,
98
- 10: 12,
99
- 11: 13,
100
- 12: 14,
101
- 13: 15,
102
- 14: 16,
103
- 15: 17,
104
- 16: 18,
105
- 17: 19,
106
- 18: 20,
107
- 19: 21,
108
- 20: 23,
109
- 21: 999,
110
- 22: 999, # had to do this because of some language-domain models (e.g. english media)
111
- }
112
-
113
- CAP_MEDIA_LABEL_NAMES = {
114
- 1: "Macroeconomics",
115
- 2: "Civil Rights",
116
- 3: "Health",
117
- 4: "Agriculture",
118
- 5: "Labor",
119
- 6: "Education",
120
- 7: "Environment",
121
- 8: "Energy",
122
- 9: "Immigration",
123
- 10: "Transportation",
124
- 12: "Law and Crime",
125
- 13: "Social Welfare",
126
- 14: "Housing",
127
- 15: "Domestic Commerce",
128
- 16: "Defense",
129
- 17: "Technology",
130
- 18: "Foreign Trade",
131
- 19: "International Affairs",
132
- 20: "Government Operations",
133
- 21: "Public Lands",
134
- 23: "Culture",
135
- 24: "State and Local Government Administration",
136
- 26: "Weather and Natural Disasters",
137
- 27: "Fires",
138
- 29: "Sports and Recreation",
139
- 30: "Death Notices",
140
- 31: "Churches and Religions",
141
- 99: "Other, Miscellaneous, and Human Interest",
142
- 998: "No Policy or Media Content"
143
- }
144
-
145
- CAP_MEDIA_NUM_DICT = {
146
- 0: 1,
147
- 1: 2,
148
- 2: 3,
149
- 3: 4,
150
- 4: 5,
151
- 5: 6,
152
- 6: 7,
153
- 7: 8,
154
- 8: 9,
155
- 9: 10,
156
- 10: 12,
157
- 11: 13,
158
- 12: 14,
159
- 13: 15,
160
- 14: 16,
161
- 15: 17,
162
- 16: 18,
163
- 17: 19,
164
- 18: 20,
165
- 19: 21,
166
- 20: 23,
167
- 21: 24,
168
- 22:26,
169
- 23:27,
170
- 24:29,
171
- 25:30,
172
- 26:31,
173
- 27:99,
174
- 28:998
175
- }
176
-
177
- CAP_MIN_NUM_DICT = {
178
- 0: 100,
179
- 1: 101,
180
- 2: 103,
181
- 3: 104,
182
- 4: 105,
183
- 5: 107,
184
- 6: 108,
185
- 7: 110,
186
- 8: 199,
187
- 9: 200,
188
- 10: 201,
189
- 11: 202,
190
- 12: 204,
191
- 13: 205,
192
- 14: 206,
193
- 15: 207,
194
- 16: 208,
195
- 17: 209,
196
- 18: 299,
197
- 19: 300,
198
- 20: 301,
199
- 21: 302,
200
- 22: 321,
201
- 23: 322,
202
- 24: 323,
203
- 25: 324,
204
- 26: 325,
205
- 27: 331,
206
- 28: 332,
207
- 29: 333,
208
- 30: 334,
209
- 31: 335,
210
- 32: 341,
211
- 33: 342,
212
- 34: 398,
213
- 35: 399,
214
- 36: 400,
215
- 37: 401,
216
- 38: 402,
217
- 39: 403,
218
- 40: 404,
219
- 41: 405,
220
- 42: 408,
221
- 43: 498,
222
- 44: 499,
223
- 45: 500,
224
- 46: 501,
225
- 47: 502,
226
- 48: 503,
227
- 49: 504,
228
- 50: 505,
229
- 51: 506,
230
- 52: 529,
231
- 53: 599,
232
- 54: 600,
233
- 55: 601,
234
- 56: 602,
235
- 57: 603,
236
- 58: 604,
237
- 59: 606,
238
- 60: 607,
239
- 61: 698,
240
- 62: 699,
241
- 63: 700,
242
- 64: 701,
243
- 65: 703,
244
- 66: 704,
245
- 67: 705,
246
- 68: 707,
247
- 69: 708,
248
- 70: 709,
249
- 71: 711,
250
- 72: 798,
251
- 73: 799,
252
- 74: 800,
253
- 75: 801,
254
- 76: 802,
255
- 77: 803,
256
- 78: 805,
257
- 79: 806,
258
- 80: 807,
259
- 81: 898,
260
- 82: 899,
261
- 83: 900,
262
- 84: 1000,
263
- 85: 1001,
264
- 86: 1002,
265
- 87: 1003,
266
- 88: 1005,
267
- 89: 1007,
268
- 90: 1010,
269
- 91: 1098,
270
- 92: 1099,
271
- 93: 1200,
272
- 94: 1201,
273
- 95: 1202,
274
- 96: 1203,
275
- 97: 1204,
276
- 98: 1205,
277
- 99: 1206,
278
- 100: 1207,
279
- 101: 1208,
280
- 102: 1210,
281
- 103: 1211,
282
- 104: 1227,
283
- 105: 1299,
284
- 106: 1300,
285
- 107: 1302,
286
- 108: 1303,
287
- 109: 1304,
288
- 110: 1305,
289
- 111: 1308,
290
- 112: 1399,
291
- 113: 1400,
292
- 114: 1401,
293
- 115: 1403,
294
- 116: 1404,
295
- 117: 1405,
296
- 118: 1406,
297
- 119: 1407,
298
- 120: 1408,
299
- 121: 1409,
300
- 122: 1498,
301
- 123: 1499,
302
- 124: 1500,
303
- 125: 1501,
304
- 126: 1502,
305
- 127: 1504,
306
- 128: 1505,
307
- 129: 1507,
308
- 130: 1520,
309
- 131: 1521,
310
- 132: 1522,
311
- 133: 1523,
312
- 134: 1524,
313
- 135: 1525,
314
- 136: 1526,
315
- 137: 1598,
316
- 138: 1599,
317
- 139: 1600,
318
- 140: 1602,
319
- 141: 1603,
320
- 142: 1604,
321
- 143: 1605,
322
- 144: 1606,
323
- 145: 1608,
324
- 146: 1610,
325
- 147: 1611,
326
- 148: 1612,
327
- 149: 1614,
328
- 150: 1615,
329
- 151: 1616,
330
- 152: 1617,
331
- 153: 1619,
332
- 154: 1620,
333
- 155: 1698,
334
- 156: 1699,
335
- 157: 1700,
336
- 158: 1701,
337
- 159: 1704,
338
- 160: 1705,
339
- 161: 1706,
340
- 162: 1707,
341
- 163: 1708,
342
- 164: 1709,
343
- 165: 1798,
344
- 166: 1799,
345
- 167: 1800,
346
- 168: 1802,
347
- 169: 1803,
348
- 170: 1804,
349
- 171: 1806,
350
- 172: 1807,
351
- 173: 1808,
352
- 174: 1899,
353
- 175: 1900,
354
- 176: 1901,
355
- 177: 1902,
356
- 178: 1905,
357
- 179: 1906,
358
- 180: 1910,
359
- 181: 1921,
360
- 182: 1925,
361
- 183: 1926,
362
- 184: 1927,
363
- 185: 1929,
364
- 186: 1999,
365
- 187: 2000,
366
- 188: 2001,
367
- 189: 2002,
368
- 190: 2003,
369
- 191: 2004,
370
- 192: 2005,
371
- 193: 2006,
372
- 194: 2007,
373
- 195: 2008,
374
- 196: 2009,
375
- 197: 2010,
376
- 198: 2011,
377
- 199: 2012,
378
- 200: 2013,
379
- 201: 2014,
380
- 202: 2015,
381
- 203: 2030,
382
- 204: 2099,
383
- 205: 2100,
384
- 206: 2101,
385
- 207: 2102,
386
- 208: 2103,
387
- 209: 2104,
388
- 210: 2105,
389
- 211: 2199,
390
- 212: 2300,
391
- 213: 999
392
- }
393
- CAP_LABEL_NAMES = {
394
- 1: "Macroeconomics",
395
- 2: "Civil Rights",
396
- 3: "Health",
397
- 4: "Agriculture",
398
- 5: "Labor",
399
- 6: "Education",
400
- 7: "Environment",
401
- 8: "Energy",
402
- 9: "Immigration",
403
- 10: "Transportation",
404
- 12: "Law and Crime",
405
- 13: "Social Welfare",
406
- 14: "Housing",
407
- 15: "Domestic Commerce",
408
- 16: "Defense",
409
- 17: "Technology",
410
- 18: "Foreign Trade",
411
- 19: "International Affairs",
412
- 20: "Government Operations",
413
- 21: "Public Lands",
414
- 23: "Culture",
415
- 999: "No Policy Content"
416
- }
417
-
418
- CAP_MIN_MEDIA_NUM_DICT = {0: 100,
419
- 1: 101,
420
- 2: 103,
421
- 3: 104,
422
- 4: 105,
423
- 5: 107,
424
- 6: 108,
425
- 7: 110,
426
- 8: 199,
427
- 9: 200,
428
- 10: 201,
429
- 11: 202,
430
- 12: 204,
431
- 13: 205,
432
- 14: 206,
433
- 15: 207,
434
- 16: 208,
435
- 17: 209,
436
- 18: 299,
437
- 19: 300,
438
- 20: 301,
439
- 21: 302,
440
- 22: 321,
441
- 23: 322,
442
- 24: 323,
443
- 25: 324,
444
- 26: 325,
445
- 27: 331,
446
- 28: 332,
447
- 29: 333,
448
- 30: 334,
449
- 31: 335,
450
- 32: 341,
451
- 33: 342,
452
- 34: 398,
453
- 35: 399,
454
- 36: 400,
455
- 37: 401,
456
- 38: 402,
457
- 39: 403,
458
- 40: 404,
459
- 41: 405,
460
- 42: 408,
461
- 43: 498,
462
- 44: 499,
463
- 45: 500,
464
- 46: 501,
465
- 47: 502,
466
- 48: 503,
467
- 49: 504,
468
- 50: 505,
469
- 51: 506,
470
- 52: 529,
471
- 53: 599,
472
- 54: 600,
473
- 55: 601,
474
- 56: 602,
475
- 57: 603,
476
- 58: 604,
477
- 59: 606,
478
- 60: 607,
479
- 61: 698,
480
- 62: 699,
481
- 63: 700,
482
- 64: 701,
483
- 65: 703,
484
- 66: 704,
485
- 67: 705,
486
- 68: 707,
487
- 69: 708,
488
- 70: 709,
489
- 71: 711,
490
- 72: 798,
491
- 73: 799,
492
- 74: 800,
493
- 75: 801,
494
- 76: 802,
495
- 77: 803,
496
- 78: 805,
497
- 79: 806,
498
- 80: 807,
499
- 81: 898,
500
- 82: 899,
501
- 83: 900,
502
- 84: 1000,
503
- 85: 1001,
504
- 86: 1002,
505
- 87: 1003,
506
- 88: 1005,
507
- 89: 1007,
508
- 90: 1010,
509
- 91: 1098,
510
- 92: 1099,
511
- 93: 1200,
512
- 94: 1201,
513
- 95: 1202,
514
- 96: 1203,
515
- 97: 1204,
516
- 98: 1205,
517
- 99: 1206,
518
- 100: 1207,
519
- 101: 1208,
520
- 102: 1210,
521
- 103: 1211,
522
- 104: 1227,
523
- 105: 1299,
524
- 106: 1300,
525
- 107: 1302,
526
- 108: 1303,
527
- 109: 1304,
528
- 110: 1305,
529
- 111: 1308,
530
- 112: 1399,
531
- 113: 1400,
532
- 114: 1401,
533
- 115: 1403,
534
- 116: 1404,
535
- 117: 1405,
536
- 118: 1406,
537
- 119: 1407,
538
- 120: 1408,
539
- 121: 1409,
540
- 122: 1498,
541
- 123: 1499,
542
- 124: 1500,
543
- 125: 1501,
544
- 126: 1502,
545
- 127: 1504,
546
- 128: 1505,
547
- 129: 1507,
548
- 130: 1520,
549
- 131: 1521,
550
- 132: 1522,
551
- 133: 1523,
552
- 134: 1524,
553
- 135: 1525,
554
- 136: 1526,
555
- 137: 1598,
556
- 138: 1599,
557
- 139: 1600,
558
- 140: 1602,
559
- 141: 1603,
560
- 142: 1604,
561
- 143: 1605,
562
- 144: 1606,
563
- 145: 1608,
564
- 146: 1610,
565
- 147: 1611,
566
- 148: 1612,
567
- 149: 1614,
568
- 150: 1615,
569
- 151: 1616,
570
- 152: 1617,
571
- 153: 1619,
572
- 154: 1620,
573
- 155: 1698,
574
- 156: 1699,
575
- 157: 1700,
576
- 158: 1701,
577
- 159: 1704,
578
- 160: 1705,
579
- 161: 1706,
580
- 162: 1707,
581
- 163: 1708,
582
- 164: 1709,
583
- 165: 1798,
584
- 166: 1799,
585
- 167: 1800,
586
- 168: 1802,
587
- 169: 1803,
588
- 170: 1804,
589
- 171: 1806,
590
- 172: 1807,
591
- 173: 1808,
592
- 174: 1899,
593
- 175: 1900,
594
- 176: 1901,
595
- 177: 1902,
596
- 178: 1905,
597
- 179: 1906,
598
- 180: 1910,
599
- 181: 1921,
600
- 182: 1925,
601
- 183: 1926,
602
- 184: 1927,
603
- 185: 1929,
604
- 186: 1999,
605
- 187: 2000,
606
- 188: 2001,
607
- 189: 2002,
608
- 190: 2003,
609
- 191: 2004,
610
- 192: 2005,
611
- 193: 2006,
612
- 194: 2007,
613
- 195: 2008,
614
- 196: 2009,
615
- 197: 2010,
616
- 198: 2011,
617
- 199: 2012,
618
- 200: 2013,
619
- 201: 2014,
620
- 202: 2015,
621
- 203: 2030,
622
- 204: 2099,
623
- 205: 2100,
624
- 206: 2101,
625
- 207: 2102,
626
- 208: 2103,
627
- 209: 2104,
628
- 210: 2105,
629
- 211: 2300,
630
- 212: 99, # do not use separate 9999 and 99 labels
631
- 213: 24,
632
- 214: 26,
633
- 215: 27,
634
- 216: 29,
635
- 217: 30,
636
- 218: 31,
637
- 219: 99}
638
-
639
- MANIFESTO_NUM_DICT = {
640
- 0: 0,
641
- 1: 101,
642
- 2: 102,
643
- 3: 103,
644
- 4: 104,
645
- 5: 105,
646
- 6: 106,
647
- 7: 107,
648
- 8: 108,
649
- 9: 109,
650
- 10: 110,
651
- 11: 201,
652
- 12: 202,
653
- 13: 203,
654
- 14: 204,
655
- 15: 301,
656
- 16: 302,
657
- 17: 303,
658
- 18: 304,
659
- 19: 305,
660
- 20: 401,
661
- 21: 402,
662
- 22: 403,
663
- 23: 404,
664
- 24: 405,
665
- 25: 406,
666
- 26: 407,
667
- 27: 408,
668
- 28: 409,
669
- 29: 410,
670
- 30: 411,
671
- 31: 412,
672
- 32: 413,
673
- 33: 414,
674
- 34: 415,
675
- 35: 416,
676
- 36: 501,
677
- 37: 502,
678
- 38: 503,
679
- 39: 504,
680
- 40: 505,
681
- 41: 506,
682
- 42: 507,
683
- 43: 601,
684
- 44: 602,
685
- 45: 603,
686
- 46: 604,
687
- 47: 605,
688
- 48: 606,
689
- 49: 607,
690
- 50: 608,
691
- 51: 701,
692
- 52: 702,
693
- 53: 703,
694
- 54: 704,
695
- 55: 705,
696
- 56: 706
697
- }
698
-
699
- CAP_MIN_LABEL_NAMES = {
700
- # 1. Macroeconomics
701
- 100: "General",
702
- 101: "Interest Rates",
703
- 103: "Unemployment Rate",
704
- 104: "Monetary Policy",
705
- 105: "National Budget",
706
- 107: "Tax Code",
707
- 108: "Industrial Policy",
708
- 110: "Price Control",
709
- 199: "Other",
710
- # 2. Civil Rights
711
- 200: "General",
712
- 201: "Minority Discrimination",
713
- 202: "Gender Discrimination",
714
- 204: "Age Discrimination",
715
- 205: "Handicap Discrimination",
716
- 206: "Voting Rights",
717
- 207: "Freedom of Speech",
718
- 208: "Right to Privacy",
719
- 209: "Anti-Government",
720
- 299: "Other",
721
- # 3. Health
722
- 300: "General",
723
- 301: "Health Care Reform",
724
- 302: "Insurance",
725
- 321: "Drug Industry",
726
- 322: "Medical Facilities",
727
- 323: "Insurance Providers",
728
- 324: "Medical Liability",
729
- 325: "Manpower",
730
- 331: "Disease Prevention",
731
- 332: "Infants and Children",
732
- 333: "Mental Health",
733
- 334: "Long-term Care",
734
- 335: "Drug Coverage and Cost",
735
- 341: "Tobacco Abuse",
736
- 342: "Drug and Alcohol Abuse",
737
- 398: "R&D",
738
- 399: "Other",
739
- # 4. Agriculture
740
- 400: "General",
741
- 401: "Trade",
742
- 402: "Subsidies to Farmers",
743
- 403: "Food Inspection & Safety",
744
- 404: "Food Marketing & Promotion",
745
- 405: "Animal and Crop Disease",
746
- 408: "Fisheries & Fishing",
747
- 498: "R&D",
748
- 499: "Other",
749
- # 5. Labor
750
- 500: "General",
751
- 501: "Worker Safety",
752
- 502: "Employment Training",
753
- 503: "Employee Benefits",
754
- 504: "Labor Unions",
755
- 505: "Fair Labor Standards",
756
- 506: "Youth Employment",
757
- 529: "Migrant and Seasonal",
758
- 599: "Other",
759
- # 6. Education
760
- 600: "General",
761
- 601: "Higher",
762
- 602: "Elementary & Secondary",
763
- 603: "Underprivileged",
764
- 604: "Vocational",
765
- 606: "Special",
766
- 607: "Excellence",
767
- 698: "R&D",
768
- 699: "Other",
769
- # 7. Environment
770
- 700: "General",
771
- 701: "Drinking Water",
772
- 703: "Waste Disposal",
773
- 704: "Hazardous Waste",
774
- 705: "Air Pollution",
775
- 707: "Recycling",
776
- 708: "Indoor Hazards",
777
- 709: "Species & Forest",
778
- 711: "Land and Water Conservation",
779
- 798: "R&D",
780
- 799: "Other",
781
- # 8. Energy
782
- 800: "General",
783
- 801: "Nuclear",
784
- 802: "Electricity",
785
- 803: "Natural Gas & Oil",
786
- 805: "Coal",
787
- 806: "Alternative & Renewable",
788
- 807: "Conservation",
789
- 898: "R&D",
790
- 899: "Other",
791
- # 9. Immigration
792
- 900: "Immigration",
793
- # 10. Transportation
794
- 1000: "General",
795
- 1001: "Mass",
796
- 1002: "Highways",
797
- 1003: "Air Travel",
798
- 1005: "Railroad Travel",
799
- 1007: "Maritime",
800
- 1010: "Infrastructure",
801
- 1098: "R&D",
802
- 1099: "Other",
803
- # 12. Law and Crime
804
- 1200: "General",
805
- 1201: "Agencies",
806
- 1202: "White Collar Crime",
807
- 1203: "Illegal Drugs",
808
- 1204: "Court Administration",
809
- 1205: "Prisons",
810
- 1206: "Juvenile Crime",
811
- 1207: "Child Abuse",
812
- 1208: "Family Issues",
813
- 1210: "Criminal & Civil Code",
814
- 1211: "Crime Control",
815
- 1227: "Police",
816
- 1299: "Other",
817
- # 13. Social Welfare
818
- 1300: "General",
819
- 1302: "Low-Income Assistance",
820
- 1303: "Elderly Assistance",
821
- 1304: "Disabled Assistance",
822
- 1305: "Volunteer Associations",
823
- 1308: "Child Care",
824
- 1399: "Other",
825
- # 14. Housing
826
- 1400: "General",
827
- 1401: "Community Development",
828
- 1403: "Urban Development",
829
- 1404: "Rural Housing",
830
- 1405: "Rural Development",
831
- 1406: "Low-Income Assistance",
832
- 1407: "Veterans",
833
- 1408: "Elderly",
834
- 1409: "Homeless",
835
- 1498: "R&D",
836
- 1499: "Other",
837
- # 15. Domestic Commerce
838
- 1500: "General",
839
- 1501: "Banking",
840
- 1502: "Securities & Commodities",
841
- 1504: "Consumer Finance",
842
- 1505: "Insurance Regulation",
843
- 1507: "Bankruptcy",
844
- 1520: "Corporate Management",
845
- 1521: "Small Businesses",
846
- 1522: "Copyrights and Patents",
847
- 1523: "Disaster Relief",
848
- 1524: "Tourism",
849
- 1525: "Consumer Safety",
850
- 1526: "Sports Regulation",
851
- 1598: "R&D",
852
- 1599: "Other",
853
- # 16. Defense
854
- 1600: "General",
855
- 1602: "Alliances",
856
- 1603: "Intelligence",
857
- 1604: "Readiness",
858
- 1605: "Nuclear Arms",
859
- 1606: "Military Aid",
860
- 1608: "Personnel Issues",
861
- 1610: "Procurement",
862
- 1611: "Installations & Land",
863
- 1612: "Reserve Forces",
864
- 1614: "Hazardous Waste",
865
- 1615: "Civil",
866
- 1616: "Civilian Personnel",
867
- 1617: "Contractors",
868
- 1619: "Foreign Operations",
869
- 1620: "Claims against Military",
870
- 1698: "R&D",
871
- 1699: "Other",
872
- # 17. Technology
873
- 1700: "General",
874
- 1701: "Space",
875
- 1704: "Commercial Use of Space",
876
- 1705: "Science Transfer",
877
- 1706: "Telecommunications",
878
- 1707: "Broadcast",
879
- 1708: "Weather Forecasting",
880
- 1709: "Computers",
881
- 1798: "R&D",
882
- 1799: "Other",
883
- # 18. Foreign Trade
884
- 1800: "General",
885
- 1802: "Trade Agreements",
886
- 1803: "Exports",
887
- 1804: "Private Investments",
888
- 1806: "Competitiveness",
889
- 1807: "Tariff & Imports",
890
- 1808: "Exchange Rates",
891
- 1899: "Other",
892
- # 19. International Affairs
893
- 1900: "General",
894
- 1901: "Foreign Aid",
895
- 1902: "Resources Exploitation",
896
- 1905: "Developing Countries",
897
- 1906: "International Finance",
898
- 1910: "Western Europe",
899
- 1921: "Specific Country",
900
- 1925: "Human Rights",
901
- 1926: "Organizations",
902
- 1927: "Terrorism",
903
- 1929: "Diplomats",
904
- 1999: "Other",
905
- # 20. Government Operations
906
- 2000: "General",
907
- 2001: "Intergovernmental Relations",
908
- 2002: "Bureaucracy",
909
- 2003: "Postal Service",
910
- 2004: "Employees",
911
- 2005: "Appointments",
912
- 2006: "Currency",
913
- 2007: "Procurement & Contractors",
914
- 2008: "Property Management",
915
- 2009: "Tax Administration",
916
- 2010: "Scandals",
917
- 2011: "Branch Relations",
918
- 2012: "Political Campaigns",
919
- 2013: "Census & Statistics",
920
- 2014: "Capital City",
921
- 2015: "Claims against the government",
922
- 2030: "National Holidays",
923
- 2099: "Other",
924
- # 21. Public Lands
925
- 2100: "General",
926
- 2101: "National Parks",
927
- 2102: "Indigenous Affairs",
928
- 2103: "Public Lands",
929
- 2104: "Water Resources",
930
- 2105: "Dependencies & Territories",
931
- 2199: "Other",
932
- # 23. Culture
933
- 2300: "General",
934
- # NPC
935
- 999: "No Policy Content",
936
- 9999: "No Policy Content",
937
- }
938
-
939
- MANIFESTO_LABEL_NAMES = {
940
- 0: "No Policy Goal",
941
- 999: "No Policy Goal",
942
- 101: "Foreign Special Relationships: Positive",
943
- 102: "Foreign Special Relationships: Negative",
944
- 103: "Anti-Imperialism",
945
- 104: "Military: Positive",
946
- 105: "Military: Negative",
947
- 106: "Peace",
948
- 107: "Internationalism: Positive",
949
- 108: "European Community/Union: Positive",
950
- 109: "Internationalism: Negative",
951
- 110: "European Community/Union: Negative",
952
- 201: "Freedom and Human Rights",
953
- 202: "Democracy",
954
- 203: "Constitutionalism: Positive",
955
- 204: "Constitutionalism: Negative",
956
- 301: "Federalism",
957
- 302: "Centralisation",
958
- 303: "Governmental and Administrative Efficiency",
959
- 304: "Political Corruption",
960
- 305: "Political Authority",
961
- 401: "Free Market Economy",
962
- 402: "Incentives",
963
- 403: "Market Regulation",
964
- 404: "Economic Planning",
965
- 405: "Corporatism/Mixed Economy",
966
- 406: "Protectionism: Positive",
967
- 407: "Protectionism: Negative",
968
- 408: "Economic Goals",
969
- 409: "Keynesian Demand Management",
970
- 410: "Economic Growth: Positive",
971
- 411: "Technology and Infrastructure",
972
- 412: "Controlled Economy",
973
- 413: "Nationalisation",
974
- 414: "Economic Orthodoxy",
975
- 415: "Marxist Analysis: Positive",
976
- 416: "Anti-Growth Economy: Positive",
977
- 501: "Environmental Protection: Positive",
978
- 502: "Culture: Positive",
979
- 503: "Equality: Positive",
980
- 504: "Welfare State Expansion",
981
- 505: "Welfare State Limitation",
982
- 506: "Education Expansion",
983
- 507: "Education Limitation",
984
- 601: "National Way of Life: Positive",
985
- 602: "National Way of Life: Negative",
986
- 603: "Traditional Morality: Positive",
987
- 604: "Traditional Morality: Negative",
988
- 605: "Law and Order: Positive",
989
- 606: "Civic Mindedness: Positive",
990
- 607: "Multiculturalism: Positive",
991
- 608: "Multiculturalism: Negative",
992
- 701: "Labour Groups: Positive",
993
- 702: "Labour Groups: Negative",
994
- 703: "Agriculture and Farmers: Positive",
995
- 704: "Middle Class and Professional Groups",
996
- 705: "Underprivileged Minority Groups",
997
- 706: "Non-economic Demographic Groups"
998
- }
999
-
1000
- ILLFRAMES_MIGRATION_LABEL_NAMES = {
1001
- 901: "Culture Under Attack",
1002
- 902: "Economic Burden",
1003
- 903: "Illegals and Fraudsters",
1004
- 904: "Extradition Necessity",
1005
- 905: "Nation tate Should Decide",
1006
- 906: "Administrative Burden",
1007
- 907: "General System Failure",
1008
- 908: "Security Threat",
1009
- 909: "Criminals",
1010
- 910: "Welfare State Overload",
1011
- 999: "None of Them",
1012
- }
1013
-
1014
- ILLFRAMES_COVID_LABEL_NAMES = {
1015
- 310: "Skepticism",
1016
- 311: "Great Reset and Elite Control",
1017
- 312: "Undermining the Economy",
1018
- 313: "Medical Choice",
1019
- 314: "Media Fabrication",
1020
- 315: "Threatening Way of Life",
1021
- 399: "None of Them",
1022
- }
1023
-
1024
- ILLFRAMES_WAR_LABEL_NAMES = {
1025
- 101: 'Identity and Cultural Threat',
1026
- 102: 'Economic Fallout/Domestic Welfare Neglected',
1027
- 103: 'Violation of Russian Sovereignty/Western geopolitical meddling',
1028
- 104: 'Illegitimate and corrupt Ukraine leadership',
1029
- 105: 'Ukrainians and Ukraine are a military threat and agressive war-mongerer that threaten EU stability and security',
1030
- 107: 'Western Propaganda and Civilian Suffering',
1031
- 108: 'Historical Betrayal of Russia',
1032
- 109: 'Ukraine/Nazi Allegation',
1033
- 110: "None of Them"
1034
- }
1035
-
1036
- ONTOLISST_LABEL_NAMES = {
1037
- 0: 'Demographics',
1038
- 1: 'Housing and local environment (Housing and environment)',
1039
- 2: 'Physical health',
1040
- 3: 'Mental health and mental processes',
1041
- 4: 'Healthcare',
1042
- 5: 'Health behaviour (Health and lifestyle)',
1043
- 6: 'Family and social networks',
1044
- 7: 'Education',
1045
- 8: 'Employment and income (Employment and pensions)',
1046
- 9: 'Expectation, attitudes and beliefs (Attitudes and beliefs)',
1047
- 10: 'Child development',
1048
- 11: 'Life events',
1049
- 12: 'Omics',
1050
- 13: 'Pregnancy',
1051
- 14: 'Administration',
1052
- 15: 'COVID19'
1053
- }
1054
-
1055
- EMOTION6_LABEL_NAMES = {
1056
- 0: "Anger",
1057
- 1: "Fear",
1058
- 2: "Disgust",
1059
- 3: "Sadness",
1060
- 4: "Joy",
1061
- 5: "None of Them"
1062
- }
1063
-
1064
- EMOTION9_LABEL_NAMES = {
1065
- 0: "Anger",
1066
- 1: "Fear",
1067
- 2: "Disgust",
1068
- 3: "Sadness",
1069
- 4: "Joy",
1070
- 5: "Enthusiasm",
1071
- 6: "Hope",
1072
- 7: "Pride",
1073
- 8: "None of Them",
1074
- }
1075
-
1076
- EMOTION9_V2_LABEL_NAMES = {
1077
- 0: "Anger",
1078
- 1: "Fear",
1079
- 2: "Disgust",
1080
- 3: "Sadness",
1081
- 4: "Joy",
1082
- 5: "None of them",
1083
- 6: "Enthusiasm",
1084
- 7: "Hope",
1085
- 8: "Pride"
1086
- }
 
1
+ import os
2
+ import json
3
+ import pandas as pd
4
+ from google.cloud import bigquery
5
+
6
+ service_account_info = json.loads(os.environ["GCP_SERVICE_ACCOUNT_JSON"])
7
+ client = bigquery.Client.from_service_account_info(service_account_info)
8
+ query = "SELECT * FROM `upheld-magpie-314312.codebooks.codebooks_full`"
9
+ query_job = client.query(query)
10
+ df_codebooks = query_job.result().to_dataframe()
11
+
12
+
13
+ def get_label_names(df, task):
14
+ task_df = df[df["task"] == task].sort_values(by="index")
15
+ label_names_dict = dict(zip(task_df["code"], task_df["name"]))
16
+ return label_names_dict
17
+
18
+
19
+ def get_num_dict(df, task):
20
+ task_df = df[df["task"] == task].sort_values(by="index")
21
+ num_dict = dict(zip(task_df["index"], task_df["code"]))
22
+ return num_dict
23
+
24
+ # NOTE: this is the same logic as in the Babel pipeline (different variable names!)
25
+ # key: task type (as in codebooks.codebooks_full)
26
+ # value: (tuple) name of label_names, num_dict variables
27
+ # TO-DO: we could replace all of these variables with one dict-like object
28
+
29
+ task_names = {
30
+ "CAP": ("CAP_LABEL_NAMES", "CAP_NUM_DICT"),
31
+ "CAP_MINOR": ("CAP_MIN_LABEL_NAMES", "CAP_MIN_NUM_DICT"),
32
+ "CAP_MEDIA": ("CAP_MEDIA_LABEL_NAMES", "CAP_MEDIA_NUM_DICT"),
33
+ "CAP_MEDIA2": ("CAP_MEDIA2_LABEL_NAMES", "CAP_MEDIA2_DICT"),
34
+ "CAP_MINOR_MEDIA": ("CAP_MIN_MEDIA_LABEL_NAMES", "CAP_MIN_MEDIA_DICT"),
35
+ "MANIFESTO": ("MANIFESTO_LABEL_NAMES", "MANIFESTO_NUM_DICT"),
36
+ "SENTIMENT": ("SENTIMENT_LABEL_NAMES", "SENTIMENT_NUM_DICT"),
37
+ "EMOTION6": ("EMOTION_LABEL_NAMES", "EMOTION_NUM_DICT"),
38
+ "EMOTION9": ("EMOTION9_V2_LABEL_NAMES", "EMOTION9_V2_NUM_DICT"),
39
+ "EMOTION9_LEGACY": ("EMOTION9_LABEL_NAMES", "EMOTION9_NUM_DICT"),
40
+ "ILLFRAMES_MIGRATION": ("ILLFRAMES_MIGRATION_LABEL_NAMES", "ILLFRAMES_MIGRATION_NUM_DICT"),
41
+ "ILLFRAMES_COVID": ("ILLFRAMES_COVID_LABEL_NAMES", "ILLFRAMES_COVID_NUM_DICT"),
42
+ "ILLFRAMES_WAR": ("ILLFRAMES_WAR_LABEL_NAMES", "ILLFRAMES_WAR_NUM_DICT"),
43
+ "ONTOLISST": ("ONTOLISST_LABEL_NAMES", "ONTOLISST_NUM_DICT"),
44
+ }
45
+
46
+ for task, var_names in task_names.items():
47
+ label_name_var = var_names[0]
48
+ num_dict_var = var_names[1]
49
+ globals()[label_name_var] = get_label_names(df_codebooks, task)
50
+ globals()[num_dict_var] = get_num_dict(df_codebooks, task)