labdmitriy committed on
Commit
89ac851
·
verified ·
1 Parent(s): 777f618

Add BERTopic model

Browse files
Files changed (4) hide show
  1. README.md +74 -0
  2. config.json +16 -0
  3. topic_embeddings.safetensors +3 -0
  4. topics.json +461 -0
README.md ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ ---
3
+ tags:
4
+ - bertopic
5
+ library_name: bertopic
6
+ pipeline_tag: text-classification
7
+ ---
8
+
9
+ # rag-topic-model
10
+
11
+ This is a [BERTopic](https://github.com/MaartenGr/BERTopic) model.
12
+ BERTopic is a flexible and modular topic modeling framework that allows for the generation of easily interpretable topics from large datasets.
13
+
14
+ ## Usage
15
+
16
+ To use this model, please install BERTopic:
17
+
18
+ ```
19
+ pip install -U bertopic
20
+ ```
21
+
22
+ You can use the model as follows:
23
+
24
+ ```python
25
+ from bertopic import BERTopic
26
+ topic_model = BERTopic.load("labdmitriy/rag-topic-model")
27
+
28
+ topic_model.get_topic_info()
29
+ ```
30
+
31
+ ## Topic overview
32
+
33
+ * Number of topics: 5 (4 topics plus the outlier topic -1)
34
+ * Number of training documents: 201
35
+
36
+ <details>
37
+ <summary>Click here for an overview of all topics.</summary>
38
+
39
+ | Topic ID | Topic Keywords | Topic Frequency | Label |
40
+ |----------|----------------|-----------------|-------|
41
+ | -1 | my - for - to - account - payment | 35 | -1_my_for_to_account |
42
+ | 0 | refund - nike - my - store - for | 72 | 0_refund_nike_my_store |
43
+ | 1 | my - the - for - klarna - payment | 45 | 1_my_the_for_klarna |
44
+ | 2 | email - to - my - account - the | 36 | 2_email_to_my_account |
45
+ | 3 | card - klarna - it - to - need | 13 | 3_card_klarna_it_to |
46
+
47
+ </details>
48
+
49
+ ## Training hyperparameters
50
+
51
+ * calculate_probabilities: False
52
+ * language: None
53
+ * low_memory: False
54
+ * min_topic_size: 10
55
+ * n_gram_range: (1, 1)
56
+ * nr_topics: None
57
+ * seed_topic_list: None
58
+ * top_n_words: 10
59
+ * verbose: True
60
+ * zeroshot_min_similarity: 0.7
61
+ * zeroshot_topic_list: None
62
+
63
+ ## Framework versions
64
+
65
+ * Numpy: 2.1.3
66
+ * HDBSCAN: 0.8.40
67
+ * UMAP: 0.5.7
68
+ * Pandas: 2.2.3
69
+ * Scikit-Learn: 1.6.1
70
+ * Sentence-transformers: 3.1.1
71
+ * Transformers: 4.45.2
72
+ * Numba: 0.61.0
73
+ * Plotly: 6.0.0
74
+ * Python: 3.11.5
config.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "calculate_probabilities": false,
3
+ "language": null,
4
+ "low_memory": false,
5
+ "min_topic_size": 10,
6
+ "n_gram_range": [
7
+ 1,
8
+ 1
9
+ ],
10
+ "nr_topics": null,
11
+ "seed_topic_list": null,
12
+ "top_n_words": 10,
13
+ "verbose": true,
14
+ "zeroshot_min_similarity": 0.7,
15
+ "zeroshot_topic_list": null
16
+ }
topic_embeddings.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d622362edaa5bb518a32a10c62ced927070a39d8f019c804c7b81d216b7e7cf
3
+ size 7768
topics.json ADDED
@@ -0,0 +1,461 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "topic_representations": {
3
+ "-1": [
4
+ [
5
+ "my",
6
+ 0.08351247499573676
7
+ ],
8
+ [
9
+ "for",
10
+ 0.05810177344858424
11
+ ],
12
+ [
13
+ "to",
14
+ 0.055188254406297606
15
+ ],
16
+ [
17
+ "account",
18
+ 0.05429338689686527
19
+ ],
20
+ [
21
+ "payment",
22
+ 0.0537219861366755
23
+ ],
24
+ [
25
+ "now",
26
+ 0.04871385681855305
27
+ ],
28
+ [
29
+ "purchase",
30
+ 0.047384226634445095
31
+ ],
32
+ [
33
+ "was",
34
+ 0.046751744593017154
35
+ ],
36
+ [
37
+ "and",
38
+ 0.04368374524745038
39
+ ],
40
+ [
41
+ "do",
42
+ 0.04343470951749222
43
+ ]
44
+ ],
45
+ "0": [
46
+ [
47
+ "refund",
48
+ 0.1008063421569907
49
+ ],
50
+ [
51
+ "nike",
52
+ 0.07953724400934041
53
+ ],
54
+ [
55
+ "my",
56
+ 0.06936418462914268
57
+ ],
58
+ [
59
+ "store",
60
+ 0.06759242554620642
61
+ ],
62
+ [
63
+ "for",
64
+ 0.06549360326834416
65
+ ],
66
+ [
67
+ "returned",
68
+ 0.06387888847375896
69
+ ],
70
+ [
71
+ "to",
72
+ 0.05729815018944691
73
+ ],
74
+ [
75
+ "credit",
76
+ 0.056785410506670936
77
+ ],
78
+ [
79
+ "week",
80
+ 0.05343337517589997
81
+ ],
82
+ [
83
+ "got",
84
+ 0.05057623065494451
85
+ ]
86
+ ],
87
+ "1": [
88
+ [
89
+ "my",
90
+ 0.08279385561753708
91
+ ],
92
+ [
93
+ "the",
94
+ 0.06738827978560745
95
+ ],
96
+ [
97
+ "for",
98
+ 0.06372966377957717
99
+ ],
100
+ [
101
+ "klarna",
102
+ 0.06100773517865077
103
+ ],
104
+ [
105
+ "payment",
106
+ 0.059932914645251106
107
+ ],
108
+ [
109
+ "to",
110
+ 0.05704159130800162
111
+ ],
112
+ [
113
+ "it",
114
+ 0.05497410852432673
115
+ ],
116
+ [
117
+ "app",
118
+ 0.04936721192047436
119
+ ],
120
+ [
121
+ "balance",
122
+ 0.04926492246277606
123
+ ],
124
+ [
125
+ "pay",
126
+ 0.047486429880220185
127
+ ]
128
+ ],
129
+ "2": [
130
+ [
131
+ "email",
132
+ 0.09307598959354162
133
+ ],
134
+ [
135
+ "to",
136
+ 0.08734294945545176
137
+ ],
138
+ [
139
+ "my",
140
+ 0.08105192269852203
141
+ ],
142
+ [
143
+ "account",
144
+ 0.063711503955006
145
+ ],
146
+ [
147
+ "the",
148
+ 0.06263862894973347
149
+ ],
150
+ [
151
+ "klarna",
152
+ 0.06124442070643416
153
+ ],
154
+ [
155
+ "im",
156
+ 0.060358266904742525
157
+ ],
158
+ [
159
+ "and",
160
+ 0.059355349729757555
161
+ ],
162
+ [
163
+ "cant",
164
+ 0.05436252349596517
165
+ ],
166
+ [
167
+ "in",
168
+ 0.051094506827016946
169
+ ]
170
+ ],
171
+ "3": [
172
+ [
173
+ "card",
174
+ 0.17558690454111048
175
+ ],
176
+ [
177
+ "klarna",
178
+ 0.1417970935161136
179
+ ],
180
+ [
181
+ "it",
182
+ 0.12008782905407189
183
+ ],
184
+ [
185
+ "to",
186
+ 0.10146336153016218
187
+ ],
188
+ [
189
+ "need",
190
+ 0.09696934008698394
191
+ ],
192
+ [
193
+ "details",
194
+ 0.09691981975159798
195
+ ],
196
+ [
197
+ "call",
198
+ 0.09361997983212314
199
+ ],
200
+ [
201
+ "my",
202
+ 0.08773560900310651
203
+ ],
204
+ [
205
+ "lost",
206
+ 0.08648818841462437
207
+ ],
208
+ [
209
+ "do",
210
+ 0.07985452125892926
211
+ ]
212
+ ]
213
+ },
214
+ "topics": [
215
+ 2,
216
+ -1,
217
+ 2,
218
+ 1,
219
+ 2,
220
+ -1,
221
+ 0,
222
+ 0,
223
+ -1,
224
+ 1,
225
+ 1,
226
+ 2,
227
+ 2,
228
+ -1,
229
+ -1,
230
+ 1,
231
+ -1,
232
+ -1,
233
+ 2,
234
+ 0,
235
+ 2,
236
+ -1,
237
+ 0,
238
+ 1,
239
+ -1,
240
+ 1,
241
+ 2,
242
+ 1,
243
+ 0,
244
+ 0,
245
+ 0,
246
+ 2,
247
+ -1,
248
+ 1,
249
+ 1,
250
+ 0,
251
+ 0,
252
+ 0,
253
+ 1,
254
+ 2,
255
+ 2,
256
+ 1,
257
+ 0,
258
+ 0,
259
+ 0,
260
+ -1,
261
+ 2,
262
+ 1,
263
+ 2,
264
+ 0,
265
+ 3,
266
+ 1,
267
+ 2,
268
+ 2,
269
+ 2,
270
+ -1,
271
+ 0,
272
+ 0,
273
+ 0,
274
+ 0,
275
+ 0,
276
+ 0,
277
+ 0,
278
+ 0,
279
+ 0,
280
+ -1,
281
+ 1,
282
+ 0,
283
+ 0,
284
+ 0,
285
+ 0,
286
+ 0,
287
+ 0,
288
+ 0,
289
+ 0,
290
+ 0,
291
+ 0,
292
+ 0,
293
+ 0,
294
+ 0,
295
+ 0,
296
+ 0,
297
+ 0,
298
+ 0,
299
+ 0,
300
+ 0,
301
+ 0,
302
+ 0,
303
+ 0,
304
+ 0,
305
+ 0,
306
+ 0,
307
+ 0,
308
+ 0,
309
+ 0,
310
+ 0,
311
+ 0,
312
+ 0,
313
+ 0,
314
+ -1,
315
+ -1,
316
+ -1,
317
+ 3,
318
+ -1,
319
+ 1,
320
+ 0,
321
+ -1,
322
+ 0,
323
+ 1,
324
+ 2,
325
+ 2,
326
+ 1,
327
+ 1,
328
+ 1,
329
+ 3,
330
+ 1,
331
+ 1,
332
+ 3,
333
+ 0,
334
+ 1,
335
+ 1,
336
+ 1,
337
+ 1,
338
+ 1,
339
+ 3,
340
+ 0,
341
+ -1,
342
+ 0,
343
+ 2,
344
+ 0,
345
+ 2,
346
+ 2,
347
+ 0,
348
+ 1,
349
+ 2,
350
+ 0,
351
+ 0,
352
+ 2,
353
+ 0,
354
+ -1,
355
+ 0,
356
+ -1,
357
+ 2,
358
+ 1,
359
+ -1,
360
+ 1,
361
+ 0,
362
+ -1,
363
+ 1,
364
+ 1,
365
+ 3,
366
+ 1,
367
+ 2,
368
+ 0,
369
+ 1,
370
+ 3,
371
+ 3,
372
+ 2,
373
+ 1,
374
+ 0,
375
+ -1,
376
+ -1,
377
+ -1,
378
+ 0,
379
+ 2,
380
+ -1,
381
+ 0,
382
+ 0,
383
+ -1,
384
+ 1,
385
+ 1,
386
+ 1,
387
+ 1,
388
+ 1,
389
+ 1,
390
+ -1,
391
+ 1,
392
+ 3,
393
+ 3,
394
+ 1,
395
+ 1,
396
+ -1,
397
+ 1,
398
+ 1,
399
+ 2,
400
+ 2,
401
+ 2,
402
+ 2,
403
+ 2,
404
+ 2,
405
+ 2,
406
+ 2,
407
+ 2,
408
+ -1,
409
+ -1,
410
+ -1,
411
+ 3,
412
+ -1,
413
+ 3,
414
+ 3,
415
+ -1
416
+ ],
417
+ "topic_sizes": {
418
+ "2": 36,
419
+ "-1": 35,
420
+ "1": 45,
421
+ "0": 72,
422
+ "3": 13
423
+ },
424
+ "topic_mapper": [
425
+ [
426
+ -1,
427
+ -1,
428
+ -1
429
+ ],
430
+ [
431
+ 0,
432
+ 0,
433
+ 0
434
+ ],
435
+ [
436
+ 1,
437
+ 1,
438
+ 2
439
+ ],
440
+ [
441
+ 2,
442
+ 2,
443
+ 3
444
+ ],
445
+ [
446
+ 3,
447
+ 3,
448
+ 1
449
+ ]
450
+ ],
451
+ "topic_labels": {
452
+ "-1": "-1_my_for_to_account",
453
+ "0": "0_refund_nike_my_store",
454
+ "1": "1_my_the_for_klarna",
455
+ "2": "2_email_to_my_account",
456
+ "3": "3_card_klarna_it_to"
457
+ },
458
+ "custom_labels": null,
459
+ "_outliers": 1,
460
+ "topic_aspects": {}
461
+ }