Spaces:
No application file
No application file
Commit
·
90479e9
1
Parent(s):
fde672a
🏗️ Final RAG QA demo: cleaned notebook, retrieval-only prompt, Gradio chat
Browse files- .gradio/certificate.pem +31 -0
- Try_2.ipynb +471 -0
- requirements.txt +7 -0
- try_1.ipynb +1333 -0
.gradio/certificate.pem
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
-----BEGIN CERTIFICATE-----
|
2 |
+
MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
|
3 |
+
TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
|
4 |
+
cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
|
5 |
+
WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
|
6 |
+
ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
|
7 |
+
MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
|
8 |
+
h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
|
9 |
+
0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
|
10 |
+
A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
|
11 |
+
T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
|
12 |
+
B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
|
13 |
+
B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
|
14 |
+
KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
|
15 |
+
OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
|
16 |
+
jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
|
17 |
+
qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
|
18 |
+
rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
|
19 |
+
HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
|
20 |
+
hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
|
21 |
+
ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
|
22 |
+
3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
|
23 |
+
NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
|
24 |
+
ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
|
25 |
+
TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
|
26 |
+
jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
|
27 |
+
oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
|
28 |
+
4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
|
29 |
+
mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
|
30 |
+
emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
|
31 |
+
-----END CERTIFICATE-----
|
Try_2.ipynb
ADDED
@@ -0,0 +1,471 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "code",
|
5 |
+
"execution_count": 1,
|
6 |
+
"id": "d0df5ec3",
|
7 |
+
"metadata": {},
|
8 |
+
"outputs": [
|
9 |
+
{
|
10 |
+
"name": "stdout",
|
11 |
+
"output_type": "stream",
|
12 |
+
"text": [
|
13 |
+
"Requirement already satisfied: gradio in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from -r requirements.txt (line 1)) (5.29.0)\n",
|
14 |
+
"Requirement already satisfied: transformers in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from -r requirements.txt (line 2)) (4.51.3)\n",
|
15 |
+
"Requirement already satisfied: sentence-transformers in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from -r requirements.txt (line 3)) (4.1.0)\n",
|
16 |
+
"Requirement already satisfied: faiss-cpu in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from -r requirements.txt (line 4)) (1.11.0)\n",
|
17 |
+
"Requirement already satisfied: datasets in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from -r requirements.txt (line 5)) (3.5.1)\n",
|
18 |
+
"Requirement already satisfied: huggingface_hub in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from -r requirements.txt (line 6)) (0.30.2)\n",
|
19 |
+
"Requirement already satisfied: aiofiles<25.0,>=22.0 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from gradio->-r requirements.txt (line 1)) (24.1.0)\n",
|
20 |
+
"Requirement already satisfied: anyio<5.0,>=3.0 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from gradio->-r requirements.txt (line 1)) (4.8.0)\n",
|
21 |
+
"Requirement already satisfied: audioop-lts<1.0 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from gradio->-r requirements.txt (line 1)) (0.2.1)\n",
|
22 |
+
"Requirement already satisfied: fastapi<1.0,>=0.115.2 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from gradio->-r requirements.txt (line 1)) (0.115.12)\n",
|
23 |
+
"Requirement already satisfied: ffmpy in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from gradio->-r requirements.txt (line 1)) (0.5.0)\n",
|
24 |
+
"Requirement already satisfied: gradio-client==1.10.0 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from gradio->-r requirements.txt (line 1)) (1.10.0)\n",
|
25 |
+
"Requirement already satisfied: groovy~=0.1 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from gradio->-r requirements.txt (line 1)) (0.1.2)\n",
|
26 |
+
"Requirement already satisfied: httpx>=0.24.1 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from gradio->-r requirements.txt (line 1)) (0.28.1)\n",
|
27 |
+
"Requirement already satisfied: jinja2<4.0 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from gradio->-r requirements.txt (line 1)) (3.1.5)\n",
|
28 |
+
"Requirement already satisfied: markupsafe<4.0,>=2.0 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from gradio->-r requirements.txt (line 1)) (3.0.2)\n",
|
29 |
+
"Requirement already satisfied: numpy<3.0,>=1.0 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from gradio->-r requirements.txt (line 1)) (2.2.3)\n",
|
30 |
+
"Requirement already satisfied: orjson~=3.0 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from gradio->-r requirements.txt (line 1)) (3.10.18)\n",
|
31 |
+
"Requirement already satisfied: packaging in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from gradio->-r requirements.txt (line 1)) (24.2)\n",
|
32 |
+
"Requirement already satisfied: pandas<3.0,>=1.0 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from gradio->-r requirements.txt (line 1)) (2.2.3)\n",
|
33 |
+
"Requirement already satisfied: pillow<12.0,>=8.0 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from gradio->-r requirements.txt (line 1)) (11.1.0)\n",
|
34 |
+
"Requirement already satisfied: pydantic<2.12,>=2.0 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from gradio->-r requirements.txt (line 1)) (2.11.4)\n",
|
35 |
+
"Requirement already satisfied: pydub in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from gradio->-r requirements.txt (line 1)) (0.25.1)\n",
|
36 |
+
"Requirement already satisfied: python-multipart>=0.0.18 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from gradio->-r requirements.txt (line 1)) (0.0.20)\n",
|
37 |
+
"Requirement already satisfied: pyyaml<7.0,>=5.0 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from gradio->-r requirements.txt (line 1)) (6.0.2)\n",
|
38 |
+
"Requirement already satisfied: ruff>=0.9.3 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from gradio->-r requirements.txt (line 1)) (0.11.8)\n",
|
39 |
+
"Requirement already satisfied: safehttpx<0.2.0,>=0.1.6 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from gradio->-r requirements.txt (line 1)) (0.1.6)\n",
|
40 |
+
"Requirement already satisfied: semantic-version~=2.0 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from gradio->-r requirements.txt (line 1)) (2.10.0)\n",
|
41 |
+
"Requirement already satisfied: starlette<1.0,>=0.40.0 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from gradio->-r requirements.txt (line 1)) (0.46.2)\n",
|
42 |
+
"Requirement already satisfied: tomlkit<0.14.0,>=0.12.0 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from gradio->-r requirements.txt (line 1)) (0.13.2)\n",
|
43 |
+
"Requirement already satisfied: typer<1.0,>=0.12 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from gradio->-r requirements.txt (line 1)) (0.15.3)\n",
|
44 |
+
"Requirement already satisfied: typing-extensions~=4.0 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from gradio->-r requirements.txt (line 1)) (4.12.2)\n",
|
45 |
+
"Requirement already satisfied: uvicorn>=0.14.0 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from gradio->-r requirements.txt (line 1)) (0.34.2)\n",
|
46 |
+
"Requirement already satisfied: fsspec in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from gradio-client==1.10.0->gradio->-r requirements.txt (line 1)) (2025.3.0)\n",
|
47 |
+
"Requirement already satisfied: websockets<16.0,>=10.0 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from gradio-client==1.10.0->gradio->-r requirements.txt (line 1)) (15.0.1)\n",
|
48 |
+
"Requirement already satisfied: idna>=2.8 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from anyio<5.0,>=3.0->gradio->-r requirements.txt (line 1)) (3.10)\n",
|
49 |
+
"Requirement already satisfied: sniffio>=1.1 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from anyio<5.0,>=3.0->gradio->-r requirements.txt (line 1)) (1.3.1)\n",
|
50 |
+
"Requirement already satisfied: python-dateutil>=2.8.2 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from pandas<3.0,>=1.0->gradio->-r requirements.txt (line 1)) (2.9.0.post0)\n",
|
51 |
+
"Requirement already satisfied: pytz>=2020.1 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from pandas<3.0,>=1.0->gradio->-r requirements.txt (line 1)) (2025.1)\n",
|
52 |
+
"Requirement already satisfied: tzdata>=2022.7 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from pandas<3.0,>=1.0->gradio->-r requirements.txt (line 1)) (2025.1)\n",
|
53 |
+
"Requirement already satisfied: annotated-types>=0.6.0 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from pydantic<2.12,>=2.0->gradio->-r requirements.txt (line 1)) (0.7.0)\n",
|
54 |
+
"Requirement already satisfied: pydantic-core==2.33.2 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from pydantic<2.12,>=2.0->gradio->-r requirements.txt (line 1)) (2.33.2)\n",
|
55 |
+
"Requirement already satisfied: typing-inspection>=0.4.0 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from pydantic<2.12,>=2.0->gradio->-r requirements.txt (line 1)) (0.4.0)\n",
|
56 |
+
"Requirement already satisfied: click>=8.0.0 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from typer<1.0,>=0.12->gradio->-r requirements.txt (line 1)) (8.1.8)\n",
|
57 |
+
"Requirement already satisfied: shellingham>=1.3.0 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from typer<1.0,>=0.12->gradio->-r requirements.txt (line 1)) (1.5.4)\n",
|
58 |
+
"Requirement already satisfied: rich>=10.11.0 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from typer<1.0,>=0.12->gradio->-r requirements.txt (line 1)) (14.0.0)\n",
|
59 |
+
"Requirement already satisfied: filelock in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from transformers->-r requirements.txt (line 2)) (3.18.0)\n",
|
60 |
+
"Requirement already satisfied: regex!=2019.12.17 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from transformers->-r requirements.txt (line 2)) (2024.11.6)\n",
|
61 |
+
"Requirement already satisfied: requests in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from transformers->-r requirements.txt (line 2)) (2.32.3)\n",
|
62 |
+
"Requirement already satisfied: tokenizers<0.22,>=0.21 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from transformers->-r requirements.txt (line 2)) (0.21.1)\n",
|
63 |
+
"Requirement already satisfied: safetensors>=0.4.3 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from transformers->-r requirements.txt (line 2)) (0.5.3)\n",
|
64 |
+
"Requirement already satisfied: tqdm>=4.27 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from transformers->-r requirements.txt (line 2)) (4.67.1)\n",
|
65 |
+
"Requirement already satisfied: torch>=1.11.0 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from sentence-transformers->-r requirements.txt (line 3)) (2.7.0)\n",
|
66 |
+
"Requirement already satisfied: scikit-learn in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from sentence-transformers->-r requirements.txt (line 3)) (1.6.1)\n",
|
67 |
+
"Requirement already satisfied: scipy in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from sentence-transformers->-r requirements.txt (line 3)) (1.15.2)\n",
|
68 |
+
"Requirement already satisfied: pyarrow>=15.0.0 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from datasets->-r requirements.txt (line 5)) (20.0.0)\n",
|
69 |
+
"Requirement already satisfied: dill<0.3.9,>=0.3.0 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from datasets->-r requirements.txt (line 5)) (0.3.8)\n",
|
70 |
+
"Requirement already satisfied: xxhash in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from datasets->-r requirements.txt (line 5)) (3.5.0)\n",
|
71 |
+
"Requirement already satisfied: multiprocess<0.70.17 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from datasets->-r requirements.txt (line 5)) (0.70.16)\n",
|
72 |
+
"Requirement already satisfied: aiohttp in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from datasets->-r requirements.txt (line 5)) (3.11.18)\n",
|
73 |
+
"Requirement already satisfied: hf-xet>=0.1.4 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from huggingface_hub[hf_xet]->-r requirements.txt (line 7)) (1.1.0)\n",
|
74 |
+
"Requirement already satisfied: aiohappyeyeballs>=2.3.0 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from aiohttp->datasets->-r requirements.txt (line 5)) (2.6.1)\n",
|
75 |
+
"Requirement already satisfied: aiosignal>=1.1.2 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from aiohttp->datasets->-r requirements.txt (line 5)) (1.3.2)\n",
|
76 |
+
"Requirement already satisfied: attrs>=17.3.0 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from aiohttp->datasets->-r requirements.txt (line 5)) (25.1.0)\n",
|
77 |
+
"Requirement already satisfied: frozenlist>=1.1.1 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from aiohttp->datasets->-r requirements.txt (line 5)) (1.6.0)\n",
|
78 |
+
"Requirement already satisfied: multidict<7.0,>=4.5 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from aiohttp->datasets->-r requirements.txt (line 5)) (6.4.3)\n",
|
79 |
+
"Requirement already satisfied: propcache>=0.2.0 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from aiohttp->datasets->-r requirements.txt (line 5)) (0.3.1)\n",
|
80 |
+
"Requirement already satisfied: yarl<2.0,>=1.17.0 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from aiohttp->datasets->-r requirements.txt (line 5)) (1.20.0)\n",
|
81 |
+
"Requirement already satisfied: colorama in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from click>=8.0.0->typer<1.0,>=0.12->gradio->-r requirements.txt (line 1)) (0.4.6)\n",
|
82 |
+
"Requirement already satisfied: certifi in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from httpx>=0.24.1->gradio->-r requirements.txt (line 1)) (2025.1.31)\n",
|
83 |
+
"Requirement already satisfied: httpcore==1.* in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from httpx>=0.24.1->gradio->-r requirements.txt (line 1)) (1.0.7)\n",
|
84 |
+
"Requirement already satisfied: h11<0.15,>=0.13 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from httpcore==1.*->httpx>=0.24.1->gradio->-r requirements.txt (line 1)) (0.14.0)\n",
|
85 |
+
"Requirement already satisfied: six>=1.5 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from python-dateutil>=2.8.2->pandas<3.0,>=1.0->gradio->-r requirements.txt (line 1)) (1.17.0)\n",
|
86 |
+
"Requirement already satisfied: charset-normalizer<4,>=2 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from requests->transformers->-r requirements.txt (line 2)) (3.4.1)\n",
|
87 |
+
"Requirement already satisfied: urllib3<3,>=1.21.1 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from requests->transformers->-r requirements.txt (line 2)) (2.3.0)\n",
|
88 |
+
"Requirement already satisfied: markdown-it-py>=2.2.0 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from rich>=10.11.0->typer<1.0,>=0.12->gradio->-r requirements.txt (line 1)) (3.0.0)\n",
|
89 |
+
"Requirement already satisfied: pygments<3.0.0,>=2.13.0 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from rich>=10.11.0->typer<1.0,>=0.12->gradio->-r requirements.txt (line 1)) (2.19.1)\n",
|
90 |
+
"Requirement already satisfied: mdurl~=0.1 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from markdown-it-py>=2.2.0->rich>=10.11.0->typer<1.0,>=0.12->gradio->-r requirements.txt (line 1)) (0.1.2)\n",
|
91 |
+
"Requirement already satisfied: sympy>=1.13.3 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from torch>=1.11.0->sentence-transformers->-r requirements.txt (line 3)) (1.14.0)\n",
|
92 |
+
"Requirement already satisfied: networkx in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from torch>=1.11.0->sentence-transformers->-r requirements.txt (line 3)) (3.4.2)\n",
|
93 |
+
"Requirement already satisfied: setuptools in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from torch>=1.11.0->sentence-transformers->-r requirements.txt (line 3)) (75.8.2)\n",
|
94 |
+
"Requirement already satisfied: mpmath<1.4,>=1.1.0 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from sympy>=1.13.3->torch>=1.11.0->sentence-transformers->-r requirements.txt (line 3)) (1.3.0)\n",
|
95 |
+
"Requirement already satisfied: joblib>=1.2.0 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from scikit-learn->sentence-transformers->-r requirements.txt (line 3)) (1.4.2)\n",
|
96 |
+
"Requirement already satisfied: threadpoolctl>=3.1.0 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from scikit-learn->sentence-transformers->-r requirements.txt (line 3)) (3.6.0)\n"
|
97 |
+
]
|
98 |
+
}
|
99 |
+
],
|
100 |
+
"source": [
|
101 |
+
"! pip install -r requirements.txt"
|
102 |
+
]
|
103 |
+
},
|
104 |
+
{
|
105 |
+
"cell_type": "code",
|
106 |
+
"execution_count": 2,
|
107 |
+
"id": "a50bbe16",
|
108 |
+
"metadata": {},
|
109 |
+
"outputs": [
|
110 |
+
{
|
111 |
+
"name": "stdout",
|
112 |
+
"output_type": "stream",
|
113 |
+
"text": [
|
114 |
+
"RAG configs: ['text-corpus', 'question-answer']\n",
|
115 |
+
"Loaded 3200 wiki passages.\n",
|
116 |
+
"Loaded 1000 SQuAD examples.\n"
|
117 |
+
]
|
118 |
+
},
|
119 |
+
{
|
120 |
+
"data": {
|
121 |
+
"application/vnd.jupyter.widget-view+json": {
|
122 |
+
"model_id": "72faca76fa9f4dd19d6d334cec19c88f",
|
123 |
+
"version_major": 2,
|
124 |
+
"version_minor": 0
|
125 |
+
},
|
126 |
+
"text/plain": [
|
127 |
+
"Resolving data files: 0%| | 0/26 [00:00<?, ?it/s]"
|
128 |
+
]
|
129 |
+
},
|
130 |
+
"metadata": {},
|
131 |
+
"output_type": "display_data"
|
132 |
+
},
|
133 |
+
{
|
134 |
+
"data": {
|
135 |
+
"application/vnd.jupyter.widget-view+json": {
|
136 |
+
"model_id": "09b30d351c304fb9a4cb58ed0fd0fcb6",
|
137 |
+
"version_major": 2,
|
138 |
+
"version_minor": 0
|
139 |
+
},
|
140 |
+
"text/plain": [
|
141 |
+
"Resolving data files: 0%| | 0/26 [00:00<?, ?it/s]"
|
142 |
+
]
|
143 |
+
},
|
144 |
+
"metadata": {},
|
145 |
+
"output_type": "display_data"
|
146 |
+
},
|
147 |
+
{
|
148 |
+
"name": "stdout",
|
149 |
+
"output_type": "stream",
|
150 |
+
"text": [
|
151 |
+
"Loaded 1000 TriviaQA examples.\n"
|
152 |
+
]
|
153 |
+
}
|
154 |
+
],
|
155 |
+
"source": [
|
156 |
+
"from datasets import load_dataset, get_dataset_config_names\n",
|
157 |
+
"\n",
|
158 |
+
"# 1) See which configs RAG mini-wiki offers\n",
|
159 |
+
"print(\"RAG configs:\", get_dataset_config_names(\"rag-datasets/rag-mini-wikipedia\"))\n",
|
160 |
+
"\n",
|
161 |
+
"# 2) Load the raw wiki passages\n",
|
162 |
+
"wiki_ds = load_dataset(\n",
|
163 |
+
" \"rag-datasets/rag-mini-wikipedia\",\n",
|
164 |
+
" \"text-corpus\",\n",
|
165 |
+
" split=\"passages\"\n",
|
166 |
+
")\n",
|
167 |
+
"passages = wiki_ds[\"passage\"]\n",
|
168 |
+
"print(f\"Loaded {len(passages)} wiki passages.\")\n",
|
169 |
+
"\n",
|
170 |
+
"# 3) Load a small SQuAD v2 slice (first 1 000 examples for speed)\n",
|
171 |
+
"squad = load_dataset(\"rajpurkar/squad_v2\", split=\"train[:1000]\")\n",
|
172 |
+
"print(f\"Loaded {len(squad)} SQuAD examples.\")\n",
|
173 |
+
"\n",
|
174 |
+
"# 4) Load TriviaQA small\n",
|
175 |
+
"trivia = load_dataset(\n",
|
176 |
+
" \"mandarjoshi/trivia_qa\",\n",
|
177 |
+
" \"rc\",\n",
|
178 |
+
" split=\"validation[:1000]\"\n",
|
179 |
+
")\n",
|
180 |
+
"print(f\"Loaded {len(trivia)} TriviaQA examples.\")\n"
|
181 |
+
]
|
182 |
+
},
|
183 |
+
{
|
184 |
+
"cell_type": "code",
|
185 |
+
"execution_count": 3,
|
186 |
+
"id": "e9307bab",
|
187 |
+
"metadata": {},
|
188 |
+
"outputs": [
|
189 |
+
{
|
190 |
+
"data": {
|
191 |
+
"application/vnd.jupyter.widget-view+json": {
|
192 |
+
"model_id": "2ee1f7b78acf419eb853d10599b86438",
|
193 |
+
"version_major": 2,
|
194 |
+
"version_minor": 0
|
195 |
+
},
|
196 |
+
"text/plain": [
|
197 |
+
"Batches: 0%| | 0/100 [00:00<?, ?it/s]"
|
198 |
+
]
|
199 |
+
},
|
200 |
+
"metadata": {},
|
201 |
+
"output_type": "display_data"
|
202 |
+
},
|
203 |
+
{
|
204 |
+
"name": "stdout",
|
205 |
+
"output_type": "stream",
|
206 |
+
"text": [
|
207 |
+
"Indexed 3200 vectors of size 384.\n"
|
208 |
+
]
|
209 |
+
}
|
210 |
+
],
|
211 |
+
"source": [
|
212 |
+
"from sentence_transformers import SentenceTransformer\n",
|
213 |
+
"import faiss\n",
|
214 |
+
"\n",
|
215 |
+
"# Embedder\n",
|
216 |
+
"embedder = SentenceTransformer(\"all-MiniLM-L6-v2\")\n",
|
217 |
+
"embeddings = embedder.encode(passages, show_progress_bar=True, convert_to_numpy=True)\n",
|
218 |
+
"\n",
|
219 |
+
"# Build index\n",
|
220 |
+
"dim = embeddings.shape[1]\n",
|
221 |
+
"index = faiss.IndexFlatL2(dim)\n",
|
222 |
+
"index.add(embeddings)\n",
|
223 |
+
"print(f\"Indexed {index.ntotal} vectors of size {dim}.\")\n"
|
224 |
+
]
|
225 |
+
},
|
226 |
+
{
|
227 |
+
"cell_type": "code",
|
228 |
+
"execution_count": 4,
|
229 |
+
"id": "745eb30e",
|
230 |
+
"metadata": {},
|
231 |
+
"outputs": [
|
232 |
+
{
|
233 |
+
"name": "stderr",
|
234 |
+
"output_type": "stream",
|
235 |
+
"text": [
|
236 |
+
"Device set to use cpu\n"
|
237 |
+
]
|
238 |
+
},
|
239 |
+
{
|
240 |
+
"name": "stdout",
|
241 |
+
"output_type": "stream",
|
242 |
+
"text": [
|
243 |
+
"a neural network\n"
|
244 |
+
]
|
245 |
+
}
|
246 |
+
],
|
247 |
+
"source": [
|
248 |
+
"# ==== 4. Load & Test the LLM ====\n",
|
249 |
+
"from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline\n",
|
250 |
+
"\n",
|
251 |
+
"# 4.1) Use the correct Flan-T5 repo\n",
|
252 |
+
"MODEL_NAME = \"google/flan-t5-base\"\n",
|
253 |
+
"\n",
|
254 |
+
"# 4.2) Load tokenizer & model (google/flan-t5-base is a public checkpoint, so no `huggingface-cli login` is required)\n",
|
255 |
+
"tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)\n",
|
256 |
+
"model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)\n",
|
257 |
+
"\n",
|
258 |
+
"# 4.3) Create a generation pipeline\n",
|
259 |
+
"qa_pipeline = pipeline(\n",
|
260 |
+
" \"text2text-generation\",\n",
|
261 |
+
" model=model,\n",
|
262 |
+
" tokenizer=tokenizer,\n",
|
263 |
+
" device=-1 # or 0 if you have a GPU\n",
|
264 |
+
")\n",
|
265 |
+
"\n",
|
266 |
+
"# 4.4) Quick sanity check\n",
|
267 |
+
"print(qa_pipeline(\"Question: What is retrieval-augmented generation? Answer:\", max_length=50)[0][\"generated_text\"])\n"
|
268 |
+
]
|
269 |
+
},
|
270 |
+
{
|
271 |
+
"cell_type": "code",
|
272 |
+
"execution_count": 5,
|
273 |
+
"id": "70efc796",
|
274 |
+
"metadata": {},
|
275 |
+
"outputs": [
|
276 |
+
{
|
277 |
+
"name": "stdout",
|
278 |
+
"output_type": "stream",
|
279 |
+
"text": [
|
280 |
+
"Answer: Isaac Newton\n"
|
281 |
+
]
|
282 |
+
}
|
283 |
+
],
|
284 |
+
"source": [
|
285 |
+
"# ==== 5. Retrieval + Generation Function ====\n",
|
286 |
+
"def retrieve_and_answer(question: str, k: int = 5) -> str:\n",
|
287 |
+
" # 1) Embed the question\n",
|
288 |
+
" q_emb = embedder.encode([question], convert_to_numpy=True)\n",
|
289 |
+
" # 2) Retrieve top-k passages\n",
|
290 |
+
" distances, indices = index.search(q_emb, k)\n",
|
291 |
+
" # 3) Build a little context block\n",
|
292 |
+
" contexts = \"\\n\".join(\n",
|
293 |
+
" f\"Context {i+1}: {passages[idx][:200]}...\"\n",
|
294 |
+
" for i, idx in enumerate(indices[0])\n",
|
295 |
+
" )\n",
|
296 |
+
" # 4) Assemble prompt\n",
|
297 |
+
" prompt = (\n",
|
298 |
+
" \"You are a helpful QA assistant. \"\n",
|
299 |
+
" \"Use ONLY the following contexts to answer the question. \"\n",
|
300 |
+
" \"If the answer is not contained in the contexts, respond with \"\n",
|
301 |
+
" \"'Sorry, I don't know.'\\n\\n\"\n",
|
302 |
+
" f\"{contexts}\\n\"\n",
|
303 |
+
" f\"Question: {question}\\nAnswer:\"\n",
|
304 |
+
" )\n",
|
305 |
+
"\n",
|
306 |
+
" # 5) Generate and return\n",
|
307 |
+
" out = qa_pipeline(prompt, max_length=200, do_sample=False)\n",
|
308 |
+
" return out[0][\"generated_text\"]\n",
|
309 |
+
"\n",
|
310 |
+
"\n",
|
311 |
+
"# 5.1) Test it end-to-end\n",
|
312 |
+
"sample_q = \"Who conceptualized the theory of relativity?\"\n",
|
313 |
+
"print(\"Answer:\", retrieve_and_answer(sample_q))\n"
|
314 |
+
]
|
315 |
+
},
|
316 |
+
{
|
317 |
+
"cell_type": "code",
|
318 |
+
"execution_count": null,
|
319 |
+
"id": "a1476c3a",
|
320 |
+
"metadata": {},
|
321 |
+
"outputs": [],
|
322 |
+
"source": [
|
323 |
+
"import gradio as gr\n",
|
324 |
+
"\n",
|
325 |
+
"def chat_fn(user_message, history=None):\n",
|
326 |
+
" # Initialize history if needed\n",
|
327 |
+
" history = history or []\n",
|
328 |
+
" # 1) Record the user turn\n",
|
329 |
+
" history.append({\"role\": \"user\", \"content\": user_message})\n",
|
330 |
+
" # 2) Retrieve & generate\n",
|
331 |
+
" answer = retrieve_and_answer(user_message, k=5)\n",
|
332 |
+
" # 3) Record the assistant turn\n",
|
333 |
+
" history.append({\"role\": \"assistant\", \"content\": answer})\n",
|
334 |
+
" # 4) Return the full message list\n",
|
335 |
+
" return history\n",
|
336 |
+
"\n",
|
337 |
+
"# Launch the chat UI using the role/content 'messages' history format\n",
|
338 |
+
"gr.ChatInterface(\n",
|
339 |
+
" fn=chat_fn,\n",
|
340 |
+
" type=\"messages\", # ← switch to messages!\n",
|
341 |
+
" title=\"🔍 RAG QA Demo\",\n",
|
342 |
+
" description=\"I only answer from retrieved contexts; otherwise I'll say 'Sorry, I don't know.'\"\n",
|
343 |
+
").launch(share=True)  # share=True also creates a temporary public link; omit it to serve locally only\n"
|
344 |
+
]
|
345 |
+
},
|
346 |
+
{
|
347 |
+
"cell_type": "code",
|
348 |
+
"execution_count": 9,
|
349 |
+
"id": "78314cbb",
|
350 |
+
"metadata": {},
|
351 |
+
"outputs": [
|
352 |
+
{
|
353 |
+
"name": "stderr",
|
354 |
+
"output_type": "stream",
|
355 |
+
"text": [
|
356 |
+
"c:\\Users\\victo\\AppData\\Local\\Programs\\Python\\Python313\\Lib\\site-packages\\gradio\\chat_interface.py:338: UserWarning: The 'tuples' format for chatbot messages is deprecated and will be removed in a future version of Gradio. Please set type='messages' instead, which uses openai-style 'role' and 'content' keys.\n",
|
357 |
+
" self.chatbot = Chatbot(\n"
|
358 |
+
]
|
359 |
+
},
|
360 |
+
{
|
361 |
+
"name": "stdout",
|
362 |
+
"output_type": "stream",
|
363 |
+
"text": [
|
364 |
+
"* Running on local URL: http://127.0.0.1:7861\n",
|
365 |
+
"* To create a public link, set `share=True` in `launch()`.\n"
|
366 |
+
]
|
367 |
+
},
|
368 |
+
{
|
369 |
+
"data": {
|
370 |
+
"text/html": [
|
371 |
+
"<div><iframe src=\"http://127.0.0.1:7861/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
|
372 |
+
],
|
373 |
+
"text/plain": [
|
374 |
+
"<IPython.core.display.HTML object>"
|
375 |
+
]
|
376 |
+
},
|
377 |
+
"metadata": {},
|
378 |
+
"output_type": "display_data"
|
379 |
+
},
|
380 |
+
{
|
381 |
+
"data": {
|
382 |
+
"text/plain": []
|
383 |
+
},
|
384 |
+
"execution_count": 9,
|
385 |
+
"metadata": {},
|
386 |
+
"output_type": "execute_result"
|
387 |
+
},
|
388 |
+
{
|
389 |
+
"name": "stderr",
|
390 |
+
"output_type": "stream",
|
391 |
+
"text": [
|
392 |
+
"Traceback (most recent call last):\n",
|
393 |
+
" File \"c:\\Users\\victo\\AppData\\Local\\Programs\\Python\\Python313\\Lib\\site-packages\\gradio\\queueing.py\", line 625, in process_events\n",
|
394 |
+
" response = await route_utils.call_process_api(\n",
|
395 |
+
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
|
396 |
+
" ...<5 lines>...\n",
|
397 |
+
" )\n",
|
398 |
+
" ^\n",
|
399 |
+
" File \"c:\\Users\\victo\\AppData\\Local\\Programs\\Python\\Python313\\Lib\\site-packages\\gradio\\route_utils.py\", line 322, in call_process_api\n",
|
400 |
+
" output = await app.get_blocks().process_api(\n",
|
401 |
+
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
|
402 |
+
" ...<11 lines>...\n",
|
403 |
+
" )\n",
|
404 |
+
" ^\n",
|
405 |
+
" File \"c:\\Users\\victo\\AppData\\Local\\Programs\\Python\\Python313\\Lib\\site-packages\\gradio\\blocks.py\", line 2146, in process_api\n",
|
406 |
+
" result = await self.call_function(\n",
|
407 |
+
" ^^^^^^^^^^^^^^^^^^^^^^^^^\n",
|
408 |
+
" ...<8 lines>...\n",
|
409 |
+
" )\n",
|
410 |
+
" ^\n",
|
411 |
+
" File \"c:\\Users\\victo\\AppData\\Local\\Programs\\Python\\Python313\\Lib\\site-packages\\gradio\\blocks.py\", line 1662, in call_function\n",
|
412 |
+
" prediction = await fn(*processed_input)\n",
|
413 |
+
" ^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
|
414 |
+
" File \"c:\\Users\\victo\\AppData\\Local\\Programs\\Python\\Python313\\Lib\\site-packages\\gradio\\utils.py\", line 851, in async_wrapper\n",
|
415 |
+
" response = await f(*args, **kwargs)\n",
|
416 |
+
" ^^^^^^^^^^^^^^^^^^^^^^^^\n",
|
417 |
+
" File \"c:\\Users\\victo\\AppData\\Local\\Programs\\Python\\Python313\\Lib\\site-packages\\gradio\\chat_interface.py\", line 884, in _submit_fn\n",
|
418 |
+
" history = self._append_message_to_history(response, history, \"assistant\")\n",
|
419 |
+
" File \"c:\\Users\\victo\\AppData\\Local\\Programs\\Python\\Python313\\Lib\\site-packages\\gradio\\chat_interface.py\", line 815, in _append_message_to_history\n",
|
420 |
+
" message_dicts = self._message_as_message_dict(message, role)\n",
|
421 |
+
" File \"c:\\Users\\victo\\AppData\\Local\\Programs\\Python\\Python313\\Lib\\site-packages\\gradio\\chat_interface.py\", line 853, in _message_as_message_dict\n",
|
422 |
+
" for x in msg.get(\"files\", []):\n",
|
423 |
+
" ^^^^^^^\n",
|
424 |
+
"AttributeError: 'tuple' object has no attribute 'get'\n"
|
425 |
+
]
|
426 |
+
}
|
427 |
+
],
|
428 |
+
"source": [
|
429 |
+
"import gradio as gr\n",
|
430 |
+
"\n",
|
431 |
+
"gr.ChatInterface(\n",
|
432 |
+
" fn=chat_fn,\n",
|
433 |
+
" title=\"🔍 RAG QA Demo\",\n",
|
434 |
+
" description=(\n",
|
435 |
+
" \"Ask a question—I'll only answer if the info is in the retrieved contexts; \"\n",
|
436 |
+
" \"otherwise, I'll say “Sorry, I don't know.”\"\n",
|
437 |
+
" )\n",
|
438 |
+
").launch()\n"
|
439 |
+
]
|
440 |
+
},
|
441 |
+
{
|
442 |
+
"cell_type": "code",
|
443 |
+
"execution_count": null,
|
444 |
+
"id": "fc557ef9",
|
445 |
+
"metadata": {},
|
446 |
+
"outputs": [],
|
447 |
+
"source": []
|
448 |
+
}
|
449 |
+
],
|
450 |
+
"metadata": {
|
451 |
+
"kernelspec": {
|
452 |
+
"display_name": "Python 3",
|
453 |
+
"language": "python",
|
454 |
+
"name": "python3"
|
455 |
+
},
|
456 |
+
"language_info": {
|
457 |
+
"codemirror_mode": {
|
458 |
+
"name": "ipython",
|
459 |
+
"version": 3
|
460 |
+
},
|
461 |
+
"file_extension": ".py",
|
462 |
+
"mimetype": "text/x-python",
|
463 |
+
"name": "python",
|
464 |
+
"nbconvert_exporter": "python",
|
465 |
+
"pygments_lexer": "ipython3",
|
466 |
+
"version": "3.13.0"
|
467 |
+
}
|
468 |
+
},
|
469 |
+
"nbformat": 4,
|
470 |
+
"nbformat_minor": 5
|
471 |
+
}
|
requirements.txt
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
gradio
|
2 |
+
transformers
|
3 |
+
sentence-transformers
|
4 |
+
faiss-cpu
|
5 |
+
datasets
|
6 |
+
huggingface_hub
|
7 |
+
huggingface_hub[hf_xet]
|
try_1.ipynb
CHANGED
@@ -0,0 +1,1333 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "code",
|
5 |
+
"execution_count": 2,
|
6 |
+
"metadata": {},
|
7 |
+
"outputs": [
|
8 |
+
{
|
9 |
+
"name": "stdout",
|
10 |
+
"output_type": "stream",
|
11 |
+
"text": [
|
12 |
+
"Requirement already satisfied: gradio in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from -r requirements.txt (line 1)) (5.29.0)\n",
|
13 |
+
"Requirement already satisfied: transformers in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from -r requirements.txt (line 2)) (4.51.3)\n",
|
14 |
+
"Requirement already satisfied: sentence-transformers in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from -r requirements.txt (line 3)) (4.1.0)\n",
|
15 |
+
"Requirement already satisfied: faiss-cpu in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from -r requirements.txt (line 4)) (1.11.0)\n",
|
16 |
+
"Requirement already satisfied: datasets in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from -r requirements.txt (line 5)) (3.5.1)\n",
|
17 |
+
"Requirement already satisfied: huggingface_hub in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from -r requirements.txt (line 6)) (0.30.2)\n",
|
18 |
+
"Requirement already satisfied: aiofiles<25.0,>=22.0 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from gradio->-r requirements.txt (line 1)) (24.1.0)\n",
|
19 |
+
"Requirement already satisfied: anyio<5.0,>=3.0 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from gradio->-r requirements.txt (line 1)) (4.8.0)\n",
|
20 |
+
"Requirement already satisfied: audioop-lts<1.0 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from gradio->-r requirements.txt (line 1)) (0.2.1)\n",
|
21 |
+
"Requirement already satisfied: fastapi<1.0,>=0.115.2 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from gradio->-r requirements.txt (line 1)) (0.115.12)\n",
|
22 |
+
"Requirement already satisfied: ffmpy in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from gradio->-r requirements.txt (line 1)) (0.5.0)\n",
|
23 |
+
"Requirement already satisfied: gradio-client==1.10.0 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from gradio->-r requirements.txt (line 1)) (1.10.0)\n",
|
24 |
+
"Requirement already satisfied: groovy~=0.1 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from gradio->-r requirements.txt (line 1)) (0.1.2)\n",
|
25 |
+
"Requirement already satisfied: httpx>=0.24.1 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from gradio->-r requirements.txt (line 1)) (0.28.1)\n",
|
26 |
+
"Requirement already satisfied: jinja2<4.0 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from gradio->-r requirements.txt (line 1)) (3.1.5)\n",
|
27 |
+
"Requirement already satisfied: markupsafe<4.0,>=2.0 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from gradio->-r requirements.txt (line 1)) (3.0.2)\n",
|
28 |
+
"Requirement already satisfied: numpy<3.0,>=1.0 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from gradio->-r requirements.txt (line 1)) (2.2.3)\n",
|
29 |
+
"Requirement already satisfied: orjson~=3.0 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from gradio->-r requirements.txt (line 1)) (3.10.18)\n",
|
30 |
+
"Requirement already satisfied: packaging in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from gradio->-r requirements.txt (line 1)) (24.2)\n",
|
31 |
+
"Requirement already satisfied: pandas<3.0,>=1.0 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from gradio->-r requirements.txt (line 1)) (2.2.3)\n",
|
32 |
+
"Requirement already satisfied: pillow<12.0,>=8.0 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from gradio->-r requirements.txt (line 1)) (11.1.0)\n",
|
33 |
+
"Requirement already satisfied: pydantic<2.12,>=2.0 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from gradio->-r requirements.txt (line 1)) (2.11.4)\n",
|
34 |
+
"Requirement already satisfied: pydub in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from gradio->-r requirements.txt (line 1)) (0.25.1)\n",
|
35 |
+
"Requirement already satisfied: python-multipart>=0.0.18 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from gradio->-r requirements.txt (line 1)) (0.0.20)\n",
|
36 |
+
"Requirement already satisfied: pyyaml<7.0,>=5.0 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from gradio->-r requirements.txt (line 1)) (6.0.2)\n",
|
37 |
+
"Requirement already satisfied: ruff>=0.9.3 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from gradio->-r requirements.txt (line 1)) (0.11.8)\n",
|
38 |
+
"Requirement already satisfied: safehttpx<0.2.0,>=0.1.6 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from gradio->-r requirements.txt (line 1)) (0.1.6)\n",
|
39 |
+
"Requirement already satisfied: semantic-version~=2.0 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from gradio->-r requirements.txt (line 1)) (2.10.0)\n",
|
40 |
+
"Requirement already satisfied: starlette<1.0,>=0.40.0 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from gradio->-r requirements.txt (line 1)) (0.46.2)\n",
|
41 |
+
"Requirement already satisfied: tomlkit<0.14.0,>=0.12.0 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from gradio->-r requirements.txt (line 1)) (0.13.2)\n",
|
42 |
+
"Requirement already satisfied: typer<1.0,>=0.12 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from gradio->-r requirements.txt (line 1)) (0.15.3)\n",
|
43 |
+
"Requirement already satisfied: typing-extensions~=4.0 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from gradio->-r requirements.txt (line 1)) (4.12.2)\n",
|
44 |
+
"Requirement already satisfied: uvicorn>=0.14.0 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from gradio->-r requirements.txt (line 1)) (0.34.2)\n",
|
45 |
+
"Requirement already satisfied: fsspec in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from gradio-client==1.10.0->gradio->-r requirements.txt (line 1)) (2025.3.0)\n",
|
46 |
+
"Requirement already satisfied: websockets<16.0,>=10.0 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from gradio-client==1.10.0->gradio->-r requirements.txt (line 1)) (15.0.1)\n",
|
47 |
+
"Requirement already satisfied: idna>=2.8 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from anyio<5.0,>=3.0->gradio->-r requirements.txt (line 1)) (3.10)\n",
|
48 |
+
"Requirement already satisfied: sniffio>=1.1 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from anyio<5.0,>=3.0->gradio->-r requirements.txt (line 1)) (1.3.1)\n",
|
49 |
+
"Requirement already satisfied: python-dateutil>=2.8.2 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from pandas<3.0,>=1.0->gradio->-r requirements.txt (line 1)) (2.9.0.post0)\n",
|
50 |
+
"Requirement already satisfied: pytz>=2020.1 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from pandas<3.0,>=1.0->gradio->-r requirements.txt (line 1)) (2025.1)\n",
|
51 |
+
"Requirement already satisfied: tzdata>=2022.7 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from pandas<3.0,>=1.0->gradio->-r requirements.txt (line 1)) (2025.1)\n",
|
52 |
+
"Requirement already satisfied: annotated-types>=0.6.0 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from pydantic<2.12,>=2.0->gradio->-r requirements.txt (line 1)) (0.7.0)\n",
|
53 |
+
"Requirement already satisfied: pydantic-core==2.33.2 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from pydantic<2.12,>=2.0->gradio->-r requirements.txt (line 1)) (2.33.2)\n",
|
54 |
+
"Requirement already satisfied: typing-inspection>=0.4.0 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from pydantic<2.12,>=2.0->gradio->-r requirements.txt (line 1)) (0.4.0)\n",
|
55 |
+
"Requirement already satisfied: click>=8.0.0 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from typer<1.0,>=0.12->gradio->-r requirements.txt (line 1)) (8.1.8)\n",
|
56 |
+
"Requirement already satisfied: shellingham>=1.3.0 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from typer<1.0,>=0.12->gradio->-r requirements.txt (line 1)) (1.5.4)\n",
|
57 |
+
"Requirement already satisfied: rich>=10.11.0 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from typer<1.0,>=0.12->gradio->-r requirements.txt (line 1)) (14.0.0)\n",
|
58 |
+
"Requirement already satisfied: filelock in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from transformers->-r requirements.txt (line 2)) (3.18.0)\n",
|
59 |
+
"Requirement already satisfied: regex!=2019.12.17 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from transformers->-r requirements.txt (line 2)) (2024.11.6)\n",
|
60 |
+
"Requirement already satisfied: requests in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from transformers->-r requirements.txt (line 2)) (2.32.3)\n",
|
61 |
+
"Requirement already satisfied: tokenizers<0.22,>=0.21 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from transformers->-r requirements.txt (line 2)) (0.21.1)\n",
|
62 |
+
"Requirement already satisfied: safetensors>=0.4.3 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from transformers->-r requirements.txt (line 2)) (0.5.3)\n",
|
63 |
+
"Requirement already satisfied: tqdm>=4.27 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from transformers->-r requirements.txt (line 2)) (4.67.1)\n",
|
64 |
+
"Requirement already satisfied: torch>=1.11.0 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from sentence-transformers->-r requirements.txt (line 3)) (2.7.0)\n",
|
65 |
+
"Requirement already satisfied: scikit-learn in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from sentence-transformers->-r requirements.txt (line 3)) (1.6.1)\n",
|
66 |
+
"Requirement already satisfied: scipy in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from sentence-transformers->-r requirements.txt (line 3)) (1.15.2)\n",
|
67 |
+
"Requirement already satisfied: pyarrow>=15.0.0 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from datasets->-r requirements.txt (line 5)) (20.0.0)\n",
|
68 |
+
"Requirement already satisfied: dill<0.3.9,>=0.3.0 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from datasets->-r requirements.txt (line 5)) (0.3.8)\n",
|
69 |
+
"Requirement already satisfied: xxhash in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from datasets->-r requirements.txt (line 5)) (3.5.0)\n",
|
70 |
+
"Requirement already satisfied: multiprocess<0.70.17 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from datasets->-r requirements.txt (line 5)) (0.70.16)\n",
|
71 |
+
"Requirement already satisfied: aiohttp in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from datasets->-r requirements.txt (line 5)) (3.11.18)\n",
|
72 |
+
"Requirement already satisfied: aiohappyeyeballs>=2.3.0 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from aiohttp->datasets->-r requirements.txt (line 5)) (2.6.1)\n",
|
73 |
+
"Requirement already satisfied: aiosignal>=1.1.2 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from aiohttp->datasets->-r requirements.txt (line 5)) (1.3.2)\n",
|
74 |
+
"Requirement already satisfied: attrs>=17.3.0 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from aiohttp->datasets->-r requirements.txt (line 5)) (25.1.0)\n",
|
75 |
+
"Requirement already satisfied: frozenlist>=1.1.1 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from aiohttp->datasets->-r requirements.txt (line 5)) (1.6.0)\n",
|
76 |
+
"Requirement already satisfied: multidict<7.0,>=4.5 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from aiohttp->datasets->-r requirements.txt (line 5)) (6.4.3)\n",
|
77 |
+
"Requirement already satisfied: propcache>=0.2.0 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from aiohttp->datasets->-r requirements.txt (line 5)) (0.3.1)\n",
|
78 |
+
"Requirement already satisfied: yarl<2.0,>=1.17.0 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from aiohttp->datasets->-r requirements.txt (line 5)) (1.20.0)\n",
|
79 |
+
"Requirement already satisfied: colorama in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from click>=8.0.0->typer<1.0,>=0.12->gradio->-r requirements.txt (line 1)) (0.4.6)\n",
|
80 |
+
"Requirement already satisfied: certifi in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from httpx>=0.24.1->gradio->-r requirements.txt (line 1)) (2025.1.31)\n",
|
81 |
+
"Requirement already satisfied: httpcore==1.* in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from httpx>=0.24.1->gradio->-r requirements.txt (line 1)) (1.0.7)\n",
|
82 |
+
"Requirement already satisfied: h11<0.15,>=0.13 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from httpcore==1.*->httpx>=0.24.1->gradio->-r requirements.txt (line 1)) (0.14.0)\n",
|
83 |
+
"Requirement already satisfied: six>=1.5 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from python-dateutil>=2.8.2->pandas<3.0,>=1.0->gradio->-r requirements.txt (line 1)) (1.17.0)\n",
|
84 |
+
"Requirement already satisfied: charset-normalizer<4,>=2 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from requests->transformers->-r requirements.txt (line 2)) (3.4.1)\n",
|
85 |
+
"Requirement already satisfied: urllib3<3,>=1.21.1 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from requests->transformers->-r requirements.txt (line 2)) (2.3.0)\n",
|
86 |
+
"Requirement already satisfied: markdown-it-py>=2.2.0 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from rich>=10.11.0->typer<1.0,>=0.12->gradio->-r requirements.txt (line 1)) (3.0.0)\n",
|
87 |
+
"Requirement already satisfied: pygments<3.0.0,>=2.13.0 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from rich>=10.11.0->typer<1.0,>=0.12->gradio->-r requirements.txt (line 1)) (2.19.1)\n",
|
88 |
+
"Requirement already satisfied: mdurl~=0.1 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from markdown-it-py>=2.2.0->rich>=10.11.0->typer<1.0,>=0.12->gradio->-r requirements.txt (line 1)) (0.1.2)\n",
|
89 |
+
"Requirement already satisfied: sympy>=1.13.3 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from torch>=1.11.0->sentence-transformers->-r requirements.txt (line 3)) (1.14.0)\n",
|
90 |
+
"Requirement already satisfied: networkx in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from torch>=1.11.0->sentence-transformers->-r requirements.txt (line 3)) (3.4.2)\n",
|
91 |
+
"Requirement already satisfied: setuptools in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from torch>=1.11.0->sentence-transformers->-r requirements.txt (line 3)) (75.8.2)\n",
|
92 |
+
"Requirement already satisfied: mpmath<1.4,>=1.1.0 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from sympy>=1.13.3->torch>=1.11.0->sentence-transformers->-r requirements.txt (line 3)) (1.3.0)\n",
|
93 |
+
"Requirement already satisfied: joblib>=1.2.0 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from scikit-learn->sentence-transformers->-r requirements.txt (line 3)) (1.4.2)\n",
|
94 |
+
"Requirement already satisfied: threadpoolctl>=3.1.0 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from scikit-learn->sentence-transformers->-r requirements.txt (line 3)) (3.6.0)\n"
|
95 |
+
]
|
96 |
+
}
|
97 |
+
],
|
98 |
+
"source": [
|
99 |
+
"! pip install -r requirements.txt"
|
100 |
+
]
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"cell_type": "code",
|
104 |
+
"execution_count": 3,
|
105 |
+
"metadata": {},
|
106 |
+
"outputs": [
|
107 |
+
{
|
108 |
+
"name": "stdout",
|
109 |
+
"output_type": "stream",
|
110 |
+
"text": [
|
111 |
+
"{'id': '56be85543aeaaa14008c9063', 'title': 'Beyoncé', 'context': 'Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ bee-YON-say) (born September 4, 1981) is an American singer, songwriter, record producer and actress. Born and raised in Houston, Texas, she performed in various singing and dancing competitions as a child, and rose to fame in the late 1990s as lead singer of R&B girl-group Destiny\\'s Child. Managed by her father, Mathew Knowles, the group became one of the world\\'s best-selling girl groups of all time. Their hiatus saw the release of Beyoncé\\'s debut album, Dangerously in Love (2003), which established her as a solo artist worldwide, earned five Grammy Awards and featured the Billboard Hot 100 number-one singles \"Crazy in Love\" and \"Baby Boy\".', 'question': 'When did Beyonce start becoming popular?', 'answers': {'text': ['in the late 1990s'], 'answer_start': [269]}}\n"
|
112 |
+
]
|
113 |
+
}
|
114 |
+
],
|
115 |
+
"source": [
|
116 |
+
"from datasets import load_dataset\n",
|
117 |
+
"\n",
|
118 |
+
"# Load SQuAD v2 train split\n",
|
119 |
+
"squad = load_dataset(\"rajpurkar/squad_v2\", split=\"train\")\n",
|
120 |
+
"print(squad[0])\n"
|
121 |
+
]
|
122 |
+
},
|
123 |
+
{
|
124 |
+
"cell_type": "code",
|
125 |
+
"execution_count": 4,
|
126 |
+
"metadata": {},
|
127 |
+
"outputs": [
|
128 |
+
{
|
129 |
+
"name": "stdout",
|
130 |
+
"output_type": "stream",
|
131 |
+
"text": [
|
132 |
+
"['text-corpus', 'question-answer']\n"
|
133 |
+
]
|
134 |
+
}
|
135 |
+
],
|
136 |
+
"source": [
|
137 |
+
"from datasets import get_dataset_config_names\n",
|
138 |
+
"\n",
|
139 |
+
"print(get_dataset_config_names(\"rag-datasets/rag-mini-wikipedia\"))\n",
|
140 |
+
"# → ['text-corpus', 'question-answer']\n"
|
141 |
+
]
|
142 |
+
},
|
143 |
+
{
|
144 |
+
"cell_type": "code",
|
145 |
+
"execution_count": 5,
|
146 |
+
"metadata": {},
|
147 |
+
"outputs": [
|
148 |
+
{
|
149 |
+
"name": "stdout",
|
150 |
+
"output_type": "stream",
|
151 |
+
"text": [
|
152 |
+
"Loaded 3200 passages.\n",
|
153 |
+
"['Uruguay (official full name in ; pron. , Eastern Republic of Uruguay) is a country located in the southeastern part of South America. It is home to 3.3 million people, of which 1.7 million live in the capital Montevideo and its metropolitan area.', 'It is bordered by Brazil to the north, by Argentina across the bank of both the Uruguay River to the west and the estuary of RÃ\\xado de la Plata to the southwest, and the South Atlantic Ocean to the southeast. It is the second smallest independent country in South America, larger only than Suriname and the French overseas department of French Guiana.']\n"
|
154 |
+
]
|
155 |
+
}
|
156 |
+
],
|
157 |
+
"source": [
|
158 |
+
"# After you’ve loaded:\n",
|
159 |
+
"wiki = load_dataset(\n",
|
160 |
+
" \"rag-datasets/rag-mini-wikipedia\",\n",
|
161 |
+
" \"text-corpus\",\n",
|
162 |
+
" split=\"passages\"\n",
|
163 |
+
")\n",
|
164 |
+
"\n",
|
165 |
+
"# ← Notice split=\"passages\" above, and now:\n",
|
166 |
+
"passages = wiki[\"passage\"] # <-- use \"passage\", not \"text\"\n",
|
167 |
+
"print(f\"Loaded {len(passages)} passages.\")\n",
|
168 |
+
"print(passages[:2])\n"
|
169 |
+
]
|
170 |
+
},
|
171 |
+
{
|
172 |
+
"cell_type": "code",
|
173 |
+
"execution_count": 6,
|
174 |
+
"metadata": {},
|
175 |
+
"outputs": [
|
176 |
+
{
|
177 |
+
"data": {
|
178 |
+
"application/vnd.jupyter.widget-view+json": {
|
179 |
+
"model_id": "8a3f694b820f4b8f84646a8ab2f79970",
|
180 |
+
"version_major": 2,
|
181 |
+
"version_minor": 0
|
182 |
+
},
|
183 |
+
"text/plain": [
|
184 |
+
"Resolving data files: 0%| | 0/26 [00:00<?, ?it/s]"
|
185 |
+
]
|
186 |
+
},
|
187 |
+
"metadata": {},
|
188 |
+
"output_type": "display_data"
|
189 |
+
},
|
190 |
+
{
|
191 |
+
"data": {
|
192 |
+
"application/vnd.jupyter.widget-view+json": {
|
193 |
+
"model_id": "1d019bd66d9547e0adcd5454c2ae248a",
|
194 |
+
"version_major": 2,
|
195 |
+
"version_minor": 0
|
196 |
+
},
|
197 |
+
"text/plain": [
|
198 |
+
"Resolving data files: 0%| | 0/26 [00:00<?, ?it/s]"
|
199 |
+
]
|
200 |
+
},
|
201 |
+
"metadata": {},
|
202 |
+
"output_type": "display_data"
|
203 |
+
},
|
204 |
+
{
|
205 |
+
"data": {
|
206 |
+
"application/vnd.jupyter.widget-view+json": {
|
207 |
+
"model_id": "fa2bce666e8140ab84d82cd34cd2d960",
|
208 |
+
"version_major": 2,
|
209 |
+
"version_minor": 0
|
210 |
+
},
|
211 |
+
"text/plain": [
|
212 |
+
"Downloading data: 0%| | 0/26 [00:00<?, ?files/s]"
|
213 |
+
]
|
214 |
+
},
|
215 |
+
"metadata": {},
|
216 |
+
"output_type": "display_data"
|
217 |
+
},
|
218 |
+
{
|
219 |
+
"data": {
|
220 |
+
"application/vnd.jupyter.widget-view+json": {
|
221 |
+
"model_id": "05e3239406214ab5b911623bb23e4ca8",
|
222 |
+
"version_major": 2,
|
223 |
+
"version_minor": 0
|
224 |
+
},
|
225 |
+
"text/plain": [
|
226 |
+
"train-00001-of-00026.parquet: 77%|#######7 | 231M/298M [00:00<?, ?B/s]"
|
227 |
+
]
|
228 |
+
},
|
229 |
+
"metadata": {},
|
230 |
+
"output_type": "display_data"
|
231 |
+
},
|
232 |
+
{
|
233 |
+
"name": "stderr",
|
234 |
+
"output_type": "stream",
|
235 |
+
"text": [
|
236 |
+
"c:\\Users\\victo\\AppData\\Local\\Programs\\Python\\Python313\\Lib\\site-packages\\huggingface_hub\\file_download.py:144: UserWarning: `huggingface_hub` cache-system uses symlinks by default to efficiently store duplicated files but your machine does not support them in C:\\Users\\victo\\.cache\\huggingface\\hub\\datasets--mandarjoshi--trivia_qa. Caching files will still work but in a degraded version that might require more space on your disk. This warning can be disabled by setting the `HF_HUB_DISABLE_SYMLINKS_WARNING` environment variable. For more details, see https://huggingface.co/docs/huggingface_hub/how-to-cache#limitations.\n",
|
237 |
+
"To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development\n",
|
238 |
+
" warnings.warn(message)\n"
|
239 |
+
]
|
240 |
+
},
|
241 |
+
{
|
242 |
+
"data": {
|
243 |
+
"application/vnd.jupyter.widget-view+json": {
|
244 |
+
"model_id": "b563007c5002441d85da63d3ac058bab",
|
245 |
+
"version_major": 2,
|
246 |
+
"version_minor": 0
|
247 |
+
},
|
248 |
+
"text/plain": [
|
249 |
+
"train-00002-of-00026.parquet: 0%| | 0.00/290M [00:00<?, ?B/s]"
|
250 |
+
]
|
251 |
+
},
|
252 |
+
"metadata": {},
|
253 |
+
"output_type": "display_data"
|
254 |
+
},
|
255 |
+
{
|
256 |
+
"data": {
|
257 |
+
"application/vnd.jupyter.widget-view+json": {
|
258 |
+
"model_id": "9240545f3981467eaecb61c675e5576b",
|
259 |
+
"version_major": 2,
|
260 |
+
"version_minor": 0
|
261 |
+
},
|
262 |
+
"text/plain": [
|
263 |
+
"train-00003-of-00026.parquet: 0%| | 0.00/444M [00:00<?, ?B/s]"
|
264 |
+
]
|
265 |
+
},
|
266 |
+
"metadata": {},
|
267 |
+
"output_type": "display_data"
|
268 |
+
},
|
269 |
+
{
|
270 |
+
"data": {
|
271 |
+
"application/vnd.jupyter.widget-view+json": {
|
272 |
+
"model_id": "520175ce1b144a2e80a117d8796809f0",
|
273 |
+
"version_major": 2,
|
274 |
+
"version_minor": 0
|
275 |
+
},
|
276 |
+
"text/plain": [
|
277 |
+
"train-00004-of-00026.parquet: 0%| | 0.00/461M [00:00<?, ?B/s]"
|
278 |
+
]
|
279 |
+
},
|
280 |
+
"metadata": {},
|
281 |
+
"output_type": "display_data"
|
282 |
+
},
|
283 |
+
{
|
284 |
+
"data": {
|
285 |
+
"application/vnd.jupyter.widget-view+json": {
|
286 |
+
"model_id": "c89505462bde4fc8b8f2cc30adaa1ace",
|
287 |
+
"version_major": 2,
|
288 |
+
"version_minor": 0
|
289 |
+
},
|
290 |
+
"text/plain": [
|
291 |
+
"train-00005-of-00026.parquet: 0%| | 0.00/474M [00:00<?, ?B/s]"
|
292 |
+
]
|
293 |
+
},
|
294 |
+
"metadata": {},
|
295 |
+
"output_type": "display_data"
|
296 |
+
},
|
297 |
+
{
|
298 |
+
"data": {
|
299 |
+
"application/vnd.jupyter.widget-view+json": {
|
300 |
+
"model_id": "80c4d01825074ab6b9d732c1a999d883",
|
301 |
+
"version_major": 2,
|
302 |
+
"version_minor": 0
|
303 |
+
},
|
304 |
+
"text/plain": [
|
305 |
+
"train-00006-of-00026.parquet: 0%| | 0.00/404M [00:00<?, ?B/s]"
|
306 |
+
]
|
307 |
+
},
|
308 |
+
"metadata": {},
|
309 |
+
"output_type": "display_data"
|
310 |
+
},
|
311 |
+
{
|
312 |
+
"data": {
|
313 |
+
"application/vnd.jupyter.widget-view+json": {
|
314 |
+
"model_id": "982196edb22949769d93de117077b0aa",
|
315 |
+
"version_major": 2,
|
316 |
+
"version_minor": 0
|
317 |
+
},
|
318 |
+
"text/plain": [
|
319 |
+
"train-00007-of-00026.parquet: 0%| | 0.00/324M [00:00<?, ?B/s]"
|
320 |
+
]
|
321 |
+
},
|
322 |
+
"metadata": {},
|
323 |
+
"output_type": "display_data"
|
324 |
+
},
|
325 |
+
{
|
326 |
+
"data": {
|
327 |
+
"application/vnd.jupyter.widget-view+json": {
|
328 |
+
"model_id": "21dd300150f04169bcaed35c005355c9",
|
329 |
+
"version_major": 2,
|
330 |
+
"version_minor": 0
|
331 |
+
},
|
332 |
+
"text/plain": [
|
333 |
+
"train-00008-of-00026.parquet: 0%| | 0.00/329M [00:00<?, ?B/s]"
|
334 |
+
]
|
335 |
+
},
|
336 |
+
"metadata": {},
|
337 |
+
"output_type": "display_data"
|
338 |
+
},
|
339 |
+
{
|
340 |
+
"data": {
|
341 |
+
"application/vnd.jupyter.widget-view+json": {
|
342 |
+
"model_id": "13802139a2e14a7f85de99be0f3f8adf",
|
343 |
+
"version_major": 2,
|
344 |
+
"version_minor": 0
|
345 |
+
},
|
346 |
+
"text/plain": [
|
347 |
+
"train-00009-of-00026.parquet: 0%| | 0.00/336M [00:00<?, ?B/s]"
|
348 |
+
]
|
349 |
+
},
|
350 |
+
"metadata": {},
|
351 |
+
"output_type": "display_data"
|
352 |
+
},
|
353 |
+
{
|
354 |
+
"data": {
|
355 |
+
"application/vnd.jupyter.widget-view+json": {
|
356 |
+
"model_id": "4980b7601d894899b8afd68cf9820468",
|
357 |
+
"version_major": 2,
|
358 |
+
"version_minor": 0
|
359 |
+
},
|
360 |
+
"text/plain": [
|
361 |
+
"train-00010-of-00026.parquet: 0%| | 0.00/400M [00:00<?, ?B/s]"
|
362 |
+
]
|
363 |
+
},
|
364 |
+
"metadata": {},
|
365 |
+
"output_type": "display_data"
|
366 |
+
},
|
367 |
+
{
|
368 |
+
"data": {
|
369 |
+
"application/vnd.jupyter.widget-view+json": {
|
370 |
+
"model_id": "66427936e2514530b8c0c87cfe42edb2",
|
371 |
+
"version_major": 2,
|
372 |
+
"version_minor": 0
|
373 |
+
},
|
374 |
+
"text/plain": [
|
375 |
+
"train-00011-of-00026.parquet: 0%| | 0.00/370M [00:00<?, ?B/s]"
|
376 |
+
]
|
377 |
+
},
|
378 |
+
"metadata": {},
|
379 |
+
"output_type": "display_data"
|
380 |
+
},
|
381 |
+
{
|
382 |
+
"data": {
|
383 |
+
"application/vnd.jupyter.widget-view+json": {
|
384 |
+
"model_id": "0ea7415c9d164cfda25b361f30eb451a",
|
385 |
+
"version_major": 2,
|
386 |
+
"version_minor": 0
|
387 |
+
},
|
388 |
+
"text/plain": [
|
389 |
+
"train-00012-of-00026.parquet: 0%| | 0.00/341M [00:00<?, ?B/s]"
|
390 |
+
]
|
391 |
+
},
|
392 |
+
"metadata": {},
|
393 |
+
"output_type": "display_data"
|
394 |
+
},
|
395 |
+
{
|
396 |
+
"data": {
|
397 |
+
"application/vnd.jupyter.widget-view+json": {
|
398 |
+
"model_id": "fc28d3c2fdb545e0935f2216817768fc",
|
399 |
+
"version_major": 2,
|
400 |
+
"version_minor": 0
|
401 |
+
},
|
402 |
+
"text/plain": [
|
403 |
+
"train-00013-of-00026.parquet: 0%| | 0.00/327M [00:00<?, ?B/s]"
|
404 |
+
]
|
405 |
+
},
|
406 |
+
"metadata": {},
|
407 |
+
"output_type": "display_data"
|
408 |
+
},
|
409 |
+
{
|
410 |
+
"data": {
|
411 |
+
"application/vnd.jupyter.widget-view+json": {
|
412 |
+
"model_id": "544359bd2cca4b8dba095b89e4101252",
|
413 |
+
"version_major": 2,
|
414 |
+
"version_minor": 0
|
415 |
+
},
|
416 |
+
"text/plain": [
|
417 |
+
"train-00014-of-00026.parquet: 0%| | 0.00/310M [00:00<?, ?B/s]"
|
418 |
+
]
|
419 |
+
},
|
420 |
+
"metadata": {},
|
421 |
+
"output_type": "display_data"
|
422 |
+
},
|
423 |
+
{
|
424 |
+
"data": {
|
425 |
+
"application/vnd.jupyter.widget-view+json": {
|
426 |
+
"model_id": "a6d2d3c209af4e039da71ffd7db271ca",
|
427 |
+
"version_major": 2,
|
428 |
+
"version_minor": 0
|
429 |
+
},
|
430 |
+
"text/plain": [
|
431 |
+
"train-00015-of-00026.parquet: 0%| | 0.00/157M [00:00<?, ?B/s]"
|
432 |
+
]
|
433 |
+
},
|
434 |
+
"metadata": {},
|
435 |
+
"output_type": "display_data"
|
436 |
+
},
|
437 |
+
{
|
438 |
+
"data": {
|
439 |
+
"application/vnd.jupyter.widget-view+json": {
|
440 |
+
"model_id": "9830a8a9edd84737ae1e70ad2cc9c1c3",
|
441 |
+
"version_major": 2,
|
442 |
+
"version_minor": 0
|
443 |
+
},
|
444 |
+
"text/plain": [
|
445 |
+
"train-00016-of-00026.parquet: 0%| | 0.00/136M [00:00<?, ?B/s]"
|
446 |
+
]
|
447 |
+
},
|
448 |
+
"metadata": {},
|
449 |
+
"output_type": "display_data"
|
450 |
+
},
|
451 |
+
{
|
452 |
+
"data": {
|
453 |
+
"application/vnd.jupyter.widget-view+json": {
|
454 |
+
"model_id": "6100a04c5bc54a6c8992eccd2768991b",
|
455 |
+
"version_major": 2,
|
456 |
+
"version_minor": 0
|
457 |
+
},
|
458 |
+
"text/plain": [
|
459 |
+
"train-00017-of-00026.parquet: 0%| | 0.00/159M [00:00<?, ?B/s]"
|
460 |
+
]
|
461 |
+
},
|
462 |
+
"metadata": {},
|
463 |
+
"output_type": "display_data"
|
464 |
+
},
|
465 |
+
{
|
466 |
+
"data": {
|
467 |
+
"application/vnd.jupyter.widget-view+json": {
|
468 |
+
"model_id": "b9d487eb0ecf49a180b73395826aa850",
|
469 |
+
"version_major": 2,
|
470 |
+
"version_minor": 0
|
471 |
+
},
|
472 |
+
"text/plain": [
|
473 |
+
"train-00018-of-00026.parquet: 0%| | 0.00/200M [00:00<?, ?B/s]"
|
474 |
+
]
|
475 |
+
},
|
476 |
+
"metadata": {},
|
477 |
+
"output_type": "display_data"
|
478 |
+
},
|
479 |
+
{
|
480 |
+
"data": {
|
481 |
+
"application/vnd.jupyter.widget-view+json": {
|
482 |
+
"model_id": "2911d2a55c414003b41c7f1e67800b5c",
|
483 |
+
"version_major": 2,
|
484 |
+
"version_minor": 0
|
485 |
+
},
|
486 |
+
"text/plain": [
|
487 |
+
"train-00019-of-00026.parquet: 0%| | 0.00/180M [00:00<?, ?B/s]"
|
488 |
+
]
|
489 |
+
},
|
490 |
+
"metadata": {},
|
491 |
+
"output_type": "display_data"
|
492 |
+
},
|
493 |
+
{
|
494 |
+
"data": {
|
495 |
+
"application/vnd.jupyter.widget-view+json": {
|
496 |
+
"model_id": "327a41e96c3949a6b2179c701ef47797",
|
497 |
+
"version_major": 2,
|
498 |
+
"version_minor": 0
|
499 |
+
},
|
500 |
+
"text/plain": [
|
501 |
+
"train-00020-of-00026.parquet: 0%| | 0.00/150M [00:00<?, ?B/s]"
|
502 |
+
]
|
503 |
+
},
|
504 |
+
"metadata": {},
|
505 |
+
"output_type": "display_data"
|
506 |
+
},
|
507 |
+
{
|
508 |
+
"data": {
|
509 |
+
"application/vnd.jupyter.widget-view+json": {
|
510 |
+
"model_id": "8d0d01d4b973430e90c072710bd31244",
|
511 |
+
"version_major": 2,
|
512 |
+
"version_minor": 0
|
513 |
+
},
|
514 |
+
"text/plain": [
|
515 |
+
"train-00021-of-00026.parquet: 0%| | 0.00/153M [00:00<?, ?B/s]"
|
516 |
+
]
|
517 |
+
},
|
518 |
+
"metadata": {},
|
519 |
+
"output_type": "display_data"
|
520 |
+
},
|
521 |
+
{
|
522 |
+
"data": {
|
523 |
+
"application/vnd.jupyter.widget-view+json": {
|
524 |
+
"model_id": "0defa90b28684a159873edff4e736feb",
|
525 |
+
"version_major": 2,
|
526 |
+
"version_minor": 0
|
527 |
+
},
|
528 |
+
"text/plain": [
|
529 |
+
"train-00022-of-00026.parquet: 0%| | 0.00/147M [00:00<?, ?B/s]"
|
530 |
+
]
|
531 |
+
},
|
532 |
+
"metadata": {},
|
533 |
+
"output_type": "display_data"
|
534 |
+
},
|
535 |
+
{
|
536 |
+
"data": {
|
537 |
+
"application/vnd.jupyter.widget-view+json": {
|
538 |
+
"model_id": "c9d77f763db744168b330490bc69d2f3",
|
539 |
+
"version_major": 2,
|
540 |
+
"version_minor": 0
|
541 |
+
},
|
542 |
+
"text/plain": [
|
543 |
+
"train-00023-of-00026.parquet: 0%| | 0.00/157M [00:00<?, ?B/s]"
|
544 |
+
]
|
545 |
+
},
|
546 |
+
"metadata": {},
|
547 |
+
"output_type": "display_data"
|
548 |
+
},
|
549 |
+
{
|
550 |
+
"data": {
|
551 |
+
"application/vnd.jupyter.widget-view+json": {
|
552 |
+
"model_id": "254a9cffc17641089d2e89865062f95d",
|
553 |
+
"version_major": 2,
|
554 |
+
"version_minor": 0
|
555 |
+
},
|
556 |
+
"text/plain": [
|
557 |
+
"train-00024-of-00026.parquet: 0%| | 0.00/154M [00:00<?, ?B/s]"
|
558 |
+
]
|
559 |
+
},
|
560 |
+
"metadata": {},
|
561 |
+
"output_type": "display_data"
|
562 |
+
},
|
563 |
+
{
|
564 |
+
"data": {
|
565 |
+
"application/vnd.jupyter.widget-view+json": {
|
566 |
+
"model_id": "e094b5c26eae4fc9b322cc8a2f426f09",
|
567 |
+
"version_major": 2,
|
568 |
+
"version_minor": 0
|
569 |
+
},
|
570 |
+
"text/plain": [
|
571 |
+
"train-00025-of-00026.parquet: 0%| | 0.00/158M [00:00<?, ?B/s]"
|
572 |
+
]
|
573 |
+
},
|
574 |
+
"metadata": {},
|
575 |
+
"output_type": "display_data"
|
576 |
+
},
|
577 |
+
{
|
578 |
+
"data": {
|
579 |
+
"application/vnd.jupyter.widget-view+json": {
|
580 |
+
"model_id": "b594f4942c2145688b6187650b635932",
|
581 |
+
"version_major": 2,
|
582 |
+
"version_minor": 0
|
583 |
+
},
|
584 |
+
"text/plain": [
|
585 |
+
"validation-00000-of-00004.parquet: 0%| | 0.00/327M [00:00<?, ?B/s]"
|
586 |
+
]
|
587 |
+
},
|
588 |
+
"metadata": {},
|
589 |
+
"output_type": "display_data"
|
590 |
+
},
|
591 |
+
{
|
592 |
+
"data": {
|
593 |
+
"application/vnd.jupyter.widget-view+json": {
|
594 |
+
"model_id": "834c2d1a34bb4beca57ab588c3aa8916",
|
595 |
+
"version_major": 2,
|
596 |
+
"version_minor": 0
|
597 |
+
},
|
598 |
+
"text/plain": [
|
599 |
+
"validation-00001-of-00004.parquet: 0%| | 0.00/296M [00:00<?, ?B/s]"
|
600 |
+
]
|
601 |
+
},
|
602 |
+
"metadata": {},
|
603 |
+
"output_type": "display_data"
|
604 |
+
},
|
605 |
+
{
|
606 |
+
"data": {
|
607 |
+
"application/vnd.jupyter.widget-view+json": {
|
608 |
+
"model_id": "ad55d48669044e0a8da4064ff0e2ba9f",
|
609 |
+
"version_major": 2,
|
610 |
+
"version_minor": 0
|
611 |
+
},
|
612 |
+
"text/plain": [
|
613 |
+
"validation-00002-of-00004.parquet: 0%| | 0.00/184M [00:00<?, ?B/s]"
|
614 |
+
]
|
615 |
+
},
|
616 |
+
"metadata": {},
|
617 |
+
"output_type": "display_data"
|
618 |
+
},
|
619 |
+
{
|
620 |
+
"data": {
|
621 |
+
"application/vnd.jupyter.widget-view+json": {
|
622 |
+
"model_id": "1985723fa1fe481db76fedf15ba44c3c",
|
623 |
+
"version_major": 2,
|
624 |
+
"version_minor": 0
|
625 |
+
},
|
626 |
+
"text/plain": [
|
627 |
+
"validation-00003-of-00004.parquet: 0%| | 0.00/129M [00:00<?, ?B/s]"
|
628 |
+
]
|
629 |
+
},
|
630 |
+
"metadata": {},
|
631 |
+
"output_type": "display_data"
|
632 |
+
},
|
633 |
+
{
|
634 |
+
"data": {
|
635 |
+
"application/vnd.jupyter.widget-view+json": {
|
636 |
+
"model_id": "47aff557e9494b0babf955d257da9707",
|
637 |
+
"version_major": 2,
|
638 |
+
"version_minor": 0
|
639 |
+
},
|
640 |
+
"text/plain": [
|
641 |
+
"test-00000-of-00004.parquet: 0%| | 0.00/307M [00:00<?, ?B/s]"
|
642 |
+
]
|
643 |
+
},
|
644 |
+
"metadata": {},
|
645 |
+
"output_type": "display_data"
|
646 |
+
},
|
647 |
+
{
|
648 |
+
"data": {
|
649 |
+
"application/vnd.jupyter.widget-view+json": {
|
650 |
+
"model_id": "6950c967f2764410a4e9a58e6f75eb9c",
|
651 |
+
"version_major": 2,
|
652 |
+
"version_minor": 0
|
653 |
+
},
|
654 |
+
"text/plain": [
|
655 |
+
"test-00001-of-00004.parquet: 0%| | 0.00/288M [00:00<?, ?B/s]"
|
656 |
+
]
|
657 |
+
},
|
658 |
+
"metadata": {},
|
659 |
+
"output_type": "display_data"
|
660 |
+
},
|
661 |
+
{
|
662 |
+
"data": {
|
663 |
+
"application/vnd.jupyter.widget-view+json": {
|
664 |
+
"model_id": "1289a1af756a44519b933cdb04e331b3",
|
665 |
+
"version_major": 2,
|
666 |
+
"version_minor": 0
|
667 |
+
},
|
668 |
+
"text/plain": [
|
669 |
+
"test-00002-of-00004.parquet: 0%| | 0.00/171M [00:00<?, ?B/s]"
|
670 |
+
]
|
671 |
+
},
|
672 |
+
"metadata": {},
|
673 |
+
"output_type": "display_data"
|
674 |
+
},
|
675 |
+
{
|
676 |
+
"data": {
|
677 |
+
"application/vnd.jupyter.widget-view+json": {
|
678 |
+
"model_id": "8373939a6ccd4aae8335bda2ced8e8f7",
|
679 |
+
"version_major": 2,
|
680 |
+
"version_minor": 0
|
681 |
+
},
|
682 |
+
"text/plain": [
|
683 |
+
"test-00003-of-00004.parquet: 0%| | 0.00/128M [00:00<?, ?B/s]"
|
684 |
+
]
|
685 |
+
},
|
686 |
+
"metadata": {},
|
687 |
+
"output_type": "display_data"
|
688 |
+
},
|
689 |
+
{
|
690 |
+
"data": {
|
691 |
+
"application/vnd.jupyter.widget-view+json": {
|
692 |
+
"model_id": "e982782830e74fbc94bf1c62359e756d",
|
693 |
+
"version_major": 2,
|
694 |
+
"version_minor": 0
|
695 |
+
},
|
696 |
+
"text/plain": [
|
697 |
+
"Generating train split: 0%| | 0/138384 [00:00<?, ? examples/s]"
|
698 |
+
]
|
699 |
+
},
|
700 |
+
"metadata": {},
|
701 |
+
"output_type": "display_data"
|
702 |
+
},
|
703 |
+
{
|
704 |
+
"data": {
|
705 |
+
"application/vnd.jupyter.widget-view+json": {
|
706 |
+
"model_id": "721b7ae4ac194096972ab4dfcf481771",
|
707 |
+
"version_major": 2,
|
708 |
+
"version_minor": 0
|
709 |
+
},
|
710 |
+
"text/plain": [
|
711 |
+
"Generating validation split: 0%| | 0/17944 [00:00<?, ? examples/s]"
|
712 |
+
]
|
713 |
+
},
|
714 |
+
"metadata": {},
|
715 |
+
"output_type": "display_data"
|
716 |
+
},
|
717 |
+
{
|
718 |
+
"data": {
|
719 |
+
"application/vnd.jupyter.widget-view+json": {
|
720 |
+
"model_id": "317e5566726c420ab680f2dc25558c1c",
|
721 |
+
"version_major": 2,
|
722 |
+
"version_minor": 0
|
723 |
+
},
|
724 |
+
"text/plain": [
|
725 |
+
"Generating test split: 0%| | 0/17210 [00:00<?, ? examples/s]"
|
726 |
+
]
|
727 |
+
},
|
728 |
+
"metadata": {},
|
729 |
+
"output_type": "display_data"
|
730 |
+
},
|
731 |
+
{
|
732 |
+
"name": "stdout",
|
733 |
+
"output_type": "stream",
|
734 |
+
"text": [
|
735 |
+
"1000\n"
|
736 |
+
]
|
737 |
+
}
|
738 |
+
],
|
739 |
+
"source": [
|
740 |
+
"# Load the first 1,000 examples of the validation split\n",
|
741 |
+
"trivia_small = load_dataset(\n",
|
742 |
+
" \"mandarjoshi/trivia_qa\",\n",
|
743 |
+
" \"rc\",\n",
|
744 |
+
" split=\"validation[:1000]\"\n",
|
745 |
+
")\n",
|
746 |
+
"print(len(trivia_small)) # 1000\n"
|
747 |
+
]
|
748 |
+
},
|
749 |
+
{
|
750 |
+
"cell_type": "code",
|
751 |
+
"execution_count": null,
|
752 |
+
"metadata": {},
|
753 |
+
"outputs": [],
|
754 |
+
"source": []
|
755 |
+
},
|
756 |
+
{
|
757 |
+
"cell_type": "markdown",
|
758 |
+
"metadata": {},
|
759 |
+
"source": [
|
760 |
+
"## Embedding"
|
761 |
+
]
|
762 |
+
},
|
763 |
+
{
|
764 |
+
"cell_type": "code",
|
765 |
+
"execution_count": 7,
|
766 |
+
"metadata": {},
|
767 |
+
"outputs": [
|
768 |
+
{
|
769 |
+
"data": {
|
770 |
+
"application/vnd.jupyter.widget-view+json": {
|
771 |
+
"model_id": "082fae0dc7c846648262320f497ead9f",
|
772 |
+
"version_major": 2,
|
773 |
+
"version_minor": 0
|
774 |
+
},
|
775 |
+
"text/plain": [
|
776 |
+
"modules.json: 0%| | 0.00/349 [00:00<?, ?B/s]"
|
777 |
+
]
|
778 |
+
},
|
779 |
+
"metadata": {},
|
780 |
+
"output_type": "display_data"
|
781 |
+
},
|
782 |
+
{
|
783 |
+
"name": "stderr",
|
784 |
+
"output_type": "stream",
|
785 |
+
"text": [
|
786 |
+
"c:\\Users\\victo\\AppData\\Local\\Programs\\Python\\Python313\\Lib\\site-packages\\huggingface_hub\\file_download.py:144: UserWarning: `huggingface_hub` cache-system uses symlinks by default to efficiently store duplicated files but your machine does not support them in C:\\Users\\victo\\.cache\\huggingface\\hub\\models--sentence-transformers--all-MiniLM-L6-v2. Caching files will still work but in a degraded version that might require more space on your disk. This warning can be disabled by setting the `HF_HUB_DISABLE_SYMLINKS_WARNING` environment variable. For more details, see https://huggingface.co/docs/huggingface_hub/how-to-cache#limitations.\n",
|
787 |
+
"To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development\n",
|
788 |
+
" warnings.warn(message)\n"
|
789 |
+
]
|
790 |
+
},
|
791 |
+
{
|
792 |
+
"data": {
|
793 |
+
"application/vnd.jupyter.widget-view+json": {
|
794 |
+
"model_id": "657091b30bc247bf9a0be3c9fdcfafe1",
|
795 |
+
"version_major": 2,
|
796 |
+
"version_minor": 0
|
797 |
+
},
|
798 |
+
"text/plain": [
|
799 |
+
"config_sentence_transformers.json: 0%| | 0.00/116 [00:00<?, ?B/s]"
|
800 |
+
]
|
801 |
+
},
|
802 |
+
"metadata": {},
|
803 |
+
"output_type": "display_data"
|
804 |
+
},
|
805 |
+
{
|
806 |
+
"data": {
|
807 |
+
"application/vnd.jupyter.widget-view+json": {
|
808 |
+
"model_id": "5d02d9fcea144f7396c8b2f8b8a30b50",
|
809 |
+
"version_major": 2,
|
810 |
+
"version_minor": 0
|
811 |
+
},
|
812 |
+
"text/plain": [
|
813 |
+
"README.md: 0%| | 0.00/10.5k [00:00<?, ?B/s]"
|
814 |
+
]
|
815 |
+
},
|
816 |
+
"metadata": {},
|
817 |
+
"output_type": "display_data"
|
818 |
+
},
|
819 |
+
{
|
820 |
+
"data": {
|
821 |
+
"application/vnd.jupyter.widget-view+json": {
|
822 |
+
"model_id": "b23b9ee4be80470a96bfe5a7293d706c",
|
823 |
+
"version_major": 2,
|
824 |
+
"version_minor": 0
|
825 |
+
},
|
826 |
+
"text/plain": [
|
827 |
+
"sentence_bert_config.json: 0%| | 0.00/53.0 [00:00<?, ?B/s]"
|
828 |
+
]
|
829 |
+
},
|
830 |
+
"metadata": {},
|
831 |
+
"output_type": "display_data"
|
832 |
+
},
|
833 |
+
{
|
834 |
+
"data": {
|
835 |
+
"application/vnd.jupyter.widget-view+json": {
|
836 |
+
"model_id": "91766368d2c44fe181aedc2debbfd2c4",
|
837 |
+
"version_major": 2,
|
838 |
+
"version_minor": 0
|
839 |
+
},
|
840 |
+
"text/plain": [
|
841 |
+
"config.json: 0%| | 0.00/612 [00:00<?, ?B/s]"
|
842 |
+
]
|
843 |
+
},
|
844 |
+
"metadata": {},
|
845 |
+
"output_type": "display_data"
|
846 |
+
},
|
847 |
+
{
|
848 |
+
"name": "stderr",
|
849 |
+
"output_type": "stream",
|
850 |
+
"text": [
|
851 |
+
"Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`\n"
|
852 |
+
]
|
853 |
+
},
|
854 |
+
{
|
855 |
+
"data": {
|
856 |
+
"application/vnd.jupyter.widget-view+json": {
|
857 |
+
"model_id": "7f0e8193141a487680b8bd8f1c5d00d9",
|
858 |
+
"version_major": 2,
|
859 |
+
"version_minor": 0
|
860 |
+
},
|
861 |
+
"text/plain": [
|
862 |
+
"model.safetensors: 0%| | 0.00/90.9M [00:00<?, ?B/s]"
|
863 |
+
]
|
864 |
+
},
|
865 |
+
"metadata": {},
|
866 |
+
"output_type": "display_data"
|
867 |
+
},
|
868 |
+
{
|
869 |
+
"data": {
|
870 |
+
"application/vnd.jupyter.widget-view+json": {
|
871 |
+
"model_id": "9c63240c2991419999ab0aa1a2852731",
|
872 |
+
"version_major": 2,
|
873 |
+
"version_minor": 0
|
874 |
+
},
|
875 |
+
"text/plain": [
|
876 |
+
"tokenizer_config.json: 0%| | 0.00/350 [00:00<?, ?B/s]"
|
877 |
+
]
|
878 |
+
},
|
879 |
+
"metadata": {},
|
880 |
+
"output_type": "display_data"
|
881 |
+
},
|
882 |
+
{
|
883 |
+
"data": {
|
884 |
+
"application/vnd.jupyter.widget-view+json": {
|
885 |
+
"model_id": "353cd71ff2c244b2b3f0ecd4f9d1a0b2",
|
886 |
+
"version_major": 2,
|
887 |
+
"version_minor": 0
|
888 |
+
},
|
889 |
+
"text/plain": [
|
890 |
+
"vocab.txt: 0%| | 0.00/232k [00:00<?, ?B/s]"
|
891 |
+
]
|
892 |
+
},
|
893 |
+
"metadata": {},
|
894 |
+
"output_type": "display_data"
|
895 |
+
},
|
896 |
+
{
|
897 |
+
"data": {
|
898 |
+
"application/vnd.jupyter.widget-view+json": {
|
899 |
+
"model_id": "f1ce19c9a9b745198ffcfac143a351f6",
|
900 |
+
"version_major": 2,
|
901 |
+
"version_minor": 0
|
902 |
+
},
|
903 |
+
"text/plain": [
|
904 |
+
"tokenizer.json: 0%| | 0.00/466k [00:00<?, ?B/s]"
|
905 |
+
]
|
906 |
+
},
|
907 |
+
"metadata": {},
|
908 |
+
"output_type": "display_data"
|
909 |
+
},
|
910 |
+
{
|
911 |
+
"data": {
|
912 |
+
"application/vnd.jupyter.widget-view+json": {
|
913 |
+
"model_id": "a34f08cf92234b0a9f827a0bd43bd63f",
|
914 |
+
"version_major": 2,
|
915 |
+
"version_minor": 0
|
916 |
+
},
|
917 |
+
"text/plain": [
|
918 |
+
"special_tokens_map.json: 0%| | 0.00/112 [00:00<?, ?B/s]"
|
919 |
+
]
|
920 |
+
},
|
921 |
+
"metadata": {},
|
922 |
+
"output_type": "display_data"
|
923 |
+
},
|
924 |
+
{
|
925 |
+
"data": {
|
926 |
+
"application/vnd.jupyter.widget-view+json": {
|
927 |
+
"model_id": "eb7c74f55e134aec920d424da729bd63",
|
928 |
+
"version_major": 2,
|
929 |
+
"version_minor": 0
|
930 |
+
},
|
931 |
+
"text/plain": [
|
932 |
+
"config.json: 0%| | 0.00/190 [00:00<?, ?B/s]"
|
933 |
+
]
|
934 |
+
},
|
935 |
+
"metadata": {},
|
936 |
+
"output_type": "display_data"
|
937 |
+
},
|
938 |
+
{
|
939 |
+
"data": {
|
940 |
+
"application/vnd.jupyter.widget-view+json": {
|
941 |
+
"model_id": "8bfe1a75a9824eb192b2bf2c6c9859e8",
|
942 |
+
"version_major": 2,
|
943 |
+
"version_minor": 0
|
944 |
+
},
|
945 |
+
"text/plain": [
|
946 |
+
"Batches: 0%| | 0/100 [00:00<?, ?it/s]"
|
947 |
+
]
|
948 |
+
},
|
949 |
+
"metadata": {},
|
950 |
+
"output_type": "display_data"
|
951 |
+
},
|
952 |
+
{
|
953 |
+
"name": "stdout",
|
954 |
+
"output_type": "stream",
|
955 |
+
"text": [
|
956 |
+
"Generated embeddings shape: (3200, 384)\n"
|
957 |
+
]
|
958 |
+
}
|
959 |
+
],
|
960 |
+
"source": [
|
961 |
+
"# 1.1) Import the embedder\n",
|
962 |
+
"from sentence_transformers import SentenceTransformer\n",
|
963 |
+
"\n",
|
964 |
+
"# 1.2) Instantiate your Sentence-Transformer model\n",
|
965 |
+
"# “all-MiniLM-L6-v2” is small & fast, good for demos\n",
|
966 |
+
"embedder = SentenceTransformer(\"all-MiniLM-L6-v2\")\n",
|
967 |
+
"\n",
|
968 |
+
"# 1.3) Encode your passages list\n",
|
969 |
+
"# This may take a moment—use show_progress_bar=True\n",
|
970 |
+
"embeddings = embedder.encode(\n",
|
971 |
+
" passages, \n",
|
972 |
+
" show_progress_bar=True,\n",
|
973 |
+
" convert_to_numpy=True\n",
|
974 |
+
")\n",
|
975 |
+
"\n",
|
976 |
+
"print(f\"Generated embeddings shape: {embeddings.shape}\")\n"
|
977 |
+
]
|
978 |
+
},
|
979 |
+
{
|
980 |
+
"cell_type": "code",
|
981 |
+
"execution_count": null,
|
982 |
+
"metadata": {},
|
983 |
+
"outputs": [],
|
984 |
+
"source": []
|
985 |
+
},
|
986 |
+
{
|
987 |
+
"cell_type": "markdown",
|
988 |
+
"metadata": {},
|
989 |
+
"source": [
|
990 |
+
"## Build the FAISS index"
|
991 |
+
]
|
992 |
+
},
|
993 |
+
{
|
994 |
+
"cell_type": "code",
|
995 |
+
"execution_count": 8,
|
996 |
+
"metadata": {},
|
997 |
+
"outputs": [
|
998 |
+
{
|
999 |
+
"name": "stdout",
|
1000 |
+
"output_type": "stream",
|
1001 |
+
"text": [
|
1002 |
+
"FAISS index contains 3200 vectors of dimension 384.\n"
|
1003 |
+
]
|
1004 |
+
}
|
1005 |
+
],
|
1006 |
+
"source": [
|
1007 |
+
"import faiss\n",
|
1008 |
+
"\n",
|
1009 |
+
"# 2.1) Get the embedding dimension\n",
|
1010 |
+
"dim = embeddings.shape[1]\n",
|
1011 |
+
"\n",
|
1012 |
+
"# 2.2) Create a simple L2 index\n",
|
1013 |
+
"index = faiss.IndexFlatL2(dim)\n",
|
1014 |
+
"\n",
|
1015 |
+
"# 2.3) Add all passage embeddings into the index\n",
|
1016 |
+
"index.add(embeddings)\n",
|
1017 |
+
"\n",
|
1018 |
+
"print(f\"FAISS index contains {index.ntotal} vectors of dimension {dim}.\")\n"
|
1019 |
+
]
|
1020 |
+
},
|
1021 |
+
{
|
1022 |
+
"cell_type": "markdown",
|
1023 |
+
"metadata": {},
|
1024 |
+
"source": [
|
1025 |
+
"## Test retrieval"
|
1026 |
+
]
|
1027 |
+
},
|
1028 |
+
{
|
1029 |
+
"cell_type": "code",
|
1030 |
+
"execution_count": 9,
|
1031 |
+
"metadata": {},
|
1032 |
+
"outputs": [
|
1033 |
+
{
|
1034 |
+
"name": "stdout",
|
1035 |
+
"output_type": "stream",
|
1036 |
+
"text": [
|
1037 |
+
"Passage 1 (dist=1.38):\n",
|
1038 |
+
"Modern Finnish popular music includes a renowned heavy metal scene, in common with other Nordic countries, as well as a number of prominent rock bands, jazz musicians, hip hop performers, and dance mu …\n",
|
1039 |
+
"\n",
|
1040 |
+
"Passage 2 (dist=1.40):\n",
|
1041 |
+
"* McDonough, James Lee, Shiloh: In Hell before Night (1977). …\n",
|
1042 |
+
"\n",
|
1043 |
+
"Passage 3 (dist=1.41):\n",
|
1044 |
+
"Sami music …\n",
|
1045 |
+
"\n",
|
1046 |
+
"Passage 4 (dist=1.42):\n",
|
1047 |
+
"The people of northern Finland, Sweden and Norway, the Sami, are known primarily for highly spiritual songs called Joik. The same word sometimes refers to lavlu or vuelie songs, though this is technic …\n",
|
1048 |
+
"\n",
|
1049 |
+
"Passage 5 (dist=1.44):\n",
|
1050 |
+
"John Adams, portrait by John Trumbull. …\n",
|
1051 |
+
"\n"
|
1052 |
+
]
|
1053 |
+
}
|
1054 |
+
],
|
1055 |
+
"source": [
|
1056 |
+
"# 3.1) Pick a sample query\n",
|
1057 |
+
"query = \"Who wrote the song Halo?\"\n",
|
1058 |
+
"\n",
|
1059 |
+
"# 3.2) Embed the query\n",
|
1060 |
+
"q_emb = embedder.encode([query], convert_to_numpy=True)\n",
|
1061 |
+
"\n",
|
1062 |
+
"# 3.3) Search top 5 nearest passages\n",
|
1063 |
+
"distances, indices = index.search(q_emb, k=5)\n",
|
1064 |
+
"\n",
|
1065 |
+
"# 3.4) Print out the top-5 texts\n",
|
1066 |
+
"for rank, idx in enumerate(indices[0], start=1):\n",
|
1067 |
+
" print(f\"Passage {rank} (dist={distances[0][rank-1]:.2f}):\")\n",
|
1068 |
+
" print(passages[idx][:200], \"…\\n\")\n"
|
1069 |
+
]
|
1070 |
+
},
|
1071 |
+
{
|
1072 |
+
"cell_type": "markdown",
|
1073 |
+
"metadata": {},
|
1074 |
+
"source": [
|
1075 |
+
"## Load and add the LLM"
|
1076 |
+
]
|
1077 |
+
},
|
1078 |
+
{
|
1079 |
+
"cell_type": "code",
|
1080 |
+
"execution_count": 12,
|
1081 |
+
"metadata": {},
|
1082 |
+
"outputs": [
|
1083 |
+
{
|
1084 |
+
"name": "stdout",
|
1085 |
+
"output_type": "stream",
|
1086 |
+
"text": [
|
1087 |
+
"^C\n"
|
1088 |
+
]
|
1089 |
+
}
|
1090 |
+
],
|
1091 |
+
"source": [
|
1092 |
+
"! huggingface-cli login"
|
1093 |
+
]
|
1094 |
+
},
|
1095 |
+
{
|
1096 |
+
"cell_type": "code",
|
1097 |
+
"execution_count": 14,
|
1098 |
+
"metadata": {},
|
1099 |
+
"outputs": [
|
1100 |
+
{
|
1101 |
+
"data": {
|
1102 |
+
"application/vnd.jupyter.widget-view+json": {
|
1103 |
+
"model_id": "fb405c91a771473e80578648166b0d5d",
|
1104 |
+
"version_major": 2,
|
1105 |
+
"version_minor": 0
|
1106 |
+
},
|
1107 |
+
"text/plain": [
|
1108 |
+
"tokenizer_config.json: 0%| | 0.00/2.54k [00:00<?, ?B/s]"
|
1109 |
+
]
|
1110 |
+
},
|
1111 |
+
"metadata": {},
|
1112 |
+
"output_type": "display_data"
|
1113 |
+
},
|
1114 |
+
{
|
1115 |
+
"name": "stderr",
|
1116 |
+
"output_type": "stream",
|
1117 |
+
"text": [
|
1118 |
+
"c:\\Users\\victo\\AppData\\Local\\Programs\\Python\\Python313\\Lib\\site-packages\\huggingface_hub\\file_download.py:144: UserWarning: `huggingface_hub` cache-system uses symlinks by default to efficiently store duplicated files but your machine does not support them in C:\\Users\\victo\\.cache\\huggingface\\hub\\models--google--flan-t5-base. Caching files will still work but in a degraded version that might require more space on your disk. This warning can be disabled by setting the `HF_HUB_DISABLE_SYMLINKS_WARNING` environment variable. For more details, see https://huggingface.co/docs/huggingface_hub/how-to-cache#limitations.\n",
|
1119 |
+
"To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development\n",
|
1120 |
+
" warnings.warn(message)\n",
|
1121 |
+
"Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`\n"
|
1122 |
+
]
|
1123 |
+
},
|
1124 |
+
{
|
1125 |
+
"data": {
|
1126 |
+
"application/vnd.jupyter.widget-view+json": {
|
1127 |
+
"model_id": "97617186c019485b83d0cb38f677266e",
|
1128 |
+
"version_major": 2,
|
1129 |
+
"version_minor": 0
|
1130 |
+
},
|
1131 |
+
"text/plain": [
|
1132 |
+
"spiece.model: 0%| | 0.00/792k [00:00<?, ?B/s]"
|
1133 |
+
]
|
1134 |
+
},
|
1135 |
+
"metadata": {},
|
1136 |
+
"output_type": "display_data"
|
1137 |
+
},
|
1138 |
+
{
|
1139 |
+
"data": {
|
1140 |
+
"application/vnd.jupyter.widget-view+json": {
|
1141 |
+
"model_id": "cfb09d567c6d4b77bb26b3af9e75a77d",
|
1142 |
+
"version_major": 2,
|
1143 |
+
"version_minor": 0
|
1144 |
+
},
|
1145 |
+
"text/plain": [
|
1146 |
+
"tokenizer.json: 0%| | 0.00/2.42M [00:00<?, ?B/s]"
|
1147 |
+
]
|
1148 |
+
},
|
1149 |
+
"metadata": {},
|
1150 |
+
"output_type": "display_data"
|
1151 |
+
},
|
1152 |
+
{
|
1153 |
+
"data": {
|
1154 |
+
"application/vnd.jupyter.widget-view+json": {
|
1155 |
+
"model_id": "8210d38a78634a7f9b0d76970a145ce5",
|
1156 |
+
"version_major": 2,
|
1157 |
+
"version_minor": 0
|
1158 |
+
},
|
1159 |
+
"text/plain": [
|
1160 |
+
"special_tokens_map.json: 0%| | 0.00/2.20k [00:00<?, ?B/s]"
|
1161 |
+
]
|
1162 |
+
},
|
1163 |
+
"metadata": {},
|
1164 |
+
"output_type": "display_data"
|
1165 |
+
},
|
1166 |
+
{
|
1167 |
+
"data": {
|
1168 |
+
"application/vnd.jupyter.widget-view+json": {
|
1169 |
+
"model_id": "e98d397ffacb4561b45821efc96a6ece",
|
1170 |
+
"version_major": 2,
|
1171 |
+
"version_minor": 0
|
1172 |
+
},
|
1173 |
+
"text/plain": [
|
1174 |
+
"config.json: 0%| | 0.00/1.40k [00:00<?, ?B/s]"
|
1175 |
+
]
|
1176 |
+
},
|
1177 |
+
"metadata": {},
|
1178 |
+
"output_type": "display_data"
|
1179 |
+
},
|
1180 |
+
{
|
1181 |
+
"name": "stderr",
|
1182 |
+
"output_type": "stream",
|
1183 |
+
"text": [
|
1184 |
+
"Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`\n"
|
1185 |
+
]
|
1186 |
+
},
|
1187 |
+
{
|
1188 |
+
"data": {
|
1189 |
+
"application/vnd.jupyter.widget-view+json": {
|
1190 |
+
"model_id": "1966d468bfa549b3938e52deae90dc39",
|
1191 |
+
"version_major": 2,
|
1192 |
+
"version_minor": 0
|
1193 |
+
},
|
1194 |
+
"text/plain": [
|
1195 |
+
"model.safetensors: 0%| | 0.00/990M [00:00<?, ?B/s]"
|
1196 |
+
]
|
1197 |
+
},
|
1198 |
+
"metadata": {},
|
1199 |
+
"output_type": "display_data"
|
1200 |
+
},
|
1201 |
+
{
|
1202 |
+
"data": {
|
1203 |
+
"application/vnd.jupyter.widget-view+json": {
|
1204 |
+
"model_id": "c901ab19c3d743f7b01a32a760cdb6ad",
|
1205 |
+
"version_major": 2,
|
1206 |
+
"version_minor": 0
|
1207 |
+
},
|
1208 |
+
"text/plain": [
|
1209 |
+
"generation_config.json: 0%| | 0.00/147 [00:00<?, ?B/s]"
|
1210 |
+
]
|
1211 |
+
},
|
1212 |
+
"metadata": {},
|
1213 |
+
"output_type": "display_data"
|
1214 |
+
},
|
1215 |
+
{
|
1216 |
+
"name": "stderr",
|
1217 |
+
"output_type": "stream",
|
1218 |
+
"text": [
|
1219 |
+
"Device set to use cpu\n"
|
1220 |
+
]
|
1221 |
+
},
|
1222 |
+
{
|
1223 |
+
"name": "stdout",
|
1224 |
+
"output_type": "stream",
|
1225 |
+
"text": [
|
1226 |
+
"a slang term for a group of people who are a part of a group of people\n"
|
1227 |
+
]
|
1228 |
+
}
|
1229 |
+
],
|
1230 |
+
"source": [
|
1231 |
+
"from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline\n",
|
1232 |
+
"\n",
|
1233 |
+
"# 1) Point to the Google repo, not Facebook’s\n",
|
1234 |
+
"MODEL_ID = \"google/flan-t5-base\"\n",
|
1235 |
+
"\n",
|
1236 |
+
"# 2) Load tokenizer & model (your token is used under the hood)\n",
|
1237 |
+
"tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)\n",
|
1238 |
+
"model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_ID)\n",
|
1239 |
+
"\n",
|
1240 |
+
"# 3) Create the text2text pipeline\n",
|
1241 |
+
"qa_pipe = pipeline(\n",
|
1242 |
+
" \"text2text-generation\",\n",
|
1243 |
+
" model=model,\n",
|
1244 |
+
" tokenizer=tokenizer,\n",
|
1245 |
+
" device=-1 # or 0 if you have a GPU\n",
|
1246 |
+
")\n",
|
1247 |
+
"\n",
|
1248 |
+
"# 4) Sanity check\n",
|
1249 |
+
"out = qa_pipe(\"Question: What is RAG? Answer:\", max_length=50)\n",
|
1250 |
+
"print(out[0][\"generated_text\"])\n"
|
1251 |
+
]
|
1252 |
+
},
|
1253 |
+
{
|
1254 |
+
"cell_type": "code",
|
1255 |
+
"execution_count": 15,
|
1256 |
+
"metadata": {},
|
1257 |
+
"outputs": [
|
1258 |
+
{
|
1259 |
+
"name": "stdout",
|
1260 |
+
"output_type": "stream",
|
1261 |
+
"text": [
|
1262 |
+
"Requirement already satisfied: huggingface_hub[hf_xet] in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (0.30.2)\n",
|
1263 |
+
"Requirement already satisfied: filelock in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from huggingface_hub[hf_xet]) (3.18.0)\n",
|
1264 |
+
"Requirement already satisfied: fsspec>=2023.5.0 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from huggingface_hub[hf_xet]) (2025.3.0)\n",
|
1265 |
+
"Requirement already satisfied: packaging>=20.9 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from huggingface_hub[hf_xet]) (24.2)\n",
|
1266 |
+
"Requirement already satisfied: pyyaml>=5.1 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from huggingface_hub[hf_xet]) (6.0.2)\n",
|
1267 |
+
"Requirement already satisfied: requests in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from huggingface_hub[hf_xet]) (2.32.3)\n",
|
1268 |
+
"Requirement already satisfied: tqdm>=4.42.1 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from huggingface_hub[hf_xet]) (4.67.1)\n",
|
1269 |
+
"Requirement already satisfied: typing-extensions>=3.7.4.3 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from huggingface_hub[hf_xet]) (4.12.2)\n",
|
1270 |
+
"Collecting hf-xet>=0.1.4 (from huggingface_hub[hf_xet])\n",
|
1271 |
+
" Downloading hf_xet-1.1.0-cp37-abi3-win_amd64.whl.metadata (498 bytes)\n",
|
1272 |
+
"Requirement already satisfied: colorama in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from tqdm>=4.42.1->huggingface_hub[hf_xet]) (0.4.6)\n",
|
1273 |
+
"Requirement already satisfied: charset-normalizer<4,>=2 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from requests->huggingface_hub[hf_xet]) (3.4.1)\n",
|
1274 |
+
"Requirement already satisfied: idna<4,>=2.5 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from requests->huggingface_hub[hf_xet]) (3.10)\n",
|
1275 |
+
"Requirement already satisfied: urllib3<3,>=1.21.1 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from requests->huggingface_hub[hf_xet]) (2.3.0)\n",
|
1276 |
+
"Requirement already satisfied: certifi>=2017.4.17 in c:\\users\\victo\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from requests->huggingface_hub[hf_xet]) (2025.1.31)\n",
|
1277 |
+
"Downloading hf_xet-1.1.0-cp37-abi3-win_amd64.whl (4.2 MB)\n",
|
1278 |
+
" ---------------------------------------- 0.0/4.2 MB ? eta -:--:--\n",
|
1279 |
+
" -- ------------------------------------- 0.3/4.2 MB ? eta -:--:--\n",
|
1280 |
+
" ---- ----------------------------------- 0.5/4.2 MB 1.7 MB/s eta 0:00:03\n",
|
1281 |
+
" ------- -------------------------------- 0.8/4.2 MB 1.5 MB/s eta 0:00:03\n",
|
1282 |
+
" ------- -------------------------------- 0.8/4.2 MB 1.5 MB/s eta 0:00:03\n",
|
1283 |
+
" --------- ------------------------------ 1.0/4.2 MB 1.0 MB/s eta 0:00:04\n",
|
1284 |
+
" -------------- ------------------------- 1.6/4.2 MB 1.2 MB/s eta 0:00:03\n",
|
1285 |
+
" -------------- ------------------------- 1.6/4.2 MB 1.2 MB/s eta 0:00:03\n",
|
1286 |
+
" ----------------- ---------------------- 1.8/4.2 MB 1.2 MB/s eta 0:00:02\n",
|
1287 |
+
" ---------------------- ----------------- 2.4/4.2 MB 1.2 MB/s eta 0:00:02\n",
|
1288 |
+
" ------------------------ --------------- 2.6/4.2 MB 1.3 MB/s eta 0:00:02\n",
|
1289 |
+
" --------------------------- ------------ 2.9/4.2 MB 1.3 MB/s eta 0:00:02\n",
|
1290 |
+
" ----------------------------- ---------- 3.1/4.2 MB 1.3 MB/s eta 0:00:01\n",
|
1291 |
+
" -------------------------------- ------- 3.4/4.2 MB 1.3 MB/s eta 0:00:01\n",
|
1292 |
+
" ---------------------------------- ----- 3.7/4.2 MB 1.3 MB/s eta 0:00:01\n",
|
1293 |
+
" ------------------------------------- -- 3.9/4.2 MB 1.3 MB/s eta 0:00:01\n",
|
1294 |
+
" ---------------------------------------- 4.2/4.2 MB 1.3 MB/s eta 0:00:00\n",
|
1295 |
+
"Installing collected packages: hf-xet\n",
|
1296 |
+
"Successfully installed hf-xet-1.1.0\n"
|
1297 |
+
]
|
1298 |
+
}
|
1299 |
+
],
|
1300 |
+
"source": [
|
1301 |
+
"! pip install huggingface_hub[hf_xet]"
|
1302 |
+
]
|
1303 |
+
},
|
1304 |
+
{
|
1305 |
+
"cell_type": "code",
|
1306 |
+
"execution_count": null,
|
1307 |
+
"metadata": {},
|
1308 |
+
"outputs": [],
|
1309 |
+
"source": []
|
1310 |
+
}
|
1311 |
+
],
|
1312 |
+
"metadata": {
|
1313 |
+
"kernelspec": {
|
1314 |
+
"display_name": "Python 3",
|
1315 |
+
"language": "python",
|
1316 |
+
"name": "python3"
|
1317 |
+
},
|
1318 |
+
"language_info": {
|
1319 |
+
"codemirror_mode": {
|
1320 |
+
"name": "ipython",
|
1321 |
+
"version": 3
|
1322 |
+
},
|
1323 |
+
"file_extension": ".py",
|
1324 |
+
"mimetype": "text/x-python",
|
1325 |
+
"name": "python",
|
1326 |
+
"nbconvert_exporter": "python",
|
1327 |
+
"pygments_lexer": "ipython3",
|
1328 |
+
"version": "3.13.0"
|
1329 |
+
}
|
1330 |
+
},
|
1331 |
+
"nbformat": 4,
|
1332 |
+
"nbformat_minor": 2
|
1333 |
+
}
|