MVPilgrim committed on
Commit
fa9c7e9
·
verified ·
1 Parent(s): 687a95e

Update semsearch.py

Browse files
Files changed (1) hide show
  1. semsearch.py +423 -422
semsearch.py CHANGED
@@ -1,422 +1,423 @@
1
- import weaviate
2
-
3
- from sentence_transformers import SentenceTransformer
4
- from langchain_community.document_loaders import BSHTMLLoader
5
- from pathlib import Path
6
- from lxml import html
7
- import logging
8
- from semantic_text_splitter import HuggingFaceTextSplitter
9
- from tokenizers import Tokenizer
10
- import json
11
- import os
12
- import re
13
- import logging
14
-
15
- import llama_cpp
16
- from llama_cpp import Llama
17
- import ipywidgets as widgets
18
- from IPython.display import display, clear_output
19
-
20
-
21
- weaviate_logger = logging.getLogger("httpx")
22
- weaviate_logger.setLevel(logging.WARNING)
23
-
24
- logger = logging.getLogger(__name__)
25
- logging.basicConfig(level=logging.INFO)
26
-
27
-
28
-
29
- ######################################################################
30
- # MAINLINE
31
- #
32
- logger.info("#### MAINLINE ENTERED.")
33
-
34
- #pathString = "/Users/660565/KPSAllInOne/ProgramFilesX86/WebCopy/DownloadedWebSites/LLMPOC_HTML"
35
- pathString = "/app/inputDocs"
36
- chunks = []
37
- webpageDocNames = []
38
- page_contentArray = []
39
- webpageChunks = []
40
- webpageTitles = []
41
- webpageChunksDocNames = []
42
-
43
- #####################################################################
44
- # Create UI widgets.
45
- output_widget = widgets.Output()
46
- with output_widget:
47
- print("### Create widgets entered.")
48
-
49
- systemTextArea = widgets.Textarea(
50
- value='',
51
- placeholder='Enter System Prompt.',
52
- description='Sys Prompt: ',
53
- disabled=False,
54
- layout=widgets.Layout(width='300px', height='80px')
55
- )
56
-
57
- userTextArea = widgets.Textarea(
58
- value='',
59
- placeholder='Enter User Prompt.',
60
- description='User Prompt: ',
61
- disabled=False,
62
- layout=widgets.Layout(width='435px', height='110px')
63
- )
64
-
65
- ragPromptTextArea = widgets.Textarea(
66
- value='',
67
- placeholder='App generated prompt with RAG information.',
68
- description='RAG Prompt: ',
69
- disabled=False,
70
- layout=widgets.Layout(width='580px', height='180px')
71
- )
72
-
73
- responseTextArea = widgets.Textarea(
74
- value='',
75
- placeholder='LLM generated response.',
76
- description='LLM Resp: ',
77
- disabled=False,
78
- layout=widgets.Layout(width='780px', height='200px')
79
- )
80
-
81
- selectRag = widgets.Checkbox(
82
- value=False,
83
- description='Use RAG',
84
- disabled=False
85
- )
86
-
87
- submitButton = widgets.Button(
88
- description='Run Model.',
89
- disabled=False,
90
- button_style='', # 'success', 'info', 'warning', 'danger' or ''
91
- tooltip='Click',
92
- icon='check' # (FontAwesome names without the `fa-` prefix)
93
- )
94
-
95
-
96
- #######################################################
97
- # Read each text input file, parse it into a document,
98
- # chunk it, collect chunks and document name.
99
- logger.info("#### Read and chunk input text files.")
100
- for filename in os.listdir(pathString):
101
- logger.info(filename)
102
- path = Path(pathString + "/" + filename)
103
- filename = filename.rstrip(".html")
104
- webpageDocNames.append(filename)
105
- htmlLoader = BSHTMLLoader(path,"utf-8")
106
- htmlData = htmlLoader.load()
107
-
108
- title = htmlData[0].metadata['title']
109
- page_content = htmlData[0].page_content
110
-
111
- # Clean data. Remove multiple newlines, etc.
112
- page_content = re.sub(r'\n+', '\n',page_content)
113
-
114
- page_contentArray.append(page_content);
115
- webpageTitles.append(title)
116
- max_tokens = 1000
117
- tokenizer = Tokenizer.from_pretrained("bert-base-uncased")
118
- logger.debug(f"### tokenizer: {tokenizer}")
119
- splitter = HuggingFaceTextSplitter(tokenizer, trim_chunks=True)
120
- chunksOnePage = splitter.chunks(page_content, chunk_capacity=50)
121
-
122
- chunks = []
123
- for chnk in chunksOnePage:
124
- logger.debug(f"#### chnk in file: {chnk}")
125
- chunks.append(chnk)
126
- logger.debug(f"chunks: {chunks}")
127
- webpageChunks.append(chunks)
128
- webpageChunksDocNames.append(filename + "Chunks")
129
-
130
- logger.debug(f"### filename, title: {filename}, {title}")
131
-
132
- logger.debug(f"### webpageDocNames: {webpageDocNames}")
133
-
134
-
135
- ######################################################
136
- # Connect to the Weaviate vector database.
137
- logger.info("#### Create Weaviate db client connection.")
138
- client = weaviate.connect_to_custom(
139
- http_host="127.0.0.1",
140
- http_port=8080,
141
- http_secure=False,
142
- grpc_host="127.0.0.1",
143
- grpc_port=50051,
144
- grpc_secure=False
145
- #read_timeout=600,
146
- #write_timeout=90
147
- )
148
- client.connect()
149
-
150
-
151
- ######################################################
152
- # Create database webpage and chunks collections.
153
- #wpCollection = createWebpageCollection()
154
- #wpChunkCollection = createChunksCollection()
155
- logger.info("#### createWebpageCollection() entered.")
156
- if client.collections.exists("Documents"):
157
- client.collections.delete("Documents")
158
-
159
- class_obj = {
160
- "class": "Documents",
161
- "description": "For first attempt at loading a Weviate database.",
162
- "vectorizer": "text2vec-transformers",
163
- "moduleConfig": {
164
- "text2vec-transformers": {
165
- "vectorizeClassName": False
166
- }
167
- },
168
- "vectorIndexType": "hnsw",
169
- "vectorIndexConfig": {
170
- "distance": "cosine",
171
- },
172
- "properties": [
173
- {
174
- "name": "title",
175
- "dataType": ["text"],
176
- "description": "HTML doc title.",
177
- "vectorizer": "text2vec-transformers",
178
- "moduleConfig": {
179
- "text2vec-transformers": {
180
- "vectorizePropertyName": True,
181
- "skip": False,
182
- "tokenization": "lowercase"
183
- }
184
- },
185
- "invertedIndexConfig": {
186
- "bm25": {
187
- "b": 0.75,
188
- "k1": 1.2
189
- },
190
- }
191
- },
192
- {
193
- "name": "content",
194
- "dataType": ["text"],
195
- "description": "HTML page content.",
196
- "moduleConfig": {
197
- "text2vec-transformers": {
198
- "vectorizePropertyName": True,
199
- "tokenization": "whitespace"
200
- }
201
- }
202
- }
203
- ]
204
- }
205
- wpCollection = client.collections.create_from_dict(class_obj)
206
-
207
- logger.info("#### createChunksCollection() entered.")
208
- if client.collections.exists("Chunks"):
209
- client.collections.delete("Chunks")
210
-
211
- class_obj = {
212
- "class": "Chunks",
213
- "description": "Collection for document chunks.",
214
- "vectorizer": "text2vec-transformers",
215
- "moduleConfig": {
216
- "text2vec-transformers": {
217
- "vectorizeClassName": True
218
- }
219
- },
220
- "vectorIndexType": "hnsw",
221
- "vectorIndexConfig": {
222
- "distance": "cosine",
223
- },
224
- "properties": [
225
- {
226
- "name": "chunk",
227
- "dataType": ["text"],
228
- "description": "Single webpage chunk.",
229
- "vectorizer": "text2vec-transformers",
230
- "moduleConfig": {
231
- "text2vec-transformers": {
232
- "vectorizePropertyName": False,
233
- "skip": False,
234
- "tokenization": "lowercase"
235
- }
236
- }
237
- },
238
- {
239
- "name": "chunk_index",
240
- "dataType": ["int"]
241
- },
242
- {
243
- "name": "webpage",
244
- "dataType": ["Documents"],
245
- "description": "Webpage content chunks.",
246
-
247
- "invertedIndexConfig": {
248
- "bm25": {
249
- "b": 0.75,
250
- "k1": 1.2
251
- }
252
- }
253
- }
254
- ]
255
- }
256
- wpChunkCollection = client.collections.create_from_dict(class_obj)
257
-
258
-
259
- ###########################################################
260
- # Create document and chunks objects in the database.
261
- logger.info("#### Create page/doc and chunk db objects.")
262
- for i, className in enumerate(webpageDocNames):
263
- title = webpageTitles[i]
264
- logger.debug(f"## className, title: {className}, {title}")
265
- # Create Webpage Object
266
- page_content = page_contentArray[i]
267
- # Insert the document.
268
- wpCollectionObj_uuid = wpCollection.data.insert(
269
- {
270
- "name": className,
271
- "title": title,
272
- "content": page_content
273
- }
274
- )
275
-
276
- # Insert the chunks for the document.
277
- for i2, chunk in enumerate(webpageChunks[i]):
278
- chunk_uuid = wpChunkCollection.data.insert(
279
- {
280
- "title": title,
281
- "chunk": chunk,
282
- "chunk_index": i2,
283
- "references":
284
- {
285
- "webpage": wpCollectionObj_uuid
286
- }
287
- }
288
- )
289
-
290
- ###############################################################################
291
- # text contains prompt for vector DB.
292
- text = "human-made computer cognitive ability"
293
-
294
-
295
- ###############################################################################
296
- # Initial the the sentence transformer and encode the query prompt.
297
- logger.info(f"#### Encode text query prompt to create vectors. {text}")
298
- model = SentenceTransformer('/app/multi-qa-MiniLM-L6-cos-v1')
299
-
300
- vector = model.encode(text)
301
- vectorList = []
302
-
303
- logger.debug("#### Print vectors.")
304
- for vec in vector:
305
- vectorList.append(vec)
306
- logger.debug(f"vectorList: {vectorList[2]}")
307
-
308
- # Fetch chunks and print chunks.
309
- logger.info("#### Retrieve semchunks from db using vectors from prompt.")
310
- semChunks = wpChunkCollection.query.near_vector(
311
- near_vector=vectorList,
312
- distance=0.7,
313
- limit=3
314
- )
315
- logger.debug(f"### semChunks[0]: {semChunks}")
316
-
317
- # Print chunks, corresponding document and document title.
318
- logger.info("#### Print individual retrieved chunks.")
319
- for chunk in enumerate(semChunks.objects):
320
- logger.info(f"#### chunk: {chunk}")
321
- webpage_uuid = chunk[1].properties['references']['webpage']
322
- logger.info(f"webpage_uuid: {webpage_uuid}")
323
- wpFromChunk = wpCollection.query.fetch_object_by_id(webpage_uuid)
324
- logger.info(f"### wpFromChunk title: {wpFromChunk.properties['title']}")
325
-
326
-
327
-
328
- ####################################################################
329
- #
330
- collection = client.collections.get("Chunks")
331
- #model = SentenceTransformer('../multi-qa-MiniLM-L6-cos-v1')
332
-
333
- #################################################################
334
- # Initialize the LLM.
335
- model_path = "/app/llama-2-7b-chat.Q4_0.gguf"
336
- llm = Llama(model_path,
337
- #*,
338
- n_gpu_layers=0,
339
- split_mode=llama_cpp.LLAMA_SPLIT_MODE_LAYER,
340
- main_gpu=0,
341
- tensor_split=None,
342
- vocab_only=False,
343
- use_mmap=True,
344
- use_mlock=False,
345
- kv_overrides=None,
346
- seed=llama_cpp.LLAMA_DEFAULT_SEED,
347
- n_ctx=512,
348
- n_batch=512,
349
- n_threads=8,
350
- n_threads_batch=16,
351
- rope_scaling_type=llama_cpp.LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED,
352
- pooling_type=llama_cpp.LLAMA_POOLING_TYPE_UNSPECIFIED,
353
- rope_freq_base=0.0,
354
- rope_freq_scale=0.0,
355
- yarn_ext_factor=-1.0,
356
- yarn_attn_factor=1.0,
357
- yarn_beta_fast=32.0,
358
- yarn_beta_slow=1.0,
359
- yarn_orig_ctx=0,
360
- logits_all=False,
361
- embedding=False,
362
- offload_kqv=True,
363
- last_n_tokens_size=64,
364
- lora_base=None,
365
- lora_scale=1.0,
366
- lora_path=None,
367
- numa=False,
368
- chat_format=None,
369
- chat_handler=None,
370
- draft_model=None,
371
- tokenizer=None,
372
- type_k=None,
373
- type_v=None,
374
- verbose=True
375
- )
376
-
377
-
378
- display(systemTextArea)
379
- display(userTextArea)
380
- display(ragPromptTextArea)
381
- display(responseTextArea)
382
- display(selectRag)
383
- display(submitButton)
384
-
385
- def setPrompt(pprompt,ragFlag):
386
- print("\n### setPrompt() entered. ragFlag: ",ragFlag)
387
- if ragFlag:
388
- ragPrompt = setRagPrompt(pprompt)
389
- userPrompt = pprompt + "\n" + ragPrompt
390
- prompt = userPrompt
391
- else:
392
- userPrompt = pprompt
393
- prompt = f""" <s> [INST] <<SYS>> {systemTextArea.value} </SYS>> Q: {userPrompt} A: [/INST]"""
394
- return prompt
395
-
396
- def runModel(prompt):
397
- output = llm.create_completion(
398
- prompt, # Prompt
399
- max_tokens=4096, # Generate up to 32 tokens
400
- #stop = ["Q:", "\n"], # Stop generating just before the model would generate a new question
401
- echo = False # Echo the prompt back in the output
402
- )
403
- responseTextArea.value = output["choices"][0]["text"]
404
-
405
- def on_submitButton_clicked(b):
406
- with output_widget:
407
- clear_output(wait=True)
408
- ragPromptTextArea.value = ""
409
- responseTextArea.value = ""
410
- log.debug(f"### selectRag: {selectRag.value}")
411
- prompt = setPrompt(userTextArea.value,selectRag.value)
412
- log.debug("### prompt: " + prompt)
413
- runModel(prompt)
414
-
415
- submitButton.on_click(on_submitButton_clicked)
416
- display(output_widget)
417
-
418
-
419
- logger.info("#### Closing client db connection.")
420
- client.close()
421
-
422
- logger.info("#### Program terminating.")
 
 
1
+ import weaviate
2
+
3
+ from sentence_transformers import SentenceTransformer
4
+ from langchain_community.document_loaders import BSHTMLLoader
5
+ from pathlib import Path
6
+ from lxml import html
7
+ import logging
8
+ from semantic_text_splitter import HuggingFaceTextSplitter
9
+ from tokenizers import Tokenizer
10
+ import json
11
+ import os
12
+ import re
13
+ import logging
14
+
15
+ import llama_cpp
16
+ from llama_cpp import Llama
17
+ import ipywidgets as widgets
18
+ from IPython.display import display, clear_output
19
+
20
+
21
+ weaviate_logger = logging.getLogger("httpx")
22
+ weaviate_logger.setLevel(logging.WARNING)
23
+
24
+ logger = logging.getLogger(__name__)
25
+ logging.basicConfig(level=logging.INFO)
26
+
27
+
28
+
29
+ ######################################################################
30
+ # MAINLINE
31
+ #
32
+ logger.info("#### MAINLINE ENTERED.")
33
+
34
+ #pathString = "/Users/660565/KPSAllInOne/ProgramFilesX86/WebCopy/DownloadedWebSites/LLMPOC_HTML"
35
+ pathString = "/app/inputDocs"
36
+ chunks = []
37
+ webpageDocNames = []
38
+ page_contentArray = []
39
+ webpageChunks = []
40
+ webpageTitles = []
41
+ webpageChunksDocNames = []
42
+
43
+ #####################################################################
44
+ # Create UI widgets.
45
+ output_widget = widgets.Output()
46
+ with output_widget:
47
+ print("### Create widgets entered.")
48
+
49
+ systemTextArea = widgets.Textarea(
50
+ value='',
51
+ placeholder='Enter System Prompt.',
52
+ description='Sys Prompt: ',
53
+ disabled=False,
54
+ layout=widgets.Layout(width='300px', height='80px')
55
+ )
56
+
57
+ userTextArea = widgets.Textarea(
58
+ value='',
59
+ placeholder='Enter User Prompt.',
60
+ description='User Prompt: ',
61
+ disabled=False,
62
+ layout=widgets.Layout(width='435px', height='110px')
63
+ )
64
+
65
+ ragPromptTextArea = widgets.Textarea(
66
+ value='',
67
+ placeholder='App generated prompt with RAG information.',
68
+ description='RAG Prompt: ',
69
+ disabled=False,
70
+ layout=widgets.Layout(width='580px', height='180px')
71
+ )
72
+
73
+ responseTextArea = widgets.Textarea(
74
+ value='',
75
+ placeholder='LLM generated response.',
76
+ description='LLM Resp: ',
77
+ disabled=False,
78
+ layout=widgets.Layout(width='780px', height='200px')
79
+ )
80
+
81
+ selectRag = widgets.Checkbox(
82
+ value=False,
83
+ description='Use RAG',
84
+ disabled=False
85
+ )
86
+
87
+ submitButton = widgets.Button(
88
+ description='Run Model.',
89
+ disabled=False,
90
+ button_style='', # 'success', 'info', 'warning', 'danger' or ''
91
+ tooltip='Click',
92
+ icon='check' # (FontAwesome names without the `fa-` prefix)
93
+ )
94
+
95
+
96
+ #######################################################
97
+ # Read each text input file, parse it into a document,
98
+ # chunk it, collect chunks and document name.
99
+ logger.info("#### Read and chunk input text files.")
100
# Read every input file, parse it as HTML, clean the text, split it into
# chunks, and record the document name, title, content and chunks in the
# module-level lists.

# Only a literal ".html" suffix is removed from each file name.
# str.rstrip(".html") would strip any trailing run of the characters
# ".", "h", "t", "m", "l", mangling a name such as "math.html" into "ma".
HTML_SUFFIX = ".html"

# The tokenizer and splitter do not depend on the file being processed, so
# they are built once instead of once per input file.
max_tokens = 1000  # NOTE(review): not referenced by the visible code.
tokenizer = Tokenizer.from_pretrained("bert-base-uncased")
logger.debug(f"### tokenizer: {tokenizer}")
splitter = HuggingFaceTextSplitter(tokenizer, trim_chunks=True)

for filename in os.listdir(pathString):
    logger.info(filename)
    path = Path(pathString + "/" + filename)
    if filename.endswith(HTML_SUFFIX):
        filename = filename[:-len(HTML_SUFFIX)]
    webpageDocNames.append(filename)
    htmlLoader = BSHTMLLoader(path,"utf-8")
    htmlData = htmlLoader.load()

    title = htmlData[0].metadata['title']
    page_content = htmlData[0].page_content

    # Clean data. Collapse runs of newlines into a single newline.
    page_content = re.sub(r'\n+', '\n',page_content)

    page_contentArray.append(page_content)
    webpageTitles.append(title)
    chunksOnePage = splitter.chunks(page_content, chunk_capacity=50)

    # Collect this page's chunks into their own list so that
    # webpageChunks holds one list of chunks per document.
    chunks = []
    for chnk in chunksOnePage:
        logger.debug(f"#### chnk in file: {chnk}")
        chunks.append(chnk)
    logger.debug(f"chunks: {chunks}")
    webpageChunks.append(chunks)
    webpageChunksDocNames.append(filename + "Chunks")

    logger.debug(f"### filename, title: {filename}, {title}")

logger.debug(f"### webpageDocNames: {webpageDocNames}")
133
+
134
+
135
+ ######################################################
136
+ # Connect to the Weaviate vector database.
137
+ logger.info("#### Create Weaviate db client connection.")
138
+ client = weaviate.connect_to_custom(
139
+ http_host="127.0.0.1",
140
+ http_port=8080,
141
+ http_secure=False,
142
+ grpc_host="127.0.0.1",
143
+ grpc_port=50051,
144
+ grpc_secure=False,
145
+ timeout=[600,600]
146
+ #read_timeout=600,
147
+ #write_timeout=90
148
+ )
149
+ client.connect()
150
+
151
+
152
+ ######################################################
153
+ # Create database webpage and chunks collections.
154
+ #wpCollection = createWebpageCollection()
155
+ #wpChunkCollection = createChunksCollection()
156
+ logger.info("#### createWebpageCollection() entered.")
157
+ if client.collections.exists("Documents"):
158
+ client.collections.delete("Documents")
159
+
160
+ class_obj = {
161
+ "class": "Documents",
162
+ "description": "For first attempt at loading a Weviate database.",
163
+ "vectorizer": "text2vec-transformers",
164
+ "moduleConfig": {
165
+ "text2vec-transformers": {
166
+ "vectorizeClassName": False
167
+ }
168
+ },
169
+ "vectorIndexType": "hnsw",
170
+ "vectorIndexConfig": {
171
+ "distance": "cosine",
172
+ },
173
+ "properties": [
174
+ {
175
+ "name": "title",
176
+ "dataType": ["text"],
177
+ "description": "HTML doc title.",
178
+ "vectorizer": "text2vec-transformers",
179
+ "moduleConfig": {
180
+ "text2vec-transformers": {
181
+ "vectorizePropertyName": True,
182
+ "skip": False,
183
+ "tokenization": "lowercase"
184
+ }
185
+ },
186
+ "invertedIndexConfig": {
187
+ "bm25": {
188
+ "b": 0.75,
189
+ "k1": 1.2
190
+ },
191
+ }
192
+ },
193
+ {
194
+ "name": "content",
195
+ "dataType": ["text"],
196
+ "description": "HTML page content.",
197
+ "moduleConfig": {
198
+ "text2vec-transformers": {
199
+ "vectorizePropertyName": True,
200
+ "tokenization": "whitespace"
201
+ }
202
+ }
203
+ }
204
+ ]
205
+ }
206
+ wpCollection = client.collections.create_from_dict(class_obj)
207
+
208
+ logger.info("#### createChunksCollection() entered.")
209
+ if client.collections.exists("Chunks"):
210
+ client.collections.delete("Chunks")
211
+
212
+ class_obj = {
213
+ "class": "Chunks",
214
+ "description": "Collection for document chunks.",
215
+ "vectorizer": "text2vec-transformers",
216
+ "moduleConfig": {
217
+ "text2vec-transformers": {
218
+ "vectorizeClassName": True
219
+ }
220
+ },
221
+ "vectorIndexType": "hnsw",
222
+ "vectorIndexConfig": {
223
+ "distance": "cosine",
224
+ },
225
+ "properties": [
226
+ {
227
+ "name": "chunk",
228
+ "dataType": ["text"],
229
+ "description": "Single webpage chunk.",
230
+ "vectorizer": "text2vec-transformers",
231
+ "moduleConfig": {
232
+ "text2vec-transformers": {
233
+ "vectorizePropertyName": False,
234
+ "skip": False,
235
+ "tokenization": "lowercase"
236
+ }
237
+ }
238
+ },
239
+ {
240
+ "name": "chunk_index",
241
+ "dataType": ["int"]
242
+ },
243
+ {
244
+ "name": "webpage",
245
+ "dataType": ["Documents"],
246
+ "description": "Webpage content chunks.",
247
+
248
+ "invertedIndexConfig": {
249
+ "bm25": {
250
+ "b": 0.75,
251
+ "k1": 1.2
252
+ }
253
+ }
254
+ }
255
+ ]
256
+ }
257
+ wpChunkCollection = client.collections.create_from_dict(class_obj)
258
+
259
+
260
+ ###########################################################
261
+ # Create document and chunks objects in the database.
262
+ logger.info("#### Create page/doc and chunk db objects.")
263
+ for i, className in enumerate(webpageDocNames):
264
+ title = webpageTitles[i]
265
+ logger.debug(f"## className, title: {className}, {title}")
266
+ # Create Webpage Object
267
+ page_content = page_contentArray[i]
268
+ # Insert the document.
269
+ wpCollectionObj_uuid = wpCollection.data.insert(
270
+ {
271
+ "name": className,
272
+ "title": title,
273
+ "content": page_content
274
+ }
275
+ )
276
+
277
+ # Insert the chunks for the document.
278
+ for i2, chunk in enumerate(webpageChunks[i]):
279
+ chunk_uuid = wpChunkCollection.data.insert(
280
+ {
281
+ "title": title,
282
+ "chunk": chunk,
283
+ "chunk_index": i2,
284
+ "references":
285
+ {
286
+ "webpage": wpCollectionObj_uuid
287
+ }
288
+ }
289
+ )
290
+
291
+ ###############################################################################
292
+ # text contains prompt for vector DB.
293
+ text = "human-made computer cognitive ability"
294
+
295
+
296
+ ###############################################################################
297
+ # Initial the the sentence transformer and encode the query prompt.
298
+ logger.info(f"#### Encode text query prompt to create vectors. {text}")
299
+ model = SentenceTransformer('/app/multi-qa-MiniLM-L6-cos-v1')
300
+
301
# Encode the query text and copy the components of the result, in order,
# into a plain Python list for the database query.
vector = model.encode(text)

logger.debug("#### Print vectors.")
vectorList = list(vector)
logger.debug(f"vectorList: {vectorList[2]}")
308
+
309
+ # Fetch chunks and print chunks.
310
+ logger.info("#### Retrieve semchunks from db using vectors from prompt.")
311
# Query the chunks collection with the encoded prompt vector.
semChunks = wpChunkCollection.query.near_vector(
    near_vector=vectorList,
    distance=0.7,
    limit=3,
)
logger.debug(f"### semChunks[0]: {semChunks}")

# For each retrieved chunk, log it, then fetch the referenced webpage
# object by its uuid and log that page's title.
logger.info("#### Print individual retrieved chunks.")
for chunk in enumerate(semChunks.objects):
    # chunk is the (position, object) pair produced by enumerate; the pair
    # itself is what gets logged, the object is what gets inspected.
    _position, chunkObject = chunk
    logger.info(f"#### chunk: {chunk}")
    webpage_uuid = chunkObject.properties['references']['webpage']
    logger.info(f"webpage_uuid: {webpage_uuid}")
    wpFromChunk = wpCollection.query.fetch_object_by_id(webpage_uuid)
    pageTitle = wpFromChunk.properties['title']
    logger.info(f"### wpFromChunk title: {pageTitle}")
326
+
327
+
328
+
329
+ ####################################################################
330
+ #
331
+ collection = client.collections.get("Chunks")
332
+ #model = SentenceTransformer('../multi-qa-MiniLM-L6-cos-v1')
333
+
334
+ #################################################################
335
+ # Initialize the LLM.
336
+ model_path = "/app/llama-2-7b-chat.Q4_0.gguf"
337
+ llm = Llama(model_path,
338
+ #*,
339
+ n_gpu_layers=0,
340
+ split_mode=llama_cpp.LLAMA_SPLIT_MODE_LAYER,
341
+ main_gpu=0,
342
+ tensor_split=None,
343
+ vocab_only=False,
344
+ use_mmap=True,
345
+ use_mlock=False,
346
+ kv_overrides=None,
347
+ seed=llama_cpp.LLAMA_DEFAULT_SEED,
348
+ n_ctx=512,
349
+ n_batch=512,
350
+ n_threads=8,
351
+ n_threads_batch=16,
352
+ rope_scaling_type=llama_cpp.LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED,
353
+ pooling_type=llama_cpp.LLAMA_POOLING_TYPE_UNSPECIFIED,
354
+ rope_freq_base=0.0,
355
+ rope_freq_scale=0.0,
356
+ yarn_ext_factor=-1.0,
357
+ yarn_attn_factor=1.0,
358
+ yarn_beta_fast=32.0,
359
+ yarn_beta_slow=1.0,
360
+ yarn_orig_ctx=0,
361
+ logits_all=False,
362
+ embedding=False,
363
+ offload_kqv=True,
364
+ last_n_tokens_size=64,
365
+ lora_base=None,
366
+ lora_scale=1.0,
367
+ lora_path=None,
368
+ numa=False,
369
+ chat_format=None,
370
+ chat_handler=None,
371
+ draft_model=None,
372
+ tokenizer=None,
373
+ type_k=None,
374
+ type_v=None,
375
+ verbose=True
376
+ )
377
+
378
+
379
+ display(systemTextArea)
380
+ display(userTextArea)
381
+ display(ragPromptTextArea)
382
+ display(responseTextArea)
383
+ display(selectRag)
384
+ display(submitButton)
385
+
386
def setPrompt(pprompt,ragFlag):
    """Build the prompt string that is sent to the LLM.

    Args:
        pprompt: The user prompt text.
        ragFlag: When truthy, the text returned by setRagPrompt(pprompt) is
            appended to the user prompt on a new line.

    Returns:
        The user prompt wrapped, together with the current value of the
        system prompt text area, in the [INST] / <<SYS>> template.
    """
    print("\n### setPrompt() entered. ragFlag: ",ragFlag)
    if ragFlag:
        # NOTE(review): setRagPrompt is not defined in the visible part of
        # the file; confirm it exists before enabling the RAG checkbox.
        userPrompt = pprompt + "\n" + setRagPrompt(pprompt)
    else:
        userPrompt = pprompt
    # NOTE(review): the template is assumed to apply to both branches --
    # confirm against the original indentation.
    # The system block is closed with "<</SYS>>"; the previous "</SYS>>"
    # did not match the "<<SYS>>" opener.
    return f""" <s> [INST] <<SYS>> {systemTextArea.value} <</SYS>> Q: {userPrompt} A: [/INST]"""
396
+
397
def runModel(prompt):
    """Run a completion for `prompt` and show the text in the response area.

    Args:
        prompt: The full prompt string passed to llm.create_completion.
    """
    completion = llm.create_completion(
        prompt,
        max_tokens=4096,  # requested cap on the number of generated tokens
        #stop = ["Q:", "\n"],  # would stop just before the model generates a new question
        echo=False,  # the prompt is not echoed back in the output
    )
    # Only the text of the first returned choice is displayed.
    firstChoice = completion["choices"][0]
    responseTextArea.value = firstChoice["text"]
405
+
406
def on_submitButton_clicked(b):
    """Handle a click on the submit button.

    Clears the previous output and the RAG/response text areas, builds the
    prompt from the user text area and the RAG checkbox, and runs the model.

    Args:
        b: The argument passed by the button's on_click callback; not used.
    """
    with output_widget:
        clear_output(wait=True)
        ragPromptTextArea.value = ""
        responseTextArea.value = ""
        # The module-level logger is named `logger`; the previous `log`
        # was undefined and raised NameError on every click.
        logger.debug(f"### selectRag: {selectRag.value}")
        prompt = setPrompt(userTextArea.value,selectRag.value)
        logger.debug("### prompt: " + prompt)
        runModel(prompt)
415
+
416
+ submitButton.on_click(on_submitButton_clicked)
417
+ display(output_widget)
418
+
419
+
420
+ logger.info("#### Closing client db connection.")
421
+ client.close()
422
+
423
+ logger.info("#### Program terminating.")