import json
import logging
import os
import re
from pathlib import Path

import weaviate
from weaviate.connect import ConnectionParams
from weaviate.classes.init import AdditionalConfig, Timeout
from sentence_transformers import SentenceTransformer
from langchain_community.document_loaders import BSHTMLLoader
from lxml import html
from semantic_text_splitter import HuggingFaceTextSplitter
from tokenizers import Tokenizer
import llama_cpp
from llama_cpp import Llama
import ipywidgets as widgets
from IPython.display import display, clear_output

# Only let warnings and above through from the "httpx" logger.
weaviate_logger = logging.getLogger("httpx")
weaviate_logger.setLevel(logging.WARNING)

logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)

######################################################################
# MAINLINE
#
logger.info("#### MAINLINE ENTERED.")

# Directory that is scanned for the input HTML documents.
pathString = "/app/inputDocs"

# Accumulators filled while the input documents are read and chunked.
# They are kept as parallel lists: entry i of each list belongs to the
# same input document.
chunks = []
webpageDocNames = []
page_contentArray = []
webpageChunks = []
webpageTitles = []
webpageChunksDocNames = []

#####################################################################
# Create UI widgets.
# Output area that later captures anything printed by the button callback.
output_widget = widgets.Output()

with output_widget:
    print("### Create widgets entered.")

systemTextArea = widgets.Textarea(
    value='',
    placeholder='Enter System Prompt.',
    description='Sys Prompt: ',
    disabled=False,
    layout=widgets.Layout(width='300px', height='80px'),
)

userTextArea = widgets.Textarea(
    value='',
    placeholder='Enter User Prompt.',
    description='User Prompt: ',
    disabled=False,
    layout=widgets.Layout(width='435px', height='110px'),
)

ragPromptTextArea = widgets.Textarea(
    value='',
    placeholder='App generated prompt with RAG information.',
    description='RAG Prompt: ',
    disabled=False,
    layout=widgets.Layout(width='580px', height='180px'),
)

responseTextArea = widgets.Textarea(
    value='',
    placeholder='LLM generated response.',
    description='LLM Resp: ',
    disabled=False,
    layout=widgets.Layout(width='780px', height='200px'),
)

selectRag = widgets.Checkbox(
    value=False,
    description='Use RAG',
    disabled=False,
)

submitButton = widgets.Button(
    description='Run Model.',
    disabled=False,
    button_style='',  # 'success', 'info', 'warning', 'danger' or ''
    tooltip='Click',
    icon='check',  # (FontAwesome names without the `fa-` prefix)
)

######################################################
# Connect to the Weaviate vector database.
logger.info("#### Create Weaviate db client connection.")

# The original call contained a stray "}," between the two keyword
# arguments, which is a syntax error. Ports are passed as integers, which
# is what ConnectionParams.from_params declares.
client = weaviate.WeaviateClient(
    connection_params=ConnectionParams.from_params(
        http_host="localhost",
        http_port=8080,
        http_secure=False,
        grpc_host="localhost",
        grpc_port=50051,
        grpc_secure=False,
    ),
    additional_config=AdditionalConfig(
        timeout=Timeout(init=60, query=1800, insert=1800),  # Values in seconds
    ),
)
client.connect()

#######################################################
# Read each text input file, parse it into a document,
# chunk it, collect chunks and document name.
logger.info("#### Read and chunk input text files.")

# The input pages only have to be parsed when at least one of the two
# collections still needs to be built. (The original tested "Documents"
# twice and joined the tests with "||", which is not a Python operator.)
if (not client.collections.exists("Documents")
        or not client.collections.exists("Chunks")):
    # NOTE(review): max_tokens is not used anywhere in this file; the chunk
    # size is governed by chunk_capacity below. Confirm which one is intended.
    max_tokens = 1000

    # Tokenizer and splitter do not depend on the file being processed, so
    # build them once instead of once per document.
    tokenizer = Tokenizer.from_pretrained("bert-base-uncased")
    logger.debug(f"### tokenizer: {tokenizer}")
    splitter = HuggingFaceTextSplitter(tokenizer, trim_chunks=True)

    for filename in os.listdir(pathString):
        logger.info(filename)
        path = Path(pathString) / filename

        # Drop a trailing ".html" only. str.rstrip(".html") would strip any
        # run of the characters '.', 'h', 't', 'm', 'l' from the end
        # (e.g. "email.html" -> "emai").
        if filename.endswith(".html"):
            filename = filename[:-len(".html")]
        webpageDocNames.append(filename)

        htmlLoader = BSHTMLLoader(path, "utf-8")
        htmlData = htmlLoader.load()

        title = htmlData[0].metadata['title']
        page_content = htmlData[0].page_content

        # Clean data. Collapse runs of newlines into a single newline.
        page_content = re.sub(r'\n+', '\n', page_content)
        page_contentArray.append(page_content)
        webpageTitles.append(title)

        # One list of chunks per page, stored at the same index as the page.
        chunks = list(splitter.chunks(page_content, chunk_capacity=50))
        for chnk in chunks:
            logger.debug(f"#### chnk in file: {chnk}")
        logger.debug(f"chunks: {chunks}")

        webpageChunks.append(chunks)
        webpageChunksDocNames.append(filename + "Chunks")

        logger.debug(f"### filename, title: {filename}, {title}")

    logger.debug(f"### webpageDocNames: {webpageDocNames}")

######################################################
# Create database webpage and chunks collections.
# Remember whether each collection had to be created in this run. The
# original re-tested client.collections.exists(...) *after* creating the
# collections, so the insert code below could never execute.
documents_created = not client.collections.exists("Documents")
chunks_created = not client.collections.exists("Chunks")

logger.info("#### createWebpageCollection() entered.")
if documents_created:
    class_obj = {
        "class": "Documents",
        "description": "For first attempt at loading a Weviate database.",
        "vectorizer": "text2vec-transformers",
        "moduleConfig": {
            "text2vec-transformers": {
                "vectorizeClassName": False
            }
        },
        "vectorIndexType": "hnsw",
        "vectorIndexConfig": {
            "distance": "cosine",
        },
        "properties": [
            {
                "name": "title",
                "dataType": ["text"],
                "description": "HTML doc title.",
                "vectorizer": "text2vec-transformers",
                "moduleConfig": {
                    "text2vec-transformers": {
                        "vectorizePropertyName": True,
                        "skip": False,
                        "tokenization": "lowercase"
                    }
                },
                "invertedIndexConfig": {
                    "bm25": {
                        "b": 0.75,
                        "k1": 1.2
                    },
                }
            },
            {
                "name": "content",
                "dataType": ["text"],
                "description": "HTML page content.",
                "moduleConfig": {
                    "text2vec-transformers": {
                        "vectorizePropertyName": True,
                        "tokenization": "whitespace"
                    }
                }
            }
        ]
    }
    wpCollection = client.collections.create_from_dict(class_obj)
else:
    # Bind the handle on later runs too; code further down the file uses
    # wpCollection whether or not the collection was created just now.
    wpCollection = client.collections.get("Documents")

logger.info("#### createChunksCollection() entered.")
if chunks_created:
    class_obj = {
        "class": "Chunks",
        "description": "Collection for document chunks.",
        "vectorizer": "text2vec-transformers",
        "moduleConfig": {
            "text2vec-transformers": {
                "vectorizeClassName": True
            }
        },
        "vectorIndexType": "hnsw",
        "vectorIndexConfig": {
            "distance": "cosine",
        },
        "properties": [
            {
                "name": "chunk",
                "dataType": ["text"],
                "description": "Single webpage chunk.",
                "vectorizer": "text2vec-transformers",
                "moduleConfig": {
                    "text2vec-transformers": {
                        "vectorizePropertyName": False,
                        "skip": False,
                        "tokenization": "lowercase"
                    }
                }
            },
            {
                "name": "chunk_index",
                "dataType": ["int"]
            },
            {
                "name": "webpage",
                "dataType": ["Documents"],
                "description": "Webpage content chunks.",
                "invertedIndexConfig": {
                    "bm25": {
                        "b": 0.75,
                        "k1": 1.2
                    }
                }
            }
        ]
    }
    wpChunkCollection = client.collections.create_from_dict(class_obj)
else:
    wpChunkCollection = client.collections.get("Chunks")

###########################################################
# Create document and chunks objects in the database.
if documents_created:
    logger.info("#### Create page/doc db objects.")
    # The four lists are parallel: entry i of each describes the same page.
    for className, title, page_content, pageChunks in zip(
            webpageDocNames, webpageTitles, page_contentArray, webpageChunks):
        logger.debug(f"## className, title: {className}, {title}")

        # Insert the document.
        wpCollectionObj_uuid = wpCollection.data.insert(
            {
                "name": className,
                "title": title,
                "content": page_content
            }
        )

        if chunks_created:
            logger.info("#### Create chunk db objects.")
            # Insert the chunks for the document.
            # NOTE(review): the link to the page is stored as a nested
            # "references" property because getRagData() reads it back as
            # properties['references']['webpage']. The v4 client also offers
            # a dedicated `references=` argument on insert() - confirm which
            # representation is wanted before changing either side.
            for i2, chunk in enumerate(pageChunks):
                chunk_uuid = wpChunkCollection.data.insert(
                    {
                        "title": title,
                        "chunk": chunk,
                        "chunk_index": i2,
                        "references": {
                            "webpage": wpCollectionObj_uuid
                        }
                    }
                )
elif chunks_created:
    # Chunks need the UUID of a page inserted in this run, so they cannot be
    # loaded on their own. Say so instead of silently leaving them empty.
    logger.warning(
        "#### Chunks collection was created but Documents already existed; "
        "no chunks were loaded."
    )

#################################################################
# Initialize the LLM.
model_path = "/app/llama-2-7b-chat.Q4_0.gguf"
# NOTE(review): n_ctx=512 bounds prompt plus completion together; confirm it
# is large enough once retrieved chunks are added to the prompt.
llm = Llama(
    model_path,
    n_gpu_layers=0,
    split_mode=llama_cpp.LLAMA_SPLIT_MODE_LAYER,
    main_gpu=0,
    tensor_split=None,
    vocab_only=False,
    use_mmap=True,
    use_mlock=False,
    kv_overrides=None,
    seed=llama_cpp.LLAMA_DEFAULT_SEED,
    n_ctx=512,
    n_batch=512,
    n_threads=8,
    n_threads_batch=16,
    rope_scaling_type=llama_cpp.LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED,
    pooling_type=llama_cpp.LLAMA_POOLING_TYPE_UNSPECIFIED,
    rope_freq_base=0.0,
    rope_freq_scale=0.0,
    yarn_ext_factor=-1.0,
    yarn_attn_factor=1.0,
    yarn_beta_fast=32.0,
    yarn_beta_slow=1.0,
    yarn_orig_ctx=0,
    logits_all=False,
    embedding=False,
    offload_kqv=True,
    last_n_tokens_size=64,
    lora_base=None,
    lora_scale=1.0,
    lora_path=None,
    numa=False,
    chat_format=None,
    chat_handler=None,
    draft_model=None,
    tokenizer=None,
    type_k=None,
    type_v=None,
    verbose=True
)

###############################################################################
# text contains prompt for vector DB.
text = "human-made computer cognitive ability"

# SentenceTransformer instance shared by all getRagData() calls; it is
# created on first use so the model is not reloaded for every query.
_embeddingModel = None


def _getEmbeddingModel():
    """Return the shared SentenceTransformer, loading it on first use."""
    global _embeddingModel
    if _embeddingModel is None:
        _embeddingModel = SentenceTransformer('/app/multi-qa-MiniLM-L6-cos-v1')
    return _embeddingModel


def getRagData(text):
    """Look up the stored chunks closest to *text* in the vector database.

    The query text is encoded with the sentence-transformer model and used
    for a near-vector search (distance 0.7, at most 3 hits) on the "Chunks"
    collection. Each hit and the title of its source page are logged.

    Args:
        text: Query text to encode and search for.

    Returns:
        A list with the "chunk" text of every hit that has one (empty when
        nothing matched). The original implementation returned None.
    """
    ###############################################################################
    # Initialize the sentence transformer and encode the query prompt.
    logger.info(f"#### Encode text query prompt to create vectors. {text}")
    vectorList = _getEmbeddingModel().encode(text).tolist()
    logger.debug(f"#### Query vector has {len(vectorList)} entries.")

    # Resolve the collections by name so this function does not depend on
    # module-level handles that are only bound on some code paths.
    chunkCollection = client.collections.get("Chunks")
    pageCollection = client.collections.get("Documents")

    # Fetch chunks.
    logger.info("#### Retrieve semchunks from db using vectors from prompt.")
    semChunks = chunkCollection.query.near_vector(
        near_vector=vectorList,
        distance=0.7,
        limit=3
    )
    logger.debug(f"### semChunks[0]: {semChunks}")

    # Log chunks, corresponding document and document title.
    logger.info("#### Print individual retrieved chunks.")
    ragChunks = []
    for hit in enumerate(semChunks.objects):
        logger.info(f"#### chunk: {hit}")
        properties = hit[1].properties

        chunkText = properties.get('chunk')
        if chunkText:
            ragChunks.append(chunkText)

        # The page link is optional from this function's point of view:
        # a chunk without one is still usable as context.
        webpage_uuid = (properties.get('references') or {}).get('webpage')
        if webpage_uuid is None:
            continue
        logger.info(f"webpage_uuid: {webpage_uuid}")

        wpFromChunk = pageCollection.query.fetch_object_by_id(webpage_uuid)
        if wpFromChunk is not None:
            logger.info(
                f"### wpFromChunk title: {wpFromChunk.properties['title']}")

    return ragChunks


####################################################################
# Show the input/output widgets.
display(systemTextArea)
display(userTextArea)
display(ragPromptTextArea)
display(responseTextArea)
display(selectRag)
display(submitButton)


def setRagPrompt(pprompt):
    """Build the context text that is appended to the user prompt.

    Args:
        pprompt: The user's prompt; it is also used as the retrieval query.

    Returns:
        The retrieved chunks joined by newlines under a "Context:" header,
        or an empty string when nothing was retrieved.
    """
    ragChunks = getRagData(pprompt)
    if not ragChunks:
        return ""
    return "Context:\n" + "\n".join(ragChunks)


def setPrompt(pprompt, ragFlag):
    """Assemble the prompt that is sent to the model.

    Args:
        pprompt: Text from the user prompt widget.
        ragFlag: When true, retrieved context is appended to the user prompt.

    Returns:
        The prompt string: the system prompt from systemTextArea followed by
        the (optionally augmented) user prompt, in [INST] ... [/INST] form.
    """
    print("\n### setPrompt() entered. ragFlag: ", ragFlag)

    userPrompt = pprompt
    if ragFlag:
        ragPrompt = setRagPrompt(pprompt)
        if ragPrompt:
            userPrompt = pprompt + "\n" + ragPrompt

    # The system prompt is wrapped in the Llama-2 chat delimiters. The
    # original template carried "<>" and ">" at these positions, which looks
    # like the remains of "<<SYS>>" / "<</SYS>>" - NOTE(review): confirm.
    prompt = (
        f"[INST] <<SYS>>\n{systemTextArea.value}\n<</SYS>>\n\n"
        f"Q: {userPrompt}\nA: [/INST]"
    )
    return prompt


def runModel(prompt):
    """Run *prompt* through the model and show the completion text.

    Args:
        prompt: Fully assembled prompt string.
    """
    output = llm.create_completion(
        prompt,           # Prompt
        # Upper bound on generated tokens; the usable amount is still
        # limited by the context size the model was created with.
        max_tokens=4096,
        #stop = ["Q:", "\n"], # Stop generating just before the model would generate a new question
        echo=False        # Do not echo the prompt back in the output
    )
    responseTextArea.value = output["choices"][0]["text"]


def on_submitButton_clicked(b):
    """Button callback: build the prompt, run the model, show the result.

    Args:
        b: The clicked button (unused).
    """
    with output_widget:
        clear_output(wait=True)
        ragPromptTextArea.value = ""
        responseTextArea.value = ""

        # The original called log.debug(), but the module logger is named
        # "logger"; "log" is undefined and raised NameError on every click.
        logger.debug(f"### selectRag: {selectRag.value}")
        prompt = setPrompt(userTextArea.value, selectRag.value)
        logger.debug("### prompt: " + prompt)

        # Show the generated prompt in the widget reserved for it.
        if selectRag.value:
            ragPromptTextArea.value = prompt

        runModel(prompt)


submitButton.on_click(on_submitButton_clicked)
display(output_widget)

#logger.info("#### Closing client db connection.")
#client.close()
#logger.info("#### Program terminating.")