Ferhan taha committed on
Commit
1d555fd
·
verified ·
1 Parent(s): 7692855

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -134
app.py DELETED
@@ -1,134 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
- """app.ipynb
3
-
4
- Automatically generated by Colaboratory.
5
-
6
- Original file is located at
7
- https://colab.research.google.com/drive/14JJlKx1Oj4px4gdVwHn55FstUl2Dvh9z
8
- """
9
-
10
-
11
-
12
- #|export
13
- import os
14
-
15
- from langchain.document_loaders import PyPDFLoader
16
- from langchain.text_splitter import RecursiveCharacterTextSplitter
17
- from langchain.vectorstores import Chroma
18
- from langchain.chains import ConversationalRetrievalChain
19
- from langchain.embeddings import HuggingFaceEmbeddings
20
- from langchain.llms import HuggingFacePipeline
21
- from langchain.chains import ConversationChain
22
- from langchain.memory import ConversationBufferMemory
23
- from langchain.llms import HuggingFaceHub
24
- import pandas as pd
25
- from pathlib import Path
26
- import chromadb
27
- import gradio as gr
28
- from transformers import AutoTokenizer
29
- import transformers
30
- import torch
31
- import tqdm
32
- import accelerate
33
-
34
- #|export
35
def _sanitize_collection_name(raw_name):
    """Turn an arbitrary file stem into a usable vector-store collection name."""
    # Spaces are replaced and the name is capped at 50 characters.
    name = raw_name.replace(" ", "-")[:50]
    # Chroma's documented naming rules ask for at least 3 characters; padding
    # also guarantees the index checks below never hit an empty string.
    if len(name) < 3:
        name = name + "xyz"
    # Strings are immutable, so the first/last character has to be swapped by
    # rebuilding the string rather than by item assignment.
    if not name[0].isalnum():
        name = "A" + name[1:]
    if not name[-1].isalnum():
        name = name[:-1] + "Z"
    return name


def initialize_database(file_path):
    """Build a vector database from the PDF at ``file_path``.

    The collection name is derived from the file name (without extension)
    and cleaned up so that it contains no spaces, is at most 50 characters
    long, and starts and ends with an alphanumeric character.

    Args:
        file_path: Path to the PDF document to index.

    Returns:
        A tuple ``(vector_db, collection_name, status)`` where ``vector_db``
        is the object returned by ``create_db``, ``collection_name`` is the
        cleaned-up name, and ``status`` is the literal string ``"Complete!"``.
    """
    collection_name = _sanitize_collection_name(Path(file_path).stem)
    print('Collection name: ', collection_name)
    # Load the document, split it, then index the splits.
    doc_splits = load_doc(file_path)
    vector_db = create_db(doc_splits, collection_name)
    return vector_db, collection_name, "Complete!"
56
-
57
- #|export
58
def load_doc(file_path):
    """Read a PDF and cut its pages into chunks.

    Args:
        file_path: Location of the PDF to read.

    Returns:
        The result of ``RecursiveCharacterTextSplitter.split_documents`` on
        the loaded pages, using ``chunk_size=600`` and ``chunk_overlap=50``.
    """
    pdf_pages = PyPDFLoader(file_path).load()
    splitter = RecursiveCharacterTextSplitter(chunk_size=600, chunk_overlap=50)
    return splitter.split_documents(pdf_pages)
64
-
65
- #|export
66
def create_db(splits, collection_name):
    """Embed document chunks into a Chroma collection.

    A fresh ``HuggingFaceEmbeddings`` instance (default settings) and a new
    ``chromadb.EphemeralClient`` are created on every call; no persist
    directory is passed.

    Args:
        splits: Document chunks to index.
        collection_name: Name of the Chroma collection to write to.

    Returns:
        The ``Chroma`` vector store produced by ``Chroma.from_documents``.
    """
    return Chroma.from_documents(
        documents=splits,
        embedding=HuggingFaceEmbeddings(),
        client=chromadb.EphemeralClient(),
        collection_name=collection_name,
    )
77
-
78
- #|export
79
# Index the bundled PDF once, at import time.
splt = load_doc('data.pdf')

#|export
# initialize_database() returns (vector_db, collection_name, status).
vec = initialize_database('data.pdf')

#|export
# initialize_database() has already embedded the document into the 'data'
# collection, so reuse that store instead of embedding the same chunks into
# a collection of the same name a second time.
vec_cre = vec[0]
87
-
88
- #|export
89
def initialize_llmchain(temperature, max_tokens, top_k, vector_db):
    """Assemble a conversational retrieval chain over ``vector_db``.

    Args:
        temperature: Forwarded to the model as ``temperature``.
        max_tokens: Forwarded to the model as ``max_new_tokens``.
        top_k: Forwarded to the model as ``top_k`` (it is a generation
            setting here, not a retriever setting).
        vector_db: Vector store; its ``as_retriever()`` supplies the context.

    Returns:
        The chain built by ``ConversationalRetrievalChain.from_llm``, set up
        with buffer memory and ``return_source_documents=True``.
    """
    # The memory stores the 'answer' output under the 'chat_history' key.
    chat_memory = ConversationBufferMemory(
        memory_key="chat_history",
        output_key='answer',
        return_messages=True,
    )

    generation_kwargs = {
        "temperature": temperature,
        "max_new_tokens": max_tokens,
        "top_k": top_k,
        "load_in_8bit": True,
    }
    hub_llm = HuggingFaceHub(
        repo_id='mistralai/Mixtral-8x7B-Instruct-v0.1',
        model_kwargs=generation_kwargs,
    )

    return ConversationalRetrievalChain.from_llm(
        hub_llm,
        retriever=vector_db.as_retriever(),
        chain_type="stuff",
        memory=chat_memory,
        return_source_documents=True,
        verbose=False,
    )
112
-
113
- #|export
114
# Module-level chain used by conversation().
qa = initialize_llmchain(
    temperature=0.7,
    max_tokens=1024,
    top_k=1,
    vector_db=vec_cre,
)
115
-
116
- #|export
117
def format_chat_history(message, chat_history):
    """Flatten (user, bot) message pairs into labelled transcript lines.

    Args:
        message: The current user message; accepted but not used.
        chat_history: Iterable of two-item ``(user_message, bot_message)``
            pairs.

    Returns:
        A list with two strings per pair, in order: ``"User: ..."`` followed
        by ``"Assistant: ..."``.
    """
    return [
        line
        for user_turn, bot_turn in chat_history
        for line in (f"User: {user_turn}", f"Assistant: {bot_turn}")
    ]
123
-
124
- #|export
125
def conversation(message, history):
    """Answer one chat turn using the module-level ``qa`` chain.

    Args:
        message: The user's question.
        history: Earlier turns as ``(user_message, bot_message)`` pairs, as
            consumed by ``format_chat_history``.

    Returns:
        The chain's ``"answer"`` text. If it contains the marker
        ``"Helpful Answer:"``, only the text after the last occurrence is
        returned.
    """
    chain_input = {
        "question": message,
        "chat_history": format_chat_history(message, history),
    }
    answer_text = qa(chain_input)["answer"]
    # rpartition yields the whole string as its last item when the marker is
    # absent, so no separate membership check is needed.
    return answer_text.rpartition("Helpful Answer:")[-1]
132
-
133
- #|export
134
# Serve the chatbot: each user message is routed through conversation().
gr.ChatInterface(fn=conversation).launch()