AmrGharieb committed on
Commit
edebac2
·
1 Parent(s): 636a219

first deployment

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ final_db/chroma.sqlite3 filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain.chains import LLMChain
2
+ import streamlit as st
3
+ from decouple import config
4
+ from langchain.llms import OpenAI
5
+ from langchain.document_loaders import PyPDFLoader
6
+ from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings
7
+ from langchain.vectorstores import Chroma
8
+ from langchain.retrievers.document_compressors import LLMChainExtractor
9
+ from langchain.retrievers import ContextualCompressionRetriever
10
+ from langchain.retrievers.self_query.base import SelfQueryRetriever
11
+ from langchain.chains import RetrievalQA
12
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
13
+ from langchain.evaluation.qa import QAGenerateChain
14
+ from langchain.chains import RetrievalQA
15
+ from langchain.chat_models import ChatOpenAI
16
+ from langchain.document_loaders import CSVLoader
17
+ from langchain.indexes import VectorstoreIndexCreator
18
+ from langchain.vectorstores import DocArrayInMemorySearch
19
+ from langchain.prompts import ChatPromptTemplate
20
+ from langchain.document_loaders.generic import GenericLoader
21
+ from langchain.document_loaders.parsers import OpenAIWhisperParser
22
+ from langchain.document_loaders.blob_loaders.youtube_audio import YoutubeAudioLoader
23
+ from langchain.prompts import PromptTemplate
24
+ from langchain.memory import ConversationBufferMemory
25
+ from langchain.chains import ConversationalRetrievalChain
26
+ import time
27
+ from htmlTemplates import css, bot_template, user_template
28
+
29
+ from dotenv import load_dotenv, find_dotenv
30
+ _ = load_dotenv(find_dotenv()) # read local .env file
31
+
32
def timeit(func):
    """Decorate ``func`` so that every call prints its execution time.

    Args:
        func: The callable to time.

    Returns:
        A wrapper that forwards all positional and keyword arguments to
        ``func``, prints the elapsed seconds, and returns ``func``'s result.
        The wrapper keeps ``func``'s name and docstring.
    """
    # Imported locally so this decorator does not depend on the module's
    # import block providing ``functools``.
    import functools

    @functools.wraps(func)  # keep __name__/__doc__ of the decorated function
    def wrapper(*args, **kwargs):
        # perf_counter is monotonic, so the interval cannot be skewed by
        # system clock adjustments the way time.time() can.
        started = time.perf_counter()
        result = func(*args, **kwargs)
        elapsed = time.perf_counter() - started
        print(f"Function {func.__name__} took {elapsed} seconds to execute.")
        return result

    return wrapper
40
+
41
+
42
@timeit
def get_llm():
    """Build the completion model used by the chat.

    Returns:
        An ``OpenAI`` LLM instance created with ``temperature=0.0``.
    """
    llm = OpenAI(temperature=0.0)
    return llm
45
+
46
@timeit
def get_memory():
    """Create a fresh conversation memory for the chat chain.

    Returns:
        A ``ConversationBufferMemory`` that stores its history under the
        ``"chat_history"`` key and is configured with ``return_messages=True``.
    """
    memory_options = {
        "memory_key": "chat_history",
        "return_messages": True,
    }
    return ConversationBufferMemory(**memory_options)
52
+
53
@timeit
def generate_response(question, vectordb, llm, memory, chat_history):
    """Answer ``question`` with a conversational retrieval chain and render the chat.

    Args:
        question: Text the user typed into the chat input.
        vectordb: Vector store; its ``as_retriever`` method supplies the
            retriever used by the chain.
        llm: Language model handed to ``ConversationalRetrievalChain.from_llm``.
        memory: Conversation memory attached to the chain.
        chat_history: Prior turns, forwarded to the chain under the
            ``"chat_history"`` input key.

    Returns:
        None. The chain output is passed to ``handle_userinput`` for display.
    """
    # NOTE(review): this function used to format a "petroleum engineer"
    # persona prompt around the question, but the formatted text was never
    # sent to the chain. The dead code is removed here; the raw question is
    # what the chain receives, exactly as before. Confirm whether the persona
    # prompt should be wired into the chain.

    # Search parameters belong in ``search_kwargs``; as bare keyword arguments
    # to ``as_retriever`` they are not forwarded to the MMR search
    # (see the LangChain ``as_retriever`` documentation).
    retriever = vectordb.as_retriever(
        search_type="mmr",
        search_kwargs={"k": 5, "fetch_k": 10},
    )

    qa = ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=retriever,
        memory=memory,
    )

    response = qa({"question": question, "chat_history": chat_history})
    handle_userinput(response)
72
+
73
@timeit
def create_embeding_function():
    """Create the embedding function used to query the vector store.

    Returns:
        A ``SentenceTransformerEmbeddings`` instance for the
        ``"all-mpnet-base-v2"`` model.
    """
    return SentenceTransformerEmbeddings(model_name="all-mpnet-base-v2")
80
+
81
@timeit
def get_vector_db(embedding_function):
    """Open the Chroma vector store persisted under ``./final_db``.

    Args:
        embedding_function: Embedding function passed through to ``Chroma``.

    Returns:
        The ``Chroma`` instance.
    """
    return Chroma(
        embedding_function=embedding_function,
        persist_directory="./final_db",
    )
86
+
87
def handle_userinput(user_question):
    """Store the chain's chat history in the session and render every turn.

    Args:
        user_question: Despite its name, this is the mapping returned by the
            retrieval chain; only its ``"chat_history"`` entry is read.

    Returns:
        None. Messages are written to the Streamlit page.
    """
    # Imported locally so this function does not depend on the module's
    # import block providing ``html``.
    import html

    # The previous membership test used the bare name ``chat_history`` rather
    # than the string key, and the value it guarded was overwritten on the
    # next line anyway, so the history is simply assigned here.
    st.session_state.chat_history = user_question["chat_history"]

    for i, message in enumerate(st.session_state.chat_history):
        # Even positions are rendered with the user template, odd positions
        # with the bot template.
        template = user_template if i % 2 == 0 else bot_template
        # The text is injected into raw HTML (unsafe_allow_html=True), so it
        # is escaped to keep user- or model-supplied markup from being
        # interpreted by the browser.
        safe_text = html.escape(message.content)
        st.write(template.replace("{{MSG}}", safe_text), unsafe_allow_html=True)
101
if __name__ == "__main__":
    # Page header and shared stylesheet.
    page_title = "Hydraulic Fracture Stimulation Chat"
    st.set_page_config(page_title=page_title, page_icon=":books:")
    st.write(css, unsafe_allow_html=True)
    st.title(page_title)
    st.write(
        "This is a chatbot that can answer questions related to petroleum engineering specially in hydraulic fracture stimulation."
    )

    # Retrieval stack: embedding function -> persisted vector store -> LLM.
    vector_db = get_vector_db(create_embeding_function())
    llm = get_llm()

    # The memory object lives in st.session_state and is only created when
    # the "memory" key is not there yet.
    if "memory" not in st.session_state:
        st.session_state["memory"] = get_memory()
    memory = st.session_state["memory"]

    # History list handed to the chain alongside the question.
    chat_history = []

    prompt_question = st.chat_input("Please ask a question:")
    if prompt_question:
        generate_response(
            question=prompt_question,
            vectordb=vector_db,
            llm=llm,
            memory=memory,
            chat_history=chat_history,
        )
final_db/b14a837e-d1d1-45d9-9434-afbedcec46ba/data_level0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5d0b2dfa6fc6f8ed74e81120e268d444fa3033806c66a0440f1678c408b97218
3
+ size 6424000
final_db/b14a837e-d1d1-45d9-9434-afbedcec46ba/header.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e972f51eb6bdd4796b659eb05da7af265a8628f46bc86a7992f0972445ab6a36
3
+ size 100
final_db/b14a837e-d1d1-45d9-9434-afbedcec46ba/index_metadata.pickle ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f3544f58970ec3ab604362079f921d6b7e3bfeec39feb0368b5ca87c7a82993
3
+ size 113989
final_db/b14a837e-d1d1-45d9-9434-afbedcec46ba/length.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf238bb4bf1c2a6b3669ca83fe1bf29bee19b7f8d1e743c7b4cb3e3cd196b093
3
+ size 8000
final_db/b14a837e-d1d1-45d9-9434-afbedcec46ba/link_lists.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:61aaef00ec60f38a420980a9243be7a4579d4a3784cd4f0b31dbddc874d5355e
3
+ size 16976
final_db/chroma.sqlite3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba67f4d0d95baa576e5fa3ef86c1cb9d5e8d56386b366020b91c3a07d65bb60e
3
+ size 23465984
htmlTemplates.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Stylesheet injected into the page. The <style> element is now explicitly
# closed so that it is well-formed HTML.
css = '''
<style>
.chat-message {
padding: 1.5rem; border-radius: 0.5rem; margin-bottom: 1rem; display: flex
}
.chat-message.user {
background-color: #2b313e
}
.chat-message.bot {
background-color: #475063
}
.chat-message .avatar {
width: 20%;
}
.chat-message .avatar img {
max-width: 78px;
max-height: 78px;
border-radius: 50%;
object-fit: cover;
}
.chat-message .message {
width: 80%;
padding: 0 1.5rem;
color: #fff;
}
</style>
'''

# HTML for one bot message; the caller substitutes the text for {{MSG}}.
bot_template = '''
<div class="chat-message bot">
<div class="avatar">
<img src="https://i.ibb.co/cN0nmSj/Screenshot-2023-05-28-at-02-37-21.png" style="max-height: 78px; max-width: 78px; border-radius: 50%; object-fit: cover;">
</div>
<div class="message">{{MSG}}</div>
</div>
'''

# HTML for one user message; the caller substitutes the text for {{MSG}}.
user_template = '''
<div class="chat-message user">
<div class="avatar">
<img src="https://th.bing.com/th/id/OIP.xXHQ5dk4qJH74WMGNezDjwHaHa?rs=1&pid=ImgDetMain">
</div>
<div class="message">{{MSG}}</div>
</div>
'''
requirements.txt ADDED
@@ -0,0 +1,171 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiohttp==3.8.6
2
+ aiosignal==1.3.1
3
+ altair==5.1.2
4
+ annotated-types==0.6.0
5
+ anyio==3.7.1
6
+ asttokens==2.4.1
7
+ async-timeout==4.0.3
8
+ attrs==23.1.0
9
+ backoff==2.2.1
10
+ bcrypt==4.0.1
11
+ blinker==1.7.0
12
+ Brotli==1.1.0
13
+ cachetools==5.3.2
14
+ certifi==2023.7.22
15
+ cffi==1.16.0
16
+ charset-normalizer==3.3.2
17
+ Chroma==0.2.0
18
+ chroma-hnswlib==0.7.3
19
+ chromadb==0.4.17
20
+ click==8.1.7
21
+ colorama==0.4.6
22
+ coloredlogs==15.0.1
23
+ comm==0.2.0
24
+ cryptography==41.0.5
25
+ ctransformers==0.2.27
26
+ dataclasses-json==0.6.2
27
+ debugpy==1.8.0
28
+ decorator==5.1.1
29
+ Deprecated==1.2.14
30
+ distro==1.8.0
31
+ exceptiongroup==1.1.3
32
+ executing==2.0.1
33
+ fastapi==0.104.1
34
+ filelock==3.13.1
35
+ flatbuffers==23.5.26
36
+ frozenlist==1.4.0
37
+ fsspec==2023.10.0
38
+ gitdb==4.0.11
39
+ GitPython==3.1.40
40
+ google-auth==2.23.4
41
+ googleapis-common-protos==1.61.0
42
+ greenlet==3.0.1
43
+ grpcio==1.59.2
44
+ h11==0.14.0
45
+ httpcore==1.0.2
46
+ httptools==0.6.1
47
+ httpx==0.25.1
48
+ huggingface-hub==0.19.3
49
+ humanfriendly==10.0
50
+ idna==3.4
51
+ importlib-metadata==6.8.0
52
+ importlib-resources==6.1.1
53
+ ipykernel==6.26.0
54
+ ipython==8.17.2
55
+ jedi==0.19.1
56
+ Jinja2==3.1.2
57
+ joblib==1.3.2
58
+ jsonpatch==1.33
59
+ jsonpointer==2.4
60
+ jsonschema==4.19.2
61
+ jsonschema-specifications==2023.11.1
62
+ jupyter_client==8.6.0
63
+ jupyter_core==5.5.0
64
+ kubernetes==28.1.0
65
+ langchain==0.0.336
66
+ langsmith==0.0.64
67
+ markdown-it-py==3.0.0
68
+ MarkupSafe==2.1.3
69
+ marshmallow==3.20.1
70
+ matplotlib-inline==0.1.6
71
+ mdurl==0.1.2
72
+ monotonic==1.6
73
+ mpmath==1.3.0
74
+ multidict==6.0.4
75
+ mutagen==1.47.0
76
+ mypy-extensions==1.0.0
77
+ nest-asyncio==1.5.8
78
+ networkx==3.2.1
79
+ nltk==3.8.1
80
+ numpy==1.26.2
81
+ oauthlib==3.2.2
82
+ onnxruntime==1.16.2
83
+ openai==1.3.0
84
+ opentelemetry-api==1.21.0
85
+ opentelemetry-exporter-otlp-proto-common==1.21.0
86
+ opentelemetry-exporter-otlp-proto-grpc==1.21.0
87
+ opentelemetry-proto==1.21.0
88
+ opentelemetry-sdk==1.21.0
89
+ opentelemetry-semantic-conventions==0.42b0
90
+ overrides==7.4.0
91
+ packaging==23.2
92
+ pandas==2.1.3
93
+ parso==0.8.3
94
+ Pillow==10.1.0
95
+ platformdirs==4.0.0
96
+ posthog==3.0.2
97
+ prompt-toolkit==3.0.41
98
+ protobuf==4.25.1
99
+ psutil==5.9.6
100
+ pulsar-client==3.3.0
101
+ pure-eval==0.2.2
102
+ py-cpuinfo==9.0.0
103
+ pyarrow==14.0.1
104
+ pyasn1==0.5.0
105
+ pyasn1-modules==0.3.0
106
+ pycparser==2.21
107
+ pycryptodomex==3.19.0
108
+ pydantic==2.5.1
109
+ pydantic_core==2.14.3
110
+ pydeck==0.8.1b0
111
+ pydub==0.25.1
112
+ Pygments==2.16.1
113
+ pypdf==3.17.1
114
+ PyPika==0.48.9
115
+ pyreadline3==3.4.1
116
+ python-dateutil==2.8.2
117
+ python-decouple==3.8
118
+ python-dotenv==1.0.0
119
+ pytz==2023.3.post1
120
+ pywin32==306; sys_platform == "win32"
121
+ PyYAML==6.0.1
122
+ pyzmq==25.1.1
123
+ referencing==0.31.0
124
+ regex==2023.10.3
125
+ requests==2.31.0
126
+ requests-oauthlib==1.3.1
127
+ rich==13.7.0
128
+ rpds-py==0.12.0
129
+ rsa==4.9
130
+ safetensors==0.4.0
131
+ scikit-learn==1.3.2
132
+ scipy==1.11.3
133
+ sentence-transformers==2.2.2
134
+ sentencepiece==0.1.99
135
+ six==1.16.0
136
+ smmap==5.0.1
137
+ sniffio==1.3.0
138
+ SQLAlchemy==2.0.23
139
+ stack-data==0.6.3
140
+ starlette==0.27.0
141
+ streamlit==1.28.2
142
+ sympy==1.12
143
+ tenacity==8.2.3
144
+ threadpoolctl==3.2.0
145
+ tiktoken==0.5.1
146
+ tokenizers==0.15.0
147
+ toml==0.10.2
148
+ toolz==0.12.0
149
+ torch==2.1.1
150
+ torchvision==0.16.1
151
+ tornado==6.3.3
152
+ tqdm==4.66.1
153
+ traitlets==5.13.0
154
+ transformers==4.35.2
155
+ typer==0.9.0
156
+ typing-inspect==0.9.0
157
+ typing_extensions==4.8.0
158
+ tzdata==2023.3
159
+ tzlocal==5.2
160
+ urllib3==1.26.18
161
+ uvicorn==0.24.0.post1
162
+ validators==0.22.0
163
+ watchdog==3.0.0
164
+ watchfiles==0.21.0
165
+ wcwidth==0.2.10
166
+ websocket-client==1.6.4
167
+ websockets==12.0
168
+ wrapt==1.16.0
169
+ yarl==1.9.2
170
+ yt-dlp==2023.11.16
171
+ zipp==3.17.0