Spaces:
Sleeping
Sleeping
MVPilgrim
committed on
Commit
·
c0ba0c7
1
Parent(s):
7b821bd
debug
Browse files
- Dockerfile +2 -2
- README.md +1 -0
- app.py +3 -3
Dockerfile
CHANGED
|
@@ -48,8 +48,8 @@ RUN FORCE_CMAKE=1 CMAKE_SYSTEM_PROCESSOR=AMD64 pip3 install --break-system-packa
|
|
| 48 |
RUN pip3 install --break-system-packages cffi
|
| 49 |
# Install text2vec-transformers
|
| 50 |
WORKDIR /app/text2vec-transformers
|
| 51 |
-
COPY --from=semitechnologies/transformers-inference:sentence-transformers-multi-qa-MiniLM-L6-cos-v1 /app /app/text2vec-transformers
|
| 52 |
-
COPY --from=semitechnologies/transformers-inference:sentence-transformers-multi-qa-MiniLM-L6-cos-v1 /usr/local/bin /app/text2vec-transformers/bin
|
| 53 |
COPY ./multi-qa-MiniLM-L6-cos-v1 /app/text2vec-transformers
|
| 54 |
RUN ./custom_prerequisites.py
|
| 55 |
|
|
|
|
| 48 |
RUN pip3 install --break-system-packages cffi
|
| 49 |
# Install text2vec-transformers
|
| 50 |
WORKDIR /app/text2vec-transformers
|
| 51 |
+
#COPY --from=semitechnologies/transformers-inference:sentence-transformers-multi-qa-MiniLM-L6-cos-v1 /app /app/text2vec-transformers
|
| 52 |
+
#COPY --from=semitechnologies/transformers-inference:sentence-transformers-multi-qa-MiniLM-L6-cos-v1 /usr/local/bin /app/text2vec-transformers/bin
|
| 53 |
COPY ./multi-qa-MiniLM-L6-cos-v1 /app/text2vec-transformers
|
| 54 |
RUN ./custom_prerequisites.py
|
| 55 |
|
README.md
CHANGED
|
@@ -10,6 +10,7 @@ app_port: 8501
|
|
| 10 |
#app_file: app.py
|
| 11 |
pinned: true
|
| 12 |
startup_duration_timeout: 3 hours
|
|
|
|
| 13 |
---
|
| 14 |
|
| 15 |
# POC for Retrieval Augmented Generation with Large Language Models
|
|
|
|
| 10 |
#app_file: app.py
|
| 11 |
pinned: true
|
| 12 |
startup_duration_timeout: 3 hours
|
| 13 |
+
hardware: gpu
|
| 14 |
---
|
| 15 |
|
| 16 |
# POC for Retrieval Augmented Generation with Large Language Models
|
app.py
CHANGED
|
@@ -346,7 +346,7 @@ try:
|
|
| 346 |
logger.info("### Initializing LLM.")
|
| 347 |
llm = Llama(model_path,
|
| 348 |
#*,
|
| 349 |
-
n_gpu_layers
|
| 350 |
split_mode=llama_cpp.LLAMA_SPLIT_MODE_LAYER,
|
| 351 |
main_gpu=0,
|
| 352 |
tensor_split=None,
|
|
@@ -398,7 +398,7 @@ try:
|
|
| 398 |
###############################################################################
|
| 399 |
# Initial the the sentence transformer and encode the query prompt.
|
| 400 |
logger.debug(f"#### Encode text query prompt to create vectors. {promptText}")
|
| 401 |
-
model = SentenceTransformer('/app/multi-qa-MiniLM-L6-cos-v1')
|
| 402 |
vector = model.encode(promptText)
|
| 403 |
|
| 404 |
logLevel = logger.getEffectiveLevel()
|
|
@@ -539,7 +539,7 @@ try:
|
|
| 539 |
placeHolder = st.empty()
|
| 540 |
else:
|
| 541 |
st.session_state.spinGenMsg = False;
|
| 542 |
-
with st.spinner('Generating Completion
|
| 543 |
st.session_state.sysTAtext = st.session_state.sysTA
|
| 544 |
logger.debug(f"sysTAtext: {st.session_state.sysTAtext}")
|
| 545 |
wrklist = setPrompt(st.session_state.userpTA,st.selectRag)
|
|
|
|
| 346 |
logger.info("### Initializing LLM.")
|
| 347 |
llm = Llama(model_path,
|
| 348 |
#*,
|
| 349 |
+
n_gpu_layers=-1,
|
| 350 |
split_mode=llama_cpp.LLAMA_SPLIT_MODE_LAYER,
|
| 351 |
main_gpu=0,
|
| 352 |
tensor_split=None,
|
|
|
|
| 398 |
###############################################################################
|
| 399 |
# Initial the the sentence transformer and encode the query prompt.
|
| 400 |
logger.debug(f"#### Encode text query prompt to create vectors. {promptText}")
|
| 401 |
+
model = SentenceTransformer('/app/text2vec-transformers/multi-qa-MiniLM-L6-cos-v1')
|
| 402 |
vector = model.encode(promptText)
|
| 403 |
|
| 404 |
logLevel = logger.getEffectiveLevel()
|
|
|
|
| 539 |
placeHolder = st.empty()
|
| 540 |
else:
|
| 541 |
st.session_state.spinGenMsg = False;
|
| 542 |
+
with st.spinner('Generating Completion...'):
|
| 543 |
st.session_state.sysTAtext = st.session_state.sysTA
|
| 544 |
logger.debug(f"sysTAtext: {st.session_state.sysTAtext}")
|
| 545 |
wrklist = setPrompt(st.session_state.userpTA,st.selectRag)
|