Spaces:
Running
Running
MVPilgrim
committed on
Commit
·
c0ba0c7
1
Parent(s):
7b821bd
debug
Browse files- Dockerfile +2 -2
- README.md +1 -0
- app.py +3 -3
Dockerfile
CHANGED
@@ -48,8 +48,8 @@ RUN FORCE_CMAKE=1 CMAKE_SYSTEM_PROCESSOR=AMD64 pip3 install --break-system-packa
|
|
48 |
RUN pip3 install --break-system-packages cffi
|
49 |
# Install text2vec-transformers
|
50 |
WORKDIR /app/text2vec-transformers
|
51 |
-
COPY --from=semitechnologies/transformers-inference:sentence-transformers-multi-qa-MiniLM-L6-cos-v1 /app /app/text2vec-transformers
|
52 |
-
COPY --from=semitechnologies/transformers-inference:sentence-transformers-multi-qa-MiniLM-L6-cos-v1 /usr/local/bin /app/text2vec-transformers/bin
|
53 |
COPY ./multi-qa-MiniLM-L6-cos-v1 /app/text2vec-transformers
|
54 |
RUN ./custom_prerequisites.py
|
55 |
|
|
|
48 |
RUN pip3 install --break-system-packages cffi
|
49 |
# Install text2vec-transformers
|
50 |
WORKDIR /app/text2vec-transformers
|
51 |
+
#COPY --from=semitechnologies/transformers-inference:sentence-transformers-multi-qa-MiniLM-L6-cos-v1 /app /app/text2vec-transformers
|
52 |
+
#COPY --from=semitechnologies/transformers-inference:sentence-transformers-multi-qa-MiniLM-L6-cos-v1 /usr/local/bin /app/text2vec-transformers/bin
|
53 |
COPY ./multi-qa-MiniLM-L6-cos-v1 /app/text2vec-transformers
|
54 |
RUN ./custom_prerequisites.py
|
55 |
|
README.md
CHANGED
@@ -10,6 +10,7 @@ app_port: 8501
|
|
10 |
#app_file: app.py
|
11 |
pinned: true
|
12 |
startup_duration_timeout: 3 hours
|
|
|
13 |
---
|
14 |
|
15 |
# POC for Retrieval Augmented Generation with Large Language Models
|
|
|
10 |
#app_file: app.py
|
11 |
pinned: true
|
12 |
startup_duration_timeout: 3 hours
|
13 |
+
hardware: gpu
|
14 |
---
|
15 |
|
16 |
# POC for Retrieval Augmented Generation with Large Language Models
|
app.py
CHANGED
@@ -346,7 +346,7 @@ try:
|
|
346 |
logger.info("### Initializing LLM.")
|
347 |
llm = Llama(model_path,
|
348 |
#*,
|
349 |
-
n_gpu_layers
|
350 |
split_mode=llama_cpp.LLAMA_SPLIT_MODE_LAYER,
|
351 |
main_gpu=0,
|
352 |
tensor_split=None,
|
@@ -398,7 +398,7 @@ try:
|
|
398 |
###############################################################################
|
399 |
# Initialize the sentence transformer and encode the query prompt.
|
400 |
logger.debug(f"#### Encode text query prompt to create vectors. {promptText}")
|
401 |
-
model = SentenceTransformer('/app/multi-qa-MiniLM-L6-cos-v1')
|
402 |
vector = model.encode(promptText)
|
403 |
|
404 |
logLevel = logger.getEffectiveLevel()
|
@@ -539,7 +539,7 @@ try:
|
|
539 |
placeHolder = st.empty()
|
540 |
else:
|
541 |
st.session_state.spinGenMsg = False;
|
542 |
-
with st.spinner('Generating Completion
|
543 |
st.session_state.sysTAtext = st.session_state.sysTA
|
544 |
logger.debug(f"sysTAtext: {st.session_state.sysTAtext}")
|
545 |
wrklist = setPrompt(st.session_state.userpTA,st.selectRag)
|
|
|
346 |
logger.info("### Initializing LLM.")
|
347 |
llm = Llama(model_path,
|
348 |
#*,
|
349 |
+
n_gpu_layers=-1,
|
350 |
split_mode=llama_cpp.LLAMA_SPLIT_MODE_LAYER,
|
351 |
main_gpu=0,
|
352 |
tensor_split=None,
|
|
|
398 |
###############################################################################
|
399 |
# Initialize the sentence transformer and encode the query prompt.
|
400 |
logger.debug(f"#### Encode text query prompt to create vectors. {promptText}")
|
401 |
+
model = SentenceTransformer('/app/text2vec-transformers/multi-qa-MiniLM-L6-cos-v1')
|
402 |
vector = model.encode(promptText)
|
403 |
|
404 |
logLevel = logger.getEffectiveLevel()
|
|
|
539 |
placeHolder = st.empty()
|
540 |
else:
|
541 |
st.session_state.spinGenMsg = False;
|
542 |
+
with st.spinner('Generating Completion...'):
|
543 |
st.session_state.sysTAtext = st.session_state.sysTA
|
544 |
logger.debug(f"sysTAtext: {st.session_state.sysTAtext}")
|
545 |
wrklist = setPrompt(st.session_state.userpTA,st.selectRag)
|