Update app.py
app.py CHANGED
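This commit makes three fixes to app.py: extract_text_from_pdf() passes an explicit "text" mode to page.get_text(), generate_response() extracts the input_ids tensor from the tokenizer output and actually passes it to model.generate(), and main() wraps inference in a try/except so a failure surfaces as a Streamlit error message.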
@@ -24,7 +24,7 @@ def extract_text_from_pdf(pdf_file):
     text = ""
     for page_num in range(doc.page_count):
         page = doc.load_page(page_num)
-        text += page.get_text()
+        text += page.get_text("text")  # Ensure text extraction
     return text

 # Function to generate response from model
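In PyMuPDF, "text" is the default mode of Page.get_text(), so this change is behavior-preserving and only makes the extraction mode explicit. For context, a minimal sketch of how the full helper might look, assuming the document is opened from the uploaded file's bytes (the lines above line 24 are outside this hunk):

import fitz  # PyMuPDF

def extract_text_from_pdf(pdf_file):
    # Assumption: pdf_file is a file-like object, e.g. a Streamlit upload
    doc = fitz.open(stream=pdf_file.read(), filetype="pdf")
    text = ""
    for page_num in range(doc.page_count):
        page = doc.load_page(page_num)
        text += page.get_text("text")  # plain-text extraction mode
    return text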
@@ -35,18 +35,19 @@ def generate_response(input_text, query, tokenizer, model):
     Based on the following context/document:
     {input_text}
     Please answer the question: {query}
-
     ### Response:
     """
-    input_ids = tokenizer(prompt, return_tensors="pt")
+    input_ids = tokenizer(prompt, return_tensors="pt").input_ids
     if torch.cuda.is_available():
         input_ids = input_ids.to("cuda")
+
     # Generate a response from the model
     outputs = model.generate(
-
+        input_ids=input_ids,
         max_new_tokens=500,
         no_repeat_ngram_size=5
     )
+
     # Decode the generated output into readable text
     return tokenizer.decode(outputs[0], skip_special_tokens=True)
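This hunk fixes the two real bugs in the file: tokenizer(prompt, return_tensors="pt") returns a BatchEncoding (a dict-like wrapper), not a tensor, so .input_ids is needed to get the token tensor, and model.generate() was previously called with no input at all. A self-contained sketch of the fixed call path, using gpt2 as a stand-in checkpoint (the app's real model comes from load_model(), which this diff does not show):

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

prompt = "Please answer the question: What does PyMuPDF do?\n### Response:"
# .input_ids pulls the token tensor out of the BatchEncoding
input_ids = tokenizer(prompt, return_tensors="pt").input_ids
if torch.cuda.is_available():
    model = model.to("cuda")  # model must sit on the same device as the inputs
    input_ids = input_ids.to("cuda")

outputs = model.generate(
    input_ids=input_ids,     # the fix: generate() needs the prompt tokens
    max_new_tokens=500,
    no_repeat_ngram_size=5,  # never repeat the same 5-gram in the output
)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

One thing the hunk does not change: input_ids is moved to "cuda" but the model's own device placement is handled elsewhere, presumably inside load_model(); if the model stays on CPU, the generate() call would raise a device-mismatch error.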
@@ -75,8 +76,11 @@ def main():
     # Load the model and tokenizer
     tokenizer, model = load_model()
     # Generate the response using the model
-
-
+    try:
+        response = generate_response(pdf_text, query, tokenizer, model)
+        st.text_area("Response", response, height=200)
+    except Exception as e:
+        st.error(f"Error generating response: {e}")

 if __name__ == "__main__":
     main()
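The last hunk wraps inference so the user sees a concise st.error message instead of Streamlit's default full traceback. The same pattern in isolation, with run_inference as a hypothetical stand-in for generate_response:

import streamlit as st

def run_inference(text, query):
    raise RuntimeError("model not loaded")  # placeholder failure

try:
    response = run_inference("document text", "What is this about?")
    st.text_area("Response", response, height=200)
except Exception as e:
    # st.error renders the message as a red error box in the app
    st.error(f"Error generating response: {e}")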
|