Spaces:
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -18,8 +18,9 @@ from pdfminer.layout import LAParams
|
|
18 |
|
19 |
import datetime
|
20 |
|
21 |
-
APP_START_TIME = datetime.datetime.
|
22 |
|
|
|
23 |
# --- PDF Extraction ---
|
24 |
def extract_text_from_pdf(pdf_path):
|
25 |
output_string = StringIO()
|
@@ -131,7 +132,7 @@ def setup_knowledge_base():
|
|
131 |
tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
|
132 |
chunks = chunk_text(all_text, tokenizer)
|
133 |
model = SentenceTransformer('all-mpnet-base-v2')
|
134 |
-
embeddings = model.encode(chunks,
|
135 |
dim = embeddings[0].shape[0]
|
136 |
index = faiss.IndexFlatL2(dim)
|
137 |
index.add(np.array(embeddings).astype('float32'))
|
|
|
18 |
|
19 |
import datetime
|
20 |
|
21 |
+
APP_START_TIME = datetime.datetime.now(datetime.timezone.utc)
|
22 |
|
23 |
+
os.environ["PYTORCH_JIT"] = "0"
|
24 |
# --- PDF Extraction ---
|
25 |
def extract_text_from_pdf(pdf_path):
|
26 |
output_string = StringIO()
|
|
|
132 |
tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
|
133 |
chunks = chunk_text(all_text, tokenizer)
|
134 |
model = SentenceTransformer('all-mpnet-base-v2')
|
135 |
+
embeddings = model.encode(chunks, show_progress_bar=False, truncation=True, max_length=512)
|
136 |
dim = embeddings[0].shape[0]
|
137 |
index = faiss.IndexFlatL2(dim)
|
138 |
index.add(np.array(embeddings).astype('float32'))
|