masadonline committed on
Commit
8b78680
·
verified ·
1 Parent(s): 0b29458

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -2
app.py CHANGED
@@ -18,8 +18,9 @@ from pdfminer.layout import LAParams
18
 
19
  import datetime
20
 
21
- APP_START_TIME = datetime.datetime.utcnow()
22
 
 
23
  # --- PDF Extraction ---
24
  def extract_text_from_pdf(pdf_path):
25
  output_string = StringIO()
@@ -131,7 +132,7 @@ def setup_knowledge_base():
131
  tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
132
  chunks = chunk_text(all_text, tokenizer)
133
  model = SentenceTransformer('all-mpnet-base-v2')
134
- embeddings = model.encode(chunks, truncate=True, show_progress_bar=False)
135
  dim = embeddings[0].shape[0]
136
  index = faiss.IndexFlatL2(dim)
137
  index.add(np.array(embeddings).astype('float32'))
 
18
 
19
  import datetime
20
 
21
+ APP_START_TIME = datetime.datetime.now(datetime.timezone.utc)
22
 
23
+ os.environ["PYTORCH_JIT"] = "0"
24
  # --- PDF Extraction ---
25
  def extract_text_from_pdf(pdf_path):
26
  output_string = StringIO()
 
132
  tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
133
  chunks = chunk_text(all_text, tokenizer)
134
  model = SentenceTransformer('all-mpnet-base-v2')
135
+ embeddings = model.encode(chunks, show_progress_bar=False, truncation=True, max_length=512)
136
  dim = embeddings[0].shape[0]
137
  index = faiss.IndexFlatL2(dim)
138
  index.add(np.array(embeddings).astype('float32'))