singhdevendra58 committed on
Commit
1497d33
verified
1 Parent(s): 98fa964

Delete doc_qa_1.py

Browse files
Files changed (1) hide show
  1. doc_qa_1.py +0 -62
doc_qa_1.py DELETED
@@ -1,62 +0,0 @@
1
- from langchain.vectorstores import FAISS
2
- from langchain.text_splitter import RecursiveCharacterTextSplitter
3
- from langchain.embeddings import HuggingFaceEmbeddings
4
- from langchain.text_splitter import CharacterTextSplitter
5
- from langchain.docstore.document import Document
6
- from transformers import pipeline
7
- from langchain.chains.question_answering import load_qa_chain
8
- import os
9
-
10
# Step 1: Load the embedding model and the QA pipeline once, at import time.
# The QA pipeline is used directly (don't wrap it in HuggingFacePipeline).

# Embedding model handed to FAISS.from_documents in docs_vector_index.
embeddings = HuggingFaceEmbeddings(model_name="intfloat/multilingual-e5-small")
# Question-answering pipeline invoked by run_custom_qa.
qa_pipeline = pipeline("question-answering", model="deepset/xlm-roberta-base-squad2")
# Relative path to a tmp/ directory -- presumably where the documents to be
# indexed are placed; TODO confirm against the callers of this module.
multi_directory_path=r'tmp/'
14
-
15
def docs_vector_index(directory_path=None):
    """Build a FAISS vector index from every file found under a directory.

    Args:
        directory_path: Directory to load recursively. When omitted, the
            module-level ``multi_directory_path`` is used, so the function
            is not tied to one developer machine's absolute path.

    Returns:
        A ``FAISS`` store built from the split documents with the
        module-level ``embeddings``, or the empty string ``''`` when the
        directory yields no chunks (sentinel preserved from the original
        interface).
    """
    from langchain.document_loaders import DirectoryLoader

    if directory_path is None:
        directory_path = multi_directory_path

    loader = DirectoryLoader(
        directory_path,
        glob="**/*",  # This pattern loads all files; modify as needed
    )
    docs = loader.load()

    # NOTE(review): separators are tried in the listed order, so a space is
    # preferred over newlines and full stops -- confirm this is intentional.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1024, chunk_overlap=100, separators=[" ", ",", "\n", "."]
    )
    print(docs)
    docs_chunks = text_splitter.split_documents(docs)

    print(f"docs_chunks length: {len(docs_chunks)}")
    print('********************docs_chunks', docs_chunks)
    if not docs_chunks:
        # Nothing to index: keep returning the '' sentinel.
        return ''
    return FAISS.from_documents(docs_chunks, embeddings)
41
-
42
-
43
-
44
def run_custom_qa(question, retrieved_docs):
    """Answer *question* using the text of the retrieved documents.

    Args:
        question: Question string forwarded to the QA pipeline.
        retrieved_docs: Iterable of objects exposing a ``page_content``
            string; their contents are joined with single spaces to form
            the context.

    Returns:
        The raw output of ``qa_pipeline`` (the answer text is under the
        ``"answer"`` key). When the documents contain no text, a
        placeholder result with an empty answer is returned instead.
    """
    context = " ".join(doc.page_content for doc in retrieved_docs)
    if not context.strip():
        # The transformers question-answering pipeline rejects an empty
        # context with ValueError; report "no answer" in the same shape
        # as a pipeline result rather than crashing.
        return {"score": 0.0, "start": 0, "end": 0, "answer": ""}
    return qa_pipeline(question=question, context=context)
48
-
49
- # # Step 6: Ask question
50
- # question = "東京大学はいつ設立されましたか？"
51
- # relevant_docs = retriever.get_relevant_documents(question)
52
- # answer = run_custom_qa(question, relevant_docs)
53
- #
54
- # print("Answer:", answer)
55
-
56
def doc_qa(query, db):
    """Retrieve documents relevant to *query* from *db* and answer from them.

    Args:
        query: The user's question.
        db: Vector store exposing ``as_retriever()``. ``None`` or a string
            (such as the ``''`` that ``docs_vector_index`` returns when
            nothing was indexed) is treated as "no index available".

    Returns:
        The result of ``run_custom_qa`` for the retrieved documents, or a
        placeholder result with an empty ``"answer"`` when no index is
        available.
    """
    print("*************************custom qa doc_qa", query)
    if db is None or isinstance(db, str):
        # No vector store was built; calling .as_retriever() on the ''
        # sentinel would raise AttributeError.
        response = {"score": 0.0, "start": 0, "end": 0, "answer": ""}
    else:
        retriever = db.as_retriever()
        relevant_docs = retriever.get_relevant_documents(query)
        response = run_custom_qa(query, relevant_docs)
    print('response', response)
    return response