Samuel Thomas committed on
Commit
288225c
·
1 Parent(s): 7753f0c

fix for rag

Browse files
Files changed (1) hide show
  1. tools.py +9 -4
tools.py CHANGED
@@ -104,15 +104,20 @@ llm_pipe = pipeline(
104
  )
105
 
106
  # Initialize RAG components
107
- tokenizer = RagTokenizer.from_pretrained("facebook/rag-token-base")
108
  retriever = RagRetriever.from_pretrained(
109
  "facebook/rag-token-base",
110
  index_name="exact", # or "legacy" for legacy FAISS index
111
- use_dummy_dataset=False, # set to False and download the full index for real Wikipedia retrieval
 
 
 
 
 
 
 
112
  trust_remote_code=True
113
  )
114
- rag_model = RagSequenceForGeneration.from_pretrained("facebook/rag-token-base", retriever=retriever)
115
-
116
  # Speech-to-text pipeline
117
  asr_pipe = pipeline(
118
  "automatic-speech-recognition",
 
104
  )
105
 
106
  # Initialize RAG components
107
+ tokenizer = RagTokenizer.from_pretrained("facebook/rag-token-base", trust_remote_code=True)
108
  retriever = RagRetriever.from_pretrained(
109
  "facebook/rag-token-base",
110
  index_name="exact", # or "legacy" for legacy FAISS index
111
+ use_dummy_dataset=False, # set to False and download the full index for real Wikipedia retrieval
112
+ trust_remote_code=True, # Trust remote code for dataset loading
113
+ dataset_revision="main", # Dataset revision to load ("main" is a moving branch; use a commit hash to truly pin it)
114
+ dataset="wiki_dpr", # Explicitly specify dataset name
115
+ )
116
+ rag_model = RagSequenceForGeneration.from_pretrained(
117
+ "facebook/rag-token-base",
118
+ retriever=retriever,
119
  trust_remote_code=True
120
  )
 
 
121
  # Speech-to-text pipeline
122
  asr_pipe = pipeline(
123
  "automatic-speech-recognition",