HF_Final_Assignment_Template

Sleeping

Samuel Thomas committed on May 9

Commit

288225c

1 Parent(s): 7753f0c

fix for rag

Files changed (1) hide show

tools.py CHANGED Viewed

@@ -104,15 +104,20 @@ llm_pipe = pipeline(
 )
 # Initialize RAG components
-tokenizer = RagTokenizer.from_pretrained("facebook/rag-token-base")
 retriever = RagRetriever.from_pretrained(
     "facebook/rag-token-base",
     index_name="exact",           # or "legacy" for legacy FAISS index
-    use_dummy_dataset=False,        # set to False and download the full index for real Wikipedia retrieval
     trust_remote_code=True
 )
-rag_model = RagSequenceForGeneration.from_pretrained("facebook/rag-token-base", retriever=retriever)
 # Speech-to-text pipeline
 asr_pipe = pipeline(
     "automatic-speech-recognition",

 )
 # Initialize RAG components
+tokenizer = RagTokenizer.from_pretrained("facebook/rag-token-base", trust_remote_code=True)
 retriever = RagRetriever.from_pretrained(
     "facebook/rag-token-base",
     index_name="exact",           # or "legacy" for legacy FAISS index
+    use_dummy_dataset=False,       # set to False and download the full index for real Wikipedia retrieval
+    trust_remote_code=True,       # Trust remote code for dataset loading
+    dataset_revision="main",      # Specify a fixed revision
+    dataset="wiki_dpr",           # Explicitly specify dataset name
+)
+rag_model = RagSequenceForGeneration.from_pretrained(
+    "facebook/rag-token-base",
+    retriever=retriever,
     trust_remote_code=True
 )
 # Speech-to-text pipeline
 asr_pipe = pipeline(
     "automatic-speech-recognition",