sivakum4 committed
Commit 326a604 · Parent: 9108a9a

Add: HFToken

.gitignore ADDED
@@ -0,0 +1,100 @@
+ # Byte-compiled / optimized / DLL files
+ __pycache__/
+ *.py[cod]
+ *$py.class
+
+ # C extensions
+ *.so
+
+ # Distribution / packaging
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ parts/
+ sdist/
+ var/
+ wheels/
+ pip-wheel-metadata/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+ *.whl
+
+ # Installer logs
+ pip-log.txt
+ pip-delete-this-directory.txt
+
+ # Unit test / coverage
+ htmlcov/
+ .tox/
+ .nox/
+ .coverage
+ .coverage.*
+ .cache
+ nosetests.xml
+ coverage.xml
+ *.cover
+ *.py,cover
+ .hypothesis/
+
+ # Type checker caches
+ .mypy_cache/
+ .dmypy.json
+ .dmypy.json
+ .pyre/
+ .pyright/
+
+ # Virtual environments
+ .venv/
+ venv/
+ env/
+ ENV/
+ env.bak/
+ venv.bak/
+
+ # VSCode settings
+ .vscode/
+
+ # JetBrains IDE
+ .idea/
+
+ # macOS
+ .DS_Store
+
+ # Logs
+ *.log
+
+ # Environment variables
+ .env
+
+ # Jupyter
+ .ipynb_checkpoints/
+
+ # Docker
+ docker-compose.override.yml
+ *.dockerfile
+
+ # Node / frontend
+ node_modules/
+ dist/
+ build/
+ .next/
+ out/
+
+ # Yarn v2+
+ .yarn/*
+ !.yarn/patches
+ !.yarn/releases
+ !.yarn/plugins
+ !.yarn/sdks
+ !.yarn/versions
+
+ # Caches
+ .cache/
+ # optionally Hugging Face cache directories
+ transformers_cache/
+ hf_cache/
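
The last two entries only take effect if the Hugging Face caches are actually redirected into the repository. A minimal sketch of doing that with the standard cache environment variables, assuming the repo-local directory names hf_cache/ and transformers_cache/ from the ignore list above:

import os

# Point the Hugging Face caches at repo-local directories so the ignore
# rules above apply. Set these before importing huggingface_hub or
# transformers, which read the variables at import time.
os.environ.setdefault("HF_HOME", os.path.abspath("hf_cache"))
os.environ.setdefault("TRANSFORMERS_CACHE", os.path.abspath("transformers_cache"))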
buffalo_rag/__init__.py ADDED
File without changes
buffalo_rag/model/__init__.py ADDED
File without changes
buffalo_rag/model/rag.py CHANGED
@@ -2,10 +2,9 @@ import os
  import json
  from typing import List, Dict, Any, Optional, Tuple
 
- import torch
  # from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 
- from huggingface_hub import InferenceApi
+ from huggingface_hub import InferenceClient
 
  from buffalo_rag.vector_store.db import VectorStore
 
@@ -22,11 +21,11 @@ class BuffaloRAG:
  hf_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
  if not hf_token:
  raise ValueError("Please set HUGGINGFACEHUB_API_TOKEN in your environment.")
- self.inference = InferenceApi(
- repo_id=model_name,
- token=hf_token,
- task="text-generation",
- )
+ self.client = InferenceClient(
+ provider="cerebras",
+ api_key=hf_token,
+ )
+
 
  def retrieve(self,
  query: str,
@@ -69,16 +68,18 @@ class BuffaloRAG:
  Answer:"""
 
  try:
- payload = {
- "inputs": prompt,
- "parameters": {"max_new_tokens": 516, "do_sample": False}
- }
- # The SDK will handle headers/auth under the hood
- response = self.inference(**payload)
- # response is a list of dicts: [{"generated_text": "..."}]
- text = response[0]["generated_text"]
- # strip prompt echo
- return text[len(prompt):].strip()
+ completion = self.client.chat.completions.create(
+ model="meta-llama/Llama-3.3-70B-Instruct",
+ messages=[
+ {
+ "role": "user",
+ "content": prompt
+ }
+ ],
+ max_tokens=512,
+ )
+
+ return completion.choices[0].message.content
  except Exception as e:
  print(f"Error during generation: {str(e)}")
  # Fallback response
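
For reference, the new call path in rag.py can be exercised on its own. A minimal sketch, assuming a huggingface_hub version recent enough to accept the provider argument and a valid HUGGINGFACEHUB_API_TOKEN in the environment:

import os
from huggingface_hub import InferenceClient

# Read the token the same way BuffaloRAG does and fail early if it is missing.
hf_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
if not hf_token:
    raise ValueError("Please set HUGGINGFACEHUB_API_TOKEN in your environment.")

# Route chat completions through the Cerebras provider, as in the diff above.
client = InferenceClient(provider="cerebras", api_key=hf_token)

completion = client.chat.completions.create(
    model="meta-llama/Llama-3.3-70B-Instruct",
    messages=[{"role": "user", "content": "Say hello in one sentence."}],
    max_tokens=512,
)
print(completion.choices[0].message.content)

Unlike the removed InferenceApi path, the response is a structured chat completion, so there is no prompt echo to strip from the generated text.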
buffalo_rag/vector_store/__init__.py ADDED
File without changes