sivakum4 committed
Commit 326a604 · Parent: 9108a9a

Add: HFToken

.gitignore ADDED
@@ -0,0 +1,100 @@
+ # Byte-compiled / optimized / DLL files
+ __pycache__/
+ *.py[cod]
+ *$py.class
+
+ # C extensions
+ *.so
+
+ # Distribution / packaging
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ parts/
+ sdist/
+ var/
+ wheels/
+ pip-wheel-metadata/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+ *.whl
+
+ # Installer logs
+ pip-log.txt
+ pip-delete-this-directory.txt
+
+ # Unit test / coverage
+ htmlcov/
+ .tox/
+ .nox/
+ .coverage
+ .coverage.*
+ .cache
+ nosetests.xml
+ coverage.xml
+ *.cover
+ *.py,cover
+ .hypothesis/
+
+ # Type checker caches
+ .mypy_cache/
+ .dmypy.json
+ .dmypy.json
+ .pyre/
+ .pyright/
+
+ # Virtual environments
+ .venv/
+ venv/
+ env/
+ ENV/
+ env.bak/
+ venv.bak/
+
+ # VSCode settings
+ .vscode/
+
+ # JetBrains IDE
+ .idea/
+
+ # macOS
+ .DS_Store
+
+ # Logs
+ *.log
+
+ # Environment variables
+ .env
+
+ # Jupyter
+ .ipynb_checkpoints/
+
+ # Docker
+ docker-compose.override.yml
+ *.dockerfile
+
+ # Node / frontend
+ node_modules/
+ dist/
+ build/
+ .next/
+ out/
+
+ # Yarn v2+
+ .yarn/*
+ !.yarn/patches
+ !.yarn/releases
+ !.yarn/plugins
+ !.yarn/sdks
+ !.yarn/versions
+
+ # Caches
+ .cache/
+ # optionally Hugging Face cache directories
+ transformers_cache/
+ hf_cache/
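
The last two entries only take effect if the Hugging Face caches are actually redirected into the repository. A minimal sketch of doing that with the standard cache environment variables, assuming the repo-local directory names hf_cache/ and transformers_cache/ from the ignore list above:

import os

# Point the Hugging Face caches at repo-local directories so the ignore
# rules above apply. Set these before importing huggingface_hub or
# transformers, which read the variables at import time.
os.environ.setdefault("HF_HOME", os.path.abspath("hf_cache"))
os.environ.setdefault("TRANSFORMERS_CACHE", os.path.abspath("transformers_cache"))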
buffalo_rag/__init__.py ADDED
File without changes
buffalo_rag/model/__init__.py ADDED
File without changes
buffalo_rag/model/rag.py CHANGED
@@ -2,10 +2,9 @@ import os
  import json
  from typing import List, Dict, Any, Optional, Tuple
 
- import torch
  # from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 
- from huggingface_hub import InferenceApi
+ from huggingface_hub import InferenceClient
 
  from buffalo_rag.vector_store.db import VectorStore
 
@@ -22,11 +21,11 @@ class BuffaloRAG:
  hf_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
  if not hf_token:
  raise ValueError("Please set HUGGINGFACEHUB_API_TOKEN in your environment.")
- self.inference = InferenceApi(
- repo_id=model_name,
- token=hf_token,
- task="text-generation",
- )
+ self.client = InferenceClient(
+ provider="cerebras",
+ api_key=hf_token,
+ )
+
 
  def retrieve(self,
  query: str,
@@ -69,16 +68,18 @@ class BuffaloRAG:
  Answer:"""
 
  try:
- payload = {
- "inputs": prompt,
- "parameters": {"max_new_tokens": 516, "do_sample": False}
- }
- # The SDK will handle headers/auth under the hood
- response = self.inference(**payload)
- # response is a list of dicts: [{"generated_text": "..."}]
- text = response[0]["generated_text"]
- # strip prompt echo
- return text[len(prompt):].strip()
+ completion = self.client.chat.completions.create(
+ model="meta-llama/Llama-3.3-70B-Instruct",
+ messages=[
+ {
+ "role": "user",
+ "content": prompt
+ }
+ ],
+ max_tokens=512,
+ )
+
+ return completion.choices[0].message.content
  except Exception as e:
  print(f"Error during generation: {str(e)}")
  # Fallback response
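
For reference, the new call path in rag.py can be exercised on its own. A minimal sketch, assuming a huggingface_hub version recent enough to accept the provider argument and a valid HUGGINGFACEHUB_API_TOKEN in the environment:

import os
from huggingface_hub import InferenceClient

# Read the token the same way BuffaloRAG does and fail early if it is missing.
hf_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
if not hf_token:
    raise ValueError("Please set HUGGINGFACEHUB_API_TOKEN in your environment.")

# Route chat completions through the Cerebras provider, as in the diff above.
client = InferenceClient(provider="cerebras", api_key=hf_token)

completion = client.chat.completions.create(
    model="meta-llama/Llama-3.3-70B-Instruct",
    messages=[{"role": "user", "content": "Say hello in one sentence."}],
    max_tokens=512,
)
print(completion.choices[0].message.content)

Unlike the removed InferenceApi path, the response is a structured chat completion, so there is no prompt echo to strip from the generated text.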
buffalo_rag/vector_store/__init__.py ADDED
File without changes