Abhinav Gavireddi committed · Commit a1d050d · Parent(s): 80de6a9

[fix]: fixed logger issues

Files changed:
- .github/workflows/ci.yaml +3 -2
- src/__init__.py +4 -11
- src/qa.py +2 -2
- src/retriever.py +1 -1
- src/utils.py +3 -2
.github/workflows/ci.yaml CHANGED

@@ -19,14 +19,15 @@ jobs:
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
-          pip install
+          pip install uv
+          uv pip install -r requirements.txt
       # - name: Run tests
       #   run: |
       #     if [ -f tests/test.py ]; then python -m unittest discover -s tests; fi
 
   deploy-to-hf:
     runs-on: ubuntu-latest
-
+    needs: build-and-test
     environment: prod
     steps:
       - name: Checkout code
src/__init__.py CHANGED

@@ -1,6 +1,7 @@
 import os
 from dotenv import load_dotenv
 import bleach
+from loguru import logger
 
 load_dotenv()
 
@@ -40,16 +41,8 @@ class GPPConfig:
     DEDUP_SIM_THRESHOLD = float(os.getenv('DEDUP_SIM_THRESHOLD', 0.9))
     EXPANSION_SIM_THRESHOLD = float(os.getenv('EXPANSION_SIM_THRESHOLD', 0.85))
     COREF_CONTEXT_SIZE = int(os.getenv('COREF_CONTEXT_SIZE', 3))
-
-class GPPConfig:
-    """
-    Configuration for GPP pipeline.
-    """
-
-    CHUNK_TOKEN_SIZE = 256
-    DEDUP_SIM_THRESHOLD = 0.9
-    EXPANSION_SIM_THRESHOLD = 0.85
-    COREF_CONTEXT_SIZE = 3
     HNSW_EF_CONSTRUCTION = int(os.getenv("HNSW_EF_CONSTRUCTION", "200"))
     HNSW_M = int(os.getenv("HNSW_M", "16"))
-    HNSW_EF_SEARCH = int(os.getenv("HNSW_EF_SEARCH", "50"))
+    HNSW_EF_SEARCH = int(os.getenv("HNSW_EF_SEARCH", "50"))
+
+
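The only confirmed change here is adding `from loguru import logger` to `src/__init__.py` (plus removal of a duplicate `GPPConfig` definition), which lets every submodule share one logger via `from src import logger`. As a minimal sketch of what that package-level setup could look like, the `logger.remove()`/`logger.add()` configuration below (sink, level) is an illustrative assumption and is not part of this commit:

```python
# src/__init__.py (sketch) -- only the loguru import is confirmed by the diff;
# the explicit sink configuration below is an assumption for illustration.
import os
import sys

from dotenv import load_dotenv
from loguru import logger

load_dotenv()

# Hypothetical configuration: drop loguru's default handler and add one whose
# level is controlled by an environment variable.
logger.remove()
logger.add(
    sys.stderr,
    level=os.getenv("LOG_LEVEL", "INFO"),
    backtrace=False,
    diagnose=False,
)
```

Re-exporting the logger from the package `__init__` is what allows `src/qa.py`, `src/retriever.py`, and `src/utils.py` in the hunks below to all write `from src import logger` instead of configuring their own handlers.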
src/qa.py CHANGED

@@ -11,8 +11,8 @@ Each component is modular and can be swapped or extended (e.g., add HyDE retriev
 import os
 from typing import List, Dict, Any, Tuple
 
-from src import RerankerConfig
-from src.utils import LLMClient
+from src import RerankerConfig, logger
+from src.utils import LLMClient
 from src.retriever import Retriever, RetrieverConfig
 
 class Reranker:
src/retriever.py CHANGED

@@ -2,7 +2,7 @@ import os
 from typing import List, Dict, Any
 
 from src.config import RetrieverConfig
-from src
+from src import logger  # Use logger from src/__init__.py
 
 class Retriever:
     """
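Both `src/qa.py` and `src/retriever.py` now pull the shared logger with `from src import logger`. A small sketch of how that might be used inside `Retriever` follows; the constructor, the `search()` method, its parameters, and the log messages are assumptions for illustration, since the commit only touches the import line:

```python
# Sketch of a submodule using the package-level logger. Retriever's actual
# methods are not shown in this commit; search() below is hypothetical.
from typing import Any, Dict, List

from src import logger  # Use logger from src/__init__.py
from src.config import RetrieverConfig


class Retriever:
    """Retrieves candidate chunks for a query (internals assumed)."""

    def __init__(self, config: RetrieverConfig):
        self.config = config

    def search(self, query: str, top_k: int = 5) -> List[Dict[str, Any]]:
        logger.debug(f"Retrieving top {top_k} chunks for query: {query!r}")
        results: List[Dict[str, Any]] = []  # placeholder for the real lookup
        logger.info(f"Retriever returned {len(results)} candidates")
        return results
```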
src/utils.py CHANGED

@@ -6,6 +6,7 @@ import openai
 from typing import List
 from openai import AzureOpenAI
 from langchain_openai import AzureOpenAIEmbeddings
+from src import logger  # Import logger from src/__init__.py
 
 
 class LLMClient:
@@ -21,7 +22,7 @@ class LLMClient:
         openai_model_name = model or os.getenv('OPENAI_MODEL', 'gpt-4o')
 
         if not (azure_api_key or azure_endpoint or azure_api_version or openai_model_name):
-
+            logger.error('OPENAI_API_KEY is not set')
             raise EnvironmentError('Missing OPENAI_API_KEY')
         client = AzureOpenAI(
             api_key=azure_api_key,
@@ -40,7 +41,7 @@ class LLMClient:
             text = resp.choices[0].message.content.strip()
             return text
         except Exception as e:
-
+            logger.error(f'LLM generation failed: {e}')
             raise
 
 
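The `src/utils.py` hunks follow a log-then-raise pattern: record the failure with the shared logger, then let the original exception propagate. One note on the unchanged context line: `if not (azure_api_key or azure_endpoint or ...)` only fires when every variable is unset; if the intent is to require all of them, the check would need `and` instead. The sketch below shows the pattern in isolation; `generate()`, its arguments, and the surrounding client setup are assumptions, not the file's actual code:

```python
# Log-then-raise sketch: the logger call records context, and the bare `raise`
# preserves the original exception type and traceback. generate() is illustrative.
from src import logger  # Import logger from src/__init__.py


def generate(client, model: str, prompt: str) -> str:
    """Call the chat completions endpoint and return the message text (sketch)."""
    try:
        resp = client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": prompt}],
        )
        text = resp.choices[0].message.content.strip()
        return text
    except Exception as e:
        # Mirror the diff: log the failure, then re-raise unchanged so callers
        # can still handle or report the original error.
        logger.error(f"LLM generation failed: {e}")
        raise
```

Because the bare `raise` re-raises the active exception, adding the `logger.error` call does not swallow errors; callers of `LLMClient` still see the failure exactly as before.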