File size: 2,185 Bytes
9e2a8ba
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21549be
9e2a8ba
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fa803cf
9e2a8ba
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
from pathlib import Path
import os
from dotenv import load_dotenv

# Must run before the os.getenv() reads further down this module, so that
# values supplied through a .env file (python-dotenv) are visible to them.
load_dotenv()

# Root directory that every data file below is resolved against.
PATH_DATA = Path.home().joinpath("git_repos", "CHI_corpus_analysis", "data")

# CHI metadata files (one spreadsheet, the rest parquet) stored under PATH_DATA.
PATH_RAW_CHI_METADATA = PATH_DATA.joinpath("CHI_raw.xls")
PATH_CLEAN_CHI_METADATA = PATH_DATA.joinpath("CHI_filtered.parquet")
PATH_CLEAN_CHI_METADATA_POSITIONS = PATH_DATA.joinpath("CHI_metadata.parquet")
PATH_CLEAN_CHI_METADATA_CLUSTERS = PATH_DATA.joinpath("CHI_metadata_clusters.parquet")
PATH_CLEAN_CHI_CLUSTERS_TOP_WORDS = PATH_DATA.joinpath("CHI_cluster_top_words.parquet")

# Embedding files stored under PATH_DATA.
PATH_EMBEDDINGS = PATH_DATA.joinpath("embeddings.parquet")
PATH_EMBEDDINGS_10d = PATH_DATA.joinpath("embeddings_10d.parquet")
PATH_SPARSE_EMBEDDINGS = PATH_DATA.joinpath("sparse_embeddings.parquet")

# Identifiers for the dense and sparse vectors
# (presumably named vectors in the vector store -- confirm against usage).
DENSE_VECTOR_NAME = "dense"
SPARSE_VECTOR_NAME = "sparse"

# Embedding model identifiers.
#
# NOTE: the sparse model is pinned to v1 because selecting v2 fails with
#   ValueError: Model prithivida/Splade_PP_en_v2 is not supported in
#   SparseTextEmbedding. Please check the supported models using
#   `SparseTextEmbedding.list_supported_models()`
# Open question: why v2 is not supported.
SBERT_MODEL_NAME = "BAAI/bge-base-en-v1.5"
SPARSE_MODEL_NAME = "prithivida/Splade_PP_en_v1"

# Application-level text constants.
APP_NAME = "CHI papers search engine"
DEFAULT_QUERY = "doctors in participatory design"

# On-disk locations for the Chroma and Qdrant stores, both under PATH_DATA.
CHROMA_DB_PATH = PATH_DATA.joinpath("chroma_db_vectors")
QDRANT_DB_PATH = PATH_DATA.joinpath("qdrant_db_vectors")

# Collection names.
COLLECTION_NAME = "chi_collection"
COLLECTION_HYBRID_NAME = "hybrid_collection"


# Qdrant settings, each overridable through an environment variable of the
# same name.
QDRANT_URL = os.environ.get("QDRANT_URL", "http://qdrant:6333")
QDRANT_KEY = os.environ.get("QDRANT_KEY")  # None when the variable is unset
# Parsed as a float; falls back to 0.65 when the variable is unset.
MIN_COSINE_SCORE = float(os.environ.get("MIN_COSINE_SCORE", "0.65"))

# Chroma collection name.
CHROMA_COLLECTION_NAME = "chi_collection"

# Local database file under PATH_DATA (used for the sqlite:/// fallback URL
# built further down) and the table names.
SQL_DB_PATH_LOCAL = PATH_DATA.joinpath("metadata.db")
METADATA_TABLE_NAME = "METADATA_TABLE"
CLUSTER_TOP_WORDS_TABLE_NAME = "CLUSTER_TOP_WORDS"


# Distance metric identifier.
DISTANCE_METRIC = "cosine"

# SQL connection settings taken from the environment; each one is None when
# its variable is not set.
SQL_HOST = os.environ.get("SQL_HOST")
SQL_PORT = os.environ.get("SQL_PORT")
SQL_DATABASE = os.environ.get("SQL_DATABASE")
SQL_USER_NAME = os.environ.get("SQL_USER_NAME")
SQL_PASSWORD = os.environ.get("SQL_PASSWORD")
SQL_EXTERNAL_URL = os.environ.get("SQL_EXTERNAL_URL")
SQL_INTERNAL_URL = os.environ.get("SQL_INTERNAL_URL")

# Database URL: the value of SQL_EXTERNAL_URL when it is set and non-empty,
# otherwise a SQLite URL pointing at the local file SQL_DB_PATH_LOCAL.
SQL_BASE = SQL_EXTERNAL_URL or f"sqlite:///{SQL_DB_PATH_LOCAL}"

# NOTE(security): never paste a real connection string into this module.
# Supply it through the SQL_EXTERNAL_URL environment variable (or the .env
# file) instead. Any credential that was ever committed to source control
# must be treated as compromised and rotated.