"""Pydantic request/response schemas and SQLAlchemy table models."""

from enum import StrEnum

from pydantic import BaseModel
from sqlalchemy import Column
from sqlalchemy import Float
from sqlalchemy import Integer
from sqlalchemy import String
# `declarative_base` lives in `sqlalchemy.orm` since SQLAlchemy 1.4; the old
# `sqlalchemy.ext.declarative` location is deprecated and warns on 2.0.
from sqlalchemy.orm import declarative_base

from settings import CLUSTER_TOP_WORDS_TABLE_NAME
from settings import METADATA_TABLE_NAME

# Shared declarative base for the ORM models defined in this module.
Base = declarative_base()


class VectorType(StrEnum):
    """Vector kinds a search request can select."""

    dense = "dense"
    sparse = "sparse"
    hybrid = "hybrid"


class SearchRequestVector(BaseModel):
    """Search request whose defaults select the dense vector type."""

    input_text: str
    # presumably the maximum number of results returned -- confirm against
    # the code that consumes this request
    limit: int = 2000
    min_year: int | None = None
    # NOTE(review): hard-coded calendar year as default; confirm whether this
    # should be updated over time or derived at runtime.
    max_year: int | None = 2025
    # presumably a minimum score for dense matches, with None meaning
    # "no threshold" -- confirm against the consumer
    score_threshold_dense: float | None = 0.7
    vector_type: VectorType = VectorType.dense


class SearchRequestHybrid(SearchRequestVector):
    """Search request whose defaults select the hybrid vector type.

    Inherits every field of ``SearchRequestVector``, lowers the default
    ``limit`` to 50, and adds separate dense and sparse limits.
    """

    limit: int = 50
    # presumably per-retriever candidate limits applied before the final
    # ``limit`` -- confirm against the consumer
    limit_dense: int = 500
    limit_sparse: int = 50
    vector_type: VectorType = VectorType.hybrid


class SemanticSearchResults(BaseModel):
    """A single search hit: a DOI and its score."""

    doi: str
    score: float


class MetadataPosition(BaseModel):
    """Document metadata together with its cluster and x/y coordinates."""

    doi: str
    cluster: str
    x: float
    y: float
    title: str
    year: int
    abstract: str  # TODO: Can we have everything in memory?


class MetadataFull(MetadataPosition):
    """``MetadataPosition`` extended with a scholar link."""

    scholar_link: str


class MetadataDB(Base):
    """ORM model for the metadata table, keyed by DOI.

    The column names match the fields of ``MetadataFull``.
    """

    __tablename__ = METADATA_TABLE_NAME

    doi = Column(String, primary_key=True)
    title = Column(String)
    abstract = Column(String)
    cluster = Column(String)
    year = Column(Integer)
    x = Column(Float)
    y = Column(Float)
    scholar_link = Column(String)


class ClusterWordsDB(Base):
    """ORM model for the cluster top-words table, keyed by cluster."""

    __tablename__ = CLUSTER_TOP_WORDS_TABLE_NAME

    cluster = Column(String, primary_key=True)
    # Stored as one string column; the serialization of the individual words
    # is not visible in this module.
    top_words = Column(String)