# DocFinder / schemas.py
# om4r932's picture
# Add documentation + fix bugs
# 405abe1
from pydantic import BaseModel, Field
from typing import *
class DocRequest(BaseModel):
    """
    Request model for single document retrieval.
    Used to specify which document or specification to retrieve by its unique identifier.
    """

    # Required: `...` as the default marks the field as mandatory.
    doc_id: str = Field(
        default=...,
        title="Document Identifier",
        description="Unique identifier for the document or specification.",
    )
class DocResponse(BaseModel):
    """
    Response model for single document retrieval.
    Contains all available metadata and access information for the requested document.
    """

    # --- Required fields (`...` default means the value must be supplied) ---
    doc_id: str = Field(
        default=...,
        title="Document Identifier",
        description="Echoed document identifier from the request",
    )
    url: str = Field(
        default=...,
        title="Document URL",
        description="Direct download URL",
    )

    # --- Optional metadata; both fall back to None when omitted ---
    version: Optional[str] = Field(
        default=None,
        title="Document Version",
        description="Extracted version information (e.g., 'h20', 'v17.9.0') when available",
    )
    scope: Optional[str] = Field(
        default=None,
        title="Document Scope",
        description="Brief description of the document's scope and purpose from metadata",
    )

    # --- Required timing information ---
    search_time: float = Field(
        default=...,
        title="Search Duration",
        description="Time spent processing the request in seconds",
    )
class BatchDocRequest(BaseModel):
    """
    Request model for batch document retrieval.
    Allows retrieval of multiple documents in a single API call for efficiency.
    """

    # Required list of string identifiers; no length constraint is declared here.
    doc_ids: List[str] = Field(
        default=...,
        title="Document Identifier List",
        description="List of document identifiers to retrieve.",
    )
class BatchDocResponse(BaseModel):
    """
    Response model for batch document retrieval.
    Provides organized results separating found documents from missing ones.
    """

    # All three fields are required (`...` default).

    # String-to-string mapping of the documents that were found.
    results: Dict[str, str] = Field(
        default=...,
        title="Found Documents",
        description="Dictionary mapping document IDs to their corresponding URLs",
    )

    # Identifiers that produced no result.
    missing: List[str] = Field(
        default=...,
        title="Missing Documents",
        description="List of document IDs that could not be found or are not indexed",
    )

    # Timing for the whole batch.
    search_time: float = Field(
        default=...,
        title="Total Search Duration",
        description="Total time spent processing the batch request in seconds",
    )
class KeywordRequest(BaseModel):
    """
    Request model for keyword-based specification search.
    Provides flexible search options with multiple modes and filtering capabilities.
    """

    # Defaults to the empty string; the Optional annotation also admits None.
    keywords: Optional[str] = Field(
        default="",
        title="Search Keywords",
        description="Comma-separated keywords for searching specifications.",
        examples=["5G NR,authentication", "handover,mobility", "security,encryption"],
    )

    # The only required field of this model; restricted to "quick" or "deep".
    search_mode: Literal["quick", "deep"] = Field(
        default=...,
        title="Search Mode",
        description="Search mode: 'quick' searches metadata only, 'deep' searches metadata and document content",
    )

    # Defaults to False.
    case_sensitive: Optional[bool] = Field(
        default=False,
        title="Case Sensitive Search",
        description="Enable case-sensitive keyword matching",
    )

    # Defaults to "all"; otherwise one of the listed literals (or None).
    source: Optional[Literal["3GPP", "ETSI", "all"]] = Field(
        default="all",
        title="Document Source",
        description="Limit search to specific organization or search all repositories",
    )

    # Defaults to None.
    spec_type: Optional[Literal["TS", "TR"]] = Field(
        default=None,
        title="Specification Type",
        description="Filter by specification type: 'TS' (Technical Specification) or 'TR' (Technical Report)",
    )

    # Defaults to "and".
    mode: Optional[Literal["and", "or"]] = Field(
        default="and",
        title="Search Logic",
        description="Logical operator: 'and' requires all keywords to match, 'or' matches any keyword",
    )
class BM25KeywordRequest(BaseModel):
    """
    Request model for BM25 advanced search.
    Provides parameters for relevance-based search using BM25 scoring algorithm.
    """

    # Every field has a default, so an empty request body validates.

    # Defaults to the empty string; the Optional annotation also admits None.
    keywords: Optional[str] = Field(
        default="",
        title="Search Query",
        description="Natural language search query for BM25 processing",
        examples=["5G authentication procedures", "handover mobility management", "security key derivation"],
    )

    # Defaults to "all".
    source: Optional[Literal["3GPP", "ETSI", "all"]] = Field(
        default="all",
        title="Document Source",
        description="Limit search to specific organization repositories",
    )

    # Defaults to 60; `ge`/`le` bound supplied integers to the range 0..100 inclusive.
    threshold: Optional[int] = Field(
        default=60,
        title="Relevance Threshold",
        description="Minimum normalized BM25 relevance score (0-100) for results inclusion",
        ge=0,
        le=100,
    )

    # Defaults to None.
    spec_type: Optional[Literal["TS", "TR"]] = Field(
        default=None,
        title="Specification Type",
        description="Filter results by specification type",
    )
class KeywordResponse(BaseModel):
    """
    Response model for keyword and BM25 search results.
    Contains ranked search results with metadata and timing information.
    """

    # Both fields are required (`...` default).

    # Each result is a free-form dict with string keys; no per-key schema is
    # enforced by this model.
    results: List[Dict[str, Any]] = Field(
        default=...,
        title="Search Results",
        description="List of matching specifications with complete metadata. In deep search mode, includes 'contains' field with matching content sections.",
    )

    search_time: float = Field(
        default=...,
        title="Search Duration",
        description="Time spent processing the search request in seconds",
    )