Spaces:
Running
Running
Add documentation + fix bugs
Browse files- app.py +92 -10
- classes.py +9 -0
- documentation.md +48 -0
- schemas.py +152 -25
app.py
CHANGED
@@ -58,7 +58,7 @@ def get_tdoc_url(doc_id):
|
|
58 |
for tdoc in tdoc_locations:
|
59 |
if tdoc["doc_id"] == doc_id:
|
60 |
return tdoc["url"]
|
61 |
-
return "Document not indexed (
|
62 |
|
63 |
def get_spec_url(document):
|
64 |
series = document.split(".")[0].zfill(2)
|
@@ -74,7 +74,33 @@ def get_document(spec_id: str, spec_title: str, source: str):
|
|
74 |
text.extend([section['section'], section['content']])
|
75 |
return text
|
76 |
|
77 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
78 |
app.add_middleware(
|
79 |
CORSMiddleware,
|
80 |
allow_origins=["*"],
|
@@ -92,13 +118,35 @@ valid_3gpp_spec_format = re.compile(r'^\d{2}\.\d{3}(?:-\d+)?')
|
|
92 |
valid_etsi_doc_format = re.compile(r'^(?:SET|SCP|SETTEC|SETREQ|SCPTEC|SCPREQ)\(\d+\)\d+(?:r\d+)?', flags=re.IGNORECASE)
|
93 |
valid_etsi_spec_format = re.compile(r'^\d{3} \d{3}(?:-\d+)?')
|
94 |
|
95 |
-
@app.post("/find", response_model=DocResponse
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
96 |
def find_document(request: DocRequest):
|
97 |
start_time = time.time()
|
98 |
document = request.doc_id
|
99 |
-
source = request.source
|
100 |
-
spec_metadatas = spec_metadatas_3gpp if source == "3GPP" else spec_metadatas_etsi if source == "ETSI" else spec_metadatas_3gpp + spec_metadatas_etsi
|
101 |
-
is_3gpp = valid_3gpp_doc_format.match(document) or valid_3gpp_spec_format.match(document)
|
102 |
|
103 |
url = get_tdoc_url(document) if valid_3gpp_doc_format.match(document) else \
|
104 |
get_spec_url(document) if valid_3gpp_spec_format.match(document) else \
|
@@ -108,9 +156,10 @@ def find_document(request: DocRequest):
|
|
108 |
raise HTTPException(status_code=404, detail=url)
|
109 |
|
110 |
version = None
|
111 |
-
if
|
112 |
version = url.split("/")[-1].replace(".zip", "").split("-")[-1]
|
113 |
scope = None
|
|
|
114 |
for spec in spec_metadatas:
|
115 |
if spec['id'] == document:
|
116 |
scope = spec['scope']
|
@@ -124,7 +173,23 @@ def find_document(request: DocRequest):
|
|
124 |
scope=scope
|
125 |
)
|
126 |
|
127 |
-
@app.post("/batch", response_model=BatchDocResponse
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
128 |
def find_document_batch(request: BatchDocRequest):
|
129 |
start_time = time.time()
|
130 |
documents = request.doc_ids
|
@@ -148,7 +213,17 @@ def find_document_batch(request: BatchDocRequest):
|
|
148 |
search_time=time.time()-start_time
|
149 |
)
|
150 |
|
151 |
-
@app.post('/search
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
152 |
def search_specifications(request: KeywordRequest):
|
153 |
start_time = time.time()
|
154 |
boolSensitiveCase = request.case_sensitive
|
@@ -215,7 +290,14 @@ def search_specifications(request: KeywordRequest):
|
|
215 |
else:
|
216 |
raise HTTPException(status_code=404, detail="Specifications not found")
|
217 |
|
218 |
-
@app.post("/search
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
219 |
def bm25_search_specification(request: BM25KeywordRequest):
|
220 |
start_time = time.time()
|
221 |
source = request.source
|
|
|
58 |
for tdoc in tdoc_locations:
|
59 |
if tdoc["doc_id"] == doc_id:
|
60 |
return tdoc["url"]
|
61 |
+
return "Document not indexed (re-indexing documents ?)"
|
62 |
|
63 |
def get_spec_url(document):
|
64 |
series = document.split(".")[0].zfill(2)
|
|
|
74 |
text.extend([section['section'], section['content']])
|
75 |
return text
|
76 |
|
77 |
+
tags_metadata = [
|
78 |
+
{
|
79 |
+
"name": "Document Retrieval",
|
80 |
+
"description": """
|
81 |
+
Direct document lookup operations for retrieving specific documents by their unique identifiers.
|
82 |
+
|
83 |
+
These endpoints provide fast access to document URLs, versions, and metadata without requiring keyword searches.
|
84 |
+
Perfect for when you know the exact document ID you're looking for.
|
85 |
+
""",
|
86 |
+
},
|
87 |
+
{
|
88 |
+
"name": "Content Search",
|
89 |
+
"description": """
|
90 |
+
Advanced search operations for finding documents based on keywords and content matching.
|
91 |
+
|
92 |
+
Includes both quick metadata-based searches and deep content analysis with flexible filtering options.
|
93 |
+
Supports different search modes and logical operators for precise results.
|
94 |
+
""",
|
95 |
+
},
|
96 |
+
]
|
97 |
+
|
98 |
+
# Load the Markdown text used as the API description in the generated docs.
# The file contains non-ASCII characters (emoji), so decode it explicitly as
# UTF-8 instead of relying on the platform default encoding, and use a context
# manager so the file handle is closed right after reading.
with open('documentation.md', encoding='utf-8') as _documentation_file:
    _api_description = _documentation_file.read()

# Application instance; `tags_metadata` groups the endpoints in the OpenAPI UI.
app = FastAPI(
    title="3GPP & ETSI Document Finder API",
    description=_api_description,
    openapi_tags=tags_metadata
)
|
103 |
+
|
104 |
app.add_middleware(
|
105 |
CORSMiddleware,
|
106 |
allow_origins=["*"],
|
|
|
118 |
valid_etsi_doc_format = re.compile(r'^(?:SET|SCP|SETTEC|SETREQ|SCPTEC|SCPREQ)\(\d+\)\d+(?:r\d+)?', flags=re.IGNORECASE)
|
119 |
valid_etsi_spec_format = re.compile(r'^\d{3} \d{3}(?:-\d+)?')
|
120 |
|
121 |
+
@app.post("/find/single", response_model=DocResponse, tags=["Document Retrieval"], summary="Retrieve a single document by ID", responses={
|
122 |
+
200: {
|
123 |
+
"description": "Document found successfully",
|
124 |
+
"content": {
|
125 |
+
"application/json": {
|
126 |
+
"example": {
|
127 |
+
"doc_id": "23.401",
|
128 |
+
"url": "https://www.3gpp.org/ftp/Specs/archive/23_series/23.401/23401-h20.zip",
|
129 |
+
"version": "h20",
|
130 |
+
"scope": "General Packet Radio Service (GPRS) enhancements for Evolved Universal Terrestrial Radio Access Network (E-UTRAN) access",
|
131 |
+
"search_time": 0.0234
|
132 |
+
}
|
133 |
+
}
|
134 |
+
}
|
135 |
+
},
|
136 |
+
404: {
|
137 |
+
"description": "Document not found or not indexed",
|
138 |
+
"content": {
|
139 |
+
"application/json": {
|
140 |
+
"example": {
|
141 |
+
"detail": "Specification 99.999 not found"
|
142 |
+
}
|
143 |
+
}
|
144 |
+
}
|
145 |
+
}
|
146 |
+
})
|
147 |
def find_document(request: DocRequest):
|
148 |
start_time = time.time()
|
149 |
document = request.doc_id
|
|
|
|
|
|
|
150 |
|
151 |
url = get_tdoc_url(document) if valid_3gpp_doc_format.match(document) else \
|
152 |
get_spec_url(document) if valid_3gpp_spec_format.match(document) else \
|
|
|
156 |
raise HTTPException(status_code=404, detail=url)
|
157 |
|
158 |
version = None
|
159 |
+
if valid_3gpp_spec_format.match(document):
|
160 |
version = url.split("/")[-1].replace(".zip", "").split("-")[-1]
|
161 |
scope = None
|
162 |
+
spec_metadatas = spec_metadatas_3gpp if valid_3gpp_spec_format.match(document) else spec_metadatas_etsi
|
163 |
for spec in spec_metadatas:
|
164 |
if spec['id'] == document:
|
165 |
scope = spec['scope']
|
|
|
173 |
scope=scope
|
174 |
)
|
175 |
|
176 |
+
@app.post("/find/batch", response_model=BatchDocResponse, summary="Retrieve multiple documents by IDs", tags=["Document Retrieval"], responses={
|
177 |
+
200: {
|
178 |
+
"description": "Batch processing completed",
|
179 |
+
"content": {
|
180 |
+
"application/json": {
|
181 |
+
"example": {
|
182 |
+
"results": {
|
183 |
+
"23.401": "https://www.3gpp.org/ftp/Specs/archive/23_series/23.401/23401-h20.zip",
|
184 |
+
"S1-123456": "https://www.3gpp.org/ftp/tsg_sa/WG1_Serv/TSGSI_123/Docs/S1-123456.zip"
|
185 |
+
},
|
186 |
+
"missing": ["99.999", "INVALID-DOC"],
|
187 |
+
"search_time": 0.156
|
188 |
+
}
|
189 |
+
}
|
190 |
+
}
|
191 |
+
}
|
192 |
+
})
|
193 |
def find_document_batch(request: BatchDocRequest):
|
194 |
start_time = time.time()
|
195 |
documents = request.doc_ids
|
|
|
213 |
search_time=time.time()-start_time
|
214 |
)
|
215 |
|
216 |
+
@app.post('/search', response_model=KeywordResponse, tags=["Content Search"], summary="Search specifications by keywords", responses={
|
217 |
+
200: {
|
218 |
+
"description": "Search completed successfully"
|
219 |
+
},
|
220 |
+
400: {
|
221 |
+
"description": "You must enter keywords in deep search mode"
|
222 |
+
},
|
223 |
+
404: {
|
224 |
+
"description": "No specifications found matching the criteria"
|
225 |
+
}
|
226 |
+
})
|
227 |
def search_specifications(request: KeywordRequest):
|
228 |
start_time = time.time()
|
229 |
boolSensitiveCase = request.case_sensitive
|
|
|
290 |
else:
|
291 |
raise HTTPException(status_code=404, detail="Specifications not found")
|
292 |
|
293 |
+
@app.post("/search/bm25", response_model=KeywordResponse, tags=["Content Search"], summary="Advanced BM25 search with relevance scoring", responses={
|
294 |
+
200: {
|
295 |
+
"description": "BM25 search completed successfully"
|
296 |
+
},
|
297 |
+
404: {
|
298 |
+
"description": "No specifications found above the relevance threshold"
|
299 |
+
}
|
300 |
+
})
|
301 |
def bm25_search_specification(request: BM25KeywordRequest):
|
302 |
start_time = time.time()
|
303 |
source = request.source
|
classes.py
CHANGED
@@ -59,6 +59,7 @@ class ETSIDocFinder:
|
|
59 |
class ETSISpecFinder:
|
60 |
def __init__(self):
|
61 |
self.main_url = "https://www.etsi.org/deliver/etsi_ts"
|
|
|
62 |
self.headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36"}
|
63 |
|
64 |
def get_spec_path(self, doc_id: str):
|
@@ -89,12 +90,20 @@ class ETSISpecFinder:
|
|
89 |
original = doc_id
|
90 |
|
91 |
url = f"{self.main_url}/{self.get_spec_path(original)}/"
|
|
|
92 |
print(url)
|
|
|
93 |
|
94 |
releases = self.get_docs_from_url(url)
|
95 |
files = self.get_docs_from_url(url + releases[-1])
|
96 |
for f in files:
|
97 |
if f.endswith(".pdf"):
|
98 |
return url + releases[-1] + "/" + f
|
|
|
|
|
|
|
|
|
|
|
|
|
99 |
|
100 |
return f"Specification {doc_id} not found"
|
|
|
59 |
class ETSISpecFinder:
|
60 |
def __init__(self):
|
61 |
self.main_url = "https://www.etsi.org/deliver/etsi_ts"
|
62 |
+
self.second_url = "https://www.etsi.org/deliver/etsi_tr"
|
63 |
self.headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36"}
|
64 |
|
65 |
def get_spec_path(self, doc_id: str):
|
|
|
90 |
original = doc_id
|
91 |
|
92 |
url = f"{self.main_url}/{self.get_spec_path(original)}/"
|
93 |
+
url2 = f"{self.second_url}/{self.get_spec_path(original)}/"
|
94 |
print(url)
|
95 |
+
print(url2)
|
96 |
|
97 |
releases = self.get_docs_from_url(url)
|
98 |
files = self.get_docs_from_url(url + releases[-1])
|
99 |
for f in files:
|
100 |
if f.endswith(".pdf"):
|
101 |
return url + releases[-1] + "/" + f
|
102 |
+
|
103 |
+
releases = self.get_docs_from_url(url2)
|
104 |
+
files = self.get_docs_from_url(url + releases[-1])
|
105 |
+
for f in files:
|
106 |
+
if f.endswith('.pdf'):
|
107 |
+
return url + releases[-1] + "/" + f
|
108 |
|
109 |
return f"Specification {doc_id} not found"
|
documentation.md
ADDED
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# 📋 Document Finder Backend API
|
2 |
+
|
3 |
+
A comprehensive REST API for searching and retrieving technical documents and specifications from **3GPP** and **ETSI** organizations.
|
4 |
+
|
5 |
+
### 🚀 Key Features
|
6 |
+
|
7 |
+
* **Document Retrieval**: Get direct download URLs and metadata for specific documents
|
8 |
+
* **Batch Processing**: Handle multiple document requests simultaneously
|
9 |
+
* **Advanced Search**: Multiple search modes with keyword matching
|
10 |
+
* **BM25 Scoring**: State-of-the-art relevance ranking using the BM25 algorithm
|
11 |
+
* **Cross-Organization**: Search across both 3GPP and ETSI document repositories
|
12 |
+
|
13 |
+
### 📚 Supported Document Types
|
14 |
+
|
15 |
+
#### 3GPP Documents
|
16 |
+
* **TDocs (Technical Documents)**:
|
17 |
+
- Format: `S1-123456`, `C4-234567`, `R2-345678`
|
18 |
+
- Working group documents from the SA, CT, and RAN groups
|
19 |
+
* **Technical Specifications**:
|
20 |
+
- Format: `23.401`, `38.331-16`
|
21 |
+
- Official published specifications
|
22 |
+
|
23 |
+
#### ETSI Documents
|
24 |
+
* **TDocs (Technical Documents)**:
|
25 |
+
- Format: `SET(25)000001`, `SCPTEC(19)000011`
|
26 |
+
- Committee working documents
|
27 |
+
* **Technical Specifications**:
|
28 |
+
- Format: `131 102`, `188 008-2`
|
29 |
+
- Published ETSI standards
|
30 |
+
|
31 |
+
### 🔍 Search Capabilities
|
32 |
+
|
33 |
+
* **Quick Search**: Lightning-fast metadata-only search
|
34 |
+
* **Deep Search**: Comprehensive content-based search within document sections
|
35 |
+
* **BM25 Search**: Advanced relevance scoring with normalization
|
36 |
+
* **Flexible Filtering**: By source organization, document type, and specification category
|
37 |
+
|
38 |
+
### 🛡️ Data Sources
|
39 |
+
|
40 |
+
This API indexes and searches through:
|
41 |
+
- 3GPP specification metadata and content
|
42 |
+
- ETSI specification metadata and content
|
43 |
+
- 3GPP TDoc location mappings
|
44 |
+
- Pre-built BM25 search indices
|
45 |
+
|
46 |
+
### 🔧 Technical Stack
|
47 |
+
|
48 |
+
Built with FastAPI, featuring automatic OpenAPI documentation, request validation, and comprehensive error handling.
|
schemas.py
CHANGED
@@ -1,38 +1,165 @@
|
|
1 |
-
from pydantic import BaseModel
|
2 |
from typing import *
|
3 |
|
4 |
class DocRequest(BaseModel):
|
5 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6 |
|
7 |
class DocResponse(BaseModel):
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
|
14 |
class BatchDocRequest(BaseModel):
|
15 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
|
17 |
class BatchDocResponse(BaseModel):
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
27 |
|
28 |
class KeywordRequest(BaseModel):
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
|
36 |
class KeywordResponse(BaseModel):
|
37 |
-
|
38 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from pydantic import BaseModel, Field
|
2 |
from typing import *
|
3 |
|
4 |
class DocRequest(BaseModel):
|
5 |
+
"""
|
6 |
+
Request model for single document retrieval.
|
7 |
+
|
8 |
+
Used to specify which document or specification to retrieve by its unique identifier.
|
9 |
+
"""
|
10 |
+
doc_id: str = Field(
|
11 |
+
...,
|
12 |
+
title="Document Identifier",
|
13 |
+
description="Unique identifier for the document or specification.",
|
14 |
+
)
|
15 |
|
16 |
class DocResponse(BaseModel):
|
17 |
+
"""
|
18 |
+
Response model for single document retrieval.
|
19 |
+
|
20 |
+
Contains all available metadata and access information for the requested document.
|
21 |
+
"""
|
22 |
+
doc_id: str = Field(
|
23 |
+
...,
|
24 |
+
title="Document Identifier",
|
25 |
+
description="Echoed document identifier from the request"
|
26 |
+
)
|
27 |
+
url: str = Field(
|
28 |
+
...,
|
29 |
+
title="Document URL",
|
30 |
+
description="Direct download URL"
|
31 |
+
)
|
32 |
+
version: Optional[str] = Field(
|
33 |
+
None,
|
34 |
+
title="Document Version",
|
35 |
+
description="Extracted version information (e.g., 'h20', 'v17.9.0') when available"
|
36 |
+
)
|
37 |
+
scope: Optional[str] = Field(
|
38 |
+
None,
|
39 |
+
title="Document Scope",
|
40 |
+
description="Brief description of the document's scope and purpose from metadata"
|
41 |
+
)
|
42 |
+
search_time: float = Field(
|
43 |
+
...,
|
44 |
+
title="Search Duration",
|
45 |
+
description="Time spent processing the request in seconds"
|
46 |
+
)
|
47 |
|
48 |
class BatchDocRequest(BaseModel):
|
49 |
+
"""
|
50 |
+
Request model for batch document retrieval.
|
51 |
+
|
52 |
+
Allows retrieval of multiple documents in a single API call for efficiency.
|
53 |
+
"""
|
54 |
+
doc_ids: List[str] = Field(
|
55 |
+
...,
|
56 |
+
title="Document Identifier List",
|
57 |
+
description="List of document identifiers to retrieve."
|
58 |
+
)
|
59 |
|
60 |
class BatchDocResponse(BaseModel):
|
61 |
+
"""
|
62 |
+
Response model for batch document retrieval.
|
63 |
+
|
64 |
+
Provides organized results separating found documents from missing ones.
|
65 |
+
"""
|
66 |
+
results: Dict[str, str] = Field(
|
67 |
+
...,
|
68 |
+
title="Found Documents",
|
69 |
+
description="Dictionary mapping document IDs to their corresponding URLs"
|
70 |
+
)
|
71 |
+
missing: List[str] = Field(
|
72 |
+
...,
|
73 |
+
title="Missing Documents",
|
74 |
+
description="List of document IDs that could not be found or are not indexed"
|
75 |
+
)
|
76 |
+
search_time: float = Field(
|
77 |
+
...,
|
78 |
+
title="Total Search Duration",
|
79 |
+
description="Total time spent processing the batch request in seconds"
|
80 |
+
)
|
81 |
|
82 |
class KeywordRequest(BaseModel):
|
83 |
+
"""
|
84 |
+
Request model for keyword-based specification search.
|
85 |
+
|
86 |
+
Provides flexible search options with multiple modes and filtering capabilities.
|
87 |
+
"""
|
88 |
+
keywords: Optional[str] = Field(
|
89 |
+
"",
|
90 |
+
title="Search Keywords",
|
91 |
+
description="Comma-separated keywords for searching specifications.",
|
92 |
+
examples=["5G NR,authentication", "handover,mobility", "security,encryption"]
|
93 |
+
)
|
94 |
+
search_mode: Literal["quick", "deep"] = Field(
|
95 |
+
...,
|
96 |
+
title="Search Mode",
|
97 |
+
description="Search mode: 'quick' searches metadata only, 'deep' searches metadata and document content"
|
98 |
+
)
|
99 |
+
case_sensitive: Optional[bool] = Field(
|
100 |
+
False,
|
101 |
+
title="Case Sensitive Search",
|
102 |
+
description="Enable case-sensitive keyword matching"
|
103 |
+
)
|
104 |
+
source: Optional[Literal["3GPP", "ETSI", "all"]] = Field(
|
105 |
+
"all",
|
106 |
+
title="Document Source",
|
107 |
+
description="Limit search to specific organization or search all repositories"
|
108 |
+
)
|
109 |
+
spec_type: Optional[Literal["TS", "TR"]] = Field(
|
110 |
+
None,
|
111 |
+
title="Specification Type",
|
112 |
+
description="Filter by specification type: 'TS' (Technical Specification) or 'TR' (Technical Report)"
|
113 |
+
)
|
114 |
+
mode: Optional[Literal["and", "or"]] = Field(
|
115 |
+
"and",
|
116 |
+
title="Search Logic",
|
117 |
+
description="Logical operator: 'and' requires all keywords to match, 'or' matches any keyword"
|
118 |
+
)
|
119 |
+
|
120 |
+
class BM25KeywordRequest(BaseModel):
|
121 |
+
"""
|
122 |
+
Request model for BM25 advanced search.
|
123 |
+
|
124 |
+
Provides parameters for relevance-based search using BM25 scoring algorithm.
|
125 |
+
"""
|
126 |
+
keywords: Optional[str] = Field(
|
127 |
+
"",
|
128 |
+
title="Search Query",
|
129 |
+
description="Natural language search query for BM25 processing",
|
130 |
+
examples=["5G authentication procedures", "handover mobility management", "security key derivation"]
|
131 |
+
)
|
132 |
+
source: Optional[Literal["3GPP", "ETSI", "all"]] = Field(
|
133 |
+
"all",
|
134 |
+
title="Document Source",
|
135 |
+
description="Limit search to specific organization repositories"
|
136 |
+
)
|
137 |
+
threshold: Optional[int] = Field(
|
138 |
+
60,
|
139 |
+
title="Relevance Threshold",
|
140 |
+
description="Minimum normalized BM25 relevance score (0-100) for results inclusion",
|
141 |
+
ge=0,
|
142 |
+
le=100
|
143 |
+
)
|
144 |
+
spec_type: Optional[Literal["TS", "TR"]] = Field(
|
145 |
+
None,
|
146 |
+
title="Specification Type",
|
147 |
+
description="Filter results by specification type"
|
148 |
+
)
|
149 |
|
150 |
class KeywordResponse(BaseModel):
|
151 |
+
"""
|
152 |
+
Response model for keyword and BM25 search results.
|
153 |
+
|
154 |
+
Contains ranked search results with metadata and timing information.
|
155 |
+
"""
|
156 |
+
results: List[Dict[str, Any]] = Field(
|
157 |
+
...,
|
158 |
+
title="Search Results",
|
159 |
+
description="List of matching specifications with complete metadata. In deep search mode, includes 'contains' field with matching content sections."
|
160 |
+
)
|
161 |
+
search_time: float = Field(
|
162 |
+
...,
|
163 |
+
title="Search Duration",
|
164 |
+
description="Time spent processing the search request in seconds"
|
165 |
+
)
|