om4r932 committed
Commit a6af380 · 1 Parent(s): 431315f

First version (Back only)

Files changed (6):
  1. Dockerfile +17 -0
  2. README.md +2 -2
  3. app.py +302 -0
  4. classes.py +100 -0
  5. requirements.txt +14 -0
  6. schemas.py +38 -0
Dockerfile ADDED
@@ -0,0 +1,17 @@
+ FROM python:3.11.3
+
+ RUN apt-get update && \
+     apt-get install -y libreoffice libreoffice-writer libreoffice-calc libreoffice-impress && \
+     apt-get clean && rm -rf /var/lib/apt/lists/*
+
+ RUN useradd -m -u 1000 user
+ USER user
+ ENV PATH="/home/user/.local/bin:$PATH"
+
+ WORKDIR /app
+
+ COPY --chown=user ./requirements.txt requirements.txt
+ RUN pip install --trusted-host pypi.org --trusted-host pypi.python.org --trusted-host files.pythonhosted.org --no-cache-dir --upgrade -r requirements.txt
+
+ COPY --chown=user . /app
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
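
For local testing outside the Space, the image can be built and started in the usual way; a minimal Python smoke test, assuming the image was tagged docfinder and the container maps port 7860 as above:

    import requests

    # Assumes: docker build -t docfinder . && docker run -p 7860:7860 docfinder  (hypothetical tag)
    resp = requests.get("http://localhost:7860/openapi.json", timeout=10)
    print(resp.status_code)               # 200 once uvicorn is serving
    print(resp.json()["info"]["title"])   # "Document Finder Back-End"

Note that app.py mounts the interactive Swagger docs at the root path (docs_url="/"), so opening http://localhost:7860/ in a browser works as well.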
README.md CHANGED
@@ -1,10 +1,10 @@
  ---
  title: DocFinder
  emoji: 📉
- colorFrom: pink
+ colorFrom: red
  colorTo: pink
  sdk: docker
- pinned: false
+ pinned: true
  license: mit
  short_description: 3GPP & ETSI Document Finder (frontend to be released...)
  ---
app.py ADDED
@@ -0,0 +1,302 @@
+ import time
+ from datetime import datetime
+ import os, re, warnings, nltk, json, subprocess
+ import numpy as np
+ from nltk.stem import WordNetLemmatizer
+ from dotenv import load_dotenv
+ from sklearn.preprocessing import MinMaxScaler
+
+ os.environ['CURL_CA_BUNDLE'] = ""
+ warnings.filterwarnings('ignore')
+ nltk.download('wordnet')
+ load_dotenv()
+
+ from datasets import load_dataset
+ import bm25s
+ from bm25s.hf import BM25HF
+
+ from fastapi import FastAPI, HTTPException
+ from fastapi.middleware.cors import CORSMiddleware
+ from fastapi.responses import FileResponse
+ from fastapi.staticfiles import StaticFiles
+
+ from schemas import *
+ from classes import *
+
+ from bs4 import BeautifulSoup
+ import requests
+
+ lemmatizer = WordNetLemmatizer()
+
+ spec_metadatas_3gpp = load_dataset("OrganizedProgrammers/3GPPSpecMetadata", token=os.environ["HF_TOKEN"])
+ spec_contents_3gpp = load_dataset("OrganizedProgrammers/3GPPSpecContent", token=os.environ["HF_TOKEN"])
+ tdoc_locations_3gpp = load_dataset("OrganizedProgrammers/3GPPTDocLocation", token=os.environ["HF_TOKEN"])
+
+ spec_metadatas_etsi = load_dataset("OrganizedProgrammers/ETSISpecMetadata", token=os.environ["HF_TOKEN"])
+ spec_contents_etsi = load_dataset("OrganizedProgrammers/ETSISpecContent", token=os.environ["HF_TOKEN"])
+
+ spec_contents_3gpp = spec_contents_3gpp["train"].to_list()
+ spec_metadatas_3gpp = spec_metadatas_3gpp["train"].to_list()
+ spec_contents_etsi = spec_contents_etsi["train"].to_list()
+ spec_metadatas_etsi = spec_metadatas_etsi["train"].to_list()
+ tdoc_locations = tdoc_locations_3gpp["train"].to_list()
+
+ bm25_index_3gpp = BM25HF.load_from_hub("OrganizedProgrammers/3GPPBM25IndexSingle", load_corpus=True, token=os.environ["HF_TOKEN"])
+ bm25_index_etsi = BM25HF.load_from_hub("OrganizedProgrammers/ETSIBM25IndexSingle", load_corpus=True, token=os.environ["HF_TOKEN"])
+
+ def get_docs_from_url(url):
+     """Get the list of documents/directories listed at a URL"""
+     try:
+         response = requests.get(url, verify=False, timeout=10)
+         soup = BeautifulSoup(response.text, "html.parser")
+         return [item.get_text() for item in soup.select("tr td a")]
+     except Exception as e:
+         print(f"Error accessing {url}: {e}")
+         return []
+
+ def get_tdoc_url(doc_id):
+     for tdoc in tdoc_locations:
+         if tdoc["doc_id"] == doc_id:
+             return tdoc["url"]
+     return "Document not indexed (Re-index TDocs)"
+
+ def get_spec_url(document):
+     series = document.split(".")[0].zfill(2)
+     url = f"https://www.3gpp.org/ftp/Specs/archive/{series}_series/{document}"
+     versions = get_docs_from_url(url)
+     return url + "/" + versions[-1] if versions != [] else f"Specification {document} not found"
+
+ def get_document(spec_id: str, spec_title: str, source: str):
+     # Returns [header, section_title_1, section_content_1, section_title_2, ...]
+     text = [f"{spec_id} - {spec_title}"]
+     spec_contents = spec_contents_3gpp if source == "3GPP" else spec_contents_etsi if source == "ETSI" else spec_contents_3gpp + spec_contents_etsi
+     for section in spec_contents:
+         if not isinstance(section, str) and spec_id == section["doc_id"]:
+             text.extend([section['section'], section['content']])
+     return text
+
+ app = FastAPI(title="Document Finder Back-End", docs_url="/", description="Backend for DocFinder - Searching technical documents & specifications from 3GPP & ETSI")
+ app.add_middleware(
+     CORSMiddleware,
+     allow_origins=["*"],
+     allow_credentials=True,
+     allow_methods=["*"],
+     allow_headers=["*"],
+ )
+
+ etsi_doc_finder = ETSIDocFinder()
+ etsi_spec_finder = ETSISpecFinder()
+
+ valid_3gpp_doc_format = re.compile(r'^(S[1-6P]|C[1-6P]|R[1-6P])-\d+', flags=re.IGNORECASE)
+ valid_3gpp_spec_format = re.compile(r'^\d{2}\.\d{3}(?:-\d+)?')
+
+ valid_etsi_doc_format = re.compile(r'^(?:SET|SCP|SETTEC|SETREQ|SCPTEC|SCPREQ)\(\d+\)\d+(?:r\d+)?', flags=re.IGNORECASE)
+ valid_etsi_spec_format = re.compile(r'^\d{3} \d{3}(?:-\d+)?')
+
+ @app.post("/find", response_model=DocResponse)
+ def find_document(request: DocRequest):
+     start_time = time.time()
+     document = request.doc_id
+     source = request.source
+     spec_metadatas = spec_metadatas_3gpp if source == "3GPP" else spec_metadatas_etsi if source == "ETSI" else spec_metadatas_3gpp + spec_metadatas_etsi
+     is_3gpp = valid_3gpp_doc_format.match(document) or valid_3gpp_spec_format.match(document)
+
+     url = get_tdoc_url(document) if valid_3gpp_doc_format.match(document) else \
+         get_spec_url(document) if valid_3gpp_spec_format.match(document) else \
+         etsi_doc_finder.search_document(document) if valid_etsi_doc_format.match(document) else \
+         etsi_spec_finder.search_document(document) if valid_etsi_spec_format.match(document) else "Document ID not supported"
+     if "Specification" in url or "Document" in url:
+         raise HTTPException(status_code=404, detail=url)
+
+     version = None
+     if is_3gpp:
+         version = url.split("/")[-1].replace(".zip", "").split("-")[-1]
+     scope = None
+     for spec in spec_metadatas:
+         if spec['id'] == document:
+             scope = spec['scope']
+             break
+
+     return DocResponse(
+         doc_id=document,
+         version=version,
+         url=url,
+         search_time=time.time() - start_time,
+         scope=scope
+     )
+
+ @app.post("/batch", response_model=BatchDocResponse)
+ def find_document_batch(request: BatchDocRequest):
+     start_time = time.time()
+     documents = request.doc_ids
+     results = {}
+     missing = []
+
+     for document in documents:
+         url = get_tdoc_url(document) if valid_3gpp_doc_format.match(document) else \
+             get_spec_url(document) if valid_3gpp_spec_format.match(document) else \
+             etsi_doc_finder.search_document(document) if valid_etsi_doc_format.match(document) else \
+             etsi_spec_finder.search_document(document) if valid_etsi_spec_format.match(document) else "Document ID not supported"
+
+         if "Specification" in url or "Document" in url:
+             missing.append(document)
+         else:
+             results[document] = url
+
+     return BatchDocResponse(
+         results=results,
+         missing=missing,
+         search_time=time.time()-start_time
+     )
+
+ @app.post('/search-spec', response_model=KeywordResponse)
+ def search_specifications(request: KeywordRequest):
+     start_time = time.time()
+     boolSensitiveCase = request.case_sensitive
+     search_mode = request.search_mode
+     source = request.source
+     spec_metadatas = spec_metadatas_3gpp if source == "3GPP" else spec_metadatas_etsi if source == "ETSI" else spec_metadatas_3gpp + spec_metadatas_etsi
+     spec_type = request.spec_type
+     # Lowercase keywords only when the search is case-insensitive
+     keywords = [string if boolSensitiveCase else string.lower() for string in request.keywords.split(",")]
+     print(keywords)
+     unique_specs = set()
+     results = []
+
+     if keywords == [""] and search_mode == "deep":
+         raise HTTPException(status_code=400, detail="You must enter keywords in deep search mode!")
+
+     for spec in spec_metadatas:
+         valid = False
+         if spec['id'] in unique_specs: continue
+         if spec.get('type', None) is None or (spec_type is not None and spec["type"] != spec_type): continue
+         if search_mode == "deep":
+             contents = []
+             doc = get_document(spec["id"], spec["title"], source)
+             docValid = len(doc) > 1
+
+         if request.mode == "and":
+             string = f"{spec['id']}+-+{spec['title']}+-+{spec['type']}+-+{spec['version']}"
+             if all(keyword in (string if boolSensitiveCase else string.lower()) for keyword in keywords):
+                 valid = True
+             if search_mode == "deep":
+                 if docValid:
+                     for x in range(1, len(doc) - 1, 2):
+                         section_title = doc[x]
+                         section_content = doc[x+1]
+                         if "reference" not in section_title.lower() and "void" not in section_title.lower() and "annex" not in section_content.lower():
+                             if all(keyword in (section_content if boolSensitiveCase else section_content.lower()) for keyword in keywords):
+                                 valid = True
+                                 contents.append({section_title: section_content})
+         elif request.mode == "or":
+             string = f"{spec['id']}+-+{spec['title']}+-+{spec['type']}+-+{spec['version']}"
+             if any(keyword in (string if boolSensitiveCase else string.lower()) for keyword in keywords):
+                 valid = True
+             if search_mode == "deep":
+                 if docValid:
+                     for x in range(1, len(doc) - 1, 2):
+                         section_title = doc[x]
+                         section_content = doc[x+1]
+                         if "reference" not in section_title.lower() and "void" not in section_title.lower() and "annex" not in section_content.lower():
+                             if any(keyword in (section_content if boolSensitiveCase else section_content.lower()) for keyword in keywords):
+                                 valid = True
+                                 contents.append({section_title: section_content})
+         if valid:
+             spec_content = spec
+             if search_mode == "deep":
+                 spec_content["contains"] = {k: v for d in contents for k, v in d.items()}
+             results.append(spec_content)
+         else:
+             unique_specs.add(spec['id'])
+
+     if len(results) > 0:
+         return KeywordResponse(
+             results=results,
+             search_time=time.time() - start_time
+         )
+     else:
+         raise HTTPException(status_code=404, detail="Specifications not found")
+
+ @app.post("/search-spec/experimental", response_model=KeywordResponse)
+ def bm25_search_specification(request: BM25KeywordRequest):
+     start_time = time.time()
+     source = request.source
+     spec_type = request.spec_type
+     threshold = request.threshold
+     query = request.keywords
+
+     results_out = []
+     query_tokens = bm25s.tokenize(query)
+     if source == "3GPP":
+         results, scores = bm25_index_3gpp.retrieve(query_tokens, k=len(bm25_index_3gpp.corpus))
+     elif source == "ETSI":
+         results, scores = bm25_index_etsi.retrieve(query_tokens, k=len(bm25_index_etsi.corpus))
+     else:
+         print(len(bm25_index_3gpp.corpus), len(bm25_index_etsi.corpus))
+         results1, scores1 = bm25_index_3gpp.retrieve(query_tokens, k=len(bm25_index_3gpp.corpus))
+         results2, scores2 = bm25_index_etsi.retrieve(query_tokens, k=len(bm25_index_etsi.corpus))
+         results = np.concatenate([results1, results2], axis=1)
+         scores = np.concatenate([scores1, scores2], axis=1)
+
+     def calculate_boosted_score(metadata, score, query):
+         # Boost the raw BM25 score when the query mentions the spec ID or words from its title
+         title = set(metadata['title'].lower().split())
+         q = set(query.lower().split())
+         spec_id_presence = 0.5 if metadata['id'].lower() in q else 0
+         booster = len(q & title) * 0.5
+         return score + spec_id_presence + booster
+
+     spec_scores = {}
+     spec_indices = {}
+     spec_details = {}
+
+     for i in range(results.shape[1]):
+         doc = results[0, i]
+         score = scores[0, i]
+         spec = doc["metadata"]["id"]
+
+         boosted_score = calculate_boosted_score(doc['metadata'], score, query)
+
+         # Keep only the best-scoring chunk per specification
+         if spec not in spec_scores or boosted_score > spec_scores[spec]:
+             spec_scores[spec] = boosted_score
+             spec_indices[spec] = i
+             spec_details[spec] = {
+                 'original_score': score,
+                 'boosted_score': boosted_score,
+                 'doc': doc
+             }
+
+     def normalize_scores(scores_dict):
+         # Min-max scale the boosted scores to [0, 1] so the threshold is comparable across queries
+         if not scores_dict:
+             return {}
+
+         scores_array = np.array(list(scores_dict.values())).reshape(-1, 1)
+         scaler = MinMaxScaler()
+         normalized_scores = scaler.fit_transform(scores_array).flatten()
+
+         normalized_dict = {}
+         for i, spec in enumerate(scores_dict.keys()):
+             normalized_dict[spec] = normalized_scores[i]
+
+         return normalized_dict
+
+     normalized_scores = normalize_scores(spec_scores)
+
+     for spec in spec_details:
+         spec_details[spec]["normalized_score"] = normalized_scores[spec]
+
+     unique_specs = sorted(normalized_scores.keys(), key=lambda x: normalized_scores[x], reverse=True)
+
+     for rank, spec in enumerate(unique_specs, 1):
+         details = spec_details[spec]
+         metadata = details['doc']['metadata']
+         if metadata.get('type', None) is None or (spec_type is not None and metadata["type"] != spec_type):
+             continue
+         if details['normalized_score'] < threshold / 100:
+             break
+         results_out.append(metadata)
+
+     if len(results_out) > 0:
+         return KeywordResponse(
+             results=results_out,
+             search_time=time.time() - start_time
+         )
+     else:
+         raise HTTPException(status_code=404, detail="Specifications not found")
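
As a usage sketch, a client would exercise the three endpoints as below. The document IDs are illustrative, the payload fields come from schemas.py, and the base URL assumes the container from the Dockerfile above:

    import requests

    BASE = "http://localhost:7860"  # assumption: locally running container

    # /find resolves a single ID: a 3GPP spec number, a 3GPP TDoc, or an ETSI SET/SCP document
    print(requests.post(f"{BASE}/find", json={"doc_id": "23.501", "source": "3GPP"}).json())

    # /batch resolves several IDs at once and separates hits from misses
    print(requests.post(f"{BASE}/batch", json={"doc_ids": ["23.501", "S2-2400001"]}).json())

    # /search-spec scans metadata ("quick") or full section text ("deep")
    print(requests.post(f"{BASE}/search-spec", json={
        "keywords": "authentication,5G",
        "search_mode": "quick",
        "source": "3GPP",
        "mode": "and",
    }).json())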
classes.py ADDED
@@ -0,0 +1,100 @@
+ import requests
+ import re
+ from bs4 import BeautifulSoup
+ import os
+ import json
+
+ class ETSIDocFinder:
+     def __init__(self):
+         self.main_ftp_url = "https://docbox.etsi.org/SET"
+         self.session = requests.Session()
+         req = self.session.post("https://portal.etsi.org/ETSIPages/LoginEOL.ashx", verify=False, headers={"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36"}, data=json.dumps({"username": os.environ.get("EOL_USER"), "password": os.environ.get("EOL_PASSWORD")}))
+         print(req.content, req.status_code)
+
+     def get_workgroup(self, doc: str):
+         main_tsg = "SET-WG-R" if any(doc.startswith(kw) for kw in ["SETREQ", "SCPREQ"]) else "SET-WG-T" if any(doc.startswith(kw) for kw in ["SETTEC", "SCPTEC"]) else "SET" if any(doc.startswith(kw) for kw in ["SET", "SCP"]) else None
+         if main_tsg is None:
+             return None, None, None
+         regex = re.search(r'\(([^)]+)\)', doc)
+         workgroup = "20" + regex.group(1)
+         return main_tsg, workgroup, doc
+
+     def find_workgroup_url(self, main_tsg, workgroup):
+         response = self.session.get(f"{self.main_ftp_url}/{main_tsg}/05-CONTRIBUTIONS", verify=False)
+         soup = BeautifulSoup(response.text, 'html.parser')
+         for item in soup.find_all("tr"):
+             link = item.find("a")
+             if link and workgroup in link.get_text():
+                 return f"{self.main_ftp_url}/{main_tsg}/05-CONTRIBUTIONS/{link.get_text()}"
+
+         return f"{self.main_ftp_url}/{main_tsg}/05-CONTRIBUTIONS/{workgroup}"
+
+     def get_docs_from_url(self, url):
+         try:
+             response = self.session.get(url, verify=False, timeout=15)
+             soup = BeautifulSoup(response.text, "html.parser")
+             return [item.get_text() for item in soup.select("tr td a")]
+         except Exception as e:
+             print(f"Error accessing {url}: {e}")
+             return []
+
+     def search_document(self, doc_id: str):
+         original = doc_id
+
+         main_tsg, workgroup, doc = self.get_workgroup(doc_id)
+         urls = []
+         if main_tsg:
+             wg_url = self.find_workgroup_url(main_tsg, workgroup)
+             print(wg_url)
+             if wg_url:
+                 files = self.get_docs_from_url(wg_url)
+                 print(files)
+                 for f in files:
+                     if doc in f.lower() or original in f:
+                         print(f)
+                         doc_url = f"{wg_url}/{f}"
+                         urls.append(doc_url)
+         return urls[0] if len(urls) == 1 else urls[-2] if len(urls) > 1 else f"Document {doc_id} not found"
+
+ class ETSISpecFinder:
+     def __init__(self):
+         self.main_url = "https://www.etsi.org/deliver/etsi_ts"
+         self.headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36"}
+
+     def get_spec_path(self, doc_id: str):
+         # Example: "103 666-2" -> position "103666", part "02"
+         if "-" in doc_id:
+             position, part = doc_id.split("-")
+         else:
+             position, part = doc_id, None
+
+         position = position.replace(" ", "")
+         if part:
+             if len(part) == 1:
+                 part = "0" + part
+         spec_folder = position + part if part is not None else position
+         return f"{int(position) - (int(position)%100)}_{int(position) - (int(position)%100) + 99}/{spec_folder}"
+
+     def get_docs_from_url(self, url):
+         try:
+             response = requests.get(url, verify=False, timeout=15)
+             soup = BeautifulSoup(response.text, "html.parser")
+             docs = [item.get_text() for item in soup.find_all("a")][1:]
+             return docs
+         except Exception as e:
+             print(f"Error accessing {url}: {e}")
+             return []
+
+     def search_document(self, doc_id: str):
+         # Example: 103 666[-2 opt]
+         original = doc_id
+
+         url = f"{self.main_url}/{self.get_spec_path(original)}/"
+         print(url)
+
+         releases = self.get_docs_from_url(url)
+         if not releases:
+             return f"Specification {doc_id} not found"
+         files = self.get_docs_from_url(url + releases[-1])
+         for f in files:
+             if f.endswith(".pdf"):
+                 return url + releases[-1] + "/" + f
+
+         return f"Specification {doc_id} not found"
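
The one non-obvious piece in ETSISpecFinder is the folder bucketing in get_spec_path: etsi.org groups deliverables into blocks of one hundred, so the path prefix is derived with integer arithmetic. A standalone sketch of the same formula (no network access, illustrative helper name):

    def spec_path(doc_id: str) -> str:
        # Same arithmetic as ETSISpecFinder.get_spec_path: "103 666-2" -> "103600_103699/10366602"
        position, _, part = doc_id.partition("-")
        position = position.replace(" ", "")
        folder = position + (part.zfill(2) if part else "")
        low = int(position) - int(position) % 100   # 103666 -> 103600
        return f"{low}_{low + 99}/{folder}"

    assert spec_path("103 666-2") == "103600_103699/10366602"
    assert spec_path("103 666") == "103600_103699/103666"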
requirements.txt ADDED
@@ -0,0 +1,14 @@
+ fastapi
+ uvicorn[standard]
+ requests
+ beautifulsoup4
+ pydantic
+ numpy
+ pandas
+ lxml
+ python-dotenv
+ scikit-learn
+ nltk
+ bm25s[full]
+ jax[cpu]
+ datasets
schemas.py ADDED
@@ -0,0 +1,38 @@
+ from pydantic import BaseModel
+ from typing import *
+
+ class DocRequest(BaseModel):
+     doc_id: str
+     # /find also reads request.source, so the field belongs here too
+     source: Optional[Literal["3GPP", "ETSI", "all"]] = "all"
+
+ class DocResponse(BaseModel):
+     doc_id: str
+     url: str
+     version: Optional[str] = None
+     scope: Optional[str] = None
+     search_time: float
+
+ class BatchDocRequest(BaseModel):
+     doc_ids: List[str]
+
+ class BatchDocResponse(BaseModel):
+     results: Dict[str, str]
+     missing: List[str]
+     search_time: float
+
+ class BM25KeywordRequest(BaseModel):
+     keywords: Optional[str] = ""
+     source: Optional[Literal["3GPP", "ETSI", "all"]] = "all"
+     threshold: Optional[int] = 60
+     spec_type: Optional[Literal["TS", "TR"]] = None
+
+ class KeywordRequest(BaseModel):
+     keywords: Optional[str] = ""
+     search_mode: Literal["quick", "deep"]
+     case_sensitive: Optional[bool] = False
+     source: Optional[Literal["3GPP", "ETSI", "all"]] = "all"
+     spec_type: Optional[Literal["TS", "TR"]] = None
+     mode: Optional[Literal["and", "or"]] = "and"
+
+ class KeywordResponse(BaseModel):
+     results: List[Dict[str, Any]]
+     search_time: float
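
Since the endpoints validate their bodies through these models, request shapes can be checked offline. A minimal sketch (pydantic v2 shown; on v1, use .dict() instead of .model_dump()):

    from pydantic import ValidationError
    from schemas import KeywordRequest

    req = KeywordRequest(keywords="authentication,5G", search_mode="deep")
    print(req.model_dump())  # defaults filled in: source="all", mode="and", ...

    try:
        KeywordRequest(keywords="x", search_mode="fuzzy")  # not in Literal["quick", "deep"]
    except ValidationError as e:
        print(e)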