Lucas ARRIESSE committed
Commit 8ac47d4 · 1 Parent(s): adf98e8

Allow listing other document types than xxxxCRs

Files changed (5)
  1. api/docs.py +28 -79
  2. dependencies.py +5 -2
  3. schemas.py +12 -5
  4. static/index.html +8 -12
  5. static/js/app.js +17 -15
api/docs.py CHANGED
@@ -8,6 +8,7 @@ import traceback
 import zipfile
 import json
 import os
+from httpx import AsyncClient
 from pydantic import BaseModel
 import requests
 import subprocess
@@ -23,7 +24,7 @@ from dependencies import DOC_FINDER_BASE_URL, get_http_client, get_llm_router
 from fastapi.responses import StreamingResponse
 from litellm.router import Router

-from schemas import DataRequest, DataResponse, DocRequirements, DownloadRequest, MeetingsRequest, MeetingsResponse, RequirementsRequest, RequirementsResponse
+from schemas import DataRequest, DataResponse, DocRequirements, DocDownloadRequest, MeetingsRequest, MeetingsResponse, ExtractRequirementsRequest, ExtractRequirementsResponse

 # API router for requirement extraction from docs / doc list retrieval / download
 router = APIRouter(tags=["document extraction"])
@@ -173,22 +174,26 @@ def docx_to_txt(doc_id: str, url: str):
 # ============================================= Doc routes =========================================================

 @router.post("/get_meetings", response_model=MeetingsResponse)
-def get_meetings(req: MeetingsRequest):
+async def get_meetings(req: MeetingsRequest, http_client: AsyncClient = Depends(get_http_client)):
+    # Extracting WG
     working_group = req.working_group
     tsg = re.sub(r"\d+", "", working_group)
     wg_number = re.search(r"\d", working_group).group(0)

+    # building corresponding FTP url
     logging.debug(tsg, wg_number)
     url = "https://www.3gpp.org/ftp/tsg_" + tsg
     logging.debug(url)

-    resp = requests.get(url, verify=False)
-    soup = BeautifulSoup(resp.text, "html.parser")
+    ftp_request = await http_client.get(url)
+    soup = BeautifulSoup(ftp_request.text, "html.parser")

     meeting_folders = []
     all_meetings = []
     wg_folders = [item.get_text() for item in soup.select("tr td a")]
     selected_folder = None
+
+    # sanity check to ensure the requested workgroup is present in the ftp directories
     for folder in wg_folders:
         if "wg" + str(wg_number) in folder.lower():
             selected_folder = folder
@@ -198,7 +203,7 @@ def get_meetings(req: MeetingsRequest):
     logging.debug(url)

     if selected_folder:
-        resp = requests.get(url, verify=False)
+        resp = await http_client.get(url)
         soup = BeautifulSoup(resp.text, "html.parser")
         meeting_folders = [item.get_text() for item in soup.select("tr td a") if item.get_text(
         ).startswith("TSG") or (item.get_text().startswith("CT") and "-" in item.get_text())]
@@ -211,14 +216,19 @@ def get_meetings(req: MeetingsRequest):


 @router.post("/get_dataframe", response_model=DataResponse)
-def get_change_request_dataframe(req: DataRequest):
+async def get_docs_df(req: DataRequest, http_client: AsyncClient = Depends(get_http_client)):
+    """
+    Downloads the document list dataframe for a given meeting
+    """
+
+    # Extracting WG
     working_group = req.working_group
     tsg = re.sub(r"\d+", "", working_group)
     wg_number = re.search(r"\d", working_group).group(0)
     url = "https://www.3gpp.org/ftp/tsg_" + tsg
     logging.info("Fetching TDocs dataframe")

-    resp = requests.get(url, verify=False)
+    resp = await http_client.get(url)
     soup = BeautifulSoup(resp.text, "html.parser")
     wg_folders = [item.get_text() for item in soup.select("tr td a")]
     selected_folder = None
@@ -228,7 +238,7 @@ def get_change_request_dataframe(req: DataRequest):
             break

     url += "/" + selected_folder + "/" + req.meeting + "/docs"
-    resp = requests.get(url, verify=False)
+    resp = await http_client.get(url)
     soup = BeautifulSoup(resp.text, "html.parser")
     files = [item.get_text() for item in soup.select("tr td a")
              if item.get_text().endswith(".xlsx")]
@@ -240,7 +250,7 @@ def get_change_request_dataframe(req: DataRequest):
         return f"{url}/{tdoc}.zip"

     df = pd.read_excel(str(url + "/" + files[0]).replace("#", "%23"))
-    filtered_df = df[(((df["Type"] == "CR") & ((df["CR category"] == "B") | (df["CR category"] == "C"))) | (df["Type"] == "pCR")) & ~(
+    filtered_df = df[~(
         df["Uploaded"].isna())][["TDoc", "Title", "CR category", "Source", "Type", "Agenda item", "Agenda item description", "TDoc Status"]]
     filtered_df["URL"] = filtered_df["TDoc"].apply(gen_url)

@@ -251,11 +261,11 @@ def get_change_request_dataframe(req: DataRequest):


 @router.post("/download_tdocs")
-def download_tdocs(req: DownloadRequest):
+def download_tdocs(req: DocDownloadRequest):
     """Download the specified TDocs and zips them in a single archive"""

     # Document IDs to download
-    document_ids = req.documents
+    document_ids = [doc.document for doc in req.documents]

     logging.info(f"Downloading TDocs: {document_ids}")

@@ -279,7 +289,7 @@ def download_tdocs(req: DownloadRequest):

         raise HTTPException(
             status_code=501, detail="Got no URL results for docs {documents}. 3GPP index may not be up to date")
-
+
     documents_content: Dict[str, bytes] = {}
     failed_documents: List[str] = []

@@ -330,66 +340,6 @@ def download_tdocs(req: DownloadRequest):
         headers={"Content-Disposition": "attachment; filename=tdocs.zip"}
     )

-
-@router.post("/generate_requirements", response_model=RequirementsResponse)
-async def gen_reqs(req: RequirementsRequest, background_tasks: BackgroundTasks, llm_router: Router = Depends(get_llm_router)):
-    """Extract requirements from the specified TDocs using a LLM"""
-
-    documents = req.documents
-    n_docs = len(documents)
-
-    logging.info("Generating requirements for documents: {}".format(
-        [doc.document for doc in documents]))
-
-    def prompt(doc_id, full):
-        return f"Here's the document whose ID is {doc_id} : {full}\n\nExtract all requirements and group them by context, returning a list of objects where each object includes a document ID, a concise description of the context where the requirements apply (not a chapter title or copied text), and a list of associated requirements; always return the result as a list, even if only one context is found. Remove the errors"
-
-    async def process_document(doc):
-        doc_id = doc.document
-        url = doc.url
-        try:
-            full = "\n".join(docx_to_txt(doc_id, url))
-        except Exception as e:
-            logging.error(f"Failed to process doc {doc_id}", e)
-            return RequirementsResponse(requirements=[DocRequirements(document=doc_id, context="Error LLM", requirements=[])]).requirements
-
-        try:
-            resp_ai = await llm_router.acompletion(
-                model="gemini-v2",
-                messages=[
-                    {"role": "user", "content": prompt(doc_id, full)}],
-                response_format=RequirementsResponse
-            )
-
-            return RequirementsResponse.model_validate_json(resp_ai.choices[0].message.content).requirements
-
-        except Exception as e:
-            logging.error(
-                f"Failed to process document {doc_id}", e, stack_info=True)
-            return RequirementsResponse(requirements=[DocRequirements(document=doc_id, context="Error LLM", requirements=[])]).requirements
-
-    async def process_batch(batch):
-        results = await asyncio.gather(*(process_document(doc) for doc in batch))
-        return [item for sublist in results for item in sublist]
-
-    all_requirements = []
-
-    if n_docs <= 30:
-        batch_results = await process_batch(documents)
-        all_requirements.extend(batch_results)
-    else:
-        batch_size = 30
-        batches = [documents[i:i + batch_size]
-                   for i in range(0, n_docs, batch_size)]
-
-        for i, batch in enumerate(batches):
-            batch_results = await process_batch(batch)
-            all_requirements.extend(batch_results)
-
-            if i < len(batches) - 1:
-                background_tasks.add_task(asyncio.sleep, 60)
-    return RequirementsResponse(requirements=all_requirements)
-
 # ======================================================================================================================================================================================


@@ -402,14 +352,13 @@ class ProgressUpdate(BaseModel):


 @router.post("/generate_requirements/sse")
-async def gen_reqs(req: RequirementsRequest, con: Request, llm_router: Router = Depends(get_llm_router)):
-    """Extract requirements from the specified TDocs using a LLM and returns SSE events about the progress of ongoing operations"""
+async def gen_reqs(req: ExtractRequirementsRequest, llm_router: Router = Depends(get_llm_router)):
+    """Extract requirements from the specified xxxxCR docs using a LLM and returns SSE events about the progress of ongoing operations"""

     documents = req.documents
     n_docs = len(documents)

-    logging.info("Generating requirements for documents: {}".format(
-        [doc.document for doc in documents]))
+    logging.info("Generating requirements for documents: {}".format(req.documents))

     # limit max concurrency of LLM requests to prevent a huge pile of errors because of small rate limits
     concurrency_sema = asyncio.Semaphore(4)
@@ -437,9 +386,9 @@ async def gen_reqs(req: RequirementsRequest, con: Request, llm_router: Router =
                 model=model_used,
                 messages=[
                     {"role": "user", "content": prompt(doc_id, full)}],
-                response_format=RequirementsResponse
+                response_format=ExtractRequirementsResponse
             )
-            return RequirementsResponse.model_validate_json(resp_ai.choices[0].message.content).requirements
+            return ExtractRequirementsResponse.model_validate_json(resp_ai.choices[0].message.content).requirements
         except Exception as e:
             return [DocRequirements(document=doc_id, context="Error LLM", requirements=[])]
         finally:
@@ -464,7 +413,7 @@ async def gen_reqs(req: RequirementsRequest, con: Request, llm_router: Router =
             n_processed += 1
             yield progress_update(ProgressUpdate(status="progress", data={}, total_docs=n_docs, processed_docs=n_processed))

-    final_response = RequirementsResponse(requirements=items)
+    final_response = ExtractRequirementsResponse(requirements=items)

     yield progress_update(ProgressUpdate(status="complete", data=final_response.model_dump(), total_docs=n_docs, processed_docs=n_processed))
 
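Note on the get_docs_df filter above: the dataframe previously kept only uploaded CR documents in category B/C and pCRs; it now lists every uploaded TDoc regardless of type (the front-end filters by type instead). A minimal pandas sketch of the before/after behaviour, using made-up rows rather than data from the commit:

    import pandas as pd

    df = pd.DataFrame({
        "TDoc": ["S2-0001", "S2-0002", "S2-0003"],       # hypothetical TDoc numbers
        "Type": ["CR", "LS in", "pCR"],
        "CR category": ["B", None, None],
        "Uploaded": ["2025-01-01", "2025-01-01", None],  # None = not uploaded yet
    })

    # Old behaviour: only CR (category B/C) and pCR documents were listed
    old_filtered = df[(((df["Type"] == "CR") & (df["CR category"].isin(["B", "C"]))) | (df["Type"] == "pCR")) & ~df["Uploaded"].isna()]

    # New behaviour: every uploaded document is listed, whatever its type
    new_filtered = df[~df["Uploaded"].isna()]

    print(old_filtered["TDoc"].tolist())  # ['S2-0001']
    print(new_filtered["TDoc"].tolist())  # ['S2-0001', 'S2-0002']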
dependencies.py CHANGED
@@ -1,3 +1,4 @@
+import logging
 import os
 from httpx import AsyncClient
 from litellm.router import Router
@@ -11,6 +12,7 @@ from jinja2 import Environment, StrictUndefined, FileSystemLoader
 INSIGHT_FINDER_BASE_URL = "https://organizedprogrammers-insight-finder.hf.space/"
 DOC_FINDER_BASE_URL = "https://organizedprogrammers-docfinder.hf.space/"

+
 def init_dependencies():
     """Initialize the application global dependencies"""

@@ -47,8 +49,9 @@
     prompt_templates = Environment(loader=FileSystemLoader(
         "prompts"), enable_async=True, undefined=StrictUndefined)

-    http_client = AsyncClient(verify=os.environ.get(
-        "NO_SSL", "0") == "1", timeout=None)
+    enable_ssl = not os.environ.get("NO_SSL", "0") == "1"
+    logging.debug(f"SSL cert check is {enable_ssl}")
+    http_client = AsyncClient(verify=enable_ssl, timeout=None)


 def get_llm_router() -> Router:
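Two things worth noting in this hunk: the old expression appears to have inverted the SSL check (verify was True exactly when NO_SSL=1 was set), and the client is now built from an explicit, logged enable_ssl flag. A minimal sketch of how the shared AsyncClient and the get_http_client dependency used by api/docs.py could fit together, assuming get_http_client simply returns the client created in init_dependencies (its body is not part of this diff):

    import logging
    import os
    from typing import Optional

    from httpx import AsyncClient

    http_client: Optional[AsyncClient] = None

    def init_dependencies():
        global http_client
        # NO_SSL=1 disables certificate verification (e.g. behind an intercepting proxy)
        enable_ssl = not os.environ.get("NO_SSL", "0") == "1"
        logging.debug(f"SSL cert check is {enable_ssl}")
        http_client = AsyncClient(verify=enable_ssl, timeout=None)

    def get_http_client() -> AsyncClient:
        # FastAPI dependency: routes declare http_client: AsyncClient = Depends(get_http_client)
        return http_client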
schemas.py CHANGED
@@ -23,11 +23,18 @@ class DataResponse(BaseModel):


 class DocInfo(BaseModel):
+    """
+    Schema for describing a document to download.
+    """
+    # Document name
     document: str
+    # Document URL
     url: str
+    # Document type
+    type: str


-class RequirementsRequest(BaseModel):
+class ExtractRequirementsRequest(BaseModel):
     documents: List[DocInfo]


@@ -37,7 +44,7 @@ class DocRequirements(BaseModel):
     requirements: List[str]


-class RequirementsResponse(BaseModel):
+class ExtractRequirementsResponse(BaseModel):
     requirements: List[DocRequirements]

 # --------------------------------------
@@ -66,9 +73,9 @@ class ReqSearchResponse(BaseModel):
 # --------------------------------------


-class DownloadRequest(BaseModel):
-    documents: List[str] = Field(
-        description="List of document IDs to download")
+class DocDownloadRequest(BaseModel):
+    documents: List[DocInfo] = Field(
+        description="List of documents to download")


 class ReqGroupingCategory(BaseModel):
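For reference, a sketch of what the renamed request schemas accept; the document ID, URL and meeting folder below are invented for illustration:

    from schemas import DocInfo, DocDownloadRequest, ExtractRequirementsRequest

    doc = DocInfo(
        document="S2-2400001",  # hypothetical TDoc number
        url="https://www.3gpp.org/ftp/tsg_sa/WG2_Arch/TSGS2_160/docs/S2-2400001.zip",  # hypothetical URL
        type="pCR",
    )

    # /docs/download_tdocs now receives full DocInfo objects instead of bare ID strings
    download_req = DocDownloadRequest(documents=[doc])

    # /docs/generate_requirements/sse takes the same DocInfo list via ExtractRequirementsRequest
    extract_req = ExtractRequirementsRequest(documents=[doc])

    print(download_req.model_dump())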
static/index.html CHANGED
@@ -156,21 +156,17 @@
             <!-- Data Table Informations -->
             <div class="flex justify-between items-center mb-2 pt-5" id="data-table-info-container">
                 <div class="flex gap-2 items-center">
-                    <div class="tooltip" data-tip="Extract requirements from selected documents">
+                    <div class="tooltip" data-tip="Extract requirements from selected pCR / CR documents">
                         <button id="extract-requirements-btn"
-                            class="bg-orange-300 text-white text-sm rounded px-3 py-1 shadow hover:bg-orange-600">
-                            <svg class="w-6 h-6 text-gray-800 dark:text-white" aria-hidden="true"
-                                xmlns="http://www.w3.org/2000/svg" width="24" height="24" fill="none"
-                                viewBox="0 0 24 24">
-                                <path stroke="currentColor" stroke-linecap="round" stroke-linejoin="round"
-                                    stroke-width="2"
-                                    d="M9 8h6m-6 4h6m-6 4h6M6 3v18l2-2 2 2 2-2 2 2 2-2 2 2V3l-2 2-2-2-2 2-2-2-2 2-2-2Z" />
-                            </svg>Extract Requirements
+                            class="bg-orange-300 text-white text-sm rounded px-3 py-1 shadow hover:bg-orange-600">💉
+                            Extract Requirements
+                        </button>
+                    </div>
+                    <div class="tooltip" data-tip="Download all selected TDocs as text files">
+                        <button id="download-tdocs-btn" class="text-sm rounded px-3 py-1 shadow cursor-pointer">
+                            📦 Download Selected TDocs
                         </button>
                     </div>
-                    <button id="download-tdocs-btn" class="text-sm rounded px-3 py-1 shadow cursor-pointer">
-                        📦 Download Selected TDocs
-                    </button>
                 </div>

                 <!-- document counts -->
static/js/app.js CHANGED
@@ -161,14 +161,6 @@ function setupFilters(data) {
     document.getElementById('status-filter-label').textContent = 'Status (Tous)';
     document.getElementById('agenda-filter-label').textContent = 'Agenda Item (Tous)';
 }
-/**
- * Configure les événements des filtres
- */
-function setupFilterEvents() {
-    ['doc-type-filter', 'doc-status-filter', 'agenda-item-filter'].forEach(filterId => {
-        document.getElementById(filterId).addEventListener('change', applyFilters);
-    });
-}

 function updateSelectedAndDisplayedCount() {
     // Lignes visibles (après filtrage)
@@ -233,22 +225,25 @@ function setupTableEvents() {
 }

 /**
- * Télécharge les TDocs sélectionnés
+ * Télécharge les pCR / CR / draftCR dans TDocs sélectionnés.
+ * Le JS filtre les tdocs sélectionnés pour recup uniquement les xxxxxCR
  */
 async function downloadTDocs() {
     showLoadingOverlay('Downloading TDocs...');
     toggleElementsEnabled(['download-tdocs-btn', 'extract-requirements-btn'], false);

     try {
-        // Extraire les données du tableau avec TDoc et URL
-        const selectedData = extractTableData({ 'TDoc': 'document', 'URL': 'url' });
+        // Extraire les données du tableau avec le format suivant pour la requete backend
+        // { document: "nom_doc", url: "url_doc", type: "type_de_doc"}
+        const selectedData = extractTableData({ 'TDoc': 'document', 'URL': 'url', 'Type': "type" });
+
         if (selectedData.length === 0) {
             alert('Please select at least one document');
             return;
         }

-        // Transformer au format requis: [{tdoc_id: url}, ...]
-        const documents = selectedData.map(obj => obj.document)
+        // on prend tout
+        const documents = selectedData;

         const response = await fetch('/docs/download_tdocs', {
             method: 'POST',
@@ -324,17 +319,24 @@ function downloadBlob(blob, filename) {
  * Extrait les requirements des documents sélectionnés
  */
 async function extractRequirements() {
-    const selectedData = extractTableData({ 'TDoc': 'document', 'URL': 'url' });
+    const selectedData = extractTableData({ 'TDoc': 'document', 'URL': 'url', 'Type': 'type' });
+
+    console.log("Selected docs data");
+    console.log(selectedData);
+
     if (selectedData.length === 0) {
         alert('Please select at least one document');
         return;
     }

+    // ne prendre que les documents avec un type qui contient xxxxCR en minuscules
+    const documents = selectedData.filter(d => d.type.toLowerCase().includes("cr"));
+
     showLoadingOverlay('Extracting requirements...');
     toggleElementsEnabled(['extract-requirements-btn'], false);

     try {
-        const response = await postWithSSE('/docs/generate_requirements/sse', { documents: selectedData }, {
+        const response = await postWithSSE('/docs/generate_requirements/sse', { documents: documents }, {
             onMessage: (msg) => {
                 console.log("SSE message:");
                 console.log(msg);
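The onMessage handler above consumes the ProgressUpdate events emitted by /docs/generate_requirements/sse. A sketch of the event shape, based only on the usage visible in this diff (the field types are assumed, since the ProgressUpdate class body is not shown):

    from typing import Any, Dict

    from pydantic import BaseModel

    class ProgressUpdate(BaseModel):
        status: str           # "progress" while documents are processed, "complete" at the end
        data: Dict[str, Any]  # empty for progress events, ExtractRequirementsResponse dump on completion
        total_docs: int
        processed_docs: int

    # Example sequence for two selected xxxxCR documents (values are illustrative)
    events = [
        ProgressUpdate(status="progress", data={}, total_docs=2, processed_docs=1),
        ProgressUpdate(status="progress", data={}, total_docs=2, processed_docs=2),
        ProgressUpdate(status="complete",
                       data={"requirements": [{"document": "S2-2400001", "context": "example context", "requirements": []}]},
                       total_docs=2, processed_docs=2),
    ]
    for ev in events:
        print(ev.model_dump_json())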