First version
Browse files- Dockerfile +17 -0
- app.py +234 -0
- index.html +223 -0
- requirements.txt +10 -0
Dockerfile
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Python base image pinned to the runtime the app was developed against.
FROM python:3.11.3

# LibreOffice is invoked headlessly by the app to convert .docx TDocs to
# plain text (see docx_to_txt in app.py).
RUN apt-get update && \
    apt-get install -y libreoffice libreoffice-writer libreoffice-calc libreoffice-impress && \
    apt-get clean && rm -rf /var/lib/apt/lists/*

# Run as an unprivileged user; uid 1000 — presumably required by the hosting
# platform (e.g. HF Spaces) — TODO confirm.
RUN useradd -m -u 1000 user
USER user
ENV PATH="/home/user/.local/bin:$PATH"

WORKDIR /app

# Install dependencies before copying the source so Docker layer caching
# survives code-only edits.
COPY --chown=user ./requirements.txt requirements.txt
RUN pip install --trusted-host pypi.org --trusted-host pypi.python.org --trusted-host files.pythonhosted.org --no-cache-dir --upgrade -r requirements.txt

COPY --chown=user . /app
# Serve the FastAPI app on the platform's expected port.
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
app.py
ADDED
@@ -0,0 +1,234 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# --- Imports and application setup ------------------------------------------
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import FileResponse
import litellm
import pandas as pd
from pydantic import BaseModel, Field
from typing import Any, List, Dict, Optional
import re
import subprocess
import requests
import os
from lxml import etree
import zipfile
import io
import warnings
# Silence all warnings — notably urllib3's InsecureRequestWarning triggered by
# the verify=False requests made against the 3GPP FTP site below.
warnings.filterwarnings("ignore")
from bs4 import BeautifulSoup

# Single FastAPI app serving both the HTML UI and the JSON API endpoints.
app = FastAPI(title="Requirements Extractor")
# Fully open CORS policy: the UI may be served from any origin.
app.add_middleware(CORSMiddleware, allow_credentials=True, allow_headers=["*"], allow_methods=["*"], allow_origins=["*"])
22 |
+
class MeetingsRequest(BaseModel):
    """Request body for /get_meetings."""
    # Working-group label, e.g. "SA1" or "CT3".
    working_group: str


class MeetingsResponse(BaseModel):
    """Response of /get_meetings: display name -> FTP meeting folder name."""
    meetings: Dict[str, str]


class DataRequest(BaseModel):
    """Request body for /get_dataframe."""
    working_group: str
    # FTP folder name of the meeting (a value from MeetingsResponse.meetings).
    meeting: str


class DataResponse(BaseModel):
    """Response of /get_dataframe: one record per filtered TDoc row."""
    data: List[Dict[Any, Any]]


class DocRequirements(BaseModel):
    """Requirements extracted by the LLM from a single document."""
    doc_id: str
    # One-sentence context the requirements apply to.
    context: str
    requirements: List[str]


class DocInfo(BaseModel):
    """A document to process: TDoc identifier plus its download URL."""
    document: str
    url: str


class RequirementsRequest(BaseModel):
    """Request body for /generate_requirements."""
    documents: List[DocInfo]


class RequirementsResponse(BaseModel):
    """Response of /generate_requirements."""
    requirements: List[DocRequirements]


# XML namespaces used when XPath-ing WordprocessingML (docx) documents.
NSMAP = {
    'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main',
    'v': 'urn:schemas-microsoft-com:vml'
}
55 |
+
def get_docx_archive(url: str) -> zipfile.ZipFile:
    """Download the ZIP at *url* and return the first .docx/.doc inside it
    as an open ZipFile.

    Raises ValueError when the URL is not a .zip archive or when the archive
    contains no Word document; raises requests.HTTPError on download failure.
    """
    # Require a real ".zip" suffix — the previous endswith("zip") also
    # accepted e.g. "...gzip".
    if not url.endswith(".zip"):
        raise ValueError("URL doit pointer vers un fichier ZIP")

    # NOTE(review): verify=False disables TLS certificate checking — kept for
    # parity with the rest of the file, but worth revisiting.
    resp = requests.get(url, verify=False, headers={
        "User-Agent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    })
    resp.raise_for_status()

    with zipfile.ZipFile(io.BytesIO(resp.content)) as zf:
        for file_name in zf.namelist():
            if file_name.endswith((".docx", ".doc")):
                docx_bytes = zf.read(file_name)
                return zipfile.ZipFile(io.BytesIO(docx_bytes))

    raise ValueError("Aucun fichier docx/doc trouvé dans l'archive")
73 |
+
def parse_document_xml(docx_zip: zipfile.ZipFile) -> "etree._Element":
    """Parse word/document.xml from *docx_zip* and return its root element.

    Fixed the return annotation: etree.fromstring returns an _Element, not an
    _ElementTree as previously declared.
    """
    xml_bytes = docx_zip.read('word/document.xml')
    # remove_blank_text lets lxml pretty-print cleanly when re-serialising.
    parser = etree.XMLParser(remove_blank_text=True)
    return etree.fromstring(xml_bytes, parser=parser)
|
79 |
+
def clean_document_xml(root: "etree._Element") -> None:
    """Strip tracked changes and comment markers from a docx XML tree, in place.

    Deleted runs (<w:del>) are removed outright, inserted runs (<w:ins>) are
    unwrapped so their content is kept, and comment anchors are dropped.
    """
    # Remove <w:del> elements together with their (deleted) content.
    for del_elem in root.xpath('//w:del', namespaces=NSMAP):
        parent = del_elem.getparent()
        if parent is not None:
            parent.remove(del_elem)

    # Unwrap <w:ins>: hoist children to the parent, then drop the wrapper.
    for ins_elem in root.xpath('//w:ins', namespaces=NSMAP):
        parent = ins_elem.getparent()
        if parent is None:
            continue
        index = parent.index(ins_elem)
        # BUG FIX: iterate over a snapshot — inserting a child into `parent`
        # reparents it, which mutates the live iterchildren() sequence and
        # previously skipped every other child.
        for child in list(ins_elem):
            parent.insert(index, child)
            index += 1
        parent.remove(ins_elem)

    # Drop comment range markers and references.
    for tag in ['w:commentRangeStart', 'w:commentRangeEnd', 'w:commentReference']:
        for elem in root.xpath(f'//{tag}', namespaces=NSMAP):
            parent = elem.getparent()
            if parent is not None:
                parent.remove(elem)
|
103 |
+
def create_modified_docx(original_zip: zipfile.ZipFile, modified_root: "etree._Element") -> bytes:
    """Rebuild the docx archive with word/document.xml replaced by *modified_root*."""
    buffer = io.BytesIO()

    with zipfile.ZipFile(buffer, 'w', compression=zipfile.ZIP_DEFLATED) as rebuilt:
        # Carry over every member except the main document part.
        for info in original_zip.infolist():
            if info.filename == 'word/document.xml':
                continue
            rebuilt.writestr(info, original_zip.read(info.filename))

        # Serialise the cleaned tree and store it as the new main part.
        serialized = etree.tostring(
            modified_root,
            xml_declaration=True,
            encoding='UTF-8',
            pretty_print=True,
        )
        rebuilt.writestr('word/document.xml', serialized)

    buffer.seek(0)
    return buffer.getvalue()
|
125 |
+
def docx_to_txt(doc_id: str, url: str) -> List[str]:
    """Download the TDoc at *url*, strip tracked changes/comments, and convert
    it to plain text via headless LibreOffice.

    Returns the non-empty, stripped lines of the converted file.
    Raises subprocess.CalledProcessError if the conversion fails.
    """
    docx_zip = get_docx_archive(url)
    root = parse_document_xml(docx_zip)
    clean_document_xml(root)
    modified_bytes = create_modified_docx(docx_zip, root)

    input_path = f"/tmp/{doc_id}_cleaned.docx"
    # LibreOffice keeps the basename and swaps the extension in --outdir.
    output_path = f"/tmp/{doc_id}_cleaned.txt"
    # BUG FIX: clean up the temp files even when the conversion fails —
    # previously a LibreOffice error leaked both files in /tmp.
    try:
        with open(input_path, "wb") as f:
            f.write(modified_bytes)

        subprocess.run([
            "libreoffice",
            "--headless",
            "--convert-to", "txt",
            "--outdir", "/tmp",
            input_path
        ], check=True)

        with open(output_path, "r", encoding="utf-8") as f:
            return [line.strip() for line in f if line.strip()]
    finally:
        for path in (input_path, output_path):
            try:
                os.remove(path)
            except FileNotFoundError:
                pass
|
151 |
+
@app.get("/")
def render_page():
    """Serve the single-page UI (index.html sits next to app.py in /app)."""
    return FileResponse("index.html")
|
155 |
+
@app.post("/get_meetings", response_model=MeetingsResponse)
def get_meetings(req: MeetingsRequest):
    """List the meetings of a 3GPP working group by scraping the FTP site.

    Returns a mapping from display name (e.g. "SA1#105") to the raw FTP
    folder name; empty when no matching working-group folder is found.
    """
    working_group = req.working_group                      # e.g. "SA1"
    tsg = re.sub(r"\d+", "", working_group)                # TSG prefix, e.g. "SA"
    wg_number = re.search(r"\d", working_group).group(0)   # WG digit, e.g. "1"
    url = "https://www.3gpp.org/ftp/tsg_" + tsg
    # NOTE(review): verify=False skips TLS verification for the 3GPP site.
    resp = requests.get(url, verify=False)
    soup = BeautifulSoup(resp.text, "html.parser")
    meeting_folders = []
    all_meetings = []
    wg_folders = [item.get_text() for item in soup.select("tr td a")]
    selected_folder = None
    for folder in wg_folders:
        if str(wg_number) in folder:
            selected_folder = folder
            break

    if selected_folder:
        # BUG FIX: the folder was previously appended to the URL *before* the
        # None check, raising TypeError when no folder matched; concatenate
        # only once a folder is known to exist.
        url += "/" + selected_folder
        resp = requests.get(url, verify=False)
        soup = BeautifulSoup(resp.text, "html.parser")
        meeting_folders = [item.get_text() for item in soup.select("tr td a") if item.get_text().startswith("TSG")]
        # "TSGS1_105_Paris" -> "SA1#105 Paris"
        all_meetings = [working_group + "#" + meeting.split("_", 1)[1].replace("_", " ").replace("-", " ") for meeting in meeting_folders]

    return MeetingsResponse(meetings=dict(zip(all_meetings, meeting_folders)))
|
182 |
+
@app.post("/get_dataframe", response_model=DataResponse)
def get_change_request_dataframe(req: DataRequest):
    """Return the filtered TDoc list for one meeting.

    Downloads the meeting's TDoc spreadsheet, keeps category B/C CRs and all
    pCRs that were actually uploaded, and attaches a download URL per TDoc.

    Raises ValueError when the working-group folder or the spreadsheet cannot
    be located on the FTP site.
    """
    working_group = req.working_group
    tsg = re.sub(r"\d+", "", working_group)
    wg_number = re.search(r"\d", working_group).group(0)
    url = "https://www.3gpp.org/ftp/tsg_" + tsg
    resp = requests.get(url, verify=False)
    soup = BeautifulSoup(resp.text, "html.parser")
    wg_folders = [item.get_text() for item in soup.select("tr td a")]
    selected_folder = None
    for folder in wg_folders:
        if str(wg_number) in folder:
            selected_folder = folder
            break
    # Fail fast with a clear message instead of a TypeError on concatenation.
    if selected_folder is None:
        raise ValueError(f"No folder found for working group {working_group}")

    url += "/" + selected_folder + "/" + req.meeting + "/docs"
    resp = requests.get(url, verify=False)
    soup = BeautifulSoup(resp.text, "html.parser")
    files = [item.get_text() for item in soup.select("tr td a") if item.get_text().endswith(".xlsx")]
    # Previously files[0] raised a bare IndexError when no spreadsheet existed.
    if not files:
        raise ValueError(f"No TDoc spreadsheet (.xlsx) found under {url}")

    def gen_url(tdoc: str):
        # Every TDoc is downloadable as <docs-url>/<TDoc>.zip
        return f"{url}/{tdoc}.zip"

    # '#' in meeting names must be percent-encoded for the download to work.
    df = pd.read_excel(str(url + "/" + files[0]).replace("#", "%23"))
    filtered_df = df[(((df["Type"] == "CR") & ((df["CR category"] == "B") | (df["CR category"] == "C"))) | (df["Type"] == "pCR")) & ~(df["Uploaded"].isna())][["TDoc", "Title", "CR category", "Source", "Type", "Agenda item", "Agenda item description", "TDoc Status"]]
    filtered_df["URL"] = filtered_df["TDoc"].apply(gen_url)

    df = filtered_df.fillna("")
    return DataResponse(data=df[["TDoc", "Title", "Type", "TDoc Status", "Agenda item description", "URL"]].to_dict(orient="records"))
|
212 |
+
@app.post("/generate_requirements", response_model=RequirementsResponse)
def gen_reqs(req: RequirementsRequest):
    """Extract requirements from each document with the LLM, one call per TDoc.

    Each document is downloaded, converted to text, then sent to Gemini with a
    structured-output schema (DocRequirements).
    """
    documents = req.documents
    output = []
    for doc in documents:
        doc_id = doc.document
        url = doc.url

        full = "\n".join(docx_to_txt(doc_id, url))

        resp_ai = litellm.completion(
            model="gemini/gemini-2.0-flash",
            # SECURITY FIX: the key was hard-coded in source ("SECRET API
            # HERE"); read it from the environment instead.
            api_key=os.environ.get("GEMINI_API_KEY"),
            messages=[{"role":"user","content": f"Here's the document whose ID is {doc_id} with requirements : {full}\n\nI want you to extract all the requirements and give me a context (not giving the section or whatever, a sentence is needed) where that calls for those requirements. If multiples covered contexts is present, make as many requirements list by context as you want."}],
            response_format=DocRequirements
        )

        # litellm returns the structured output as a JSON string.
        reqs = DocRequirements.model_validate_json(resp_ai.choices[0].message.content)
        output.append(reqs)

    return RequirementsResponse(requirements=output)
index.html
ADDED
@@ -0,0 +1,223 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<!DOCTYPE html>
<html lang="fr" data-theme="light">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Requirements Extractor</title>
    <!-- NOTE(review): the daisyUI CDN version was mangled by extraction
         ("[email protected]") — restore the intended daisyui@<version>. -->
    <link href="https://cdn.jsdelivr.net/npm/[email protected]/dist/full.css" rel="stylesheet">
    <script src="https://cdn.tailwindcss.com"></script>
</head>
<body class="p-8 bg-base-100">
    <div class="container mx-auto">
        <h1 class="text-4xl font-bold text-center mb-8">Requirements Extractor</h1>
        <!-- Working group / meeting pickers + load button -->
        <div>
            <div class="grid grid-cols-1 md:grid-cols-3 gap-4 mb-6">
                <select class="select select-bordered" id="workingGroupSelect">
                    <option disabled selected value="">Working Group</option>
                    <option>SA1</option>
                    <option>SA2</option>
                    <option>SA3</option>
                    <option>SA4</option>
                    <option>SA5</option>
                    <option>SA6</option>
                    <option>CT1</option>
                    <option>CT2</option>
                    <option>CT3</option>
                    <option>CT4</option>
                    <option>CT5</option>
                    <option>CT6</option>
                </select>
                <select class="select select-bordered" id="meetingSelect" disabled>
                    <option disabled selected value="">Select a working group</option>
                </select>
                <button class="btn" id="getTDocs">Get TDocs</button>
            </div>
        </div>
        <!-- Filter dropdowns, hidden until the TDoc table is loaded -->
        <div class="hidden" id="filters">
            <div class="grid grid-cols-1 md:grid-cols-3 gap-4 mb-6">
                <select class="select select-bordered" id="docType">
                    <option disabled selected value="">Type</option>
                    <option>Tous</option>
                </select>

                <select class="select select-bordered" id="docStatus">
                    <option disabled selected value="">Status</option>
                    <option>Tous</option>
                </select>

                <select class="select select-bordered" id="agendaItem">
                    <option disabled selected value = "">Agenda</option>
                    <option>Tous</option>
                </select>
            </div>
        </div>


        <!-- TDoc data table -->
        <div class="max-h-[65vh] overflow-y-auto">
            <table class="table table-zebra w-full" id="dataFrame">
                <thead class="sticky top-0 bg-base-200 z-10">
                    <tr class="bg-base-200">
                        <th>TDoc</th>
                        <th>Title</th>
                        <th>Type</th>
                        <th>Status</th>
                        <th>Agenda Item N°</th>
                        <th>URL</th>
                    </tr>
                </thead>
                <tbody>
                </tbody>
            </table>
        </div>

        <center><button class="btn mt-6 gap-4" id="getReqs">Get Requirements</button></center>
    </div>
|
76 |
+
|
77 |
+
<script>
|
78 |
+
// Fetch the TDoc table for the selected working group + meeting, fill the
// data table, and (re)build the three filter dropdowns from the result.
function getDataFrame(){
    const wg = document.getElementById('workingGroupSelect').value;
    const meeting = document.getElementById('meetingSelect').value;

    // Reset the filter dropdowns. BUG FIX: the Status and Agenda placeholders
    // both wrongly said "Type".
    document.getElementById('docType').innerHTML = `
        <option disabled selected value="">Type</option>
        <option>Tous</option>
    `;
    document.getElementById('docStatus').innerHTML = `
        <option disabled selected value="">Status</option>
        <option>Tous</option>
    `;
    document.getElementById('agendaItem').innerHTML = `
        <option disabled selected value="">Agenda</option>
        <option>Tous</option>
    `;

    const dataFrame = document.getElementById("dataFrame");
    const getTDocsBtn = document.getElementById("getTDocs");
    getTDocsBtn.setAttribute('disabled', 'true');
    getTDocsBtn.innerHTML = "Loading ...";

    fetch("/get_dataframe", {method: "POST", headers: {"Content-Type": "application/json"}, body: JSON.stringify({"working_group": wg, "meeting": meeting})})
        .then(resp => resp.json())
        .then(data => {
            document.getElementById("filters").classList.remove("hidden");
            const dataframeBody = dataFrame.querySelector("tbody");
            dataframeBody.innerHTML = "";
            // Collect the distinct values feeding the filter dropdowns.
            const setType = new Set();
            const setAgenda = new Set();
            const setStatus = new Set();
            data.data.forEach(row => {
                const tr = document.createElement("tr");
                // data-* attributes drive filterTable().
                tr.setAttribute("data-type", row['Type']);
                tr.setAttribute("data-status", row["TDoc Status"]);
                tr.setAttribute("data-agenda", row["Agenda item description"]);
                tr.innerHTML = `
                    <td>${row["TDoc"]}</td>
                    <td>${row["Title"]}</td>
                    <td>${row["Type"]}</td>
                    <td>${row["TDoc Status"]}</td>
                    <td>${row["Agenda item description"]}</td>
                    <td>
                        <a href="${row["URL"]}" class="link">${row["URL"]}</a>
                    </td>
                `;
                dataframeBody.appendChild(tr);
                setType.add(row["Type"]);
                setAgenda.add(row["Agenda item description"]);
                setStatus.add(row["TDoc Status"]);
            });

            // One helper instead of three copy-pasted loops.
            const appendOptions = (selectId, values) => {
                const select = document.getElementById(selectId);
                values.forEach(value => {
                    const option = document.createElement("option");
                    option.textContent = value;
                    option.value = value;
                    select.appendChild(option);
                });
            };
            appendOptions('docType', setType);
            appendOptions('agendaItem', setAgenda);
            appendOptions('docStatus', setStatus);
        })
        .catch(err => console.error("Failed to load TDocs:", err))
        .finally(() => {
            // BUG FIX: the button was re-enabled synchronously, right after
            // the fetch was *started*; restore it only once it settles.
            getTDocsBtn.removeAttribute("disabled");
            getTDocsBtn.innerHTML = "Get TDocs";
        });
}
|
154 |
+
// Show/hide table rows according to the three filter dropdowns. An empty
// value (placeholder) or "Tous" means "no filter" for that dimension.
function filterTable() {
    const selected = {
        type: document.getElementById('docType').value,
        status: document.getElementById('docStatus').value,
        agenda: document.getElementById('agendaItem').value
    };
    const matches = (choice, actual) =>
        choice === 'Tous' || choice === '' || actual === choice;

    document.querySelectorAll('#dataFrame tbody tr').forEach(row => {
        const visible =
            matches(selected.type, row.dataset.type) &&
            matches(selected.status, row.dataset.status) &&
            matches(selected.agenda, row.dataset.agenda);
        row.style.display = visible ? '' : 'none';
    });
}
|
169 |
+
// Load the meetings of the chosen working group into the meeting dropdown.
function getMeetings(){
    const workingGroup = document.getElementById("workingGroupSelect").value;
    const meetingSelect = document.getElementById("meetingSelect");
    const getTDocsBtn = document.getElementById("getTDocs");
    meetingSelect.setAttribute('disabled', 'true');
    meetingSelect.innerHTML = "<option>Loading...</option>";
    getTDocsBtn.setAttribute('disabled', 'true');
    fetch("/get_meetings", {method: "POST", headers: {"Content-Type": "application/json"}, body: JSON.stringify({"working_group": workingGroup})})
        .then(resp => resp.json())
        .then(data => {
            meetingSelect.innerHTML = "";
            meetingSelect.removeAttribute("disabled");
            getTDocsBtn.removeAttribute("disabled");
            // API returns {display name -> FTP folder}; show the name,
            // submit the folder.
            for(const [label, folder] of Object.entries(data.meetings)){
                const option = document.createElement("option");
                option.textContent = label;
                option.value = folder;
                meetingSelect.appendChild(option);
            }
        })
        .catch(err => {
            // BUG FIX: on failure the select used to stay disabled on
            // "Loading..." forever with no feedback.
            meetingSelect.innerHTML = "<option>Error — retry</option>";
            console.error("Failed to load meetings:", err);
        });
}
|
189 |
+
// Serialise the currently *visible* rows of a table into the request body
// expected by /generate_requirements: [{doc_id, url}, ...].
function tableToGenBody(tableSelector) {
    // Map from table header text to the JSON key expected by the backend.
    const columnsMap = {"TDoc": "doc_id", "URL": "url"};
    const table = document.querySelector(tableSelector);
    const headers = Array.from(table.querySelectorAll('thead th')).map(th => th.innerText.trim());

    // Pair each wanted output key with its column index.
    const wanted = [];
    headers.forEach((header, idx) => {
        if (columnsMap[header]) {
            wanted.push([columnsMap[header], idx]);
        }
    });

    // Rows hidden by the filters (display: none) are excluded.
    return Array.from(table.querySelectorAll('tbody tr'))
        .filter(row => getComputedStyle(row).display !== 'none')
        .map(row => {
            const cells = Array.from(row.querySelectorAll('td'));
            const record = {};
            for (const [key, idx] of wanted) {
                record[key] = cells[idx].innerText.trim();
            }
            return record;
        });
}
|
214 |
+
        // Event wiring: the filter dropdowns re-filter the table, picking a
        // working group loads its meetings, and the button fetches the TDocs.

        document.getElementById('docType').addEventListener('change', filterTable)
        document.getElementById('docStatus').addEventListener('change', filterTable)
        document.getElementById('agendaItem').addEventListener('change', filterTable)
        document.getElementById("workingGroupSelect").addEventListener('change', getMeetings)
        document.getElementById('getTDocs').addEventListener('click', getDataFrame)
    </script>
</body>
</html>
|
requirements.txt
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# LLM client (Gemini structured-output calls)
litellm
# Web framework + ASGI server
fastapi
uvicorn[standard]
# Spreadsheet / data handling
pandas
numpy
pydantic
# Scraping the 3GPP FTP site and parsing docx XML
requests
lxml
openpyxl
beautifulsoup4