DavMelchi committed on
Commit
f991b3c
·
1 Parent(s): 39c7db8

adding fn4b parser

Browse files
Files changed (4) hide show
  1. Changelog.md +4 -0
  2. app.py +2 -1
  3. apps/fnb_parser.py +324 -0
  4. requirements.txt +0 -0
Changelog.md CHANGED
@@ -1,6 +1,10 @@
1
 
2
  # CHANGELOGS
3
 
 
 
 
 
4
  ## [0.2.10] - 2025-07-01
5
 
6
  - Add KPI analysis App
 
1
 
2
  # CHANGELOGS
3
 
4
+ ## [0.2.11] - 2025-07-04
5
+
6
+ - Add FNB parser App
7
+
8
  ## [0.2.10] - 2025-07-01
9
 
10
  - Add KPI analysis App
app.py CHANGED
@@ -108,7 +108,7 @@ if check_password():
108
  layout="wide",
109
  initial_sidebar_state="expanded",
110
  menu_items={
111
- "About": "**📡 NPO DB Query v0.2.10**",
112
  },
113
  )
114
 
@@ -133,6 +133,7 @@ if check_password():
133
  "apps/clustering.py",
134
  title="📡 Automatic Site Clustering",
135
  ),
 
136
  st.Page(
137
  "apps/import_physical_db.py", title="🌏Physical Database Verification"
138
  ),
 
108
  layout="wide",
109
  initial_sidebar_state="expanded",
110
  menu_items={
111
+ "About": "**📡 NPO DB Query v0.2.11**",
112
  },
113
  )
114
 
 
133
  "apps/clustering.py",
134
  title="📡 Automatic Site Clustering",
135
  ),
136
+ st.Page("apps/fnb_parser.py", title="📄 F4NB Extractor"),
137
  st.Page(
138
  "apps/import_physical_db.py", title="🌏Physical Database Verification"
139
  ),
apps/fnb_parser.py ADDED
@@ -0,0 +1,324 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Streamlit application for extracting site and sector information from .docx design files.
3
+ The logic is adapted from `Sector Stacked.py` but provides an interactive UI where users can
4
+ upload one or many Word documents and instantly visualise / download the results.
5
+ """
6
+
7
+ import io
8
+ import os
9
+ import re
10
+ from typing import List
11
+
12
+ import pandas as pd
13
+ import plotly.express as px
14
+ import streamlit as st
15
+ from docx import Document
16
+
17
+ ###############################################################################
18
+ # --------------------------- Core extract logic -------------------------- #
19
+ ###############################################################################
20
+
21
+
22
def _find_site_token(cells):
    """Return the first cell containing "t00" or "n01" (case-insensitive), else None."""
    for value in cells:
        lowered = value.lower()
        if "t00" in lowered or "n01" in lowered:
            return value
    return None


def _first_cell_other_than(cells, label):
    """Return the first cell whose lower-cased text differs from *label*, else None."""
    for value in cells:
        if value.lower() != label:
            return value
    return None


def _sector_field_for(label: str) -> str | None:
    """Map the lower-cased first cell of a row to the sector field it feeds, if any."""
    # Order matters: it mirrors the precedence of the original if/elif chain.
    if label == "azimut":
        return "Azimuth"
    if "hauteur des aériens" in label:
        return "Height"
    if "tilt mécanique" in label:
        return "MechTilt"
    if "tilt électrique" in label:
        return "ElecTilt"
    return None


def _update_coordinates(site_shared: dict, cells) -> None:
    """Fill the X / Y / Z entries of *site_shared* from one table row (in place)."""
    # The row is only inspected when at least one cell is exactly "X", "Y" or "Z".
    if not {"X", "Y", "Z"}.intersection(cells):
        return

    for i, cell_text in enumerate(cells):
        text = cell_text.strip()
        has_next = i + 1 < len(cells)

        if text == "X" and has_next:
            site_shared["X"] = cells[i + 1].strip()
        elif re.search(r"Y\s*[0-9]", text):
            # Label and value share a cell, e.g. "Y 123".
            match = re.search(r"Y\s*([0-9°'\.\sWE]+)", text)
            if match:
                site_shared["Y"] = match.group(1).strip()
        elif text == "Y" and has_next:
            site_shared["Y"] = cells[i + 1].strip()
        elif re.search(r"Z\s*[0-9]", text):
            match = re.search(r"Z\s*([0-9]+)", text)
            if match:
                site_shared["Z"] = match.group(1).strip()
        elif text == "Z" and has_next:
            # Keep only the leading integer of the following cell.
            z_val = re.search(r"([0-9]+)", cells[i + 1])
            if z_val:
                site_shared["Z"] = z_val.group(1).strip()


def extract_info_from_docx_separated_sectors(
    docx_bytes: bytes, filename: str, max_sectors: int = 3
) -> List[dict]:
    """Extract the site-level and sector-level information from a Word design file.

    Every table row of the document is scanned once. Site-level fields (code,
    name, locality, address, UTM zone, coordinates) are shared by all sectors;
    sector-level fields are read from rows whose first cell is a known label,
    taking the following cells as the values of sector 1, 2, ...

    Parameters
    ----------
    docx_bytes : bytes
        Raw bytes of the `.docx` file – read directly from the Streamlit uploader.
    filename : str
        Original filename. Used only for reference in the output.
    max_sectors : int, optional
        Maximum number of sectors read per file. Defaults to 3, which matches
        the previously hard-coded limit.

    Returns
    -------
    list[dict]
        One dictionary per sector that has a non-empty azimuth, at most
        ``max_sectors`` entries. Each holds the shared site fields plus
        "Sector ID", "Azimuth", "Height", "MechTilt" and "ElecTilt".

    Raises
    ------
    ValueError
        If ``max_sectors`` is smaller than 1.
    """
    if max_sectors < 1:
        raise ValueError("max_sectors must be at least 1")

    # python-docx can open a file-like object, so we wrap the bytes in BytesIO
    doc = Document(io.BytesIO(docx_bytes))

    # Shared site information
    site_shared = {
        "File": filename,
        "Code": None,
        "Site Name": None,
        "Localité": None,
        "Adresse": None,
        "X": None,
        "Y": None,
        "Z": None,
        "UTM_Zone": None,
    }

    # Per-sector placeholders
    sector_data = {
        "Azimuth": [None] * max_sectors,
        "Height": [None] * max_sectors,
        "MechTilt": [None] * max_sectors,
        "ElecTilt": [None] * max_sectors,
    }

    for table in doc.tables:
        for row in table.rows:
            # Drop empty cells and surrounding whitespace
            cells = [cell.text.strip() for cell in row.cells if cell.text.strip()]
            if not cells:
                continue

            lowered_cells = [c.lower() for c in cells]
            row_text_lower = " | ".join(cells).lower()

            # Code (assumes pattern "T00" / "N01" typical of site codes)
            if site_shared["Code"] is None and any("code" in c for c in lowered_cells):
                token = _find_site_token(cells)
                if token is not None:
                    site_shared["Code"] = token.replace(" ", "").strip()

            # Site Name – same heuristic, but inner spaces are kept
            if site_shared["Site Name"] is None and any(
                "nom" in c for c in lowered_cells
            ):
                token = _find_site_token(cells)
                if token is not None:
                    site_shared["Site Name"] = token.strip()

            # UTM Zone
            if site_shared["UTM_Zone"] is None:
                utm_match = re.search(r"utm\s*(\d+)", row_text_lower)
                if utm_match:
                    site_shared["UTM_Zone"] = f"UTM{utm_match.group(1)}"

            # Localité and Adresse: first cell that is not exactly the label
            if site_shared["Localité"] is None and any(
                "localité" in c for c in lowered_cells
            ):
                value = _first_cell_other_than(cells, "localité:")
                if value is not None:
                    site_shared["Localité"] = value.strip()
            if site_shared["Adresse"] is None and any(
                "adresse" in c for c in lowered_cells
            ):
                value = _first_cell_other_than(cells, "adresse:")
                if value is not None:
                    site_shared["Adresse"] = value.strip()

            # Coordinates (X, Y, Z) – later rows overwrite earlier values
            _update_coordinates(site_shared, cells)

            # Sector-specific lines: cells after the label are sector 1, 2, ...
            field = _sector_field_for(lowered_cells[0])
            if field is not None:
                for i in range(min(max_sectors, len(cells) - 1)):
                    sector_data[field][i] = cells[i + 1]

    # Convert to per-sector rows; sectors without an azimuth are skipped
    rows: List[dict] = []
    for sector_id in range(max_sectors):
        if sector_data["Azimuth"][sector_id]:
            rows.append(
                {
                    **site_shared,
                    "Sector ID": sector_id + 1,
                    "Azimuth": sector_data["Azimuth"][sector_id],
                    "Height": sector_data["Height"][sector_id],
                    "MechTilt": sector_data["MechTilt"][sector_id],
                    "ElecTilt": sector_data["ElecTilt"][sector_id],
                }
            )
    return rows
165
+
166
+
167
+ def convert_coord_to_decimal(coord: str, default_direction: str | None = None):
168
+ """Convert coordinate strings containing degrees/minutes/seconds to decimal degrees.
169
+
170
+ Handles various formats, e.g. "3° 33' 12.4\" W", "3 33 12.4 O", "-3.5534", "3.5534E".
171
+ West (W/O) or South (S) are returned as negative values.
172
+ Returns None if conversion fails.
173
+ """
174
+
175
+ if coord is None or (isinstance(coord, float) and pd.isna(coord)):
176
+ return None
177
+
178
+ # Normalise the string – unify decimal separator and strip spaces
179
+ text = str(coord).replace(",", ".").strip()
180
+ if not text:
181
+ return None
182
+
183
+ # Detect hemisphere / direction letters
184
+ direction = None
185
+ match_dir = re.search(r"([NSEWnsewOo])", text)
186
+ if match_dir:
187
+ direction = match_dir.group(1).upper()
188
+ text = text.replace(match_dir.group(1), "") # remove letter for numeric parsing
189
+ else:
190
+ # No explicit letter – use supplied default if provided
191
+ if default_direction is not None:
192
+ direction = default_direction.upper()
193
+
194
+ # Grab all numeric components
195
+ nums = re.findall(r"[-+]?(?:\d+\.?\d*)", text)
196
+ if not nums:
197
+ return None
198
+
199
+ # Convert strings to float
200
+ nums_f = [float(n) for n in nums]
201
+
202
+ # Determine decimal value depending on how many components we have
203
+ if len(nums_f) >= 3:
204
+ deg, minute, sec = nums_f[0], nums_f[1], nums_f[2]
205
+ dec = deg + minute / 60 + sec / 3600
206
+ elif len(nums_f) == 2:
207
+ deg, minute = nums_f[0], nums_f[1]
208
+ dec = deg + minute / 60
209
+ else: # Already decimal degrees
210
+ dec = nums_f[0]
211
+
212
+ # Apply sign for West/Ouest/South
213
+ if direction in {"W", "O", "S"}: # West/Ouest or South => negative
214
+ dec = -abs(dec)
215
+
216
+ return dec
217
+
218
+
219
def process_files_to_dataframe(uploaded_files) -> pd.DataFrame:
    """Extract every uploaded file and gather all sector rows in one dataframe.

    Each item of *uploaded_files* must provide ``read()`` and ``name``. When the
    result is non-empty and has both "X" and "Y" columns, "X_decimal" and
    "Y_decimal" are added, using "N" and "W" respectively as the default
    direction for values that carry no hemisphere letter.
    """
    collected: List[dict] = []
    for upload in uploaded_files:
        collected += extract_info_from_docx_separated_sectors(
            upload.read(), upload.name
        )
    frame = pd.DataFrame(collected)

    # Nothing to convert: hand back the frame untouched
    if frame.empty or not {"X", "Y"}.issubset(frame.columns):
        return frame

    # Add decimal conversion for X and Y
    for axis, hemisphere in (("X", "N"), ("Y", "W")):
        frame[f"{axis}_decimal"] = frame[axis].apply(
            lambda raw, fallback=hemisphere: convert_coord_to_decimal(
                raw, default_direction=fallback
            )
        )
    return frame
237
+
238
+
239
+ ###############################################################################
240
+ # ----------------------------- Streamlit UI ------------------------------ #
241
+ ###############################################################################
242
+
243
+
244
def main() -> None:
    """Render the F4NB extractor page: upload, extraction, table, map and Excel export.

    Nothing is processed until the user uploads at least one `.docx` file and
    presses **Process**. The results are rendered only during the run triggered
    by that button press.
    """
    # NOTE(review): the entry-point script appears to configure the page as well –
    # confirm the installed Streamlit version accepts a second set_page_config call.
    st.set_page_config(
        page_title="F4NB Extractor to Excel", page_icon="📄", layout="wide"
    )

    st.title("📄 F4NB Extractor to Excel")
    st.markdown(
        "Convert F4NB Word documents into a tidy Excel / DataFrame containing site & sector information.\n"
        "Upload one or many F4NB `.docx` files and hit **Process**."
    )

    st.subheader("Upload Files")
    uploaded_files = st.file_uploader(
        "Select one or more F4NB `.docx` files",
        type=["docx"],
        accept_multiple_files=True,
    )
    process_btn = st.button("Process", type="primary", disabled=not uploaded_files)

    if not (process_btn and uploaded_files):
        return

    with st.spinner("Extracting information…"):
        df = process_files_to_dataframe(uploaded_files)

    if df.empty:
        st.warning(
            "No data extracted. Check that the files conform to the expected format."
        )
        return

    st.success(
        f"Processed {len(uploaded_files)} file(s) – extracted {len(df)} sector rows."
    )
    st.dataframe(df, use_container_width=True)

    # Interactive map of extracted coordinates using Plotly
    if {"Y_decimal", "X_decimal"}.issubset(df.columns):
        required = ["Y_decimal", "X_decimal", "Site Name", "Code"]
        # Keep "Height" so the marker size can use it; rows are still dropped
        # only when one of the four required columns is missing.
        selected = required + (["Height"] if "Height" in df.columns else [])
        geo_df = (
            df[selected]
            .dropna(subset=required)
            .rename(columns={"Y_decimal": "Longitude", "X_decimal": "Latitude"})
        )
        if "Height" in geo_df.columns:
            # Non-numeric heights fall back to 10; marker sizes must not be negative.
            size = (
                pd.to_numeric(geo_df["Height"], errors="coerce")
                .fillna(10)
                .clip(lower=0)
            )
        else:
            size = 10
        geo_df = geo_df.assign(Size=size)

        if not geo_df.empty:
            st.subheader("🗺️ Site Locations")
            fig = px.scatter_map(
                geo_df,
                lat="Latitude",
                lon="Longitude",
                hover_name="Site Name",
                hover_data={"Code": True},
                size="Size",
                size_max=10,
                zoom=6,
                height=500,
            )
            # scatter_map draws on the `map` subplot, so the style key is
            # `map_style`; `mapbox_style` would configure an unused subplot.
            fig.update_layout(
                map_style="open-street-map",
                margin={"r": 0, "t": 0, "l": 0, "b": 0},
            )
            st.plotly_chart(fig, use_container_width=True)

    # Offer download as Excel
    buffer = io.BytesIO()
    with pd.ExcelWriter(buffer, engine="xlsxwriter") as writer:
        df.to_excel(writer, index=False, sheet_name="Extract")
    st.download_button(
        label="💾 Download Excel",
        data=buffer.getvalue(),
        file_name="extracted_fnb.xlsx",
        mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    )


if __name__ == "__main__":  # pragma: no cover
    main()
requirements.txt CHANGED
Binary files a/requirements.txt and b/requirements.txt differ