Commit 939b332: "1st commit"
Parent(s): none (initial commit)
Files changed:
- .gitignore (+5 -0)
- README.md (+10 -0)
- main.py (+60 -0)
- physical_db/physical_database.csv (+0 -0)
- queries/process_gsm.py (+110 -0)
- queries/process_lte.py (+148 -0)
- queries/process_trx.py (+72 -0)
- queries/process_wcdma.py (+128 -0)
- requirements.txt (+0 -0)
- utils/convert_to_excel.py (+54 -0)
- utils/extract_code.py (+34 -0)
- utils/utils_vars.py (+53 -0)
.gitignore
ADDED
@@ -0,0 +1,5 @@
+/.history
+/.venv
+/__pycache__
+
+__pycache__
README.md
ADDED
@@ -0,0 +1,10 @@
+---
+title: Db Query
+emoji: 🏢
+colorFrom: red
+colorTo: green
+sdk: streamlit
+sdk_version: 1.37.1
+app_file: app.py
+pinned: false
+---
main.py
ADDED
@@ -0,0 +1,60 @@
+import time
+
+import streamlit as st
+
+from queries.process_gsm import process_gsm_data
+from queries.process_lte import process_lte_data
+from queries.process_wcdma import process_wcdma_data
+from utils.utils_vars import UtilsVars
+
+st.title("Database processing")
+
+uploaded_file = st.file_uploader("Upload updated dump file", type="xlsb")
+
+
+def process_database(process_func, database_type):
+    if uploaded_file is not None:
+        start_time = time.time()
+        process_func(uploaded_file)
+        execution_time = time.time() - start_time
+        st.write(
+            f"{database_type} database is generated. Execution time: {execution_time:.2f} seconds"
+        )
+        download_button(database_type)
+
+
+def download_button(database_type):
+    if database_type == "2G":
+        data = UtilsVars.final_gsm_database
+        file_name = f"2G database_{time.time()}.xlsx"
+    elif database_type == "3G":
+        data = UtilsVars.final_wcdma_database
+        file_name = f"3G database_{time.time()}.xlsx"
+    elif database_type == "LTE":
+        data = UtilsVars.final_lte_database
+        file_name = f"LTE database_{time.time()}.xlsx"
+    st.download_button(
+        label=f"Download {database_type} Database File",
+        data=data,
+        file_name=file_name,
+        mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
+    )
+
+
+col1, col2, col3 = st.columns(3)
+if uploaded_file is not None:
+    with col1:
+        st.button(
+            "Generate 2G Database",
+            on_click=lambda: process_database(process_gsm_data, "2G"),
+        )
+    with col2:
+        st.button(
+            "Generate 3G Database",
+            on_click=lambda: process_database(process_wcdma_data, "3G"),
+        )
+    with col3:
+        st.button(
+            "Generate LTE Database",
+            on_click=lambda: process_database(process_lte_data, "LTE"),
+        )
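Since main.py is a Streamlit script, the app is started with `streamlit run main.py`. The buttons hand work to `process_database` through `on_click` callbacks, and the generated workbook bytes are parked on the module-level `UtilsVars` holder before `download_button` reads them back. Below is a minimal sketch of the same callback-then-download pattern, using `st.session_state` instead of a module-level holder; the names and payload are illustrative, not part of the commit:

    import streamlit as st

    def handle_generate() -> None:
        # Stand-in for a real processing step that produces workbook bytes.
        st.session_state["payload"] = b"example-bytes"

    st.button("Generate", on_click=handle_generate)
    if "payload" in st.session_state:
        st.download_button(
            label="Download",
            data=st.session_state["payload"],
            file_name="out.xlsx",
        )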
physical_db/physical_database.csv
ADDED
The diff for this file is too large to render.
queries/process_gsm.py
ADDED
@@ -0,0 +1,110 @@
+import pandas as pd
+
+from queries.process_trx import process_trx_data
+from utils.convert_to_excel import convert_dfs, save_dataframe
+from utils.utils_vars import UtilsVars
+
+BTS_COLUMNS = [
+    "ID_BCF",
+    "ID_BTS",
+    "BSC",
+    "BCF",
+    "BTS",
+    "code",
+    "plmnPermitted",
+    "frequencyBandInUse",
+    "name",
+    "adminState",
+    "allowIMSIAttachDetach",
+    "amrSegLoadDepTchRateLower",
+    "amrSegLoadDepTchRateUpper",
+    "antennaHopping",
+    "bcchTrxPower",
+    "bsIdentityCodeBCC",
+    "bsIdentityCodeNCC",
+    "BSIC",
+    "cellId",
+    "dedicatedGPRScapacity",
+    "defaultGPRScapacity",
+    "fddQMin",
+    "fddQOffset",
+    "fddRscpMin",
+    "gprsEnabled",
+    "locationAreaIdLAC",
+    "locationAreaIdMCC",
+    "locationAreaIdMNC",
+    "rac",
+    "rachDropRxLevelThreshold",
+    "sectorId",
+    "SectorId2",
+    "segmentId",
+    "fastReturnToLTE",
+    "gsmPriority",
+    "segmentName",
+    "Code_Sector",
+]
+
+BCF_COLUMNS = [
+    "ID_BCF",
+    "site_name",
+]
+
+
+def process_gsm_data(file_path: str):
+    """
+    Process data from the specified file path.
+
+    Args:
+        file_path (str): The path to the file.
+    """
+    # Read the required sheets into DataFrames
+    dfs = pd.read_excel(
+        file_path,
+        sheet_name=["BTS", "BCF", "TRX"],
+        engine="calamine",
+        skiprows=[0],
+    )
+
+    # Process BTS data
+    df_bts = dfs["BTS"]
+    df_bts.columns = df_bts.columns.str.replace(r"[ ]", "", regex=True)
+    df_bts["code"] = df_bts["name"].str.split("_").str[0].astype(int)
+    df_bts["ID_BTS"] = df_bts[["BSC", "BCF", "BTS"]].astype(str).apply("_".join, axis=1)
+    df_bts["BSIC"] = (
+        df_bts[["bsIdentityCodeNCC", "bsIdentityCodeBCC"]]
+        .astype(str)
+        .apply("".join, axis=1)
+    )
+    df_bts["SectorId2"] = (
+        df_bts["sectorId"].map(UtilsVars.sector_mapping).fillna(df_bts["sectorId"])
+    )
+    df_bts["ID_BCF"] = df_bts[["BSC", "BCF"]].astype(str).apply("_".join, axis=1)
+    df_bts["Code_Sector"] = (
+        df_bts[["code", "SectorId2"]].astype(str).apply("_".join, axis=1)
+    )
+    df_bts["Code_Sector"] = df_bts["Code_Sector"].str.replace(".0", "")
+    df_bts = df_bts[BTS_COLUMNS]
+
+    # Process BCF data
+    df_bcf = dfs["BCF"]
+    df_bcf.columns = df_bcf.columns.str.replace(r"[ ]", "", regex=True)
+    df_bcf["ID_BCF"] = df_bcf[["BSC", "BCF"]].astype(str).apply("_".join, axis=1)
+    df_bcf.rename(columns={"name": "site_name"}, inplace=True)
+    df_bcf = df_bcf[BCF_COLUMNS]
+
+    df_trx = process_trx_data(file_path)
+
+    # Merge dataframes
+    df_bts_bcf = pd.merge(df_bts, df_bcf, on="ID_BCF", how="left")
+    df_2g = pd.merge(df_bts_bcf, df_trx, on="ID_BTS", how="left")
+
+    df_physical_db = UtilsVars.physisal_db
+    df_2g = pd.merge(df_2g, df_physical_db, on="Code_Sector", how="left")
+
+    # Save dataframes
+    # save_dataframe(df_bts, "bts")
+    # save_dataframe(df_bcf, "bcf")
+    save_dataframe(df_trx, "trx")
+    # df_2g2 = save_dataframe(df_2g, "2g")
+
+    UtilsVars.final_gsm_database = convert_dfs([df_2g], ["GSM"])
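The composite keys built above (`ID_BTS`, `ID_BCF`, `Code_Sector`) all use the same idiom: cast the key columns to strings, then row-wise join them with an underscore. A small self-contained illustration of that idiom, with toy values rather than real dump data:

    import pandas as pd

    df = pd.DataFrame({"BSC": [12], "BCF": [34], "BTS": [2]})
    # Row-wise join: each row becomes "<BSC>_<BCF>_<BTS>".
    df["ID_BTS"] = df[["BSC", "BCF", "BTS"]].astype(str).apply("_".join, axis=1)
    print(df["ID_BTS"].iloc[0])  # -> "12_34_2"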
queries/process_lte.py
ADDED
@@ -0,0 +1,148 @@
+import numpy as np
+import pandas as pd
+
+from utils.convert_to_excel import convert_dfs, save_dataframe
+from utils.utils_vars import UtilsVars, get_band
+
+LNCEL_COLUMNS = [
+    "ID_LNBTS",
+    "ID_LNCEL",
+    "MRBTS",
+    "LNBTS",
+    "LNCEL",
+    "final_name",
+    "name",
+    "cellName",
+    "code",
+    "SectorId",
+    "Code_Sector",
+    "actModulationSchemeDl",
+    "actModulationSchemeUL",
+    "administrativeState",
+    "eutraCelId",
+    "lcrId",
+    "pMax",
+    "phyCellId",
+    "tac",
+    "Region",
+    "band",
+    "band_type",
+]
+
+
+LNCEL_FDD_COLUMNS = [
+    "ID_LNCEL",
+    "dlChBw",
+    "dlMimoMode",
+    "dlRsBoost",
+    "earfcnDL",
+    "earfcnUL",
+    "prachCS",
+    "rootSeqIndex",
+    "ulChBw",
+]
+
+LNCEL_TDD_COLUMNS = [
+    "ID_LNCEL",
+    "chBw",
+    "dlMimoMode",
+    "dlRsBoost",
+    "earfcn",
+    "prachCS",
+    "rootSeqIndex",
+]
+
+
+def process_lte_data(file_path: str):
+    """
+    Process data from the specified file path.
+
+    Args:
+        file_path (str): The path to the file.
+    """
+    # Read Excel sheets into DataFrames
+    dfs = pd.read_excel(
+        file_path,
+        sheet_name=["LNCEL", "LNBTS", "LNCEL_FDD", "LNCEL_TDD"],
+        engine="calamine",
+        skiprows=[0],
+    )
+
+    # Process LNCEL data
+    df_lncel = dfs["LNCEL"]
+    df_lncel.columns = df_lncel.columns.str.replace(r"[ ]", "", regex=True)
+    df_lncel["final_name"] = df_lncel["name"].fillna(df_lncel["cellName"])
+    df_lncel["code"] = df_lncel["final_name"].str.split("_").str[0]
+    df_lncel["SectorId"] = (
+        df_lncel["lcrId"].map(UtilsVars.sector_mapping).fillna(df_lncel["lcrId"])
+    )
+    df_lncel["Code_Sector"] = (
+        df_lncel[["code", "SectorId"]]
+        .astype(str)
+        .apply("_".join, axis=1)
+        .str.replace(".0", "")
+        .str.lstrip("0")
+    )
+    df_lncel["ID_LNCEL"] = (
+        df_lncel[["MRBTS", "LNBTS", "LNCEL"]].astype(str).apply("_".join, axis=1)
+    )
+    df_lncel["ID_LNBTS"] = (
+        df_lncel[["MRBTS", "LNBTS"]].astype(str).apply("_".join, axis=1)
+    )
+    df_lncel["Region"] = df_lncel["final_name"].str.split("_").str[1]
+    df_lncel["band"] = df_lncel["final_name"].apply(get_band)
+    df_lncel["band_type"] = np.where(df_lncel["band"] == "L2300", "TDD", "FDD")
+    df_lncel = df_lncel[LNCEL_COLUMNS]
+
+    # Process LNBTS data
+    df_lnbts = dfs["LNBTS"]
+    df_lnbts.columns = df_lnbts.columns.str.replace(r"[ ]", "", regex=True)
+    df_lnbts["ID_LNBTS"] = (
+        df_lnbts[["MRBTS", "LNBTS"]].astype(str).apply("_".join, axis=1)
+    )
+    df_lnbts.rename(columns={"name": "lnbts_name"}, inplace=True)
+    df_lnbts = df_lnbts[["ID_LNBTS", "lnbts_name"]]
+
+    # Merge dataframes
+    df_lncel_lnbts = pd.merge(df_lncel, df_lnbts, on="ID_LNBTS", how="left")
+
+    df_physical_db = UtilsVars.physisal_db
+    df_physical_db = df_physical_db[
+        ["Code_Sector", "Azimut", "Longitude", "Latitude", "Hauteur"]
+    ]
+    df_lncel_lnbts = pd.merge(
+        df_lncel_lnbts, df_physical_db, on="Code_Sector", how="left"
+    )
+
+    # Process LNCEL_FDD and LNCEL_TDD data
+    df_lncel_fdd = dfs["LNCEL_FDD"]
+    df_lncel_fdd.columns = df_lncel_fdd.columns.str.replace(r"[ ]", "", regex=True)
+    df_lncel_fdd["ID_LNCEL"] = (
+        df_lncel_fdd[["MRBTS", "LNBTS", "LNCEL"]].astype(str).apply("_".join, axis=1)
+    )
+    df_lncel_fdd = df_lncel_fdd[LNCEL_FDD_COLUMNS]
+
+    df_lncel_tdd = dfs["LNCEL_TDD"]
+    df_lncel_tdd.columns = df_lncel_tdd.columns.str.replace(r"[ ]", "", regex=True)
+    df_lncel_tdd["ID_LNCEL"] = (
+        df_lncel_tdd[["MRBTS", "LNBTS", "LNCEL"]].astype(str).apply("_".join, axis=1)
+    )
+    df_lncel_tdd = df_lncel_tdd[LNCEL_TDD_COLUMNS]
+
+    # Create df_fdd and df_tdd based on "band"
+    df_fdd = df_lncel_lnbts[df_lncel_lnbts["band"] != "L2300"]
+    df_tdd = df_lncel_lnbts[df_lncel_lnbts["band"] == "L2300"]
+
+    df_fdd_final = pd.merge(df_fdd, df_lncel_fdd, on="ID_LNCEL", how="left")
+    df_tdd_final = pd.merge(df_tdd, df_lncel_tdd, on="ID_LNCEL", how="left")
+
+    # Save dataframes
+    # save_dataframe(df_fdd_final, "fdd")
+    # save_dataframe(df_tdd_final, "tdd")
+
+    UtilsVars.final_lte_database = convert_dfs(
+        [df_fdd_final, df_tdd_final], ["lte_fdd", "lte_tdd"]
+    )
+
+
+# process_lte_data(r"data2\20240805_5810_05082024_Dump.xml.gz.xlsb")
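The `map(...).fillna(...)` step above is worth a note: mapping `lcrId` through `UtilsVars.sector_mapping` leaves unmapped values as NaN, which promotes the column to float, and `fillna` then restores the originals as floats. That float promotion is why the joined `Code_Sector` string later needs `.str.replace(".0", "")`. A toy demonstration with made-up values:

    import pandas as pd

    sector_mapping = {4: 1, 5: 2, 6: 3, 11: 1, 12: 2, 13: 3}
    lcr = pd.Series([4, 12, 7])
    # Unmapped 7 becomes NaN, the dtype turns float, fillna restores it.
    sector = lcr.map(sector_mapping).fillna(lcr)
    print(sector.tolist())  # -> [1.0, 2.0, 7.0]; astype(str) would give "1.0" etc.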
queries/process_trx.py
ADDED
@@ -0,0 +1,72 @@
+import pandas as pd
+
+from utils.convert_to_excel import convert_dfs, save_dataframe
+from utils.utils_vars import UtilsVars
+
+TRX_COLUMNS = [
+    "ID_BTS",
+    "trxRfPower",
+    "BCCH",
+    "TCH",
+    "number_trx_per_cell",
+    "number_trx_per_site",
+]
+
+
+def process_trx_data(file_path: str):
+    """
+    Process data from the specified file path.
+
+    Args:
+        file_path (str): The path to the file.
+    """
+    # Read the required sheets into DataFrames
+    dfs = pd.read_excel(
+        file_path,
+        sheet_name=["BTS", "BCF", "TRX"],
+        engine="calamine",
+        skiprows=[0],
+    )
+
+    # Process TRX data
+    df_trx = dfs["TRX"]
+    df_trx.columns = df_trx.columns.str.replace(r"[ ]", "", regex=True)
+    df_trx["ID_BTS"] = df_trx[["BSC", "BCF", "BTS"]].astype(str).apply("_".join, axis=1)
+    df_trx["ID_BCF"] = df_trx[["BSC", "BCF"]].astype(str).apply("_".join, axis=1)
+    df_trx["number_trx_per_cell"] = df_trx.groupby("ID_BTS")["ID_BTS"].transform(
+        "count"
+    )
+    df_trx["number_trx_per_site"] = df_trx.groupby("ID_BCF")["ID_BCF"].transform(
+        "count"
+    )
+
+    bcch = df_trx[df_trx["channel0Type"] == 4]
+    tch = df_trx[df_trx["channel0Type"] == 3][["ID_BTS", "initialFrequency"]]
+
+    tch = tch.pivot_table(
+        index="ID_BTS",
+        values="initialFrequency",
+        aggfunc=lambda x: " ".join(map(str, x)),
+    )
+
+    tch = tch.reset_index()
+
+    # Rename the columns
+    tch.columns = ["ID_BTS", "TCH"]
+
+    # Merge dataframes
+
+    df_trx = pd.merge(bcch, tch, on="ID_BTS", how="left")
+    # Rename "initialFrequency" to "BCCH"
+    df_trx = df_trx.rename(columns={"initialFrequency": "BCCH"})
+    df_trx = df_trx[TRX_COLUMNS]
+
+    # Save dataframes
+    # save_dataframe(df_trx, "trx")
+    # df_2g2 = save_dataframe(df_2g, "2g")
+
+    # UtilsVars.final_gsm_database = convert_dfs([df_2g], ["GSM"])
+    return df_trx
+
+
+# process_trx_data(r"data2\20240805_5810_05082024_Dump.xml.gz.xlsb")
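Two pandas idioms carry this function: `groupby(...).transform("count")` attaches a per-group TRX count to every row, and a `pivot_table` with a string-joining `aggfunc` collapses all TCH frequencies of a cell into one space-separated field. A toy run of both, with illustrative values:

    import pandas as pd

    trx = pd.DataFrame({
        "ID_BTS": ["A", "A", "A"],
        "channel0Type": [4, 3, 3],  # 4 = BCCH, 3 = TCH (per channeltype_mapping)
        "initialFrequency": [512, 514, 516],
    })
    # Every row of cell "A" gets the same count.
    trx["number_trx_per_cell"] = trx.groupby("ID_BTS")["ID_BTS"].transform("count")
    # All TCH frequencies of a cell joined into one string.
    tch = trx[trx["channel0Type"] == 3].pivot_table(
        index="ID_BTS",
        values="initialFrequency",
        aggfunc=lambda x: " ".join(map(str, x)),
    ).reset_index()
    print(tch.iloc[0].tolist())  # -> ['A', '514 516']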
queries/process_wcdma.py
ADDED
@@ -0,0 +1,128 @@
+import pandas as pd
+
+from utils.convert_to_excel import convert_dfs, save_dataframe
+from utils.extract_code import extract_code_from_mrbts
+from utils.utils_vars import UtilsVars
+
+WCEL_COLUMNS = [
+    "ID_WBTS",
+    "ID_WCEL",
+    "RNC",
+    "WBTS",
+    "WCEL",
+    "site_name",
+    "name",
+    "code",
+    "AdminCellState",
+    "CId",
+    "LAC",
+    "UARFCN",
+    "PriScrCode",
+    "SAC",
+    "maxCarrierPower",
+    "PtxPrimaryCPICH",
+    "CellRange",
+    "CodeTreeOptTimer",
+    "CodeTreeOptimisation",
+    "CodeTreeUsage",
+    "PRACHDelayRange",
+    "PrxOffset",
+    "PrxTarget",
+    "PrxTargetMax",
+    "PrxTargetPSMax",
+    "PrxTargetPSMaxtHSRACH",
+    "PtxCellMax",
+    "PtxOffset",
+    "PtxTarget",
+    "SmartLTELayeringEnabled",
+    "SectorID",
+    "Code_Sector",
+    "code_wcel",
+]
+
+WBTS_COLUMNS = [
+    "ID_WBTS",
+    "site_name",
+]
+
+WNCEL_COLUMNS = [
+    "code_wcel",
+    "maxCarrierPower",
+]
+
+
+def process_wcdma_data(file_path: str):
+    """
+    Process data from the specified file path.
+
+    Args:
+        file_path (str): The path to the file.
+    """
+    # Read the required sheets into DataFrames
+    # df_wcel = pd.read_excel(
+    #     file_path, sheet_name="WCEL", engine="calamine", skiprows=[0]
+    # )
+    # df_wbts = pd.read_excel(
+    #     file_path, sheet_name="WBTS", engine="calamine", skiprows=[0]
+    # )
+    # df_wncel = pd.read_excel(
+    #     file_path, sheet_name="WNCEL", engine="calamine", skiprows=[0]
+    # )
+
+    dfs = pd.read_excel(
+        file_path,
+        sheet_name=["WCEL", "WBTS", "WNCEL"],
+        engine="calamine",
+        skiprows=[0],
+    )
+
+    # Process WCEL data
+    df_wcel = dfs["WCEL"]
+    df_wcel.columns = df_wcel.columns.str.replace(r"[ ]", "", regex=True)
+    df_wcel["code"] = df_wcel["name"].str.split("_").str[0].astype(int)
+    df_wcel["ID_WCEL"] = (
+        df_wcel[["RNC", "WBTS", "WCEL"]].astype(str).apply("_".join, axis=1)
+    )
+
+    df_wcel["ID_WBTS"] = df_wcel[["RNC", "WBTS"]].astype(str).apply("_".join, axis=1)
+    df_wcel["Code_Sector"] = (
+        df_wcel[["code", "SectorID"]].astype(str).apply("_".join, axis=1)
+    )
+    df_wcel["code_wcel"] = df_wcel[["code", "WCEL"]].astype(str).apply("_".join, axis=1)
+
+    df_wcel["Code_Sector"] = df_wcel["Code_Sector"].str.replace(".0", "")
+
+    # Process WBTS data
+    df_wbts = dfs["WBTS"]
+    df_wbts.columns = df_wbts.columns.str.replace(r"[ ]", "", regex=True)
+    df_wbts["ID_WBTS"] = df_wbts[["RNC", "WBTS"]].astype(str).apply("_".join, axis=1)
+    df_wbts.rename(columns={"name": "site_name"}, inplace=True)
+    df_wbts = df_wbts[WBTS_COLUMNS]
+
+    # Process WNCEL data
+    df_wncel = dfs["WNCEL"]
+    df_wncel.columns = df_wncel.columns.str.replace(r"[ ]", "", regex=True)
+    df_wncel["CODE"] = df_wncel["MRBTS"].apply(extract_code_from_mrbts)
+    df_wncel["code_wcel"] = (
+        df_wncel[["CODE", "WNCEL"]].astype(str).apply("_".join, axis=1)
+    )
+    df_wncel = df_wncel[WNCEL_COLUMNS]
+
+    # Merge dataframes
+    df_wcel_bcf = pd.merge(df_wcel, df_wbts, on="ID_WBTS", how="left")
+
+    df_3g = pd.merge(df_wcel_bcf, df_wncel, on="code_wcel", how="left")
+
+    df_3g = df_3g[WCEL_COLUMNS]
+
+    df_physical_db = UtilsVars.physisal_db
+    df_3g = pd.merge(df_3g, df_physical_db, on="Code_Sector", how="left")
+    # Save dataframes
+    # save_dataframe(df_wcel, "wcel")
+    # save_dataframe(df_wcel_bcf, "wbts")
+    # save_dataframe(df_wncel, "wncel")
+    # df_3g = save_dataframe(df_3g, "3G")
+
+    UtilsVars.final_wcdma_database = convert_dfs([df_3g], ["WCDMA"])
+
+# BTS.process_ok = "Done"
requirements.txt
ADDED
Binary file (126 Bytes).
utils/convert_to_excel.py
ADDED
@@ -0,0 +1,54 @@
+import io
+import time
+
+import pandas as pd
+import streamlit as st
+
+
+@st.cache_data
+def convert_dfs(dfs: list[pd.DataFrame], sheet_names: list[str]) -> bytes:
+    # IMPORTANT: Cache the conversion to prevent recomputation on every rerun
+
+    # Create a BytesIO object
+    bytes_io = io.BytesIO()
+
+    # Write the dataframes to the BytesIO object
+    with pd.ExcelWriter(bytes_io, engine="xlsxwriter") as writer:
+        for df, sheet_name in zip(dfs, sheet_names):
+            df.to_excel(writer, sheet_name=sheet_name, index=False)
+
+    # Get the bytes data
+    bytes_data = bytes_io.getvalue()
+
+    # Close the BytesIO object
+    bytes_io.close()
+
+    return bytes_data
+
+
+# def save_dataframes(dfs: list[pd.DataFrame], sheet_names: list[str], folder_path: str):
+#     """
+#     Save the dataframes to an Excel file. The Excel file will be saved in the
+#     folder_path directory.
+
+#     Args:
+#         dfs (list[pd.DataFrame]): The list of dataframes to save.
+#         sheet_names (list[str]): The list of names for each sheet.
+#         folder_path (str): The path to the folder where the Excel file will be saved.
+#     """
+#     bytes_data = convert_dfs(dfs, sheet_names)
+#     timestamp = int(time.time())
+#     file_name = f"{folder_path}/data_{timestamp}.xlsx"
+#     with open(file_name, "wb") as f:
+#         f.write(bytes_data)
+
+
+def save_dataframe(df: pd.DataFrame, sheet_name: str):
+    """
+    Save the dataframe to a CSV file.
+
+    Args:
+        df (pd.DataFrame): The dataframe to save.
+        sheet_name (str): The name of the sheet.
+    """
+    df.to_csv(f"data2/{sheet_name}_{time.time()}.csv", index=False)
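`convert_dfs` returns the finished workbook as raw bytes, which is exactly what `st.download_button` expects for its `data` argument. Assuming the `xlsxwriter` engine is installed, a sketch of direct usage (a `@st.cache_data`-wrapped function can still be called like a plain function, though outside a running Streamlit app it may warn about the missing session):

    import pandas as pd
    # from utils.convert_to_excel import convert_dfs

    df = pd.DataFrame({"a": [1, 2]})
    xlsx_bytes = convert_dfs([df], ["Sheet1"])
    with open("out.xlsx", "wb") as f:  # or pass xlsx_bytes to st.download_button
        f.write(xlsx_bytes)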
utils/extract_code.py
ADDED
@@ -0,0 +1,34 @@
+def extract_code_from_mrbts(mrbts):
+    """
+    Extracts the code from an MRBTS (Mobile Radio Base Transceiver Station) identifier.
+
+    Args:
+        mrbts (int or str): The MRBTS identifier to extract the code from.
+
+    Returns:
+        int: The extracted code from the MRBTS identifier.
+
+    Raises:
+        None.
+
+    Notes:
+        This function handles MRBTS identifiers that start with '10' and have a length
+        greater than 5, as well as identifiers that start with '1', '2', or '3'. For
+        identifiers that do not meet these criteria, the entire value is returned as
+        an integer.
+    """
+    str_mrbts = str(mrbts)
+
+    if len(str_mrbts) > 5 and str_mrbts.startswith("10"):
+        # For MRBTS starting with '10' and having length greater than 5
+        return int(str_mrbts[2:])
+    elif len(str_mrbts) > 4 and str_mrbts.startswith("1"):
+        return int(str_mrbts[1:])
+    elif len(str_mrbts) > 4 and str_mrbts.startswith("2"):
+        # For MRBTS starting with '2' (like 20000 + code)
+        return int(str_mrbts[1:])
+    elif len(str_mrbts) > 4 and str_mrbts.startswith("3"):
+        # For MRBTS starting with '3' (like 30000 + code)
+        return int(str_mrbts[1:])
+    else:
+        # Default case
+        return int(str_mrbts)
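A few illustrative calls, with hypothetical values chosen to exercise each branch (not taken from the dump):

    extract_code_from_mrbts(1012345)  # len > 5, starts with "10" -> 12345
    extract_code_from_mrbts(15810)    # len > 4, starts with "1"  -> 5810
    extract_code_from_mrbts(25810)    # len > 4, starts with "2"  -> 5810
    extract_code_from_mrbts(9999)     # no rule matches           -> 9999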
utils/utils_vars.py
ADDED
@@ -0,0 +1,53 @@
+import numpy as np
+import pandas as pd
+
+
+def get_physical_db():
+    """
+    Reads the physical_database.csv file from the physical_db directory and
+    returns a pandas DataFrame containing only the columns 'Code_Sector',
+    'Azimut', 'Longitude', 'Latitude', and 'Hauteur'.
+
+    Returns:
+        pd.DataFrame: A DataFrame containing the filtered columns.
+    """
+    physical = pd.read_csv(r"physical_db\physical_database.csv")
+    physical = physical[["Code_Sector", "Azimut", "Longitude", "Latitude", "Hauteur"]]
+    return physical
+
+
+class UtilsVars:
+    sector_mapping = {4: 1, 5: 2, 6: 3, 11: 1, 12: 2, 13: 3}
+    channeltype_mapping = {4: "BCCH", 3: "TCH"}
+    final_lte_database = ""
+    final_gsm_database = ""
+    final_wcdma_database = ""
+    physisal_db = get_physical_db()
+
+
+# print(UtilsVars.physisal_db)
+
+
+def get_band(text):
+    """
+    Extract the band from the given string.
+
+    Parameters
+    ----------
+    text : str
+        The string to extract the band from.
+
+    Returns
+    -------
+    str or np.nan
+        The extracted band, or NaN if the text was not a string or did not contain
+        any of the recognized bands (L1800, L2300, L800).
+    """
+    if isinstance(text, str):  # Check if text is a string
+        if "L1800" in text:
+            return "L1800"
+        elif "L2300" in text:
+            return "L2300"
+        elif "L800" in text:
+            return "L800"
+    return np.nan  # or return None
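For reference, `get_band` returns the first recognized band token found in the cell name, and NaN for non-string input. A few hypothetical examples (the name format is assumed, not taken from the dump):

    get_band("12345_NE_L1800_A")  # -> "L1800"
    get_band("12345_NE_L2300_A")  # -> "L2300"
    get_band(None)                # -> nan (non-string input)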