File size: 7,191 Bytes
7860c2e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
import os
import shutil
import tempfile

import pandas as pd
import streamlit as st
import xlwings as xw
from pyxlsb import open_workbook

# === Core Logic ===


def find_header_row(df, keyword="Dist_Name"):
    for i in range(min(20, len(df))):
        row = df.iloc[i].astype(str).str.strip().str.lower()
        if any(keyword.lower() in str(cell) for cell in row):
            return i
    raise ValueError(f"No row with '{keyword}' found.")


def read_xlsb_with_pyxlsb(file, sheet):
    rows = []
    with open_workbook(file) as wb:
        with wb.get_sheet(sheet) as s:
            for row in s.rows():
                rows.append([item.v for item in row])
    return pd.DataFrame(rows)


def read_sheet_fallback(file, sheet):
    try:
        return read_xlsb_with_pyxlsb(file, sheet)
    except Exception:
        try:
            app = xw.App(visible=False)
            book = app.books.open(file)
            sht = book.sheets[sheet]
            df = sht.used_range.options(pd.DataFrame, header=False, index=False).value
            book.close()
            app.quit()
            return df
        except Exception as e2:
            raise RuntimeError(f"xlwings failed: {e2}")


def load_clean_df(file, sheet):
    df_raw = read_sheet_fallback(file, sheet)
    header_row = find_header_row(df_raw)
    df_raw.columns = df_raw.iloc[header_row]
    df = df_raw.drop(index=list(range(header_row + 1)))
    df.columns = [str(c).strip().replace("\xa0", " ") for c in df.columns]
    df = df.astype(str).apply(lambda col: col.str.strip())
    return df


def detect_dist_col(columns):
    for col in columns:
        if "dist" in col.lower() and "name" in col.lower():
            return col
    raise ValueError("Dist_Name column not found.")


def compare_dumps(
    old_file,
    new_file,
    mo_list,
    output_dir,
    #   progress_callback=None
):
    os.makedirs(output_dir, exist_ok=True)

    # Friendly column labels based on file names
    old_label = os.path.basename(old_file)
    new_label = os.path.basename(new_file)

    total_changes = 0
    logs = []

    for i, sheet_name in enumerate(mo_list):
        try:
            df_old = load_clean_df(old_file, sheet_name)
            df_new = load_clean_df(new_file, sheet_name)

            dist_col_old = detect_dist_col(df_old.columns)
            dist_col_new = detect_dist_col(df_new.columns)

            df_old = df_old[df_old[dist_col_old].notna()].set_index(dist_col_old)
            df_new = df_new[df_new[dist_col_new].notna()].set_index(dist_col_new)

            common = df_old.index.intersection(df_new.index)
            df_old_common = df_old.loc[common]
            df_new_common = df_new.loc[common]

            mask = (df_old_common != df_new_common) & ~(
                df_old_common.isna() & df_new_common.isna()
            )

            changes = []
            for dist in mask.index:
                for param in mask.columns[mask.loc[dist]]:
                    if param.strip().lower() == "file_name":
                        continue

                    changes.append(
                        {
                            "Dist_Name": dist,
                            "Parameter": param,
                            old_label: df_old_common.loc[dist, param],
                            new_label: df_new_common.loc[dist, param],
                        }
                    )

            df_changes = pd.DataFrame(changes)
            if not df_changes.empty:
                output_path = os.path.join(output_dir, f"{sheet_name}_differences.xlsx")
                df_changes.to_excel(output_path, index=False)
                logs.append(f"{len(df_changes)} changes in {sheet_name}")
                total_changes += len(df_changes)
            else:
                logs.append(f"No changes in {sheet_name}")

        except Exception as e:
            logs.append(f"Error in {sheet_name}: {e}")

    # if progress_callback:
    #     progress_callback((i + 1) / len(mo_list))

    return total_changes, logs


# === Streamlit UI ===

st.title("📊 Dump Compare Tool")

old_file = st.file_uploader("Upload Old Dump (.xlsb)", type=["xlsb"], key="old")
new_file = st.file_uploader("Upload New Dump (.xlsb)", type=["xlsb"], key="new")

# Determine common sheet names available in BOTH uploaded dumps and let the user pick
common_sheets: list[str] = []
selected_sheets: list[str] = []

if old_file and new_file:
    import tempfile as _tmp

    from pyxlsb import open_workbook as _open_wb

    def _get_sheet_names(uploaded_file) -> list[str]:
        """Return sheet names from an `st.uploaded_file` object."""
        with _tmp.NamedTemporaryFile(delete=False, suffix=".xlsb") as tmp:
            tmp.write(uploaded_file.getvalue())
            tmp_path = tmp.name
        try:
            with _open_wb(tmp_path) as wb:
                # `wb.sheets` in pyxlsb already returns a list of sheet names (str)
                return list(wb.sheets)
        finally:
            os.remove(tmp_path)

    common_sheets = sorted(
        set(_get_sheet_names(old_file)).intersection(_get_sheet_names(new_file))
    )

    if common_sheets:
        selected_sheets = st.multiselect(
            "MO Sheet Names (choose one or more)",
            common_sheets,
            default=common_sheets[:1],  # select only the first sheet by default
        )
    else:
        st.warning("No common sheet names found between the two files.")
output_dir = "comparison_output"  # fixed output folder name

if st.button("Run Comparison", type="primary", use_container_width=True):
    if not all([old_file, new_file]) or not selected_sheets:
        st.warning("Please upload both files and select at least one common sheet.")
    else:
        mo_list = selected_sheets
        # Reset file pointers because they may have been consumed while reading sheet names
        old_file.seek(0)
        new_file.seek(0)
        with st.spinner("Comparing dumps..."):
            with tempfile.TemporaryDirectory() as tmpdir:
                output_path = os.path.join(tmpdir, output_dir)
                old_path = os.path.join(tmpdir, "old.xlsb")
                new_path = os.path.join(tmpdir, "new.xlsb")

                with open(old_path, "wb") as f:
                    f.write(old_file.read())
                with open(new_path, "wb") as f:
                    f.write(new_file.read())

                # progress_bar = st.progress(0.0)

                # def update_progress(pct):
                #     progress_bar.progress(pct)

                total, logs = compare_dumps(old_path, new_path, mo_list, output_path)

                st.success(f"✅ Comparison completed. Total changes: {total}")

                # Zip and offer download
                shutil.make_archive(output_path, "zip", output_path)
                with open(f"{output_path}.zip", "rb") as f:
                    st.download_button(
                        "Download Results (.zip)",
                        f,
                        file_name="differences.zip",
                        mime="application/zip",
                        type="primary",
                        on_click="ignore",
                    )