File size: 3,372 Bytes
09b3ab0
3cb5962
 
 
 
09b3ab0
 
 
 
3cb5962
 
5b03300
76e7fa7
 
3cb5962
 
 
 
 
 
 
09b3ab0
 
3cb5962
 
 
 
 
 
 
 
 
 
 
 
 
 
09b3ab0
 
3cb5962
 
 
 
 
 
 
09b3ab0
 
3cb5962
 
 
 
09b3ab0
3cb5962
 
09b3ab0
3cb5962
 
 
09b3ab0
 
3cb5962
 
 
 
09b3ab0
 
3cb5962
 
 
 
5b03300
 
 
 
 
 
60b4adc
 
 
5b03300
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import marimo
import pandas as pd
import io
import matplotlib.pyplot as plt
from script import getSearchResult, getClustersWithGraph, compare_clusters

# marimo application object; every cell function below is decorated with `@app.cell`.
app = marimo.App()

@app.cell
def _(mo):
    import os

    # Google credentials are read from the environment; an unset variable yields "".
    api_key = os.environ.get("GOOGLE_API_KEY", "")
    cse_id = os.environ.get("GOOGLE_CSE_ID", "")

    # Keyword source.
    csv_upload = mo.ui.file(label="Upload Keyword CSV (1 column)")

    # Search locale inputs.
    country = mo.ui.text(label="Country Code (e.g. UK)", value="UK")
    language = mo.ui.text(label="Language Code (e.g. EN)", value="EN")

    # Storage inputs.
    database = mo.ui.text(label="SQLite DB Name", value="data.db")
    serp_table = mo.ui.text(label="SERP Table", value="keywords_serps")
    cluster_table = mo.ui.text(label="Cluster Table", value="keyword_clusters")

    # Trigger widget for the comparison run.
    run_button = mo.ui.button(label="Run Clustering Comparison")

    return (
        csv_upload,
        api_key,
        cse_id,
        country,
        language,
        database,
        serp_table,
        cluster_table,
        run_button,
    )

@app.cell
def _(mo):
    # Display label -> timestamp string. A dict is used (rather than a list of
    # tuples) so the dropdown shows the label and `.value` yields the mapped
    # timestamp string that the clustering step reads.
    timestamp_options = {
        "Latest available (max)": "max",
        "March 2024 core update (2024-04-26 00:00:00.000000)": "2024-04-26 00:00:00.000000",
        "August 2024 core update (2024-09-10 00:00:00.000000)": "2024-09-10 00:00:00.000000",
        "November 2024 core update (2024-12-11 00:00:00.000000)": "2024-12-11 00:00:00.000000",
        "December 2024 core update (2024-12-25 00:00:00.000000)": "2024-12-25 00:00:00.000000",
        "March 2025 core update (2025-04-03 00:00:00.000000)": "2025-04-03 00:00:00.000000",
        "June 2025 core update (2025-07-24 00:00:00.000000)": "2025-07-24 00:00:00.000000",
    }

    # Two independent pickers over the same option set, one per snapshot to compare.
    timestamp_1 = mo.ui.dropdown(timestamp_options, label="Choose Timestamp 1")
    timestamp_2 = mo.ui.dropdown(timestamp_options, label="Choose Timestamp 2")
    return timestamp_1, timestamp_2

@app.cell
def _(csv_upload):
    # `csv_upload.value` is the collection of uploaded files (empty until the
    # user uploads one), so it has no `.read()`; the raw bytes of the first
    # upload are obtained through the widget's `contents()` accessor.
    if csv_upload.value:
        df_keywords = pd.read_csv(io.BytesIO(csv_upload.contents()))
        # Keywords are taken from the first column, whatever its header is.
        keywords = df_keywords.iloc[:, 0].tolist()
    else:
        # Nothing uploaded yet: downstream cells see an empty keyword list.
        df_keywords, keywords = None, []
    return df_keywords, keywords

@app.cell
def _(keywords, api_key, cse_id, country, language, database, serp_table, timestamp_1, timestamp_2, run_button):
    # Defaults so that all three results are bound whichever path runs; the
    # cell has a single exit point at the bottom.
    fig1, fig2, movement = None, None, None

    # NOTE(review): confirm that the button widget actually exposes `clicked`;
    # the widget is created in another cell, so it is left unchanged here.
    if run_button.clicked and keywords:
        # Run search and store results.
        # The text inputs are widget objects; pass their current `.value`
        # (as is already done for the timestamp dropdowns) rather than the
        # widgets themselves. `api_key` / `cse_id` are already plain strings.
        getSearchResult(
            keywords,
            language.value,
            country.value,
            api_key,
            cse_id,
            database.value,
            serp_table.value,
        )

        # One clustering pass per selected timestamp.
        fig1, clusters1 = getClustersWithGraph(database.value, serp_table.value, timestamp_1.value)
        fig2, clusters2 = getClustersWithGraph(database.value, serp_table.value, timestamp_2.value)

        movement = compare_clusters(clusters1, clusters2)

    return fig1, fig2, movement

@app.cell
def _(fig1, fig2):
    # `display` is not defined or imported anywhere in this file, so the
    # figures are appended to the cell output through marimo's output API.
    # Both figures must be present; they are None until a comparison has run.
    if fig1 is not None and fig2 is not None:
        marimo.output.append(fig1)
        marimo.output.append(fig2)

@app.cell
def _(movement):
    # The guarded sort/reindex appeared several times, spliced together with
    # stray `return movement is not None:` lines (a syntax error); it is
    # restored here as a single pass.
    if movement is not None:
        # Order rows by search term and renumber the index to match.
        movement.sort_values(by="searchTerms", inplace=True)
        movement.reset_index(drop=True, inplace=True)
    # Trailing expression kept from the original; placed at the top level of
    # the body so it is the cell's final expression (None when nothing has run).
    movement