import networkx as nx
from sklearn.cluster import HDBSCAN
import matplotlib.pyplot as plt
import numpy as np
from sklearn.manifold import TSNE
import umap
from sklearn.cluster import KMeans
from adjustText import adjust_text
from constants import high_level_families, primary_families_branches


def filter_languages_by_families(matrix, languages, families):
    """
    Restricts a distance matrix and its language list to the given families.

    Parameters:
    - matrix: 2D NumPy array (N x N) whose rows/columns follow the order of `languages`.
    - languages: list of N languages; each must be a key of `high_level_families`.
    - families: collection of family names to keep.

    Returns:
    - filtered_matrix: sub-matrix of `matrix` holding only the rows and columns
      of the kept languages (original relative order preserved).
    - filtered_languages: list of the kept languages, in their original order.
    """
    kept_indices = []
    kept_languages = []
    # Single pass: remember both the position and the name of every match.
    for position, language in enumerate(languages):
        if high_level_families[language] in families:
            kept_indices.append(position)
            kept_languages.append(language)

    # np.ix_ selects the same index set on both axes, keeping the matrix square.
    selector = np.ix_(kept_indices, kept_indices)
    return matrix[selector], kept_languages


def get_dynamic_color_map(n_colors):
    """
    Builds a list of `n_colors` colors sampled from a matplotlib colormap.

    The "tab20" colormap is used for up to 20 colors and "hsv" beyond that.
    Colors are sampled at the evenly spaced positions i / n_colors.

    Parameters:
    - n_colors: int, the number of distinct colors required.

    Returns:
    - color_map: list of color tuples as returned by the colormap
      (an empty list when n_colors is 0).
    """
    palette_name = "tab20" if n_colors <= 20 else "hsv"
    palette = plt.get_cmap(palette_name)

    color_map = []
    for step in range(n_colors):
        color_map.append(palette(step / n_colors))
    return color_map


def cluster_languages_by_families(languages):
    """
    Assigns each language a cluster ID based on its high-level family.

    Parameters:
    - languages: list of languages; each must be a key of `high_level_families`.

    Returns:
    - clusters: list with, for each language, the index of its family in `legend`.
    - legend: sorted list of the distinct families present in `languages`.
    """
    families = [high_level_families[language] for language in languages]
    legend = sorted(set(families))
    # Entries of `legend` are unique, so this lookup equals legend.index(family).
    family_to_id = {family: idx for idx, family in enumerate(legend)}
    clusters = [family_to_id[family] for family in families]
    return clusters, legend


def cluster_languages_by_subfamilies(languages):
    """
    Assigns each language a cluster ID based on its family and primary branch.

    The label of a language has the form "<family> (<branch>)", built from
    `high_level_families` and `primary_families_branches`.

    Parameters:
    - languages: list of languages; each must be a key of both lookup tables.

    Returns:
    - clusters: list with, for each language, the index of its label in `legend`.
    - legend: sorted list of the distinct "<family> (<branch>)" labels.
    """
    labels = []
    for language in languages:
        family = high_level_families[language]
        branch = primary_families_branches[language]
        labels.append(family + f" ({branch})")

    legend = sorted(set(labels))
    # Entries of `legend` are unique, so this lookup equals legend.index(label).
    label_to_id = {label: idx for idx, label in enumerate(legend)}
    clusters = [label_to_id[label] for label in labels]
    return clusters, legend


def plot_mst(
    matrix,
    languages,
    clusters,
    legend=None,
    fig_size=(20, 20),
):
    """
    Plots the Minimum Spanning Tree (MST) of the languages, colored by cluster.

    A complete undirected graph is built from the upper triangle of `matrix`
    (entries with i < j), its MST is computed with NetworkX, and the tree is
    drawn with a Kamada-Kawai layout that takes the edge weights into account.

    Parameters:
    - matrix: 2D array (N x N), indexable as matrix[i, j], holding the pairwise
      distances between languages. Only the upper triangle is read.
    - languages: sequence of length N with the label of each node.
    - clusters: sequence of length N with the cluster ID of each node.
    - legend: optional sequence or mapping indexed by cluster ID that gives the
      legend text of each cluster; defaults to str(cluster).
    - fig_size: (width, height) passed to matplotlib as the figure size.

    Returns:
    - fig: the matplotlib Figure containing the plot.
    """
    n_nodes = len(languages)

    graph = nx.Graph()
    # Register every node up front: nodes are otherwise only created by
    # add_edge, so a single-language input would yield an empty graph and no
    # layout position for its label.
    graph.add_nodes_from(range(n_nodes))

    # Add one weighted edge per unordered pair (upper triangle, i < j).
    for i in range(n_nodes):
        for j in range(i + 1, n_nodes):
            graph.add_edge(i, j, weight=matrix[i, j])

    mst = nx.minimum_spanning_tree(graph)

    # Kamada-Kawai uses the edge weights as target distances between nodes.
    pos = nx.kamada_kawai_layout(mst, weight="weight")

    # Map each cluster to a color.
    unique_clusters = sorted(set(clusters))
    palette = get_dynamic_color_map(len(unique_clusters))
    cluster_colors = dict(zip(unique_clusters, palette))
    node_colors = [cluster_colors[cluster] for cluster in clusters]

    fig, ax = plt.subplots(figsize=fig_size)

    nx.draw_networkx_edges(mst, pos, edge_color="gray", ax=ax)

    # An explicit nodelist ties node_colors[i] to node i regardless of the
    # internal node order of the MST graph.
    nx.draw_networkx_nodes(
        mst,
        pos,
        nodelist=list(range(n_nodes)),
        node_color=node_colors,
        node_size=100,
        ax=ax,
        alpha=0.7,
    )

    # Labels are created as plain text objects so adjust_text can move them
    # apart afterwards instead of drawing them at fixed positions.
    texts = []
    for i, label in enumerate(languages):
        x, y = pos[i]
        texts.append(ax.text(x, y, label, fontsize=10))

    # Adjust text labels to minimize overlap.
    adjust_text(texts, expand_text=(1.05, 1.2))

    # Add a legend for clusters.
    if legend is None:
        legend = {cluster: str(cluster) for cluster in unique_clusters}
    legend_handles = [
        plt.Line2D(
            [0],
            [0],
            marker="o",
            color="w",
            markerfacecolor=cluster_colors[cluster],
            markersize=10,
            alpha=0.7,
            label=legend[cluster],
        )
        for cluster in unique_clusters
    ]
    ax.legend(handles=legend_handles, title="Clusters", loc="best")

    # Remove axis for clarity.
    ax.axis("off")

    return fig


def cluster_languages_kmeans(dist_matrix, languages, n_clusters=5):
    """
    Clusters languages with KMeans, using the distance matrix as input data.

    The matrix is passed to KMeans as-is, so each row (the distances from one
    language to all others) serves as that language's feature vector.

    Parameters:
    - dist_matrix: 2D NumPy array (N x N) representing the pairwise distances between languages.
    - languages: list of N languages, returned unchanged.
    - n_clusters: int, the number of clusters to form.

    Returns:
    - dist_matrix: the input matrix, unchanged (no filtering is applied).
    - languages: the input languages, unchanged.
    - clusters: array of length N with the KMeans cluster ID of each language.

    Note: clusters containing a single language are kept, not filtered out.
    """
    # Fixed random_state keeps the assignment reproducible between runs.
    assignments = KMeans(n_clusters=n_clusters, random_state=23).fit_predict(
        dist_matrix
    )
    return dist_matrix, languages, assignments


def cluster_languages_hdbscan(dist_matrix, languages, min_cluster_size=2):
    """
    Clusters languages with HDBSCAN on a precomputed distance matrix and
    drops every language that HDBSCAN labels as noise (cluster -1).

    Parameters:
    - dist_matrix: 2D NumPy array (N x N) representing the pairwise distances between languages.
    - languages: sequence of N languages, in the row/column order of the matrix.
    - min_cluster_size: int, the minimum size of clusters.

    Returns:
    - filtered_matrix: sub-matrix of `dist_matrix` restricted to the kept languages.
    - filtered_languages: NumPy array of the kept languages.
    - filtered_clusters: NumPy array with the cluster ID of each kept language.
    """
    model = HDBSCAN(metric="precomputed", min_cluster_size=min_cluster_size)
    assignments = model.fit_predict(dist_matrix)

    # Positions of all points that were assigned to a real cluster (not -1).
    kept = np.flatnonzero(assignments != -1)

    return (
        dist_matrix[np.ix_(kept, kept)],
        np.array(languages)[kept],
        np.array(assignments)[kept],
    )


def plot_distances_tsne(
    matrix,
    languages,
    clusters,
    legend=None,
    fig_size=(16, 12),
    perplexity=30.0,
):
    """
    Plots all languages from the distances matrix using t-SNE and colors them by clusters.

    Parameters:
    - matrix: 2D array (N x N) of pairwise distances, passed to t-SNE as a
      precomputed metric.
    - languages: sequence of length N with the label of each point.
    - clusters: sequence of length N with the cluster ID of each point.
    - legend: optional sequence or mapping indexed by cluster ID that gives the
      legend text of each cluster; defaults to str(cluster).
    - fig_size: (width, height) passed to matplotlib as the figure size.
    - perplexity: requested t-SNE perplexity. It is lowered to N - 1 when the
      input has too few languages for the requested value.

    Returns:
    - fig: the matplotlib Figure containing the plot.
    """
    # scikit-learn rejects perplexity >= n_samples, so the default of 30 fails
    # for small language sets (e.g. after filtering by family). Clamp it just
    # below the sample count; larger inputs keep the requested value.
    n_samples = np.shape(matrix)[0]
    effective_perplexity = min(perplexity, n_samples - 1)

    # init="random" is used because PCA initialization is not available for a
    # precomputed distance matrix.
    tsne = TSNE(
        n_components=2,
        random_state=23,
        metric="precomputed",
        init="random",
        perplexity=effective_perplexity,
    )
    tsne_results = tsne.fit_transform(matrix)

    # Map each cluster to a color.
    unique_clusters = sorted(set(clusters))
    palette = get_dynamic_color_map(len(unique_clusters))
    cluster_colors = dict(zip(unique_clusters, palette))

    fig, ax = plt.subplots(figsize=fig_size)
    ax.scatter(
        tsne_results[:, 0],
        tsne_results[:, 1],
        c=[cluster_colors[cluster] for cluster in clusters],
        alpha=0.7,
    )

    # Labels are created as plain text objects so adjust_text can move them
    # apart afterwards instead of drawing them at fixed positions.
    texts = [
        ax.text(tsne_results[i, 0], tsne_results[i, 1], label, fontsize=10)
        for i, label in enumerate(languages)
    ]

    # Adjust text labels to minimize overlap.
    adjust_text(texts, expand_text=(1.05, 1.2))

    # Add a legend for clusters.
    if legend is None:
        legend = {cluster: str(cluster) for cluster in unique_clusters}
    legend_handles = [
        plt.Line2D(
            [0],
            [0],
            marker="o",
            color="w",
            markerfacecolor=cluster_colors[cluster],
            markersize=10,
            label=legend[cluster],
        )
        for cluster in unique_clusters
    ]
    ax.legend(handles=legend_handles, title="Clusters", loc="best")

    # The t-SNE axes carry no interpretable units, so they are hidden.
    ax.axis("off")
    return fig


def plot_distances_umap(
    matrix,
    languages,
    clusters,
    legend=None,
    fig_size=(16, 12),
):
    """
    Projects the languages to 2D with UMAP and draws them colored by cluster.

    Parameters:
    - matrix: 2D array (N x N) of pairwise distances, passed to UMAP as a
      precomputed metric.
    - languages: sequence of length N with the label of each point.
    - clusters: sequence of length N with the cluster ID of each point.
    - legend: optional sequence or mapping indexed by cluster ID that gives the
      legend text of each cluster; defaults to str(cluster).
    - fig_size: (width, height) passed to matplotlib as the figure size.

    Returns:
    - fig: the matplotlib Figure containing the plot.
    """
    reducer = umap.UMAP(metric="precomputed", random_state=23)
    embedding = reducer.fit_transform(matrix)
    xs = embedding[:, 0]
    ys = embedding[:, 1]

    # One color per distinct cluster ID, assigned in sorted order.
    unique_clusters = sorted(set(clusters))
    palette = get_dynamic_color_map(len(unique_clusters))
    cluster_colors = {}
    for idx, cluster in enumerate(unique_clusters):
        cluster_colors[cluster] = palette[idx]
    point_colors = [cluster_colors[cluster] for cluster in clusters]

    fig, ax = plt.subplots(figsize=fig_size)
    ax.scatter(xs, ys, c=point_colors, alpha=0.7)

    # Create the labels as text objects first; adjust_text then shifts them
    # to reduce overlap.
    texts = [
        ax.text(embedding[i, 0], embedding[i, 1], label, fontsize=10)
        for i, label in enumerate(languages)
    ]
    adjust_text(texts, expand_text=(1.05, 1.2))

    # Fall back to the stringified cluster ID when no legend text is given.
    legend_labels = (
        legend
        if legend is not None
        else {cluster: str(cluster) for cluster in unique_clusters}
    )
    handles = []
    for cluster in unique_clusters:
        handles.append(
            plt.Line2D(
                [0],
                [0],
                marker="o",
                color="w",
                markerfacecolor=cluster_colors[cluster],
                markersize=10,
                label=legend_labels[cluster],
            )
        )
    ax.legend(handles=handles, title="Clusters", loc="best")

    # Hide the axes.
    ax.axis("off")
    return fig