Spaces:

mirix
/

Periodic_Table_Colourizer

Sleeping

File size: 26,965 Bytes

import gradio as gr
import plotly.graph_objects as go
import numpy as np
import pandas as pd

# Import mendeleev for comprehensive periodic table data
try:
    from mendeleev.fetch import fetch_table
    MENDELEEV_AVAILABLE = True
except ImportError:
    print("mendeleev library not found. Please install it using: pip install mendeleev")
    MENDELEEV_AVAILABLE = False

def load_periodic_data():
    """Fetch the mendeleev ``elements`` table and list its numeric property columns.

    Returns:
        A ``(table, property_names)`` tuple. ``table`` is the object returned
        by ``fetch_table('elements')``; ``property_names`` is the list of its
        numeric column names with a fixed set of identifier/layout columns
        removed. If mendeleev could not be imported, or anything raises while
        fetching/filtering, an empty ``DataFrame`` and an empty list are
        returned instead (the error is printed, not re-raised).
    """
    if not MENDELEEV_AVAILABLE:
        return pd.DataFrame(), []

    # Numeric columns that identify or place an element rather than describe it.
    skipped_columns = ['atomic_number', 'period', 'group_id', 'mass_number', 'mass', 'id']

    try:
        table = fetch_table('elements')
        numeric_names = table.select_dtypes(include=[np.number]).columns.tolist()
        property_names = [name for name in numeric_names if name not in skipped_columns]
        return table, property_names
    except Exception as e:
        # Best-effort loader: report the problem and fall back to "no data".
        print(f"Error loading mendeleev data: {e}")
        return pd.DataFrame(), []

# Load the element table once at import time. Both names are module-level
# globals; they are an empty DataFrame / empty list when loading failed.
elements_data, available_properties = load_periodic_data()

def is_continuous_correlative_property(prop_name, df):
    """Tell whether a property merely tracks atomic number (and so is hidden from the dropdown).

    A property qualifies when either:
      * its lower-cased name contains one of the mass/weight fragments, or
      * over the rows where both it and ``atomic_number`` are non-null
        (more than 20 such rows required) its correlation with
        ``atomic_number`` exceeds 0.9 in absolute value *and* more than 80%
        of its values are distinct.

    Args:
        prop_name: Column name to evaluate.
        df: Element table; the statistical test is skipped when it lacks
            ``prop_name`` or ``atomic_number``.

    Returns:
        ``True`` if the property is considered continuous and correlative,
        ``False`` otherwise.
    """
    lowered_name = prop_name.lower()
    mass_like_fragments = {'atomic_weight', 'atomic_mass', 'mass', 'weight'}
    if any(fragment in lowered_name for fragment in mass_like_fragments):
        return True

    if prop_name not in df.columns or 'atomic_number' not in df.columns:
        return False

    paired = df[[prop_name, 'atomic_number']].dropna()
    if len(paired) <= 20:
        # Too few complete rows for the correlation to be meaningful.
        return False

    values = paired[prop_name]
    correlation = values.corr(paired['atomic_number'])
    unique_ratio = len(values.unique()) / len(values)

    # Strong correlation alone is not enough: step-like integer properties also
    # correlate, so nearly-all-distinct values are required as well.
    if abs(correlation) > 0.9 and unique_ratio > 0.8:
        return True
    return False

def is_integer_property(prop_name, df):
    """Decide whether a property's values should be displayed as integers.

    The decision is made in two steps:
      1. If the lower-cased name contains one of the known integer keywords
         (substring match), the property is integer regardless of the data.
      2. Otherwise the column's non-null values (more than 5 required) must
         all be within 1e-10 of a whole number.

    Non-numeric columns (for example a text ``series`` column used as the
    default colouring property) are never integer; they are rejected before
    any rounding is attempted, because rounding text raises ``TypeError``.

    Args:
        prop_name: Column name to evaluate.
        df: Element table that may or may not contain ``prop_name``.

    Returns:
        ``True`` when the property should be formatted as an integer,
        ``False`` otherwise.
    """
    integer_keywords = (
        'period', 'group_id', 'group', 'block_number', 'neutrons',
        'electrons', 'protons', 'valence', 'oxidation_states',
    )

    lowered_name = prop_name.lower()
    if any(keyword in lowered_name for keyword in integer_keywords):
        return True

    if prop_name not in df.columns:
        return False

    column = df[prop_name]
    # Guard: np.round / np.allclose cannot handle text or other object data.
    if not pd.api.types.is_numeric_dtype(column):
        return False

    data = column.dropna()
    if len(data) <= 5:
        # Too few values to infer integrality from the data.
        return False

    # rtol=0 so that only the absolute tolerance applies, independent of magnitude.
    return bool(np.allclose(data, np.round(data), rtol=0, atol=1e-10))

def calculate_color_variance(data, use_log=False):
    """Score how evenly values would spread across a colour scale.

    The values are (optionally log10-transformed and then) min-max normalised
    to the 0-1 range, mimicking a colour mapping, and the variance of the
    normalised values is returned. A larger result means the values occupy
    more of the palette.

    Args:
        data: Array-like supporting boolean-mask indexing, ``min`` and ``max``
            (the callers in this file pass a pandas Series).
        use_log: When true, only strictly positive values are kept and
            log10 is applied before normalising.

    Returns:
        The variance of the normalised values, or ``0`` when there are fewer
        than three usable values or all usable values are equal.
    """
    if len(data) < 3:
        return 0

    values = data
    if use_log:
        # log10 is undefined for zero/negative entries, so drop them first.
        values = data[data > 0]
        if len(values) < 3:
            return 0
        values = np.log10(values)

    lowest = values.min()
    highest = values.max()
    if highest == lowest:
        # A constant series would divide by zero; it uses a single colour anyway.
        return 0

    rescaled = (values - lowest) / (highest - lowest)
    return np.var(rescaled)

def requires_log_scale(prop_name, df):
    """Heuristically decide whether a log colour scale suits a property better.

    Checks, in order:
      1. The column must exist, have at least 10 non-null values and be
         strictly positive; otherwise the answer is ``False``.
      2. If the lower-cased name contains an abundance/decay-style keyword,
         the answer is ``True``.
      3. Otherwise all of the following must hold: the log-scaled colour
         variance is more than 1.5x the linear one, the data spans a
         max/min ratio above 100 or has absolute skewness above 2, and
         there are more than 20 values.

    Args:
        prop_name: Column name to evaluate.
        df: Element table.

    Returns:
        ``True`` if a logarithmic colour scale is recommended, else ``False``.
    """
    if prop_name not in df.columns:
        return False

    values = df[prop_name].dropna()
    # Log scaling needs enough points and strictly positive data.
    if len(values) < 10 or values.min() <= 0:
        return False

    # Names that usually denote quantities spanning many orders of magnitude.
    name_hints = (
        'abundance', 'concentration', 'ppm', 'ppb', 'radioactive',
        'half_life', 'decay', 'isotope_abundance',
    )
    lowered_name = prop_name.lower()
    if any(hint in lowered_name for hint in name_hints):
        return True

    linear_variance = calculate_color_variance(values, use_log=False)
    log_variance = calculate_color_variance(values, use_log=True)

    # Log must beat linear by at least 50% before it is preferred.
    improvement_threshold = 1.5

    span_ratio = values.max() / values.min()
    skewness = abs(values.skew()) if hasattr(values, 'skew') else 0

    checks = [
        # Log gives a visibly better colour spread.
        log_variance > linear_variance * improvement_threshold,
        # The data is wide-ranging or strongly skewed.
        span_ratio > 100 or skewness > 2,
        # Enough points to trust the statistics above.
        len(values) > 20,
    ]
    return all(checks)

def get_element_series_description(df):
    """Pick the column that best describes an element's series/category.

    Preference order: an exact ``series`` column, then an exact
    ``group_name`` column, then the first column whose lower-cased name
    contains a series-like term, then ``period`` (only when ``group_id`` is
    also present).

    Args:
        df: Element table whose column names are inspected.

    Returns:
        The chosen column name, or ``None`` when nothing suitable exists.
    """
    series_terms = ['series', 'group_name', 'category', 'family', 'type']
    candidates = [
        column for column in df.columns
        if any(term in column.lower() for term in series_terms)
    ]

    # Exact, descriptive names win over partial matches.
    for preferred in ('series', 'group_name'):
        if preferred in df.columns:
            return preferred

    if candidates:
        return candidates[0]

    # Last resort: period, provided the table also carries group information.
    if 'period' in df.columns and 'group_id' in df.columns:
        return 'period'

    return None

def create_element_series_mapping(df):
    """Choose the default categorical property and its display label.

    A ``series`` column is used whenever present. Otherwise the first of
    ``group_name``, ``category``, ``family``, ``type`` that exists and has
    more than 50 non-null values is used. ``period`` is the final fallback.

    Args:
        df: Element table.

    Returns:
        A ``(label, column_name)`` tuple, or ``(None, None)`` when no
        candidate column exists.
    """
    columns = df.columns

    if 'series' in columns:
        return 'Element Series', 'series'

    # Alternative descriptive columns, in priority order.
    alternatives = (
        ('group_name', 'Element Group'),
        ('category', 'Element Category'),
        ('family', 'Element Family'),
        ('type', 'Element Type'),
    )
    for column, label in alternatives:
        if column in columns and df[column].notna().sum() > 50:
            return label, column

    # Nothing descriptive enough: colour by period instead.
    return ('Period', 'period') if 'period' in columns else (None, None)

def filter_relevant_properties(df, available_props):
    """Select the properties that have enough data to be worth colouring by.

    Three sources are combined, in this insertion order:
      1. the default series-like column chosen by
         ``create_element_series_mapping`` (needs at least 50 non-null values);
      2. curated columns that appear in ``available_props`` and meet their own
         minimum non-null count;
      3. any other column in ``available_props`` with more than 80 non-null
         values, labelled from its name and given a heuristic log-scale flag.

    Properties from (2) and (3) that ``is_continuous_correlative_property``
    flags are kept in ``valid_properties`` but left out of the dropdown.

    Args:
        df: Element table.
        available_props: Candidate column names; each is looked up in ``df``.

    Returns:
        ``(valid_properties, dropdown_properties, property_info)`` where the
        first two map display label -> column name and the third maps
        column name -> configuration dict.
    """
    # (column, display label, minimum non-null values required)
    curated_specs = (
        ('atomic_weight', 'Atomic Mass (u)', 100),
        ('density', 'Density (g/cm³)', 50),
        ('en_pauling', 'Electronegativity (Pauling)', 70),
        ('atomic_radius', 'Atomic Radius (pm)', 50),
        ('vdw_radius', 'Van der Waals Radius (pm)', 40),
        ('covalent_radius', 'Covalent Radius (pm)', 40),
        ('ionenergy', 'First Ionization Energy (eV)', 80),
        ('electron_affinity', 'Electron Affinity (eV)', 40),
        ('melting_point', 'Melting Point (K)', 70),
        ('boiling_point', 'Boiling Point (K)', 60),
        ('atomic_volume', 'Atomic Volume (cm³/mol)', 40),
        ('thermal_conductivity', 'Thermal Conductivity (W/mK)', 30),
        ('c6', 'C6 Dispersion Coefficient', 30),
        ('dipole_polarizability', 'Dipole Polarizability', 30),
        ('period', 'Period', 100),
        ('group_id', 'Group', 100),
    )
    curated_properties = {
        column: {'label': label, 'min_data': min_data, 'log_scale': False}
        for column, label, min_data in curated_specs
    }

    valid_properties = {}
    dropdown_properties = {}  # what the UI offers (no continuous correlative entries)
    property_info = {}

    # 1. Default entry: goes in first so it leads the dropdown.
    default_label, default_property = create_element_series_mapping(df)
    default_usable = (
        default_property
        and default_property in df.columns
        and df[default_property].notna().sum() >= 50  # lower bar for series data
    )
    if default_usable:
        valid_properties[default_label] = default_property
        dropdown_properties[default_label] = default_property
        property_info[default_property] = {
            'label': default_label,
            'min_data': 50,
            'log_scale': False,
            'is_default': True,
        }

    # 2. Curated properties that are present and sufficiently populated.
    for column, config in curated_properties.items():
        if column not in available_props:
            continue
        if df[column].notna().sum() < config['min_data']:
            continue

        label = config['label']
        valid_properties[label] = column
        property_info[column] = config
        if not is_continuous_correlative_property(column, df):
            dropdown_properties[label] = column

    # 3. Uncurated properties, held to a stricter coverage bar (> 80 elements).
    for column in available_props:
        if column in curated_properties:
            continue
        if df[column].notna().sum() <= 80:
            continue

        label = column.replace('_', ' ').title()
        uses_log = requires_log_scale(column, df)
        valid_properties[label] = column
        property_info[column] = {'label': label, 'min_data': 80, 'log_scale': uses_log}
        if not is_continuous_correlative_property(column, df):
            dropdown_properties[label] = column

    return valid_properties, dropdown_properties, property_info

# Build the module-level property lookups once at import time:
#   valid_properties    - display label -> column name for every accepted property
#   dropdown_properties - the subset of those entries offered in the UI dropdown
#   property_info       - column name -> configuration dict (label, min_data, log_scale, ...)
valid_properties, dropdown_properties, property_info = filter_relevant_properties(elements_data, available_properties)

def get_portland_like_colorscale(use_log=False):
    """Return the name of the Plotly colourscale to use.

    Args:
        use_log: Whether the colour values are log-transformed. Both modes
            currently map to the same scale; the lookup is kept so that a
            log-specific variant can be substituted later.

    Returns:
        The colourscale name, ``'Portland'`` in either mode.
    """
    scale_by_mode = {
        True: 'Portland',   # logarithmic colour values
        False: 'Portland',  # linear colour values
    }
    return scale_by_mode[bool(use_log)]

def should_use_log_scale(property_name, df):
    """Decide whether a property is drawn with a logarithmic colour scale.

    An explicit ``log_scale`` flag in the module-level ``property_info``
    forces log scaling; otherwise the decision is delegated to
    ``requires_log_scale``.

    Args:
        property_name: Column name of the property.
        df: Element table.

    Returns:
        ``False`` when the column is missing from ``df``; ``True`` when the
        property is configured for log scale; otherwise the heuristic's answer.
    """
    if property_name not in df.columns:
        return False

    # A truthy configured flag wins; a falsy or absent one defers to the heuristic.
    configured = property_info.get(property_name, {})
    if configured.get('log_scale', False):
        return True

    return requires_log_scale(property_name, df)

def _build_element_positions():
    """Expand contiguous runs of elements into an atomic number -> (column, row) map.

    Each run is ``(first_z, last_z, first_column, row)``: consecutive atomic
    numbers occupy consecutive columns on the same row.
    """
    runs = (
        # Period 1
        (1, 1, 1, 1), (2, 2, 18, 1),
        # Period 2
        (3, 4, 1, 2), (5, 10, 13, 2),
        # Period 3
        (11, 12, 1, 3), (13, 18, 13, 3),
        # Period 4
        (19, 36, 1, 4),
        # Period 5
        (37, 54, 1, 5),
        # Period 6: s-block, lanthanides (drawn on row 9), then the rest
        (55, 56, 1, 6), (57, 71, 4, 9), (72, 86, 4, 6),
        # Period 7: s-block, actinides (drawn on row 10), then the rest
        (87, 88, 1, 7), (89, 103, 4, 10), (104, 118, 4, 7),
    )
    layout = {}
    for first_z, last_z, first_column, row in runs:
        for offset in range(last_z - first_z + 1):
            layout[first_z + offset] = (first_column + offset, row)
    return layout


# Standard periodic table positions: atomic number -> (column, row), with
# row 1 at the top and the lanthanide/actinide rows placed below the main table.
ELEMENT_POSITIONS = _build_element_positions()

def get_electronic_configuration(element):
    """Return an element's electronic configuration as a string, if recorded.

    Several possible column names are tried in a fixed order and the first
    one that is present with a non-null value wins.

    Args:
        element: A pandas Series representing one element row.

    Returns:
        The configuration converted with ``str``, or ``None`` when no
        candidate column holds a value. No attempt is made to derive the
        configuration from other data.
    """
    candidate_columns = ('electronic_configuration', 'electron_configuration', 'econf', 'ec')

    for column in candidate_columns:
        if column not in element.index:
            continue
        if pd.notna(element.get(column)):
            return str(element[column])

    return None

def create_hover_text(element, selected_property, original_value, display_value):
    """Build the HTML hover tooltip for one element.

    The tooltip shows the element name and symbol, the selected property's
    value, the atomic number, the electronic configuration (when found) and
    a fixed list of key properties that are present and non-null.

    Args:
        element: A pandas Series representing one element row.
        selected_property: Column name of the property being coloured.
        original_value: The element's untransformed value for that property.
        display_value: The value used for colouring; accepted for interface
            compatibility but not used in the tooltip.

    Returns:
        The tooltip lines joined with ``<br>``.
    """

    def format_value(value, unit="", is_integer=False):
        """Render a value with a magnitude-dependent precision and optional unit."""
        if pd.isna(value):
            return "N/A"
        if not isinstance(value, (int, float)):
            return str(value)

        if is_integer:
            return f"{int(round(value))} {unit}".strip()

        magnitude = abs(value)
        if magnitude >= 1000:
            return f"{value:.2e} {unit}".strip()
        if magnitude >= 10:
            return f"{value:.2f} {unit}".strip()
        return f"{value:.3f} {unit}".strip()

    # Label: configured one if known, otherwise derived from the column name.
    fallback_label = selected_property.replace('_', ' ').title()
    prop_label = property_info.get(selected_property, {}).get('label', fallback_label)

    is_int_prop = is_integer_property(selected_property, elements_data)

    # Unit inferred from the column name; the first matching rule wins.
    unit_rules = (
        (('density',), "g/cm³"),
        (('electronegativity',), ""),
        (('radius',), "pm"),
        (('energy', 'ionization'), "eV"),
        (('affinity',), "eV"),
        (('point', 'temperature'), "K"),
        (('weight', 'mass'), "u"),
        (('volume',), "cm³/mol"),
        (('conductivity',), "W/mK"),
    )
    lowered_name = selected_property.lower()
    unit = next(
        (rule_unit for fragments, rule_unit in unit_rules
         if any(fragment in lowered_name for fragment in fragments)),
        "",
    )

    current_str = format_value(original_value, unit, is_int_prop)

    hover_lines = [
        f"<b>{element.get('name', 'N/A')} ({element.get('symbol', 'N/A')})</b>",
        f"<b>{prop_label}: {current_str}</b>",
        "",  # blank line separating the headline from the details
        f"Atomic Number: {element.get('atomic_number', 'N/A')}",
    ]

    electronic_config = get_electronic_configuration(element)
    if electronic_config:
        hover_lines.append(f"Electronic Configuration: {electronic_config}")

    # (column, display name, unit, format as integer)
    key_properties = (
        ('atomic_weight', 'Atomic Weight', 'u', False),
        ('period', 'Period', '', True),
        ('group_id', 'Group', '', True),
        ('block', 'Block', '', False),
        ('en_pauling', 'Electronegativity', '', False),
        ('atomic_radius', 'Atomic Radius', 'pm', False),
        ('ionenergy', 'Ionization Energy', 'eV', False),
        ('melting_point', 'Melting Point', 'K', False),
        ('boiling_point', 'Boiling Point', 'K', False),
        ('density', 'Density', 'g/cm³', False),
    )
    for column, display_name, column_unit, as_integer in key_properties:
        if column not in element.index:
            continue
        if pd.notna(element.get(column)):
            rendered = format_value(element[column], column_unit, as_integer)
            hover_lines.append(f"{display_name}: {rendered}")

    return "<br>".join(hover_lines)

def _message_figure(message):
    """Return an otherwise empty figure carrying a single text annotation."""
    fig = go.Figure()
    fig.add_annotation(
        text=message,
        showarrow=False,
        font=dict(size=16)
    )
    return fig


def create_periodic_table_figure(selected_property_label):
    """Draw the periodic table coloured by the chosen property.

    Args:
        selected_property_label: Display label of the property; it is
            resolved to a column name through the module-level
            ``valid_properties`` mapping.

    Returns:
        A Plotly figure. When the data is unavailable, the label is unknown
        or the column has no values, the figure contains only an explanatory
        message.
    """
    if not MENDELEEV_AVAILABLE or elements_data.empty:
        return _message_figure("Mendeleev library not available. Please install: pip install mendeleev")

    selected_property = valid_properties.get(selected_property_label)
    if not selected_property:
        return _message_figure(f"Property '{selected_property_label}' not available")

    property_data = elements_data[selected_property].dropna()
    if property_data.empty:
        return _message_figure(f"No data available for {selected_property_label}")

    use_log = should_use_log_scale(selected_property, elements_data)

    # Colour limits come from the same (possibly log-transformed) values that
    # are plotted. Log scaling is abandoned if no value is strictly positive.
    colour_source = property_data
    if use_log:
        positive_data = property_data[property_data > 0]
        if positive_data.empty:
            use_log = False
        else:
            colour_source = np.log10(positive_data)
    min_value = colour_source.min()
    max_value = colour_source.max()

    x_positions, y_positions = [], []
    element_symbols, atomic_numbers = [], []
    element_values, hover_texts = [], []

    for _, element in elements_data.iterrows():
        atomic_num = element['atomic_number']
        if atomic_num not in ELEMENT_POSITIONS:
            # No known cell for this element: leave it off the chart.
            continue

        column, row = ELEMENT_POSITIONS[atomic_num]
        x_positions.append(column)
        y_positions.append(11 - row)  # flip so that row 1 ends up at the top
        element_symbols.append(element['symbol'])
        atomic_numbers.append(atomic_num)

        original_value = element[selected_property]
        if pd.isna(original_value):
            display_value = np.nan
        elif use_log and original_value > 0:
            display_value = np.log10(original_value)
        else:
            display_value = original_value
        element_values.append(display_value)

        hover_texts.append(
            create_hover_text(element, selected_property, original_value, display_value)
        )

    fig = go.Figure()

    # One square marker per element, labelled with its symbol.
    colorbar_title = f"{selected_property_label}{'<br>(log scale)' if use_log else ''}"
    fig.add_trace(go.Scatter(
        x=x_positions,
        y=y_positions,
        mode='markers+text',
        text=element_symbols,
        hoverinfo='text',
        hovertext=hover_texts,
        textfont={
            'family': "Arial, sans-serif",
            'size': 14,
            'color': "white",
            'weight': "bold",
        },
        hoverlabel={
            'bgcolor': "rgba(255,255,255,0.95)",
            'font_size': 12,
            'font_family': "Arial, sans-serif",
            'bordercolor': "black",
        },
        marker={
            'symbol': 'square',
            'color': element_values,
            'size': 45,
            'colorscale': get_portland_like_colorscale(use_log),
            'cmin': min_value,
            'cmax': max_value,
            'colorbar': {
                'title': colorbar_title,
                'thickness': 20,
                'x': 1.02,
            },
            'showscale': True,
            'line': {'color': 'black', 'width': 1},
        }
    ))

    # Atomic numbers sit slightly above each symbol.
    for x_pos, y_pos, atomic_num in zip(x_positions, y_positions, atomic_numbers):
        fig.add_annotation(
            x=x_pos,
            y=y_pos + 0.3,
            text=str(atomic_num),
            showarrow=False,
            font={
                'family': "Arial, sans-serif",
                'size': 8,
                'color': "white",
                'weight': "bold",
            }
        )

    # Row captions for the two detached series, left of their first cell.
    for caption, caption_y in (("Lanthanides", 2), ("Actinides", 1)):
        fig.add_annotation(x=3, y=caption_y, text=caption, showarrow=False,
                           font=dict(size=10, weight="bold"))

    title_text = f'<b>Periodic Table by {selected_property_label}</b>'
    if use_log:
        title_text += '<br><span style="font-size:14px;">(Logarithmic Color Scale)</span>'

    # Both axes are hidden; they only provide the grid coordinates.
    hidden_axis = dict(
        showgrid=False,
        zeroline=False,
        showticklabels=False,
        visible=False
    )
    fig.update_layout(
        title=dict(
            text=title_text,
            x=0.5,
            font=dict(size=24)
        ),
        xaxis=dict(range=[0, 19], **hidden_axis),
        yaxis=dict(range=[0, 12], **hidden_axis),
        plot_bgcolor='white',
        paper_bgcolor='#f8f9fa',
        width=1480,
        height=800,
        margin=dict(l=20, r=100, t=100, b=20)
    )

    return fig

# Create Gradio interface
def create_gradio_app():
    """Assemble the Gradio UI.

    Returns:
        A ``gr.Blocks`` app with a property dropdown wired to the periodic
        table plot, or a minimal ``gr.Interface`` that only reports an error
        when mendeleev is missing or no dropdown properties were found.
    """
    if not MENDELEEV_AVAILABLE or not dropdown_properties:
        def error_interface():
            return "❌ Mendeleev library not available or no valid properties found. Please install: pip install mendeleev"

        return gr.Interface(
            fn=error_interface,
            inputs=[],
            outputs=gr.Textbox(label="Error"),
            title="Periodic Table Dashboard - Error"
        )

    # Dropdown entries (continuous correlative properties are already excluded).
    property_options = list(dropdown_properties.keys())

    # Preselect the entry flagged as default, falling back to the first option.
    default_property = next(
        (
            label
            for label, prop_name in dropdown_properties.items()
            if property_info.get(prop_name, {}).get('is_default', False)
        ),
        None,
    )
    if not default_property and property_options:
        default_property = property_options[0]

    with gr.Blocks(title="Interactive Periodic Table", theme=gr.themes.Soft()) as app:
        gr.Markdown("# 🧪 Interactive Periodic Table")

        with gr.Row():
            with gr.Column(scale=1):
                property_dropdown = gr.Dropdown(
                    choices=property_options,
                    value=default_property,
                    label="Select Property to Colourize",
                )
        with gr.Row():
            plot_output = gr.Plot(show_label=False)
        with gr.Row():
            gr.Markdown(f""" 
                **🔬 Data Source:** [Mendeleev Library](https://mendeleev.readthedocs.io/)
                """)

        # Redraw on every dropdown change, and once when the page first loads.
        for register in (property_dropdown.change, app.load):
            register(
                fn=create_periodic_table_figure,
                inputs=[property_dropdown],
                outputs=[plot_output]
            )

    return app

# Create and run the app
if __name__ == "__main__":
    # Startup summary: one line per dropdown property with its data coverage
    # and any notes (log scale / integer / default).
    print(f"🚀 Starting Gradio app with {len(dropdown_properties)} properties (excluding continuous correlative)")
    if dropdown_properties:
        print("📋 Available dropdown properties:")
        for label, prop_name in dropdown_properties.items():
            notes = ""
            if should_use_log_scale(prop_name, elements_data):
                notes += " (log scale)"
            if is_integer_property(prop_name, elements_data):
                notes += " (integer)"
            if property_info.get(prop_name, {}).get('is_default', False):
                notes += " (DEFAULT)"
            data_count = elements_data[prop_name].notna().sum()
            print(f"  • {label}: {data_count} elements{notes}")

    print(f"\n📋 Total valid properties (including continuous correlative): {len(valid_properties)}")

    app = create_gradio_app()
    app.launch()