"""Interactive periodic table dashboard built with Gradio and Plotly.

Element data comes from the ``mendeleev`` library. The user picks a property
from a dropdown and the table is re-coloured by that property.
"""

import gradio as gr
import numpy as np
import pandas as pd
import plotly.graph_objects as go

# mendeleev is treated as optional at import time so that the app can still
# start and show an explanatory error instead of crashing.
try:
    from mendeleev.fetch import fetch_table
    MENDELEEV_AVAILABLE = True
except ImportError:
    print("mendeleev library not found. Please install it using: pip install mendeleev")
    MENDELEEV_AVAILABLE = False


# Numeric columns that identify or position an element rather than describe
# a physical property; they are never offered as colourable properties.
_NON_PROPERTY_COLUMNS = ('atomic_number', 'period', 'group_id', 'mass_number', 'mass', 'id')

# Curated properties: display label, minimum number of non-null values the
# column must have to be offered, and whether a log colour scale is forced.
_CURATED_PROPERTIES = {
    'atomic_weight': {'label': 'Atomic Mass (u)', 'min_data': 100, 'log_scale': False},
    'density': {'label': 'Density (g/cm³)', 'min_data': 50, 'log_scale': False},
    'en_pauling': {'label': 'Electronegativity (Pauling)', 'min_data': 70, 'log_scale': False},
    'atomic_radius': {'label': 'Atomic Radius (pm)', 'min_data': 50, 'log_scale': False},
    'vdw_radius': {'label': 'Van der Waals Radius (pm)', 'min_data': 40, 'log_scale': False},
    'covalent_radius': {'label': 'Covalent Radius (pm)', 'min_data': 40, 'log_scale': False},
    'ionenergy': {'label': 'First Ionization Energy (eV)', 'min_data': 80, 'log_scale': False},
    'electron_affinity': {'label': 'Electron Affinity (eV)', 'min_data': 40, 'log_scale': False},
    'melting_point': {'label': 'Melting Point (K)', 'min_data': 70, 'log_scale': False},
    'boiling_point': {'label': 'Boiling Point (K)', 'min_data': 60, 'log_scale': False},
    'atomic_volume': {'label': 'Atomic Volume (cm³/mol)', 'min_data': 40, 'log_scale': False},
    'thermal_conductivity': {'label': 'Thermal Conductivity (W/mK)', 'min_data': 30, 'log_scale': False},
    'c6': {'label': 'C6 Dispersion Coefficient', 'min_data': 30, 'log_scale': False},
    'dipole_polarizability': {'label': 'Dipole Polarizability', 'min_data': 30, 'log_scale': False},
    'period': {'label': 'Period', 'min_data': 100, 'log_scale': False},
    'group_id': {'label': 'Group', 'min_data': 100, 'log_scale': False},
}

# Ordered (keywords, unit) rules used to guess a unit from a column name.
# The first rule with a keyword contained in the lower-cased name wins.
_UNIT_RULES = (
    (('density',), 'g/cm³'),
    (('electronegativity',), ''),
    (('radius',), 'pm'),
    (('energy', 'ionization'), 'eV'),
    (('affinity',), 'eV'),
    (('point', 'temperature'), 'K'),
    (('weight', 'mass'), 'u'),
    (('volume',), 'cm³/mol'),
    (('conductivity',), 'W/mK'),
)

# Plotly renders text with a small HTML subset; line breaks must be "<br>".
_LINE_BREAK = "<br>"


def load_periodic_data():
    """Load the element table from mendeleev.

    Returns:
        A ``(df, numeric_columns)`` tuple. ``df`` is the full elements table
        and ``numeric_columns`` lists its numeric columns minus the
        identifier/position columns. When mendeleev is unavailable or loading
        fails, an empty DataFrame and an empty list are returned.
    """
    if not MENDELEEV_AVAILABLE:
        return pd.DataFrame(), []

    try:
        df = fetch_table('elements')
        numeric_columns = df.select_dtypes(include=[np.number]).columns.tolist()
        numeric_columns = [col for col in numeric_columns if col not in _NON_PROPERTY_COLUMNS]
        return df, numeric_columns
    except Exception as e:
        # Best-effort boundary: the UI falls back to an error screen when the
        # data cannot be loaded, so report and continue with empty data.
        print(f"Error loading mendeleev data: {e}")
        return pd.DataFrame(), []


# Load data
elements_data, available_properties = load_periodic_data()


def is_continuous_correlative_property(prop_name, df):
    """Tell whether a property is continuous and tracks atomic number.

    Such properties are kept out of the dropdown. A property qualifies when
    its name contains a mass/weight keyword, or when it has more than 20
    paired values whose absolute correlation with ``atomic_number`` exceeds
    0.9 while more than 80% of the values are unique.

    Args:
        prop_name: Column name to inspect.
        df: Elements table.

    Returns:
        True if the property should be excluded from the dropdown.
    """
    lowered = prop_name.lower()
    if any(keyword in lowered for keyword in ('atomic_weight', 'atomic_mass', 'mass', 'weight')):
        return True

    if prop_name not in df.columns or 'atomic_number' not in df.columns:
        return False
    # Correlation is only defined for numeric columns; text columns would raise.
    if not pd.api.types.is_numeric_dtype(df[prop_name]):
        return False

    data = df[[prop_name, 'atomic_number']].dropna()
    if len(data) <= 20:
        return False

    correlation = data[prop_name].corr(data['atomic_number'])
    unique_ratio = data[prop_name].nunique() / len(data)
    return bool(abs(correlation) > 0.9 and unique_ratio > 0.8)


def is_integer_property(prop_name, df):
    """Tell whether a property should be displayed as an integer.

    A property is integer-like when its name contains one of a fixed set of
    keywords, or when it is a numeric column with more than 5 non-null values
    that are all whole numbers.

    Args:
        prop_name: Column name to inspect.
        df: Elements table.

    Returns:
        True if values should be formatted without decimals.
    """
    integer_keywords = (
        'period', 'group_id', 'group', 'block_number', 'neutrons',
        'electrons', 'protons', 'valence', 'oxidation_states',
    )
    lowered = prop_name.lower()
    if any(keyword in lowered for keyword in integer_keywords):
        return True

    if prop_name not in df.columns:
        return False

    data = df[prop_name].dropna()
    # Rounding only makes sense for numeric data; a text column (for example
    # a series name) is never an integer property.
    if len(data) <= 5 or not pd.api.types.is_numeric_dtype(data):
        return False
    return bool(np.allclose(data, np.round(data), rtol=0, atol=1e-10))


def calculate_color_variance(data, use_log=False):
    """Measure how evenly values spread over a 0-1 colour range.

    The data (optionally log10-transformed, using positive values only) is
    min-max normalised and its variance returned; a larger value means the
    colour scale is used more fully.

    Args:
        data: Numeric pandas Series or NumPy array.
        use_log: Evaluate the spread of ``log10(data)`` instead of ``data``.

    Returns:
        The variance of the normalised values, or 0 when there are fewer than
        3 usable values or all values are equal.
    """
    if len(data) < 3:
        return 0

    if use_log:
        positive_data = data[data > 0]
        if len(positive_data) < 3:
            return 0
        scaled_data = np.log10(positive_data)
    else:
        scaled_data = data

    min_val, max_val = scaled_data.min(), scaled_data.max()
    if max_val == min_val:
        return 0

    normalized = (scaled_data - min_val) / (max_val - min_val)
    return np.var(normalized)


def requires_log_scale(prop_name, df):
    """Heuristically decide whether a log colour scale suits a property.

    Log scale is chosen when the column is numeric, has at least 10 values
    that are all positive, and either its name contains an abundance/decay
    style keyword or all of the following hold: the log-scaled colour
    variance is at least 1.5x the linear one, the data spans more than two
    orders of magnitude or has absolute skewness above 2, and there are more
    than 20 values.

    Args:
        prop_name: Column name to inspect.
        df: Elements table.

    Returns:
        True if a logarithmic colour scale should be used.
    """
    if prop_name not in df.columns:
        return False

    data = df[prop_name].dropna()
    if len(data) < 10:
        return False
    # Comparing text with 0 would raise; non-numeric data never uses log scale.
    if not pd.api.types.is_numeric_dtype(data):
        return False
    if data.min() <= 0:
        return False

    log_scale_indicators = (
        'abundance', 'concentration', 'ppm', 'ppb',
        'radioactive', 'half_life', 'decay', 'isotope_abundance',
    )
    lowered = prop_name.lower()
    if any(indicator in lowered for indicator in log_scale_indicators):
        return True

    linear_variance = calculate_color_variance(data, use_log=False)
    log_variance = calculate_color_variance(data, use_log=True)

    improvement_threshold = 1.5  # require a 50% better colour spread
    range_ratio = data.max() / data.min()
    data_skewness = abs(data.skew()) if hasattr(data, 'skew') else 0

    return all((
        log_variance > linear_variance * improvement_threshold,
        range_ratio > 100 or data_skewness > 2,
        len(data) > 20,
    ))


def get_element_series_description(df):
    """Pick the column that best describes an element's series.

    Preference order: ``series``, ``group_name``, the first column whose name
    contains a series-like keyword, then ``period`` when both ``period`` and
    ``group_id`` exist.

    Args:
        df: Elements table.

    Returns:
        The chosen column name, or None when nothing suitable exists.
    """
    if 'series' in df.columns:
        return 'series'
    if 'group_name' in df.columns:
        return 'group_name'

    keywords = ('series', 'group_name', 'category', 'family', 'type')
    for col in df.columns:
        if any(term in col.lower() for term in keywords):
            return col

    if 'period' in df.columns and 'group_id' in df.columns:
        return 'period'
    return None


def create_element_series_mapping(df):
    """Choose the default grouping column and its display label.

    Args:
        df: Elements table.

    Returns:
        A ``(label, column)`` tuple. ``series`` is preferred, then the first
        descriptive column with more than 50 non-null values, then
        ``period``. ``(None, None)`` when none of them exist.
    """
    if 'series' in df.columns:
        return 'Element Series', 'series'

    descriptive_columns = {
        'group_name': 'Element Group',
        'category': 'Element Category',
        'family': 'Element Family',
        'type': 'Element Type',
    }
    for col, label in descriptive_columns.items():
        if col in df.columns and df[col].notna().sum() > 50:
            return label, col

    if 'period' in df.columns:
        return 'Period', 'period'
    return None, None


def filter_relevant_properties(df, available_props):
    """Select the properties that have enough data to be worth plotting.

    Args:
        df: Elements table.
        available_props: Candidate numeric column names.

    Returns:
        A ``(valid_properties, dropdown_properties, property_info)`` tuple.
        The first two map display label to column name; the dropdown mapping
        omits properties flagged by ``is_continuous_correlative_property``.
        ``property_info`` maps column name to its configuration dict.
    """
    valid = {}
    dropdown = {}
    info = {}

    # The series/grouping column, when present with at least 50 values, is
    # registered first and flagged as the default selection.
    default_label, default_property = create_element_series_mapping(df)
    if default_property and default_property in df.columns:
        if df[default_property].notna().sum() >= 50:
            valid[default_label] = default_property
            dropdown[default_label] = default_property
            info[default_property] = {
                'label': default_label,
                'min_data': 50,
                'log_scale': False,
                'is_default': True,
            }

    candidates = set(available_props)
    for prop_name, prop_config in _CURATED_PROPERTIES.items():
        if prop_name not in candidates:
            continue
        if df[prop_name].notna().sum() < prop_config['min_data']:
            continue
        label = prop_config['label']
        valid[label] = prop_name
        info[prop_name] = dict(prop_config)
        if not is_continuous_correlative_property(prop_name, df):
            dropdown[label] = prop_name

    # Uncurated properties need very good coverage (more than 80 elements).
    for prop in available_props:
        if prop in _CURATED_PROPERTIES:
            continue
        if df[prop].notna().sum() <= 80:
            continue
        display_name = prop.replace('_', ' ').title()
        valid[display_name] = prop
        info[prop] = {
            'label': display_name,
            'min_data': 80,
            'log_scale': requires_log_scale(prop, df),
        }
        if not is_continuous_correlative_property(prop, df):
            dropdown[display_name] = prop

    return valid, dropdown, info


# Get valid properties
valid_properties, dropdown_properties, property_info = filter_relevant_properties(
    elements_data, available_properties
)


def get_portland_like_colorscale(use_log=False):
    """Return the Plotly colourscale name used for the table.

    Args:
        use_log: Accepted for interface compatibility; the same scale is
            used for linear and logarithmic colouring.

    Returns:
        The string ``'Portland'``.
    """
    return 'Portland'


def should_use_log_scale(property_name, df):
    """Decide whether a property is coloured on a logarithmic scale.

    Args:
        property_name: Column name to inspect.
        df: Elements table.

    Returns:
        True when the property is explicitly configured with ``log_scale``
        in ``property_info`` or when ``requires_log_scale`` recommends it.
    """
    if property_name not in df.columns:
        return False
    if property_info.get(property_name, {}).get('log_scale', False):
        return True
    return requires_log_scale(property_name, df)


# Standard periodic table positions: atomic number -> (column, row).
# Rows 9 and 10 hold the detached lanthanide and actinide series.
ELEMENT_POSITIONS = {
    # Period 1
    1: (1, 1), 2: (18, 1),
    # Period 2
    3: (1, 2), 4: (2, 2), 5: (13, 2), 6: (14, 2), 7: (15, 2), 8: (16, 2),
    9: (17, 2), 10: (18, 2),
    # Period 3
    11: (1, 3), 12: (2, 3), 13: (13, 3), 14: (14, 3), 15: (15, 3), 16: (16, 3),
    17: (17, 3), 18: (18, 3),
    # Period 4
    19: (1, 4), 20: (2, 4), 21: (3, 4), 22: (4, 4), 23: (5, 4), 24: (6, 4),
    25: (7, 4), 26: (8, 4), 27: (9, 4), 28: (10, 4), 29: (11, 4), 30: (12, 4),
    31: (13, 4), 32: (14, 4), 33: (15, 4), 34: (16, 4), 35: (17, 4), 36: (18, 4),
    # Period 5
    37: (1, 5), 38: (2, 5), 39: (3, 5), 40: (4, 5), 41: (5, 5), 42: (6, 5),
    43: (7, 5), 44: (8, 5), 45: (9, 5), 46: (10, 5), 47: (11, 5), 48: (12, 5),
    49: (13, 5), 50: (14, 5), 51: (15, 5), 52: (16, 5), 53: (17, 5), 54: (18, 5),
    # Period 6
    55: (1, 6), 56: (2, 6),
    # Lanthanides (period 6 continued)
    57: (4, 9), 58: (5, 9), 59: (6, 9), 60: (7, 9), 61: (8, 9), 62: (9, 9),
    63: (10, 9), 64: (11, 9), 65: (12, 9), 66: (13, 9), 67: (14, 9), 68: (15, 9),
    69: (16, 9), 70: (17, 9), 71: (18, 9),
    # Period 6 continued
    72: (4, 6), 73: (5, 6), 74: (6, 6), 75: (7, 6), 76: (8, 6), 77: (9, 6),
    78: (10, 6), 79: (11, 6), 80: (12, 6), 81: (13, 6), 82: (14, 6), 83: (15, 6),
    84: (16, 6), 85: (17, 6), 86: (18, 6),
    # Period 7
    87: (1, 7), 88: (2, 7),
    # Actinides (period 7 continued)
    89: (4, 10), 90: (5, 10), 91: (6, 10), 92: (7, 10), 93: (8, 10), 94: (9, 10),
    95: (10, 10), 96: (11, 10), 97: (12, 10), 98: (13, 10), 99: (14, 10),
    100: (15, 10), 101: (16, 10), 102: (17, 10), 103: (18, 10),
    # Period 7 continued
    104: (4, 7), 105: (5, 7), 106: (6, 7), 107: (7, 7), 108: (8, 7), 109: (9, 7),
    110: (10, 7), 111: (11, 7), 112: (12, 7), 113: (13, 7), 114: (14, 7),
    115: (15, 7), 116: (16, 7), 117: (17, 7), 118: (18, 7),
}


def get_electronic_configuration(element):
    """Return an element's electronic configuration as a string.

    Args:
        element: One row of the elements table (a pandas Series).

    Returns:
        The value of the first non-null configuration column found among
        several candidate column names, or None when none is present.
    """
    for col in ('electronic_configuration', 'electron_configuration', 'econf', 'ec'):
        if col in element.index and pd.notna(element.get(col)):
            return str(element[col])
    return None


def _format_value(value, unit="", is_integer=False):
    """Format one property value with its unit for display in hover text."""
    if pd.isna(value):
        return "N/A"
    # NumPy integer scalars are not instances of int, so list them explicitly.
    if isinstance(value, (int, float, np.integer, np.floating)):
        if is_integer:
            return f"{int(round(value))} {unit}".strip()
        if abs(value) >= 1000:
            return f"{value:.2e} {unit}".strip()
        if abs(value) >= 10:
            return f"{value:.2f} {unit}".strip()
        return f"{value:.3f} {unit}".strip()
    return str(value)


def _guess_unit(property_name):
    """Guess the display unit of a property from keywords in its name."""
    lowered = property_name.lower()
    for keywords, unit in _UNIT_RULES:
        if any(keyword in lowered for keyword in keywords):
            return unit
    return ""


def create_hover_text(element, selected_property, original_value, display_value):
    """Build the hover text shown for one element.

    Args:
        element: One row of the elements table (a pandas Series).
        selected_property: Column name of the property being visualised.
        original_value: The element's raw value for that property.
        display_value: The value placed on the colour axis. Currently unused;
            kept so existing callers do not break.

    Returns:
        The hover lines joined with ``<br>``, the line break Plotly renders.
    """
    prop_config = property_info.get(selected_property, {})
    prop_label = prop_config.get('label', selected_property.replace('_', ' ').title())
    is_int_prop = is_integer_property(selected_property, elements_data)
    current_str = _format_value(original_value, _guess_unit(selected_property), is_int_prop)

    hover_lines = [
        f"{element.get('name', 'N/A')} ({element.get('symbol', 'N/A')})",
        f"{prop_label}: {current_str}",
        "",  # Empty line for separation
        f"Atomic Number: {element.get('atomic_number', 'N/A')}",
    ]

    electronic_config = get_electronic_configuration(element)
    if electronic_config:
        hover_lines.append(f"Electronic Configuration: {electronic_config}")

    # (column, display name, unit, format as integer)
    key_properties = (
        ('atomic_weight', 'Atomic Weight', 'u', False),
        ('period', 'Period', '', True),
        ('group_id', 'Group', '', True),
        ('block', 'Block', '', False),
        ('en_pauling', 'Electronegativity', '', False),
        ('atomic_radius', 'Atomic Radius', 'pm', False),
        ('ionenergy', 'Ionization Energy', 'eV', False),
        ('melting_point', 'Melting Point', 'K', False),
        ('boiling_point', 'Boiling Point', 'K', False),
        ('density', 'Density', 'g/cm³', False),
    )
    for prop_name, display_name, prop_unit, is_int in key_properties:
        if prop_name in element.index and pd.notna(element.get(prop_name)):
            value_str = _format_value(element[prop_name], prop_unit, is_int)
            hover_lines.append(f"{display_name}: {value_str}")

    return _LINE_BREAK.join(hover_lines)


def _message_figure(message):
    """Return an empty figure that only displays ``message``."""
    fig = go.Figure()
    fig.add_annotation(text=message, showarrow=False, font=dict(size=16))
    return fig


def create_periodic_table_figure(selected_property_label):
    """Create the periodic table figure coloured by the given property.

    Numeric properties are coloured by value (log10 of the value when a log
    scale is selected). Non-numeric properties are coloured by category, with
    the category names shown on the colorbar.

    Args:
        selected_property_label: Display label chosen in the dropdown.

    Returns:
        A Plotly figure. When data is unavailable or the label is unknown,
        the figure contains only an explanatory message.
    """
    if not MENDELEEV_AVAILABLE or elements_data.empty:
        return _message_figure(
            "Mendeleev library not available. Please install: pip install mendeleev"
        )

    selected_property = valid_properties.get(selected_property_label)
    if not selected_property:
        return _message_figure(f"Property '{selected_property_label}' not available")

    property_data = elements_data[selected_property].dropna()
    if property_data.empty:
        return _message_figure(f"No data available for {selected_property_label}")

    # Text columns cannot be used as marker colours directly (Plotly would
    # read them as colour names), so they are mapped to integer codes.
    categories = []
    category_codes = {}
    use_log = False
    if not pd.api.types.is_numeric_dtype(property_data):
        categories = sorted(property_data.astype(str).unique())
        category_codes = {name: code for code, name in enumerate(categories)}
        min_value, max_value = 0, len(categories) - 1
    else:
        use_log = should_use_log_scale(selected_property, elements_data)
        if use_log:
            positive_data = property_data[property_data > 0]
            if positive_data.empty:
                use_log = False
        if use_log:
            viz_data = np.log10(positive_data)
        else:
            viz_data = property_data
        min_value, max_value = viz_data.min(), viz_data.max()

    hover_texts = []
    element_symbols = []
    atomic_numbers = []
    x_positions = []
    y_positions = []
    element_values = []

    for _, element in elements_data.iterrows():
        atomic_num = element['atomic_number']
        if atomic_num not in ELEMENT_POSITIONS:
            continue  # no slot in the table layout
        # A row of mixed dtypes may carry the number as a float; normalise it
        # so the label reads "1" rather than "1.0".
        atomic_num = int(atomic_num)

        x_pos, y_pos = ELEMENT_POSITIONS[atomic_num]
        x_positions.append(x_pos)
        y_positions.append(11 - y_pos)  # Invert y-axis for correct table layout
        element_symbols.append(element['symbol'])
        atomic_numbers.append(atomic_num)

        original_value = element[selected_property]
        if pd.isna(original_value):
            display_value = np.nan
        elif categories:
            display_value = category_codes[str(original_value)]
        elif use_log:
            # Non-positive values have no logarithm; treat them as missing
            # instead of mixing raw values into a log10 colour axis.
            display_value = np.log10(original_value) if original_value > 0 else np.nan
        else:
            display_value = original_value
        element_values.append(display_value)

        hover_texts.append(
            create_hover_text(element, selected_property, original_value, display_value)
        )

    log_suffix = f"{_LINE_BREAK}(log scale)" if use_log else ''
    colorbar = dict(
        title=f"{selected_property_label}{log_suffix}",
        thickness=20,
        x=1.02,
    )
    if categories:
        colorbar.update(
            tickmode='array',
            tickvals=list(range(len(categories))),
            ticktext=categories,
        )

    fig = go.Figure()
    fig.add_trace(go.Scatter(
        x=x_positions,
        y=y_positions,
        mode='markers+text',
        text=element_symbols,
        hoverinfo='text',
        hovertext=hover_texts,
        textfont=dict(
            family="Arial, sans-serif",
            size=14,
            color="white",
            weight="bold",
        ),
        hoverlabel=dict(
            bgcolor="rgba(255,255,255,0.95)",
            font_size=12,
            font_family="Arial, sans-serif",
            bordercolor="black",
        ),
        marker=dict(
            symbol='square',
            color=element_values,
            size=45,
            colorscale=get_portland_like_colorscale(use_log),
            cmin=min_value,
            cmax=max_value,
            colorbar=colorbar,
            showscale=True,
            line=dict(color='black', width=1),
        ),
    ))

    # Build all annotations up front and set them once: adding them one by
    # one re-validates the whole annotation list on every call.
    annotations = [
        dict(
            x=x_pos,
            y=y_pos + 0.3,
            xref='x',
            yref='y',
            text=str(number),
            showarrow=False,
            font=dict(
                family="Arial, sans-serif",
                size=8,
                color="white",
                weight="bold",
            ),
        )
        for x_pos, y_pos, number in zip(x_positions, y_positions, atomic_numbers)
    ]
    for row_y, row_label in ((2, "Lanthanides"), (1, "Actinides")):
        annotations.append(dict(
            x=3,
            y=row_y,
            xref='x',
            yref='y',
            text=row_label,
            showarrow=False,
            font=dict(size=10, weight="bold"),
        ))

    title_text = f'Periodic Table by {selected_property_label}'
    if use_log:
        title_text += f'{_LINE_BREAK}(Logarithmic Color Scale)'

    hidden_axis = dict(showgrid=False, zeroline=False, showticklabels=False, visible=False)
    fig.update_layout(
        title=dict(text=title_text, x=0.5, font=dict(size=24)),
        xaxis=dict(range=[0, 19], **hidden_axis),
        yaxis=dict(range=[0, 12], **hidden_axis),
        annotations=annotations,
        plot_bgcolor='white',
        paper_bgcolor='#f8f9fa',
        width=1480,
        height=800,
        margin=dict(l=20, r=100, t=100, b=20),
    )
    return fig


# Create Gradio interface
def create_gradio_app():
    """Create the Gradio interface.

    Returns:
        A ``gr.Blocks`` app with a property dropdown and the table plot, or a
        minimal ``gr.Interface`` showing an error when mendeleev is missing
        or no property qualified for the dropdown.
    """
    if not MENDELEEV_AVAILABLE or not dropdown_properties:
        def error_interface():
            return "❌ Mendeleev library not available or no valid properties found. Please install: pip install mendeleev"

        return gr.Interface(
            fn=error_interface,
            inputs=[],
            outputs=gr.Textbox(label="Error"),
            title="Periodic Table Dashboard - Error",
        )

    property_options = list(dropdown_properties)

    # Prefer the property flagged as default (the element series); otherwise
    # fall back to the first dropdown entry.
    default_property = next(
        (
            label
            for label, prop_name in dropdown_properties.items()
            if property_info.get(prop_name, {}).get('is_default', False)
        ),
        None,
    )
    if not default_property and property_options:
        default_property = property_options[0]

    with gr.Blocks(title="Interactive Periodic Table", theme=gr.themes.Soft()) as app:
        gr.Markdown("# 🧪 Interactive Periodic Table")

        with gr.Row():
            with gr.Column(scale=1):
                property_dropdown = gr.Dropdown(
                    choices=property_options,
                    value=default_property,
                    label="Select Property to Colourize",
                )

        with gr.Row():
            plot_output = gr.Plot(show_label=False)

        with gr.Row():
            gr.Markdown(
                "**🔬 Data Source:** "
                "[Mendeleev Library](https://mendeleev.readthedocs.io/)"
            )

        # Update plot when dropdown changes
        property_dropdown.change(
            fn=create_periodic_table_figure,
            inputs=[property_dropdown],
            outputs=[plot_output],
        )

        # Draw the initial plot for the default selection on page load
        app.load(
            fn=create_periodic_table_figure,
            inputs=[property_dropdown],
            outputs=[plot_output],
        )

    return app


def main():
    """Print a summary of the selectable properties and launch the app."""
    print(f"🚀 Starting Gradio app with {len(dropdown_properties)} properties (excluding continuous correlative)")
    if dropdown_properties:
        print("📋 Available dropdown properties:")
        for label, prop_name in dropdown_properties.items():
            log_note = " (log scale)" if should_use_log_scale(prop_name, elements_data) else ""
            int_note = " (integer)" if is_integer_property(prop_name, elements_data) else ""
            default_note = " (DEFAULT)" if property_info.get(prop_name, {}).get('is_default', False) else ""
            data_count = elements_data[prop_name].notna().sum()
            print(f"  • {label}: {data_count} elements{log_note}{int_note}{default_note}")
        print(f"\n📋 Total valid properties (including continuous correlative): {len(valid_properties)}")

    app = create_gradio_app()
    app.launch()


# Create and run the app
if __name__ == "__main__":
    main()