# Scraped file-page header (not part of the program):
# mirix's picture | Upload 2 files | ceadb69 verified | raw | history blame | 27 kB
import gradio as gr
import plotly.graph_objects as go
import numpy as np
import pandas as pd
# Import mendeleev for comprehensive periodic table data
try:
from mendeleev.fetch import fetch_table
MENDELEEV_AVAILABLE = True
except ImportError:
print("mendeleev library not found. Please install it using: pip install mendeleev")
MENDELEEV_AVAILABLE = False
def load_periodic_data():
    """Fetch the mendeleev ``elements`` table and list its numeric property columns.

    Returns:
        A ``(table, property_columns)`` tuple. ``property_columns`` contains the
        names of the numeric columns, minus identifier/bookkeeping columns such
        as ``atomic_number`` or ``period``. If mendeleev is not installed, or
        fetching the table raises, an empty DataFrame and an empty list are
        returned instead (the fetch error is printed).
    """
    if not MENDELEEV_AVAILABLE:
        return pd.DataFrame(), []

    # Numeric columns that identify or position an element rather than
    # describe one of its properties.
    non_property_columns = ['atomic_number', 'period', 'group_id', 'mass_number', 'mass', 'id']

    try:
        table = fetch_table('elements')
        numeric_names = table.select_dtypes(include=[np.number]).columns.tolist()
        property_columns = [
            name for name in numeric_names if name not in non_property_columns
        ]
        return table, property_columns
    except Exception as e:
        # Best-effort load: report the problem and fall back to "no data".
        print(f"Error loading mendeleev data: {e}")
        return pd.DataFrame(), []
# Load the element table and the candidate property columns once, at import
# time. Both are empty when mendeleev is unavailable or the fetch failed.
elements_data, available_properties = load_periodic_data()
def is_continuous_correlative_property(prop_name, df):
    """Report whether a property is essentially a continuous proxy for atomic number.

    Such properties are excluded from the dropdown. A property qualifies when
    either:

    * its name contains one of the mass/weight markers, or
    * the column has more than 20 rows that are non-null together with
      ``atomic_number``, correlates with ``atomic_number`` above 0.9 in
      absolute value, and more than 80% of its values are distinct.

    Args:
        prop_name: Column / property name to test.
        df: DataFrame that may contain ``prop_name`` and ``atomic_number``.

    Returns:
        ``True`` if the property should be treated as continuous and
        correlative, ``False`` otherwise.
    """
    mass_like_markers = {'atomic_weight', 'atomic_mass', 'mass', 'weight'}
    lowered_name = prop_name.lower()
    if any(marker in lowered_name for marker in mass_like_markers):
        return True

    # The statistical test needs both columns to be present.
    if prop_name not in df.columns or 'atomic_number' not in df.columns:
        return False

    paired = df[[prop_name, 'atomic_number']].dropna()
    if len(paired) <= 20:
        return False

    values = paired[prop_name]
    correlation = values.corr(paired['atomic_number'])
    # Share of distinct values; a high share suggests a continuous quantity.
    unique_ratio = len(values.unique()) / len(values)
    if abs(correlation) > 0.9 and unique_ratio > 0.8:
        return True
    return False
def is_integer_property(prop_name, df):
    """Determine whether a property should be displayed as an integer.

    A property is considered integer-valued when its name contains one of the
    known integer markers (``period``, ``group``, ``valence``, ...), or when
    the column is numeric, has more than five non-null values, and every one
    of them is within ``1e-10`` of a whole number.

    Args:
        prop_name: Column / property name to test.
        df: DataFrame that may contain ``prop_name``.

    Returns:
        ``True`` if the property should be formatted as an integer, ``False``
        otherwise. Non-numeric columns (for example a text "series" column)
        yield ``False`` rather than raising.
    """
    integer_properties = {
        'period', 'group_id', 'group', 'block_number', 'neutrons',
        'electrons', 'protons', 'valence', 'oxidation_states'
    }

    lowered_name = prop_name.lower()
    if any(marker in lowered_name for marker in integer_properties):
        return True

    if prop_name not in df.columns:
        return False

    data = df[prop_name].dropna()
    if len(data) <= 5:
        return False

    # Rounding is only meaningful for numeric data; text/categorical columns
    # would make np.round raise, so treat them as "not integer".
    if not pd.api.types.is_numeric_dtype(data):
        return False

    return bool(np.allclose(data, np.round(data), rtol=0, atol=1e-10))
def calculate_color_variance(data, use_log=False):
    """Measure how evenly values would spread over a colour scale.

    The values (or the base-10 logarithm of the positive values when
    ``use_log`` is true) are min-max normalised to ``[0, 1]`` and the variance
    of the normalised values is returned. A larger result means the values
    occupy the colour range more evenly.

    Args:
        data: Array-like supporting boolean-mask indexing, ``min`` and ``max``
            (a NumPy array or pandas Series).
        use_log: Score the log10-transformed positive values instead of the
            raw values.

    Returns:
        The variance of the normalised values, or ``0`` when fewer than three
        usable values exist or all values are equal.
    """
    if len(data) < 3:
        return 0

    scaled = data
    if use_log:
        # Logarithms are only defined for strictly positive values.
        positives = data[data > 0]
        if len(positives) < 3:
            return 0
        scaled = np.log10(positives)

    low = scaled.min()
    high = scaled.max()
    if high == low:
        # A constant series maps to a single colour.
        return 0

    # Min-max normalisation mimics how values are mapped onto the colour scale.
    return np.var((scaled - low) / (high - low))
def requires_log_scale(prop_name, df):
    """Decide heuristically whether a logarithmic colour scale suits a property.

    The answer is ``False`` unless the column exists, is numeric, has at least
    ten non-null values and all of them are strictly positive. Past those
    gates, log scale is chosen when:

    * the property name contains an abundance/decay style marker, or
    * there are more than 20 values, the data spans more than two orders of
      magnitude or has absolute skewness above 2, and the log-scaled colour
      variance beats the linear one by at least 50%.

    Args:
        prop_name: Column / property name to evaluate.
        df: DataFrame that may contain ``prop_name``.

    Returns:
        ``True`` if a log colour scale is recommended, ``False`` otherwise.
        Non-numeric columns yield ``False`` rather than raising.
    """
    if prop_name not in df.columns:
        return False

    data = df[prop_name].dropna()
    if len(data) < 10:
        return False

    # Text/categorical columns cannot be compared with 0 or log-transformed.
    if not pd.api.types.is_numeric_dtype(data):
        return False

    # Log scale needs strictly positive values.
    if data.min() <= 0:
        return False

    # Properties that typically benefit from log scale (abundance-related).
    log_scale_indicators = [
        'abundance', 'concentration', 'ppm', 'ppb', 'radioactive',
        'half_life', 'decay', 'isotope_abundance'
    ]
    lowered_name = prop_name.lower()
    if any(indicator in lowered_name for indicator in log_scale_indicators):
        return True

    # All remaining criteria must hold; the cheap ones are checked first.
    if len(data) <= 20:
        return False

    range_ratio = data.max() / data.min()
    data_skewness = abs(data.skew())
    if not (range_ratio > 100 or data_skewness > 2):
        return False

    # Require at least a 50% better colour distribution before switching.
    improvement_threshold = 1.5
    linear_variance = calculate_color_variance(data, use_log=False)
    log_variance = calculate_color_variance(data, use_log=True)
    return bool(log_variance > linear_variance * improvement_threshold)
def get_element_series_description(df):
    """Pick the column that best describes each element's series/category.

    Preference order: a column named exactly ``series``, then ``group_name``,
    then the first column whose name contains a series-like keyword. If none
    exists but both ``period`` and ``group_id`` are present, ``period`` is
    used as a fallback.

    Args:
        df: DataFrame whose column names are inspected.

    Returns:
        The chosen column name, or ``None`` when nothing suitable is found.
    """
    keywords = ('series', 'group_name', 'category', 'family', 'type')
    candidates = [
        name for name in df.columns
        if any(keyword in name.lower() for keyword in keywords)
    ]

    # Exact, descriptive names win over keyword matches.
    for preferred in ('series', 'group_name'):
        if preferred in df.columns:
            return preferred

    if candidates:
        return candidates[0]

    # No descriptive column at all: fall back to the period.
    if 'period' in df.columns and 'group_id' in df.columns:
        return 'period'
    return None
def create_element_series_mapping(df):
    """Choose the column (and its display label) used as the default colouring.

    A ``series`` column is used whenever present. Otherwise the first of
    ``group_name``, ``category``, ``family`` and ``type`` that exists with
    more than 50 non-null values is used. Failing that, ``period`` is the
    fallback.

    Args:
        df: DataFrame to inspect.

    Returns:
        A ``(label, column_name)`` tuple, or ``(None, None)`` when no suitable
        column exists.
    """
    columns = df.columns
    if 'series' in columns:
        return 'Element Series', 'series'

    # Alternative descriptive columns, in order of preference.
    alternatives = (
        ('group_name', 'Element Group'),
        ('category', 'Element Category'),
        ('family', 'Element Family'),
        ('type', 'Element Type'),
    )
    for column, label in alternatives:
        if column in columns and df[column].notna().sum() > 50:
            return label, column

    return ('Period', 'period') if 'period' in columns else (None, None)
def filter_relevant_properties(df, available_props):
    """Select the properties worth plotting and describe how to present them.

    Three sources are considered, in this order:

    1. the default "series" column chosen by ``create_element_series_mapping``
       (needs at least 50 non-null values),
    2. a curated list of properties, each with its own minimum data count,
    3. any other available property with more than 80 non-null values.

    Properties that are continuous and strongly tied to atomic number remain
    valid but are left out of the dropdown.

    Args:
        df: Element table.
        available_props: Names of candidate property columns in ``df``.

    Returns:
        A ``(valid_properties, dropdown_properties, property_info)`` tuple:
        ``valid_properties`` and ``dropdown_properties`` map display label to
        column name; ``property_info`` maps column name to a configuration
        dict with ``label``, ``min_data``, ``log_scale`` and, for the default
        property, ``is_default``.
    """
    # Curated properties with quality thresholds.
    curated_properties = {
        'atomic_weight': {'label': 'Atomic Mass (u)', 'min_data': 100, 'log_scale': False},
        'density': {'label': 'Density (g/cm³)', 'min_data': 50, 'log_scale': False},
        'en_pauling': {'label': 'Electronegativity (Pauling)', 'min_data': 70, 'log_scale': False},
        'atomic_radius': {'label': 'Atomic Radius (pm)', 'min_data': 50, 'log_scale': False},
        'vdw_radius': {'label': 'Van der Waals Radius (pm)', 'min_data': 40, 'log_scale': False},
        'covalent_radius': {'label': 'Covalent Radius (pm)', 'min_data': 40, 'log_scale': False},
        'ionenergy': {'label': 'First Ionization Energy (eV)', 'min_data': 80, 'log_scale': False},
        'electron_affinity': {'label': 'Electron Affinity (eV)', 'min_data': 40, 'log_scale': False},
        'melting_point': {'label': 'Melting Point (K)', 'min_data': 70, 'log_scale': False},
        'boiling_point': {'label': 'Boiling Point (K)', 'min_data': 60, 'log_scale': False},
        'atomic_volume': {'label': 'Atomic Volume (cm³/mol)', 'min_data': 40, 'log_scale': False},
        'thermal_conductivity': {'label': 'Thermal Conductivity (W/mK)', 'min_data': 30, 'log_scale': False},
        'c6': {'label': 'C6 Dispersion Coefficient', 'min_data': 30, 'log_scale': False},
        'dipole_polarizability': {'label': 'Dipole Polarizability', 'min_data': 30, 'log_scale': False},
        'period': {'label': 'Period', 'min_data': 100, 'log_scale': False},
        'group_id': {'label': 'Group', 'min_data': 100, 'log_scale': False},
    }

    valid_properties = {}
    dropdown_properties = {}  # Subset offered in the dropdown
    property_info = {}

    def register(prop_name, config):
        """Record one accepted property in all three result mappings."""
        # A column already registered as the default must keep that marker
        # even when a later (curated/uncurated) entry replaces its config.
        if property_info.get(prop_name, {}).get('is_default', False):
            config = {**config, 'is_default': True}
        valid_properties[config['label']] = prop_name
        property_info[prop_name] = config
        # Only offer it in the dropdown if not continuous and correlative.
        if not is_continuous_correlative_property(prop_name, df):
            dropdown_properties[config['label']] = prop_name

    # First, try to add the element series as the default choice.
    default_label, default_property = create_element_series_mapping(df)
    if default_property and default_property in df.columns:
        if df[default_property].notna().sum() >= 50:  # Lower threshold for series data
            valid_properties[default_label] = default_property
            dropdown_properties[default_label] = default_property
            property_info[default_property] = {
                'label': default_label,
                'min_data': 50,
                'log_scale': False,
                'is_default': True
            }

    # Curated properties: keep those present with enough data.
    for prop_name, prop_config in curated_properties.items():
        if prop_name not in available_props:
            continue
        if df[prop_name].notna().sum() >= prop_config['min_data']:
            register(prop_name, prop_config)

    # Any other property with very good data coverage (>80 elements).
    for prop in available_props:
        if prop in curated_properties:
            continue
        if df[prop].notna().sum() > 80:  # High threshold for uncurated properties
            display_name = prop.replace('_', ' ').title()
            register(prop, {
                'label': display_name,
                'min_data': 80,
                'log_scale': requires_log_scale(prop, df),
            })

    return valid_properties, dropdown_properties, property_info
# Build the property lookup tables once, at import time:
#   valid_properties    - display label -> column name for every accepted property
#   dropdown_properties - the subset of those entries offered in the dropdown
#   property_info       - column name -> configuration dict (label, min_data, log_scale, ...)
valid_properties, dropdown_properties, property_info = filter_relevant_properties(elements_data, available_properties)
def get_portland_like_colorscale(use_log=False):
    """Return the name of the colorscale used to colour the table.

    Args:
        use_log: Whether the colour axis is logarithmic. It is accepted so
            callers can state their intent, but the same ``'Portland'``
            colorscale is returned for both linear and log scales.

    Returns:
        The string ``'Portland'``.
    """
    return 'Portland'
def should_use_log_scale(property_name, df):
    """Decide whether a property is drawn with a logarithmic colour scale.

    A ``log_scale`` flag set in the module-level ``property_info`` wins;
    otherwise the decision is delegated to ``requires_log_scale``.

    Args:
        property_name: Column name of the property.
        df: DataFrame expected to contain that column.

    Returns:
        ``True`` when log scale should be used. Always ``False`` if the
        column is missing from ``df``.
    """
    if property_name not in df.columns:
        return False

    # An explicit configuration takes precedence over the heuristic.
    explicitly_log = (
        property_name in property_info
        and property_info[property_name].get('log_scale', False)
    )
    if explicitly_log:
        return True

    return requires_log_scale(property_name, df)
# Standard periodic table positions.
# Maps atomic number -> (column, row) on an 18-column grid. Rows 1-7 are the
# periods. The lanthanides (57-71) and actinides (89-103) are placed in rows
# 9 and 10 respectively, leaving row 8 empty, and column 3 of rows 6 and 7 is
# left unoccupied.
ELEMENT_POSITIONS = {
# Period 1
1: (1, 1), 2: (18, 1),
# Period 2
3: (1, 2), 4: (2, 2), 5: (13, 2), 6: (14, 2), 7: (15, 2), 8: (16, 2), 9: (17, 2), 10: (18, 2),
# Period 3
11: (1, 3), 12: (2, 3), 13: (13, 3), 14: (14, 3), 15: (15, 3), 16: (16, 3), 17: (17, 3), 18: (18, 3),
# Period 4
19: (1, 4), 20: (2, 4), 21: (3, 4), 22: (4, 4), 23: (5, 4), 24: (6, 4), 25: (7, 4), 26: (8, 4),
27: (9, 4), 28: (10, 4), 29: (11, 4), 30: (12, 4), 31: (13, 4), 32: (14, 4), 33: (15, 4), 34: (16, 4), 35: (17, 4), 36: (18, 4),
# Period 5
37: (1, 5), 38: (2, 5), 39: (3, 5), 40: (4, 5), 41: (5, 5), 42: (6, 5), 43: (7, 5), 44: (8, 5),
45: (9, 5), 46: (10, 5), 47: (11, 5), 48: (12, 5), 49: (13, 5), 50: (14, 5), 51: (15, 5), 52: (16, 5), 53: (17, 5), 54: (18, 5),
# Period 6 (s-block)
55: (1, 6), 56: (2, 6),
# Lanthanides (period 6 continued), shown in row 9
57: (4, 9), 58: (5, 9), 59: (6, 9), 60: (7, 9), 61: (8, 9), 62: (9, 9), 63: (10, 9), 64: (11, 9),
65: (12, 9), 66: (13, 9), 67: (14, 9), 68: (15, 9), 69: (16, 9), 70: (17, 9), 71: (18, 9),
# Period 6 continued (resumes at column 4)
72: (4, 6), 73: (5, 6), 74: (6, 6), 75: (7, 6), 76: (8, 6), 77: (9, 6), 78: (10, 6), 79: (11, 6),
80: (12, 6), 81: (13, 6), 82: (14, 6), 83: (15, 6), 84: (16, 6), 85: (17, 6), 86: (18, 6),
# Period 7 (s-block)
87: (1, 7), 88: (2, 7),
# Actinides (period 7 continued), shown in row 10
89: (4, 10), 90: (5, 10), 91: (6, 10), 92: (7, 10), 93: (8, 10), 94: (9, 10), 95: (10, 10), 96: (11, 10),
97: (12, 10), 98: (13, 10), 99: (14, 10), 100: (15, 10), 101: (16, 10), 102: (17, 10), 103: (18, 10),
# Period 7 continued (resumes at column 4)
104: (4, 7), 105: (5, 7), 106: (6, 7), 107: (7, 7), 108: (8, 7), 109: (9, 7), 110: (10, 7), 111: (11, 7),
112: (12, 7), 113: (13, 7), 114: (14, 7), 115: (15, 7), 116: (16, 7), 117: (17, 7), 118: (18, 7)
}
def get_electronic_configuration(element):
    """Return an element's electronic configuration as a string, if present.

    Several possible column names are tried in order; the first one that
    exists in the row and holds a non-null value is used.

    Args:
        element: A pandas Series representing one element (one table row).

    Returns:
        The configuration converted with ``str``, or ``None`` when no
        candidate column carries a value.
    """
    candidate_columns = ('electronic_configuration', 'electron_configuration', 'econf', 'ec')
    return next(
        (
            str(element[name])
            for name in candidate_columns
            if name in element.index and pd.notna(element.get(name))
        ),
        None,  # No explicit configuration column available.
    )
def create_hover_text(element, selected_property, original_value, display_value):
    """Create the HTML hover text shown for one element.

    The text starts with the element name and symbol, then the selected
    property's value (highlighted), the atomic number, the electronic
    configuration when available, and a fixed list of key properties that are
    present and non-null in ``element``.

    Args:
        element: pandas Series holding one row of the element table.
        selected_property: Column name of the property being visualised.
        original_value: The element's untransformed value for that property.
        display_value: The value used for colouring (possibly log-scaled).
            Not used in the text; kept so existing callers keep working.

    Returns:
        A string whose lines are joined with ``<br>``.
    """
    def format_value(value, unit="", is_integer=False):
        """Format a value with its unit; missing values become 'N/A'."""
        if pd.isna(value):
            return "N/A"
        # NumPy scalars are included explicitly: np.integer types are not
        # subclasses of the built-in int.
        if isinstance(value, (int, float, np.integer, np.floating)):
            if is_integer:
                return f"{int(round(value))} {unit}".strip()
            elif abs(value) >= 1000:
                return f"{value:.2e} {unit}".strip()
            elif abs(value) >= 10:
                return f"{value:.2f} {unit}".strip()
            else:
                return f"{value:.3f} {unit}".strip()
        return str(value)

    # Label: configured label if known, otherwise derived from the column name.
    prop_config = property_info.get(selected_property, {})
    prop_label = prop_config.get('label', selected_property.replace('_', ' ').title())

    is_int_prop = is_integer_property(selected_property, elements_data)

    # Units are inferred from keywords in the property name. Order matters:
    # the first rule with a matching keyword wins.
    unit_rules = (
        (('density',), "g/cm³"),
        (('electronegativity',), ""),
        (('radius',), "pm"),
        (('energy', 'ionization'), "eV"),
        (('affinity',), "eV"),
        (('point', 'temperature'), "K"),
        (('weight', 'mass'), "u"),
        (('volume',), "cm³/mol"),
        (('conductivity',), "W/mK"),
    )
    lowered_name = selected_property.lower()
    unit = next(
        (
            rule_unit
            for keywords, rule_unit in unit_rules
            if any(keyword in lowered_name for keyword in keywords)
        ),
        "",
    )

    current_str = format_value(original_value, unit, is_int_prop)

    hover_lines = [
        f"<b>{element.get('name', 'N/A')} ({element.get('symbol', 'N/A')})</b>",
        f"<b>{prop_label}: {current_str}</b>",
        "",  # Empty line for separation
        f"Atomic Number: {element.get('atomic_number', 'N/A')}",
    ]

    electronic_config = get_electronic_configuration(element)
    if electronic_config:
        hover_lines.append(f"Electronic Configuration: {electronic_config}")

    # (column, display name, unit, format as integer)
    key_properties = [
        ('atomic_weight', 'Atomic Weight', 'u', False),
        ('period', 'Period', '', True),
        ('group_id', 'Group', '', True),
        ('block', 'Block', '', False),
        ('en_pauling', 'Electronegativity', '', False),
        ('atomic_radius', 'Atomic Radius', 'pm', False),
        ('ionenergy', 'Ionization Energy', 'eV', False),
        ('melting_point', 'Melting Point', 'K', False),
        ('boiling_point', 'Boiling Point', 'K', False),
        ('density', 'Density', 'g/cm³', False),
    ]
    for prop_name, display_name, prop_unit, is_int in key_properties:
        if prop_name in element.index and pd.notna(element.get(prop_name)):
            value_str = format_value(element[prop_name], prop_unit, is_int)
            hover_lines.append(f"{display_name}: {value_str}")

    return "<br>".join(hover_lines)
def _message_figure(message):
    """Build an otherwise empty figure that displays ``message``."""
    fig = go.Figure()
    fig.add_annotation(
        text=message,
        showarrow=False,
        font=dict(size=16)
    )
    return fig


def create_periodic_table_figure(selected_property_label):
    """Create the periodic table figure coloured by the given property.

    Each element listed in ``ELEMENT_POSITIONS`` is drawn as a square marker
    carrying its symbol and atomic number, coloured by the selected property.
    The colour axis is logarithmic when ``should_use_log_scale`` says so and
    at least one value is positive.

    Args:
        selected_property_label: Display label of the property; it is looked
            up in ``valid_properties`` to find the data column.

    Returns:
        A Plotly figure. If the data is unavailable, the label is unknown, or
        the column has no values, the figure contains only an explanatory
        message.
    """
    if not MENDELEEV_AVAILABLE or elements_data.empty:
        return _message_figure("Mendeleev library not available. Please install: pip install mendeleev")

    # Get the actual property (column) name from the label.
    selected_property = valid_properties.get(selected_property_label)
    if not selected_property:
        return _message_figure(f"Property '{selected_property_label}' not available")

    property_data = elements_data[selected_property].dropna()
    if property_data.empty:
        return _message_figure(f"No data available for {selected_property_label}")

    # Decide on the colour scale and derive the colour-axis range from the
    # same (possibly log-transformed) values that will be plotted.
    use_log = should_use_log_scale(selected_property, elements_data)
    color_source = property_data
    if use_log:
        positive_data = property_data[property_data > 0]
        if positive_data.empty:
            # Nothing can be log-transformed; fall back to a linear scale.
            use_log = False
        else:
            color_source = np.log10(positive_data)
    min_value = color_source.min()
    max_value = color_source.max()

    hover_texts = []
    element_symbols = []
    atomic_numbers = []
    x_positions = []
    y_positions = []
    element_values = []

    for _, element in elements_data.iterrows():
        atomic_num = element['atomic_number']

        # Skip elements without position data.
        if atomic_num not in ELEMENT_POSITIONS:
            continue

        x_pos, y_pos = ELEMENT_POSITIONS[atomic_num]
        x_positions.append(x_pos)
        y_positions.append(11 - y_pos)  # Invert y-axis for correct table layout
        element_symbols.append(element['symbol'])
        atomic_numbers.append(atomic_num)

        original_value = element[selected_property]
        if pd.isna(original_value):
            display_value = np.nan
        elif use_log:
            # Non-positive values have no logarithm; leave them uncoloured
            # rather than mixing raw values into a log-scaled colour axis.
            display_value = np.log10(original_value) if original_value > 0 else np.nan
        else:
            display_value = original_value
        element_values.append(display_value)

        hover_texts.append(
            create_hover_text(element, selected_property, original_value, display_value)
        )

    fig = go.Figure()

    # One square marker per element, labelled with its symbol.
    fig.add_trace(go.Scatter(
        x=x_positions,
        y=y_positions,
        mode='markers+text',
        text=element_symbols,
        hoverinfo='text',
        hovertext=hover_texts,
        textfont=dict(
            family="Arial, sans-serif",
            size=14,
            color="white",
            weight="bold",
        ),
        hoverlabel=dict(
            bgcolor="rgba(255,255,255,0.95)",
            font_size=12,
            font_family="Arial, sans-serif",
            bordercolor="black"
        ),
        marker=dict(
            symbol='square',
            color=element_values,
            size=45,
            colorscale=get_portland_like_colorscale(use_log),
            cmin=min_value,
            cmax=max_value,
            colorbar=dict(
                title=f"{selected_property_label}{'<br>(log scale)' if use_log else ''}",
                thickness=20,
                x=1.02
            ),
            showscale=True,
            line=dict(color='black', width=1)
        )
    ))

    # Atomic numbers, drawn slightly above each symbol.
    for x_pos, y_pos, atomic_num in zip(x_positions, y_positions, atomic_numbers):
        fig.add_annotation(
            x=x_pos,
            y=y_pos + 0.3,
            text=str(atomic_num),
            showarrow=False,
            font=dict(
                family="Arial, sans-serif",
                size=8,
                color="white",
                weight="bold",
            )
        )

    # Row labels for the lanthanide and actinide series.
    fig.add_annotation(x=3, y=2, text="Lanthanides", showarrow=False,
                       font=dict(size=10, weight="bold"))
    fig.add_annotation(x=3, y=1, text="Actinides", showarrow=False,
                       font=dict(size=10, weight="bold"))

    title_text = f'<b>Periodic Table by {selected_property_label}</b>'
    if use_log:
        title_text += '<br><span style="font-size:14px;">(Logarithmic Color Scale)</span>'

    fig.update_layout(
        title=dict(
            text=title_text,
            x=0.5,
            font=dict(size=24)
        ),
        xaxis=dict(
            range=[0, 19],
            showgrid=False,
            zeroline=False,
            showticklabels=False,
            visible=False
        ),
        yaxis=dict(
            range=[0, 12],
            showgrid=False,
            zeroline=False,
            showticklabels=False,
            visible=False
        ),
        plot_bgcolor='white',
        paper_bgcolor='#f8f9fa',
        width=1200,
        height=800,
        margin=dict(l=20, r=100, t=100, b=20)
    )
    return fig
# Create Gradio interface
def create_gradio_app():
    """Create the Gradio interface.

    Returns:
        A ``gr.Blocks`` app with a property dropdown and the periodic table
        plot. When mendeleev is unavailable or no dropdown properties exist,
        a minimal ``gr.Interface`` that only reports the error is returned.
    """
    if not MENDELEEV_AVAILABLE or not dropdown_properties:
        def error_interface():
            return "❌ Mendeleev library not available or no valid properties found. Please install: pip install mendeleev"

        return gr.Interface(
            fn=error_interface,
            inputs=[],
            outputs=gr.Textbox(label="Error"),
            title="Periodic Table Dashboard - Error"
        )

    # Dropdown options exclude continuous correlative properties.
    property_options = list(dropdown_properties.keys())

    # Default to the property flagged as default (the element series),
    # otherwise to the first available option.
    default_property = next(
        (
            label
            for label, prop_name in dropdown_properties.items()
            if property_info.get(prop_name, {}).get('is_default', False)
        ),
        None,
    )
    if not default_property and property_options:
        default_property = property_options[0]

    with gr.Blocks(title="Interactive Periodic Table", theme=gr.themes.Soft()) as app:
        gr.Markdown("# 🧪 Interactive Periodic Table")
        with gr.Row():
            with gr.Column(scale=1):
                property_dropdown = gr.Dropdown(
                    choices=property_options,
                    value=default_property,
                    label="Select Property to Colourize",
                )
        with gr.Row():
            plot_output = gr.Plot(show_label=False)
        with gr.Row():
            gr.Markdown(
                "\n**🔬 Data Source:** [Mendeleev Library](https://mendeleev.readthedocs.io/)\n"
            )

        # Redraw the plot whenever the dropdown selection changes.
        property_dropdown.change(
            fn=create_periodic_table_figure,
            inputs=[property_dropdown],
            outputs=[plot_output]
        )
        # Draw the initial plot for the default selection on page load.
        app.load(
            fn=create_periodic_table_figure,
            inputs=[property_dropdown],
            outputs=[plot_output]
        )
    return app
# Create and run the app
if __name__ == "__main__":
    # Print a short summary of what the dropdown will offer before launching.
    print(f"🚀 Starting Gradio app with {len(dropdown_properties)} properties (excluding continuous correlative)")
    if dropdown_properties:
        print("📋 Available dropdown properties:")
        for label, prop_name in dropdown_properties.items():
            log_note = " (log scale)" if should_use_log_scale(prop_name, elements_data) else ""
            int_note = " (integer)" if is_integer_property(prop_name, elements_data) else ""
            default_note = " (DEFAULT)" if property_info.get(prop_name, {}).get('is_default', False) else ""
            data_count = elements_data[prop_name].notna().sum()
            print(f" • {label}: {data_count} elements{log_note}{int_note}{default_note}")
    print(f"\n📋 Total valid properties (including continuous correlative): {len(valid_properties)}")
    app = create_gradio_app()
    app.launch()