Spaces:

mirix
/

Periodic_Table_Colourizer

Sleeping

App Files Files Community

Periodic_Table_Colourizer / app.py

mirix

Upload 2 files

ceadb69 verified 8 days ago

raw

history blame

27 kB

	import gradio as gr
	import plotly.graph_objects as go
	import numpy as np
	import pandas as pd

	# Optional dependency: mendeleev supplies the periodic table dataset.
	try:
	    from mendeleev.fetch import fetch_table
	except ImportError:
	    # Record the absence so the rest of the module can degrade gracefully.
	    MENDELEEV_AVAILABLE = False
	    print("mendeleev library not found. Please install it using: pip install mendeleev")
	else:
	    MENDELEEV_AVAILABLE = True

	def load_periodic_data():
	    """Fetch the element table from mendeleev and list its candidate properties.

	    Returns:
	        A ``(dataframe, property_names)`` tuple. ``property_names`` contains the
	        numeric columns of the table, minus a fixed set of identifier and
	        bookkeeping columns. If mendeleev is unavailable or the fetch raises,
	        an empty DataFrame and an empty list are returned instead.
	    """
	    if MENDELEEV_AVAILABLE:
	        try:
	            # Full element table with every property column
	            table = fetch_table('elements')

	            # Identifier-like numeric columns that are not colourable properties
	            skipped = {'atomic_number', 'period', 'group_id', 'mass_number', 'mass', 'id'}
	            numeric_names = table.select_dtypes(include=[np.number]).columns.tolist()
	            property_names = [name for name in numeric_names if name not in skipped]

	            return table, property_names
	        except Exception as e:
	            # Best effort: report the failure and fall through to the empty result
	            print(f"Error loading mendeleev data: {e}")

	    return pd.DataFrame(), []

	# Load the element table and the candidate property names once, at module import
	elements_data, available_properties = load_periodic_data()

	def is_continuous_correlative_property(prop_name, df):
	    """Tell whether a property merely tracks atomic number and should be hidden.

	    A property qualifies when its name contains a mass/weight keyword, or when
	    its values are both strongly correlated with ``atomic_number``
	    (|r| > 0.9) and mostly distinct (> 80% unique) over more than 20 rows.

	    Args:
	        prop_name: Column name of the property.
	        df: DataFrame expected to hold the property and ``atomic_number``.

	    Returns:
	        True if the property should be excluded from the dropdown, else False.
	    """
	    mass_like_keywords = ('atomic_weight', 'atomic_mass', 'mass', 'weight')
	    lowered = prop_name.lower()
	    if any(keyword in lowered for keyword in mass_like_keywords):
	        return True

	    # The statistical test needs both columns to be present
	    if prop_name not in df.columns or 'atomic_number' not in df.columns:
	        return False

	    paired = df[[prop_name, 'atomic_number']].dropna()
	    if len(paired) <= 20:
	        return False

	    values = paired[prop_name]
	    correlation = values.corr(paired['atomic_number'])
	    # Share of distinct values; near 1.0 means the property is effectively continuous
	    distinct_fraction = len(values.unique()) / len(values)
	    if abs(correlation) > 0.9 and distinct_fraction > 0.8:
	        return True

	    return False

	def is_integer_property(prop_name, df):
	    """Decide whether a property's values should be displayed as integers.

	    A property is treated as integer when its name contains one of a fixed set
	    of hints (period, group, valence, ...), or when it is a numeric column with
	    more than 5 non-null values that are all whole numbers.

	    Args:
	        prop_name: Column name of the property.
	        df: DataFrame that may contain the column.

	    Returns:
	        True for integer-like properties, False otherwise. Non-numeric (text)
	        columns that do not match a name hint are never integer-like.
	    """
	    integer_name_hints = (
	        'period', 'group_id', 'group', 'block_number', 'neutrons',
	        'electrons', 'protons', 'valence', 'oxidation_states',
	    )

	    # Name-based match takes priority and does not need the column to exist
	    lowered = prop_name.lower()
	    if any(hint in lowered for hint in integer_name_hints):
	        return True

	    if prop_name not in df.columns:
	        return False

	    values = df[prop_name].dropna()

	    # Text columns (e.g. series or category labels) cannot be rounded; without
	    # this guard np.round raises TypeError on them.
	    if not pd.api.types.is_numeric_dtype(values):
	        return False

	    if len(values) > 5:
	        # Whole numbers stored as floats still count as integers
	        return bool(np.allclose(values, np.round(values), rtol=0, atol=1e-10))

	    return False

	def calculate_color_variance(data, use_log=False):
	    """Score how evenly values would spread over a colour scale.

	    The values (optionally log10-transformed) are min-max normalised to 0-1,
	    mimicking a colour mapping, and the variance of the result is returned.

	    Args:
	        data: Array-like supporting boolean-mask indexing, ``min`` and ``max``.
	        use_log: When True, only positive values are kept and log10 is applied.

	    Returns:
	        The variance of the normalised values, or 0 when fewer than 3 usable
	        values exist or all values are equal.
	    """
	    if len(data) < 3:
	        return 0

	    scaled = data
	    if use_log:
	        # log10 is only defined for strictly positive values
	        positives = data[data > 0]
	        if len(positives) < 3:
	            return 0
	        scaled = np.log10(positives)

	    low = scaled.min()
	    high = scaled.max()
	    if high == low:
	        # A constant series maps to a single colour
	        return 0

	    # Higher variance of the 0-1 normalised values means better colour spread
	    return np.var((scaled - low) / (high - low))

	def requires_log_scale(prop_name, df):
	    """Heuristically decide whether a log colour scale suits a property.

	    Log scale is chosen when the column is numeric, has at least 10 non-null
	    values that are all positive, and either

	    * its name contains an abundance/decay-style keyword, or
	    * it has more than 20 values, log scaling improves the normalised colour
	      variance by at least 50%, and the data spans more than two orders of
	      magnitude or has absolute skewness above 2.

	    Args:
	        prop_name: Column name of the property.
	        df: DataFrame that may contain the column.

	    Returns:
	        True if a logarithmic colour scale should be used, else False.
	    """
	    if prop_name not in df.columns:
	        return False

	    data = df[prop_name].dropna()
	    if len(data) < 10:
	        return False

	    # Text columns cannot be log-scaled; comparing their minimum with 0 below
	    # would raise TypeError, so reject them up front.
	    if not pd.api.types.is_numeric_dtype(data):
	        return False

	    # Must have all positive values for log scale
	    if data.min() <= 0:
	        return False

	    # Properties that typically benefit from log scale (abundance-related)
	    log_scale_indicators = (
	        'abundance', 'concentration', 'ppm', 'ppb', 'radioactive',
	        'half_life', 'decay', 'isotope_abundance',
	    )
	    lowered = prop_name.lower()
	    if any(indicator in lowered for indicator in log_scale_indicators):
	        return True

	    # Compare how well each scaling spreads values over the colour range
	    linear_variance = calculate_color_variance(data, use_log=False)
	    log_variance = calculate_color_variance(data, use_log=True)

	    # Require at least 50% improvement to switch to log scale
	    improvement_threshold = 1.5

	    range_ratio = data.max() / data.min()
	    data_skewness = abs(data.skew())

	    better_distribution = log_variance > linear_variance * improvement_threshold
	    suitable_shape = range_ratio > 100 or data_skewness > 2
	    enough_points = len(data) > 20

	    return bool(better_distribution and suitable_shape and enough_points)

	def get_element_series_description(df):
	    """Pick the column that best describes each element's series.

	    Preference order: an exact ``series`` column, an exact ``group_name``
	    column, the first column whose name contains a series-like keyword, and
	    finally ``period`` when both ``period`` and ``group_id`` exist.

	    Args:
	        df: DataFrame whose column names are inspected.

	    Returns:
	        The chosen column name, or None when nothing suitable is found.
	    """
	    keywords = ('series', 'group_name', 'category', 'family', 'type')
	    # Columns whose (lower-cased) name mentions any series-like keyword
	    candidates = [
	        column for column in df.columns
	        if any(keyword in column.lower() for keyword in keywords)
	    ]

	    # Exact, descriptive names win over keyword matches
	    for preferred in ('series', 'group_name'):
	        if preferred in df.columns:
	            return preferred
	    if candidates:
	        return candidates[0]

	    # No series-like column: fall back to the period
	    if 'period' in df.columns and 'group_id' in df.columns:
	        return 'period'

	    return None

	def create_element_series_mapping(df):
	    """Choose the column (and display label) used as the default colouring.

	    ``series`` is used whenever it exists. Otherwise the first of
	    ``group_name``, ``category``, ``family`` or ``type`` with more than 50
	    non-null values is used, and ``period`` is the last resort.

	    Args:
	        df: DataFrame whose columns are inspected.

	    Returns:
	        A ``(label, column_name)`` tuple, or ``(None, None)`` when no suitable
	        column exists.
	    """
	    if 'series' in df.columns:
	        return 'Element Series', 'series'

	    # Other descriptive columns, in order of preference
	    alternatives = (
	        ('group_name', 'Element Group'),
	        ('category', 'Element Category'),
	        ('family', 'Element Family'),
	        ('type', 'Element Type'),
	    )
	    for column, label in alternatives:
	        if column not in df.columns:
	            continue
	        if df[column].notna().sum() > 50:
	            return label, column

	    # If no good series data, use period as fallback
	    if 'period' in df.columns:
	        return 'Period', 'period'

	    return None, None

	def filter_relevant_properties(df, available_props):
	    """Select the properties worth offering, based on data coverage.

	    Three sources are combined, in this order: the default series-like column
	    (if it has at least 50 values), a curated list of properties that meet
	    their individual coverage thresholds, and any other available property
	    with more than 80 values.

	    Args:
	        df: DataFrame with one row per element.
	        available_props: Names of the candidate property columns.

	    Returns:
	        A ``(valid_properties, dropdown_properties, property_info)`` tuple.
	        The first two map display label -> column name; the dropdown map omits
	        properties flagged by ``is_continuous_correlative_property``.
	        ``property_info`` maps column name -> config dict.
	    """
	    # Curated properties as (column, label, minimum non-null count)
	    curated_rows = (
	        ('atomic_weight', 'Atomic Mass (u)', 100),
	        ('density', 'Density (g/cm³)', 50),
	        ('en_pauling', 'Electronegativity (Pauling)', 70),
	        ('atomic_radius', 'Atomic Radius (pm)', 50),
	        ('vdw_radius', 'Van der Waals Radius (pm)', 40),
	        ('covalent_radius', 'Covalent Radius (pm)', 40),
	        ('ionenergy', 'First Ionization Energy (eV)', 80),
	        ('electron_affinity', 'Electron Affinity (eV)', 40),
	        ('melting_point', 'Melting Point (K)', 70),
	        ('boiling_point', 'Boiling Point (K)', 60),
	        ('atomic_volume', 'Atomic Volume (cm³/mol)', 40),
	        ('thermal_conductivity', 'Thermal Conductivity (W/mK)', 30),
	        ('c6', 'C6 Dispersion Coefficient', 30),
	        ('dipole_polarizability', 'Dipole Polarizability', 30),
	        ('period', 'Period', 100),
	        ('group_id', 'Group', 100),
	    )
	    curated_properties = {
	        column: {'label': label, 'min_data': min_data, 'log_scale': False}
	        for column, label, min_data in curated_rows
	    }

	    valid_properties = {}
	    dropdown_properties = {}  # Subset shown in the dropdown
	    property_info = {}

	    # 1. The series-like default goes first so it leads the dropdown
	    default_label, default_property = create_element_series_mapping(df)
	    if default_property and default_property in df.columns:
	        if df[default_property].notna().sum() >= 50:  # Lower threshold for series data
	            valid_properties[default_label] = default_property
	            dropdown_properties[default_label] = default_property
	            property_info[default_property] = {
	                'label': default_label,
	                'min_data': 50,
	                'log_scale': False,
	                'is_default': True
	            }

	    # 2. Curated properties that are available and sufficiently populated
	    for prop_name, prop_config in curated_properties.items():
	        if prop_name not in available_props:
	            continue
	        if df[prop_name].notna().sum() < prop_config['min_data']:
	            continue

	        label = prop_config['label']
	        valid_properties[label] = prop_name
	        property_info[prop_name] = prop_config

	        # Only add to dropdown if not continuous and correlative
	        if not is_continuous_correlative_property(prop_name, df):
	            dropdown_properties[label] = prop_name

	    # 3. Uncurated properties need very good coverage (>80 elements)
	    for prop in available_props:
	        if prop in curated_properties:
	            continue
	        if df[prop].notna().sum() <= 80:
	            continue

	        display_name = prop.replace('_', ' ').title()
	        log_scale = requires_log_scale(prop, df)
	        valid_properties[display_name] = prop
	        property_info[prop] = {'label': display_name, 'min_data': 80, 'log_scale': log_scale}

	        # Only add to dropdown if not continuous and correlative
	        if not is_continuous_correlative_property(prop, df):
	            dropdown_properties[display_name] = prop

	    return valid_properties, dropdown_properties, property_info

	# Resolve once, at module import: label -> column maps (all valid properties
	# and the dropdown subset) plus the per-column configuration
	valid_properties, dropdown_properties, property_info = filter_relevant_properties(elements_data, available_properties)

	def get_portland_like_colorscale(use_log=False):
	    """Return the name of the colorscale used for the table.

	    Args:
	        use_log: Whether the colours represent log-scaled values. It does not
	            affect the result at present: linear and logarithmic scaling both
	            use 'Portland'.

	    Returns:
	        The colorscale name ``'Portland'``.
	    """
	    return 'Portland'

	def should_use_log_scale(property_name, df):
	    """Decide whether a property is coloured on a logarithmic scale.

	    An explicit ``log_scale: True`` in the module-level ``property_info``
	    wins; otherwise the decision is delegated to ``requires_log_scale``.

	    Args:
	        property_name: Column name of the property.
	        df: DataFrame that may contain the column.

	    Returns:
	        True when a log scale should be used; False otherwise, including when
	        the column is missing from ``df``.
	    """
	    if property_name not in df.columns:
	        return False

	    # Explicit configuration takes precedence over the heuristic
	    configured_log = property_info.get(property_name, {}).get('log_scale', False)
	    if configured_log:
	        return True

	    return requires_log_scale(property_name, df)

	# Standard periodic table positions: atomic number -> (column, row).
	# The table is described as contiguous runs of elements, each given as
	# (first atomic number, element count, first column, row). Rows 9 and 10 hold
	# the detached lanthanide and actinide series.
	_POSITION_RUNS = (
	    (1, 1, 1, 1), (2, 1, 18, 1),        # Period 1
	    (3, 2, 1, 2), (5, 6, 13, 2),        # Period 2
	    (11, 2, 1, 3), (13, 6, 13, 3),      # Period 3
	    (19, 18, 1, 4),                     # Period 4
	    (37, 18, 1, 5),                     # Period 5
	    (55, 2, 1, 6),                      # Period 6
	    (57, 15, 4, 9),                     # Lanthanides (period 6 continued)
	    (72, 15, 4, 6),                     # Period 6 continued
	    (87, 2, 1, 7),                      # Period 7
	    (89, 15, 4, 10),                    # Actinides (period 7 continued)
	    (104, 15, 4, 7),                    # Period 7 continued
	)
	ELEMENT_POSITIONS = {
	    first_z + offset: (first_column + offset, row)
	    for first_z, count, first_column, row in _POSITION_RUNS
	    for offset in range(count)
	}

	def get_electronic_configuration(element):
	    """Look up an element's electronic configuration under several column names.

	    Args:
	        element: A pandas Series describing one element.

	    Returns:
	        The first non-null configuration found, as a string, or None when no
	        known column holds a value.
	    """
	    # Possible column names, in order of preference
	    candidate_columns = ('electronic_configuration', 'electron_configuration', 'econf', 'ec')

	    for column in candidate_columns:
	        if column not in element.index:
	            continue
	        if pd.notna(element.get(column)):
	            return str(element[column])

	    # No explicit column is populated; nothing is reconstructed from other data
	    return None

	def create_hover_text(element, selected_property, original_value, display_value):
	    """Build the HTML hover text shown for one element.

	    Args:
	        element: A pandas Series describing one element.
	        selected_property: Column name of the property being visualised.
	        original_value: The element's untransformed value for that property.
	        display_value: Accepted for the caller's convenience; not used here.

	    Returns:
	        A string of lines joined with ``<br>``: name and symbol, the selected
	        property's value, the atomic number, the electronic configuration when
	        known, and a fixed list of key properties that have values.
	    """

	    def format_value(value, unit="", is_integer=False):
	        """Format one value with its unit, or "N/A" when it is missing."""
	        if pd.isna(value):
	            return "N/A"
	        if not isinstance(value, (int, float)):
	            return str(value)

	        if is_integer:
	            number = f"{int(round(value))}"
	        elif abs(value) >= 1000:
	            number = f"{value:.2e}"
	        elif abs(value) >= 10:
	            number = f"{value:.2f}"
	        else:
	            number = f"{value:.3f}"
	        # strip() removes the trailing space left by an empty unit
	        return f"{number} {unit}".strip()

	    # Label for the selected property, falling back to a prettified column name
	    prop_config = property_info.get(selected_property, {})
	    prop_label = prop_config.get('label', selected_property.replace('_', ' ').title())

	    is_int_prop = is_integer_property(selected_property, elements_data)

	    # Unit inferred from keywords in the property name; the first matching rule wins
	    unit_rules = (
	        (('density',), "g/cm³"),
	        (('electronegativity',), ""),
	        (('radius',), "pm"),
	        (('energy', 'ionization'), "eV"),
	        (('affinity',), "eV"),
	        (('point', 'temperature'), "K"),
	        (('weight', 'mass'), "u"),
	        (('volume',), "cm³/mol"),
	        (('conductivity',), "W/mK"),
	    )
	    lowered = selected_property.lower()
	    unit = next(
	        (rule_unit for keywords, rule_unit in unit_rules
	         if any(keyword in lowered for keyword in keywords)),
	        "",
	    )

	    current_str = format_value(original_value, unit, is_int_prop)

	    # Header lines: identity, highlighted property, then a separator
	    hover_lines = [
	        f"<b>{element.get('name', 'N/A')} ({element.get('symbol', 'N/A')})</b>",
	        f"<b>{prop_label}: {current_str}</b>",
	        "",  # Empty line for separation
	        f"Atomic Number: {element.get('atomic_number', 'N/A')}",
	    ]

	    electronic_config = get_electronic_configuration(element)
	    if electronic_config:
	        hover_lines.append(f"Electronic Configuration: {electronic_config}")

	    # Key properties as (column, display name, unit, show as integer)
	    key_properties = (
	        ('atomic_weight', 'Atomic Weight', 'u', False),
	        ('period', 'Period', '', True),
	        ('group_id', 'Group', '', True),
	        ('block', 'Block', '', False),
	        ('en_pauling', 'Electronegativity', '', False),
	        ('atomic_radius', 'Atomic Radius', 'pm', False),
	        ('ionenergy', 'Ionization Energy', 'eV', False),
	        ('melting_point', 'Melting Point', 'K', False),
	        ('boiling_point', 'Boiling Point', 'K', False),
	        ('density', 'Density', 'g/cm³', False),
	    )
	    for column, display_name, column_unit, as_integer in key_properties:
	        if column not in element.index:
	            continue
	        if pd.notna(element.get(column)):
	            formatted = format_value(element[column], column_unit, as_integer)
	            hover_lines.append(f"{display_name}: {formatted}")

	    return "<br>".join(hover_lines)

	def _message_figure(message):
	    """Return an empty figure that shows only ``message`` as an annotation."""
	    fig = go.Figure()
	    fig.add_annotation(
	        text=message,
	        showarrow=False,
	        font=dict(size=16)
	    )
	    return fig


	def create_periodic_table_figure(selected_property_label):
	    """Create the periodic table figure coloured by the given property.

	    Args:
	        selected_property_label: Display label of the property, as used for
	            the keys of the module-level ``valid_properties`` map.

	    Returns:
	        A Plotly figure. When the data set is unavailable, the label is
	        unknown, or the property has no values, the figure contains only an
	        explanatory message.
	    """
	    if not MENDELEEV_AVAILABLE or elements_data.empty:
	        return _message_figure("Mendeleev library not available. Please install: pip install mendeleev")

	    # Get the actual property name from the label
	    selected_property = valid_properties.get(selected_property_label)
	    if not selected_property:
	        return _message_figure(f"Property '{selected_property_label}' not available")

	    # Only elements that have a value contribute to the colour range
	    property_data = elements_data[selected_property].dropna()
	    if property_data.empty:
	        return _message_figure(f"No data available for {selected_property_label}")

	    # Determine if we should use log scale
	    use_log = should_use_log_scale(selected_property, elements_data)

	    # Colour range: log10 of the positive values under log scaling, raw otherwise
	    color_source = property_data
	    if use_log:
	        positive_data = property_data[property_data > 0]
	        if positive_data.empty:
	            # Nothing can be log-scaled; fall back to a linear scale
	            use_log = False
	        else:
	            color_source = np.log10(positive_data)
	    min_value = color_source.min()
	    max_value = color_source.max()

	    # Per-element data, kept in parallel lists
	    hover_texts = []
	    element_symbols = []
	    atomic_numbers = []
	    x_positions = []
	    y_positions = []
	    element_values = []

	    for _, element in elements_data.iterrows():
	        atomic_num = element['atomic_number']

	        # Skip elements without position data
	        if atomic_num not in ELEMENT_POSITIONS:
	            continue

	        x_pos, y_pos = ELEMENT_POSITIONS[atomic_num]
	        x_positions.append(x_pos)
	        y_positions.append(11 - y_pos)  # Invert y-axis for correct table layout
	        element_symbols.append(element['symbol'])
	        atomic_numbers.append(atomic_num)

	        original_value = element[selected_property]

	        if pd.isna(original_value):
	            display_value = np.nan
	        elif not use_log:
	            display_value = original_value
	        elif original_value > 0:
	            display_value = np.log10(original_value)
	        else:
	            # A non-positive value has no logarithm. Plotting the raw value on
	            # the log10 colour range would give a misleading colour, so the
	            # element is left uncoloured instead.
	            display_value = np.nan

	        element_values.append(display_value)

	        # Create comprehensive hover text
	        hover_texts.append(
	            create_hover_text(element, selected_property, original_value, display_value)
	        )

	    fig = go.Figure()

	    # One square marker per element, labelled with its symbol
	    fig.add_trace(go.Scatter(
	        x=x_positions,
	        y=y_positions,
	        mode='markers+text',
	        text=element_symbols,
	        hoverinfo='text',
	        hovertext=hover_texts,
	        textfont=dict(
	            family="Arial, sans-serif",
	            size=14,
	            color="white",
	            weight="bold",
	        ),
	        hoverlabel=dict(
	            bgcolor="rgba(255,255,255,0.95)",
	            font_size=12,
	            font_family="Arial, sans-serif",
	            bordercolor="black"
	        ),
	        marker=dict(
	            symbol='square',
	            color=element_values,
	            size=45,
	            colorscale=get_portland_like_colorscale(use_log),
	            cmin=min_value,
	            cmax=max_value,
	            colorbar=dict(
	                title=f"{selected_property_label}{'<br>(log scale)' if use_log else ''}",
	                thickness=20,
	                x=1.02
	            ),
	            showscale=True,
	            line=dict(color='black', width=1)
	        )
	    ))

	    # Atomic number drawn slightly above each symbol
	    for x_pos, y_pos, atomic_num in zip(x_positions, y_positions, atomic_numbers):
	        fig.add_annotation(
	            x=x_pos,
	            y=y_pos + 0.3,
	            text=str(atomic_num),
	            showarrow=False,
	            font=dict(
	                family="Arial, sans-serif",
	                size=8,
	                color="white",
	                weight="bold",
	            )
	        )

	    # Add lanthanide and actinide labels
	    fig.add_annotation(x=3, y=2, text="Lanthanides", showarrow=False,
	                       font=dict(size=10, weight="bold"))
	    fig.add_annotation(x=3, y=1, text="Actinides", showarrow=False,
	                       font=dict(size=10, weight="bold"))

	    # Update layout
	    title_text = f'<b>Periodic Table by {selected_property_label}</b>'
	    if use_log:
	        title_text += '<br><span style="font-size:14px;">(Logarithmic Color Scale)</span>'

	    fig.update_layout(
	        title=dict(
	            text=title_text,
	            x=0.5,
	            font=dict(size=24)
	        ),
	        xaxis=dict(
	            range=[0, 19],
	            showgrid=False,
	            zeroline=False,
	            showticklabels=False,
	            visible=False
	        ),
	        yaxis=dict(
	            range=[0, 12],
	            showgrid=False,
	            zeroline=False,
	            showticklabels=False,
	            visible=False
	        ),
	        plot_bgcolor='white',
	        paper_bgcolor='#f8f9fa',
	        width=1200,
	        height=800,
	        margin=dict(l=20, r=100, t=100, b=20)
	    )

	    return fig

	# Create Gradio interface
	def create_gradio_app():
	    """Build the Gradio UI: a property dropdown driving the periodic table plot.

	    Returns:
	        A ``gr.Blocks`` app, or a minimal ``gr.Interface`` that reports an
	        error when mendeleev is missing or no dropdown properties exist.
	    """
	    if not (MENDELEEV_AVAILABLE and dropdown_properties):
	        def error_interface():
	            return "❌ Mendeleev library not available or no valid properties found. Please install: pip install mendeleev"

	        return gr.Interface(
	            fn=error_interface,
	            inputs=[],
	            outputs=gr.Textbox(label="Error"),
	            title="Periodic Table Dashboard - Error"
	        )

	    # Dropdown choices (continuous correlative properties are already excluded)
	    property_options = list(dropdown_properties)

	    # Prefer the property flagged as default; otherwise take the first option
	    default_property = next(
	        (
	            label
	            for label, prop_name in dropdown_properties.items()
	            if property_info.get(prop_name, {}).get('is_default', False)
	        ),
	        None,
	    )
	    if not default_property and property_options:
	        default_property = property_options[0]

	    with gr.Blocks(title="Interactive Periodic Table", theme=gr.themes.Soft()) as app:
	        gr.Markdown("# 🧪 Interactive Periodic Table")

	        with gr.Row():
	            with gr.Column(scale=1):
	                property_dropdown = gr.Dropdown(
	                    choices=property_options,
	                    value=default_property,
	                    label="Select Property to Colourize",
	                )
	        with gr.Row():
	            plot_output = gr.Plot(show_label=False)
	        with gr.Row():
	            gr.Markdown("\n🔬 Data Source: [Mendeleev Library](https://mendeleev.readthedocs.io/)\n")

	        # Redraw whenever the selection changes, and once when the page loads
	        plot_wiring = dict(
	            fn=create_periodic_table_figure,
	            inputs=[property_dropdown],
	            outputs=[plot_output]
	        )
	        property_dropdown.change(**plot_wiring)
	        app.load(**plot_wiring)

	    return app

	# Create and run the app
	if __name__ == "__main__":
	    # Startup summary of what the dropdown will offer
	    print(f"🚀 Starting Gradio app with {len(dropdown_properties)} properties (excluding continuous correlative)")
	    if dropdown_properties:
	        print("📋 Available dropdown properties:")
	        for label, prop_name in dropdown_properties.items():
	            # Collect the optional markers for this property, in display order
	            notes = ""
	            if should_use_log_scale(prop_name, elements_data):
	                notes += " (log scale)"
	            if is_integer_property(prop_name, elements_data):
	                notes += " (integer)"
	            if property_info.get(prop_name, {}).get('is_default', False):
	                notes += " (DEFAULT)"
	            data_count = elements_data[prop_name].notna().sum()
	            print(f" • {label}: {data_count} elements{notes}")

	    print(f"\n📋 Total valid properties (including continuous correlative): {len(valid_properties)}")

	    app = create_gradio_app()
	    app.launch()