Spaces:
Build error
Build error
File size: 3,406 Bytes
92a085a f076a08 92a085a f076a08 92a085a f076a08 92a085a f076a08 92a085a f076a08 92a085a f076a08 92a085a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 |
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from io import StringIO
import openpyxl
def load_data(file):
    """Read an uploaded CSV or Excel file into a DataFrame.

    The format is chosen from the text after the last dot in ``file.name``
    (case-insensitive): ``csv`` is read with ``pd.read_csv``; ``xls`` and
    ``xlsx`` are read with ``pd.read_excel``.

    Args:
        file: A file-like object that exposes a ``name`` attribute.

    Returns:
        The parsed DataFrame, or ``None`` when the extension is unsupported
        or the content cannot be read. In both failure cases a message is
        shown with ``st.error``.
    """
    # Local import: only needed to recognise corrupt .xlsx archives below.
    from zipfile import BadZipFile

    file_extension = file.name.rsplit('.', 1)[-1].lower()
    if file_extension == 'csv':
        reader = pd.read_csv
    elif file_extension in ('xls', 'xlsx'):
        reader = pd.read_excel
    else:
        st.error("Unsupported file format. Please upload a CSV, XLS, or XLSX file.")
        return None

    try:
        return reader(file)
    except (ValueError, ImportError, OSError, BadZipFile) as exc:
        # pandas' EmptyDataError/ParserError and UnicodeDecodeError are all
        # ValueError subclasses; ImportError covers a missing Excel engine.
        # Report the problem in the page instead of crashing the app.
        st.error(f"Could not read the uploaded file: {exc}")
        return None
def manual_data_entry():
    """Render widgets for typing a small table by hand and return it.

    The user first enters comma-separated column names, then a row count,
    then one text value per cell (row by row).

    Returns:
        A DataFrame holding the entered strings, with one column per
        distinct column name, or ``None`` while no column name has been
        entered.
    """
    st.subheader("Manual Data Entry")
    raw_names = st.text_input("Enter column names separated by commas:").split(',')
    stripped_names = [name.strip() for name in raw_names if name.strip()]

    # Keep only the first occurrence of each name. Repeated names would give
    # two cell widgets the same label (Streamlit rejects duplicate widgets)
    # and would produce a DataFrame with ambiguous duplicate columns.
    col_names = list(dict.fromkeys(stripped_names))
    if len(col_names) < len(stripped_names):
        st.warning("Duplicate column names were ignored.")

    if not col_names:
        return None

    num_rows = st.number_input("Enter number of rows:", min_value=1, value=5)
    data = []
    for i in range(int(num_rows)):
        row = [
            st.text_input(f"Enter value for {col} (Row {i+1}):")
            for col in col_names
        ]
        data.append(row)
    return pd.DataFrame(data, columns=col_names)
def perform_analysis(data):
    """Render the "Analysis" section for a DataFrame.

    Writes summary statistics, then, for the numeric columns, a correlation
    heatmap, a pairplot and one histogram (with KDE) per column. When there
    are no numeric columns a short notice replaces the heatmap and pairplot.

    Args:
        data: The pandas DataFrame to analyse.
    """
    st.header("4. Analysis")

    # EDA
    st.subheader("Exploratory Data Analysis")

    # Summary statistics
    st.write("Summary Statistics:")
    st.write(data.describe())

    # 'number' selects every numeric dtype rather than only float64/int64.
    numeric_data = data.select_dtypes(include='number')
    has_numeric = not numeric_data.empty

    # Correlation heatmap
    st.write("Correlation Heatmap:")
    if has_numeric:
        fig, ax = plt.subplots(figsize=(10, 8))
        sns.heatmap(numeric_data.corr(), annot=True, cmap='coolwarm', ax=ax)
        st.pyplot(fig)
        # pyplot keeps every figure alive until it is closed; without this
        # they pile up each time the script reruns.
        plt.close(fig)
    else:
        st.write("No numeric columns available for correlation heatmap.")

    # Pairplot
    st.write("Pairplot:")
    if has_numeric:
        grid = sns.pairplot(numeric_data)
        # pairplot returns a PairGrid; hand its underlying Figure to Streamlit.
        st.pyplot(grid.figure)
        plt.close(grid.figure)
    else:
        st.write("No numeric columns available for pairplot.")

    # Histogram
    st.write("Histograms:")
    for column in numeric_data.columns:
        fig, ax = plt.subplots()
        sns.histplot(data[column], kde=True, ax=ax)
        st.pyplot(fig)
        plt.close(fig)
def main():
    """Lay out the PPDAC page: Problem, Plan, Data, Analysis, Conclusion.

    Data comes either from an uploaded file or from manual entry. When data
    is available it is previewed, its columns are converted to numeric types
    where possible, and the analysis section is rendered.
    """
    st.title("PPDAC Data Analysis Toolkit")

    # Problem
    st.header("1. Problem")
    st.text_area("Define your problem:")

    # Plan
    st.header("2. Plan")
    st.text_area("Describe your plan:")

    # Data
    st.header("3. Data")
    data_input_method = st.radio("Choose data input method:", ("Upload File", "Manual Entry"))
    data = None
    if data_input_method == "Upload File":
        uploaded_file = st.file_uploader("Choose a CSV, XLS, or XLSX file", type=["csv", "xls", "xlsx"])
        if uploaded_file is not None:
            data = load_data(uploaded_file)
    else:
        data = manual_data_entry()

    if data is not None:
        st.write("Data Preview:")
        st.write(data.head())
        # Convert columns to numeric where possible
        for col in data.columns:
            try:
                data[col] = pd.to_numeric(data[col])
            except (ValueError, TypeError):
                # ValueError: a string that is not a number.
                # TypeError: a non-string object pandas cannot parse.
                # Either way the column is deliberately kept as it is.
                pass
        perform_analysis(data)

    # Conclusion
    st.header("5. Conclusion")
    st.text_area("Write your conclusion based on the analysis:")
# Run the app only when this file is executed as a script.
if __name__ == "__main__":
    main()