Spaces:
Sleeping
Sleeping
File size: 5,618 Bytes
dfc542c 91a458d dfc542c 22b9c3e dfc542c 22b9c3e dfc542c 22b9c3e dfc542c 22b9c3e dfc542c 22b9c3e dfc542c 22b9c3e dfc542c 22b9c3e dfc542c 22b9c3e dfc542c 22b9c3e dfc542c 22b9c3e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 |
import pandas as pd
import os
import sys
# Resolve "<directory of this file>/../../backend" to an absolute path and
# append it to the module search path.
# NOTE(review): presumably this is what lets `from utils import logger` below
# resolve -- confirm that `utils` lives in that backend directory.
src_directory = os.path.abspath(os.path.join(os.path.dirname(__file__), "../..", "backend"))
sys.path.append(src_directory)
from utils import logger
# CSV path read by process_data(). It is relative, so it is resolved against
# the process's current working directory rather than this file's location.
file_path = "./world_population.csv"
# file_path = "C:/Users/Vijay/Downloads/world_population.csv"
# data_frame = pd.read_csv(file_path)
def process_data():
    """Read the CSV at the module-level ``file_path`` into a DataFrame.

    Returns:
        The ``pandas.DataFrame`` produced by ``pd.read_csv`` on success.
        If any exception is raised while logging or reading, the failure is
        logged and the string ``"Unable to read the file <error>"`` is
        returned instead of a DataFrame, so callers must check the type.
    """
    try:
        logger.log("I'm going to read the csv")
        loaded_frame = pd.read_csv(file_path)
        logger.log("I'm reading the csv")
    except Exception as read_error:
        logger.log("I couldn't read the file")
        return f"Unable to read the file {read_error}"
    else:
        return loaded_frame
def display_continents(dataframe):
    """Return the distinct values found in the ``Continent`` column.

    Args:
        dataframe: Table exposing a ``'Continent'`` column.

    Returns:
        The result of calling ``.unique()`` on that column, so each
        continent appears once.
    """
    continent_column = dataframe['Continent']
    distinct_continents = continent_column.unique()
    logger.log("Displaying the list of continents in the data")
    return distinct_continents
def display_countries(dataframe):
    """Return every value of the ``Country`` column, one per row.

    Args:
        dataframe: Table exposing a ``'Country'`` column.

    Returns:
        The column's ``.values`` attribute. No de-duplication is applied,
        so repeated country names are kept.
    """
    country_column = dataframe['Country']
    all_countries = country_column.values
    logger.log("Displaying the list of countries in the data")
    return all_countries
def continent_stat(dataframe, attribute="Population", stat_type="highest"):
    """Find the continent whose summed ``attribute`` is highest or lowest.

    The rows are grouped by ``'Continent'``, ``attribute`` is summed per
    group, and the continent with the extreme total is reported.

    Args:
        dataframe: Table containing a ``'Continent'`` column and the
            ``attribute`` column.
        attribute: Name of the column to total per continent.
        stat_type: ``"highest"`` or ``"lowest"`` (case-insensitive).

    Returns:
        ``{continent: total}`` on success. On any failure (missing column,
        invalid ``stat_type``, empty data, ...) the error is logged and
        ``{"error": "<message>"}`` is returned; nothing is raised.
    """
    try:
        if 'Continent' not in dataframe.columns or attribute not in dataframe.columns:
            # Raise (not return) so the handler below turns this into the same
            # {"error": ...} shape as every other failure path.
            raise ValueError(f"Dataframe must contain 'Continent' and '{attribute}' columns.")

        # Accept "Highest"/"LOWEST" etc., matching country_stat's behaviour.
        mode = str(stat_type).lower()
        if mode not in ("highest", "lowest"):
            raise ValueError("Invalid stat_type. Use 'highest' or 'lowest'.")

        # One total per continent, indexed by continent name.
        totals = dataframe.groupby('Continent')[attribute].sum()
        continent = totals.idxmax() if mode == "highest" else totals.idxmin()

        raw_value = totals.loc[continent]
        # Convert numpy scalars to plain Python numbers; values without an
        # .item() method are passed through unchanged.
        value = raw_value.item() if hasattr(raw_value, "item") else raw_value

        logger.log(f"Displaying the continent with the {mode} {attribute}: {continent} with {attribute} {value}")
        return {continent: value}
    except Exception as e:
        logger.log(f"Error in continent_stat: {str(e)}")
        return {"error": str(e)}
def country_stat(dataframe, attribute: str = "Population", stat_type: str = "highest"):
    """Find the country with the highest or lowest value of ``attribute``.

    Args:
        dataframe: Table containing a ``'Country'`` column and the
            ``attribute`` column.
        attribute: Name of the column to compare across rows.
        stat_type: ``"highest"`` or ``"lowest"`` (case-insensitive).

    Returns:
        ``{country: value}`` for the selected row on success. On any failure
        (missing column, invalid ``stat_type``, empty data, ...) the error is
        logged and the string ``"Unable to fetch the data. Error <message>"``
        is returned; nothing is raised.
    """
    try:
        mode = stat_type.lower()
        if mode == "highest":
            index = dataframe[attribute].idxmax()
        elif mode == "lowest":
            index = dataframe[attribute].idxmin()
        else:
            # Without this branch `index` would be unbound and the caller
            # would see an unrelated unbound-variable message.
            raise ValueError("Invalid stat_type. Use 'highest' or 'lowest'.")

        # Label-based lookups on the row picked above.
        country = dataframe.loc[index, 'Country']
        requested_attribute = dataframe.loc[index, attribute]
        # Convert numpy scalars to plain Python numbers; values without an
        # .item() method are passed through unchanged.
        if hasattr(requested_attribute, "item"):
            requested_attribute = requested_attribute.item()

        result = {country: requested_attribute}
        logger.log(f"Displaying the country with {stat_type} {attribute} in the data")
        return result
    except Exception as e:
        # Log the failure too, as continent_stat does.
        logger.log(f"Error in country_stat: {e}")
        return f"Unable to fetch the data. Error {e}"
# def list_country_by_continent(dataframe,continent):
# try:
# df_countries = dataframe[dataframe['Continent'] == continent]
# countries= df_countries['Country'].to_list()
# logger.log("Separated data by continent")
# return countries
# except Exception as e:
# return f"{e}"
# def get_stat_by_continent(df ,continent: str, data_type: str, stat: str , ):
# if continent.lower() == "NorthAmerica".lower():
# continent = "North America"
# if continent.lower() == "SouthAmerica".lower():
# continent = "South America"
# valid_stats = ['max', 'min', 'mean' , 'sum' , 'count']
# if stat not in valid_stats:
# return f"Invalid stat. Please use one of the following: {valid_stats}."
# continent_population_stats = df.groupby('Continent')[data_type].agg(
# Maximum='max', Minimum='min', Average = 'mean',Total='sum' , Number_of_Countries = 'count')
# continent_countries = df[df['Continent'] == continent]
# if continent not in continent_population_stats.index:
# return f"Continent '{continent}' not found in the data."
# if stat == 'max':
# population_result = continent_population_stats.loc[continent]['Maximum']
# country_id = continent_countries.loc[continent_countries[data_type].idxmax()]
# country_name = country_id['Country']
# population_value = country_id[data_type]
# return f"{continent}'s {stat} {data_type} is {int(population_result)}. Country: {country_name} , {data_type} :{population_value}"
# if stat == 'min':
# population_result = continent_population_stats.loc[continent]['Minimum']
# country_id = continent_countries.loc[continent_countries[data_type].idxmin()]
# country_name = country_id['Country']
# population_value = country_id[data_type]
# return f"{continent}'s {stat} {data_type} is {int(population_result)}. Country: {country_name} , {data_type} :{population_value}"
# if stat == 'mean':
# population_result = continent_population_stats.loc[continent]['Average']
# return f"{continent}'s average {data_type} is {int(population_result)}"
# if stat == 'sum':
# population_result = continent_population_stats.loc[continent]['Total']
# return f"{continent}'s total {data_type} is {int(population_result)}"
# if stat == 'count' :
# population_result = continent_population_stats.loc[continent]['Number_of_Countries']
# return f"Total countries in {continent} is {int(population_result)}"
# def get_continent_with_max_value(dataframe, key, value):
# max_id = dataframe[value].idxmax()
# value_num = dataframe[value][max_id]
# value_country = dataframe[key][max_id]
# return f"{value_country}'s max {value} is {value_num}" |