Spaces:
Sleeping
Sleeping
File size: 5,160 Bytes
dfc542c 91a458d dfc542c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 |
import pandas as pd
import os
import sys
src_directory = os.path.abspath(os.path.join(os.path.dirname(__file__), "../..", "backend"))
sys.path.append(src_directory)
from utils import logger
# Location of the population dataset that process_data() loads. The path is
# relative, so it is resolved against the current working directory of the
# process rather than against this file's own directory.
file_path = "./world_population.csv"
# Developer-local absolute path and eager module-level load, kept for reference only:
# file_path = "C:/Users/Vijay/Downloads/world_population.csv"
# data_frame = pd.read_csv(file_path)
def process_data():
    """Load the population CSV found at the module-level ``file_path``.

    Returns:
        The parsed ``pandas.DataFrame`` on success. If anything in the read
        sequence raises, the exception is swallowed and a ``str`` of the form
        ``"Unable to read the file <error>"`` is returned instead, so callers
        have to check the result type before treating it as a frame.
    """
    try:
        logger.log("I'm going to read the csv")
        loaded = pd.read_csv(file_path)
        logger.log("I'm reading the csv")
    except Exception as err:
        logger.log("I couldn't read the file")
        return f"Unable to read the file {err}"
    return loaded
def display_continents(dataframe):
    """Return the distinct values found in the ``Continent`` column.

    Args:
        dataframe: Frame that contains a ``Continent`` column.

    Returns:
        Whatever ``Series.unique()`` yields for that column (an array of the
        distinct continent labels). A log line is emitted before returning.
    """
    continent_column = dataframe["Continent"]
    distinct_names = continent_column.unique()
    logger.log("Displaying the list of continents in the data")
    return distinct_names
def display_countries(dataframe):
    """Return every entry of the ``Country`` column as an array.

    Args:
        dataframe: Frame that contains a ``Country`` column.

    Returns:
        The ``.values`` of the column, one element per row (duplicates are
        not removed). A log line is emitted before returning.
    """
    country_column = dataframe["Country"]
    all_names = country_column.values
    logger.log("Displaying the list of countries in the data")
    return all_names
def continent_with_highest_population(dataframe):
    """Find the continent whose summed ``Population`` is the largest.

    Args:
        dataframe: Frame with ``Continent`` and ``Population`` columns.

    Returns:
        dict: A single-entry mapping ``{continent: total_population}``.
    """
    by_continent = dataframe.groupby("Continent")["Population"]
    totals = by_continent.agg(total_population="sum")
    # ``totals`` is a one-column frame, so idxmax()/max() each produce a
    # one-element Series; .item() unwraps that into a plain Python value.
    top_name = totals.idxmax().item()
    top_total = totals.max().item()
    summary = {top_name: top_total}
    logger.log("Displaying the continent with highest population in the data")
    return summary
def continent_with_lowest_population(dataframe):
    """Find the continent whose summed ``Population`` is the smallest.

    Args:
        dataframe: Frame with ``Continent`` and ``Population`` columns.

    Returns:
        dict: A single-entry mapping ``{continent: total_population}``.
    """
    by_continent = dataframe.groupby("Continent")["Population"]
    totals = by_continent.agg(total_population="sum")
    # ``totals`` is a one-column frame, so idxmin()/min() each produce a
    # one-element Series; .item() unwraps that into a plain Python value.
    bottom_name = totals.idxmin().item()
    bottom_total = totals.min().item()
    summary = {bottom_name: bottom_total}
    logger.log("Displaying the continent with lowest population in the data")
    return summary
def country_with_lowest_population(dataframe):
    """Find the row with the smallest ``Population`` value.

    Args:
        dataframe: Frame with ``Country`` and ``Population`` columns.

    Returns:
        dict: A single-entry mapping ``{country: population}`` where the
        population has been unwrapped with ``.item()``.
    """
    row_label = dataframe["Population"].idxmin()
    # Label-based scalar lookups on the row that idxmin() pointed at.
    country_name = dataframe.at[row_label, "Country"]
    population = dataframe.at[row_label, "Population"]
    summary = {country_name: population.item()}
    logger.log("Displaying the country with lowest population in the data")
    return summary
def country_with_highest_population(dataframe):
    """Find the row with the largest ``Population`` value.

    Args:
        dataframe: Frame with ``Country`` and ``Population`` columns.

    Returns:
        dict: A single-entry mapping ``{country: population}`` where the
        population has been unwrapped with ``.item()``.
    """
    row_label = dataframe["Population"].idxmax()
    # Label-based scalar lookups on the row that idxmax() pointed at.
    country_name = dataframe.at[row_label, "Country"]
    population = dataframe.at[row_label, "Population"]
    summary = {country_name: population.item()}
    logger.log("Displaying the country with highest population in the data")
    return summary
def list_country_by_continent(dataframe, continent):
    """List the ``Country`` values of rows whose ``Continent`` equals *continent*.

    Args:
        dataframe: Frame with ``Continent`` and ``Country`` columns.
        continent: Continent label, compared with ``==`` (exact match).

    Returns:
        list: The matching country names (empty when nothing matches). If any
        step raises, the exception is swallowed and its message text is
        returned as a ``str`` instead.
    """
    try:
        in_continent = dataframe["Continent"] == continent
        matching_rows = dataframe[in_continent]
        names = matching_rows["Country"].tolist()
        logger.log("Separated data by continent")
    except Exception as err:
        return str(err)
    return names
def _match_continent_label(labels, continent):
    """Return the spelling of *continent* that is actually used in *labels*.

    An exact match wins. Otherwise names are compared with case and
    whitespace ignored, so ``"asia"`` or ``"north america"`` resolve to
    ``"Asia"`` / ``"North America"``. When nothing matches, the input is
    returned unchanged so the caller can report it as not found.
    """
    known = labels.dropna().unique().tolist()
    if continent in known:
        return continent

    def squash(name):
        return "".join(str(name).split()).casefold()

    wanted = squash(continent)
    for label in known:
        if squash(label) == wanted:
            return label
    return continent


def get_stat_by_continent(df, continent: str, data_type: str, stat: str) -> str:
    """Describe one statistic of a column for the countries of one continent.

    Only the requested statistic is computed, and only on the rows of the
    requested continent. This keeps e.g. ``stat='count'`` usable on a
    non-numeric column, where computing a mean would fail.

    Args:
        df: Frame with ``Continent`` and ``Country`` columns plus *data_type*.
        continent: Continent name. ``"NorthAmerica"`` / ``"SouthAmerica"`` (any
            case) are accepted as aliases, and names are otherwise matched
            against the data ignoring case and whitespace.
        data_type: Name of the column to summarise.
        stat: One of ``'max'``, ``'min'``, ``'mean'``, ``'sum'``, ``'count'``.

    Returns:
        A human-readable sentence with the result, or an explanatory message
        when *stat* is invalid or the continent is absent from the data.

    Raises:
        KeyError: If *data_type* is not a column of *df*.
    """
    lowered = continent.lower()
    if lowered == "northamerica":
        continent = "North America"
    elif lowered == "southamerica":
        continent = "South America"

    valid_stats = ['max', 'min', 'mean', 'sum', 'count']
    if stat not in valid_stats:
        return f"Invalid stat. Please use one of the following: {valid_stats}."

    # Touch the column first so an unknown data_type still raises KeyError
    # before the continent lookup, as it always has.
    column = df[data_type]

    continent = _match_continent_label(df['Continent'], continent)
    in_continent = df['Continent'] == continent
    if not in_continent.any():
        return f"Continent '{continent}' not found in the data."

    values = column[in_continent]

    if stat in ('max', 'min'):
        if stat == 'max':
            extreme, row_label = values.max(), values.idxmax()
        else:
            extreme, row_label = values.min(), values.idxmin()
        country_name = df.at[row_label, 'Country']
        return f"{continent}'s {stat} {data_type} is {int(extreme)}. Country: {country_name} , {data_type} :{extreme}"
    if stat == 'mean':
        return f"{continent}'s average {data_type} is {int(values.mean())}"
    if stat == 'sum':
        return f"{continent}'s total {data_type} is {int(values.sum())}"
    # Only 'count' is left: number of non-null entries of the column.
    return f"Total countries in {continent} is {int(values.count())}"
def get_continent_with_max_value(dataframe, key, value):
    """Report which row holds the largest entry of the *value* column.

    Args:
        dataframe: Frame containing both the *key* and *value* columns.
        key: Column whose entry names the winning row (e.g. a continent).
        value: Column that is searched for its maximum.

    Returns:
        str: ``"<key entry>'s max <value> is <maximum>"``.
    """
    top_row = dataframe[value].idxmax()
    # Label-based scalar lookups on the row that idxmax() pointed at.
    peak = dataframe.at[top_row, value]
    owner = dataframe.at[top_row, key]
    return f"{owner}'s max {value} is {peak}"
|