"""Summary helpers over the world-population CSV.

Loads the CSV named by ``file_path`` with pandas and exposes small query
functions (list continents/countries, highest/lowest continent or country for
a given column). Every public function logs what it did through the project
``logger``.
"""
import os
import sys

import pandas as pd

# Put "<this file's dir>/../../backend" on sys.path before importing ``utils``
# (presumably that is where ``utils`` lives -- confirm against the repo layout).
src_directory = os.path.abspath(os.path.join(os.path.dirname(__file__), "../..", "backend"))
sys.path.append(src_directory)

from utils import logger  # noqa: E402  (must come after the sys.path tweak)

# Resolved relative to the current working directory, not to this file.
file_path = "./world_population.csv"
# file_path = "C:/Users/Vijay/Downloads/world_population.csv"
# data_frame = pd.read_csv(file_path)


def process_data():
    """Read the CSV at ``file_path`` into a DataFrame.

    Returns:
        The loaded ``pandas.DataFrame`` on success. On any exception the
        failure is logged and the string ``"Unable to read the file <error>"``
        is returned instead of raising, so callers must check the result type.
    """
    try:
        logger.log("I'm going to read the csv")
        data_frame = pd.read_csv(file_path)
        logger.log("I'm reading the csv")
        return data_frame
    except Exception as e:
        logger.log("I couldn't read the file")
        return f"Unable to read the file {e}"


def display_continents(dataframe):
    """Return the distinct values of the ``'Continent'`` column.

    Args:
        dataframe: DataFrame containing a ``'Continent'`` column.

    Returns:
        The result of ``Series.unique()`` on that column (first-seen order).

    Raises:
        KeyError: if the ``'Continent'`` column is missing.
    """
    continents = dataframe['Continent'].unique()
    logger.log("Displaying the list of continents in the data")
    return continents


def display_countries(dataframe):
    """Return every value of the ``'Country'`` column, in row order.

    Args:
        dataframe: DataFrame containing a ``'Country'`` column.

    Returns:
        The column's underlying ``.values`` array (duplicates are kept).

    Raises:
        KeyError: if the ``'Country'`` column is missing.
    """
    countries = dataframe['Country'].values
    logger.log("Displaying the list of countries in the data")
    return countries


def continent_stat(dataframe, attribute="Population", stat_type="highest"):
    """Find the continent whose summed ``attribute`` is highest or lowest.

    Args:
        dataframe: DataFrame with a ``'Continent'`` column and an
            ``attribute`` column.
        attribute: Name of the column to sum per continent.
        stat_type: ``"highest"`` or ``"lowest"`` (case-insensitive).

    Returns:
        ``{continent: total}`` on success. On any failure (missing column,
        invalid ``stat_type``, pandas error) the error is logged and
        ``{"error": "<message>"}`` is returned; nothing is raised.
    """
    try:
        if 'Continent' not in dataframe.columns or attribute not in dataframe.columns:
            # Raise (not return) so the failure goes through the single
            # error-dict path below instead of leaking an exception object.
            raise ValueError(f"Dataframe must contain 'Continent' and '{attribute}' columns.")

        # Normalise case so this function accepts the same spellings as
        # country_stat; str() keeps non-string input on the "invalid" path.
        mode = str(stat_type).lower()
        if mode not in ("highest", "lowest"):
            raise ValueError("Invalid stat_type. Use 'highest' or 'lowest'.")

        # Named aggregation yields a one-column DataFrame indexed by continent.
        continent_stats = dataframe.groupby('Continent')[attribute].agg(total_attribute='sum')

        if mode == "highest":
            continent = continent_stats.idxmax().item()
            value = continent_stats.max().item()
        else:
            continent = continent_stats.idxmin().item()
            value = continent_stats.min().item()

        logger.log(f"Displaying the continent with the {mode} {attribute}: {continent} with {attribute} {value}")
        return {continent: value}
    except Exception as e:
        logger.log(f"Error in continent_stat: {str(e)}")
        return {"error": str(e)}


def country_stat(dataframe, attribute: str = "Population", stat_type: str = "highest"):
    """Find the country (row) with the highest or lowest ``attribute``.

    Args:
        dataframe: DataFrame with a ``'Country'`` column and an
            ``attribute`` column.
        attribute: Name of the column to compare.
        stat_type: ``"highest"`` or ``"lowest"`` (case-insensitive).

    Returns:
        ``{country: value}`` on success. On any failure the error is logged
        and the string ``"Unable to fetch the data. Error <message>"`` is
        returned; nothing is raised. Note this differs from
        ``continent_stat``, which reports errors as a dict.
    """
    try:
        mode = str(stat_type).lower()
        if mode == "highest":
            index = dataframe[attribute].idxmax()
        elif mode == "lowest":
            index = dataframe[attribute].idxmin()
        else:
            # Without this branch ``index`` would be unbound and the caller
            # would only see an opaque UnboundLocalError message.
            raise ValueError("Invalid stat_type. Use 'highest' or 'lowest'.")

        country = dataframe['Country'][index]
        requested_attribute = dataframe[attribute][index]
        # .item() converts the numpy scalar into a plain Python number.
        result = {country: requested_attribute.item()}
        logger.log(f"Displaying the country with {stat_type} {attribute} in the data")
        return result
    except Exception as e:
        logger.log(f"Error in country_stat: {str(e)}")
        return f"Unable to fetch the data. Error {e}"


# NOTE: legacy implementations below are kept commented out, as in the
# original file; they are not executed.

# def list_country_by_continent(dataframe,continent):
#     try:
#         df_countries = dataframe[dataframe['Continent'] == continent]
#         countries= df_countries['Country'].to_list()
#         logger.log("Separated data by continent")
#         return countries
#     except Exception as e:
#         return f"{e}"

# def get_stat_by_continent(df ,continent: str, data_type: str, stat: str , ):
#     if continent.lower() == "NorthAmerica".lower():
#         continent = "North America"
#     if continent.lower() == "SouthAmerica".lower():
#         continent = "South America"
#     valid_stats = ['max', 'min', 'mean' , 'sum' , 'count']
#     if stat not in valid_stats:
#         return f"Invalid stat. Please use one of the following: {valid_stats}."
#     continent_population_stats = df.groupby('Continent')[data_type].agg(
#         Maximum='max', Minimum='min', Average = 'mean',Total='sum' , Number_of_Countries = 'count')
#     continent_countries = df[df['Continent'] == continent]
#     if continent not in continent_population_stats.index:
#         return f"Continent '{continent}' not found in the data."
#     if stat == 'max':
#         population_result = continent_population_stats.loc[continent]['Maximum']
#         country_id = continent_countries.loc[continent_countries[data_type].idxmax()]
#         country_name = country_id['Country']
#         population_value = country_id[data_type]
#         return f"{continent}'s {stat} {data_type} is {int(population_result)}. Country: {country_name} , {data_type} :{population_value}"
#     if stat == 'min':
#         population_result = continent_population_stats.loc[continent]['Minimum']
#         country_id = continent_countries.loc[continent_countries[data_type].idxmin()]
#         country_name = country_id['Country']
#         population_value = country_id[data_type]
#         return f"{continent}'s {stat} {data_type} is {int(population_result)}. Country: {country_name} , {data_type} :{population_value}"
#     if stat == 'mean':
#         population_result = continent_population_stats.loc[continent]['Average']
#         return f"{continent}'s average {data_type} is {int(population_result)}"
#     if stat == 'sum':
#         population_result = continent_population_stats.loc[continent]['Total']
#         return f"{continent}'s total {data_type} is {int(population_result)}"
#     if stat == 'count' :
#         population_result = continent_population_stats.loc[continent]['Number_of_Countries']
#         return f"Total countries in {continent} is {int(population_result)}"

# def get_continent_with_max_value(dataframe, key, value):
#     max_id = dataframe[value].idxmax()
#     value_num = dataframe[value][max_id]
#     value_country = dataframe[key][max_id]
#     return f"{value_country}'s max {value} is {value_num}"