# Spaces: VelaTest / world_data_insights_api (Sleeping)
# File size: 5,160 Bytes
# dfc542c 91a458d dfc542c

import pandas as pd
import os
import sys
# Put the "backend" directory (resolved two levels above this file's directory)
# on sys.path so the `utils` import below can be found.
src_directory = os.path.abspath(os.path.join(os.path.dirname(__file__), "../..", "backend"))
sys.path.append(src_directory)
from utils import logger

# CSV read by process_data(). The path is relative, so it is resolved against
# the process's current working directory, not this file's location.
file_path =  "./world_population.csv"

# Commented-out alternative that read the CSV from an absolute local path at
# import time.
# file_path =  "C:/Users/Vijay/Downloads/world_population.csv"
# data_frame = pd.read_csv(file_path)

def process_data():
    """Load the CSV at the module-level ``file_path`` into a DataFrame.

    Returns:
        The DataFrame produced by ``pd.read_csv`` on success. If anything in
        the read (or the surrounding logging) raises, the failure is logged
        and a string "Unable to read the file <error>" is returned instead.
    """
    try:
        logger.log("I'm going to read the csv")
        population_frame = pd.read_csv(file_path)
        logger.log("I'm reading the csv")
    except Exception as read_error:
        logger.log("I couldn't read the file")
        return f"Unable to read the file {read_error}"
    return population_frame
    
def display_continents(dataframe):
    """Return the distinct values of the 'Continent' column.

    Args:
        dataframe: DataFrame with a 'Continent' column.

    Returns:
        The result of ``Series.unique()`` on that column, in order of first
        appearance.
    """
    continent_column = dataframe['Continent']
    distinct_continents = continent_column.unique()
    logger.log("Displaying the list of continents in the data")
    return distinct_continents

def display_countries(dataframe):
    """Return every value of the 'Country' column (duplicates included).

    Args:
        dataframe: DataFrame with a 'Country' column.

    Returns:
        The column's underlying ``.values`` array.
    """
    country_column = dataframe['Country']
    all_countries = country_column.values
    logger.log("Displaying the list of countries in the data")
    return all_countries

def continent_with_highest_population(dataframe):
    """Find the continent whose summed 'Population' is the largest.

    Args:
        dataframe: DataFrame with 'Continent' and 'Population' columns.

    Returns:
        A one-entry dict mapping the continent name to its total population.
    """
    population_by_continent = dataframe.groupby('Continent')['Population']
    totals = population_by_continent.agg(total_population='sum')

    # `totals` is a one-column frame, so idxmax()/max() yield one-element
    # Series; .item() unwraps them to plain Python values.
    top_continent = totals.idxmax().item()
    top_total = totals.max().item()

    summary = {top_continent: top_total}
    logger.log("Displaying the continent with highest population in the data")
    return summary

def continent_with_lowest_population(dataframe):
    """Find the continent whose summed 'Population' is the smallest.

    Args:
        dataframe: DataFrame with 'Continent' and 'Population' columns.

    Returns:
        A one-entry dict mapping the continent name to its total population.
    """
    population_by_continent = dataframe.groupby('Continent')['Population']
    totals = population_by_continent.agg(total_population='sum')

    # `totals` is a one-column frame, so idxmin()/min() yield one-element
    # Series; .item() unwraps them to plain Python values.
    bottom_continent = totals.idxmin().item()
    bottom_total = totals.min().item()

    summary = {bottom_continent: bottom_total}
    logger.log("Displaying the continent with lowest population in the data")
    return summary

def country_with_lowest_population(dataframe):
    """Find the row with the smallest 'Population' value.

    Args:
        dataframe: DataFrame with 'Country' and 'Population' columns.

    Returns:
        A one-entry dict mapping that row's country name to its population
        (unwrapped to a plain Python number via ``.item()``).
    """
    row_label = dataframe['Population'].idxmin()
    country_name = dataframe.loc[row_label, 'Country']
    population = dataframe.loc[row_label, 'Population']
    summary = {country_name: population.item()}
    logger.log("Displaying the country with lowest population in the data")
    return summary

def country_with_highest_population(dataframe):
    """Find the row with the largest 'Population' value.

    Args:
        dataframe: DataFrame with 'Country' and 'Population' columns.

    Returns:
        A one-entry dict mapping that row's country name to its population
        (unwrapped to a plain Python number via ``.item()``).
    """
    row_label = dataframe['Population'].idxmax()
    country_name = dataframe.loc[row_label, 'Country']
    population = dataframe.loc[row_label, 'Population']
    summary = {country_name: population.item()}
    logger.log("Displaying the country with highest population in the data")
    return summary

























    

def list_country_by_continent(dataframe,continent):
    """List the countries whose 'Continent' equals ``continent``.

    Args:
        dataframe: DataFrame with 'Continent' and 'Country' columns.
        continent: Continent name to match exactly.

    Returns:
        A list of country names (empty when nothing matches). If any step
        raises, the exception's text is returned as a string instead.
    """
    try:
        in_continent = dataframe['Continent'] == continent
        matching_names = dataframe.loc[in_continent, 'Country'].to_list()
        logger.log("Separated data by continent")
    except Exception as error:
        return str(error)
    return matching_names

def get_stat_by_continent(df, continent: str, data_type: str, stat: str):
    """Describe one statistic of a numeric column for a single continent.

    Args:
        df: DataFrame with 'Continent' and 'Country' columns plus the column
            named by ``data_type``.
        continent: Continent name. Matching against the data ignores case and
            whitespace, so "NorthAmerica", "north america" and "North America"
            all select the same rows.
        data_type: Name of the column to aggregate (e.g. "Population").
        stat: One of 'max', 'min', 'mean', 'sum' or 'count'.

    Returns:
        A human-readable sentence with the requested statistic (for 'max' and
        'min' it also names the country holding that value), or an
        explanatory message when ``stat``, ``data_type`` or ``continent`` is
        not valid for the data.
    """
    # Space-less aliases that were always accepted; kept so the "not found"
    # message still shows the spaced name for them.
    aliases = {"northamerica": "North America", "southamerica": "South America"}
    continent = aliases.get(continent.lower(), continent)

    valid_stats = ['max', 'min', 'mean', 'sum', 'count']
    if stat not in valid_stats:
        return f"Invalid stat. Please use one of the following: {valid_stats}."

    # Report a bad column the same way as a bad stat/continent instead of
    # letting a KeyError escape.
    if data_type not in df.columns:
        return f"Column '{data_type}' not found in the data."

    known_continents = list(df['Continent'].dropna().unique())
    if continent not in known_continents:
        def _normalize(name):
            return "".join(str(name).split()).lower()

        wanted = _normalize(continent)
        matches = [name for name in known_continents if _normalize(name) == wanted]
        if not matches:
            return f"Continent '{continent}' not found in the data."
        continent = matches[0]

    continent_countries = df[df['Continent'] == continent]
    # Aggregate the continent's own column directly. Reading the value out of
    # a mixed int/float stats row would upcast it to float64 and silently
    # lose precision for integers above 2**53.
    values = continent_countries[data_type]

    if stat in ('max', 'min'):
        if stat == 'max':
            row_label, population_result = values.idxmax(), values.max()
        else:
            row_label, population_result = values.idxmin(), values.min()
        country_name = continent_countries.loc[row_label, 'Country']
        population_value = continent_countries.loc[row_label, data_type]
        return f"{continent}'s {stat} {data_type} is {int(population_result)}. Country: {country_name} , {data_type} :{population_value}"
    if stat == 'mean':
        return f"{continent}'s average {data_type} is {int(values.mean())}"
    if stat == 'sum':
        return f"{continent}'s total {data_type} is {int(values.sum())}"
    # Only 'count' remains: number of non-null entries in the column.
    return f"Total countries in {continent} is {int(values.count())}"
    
def get_continent_with_max_value(dataframe, key, value):
    """Report which row holds the largest entry of the ``value`` column.

    Args:
        dataframe: DataFrame containing the ``key`` and ``value`` columns.
        key: Column whose entry names the winning row (e.g. a country column).
        value: Column to maximise.

    Returns:
        A sentence of the form "<key entry>'s max <value> is <maximum>".
    """
    top_label = dataframe[value].idxmax()
    top_amount = dataframe.loc[top_label, value]
    top_name = dataframe.loc[top_label, key]
    return f"{top_name}'s max {value} is {top_amount}"