Spaces:

VelaTest
/

world_data_insights_api

Sleeping

world_data_insights_api / backend /modules /home_page.py

Vela

commit_message : "Added files"

22b9c3e 5 months ago

5.62 kB

	import pandas as pd
	import os
	import sys
	# Build the absolute path "<this file's directory>/../../backend" and put it on
	# sys.path so that the `utils` import below can be resolved.
	# NOTE: the sys.path.append call must stay ahead of `from utils import logger`.
	src_directory = os.path.abspath(os.path.join(os.path.dirname(__file__), "../..", "backend"))
	sys.path.append(src_directory)
	from utils import logger

	# CSV read by process_data(). The path is relative, so it is resolved against
	# the process's current working directory, not against this file's location.
	file_path = "./world_population.csv"

	# file_path = "C:/Users/Vijay/Downloads/world_population.csv"
	# data_frame = pd.read_csv(file_path)

	def process_data():
	    """Load the CSV at the module-level ``file_path`` into a DataFrame.

	    Returns:
	        The ``pandas.DataFrame`` produced by ``pd.read_csv`` on success.
	        If anything raises while logging or reading, the exception is
	        swallowed and a string of the form ``"Unable to read the file <error>"``
	        is returned instead, so callers must check the type of the result.
	    """
	    try:
	        logger.log("I'm going to read the csv")
	        loaded = pd.read_csv(file_path)
	        logger.log("I'm reading the csv")
	    except Exception as err:
	        logger.log("I couldn't read the file")
	        return f"Unable to read the file {err}"
	    return loaded

	def display_continents(dataframe):
	    """Return the distinct values found in the ``'Continent'`` column.

	    Args:
	        dataframe: Table that has a ``'Continent'`` column.

	    Returns:
	        The result of ``Series.unique()`` on that column (each value once,
	        in order of first appearance).
	    """
	    continent_column = dataframe['Continent']
	    distinct = continent_column.unique()
	    logger.log("Displaying the list of continents in the data")
	    return distinct

	def display_countries(dataframe):
	    """Return every entry of the ``'Country'`` column, one per row.

	    Args:
	        dataframe: Table that has a ``'Country'`` column.

	    Returns:
	        The column's ``.values`` (the underlying array, not a Python list);
	        no de-duplication is performed.
	    """
	    country_column = dataframe['Country']
	    names = country_column.values
	    logger.log("Displaying the list of countries in the data")
	    return names

	def continent_stat(dataframe, attribute="Population", stat_type="highest"):
	    """Find the continent whose summed ``attribute`` is the highest or lowest.

	    Args:
	        dataframe: Table containing a ``'Continent'`` column and the
	            ``attribute`` column.
	        attribute: Name of the column to total per continent.
	        stat_type: ``"highest"`` or ``"lowest"`` (matched case-insensitively).

	    Returns:
	        ``{continent: total}`` for the selected continent. On any failure
	        (missing columns, invalid ``stat_type``, pandas errors) the error is
	        logged and ``{"error": "<message>"}`` is returned instead; this
	        function does not raise to its caller.
	    """
	    try:
	        if 'Continent' not in dataframe.columns or attribute not in dataframe.columns:
	            # Raise rather than return the exception object, so this case goes
	            # through the same {"error": ...} path as every other failure.
	            raise ValueError(f"Dataframe must contain 'Continent' and '{attribute}' columns.")

	        # Named aggregation yields a one-column DataFrame indexed by continent;
	        # idxmax()/max() on it therefore return one-element Series, and .item()
	        # unwraps them into plain Python values.
	        continent_stats = dataframe.groupby('Continent')[attribute].agg(total_attribute='sum')

	        # Accept any capitalisation, as country_stat does; non-string values
	        # fall through to the "Invalid stat_type" error below.
	        mode = stat_type.lower() if isinstance(stat_type, str) else stat_type

	        if mode == "highest":
	            continent = continent_stats.idxmax().item()
	            value = continent_stats.max().item()
	            logger.log(f"Displaying the continent with the highest {attribute}: {continent} with {attribute} {value}")
	        elif mode == "lowest":
	            continent = continent_stats.idxmin().item()
	            value = continent_stats.min().item()
	            logger.log(f"Displaying the continent with the lowest {attribute}: {continent} with {attribute} {value}")
	        else:
	            raise ValueError("Invalid stat_type. Use 'highest' or 'lowest'.")

	        return {continent: value}

	    except Exception as e:
	        logger.log(f"Error in continent_stat: {str(e)}")
	        return {"error": str(e)}

	def country_stat(dataframe, attribute: str = "Population", stat_type: str = "highest"):
	    """Find the country (row) with the highest or lowest ``attribute`` value.

	    Args:
	        dataframe: Table containing a ``'Country'`` column and the
	            ``attribute`` column.
	        attribute: Name of the column to compare.
	        stat_type: ``"highest"`` or ``"lowest"`` (matched case-insensitively).

	    Returns:
	        ``{country: value}`` for the selected row. On any failure the error
	        is logged and the string ``"Unable to fetch the data. Error <message>"``
	        is returned instead; this function does not raise to its caller.
	    """
	    try:
	        mode = stat_type.lower()
	        if mode == "highest":
	            index = dataframe[attribute].idxmax()
	        elif mode == "lowest":
	            index = dataframe[attribute].idxmin()
	        else:
	            # Without this branch `index` stays unbound and the caller only
	            # sees an UnboundLocalError message about a local variable.
	            raise ValueError("Invalid stat_type. Use 'highest' or 'lowest'.")

	        country = dataframe['Country'][index]
	        requested_attribute = dataframe[attribute][index]
	        # .item() converts the NumPy scalar into a plain Python number.
	        result = {country: requested_attribute.item()}
	        logger.log(f"Displaying the country with {stat_type} {attribute} in the data")
	        return result
	    except Exception as e:
	        # Log the failure (as continent_stat does) before reporting it.
	        logger.log(f"Error in country_stat: {e}")
	        return f"Unable to fetch the data. Error {e}"



	# def list_country_by_continent(dataframe,continent):
	# try:
	# df_countries = dataframe[dataframe['Continent'] == continent]
	# countries= df_countries['Country'].to_list()
	# logger.log("Separated data by continent")
	# return countries
	# except Exception as e:
	# return f"{e}"

	# def get_stat_by_continent(df ,continent: str, data_type: str, stat: str , ):

	# if continent.lower() == "NorthAmerica".lower():
	# continent = "North America"
	# if continent.lower() == "SouthAmerica".lower():
	# continent = "South America"

	# valid_stats = ['max', 'min', 'mean' , 'sum' , 'count']
	# if stat not in valid_stats:
	# return f"Invalid stat. Please use one of the following: {valid_stats}."

	# continent_population_stats = df.groupby('Continent')[data_type].agg(
	# Maximum='max', Minimum='min', Average = 'mean',Total='sum' , Number_of_Countries = 'count')

	# continent_countries = df[df['Continent'] == continent]

	# if continent not in continent_population_stats.index:
	# return f"Continent '{continent}' not found in the data."

	# if stat == 'max':
	# population_result = continent_population_stats.loc[continent]['Maximum']
	# country_id = continent_countries.loc[continent_countries[data_type].idxmax()]
	# country_name = country_id['Country']
	# population_value = country_id[data_type]
	# return f"{continent}'s {stat} {data_type} is {int(population_result)}. Country: {country_name} , {data_type} :{population_value}"
	# if stat == 'min':
	# population_result = continent_population_stats.loc[continent]['Minimum']
	# country_id = continent_countries.loc[continent_countries[data_type].idxmin()]
	# country_name = country_id['Country']
	# population_value = country_id[data_type]
	# return f"{continent}'s {stat} {data_type} is {int(population_result)}. Country: {country_name} , {data_type} :{population_value}"
	# if stat == 'mean':
	# population_result = continent_population_stats.loc[continent]['Average']
	# return f"{continent}'s average {data_type} is {int(population_result)}"
	# if stat == 'sum':
	# population_result = continent_population_stats.loc[continent]['Total']
	# return f"{continent}'s total {data_type} is {int(population_result)}"
	# if stat == 'count' :
	# population_result = continent_population_stats.loc[continent]['Number_of_Countries']
	# return f"Total countries in {continent} is {int(population_result)}"

	# def get_continent_with_max_value(dataframe, key, value):
	# max_id = dataframe[value].idxmax()
	# value_num = dataframe[value][max_id]
	# value_country = dataframe[key][max_id]
	# return f"{value_country}'s max {value} is {value_num}"