Add support for reading CSV file from web URL
Browse files
isco.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
| 1 |
"""This module provides functionality for creating a hierarchy tree and a mapping from ISCO code to node name."""
|
| 2 |
|
| 3 |
import csv
|
|
|
|
| 4 |
|
| 5 |
|
| 6 |
def create_hierarchy_dict(file: str) -> dict:
|
|
@@ -10,21 +11,27 @@ def create_hierarchy_dict(file: str) -> dict:
|
|
| 10 |
A csv file with the ISCO-08 structure can be downloaded from the International Labour Organization (ILO) at [https://www.ilo.org/ilostat-files/ISCO/newdocs-08-2021/ISCO-08/ISCO-08 EN.csv](https://www.ilo.org/ilostat-files/ISCO/newdocs-08-2021/ISCO-08/ISCO-08%20EN.csv)
|
| 11 |
|
| 12 |
Args:
|
| 13 |
-
- file: A string representing the path to the CSV file containing the 4-digit ISCO-08 codes.
|
| 14 |
|
| 15 |
Returns:
|
| 16 |
- A dictionary where keys are ISCO-08 unit codes and values are sets of their parent codes.
|
| 17 |
"""
|
| 18 |
isco_hierarchy = {}
|
| 19 |
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
|
| 29 |
return isco_hierarchy
|
| 30 |
|
|
|
|
| 1 |
"""This module provides functionality for creating a hierarchy tree and a mapping from ISCO code to node name."""
|
| 2 |
|
| 3 |
import csv
|
| 4 |
+
import requests
|
| 5 |
|
| 6 |
|
| 7 |
def create_hierarchy_dict(file: str) -> dict:
|
|
|
|
| 11 |
A csv file with the ISCO-08 structure can be downloaded from the International Labour Organization (ILO) at [https://www.ilo.org/ilostat-files/ISCO/newdocs-08-2021/ISCO-08/ISCO-08 EN.csv](https://www.ilo.org/ilostat-files/ISCO/newdocs-08-2021/ISCO-08/ISCO-08%20EN.csv)
|
| 12 |
|
| 13 |
Args:
|
| 14 |
+
- file: A string representing the path to the CSV file containing the 4-digit ISCO-08 codes. It can be a local path or a web URL.
|
| 15 |
|
| 16 |
Returns:
|
| 17 |
- A dictionary where keys are ISCO-08 unit codes and values are sets of their parent codes.
|
| 18 |
"""
|
| 19 |
isco_hierarchy = {}
|
| 20 |
|
| 21 |
+
if file.startswith("http://") or file.startswith("https://"):
|
| 22 |
+
response = requests.get(file)
|
| 23 |
+
lines = response.text.splitlines()
|
| 24 |
+
else:
|
| 25 |
+
with open(file, newline="") as csvfile:
|
| 26 |
+
lines = csvfile.readlines()
|
| 27 |
+
|
| 28 |
+
reader = csv.DictReader(lines)
|
| 29 |
+
for row in reader:
|
| 30 |
+
unit_code = row["unit"].zfill(4)
|
| 31 |
+
minor_code = unit_code[0:3]
|
| 32 |
+
sub_major_code = unit_code[0:2]
|
| 33 |
+
major_code = unit_code[0]
|
| 34 |
+
isco_hierarchy[unit_code] = {minor_code, major_code, sub_major_code}
|
| 35 |
|
| 36 |
return isco_hierarchy
|
| 37 |
|