# Lowest-level grouping: maps each language name to the label of the closest
# subfamily assigned to it.  Entries are grouped by branch purely for
# readability; every language name must appear exactly once, because a repeated
# key in a dict literal is silently collapsed to its last value.
low_level_subfamilies = {
    # Germanic
    "Afrikaans": "West Germanic",
    "Bavarian": "Austro-Bavarian",
    "Danish": "North Germanic",
    "Dutch": "West Germanic",
    "English": "West Germanic",
    "German": "West Germanic",
    "Icelandic": "North Germanic",
    "Low Saxon": "West Germanic",
    "Luxembourgish": "West Germanic",
    "Norwegian (Bokmal)": "North Germanic",
    "Norwegian (Nynorsk)": "North Germanic",
    "Scots": "West Germanic",
    "Swedish": "North Germanic",
    "West Frisian": "West Germanic",

    # Romance
    "Aragonese": "Ibero-Romance",
    "Asturian": "Ibero-Romance",
    "Catalan": "Occitano-Romance",
    "French": "Gallo-Romance",
    "Galician": "Ibero-Romance",
    "Italian": "Italo-Dalmatian",
    "Lombard": "Gallo-Italic",
    "Occitan": "Occitano-Romance",
    "Piedmontese": "Gallo-Italic",
    "Portuguese": "Ibero-Romance",
    "Romanian": "Eastern Romance",
    "Sicilian": "Italo-Dalmatian",
    "Spanish": "Ibero-Romance",

    # Slavic
    "Belarusian": "East Slavic",
    "Bosnian": "South Slavic",
    "Bulgarian": "South Slavic",
    "Croatian": "South Slavic",
    "Czech": "West Slavic",
    "Macedonian": "South Slavic",
    "Polish": "West Slavic",
    "Russian": "East Slavic",
    "Serbian": "South Slavic",
    "Serbo-Croatian": "South Slavic",
    "Slovak": "West Slavic",
    "Slovenian": "South Slavic",
    "Ukrainian": "East Slavic",

    # Celtic
    "Breton": "Brythonic",
    "Welsh": "Brythonic",
    "Irish": "Goidelic",

    # Baltic
    "Latvian": "Baltic",
    "Lithuanian": "Baltic",

    # Indo-Aryan (Indic)
    "Bengali": "Eastern Indo-Aryan",
    "Bishnupriya Manipuri": "Eastern Indo-Aryan",
    "Gujarati": "Western Indo-Aryan",
    "Hindi": "Central Indo-Aryan",
    "Marathi": "Southern Indo-Aryan",
    "Nepali": "Northern Indo-Aryan",
    "Punjabi": "North-western Indo-Aryan",
    "Urdu": "Central Indo-Aryan",
    "Western Punjabi": "North-western Indo-Aryan",

    # Iranian
    "Persian (Farsi)": "South-western Iranian",
    "Tajik": "South-western Iranian",

    # Turkic (no "Altaic" grouping is used anywhere in this table)
    "Azerbaijani": "Oghuz",
    "South Azerbaijani": "Oghuz",
    "Turkish": "Oghuz",
    "Bashkir": "Kipchak",
    "Kazakh": "Kipchak",
    "Kirghiz": "Kipchak",
    "Tatar": "Kipchak",
    "Crimean Tatar": "Kipchak",
    "Chuvash": "Oghur",
    "Uzbek": "Karluk",

    # Uralic
    "Estonian": "Finnic",
    "Finnish": "Finnic",
    "Hungarian": "Ugric",

    # Dravidian
    "Kannada": "Southern Dravidian",
    "Malayalam": "Southern Dravidian",
    "Tamil": "Southern Dravidian",
    "Telugu": "South-Central Dravidian",

    # Sinitic (Chinese)
    "Chinese (Simplified)": "Mandarin",
    "Chinese (Traditional)": "Mandarin",
    "Min Nan Chinese": "Southern Min",

    # Other Sino-Tibetan
    "Burmese": "Burmish",
    "Newar": "Newaric",

    # Japonic / Koreanic
    "Japanese": "Japonic",
    "Korean": "Koreanic",

    # Caucasian & Kartvelian
    "Chechen": "Nakh",
    "Georgian": "Kartvelian",

    # Austronesian (Philippine, Malayic and the remaining groups)
    "Cebuano": "Central Philippine",
    "Tagalog": "Central Philippine",
    "Waray-Waray": "Central Philippine",
    "Indonesian": "Malayic",
    "Malay": "Malayic",
    "Minangkabau": "Malayic",
    "Javanese": "Javanese",
    "Sundanese": "Sundic",
    "Malagasy": "East Barito",

    # Austroasiatic (Vietic)
    "Vietnamese": "Vietic",

    # Afro-Asiatic
    "Arabic": "Central Semitic",
    "Egyptian Arabic": "Central Semitic",
    "Hebrew": "North-west Semitic",

    # Niger-Congo
    "Swahili": "Sabaki",
    "Yoruba": "Yoruboid",

    # Isolates & special groups
    "Albanian": "Albanian",
    "Armenian": "Armenian",
    "Basque": "Language Isolate",
    "Greek": "Hellenic",
    "Latin": "Latino-Faliscan",
    "Esperanto": "Constructed",
    "Ido": "Constructed",
    "Volapük": "Constructed",
    "Haitian": "French-based Creole",
}

# ── 1.
# high-level genealogical families ─────────────────────────────────────────
# Maps each language name to the label of its top-level family.  Languages
# without a conventional family carry a descriptive label instead
# ("Language Isolate", "Creole", "Constructed").
high_level_families = {
    "Afrikaans": "Indo-European",
    "Albanian": "Indo-European",
    "Arabic": "Afroasiatic",
    "Egyptian Arabic": "Afroasiatic",
    "Aragonese": "Indo-European",
    "Armenian": "Indo-European",
    "Asturian": "Indo-European",
    "Azerbaijani": "Turkic",
    "Bashkir": "Turkic",
    "Basque": "Language Isolate",
    "Bavarian": "Indo-European",
    "Belarusian": "Indo-European",
    "Bengali": "Indo-European",
    "Bishnupriya Manipuri": "Indo-European",
    "Bosnian": "Indo-European",
    "Breton": "Indo-European",
    "Bulgarian": "Indo-European",
    "Burmese": "Sino-Tibetan",
    "Catalan": "Indo-European",
    "Cebuano": "Austronesian",
    "Chechen": "Northeast Caucasian",
    "Chinese (Simplified)": "Sino-Tibetan",
    "Chinese (Traditional)": "Sino-Tibetan",
    "Min Nan Chinese": "Sino-Tibetan",
    "Chuvash": "Turkic",
    "Croatian": "Indo-European",
    "Czech": "Indo-European",
    "Danish": "Indo-European",
    "Dutch": "Indo-European",
    "English": "Indo-European",
    "Estonian": "Uralic",
    "Finnish": "Uralic",
    "French": "Indo-European",
    "Galician": "Indo-European",
    "Georgian": "Kartvelian",
    "German": "Indo-European",
    "Greek": "Indo-European",
    "Gujarati": "Indo-European",
    "Haitian": "Creole",
    "Hebrew": "Afroasiatic",
    "Hindi": "Indo-European",
    "Hungarian": "Uralic",
    "Icelandic": "Indo-European",
    "Ido": "Constructed",
    "Indonesian": "Austronesian",
    "Irish": "Indo-European",
    "Italian": "Indo-European",
    "Japanese": "Japonic",
    "Javanese": "Austronesian",
    "Kannada": "Dravidian",
    "Kazakh": "Turkic",
    "Kirghiz": "Turkic",
    "Korean": "Koreanic",
    "Latin": "Indo-European",
    "Latvian": "Indo-European",
    "Lithuanian": "Indo-European",
    "Lombard": "Indo-European",
    "Low Saxon": "Indo-European",
    "Luxembourgish": "Indo-European",
    "Macedonian": "Indo-European",
    "Malagasy": "Austronesian",
    "Malay": "Austronesian",
    "Malayalam": "Dravidian",
    "Marathi": "Indo-European",
    "Minangkabau": "Austronesian",
    "Nepali": "Indo-European",
    "Newar": "Sino-Tibetan",
    "Norwegian (Bokmal)": "Indo-European",
    "Norwegian (Nynorsk)": "Indo-European",
    "Occitan": "Indo-European",
    "Persian (Farsi)": "Indo-European",
    "Piedmontese": "Indo-European",
    "Polish": "Indo-European",
    "Portuguese": "Indo-European",
    "Punjabi": "Indo-European",
    "Romanian": "Indo-European",
    "Russian": "Indo-European",
    "Scots": "Indo-European",
    "Serbian": "Indo-European",
    "Serbo-Croatian": "Indo-European",
    "Sicilian": "Indo-European",
    "Slovak": "Indo-European",
    "Slovenian": "Indo-European",
    "South Azerbaijani": "Turkic",
    "Spanish": "Indo-European",
    "Sundanese": "Austronesian",
    "Swahili": "Niger-Congo",
    "Swedish": "Indo-European",
    "Tagalog": "Austronesian",
    "Tajik": "Indo-European",
    "Tamil": "Dravidian",
    "Tatar": "Turkic",
    "Telugu": "Dravidian",
    "Turkish": "Turkic",
    "Ukrainian": "Indo-European",
    "Urdu": "Indo-European",
    "Uzbek": "Turkic",
    "Vietnamese": "Austroasiatic",
    "Volapük": "Constructed",
    "Waray-Waray": "Austronesian",
    "Welsh": "Indo-European",
    "West Frisian": "Indo-European",
    "Western Punjabi": "Indo-European",
    "Yoruba": "Niger-Congo",
    "Esperanto": "Constructed",
    "Crimean Tatar": "Turkic",
}

# ── 2.
# primary branches (first subdivision inside each family) ────────────────
# Maps each language name to the label of the first-order branch of its
# family.  Families with a single branch, isolates, creoles and constructed
# languages reuse a descriptive label.
primary_families_branches = {
    # Indo-European
    "Afrikaans": "Germanic",
    "Albanian": "Albanian",
    "Aragonese": "Romance",
    "Armenian": "Armenian",
    "Asturian": "Romance",
    "Bavarian": "Germanic",
    "Belarusian": "Slavic",
    "Bengali": "Indo-Aryan",
    "Bishnupriya Manipuri": "Indo-Aryan",
    "Bosnian": "Slavic",
    "Breton": "Celtic",
    "Bulgarian": "Slavic",
    "Catalan": "Romance",
    "Croatian": "Slavic",
    "Czech": "Slavic",
    "Danish": "Germanic",
    "Dutch": "Germanic",
    "English": "Germanic",
    "French": "Romance",
    "Galician": "Romance",
    "German": "Germanic",
    "Greek": "Hellenic",
    "Gujarati": "Indo-Aryan",
    "Hindi": "Indo-Aryan",
    "Icelandic": "Germanic",
    "Irish": "Celtic",
    "Italian": "Romance",
    "Latin": "Italic",
    "Latvian": "Baltic",
    "Lithuanian": "Baltic",
    "Lombard": "Romance",
    "Low Saxon": "Germanic",
    "Luxembourgish": "Germanic",
    "Macedonian": "Slavic",
    "Marathi": "Indo-Aryan",
    "Nepali": "Indo-Aryan",
    "Norwegian (Bokmal)": "Germanic",
    "Norwegian (Nynorsk)": "Germanic",
    "Occitan": "Romance",
    "Persian (Farsi)": "Iranian",
    "Piedmontese": "Romance",
    "Polish": "Slavic",
    "Portuguese": "Romance",
    "Punjabi": "Indo-Aryan",
    "Romanian": "Romance",
    "Russian": "Slavic",
    "Scots": "Germanic",
    "Serbian": "Slavic",
    "Serbo-Croatian": "Slavic",
    "Sicilian": "Romance",
    "Slovak": "Slavic",
    "Slovenian": "Slavic",
    "Spanish": "Romance",
    "Swedish": "Germanic",
    "Tajik": "Iranian",
    "Ukrainian": "Slavic",
    "Urdu": "Indo-Aryan",
    "West Frisian": "Germanic",
    "Western Punjabi": "Indo-Aryan",
    "Welsh": "Celtic",

    # Afroasiatic
    "Arabic": "Semitic",
    "Egyptian Arabic": "Semitic",
    "Hebrew": "Semitic",

    # Turkic
    "Azerbaijani": "Oghuz",
    "South Azerbaijani": "Oghuz",
    "Turkish": "Oghuz",
    "Bashkir": "Kipchak",
    "Kazakh": "Kipchak",
    "Kirghiz": "Kipchak",
    "Tatar": "Kipchak",
    "Crimean Tatar": "Kipchak",
    "Chuvash": "Oghur",
    "Uzbek": "Karluk",

    # Uralic
    "Estonian": "Finnic",
    "Finnish": "Finnic",
    "Hungarian": "Ugric",

    # Sino-Tibetan
    "Chinese (Simplified)": "Sinitic",
    "Chinese (Traditional)": "Sinitic",
    "Min Nan Chinese": "Sinitic",
    "Burmese": "Tibeto-Burman",
    "Newar": "Tibeto-Burman",

    # Austronesian
    # Every entry uses the family's first-order branch.  "Philippine" is a
    # subgroup inside Malayo-Polynesian, so Tagalog and Waray-Waray carry the
    # same label as Cebuano and the other Austronesian languages here.
    "Cebuano": "Malayo-Polynesian",
    "Indonesian": "Malayo-Polynesian",
    "Javanese": "Malayo-Polynesian",
    "Malagasy": "Malayo-Polynesian",
    "Malay": "Malayo-Polynesian",
    "Minangkabau": "Malayo-Polynesian",
    "Sundanese": "Malayo-Polynesian",
    "Tagalog": "Malayo-Polynesian",
    "Waray-Waray": "Malayo-Polynesian",

    # Dravidian
    "Kannada": "South Dravidian",
    "Malayalam": "South Dravidian",
    "Tamil": "South Dravidian",
    "Telugu": "South-Central Dravidian",

    # Niger-Congo
    "Swahili": "Atlantic-Congo",
    "Yoruba": "Atlantic-Congo",

    # Misc. single-branch families
    "Basque": "Language Isolate",
    "Chechen": "Nakh",
    "Georgian": "Kartvelian",
    "Japanese": "Japonic",
    "Korean": "Koreanic",
    "Vietnamese": "Vietic",

    # Creole & Constructed
    "Haitian": "French-based Creole",
    "Esperanto": "Constructed",
    "Ido": "Constructed",
    "Volapük": "Constructed",
}