Powered by AppSignal & Oban Pro
Would you like to see your link here? Contact us

2. CLDR - Languages, Territories, Populations

02b-languages-territories-populations.livemd

2. CLDR - Languages, Territories, Populations

# Install dependencies
# ex_cldr plus its territories, languages and numbers add-on packages;
# Jason is the JSON library handed to the backend below.
Mix.install([
  :ex_cldr,
  :ex_cldr_territories,
  :ex_cldr_languages,
  :ex_cldr_numbers,
  :jason
])

# Define a backend module
# Only the "en" locale is configured, and it is also the default locale.
# The providers listed here match the add-on packages installed above; later
# cells call DemoApp.Cldr.Number and DemoApp.Cldr.Language
# (presumably generated by these providers — confirm against the ex_cldr docs).
defmodule DemoApp.Cldr do
  use Cldr,
    locales: ["en"],
    default_locale: "en",
    providers: [Cldr.Number, Cldr.Territory, Cldr.Language],
    json_library: Jason
end

# Set an app-wide default backend
# Stored under the :ex_cldr application's :default_backend key so that calls
# made without an explicit backend (e.g. Cldr.Territory.display_name/1 below)
# can use DemoApp.Cldr.
Application.put_env(:ex_cldr, :default_backend, DemoApp.Cldr)
Generating DemoApp.Cldr for 2 locales named [:en, :und] with a default locale named :en
:ok

Let’s check that everything works

Cldr.all_locale_names()
[:aa, :"aa-DJ", :"aa-ER", :ab, :af, :"af-NA", :agq, :ak, :am, :an, :ann, :apc, :ar, :"ar-AE",
 :"ar-BH", :"ar-DJ", :"ar-DZ", :"ar-EG", :"ar-EH", :"ar-ER", :"ar-IL", :"ar-IQ", :"ar-JO", :"ar-KM",
 :"ar-KW", :"ar-LB", :"ar-LY", :"ar-MA", :"ar-MR", :"ar-OM", :"ar-PS", :"ar-QA", :"ar-SA", :"ar-SD",
 :"ar-SO", :"ar-SS", :"ar-SY", :"ar-TD", :"ar-TN", :"ar-YE", :arn, :as, :asa, :ast, :az, :"az-Arab",
 :"az-Arab-IQ", :"az-Arab-TR", :"az-Cyrl", :"az-Latn", ...]
Cldr.all_locale_names()
|> Enum.count()
693

In the setup section above, we defined a backend module for ex_cldr to use. It’s different from the `Cldr` module provided by the package itself, but has many of the same functions.

Cldr == DemoApp.Cldr
false
Cldr.__info__(:functions)
[
  all_locale_names: 0,
  available_locale_name?: 1,
  default_backend: 0,
  default_backend!: 0,
  default_locale: 0,
  default_locale: 1,
  default_script: 0,
  default_territory: 0,
  default_territory: 1,
  display_name: 1,
  display_name: 2,
  ellipsis: 1,
  ellipsis: 2,
  ellipsis: 3,
  flag: 1,
  get_locale: 0,
  get_locale: 1,
  install_locales: 1,
  known_calendars: 0,
  known_currencies: 0,
  known_gettext_locale_name: 1,
  known_gettext_locale_name: 2,
  known_gettext_locale_name?: 1,
  known_gettext_locale_name?: 2,
  known_gettext_locale_names: 0,
  known_gettext_locale_names: 1,
  known_locale_name: 1,
  known_locale_name: 2,
  known_locale_name?: 1,
  known_locale_name?: 2,
  known_locale_names: 0,
  known_locale_names: 1,
  known_number_system_types: 0,
  known_number_system_types: 1,
  known_number_systems: 0,
  known_rbnf_locale_name: 1,
  known_rbnf_locale_name: 2,
  known_rbnf_locale_name?: 1,
  known_rbnf_locale_name?: 2,
  known_rbnf_locale_names: 0,
  known_rbnf_locale_names: 1,
  known_territories: 0,
  known_territory_subdivision_containment: 0,
  known_territory_subdivisions: 0,
  locale_and_backend_from: 1,
  locale_and_backend_from: 2,
  locale_name: 1,
  maybe_log: 1,
  put_default_backend: 1,
  put_default_locale: 1,
  ...
]
Cldr.__info__(:functions)
|> Enum.count()
92
DemoApp.Cldr.__info__(:functions)
[
  __cldr__: 1,
  available_locale_name?: 1,
  default_locale: 0,
  default_territory: 0,
  ellipsis: 1,
  ellipsis: 2,
  ellipsis_chars: 1,
  get_locale: 0,
  known_calendars: 0,
  known_cldr_locale: 2,
  known_cldr_territory: 1,
  known_currencies: 0,
  known_gettext_locale_name: 1,
  known_gettext_locale_name?: 1,
  known_gettext_locale_names: 0,
  known_locale_name: 1,
  known_locale_name?: 1,
  known_locale_names: 0,
  known_number_system_types: 0,
  known_number_systems: 0,
  known_rbnf_locale_name: 1,
  known_rbnf_locale_name?: 1,
  known_rbnf_locale_names: 0,
  known_territories: 0,
  lenient_parse_map: 2,
  normalize_lenient_parse: 2,
  normalize_lenient_parse: 3,
  put_locale: 1,
  quote: 1,
  quote: 2,
  unknown_locale_names: 0,
  validate_calendar: 1,
  validate_currency: 1,
  validate_locale: 1,
  validate_number_system: 1,
  validate_number_system_type: 1,
  validate_territory: 1,
  with_locale: 2
]
DemoApp.Cldr.__info__(:functions)
|> Enum.count()
38

What territory info is available?

# CLDR defines a bunch of territories (including individual countries, regions, continents, etc.)

known_territories = Cldr.known_territories()
[:"001", :"002", :"003", :"005", :"009", :"011", :"013", :"014", :"015", :"017", :"018", :"019",
 :"021", :"029", :"030", :"034", :"035", :"039", :"053", :"054", :"057", :"061", :"142", :"143",
 :"145", :"150", :"151", :"154", :"155", :"202", :"419", :AC, :AD, :AE, :AF, :AG, :AI, :AL, :AM,
 :AO, :AQ, :AR, :AS, :AT, :AU, :AW, :AX, :AZ, :BA, :BB, ...]
# How many are there?

Enum.count(known_territories)
292
# Cldr.Territory.display_name/1 gives a bit more context

# Print one "CODE: Display name" line per known territory.
Enum.each(known_territories, fn code ->
  {:ok, label} = Cldr.Territory.display_name(code)
  IO.puts("#{code}: #{label}")
end)
001: world
002: Africa
003: North America
005: South America
009: Oceania
011: Western Africa
013: Central America
014: Eastern Africa
015: Northern Africa
017: Middle Africa
018: Southern Africa
019: Americas
021: Northern America
029: Caribbean
030: Eastern Asia
034: Southern Asia
035: Southeast Asia
039: Southern Europe
053: Australasia
054: Melanesia
057: Micronesian Region
061: Polynesia
142: Asia
143: Central Asia
145: Western Asia
150: Europe
151: Eastern Europe
154: Northern Europe
155: Western Europe
202: Sub-Saharan Africa
419: Latin America
AC: Ascension Island
AD: Andorra
AE: United Arab Emirates
AF: Afghanistan
AG: Antigua & Barbuda
AI: Anguilla
AL: Albania
AM: Armenia
AO: Angola
AQ: Antarctica
AR: Argentina
AS: American Samoa
AT: Austria
AU: Australia
AW: Aruba
AX: Åland Islands
AZ: Azerbaijan
BA: Bosnia & Herzegovina
BB: Barbados
BD: Bangladesh
BE: Belgium
BF: Burkina Faso
BG: Bulgaria
BH: Bahrain
BI: Burundi
BJ: Benin
BL: St. Barthélemy
BM: Bermuda
BN: Brunei
BO: Bolivia
BQ: Caribbean Netherlands
BR: Brazil
BS: Bahamas
BT: Bhutan
BV: Bouvet Island
BW: Botswana
BY: Belarus
BZ: Belize
CA: Canada
CC: Cocos (Keeling) Islands
CD: Congo - Kinshasa
CF: Central African Republic
CG: Congo - Brazzaville
CH: Switzerland
CI: Côte d’Ivoire
CK: Cook Islands
CL: Chile
CM: Cameroon
CN: China
CO: Colombia
CP: Clipperton Island
CQ: Sark
CR: Costa Rica
CU: Cuba
CV: Cape Verde
CW: Curaçao
CX: Christmas Island
CY: Cyprus
CZ: Czechia
DE: Germany
DG: Diego Garcia
DJ: Djibouti
DK: Denmark
DM: Dominica
DO: Dominican Republic
DZ: Algeria
EA: Ceuta & Melilla
EC: Ecuador
EE: Estonia
EG: Egypt
EH: Western Sahara
ER: Eritrea
ES: Spain
ET: Ethiopia
EU: European Union
EZ: Eurozone
FI: Finland
FJ: Fiji
FK: Falkland Islands
FM: Micronesia
FO: Faroe Islands
FR: France
GA: Gabon
GB: United Kingdom
GD: Grenada
GE: Georgia
GF: French Guiana
GG: Guernsey
GH: Ghana
GI: Gibraltar
GL: Greenland
GM: Gambia
GN: Guinea
GP: Guadeloupe
GQ: Equatorial Guinea
GR: Greece
GS: South Georgia & South Sandwich Islands
GT: Guatemala
GU: Guam
GW: Guinea-Bissau
GY: Guyana
HK: Hong Kong SAR China
HM: Heard & McDonald Islands
HN: Honduras
HR: Croatia
HT: Haiti
HU: Hungary
IC: Canary Islands
ID: Indonesia
IE: Ireland
IL: Israel
IM: Isle of Man
IN: India
IO: British Indian Ocean Territory
IQ: Iraq
IR: Iran
IS: Iceland
IT: Italy
JE: Jersey
JM: Jamaica
JO: Jordan
JP: Japan
KE: Kenya
KG: Kyrgyzstan
KH: Cambodia
KI: Kiribati
KM: Comoros
KN: St. Kitts & Nevis
KP: North Korea
KR: South Korea
KW: Kuwait
KY: Cayman Islands
KZ: Kazakhstan
LA: Laos
LB: Lebanon
LC: St. Lucia
LI: Liechtenstein
LK: Sri Lanka
LR: Liberia
LS: Lesotho
LT: Lithuania
LU: Luxembourg
LV: Latvia
LY: Libya
MA: Morocco
MC: Monaco
MD: Moldova
ME: Montenegro
MF: St. Martin
MG: Madagascar
MH: Marshall Islands
MK: North Macedonia
ML: Mali
MM: Myanmar (Burma)
MN: Mongolia
MO: Macao SAR China
MP: Northern Mariana Islands
MQ: Martinique
MR: Mauritania
MS: Montserrat
MT: Malta
MU: Mauritius
MV: Maldives
MW: Malawi
MX: Mexico
MY: Malaysia
MZ: Mozambique
NA: Namibia
NC: New Caledonia
NE: Niger
NF: Norfolk Island
NG: Nigeria
NI: Nicaragua
NL: Netherlands
NO: Norway
NP: Nepal
NR: Nauru
NU: Niue
NZ: New Zealand
OM: Oman
PA: Panama
PE: Peru
PF: French Polynesia
PG: Papua New Guinea
PH: Philippines
PK: Pakistan
PL: Poland
PM: St. Pierre & Miquelon
PN: Pitcairn Islands
PR: Puerto Rico
PS: Palestinian Territories
PT: Portugal
PW: Palau
PY: Paraguay
QA: Qatar
QO: Outlying Oceania
RE: Réunion
RO: Romania
RS: Serbia
RU: Russia
RW: Rwanda
SA: Saudi Arabia
SB: Solomon Islands
SC: Seychelles
SD: Sudan
SE: Sweden
SG: Singapore
SH: St. Helena
SI: Slovenia
SJ: Svalbard & Jan Mayen
SK: Slovakia
SL: Sierra Leone
SM: San Marino
SN: Senegal
SO: Somalia
SR: Suriname
SS: South Sudan
ST: São Tomé & Príncipe
SV: El Salvador
SX: Sint Maarten
SY: Syria
SZ: Eswatini
TA: Tristan da Cunha
TC: Turks & Caicos Islands
TD: Chad
TF: French Southern Territories
TG: Togo
TH: Thailand
TJ: Tajikistan
TK: Tokelau
TL: Timor-Leste
TM: Turkmenistan
TN: Tunisia
TO: Tonga
TR: Türkiye
TT: Trinidad & Tobago
TV: Tuvalu
TW: Taiwan
TZ: Tanzania
UA: Ukraine
UG: Uganda
UM: U.S. Outlying Islands
UN: United Nations
US: United States
UY: Uruguay
UZ: Uzbekistan
VA: Vatican City
VC: St. Vincent & Grenadines
VE: Venezuela
VG: British Virgin Islands
VI: U.S. Virgin Islands
VN: Vietnam
VU: Vanuatu
WF: Wallis & Futuna
WS: Samoa
XK: Kosovo
YE: Yemen
YT: Mayotte
ZA: South Africa
ZM: Zambia
ZW: Zimbabwe
:ok
# some territories contain other territories

Cldr.Territory.children(:EU)
{:ok,
 [:AT, :BE, :CY, :CZ, :DE, :DK, :EE, :ES, :FI, :FR, :GR, :HR, :HU, :IE, :IT, :LT, :LU, :LV, :MT,
  :NL, :PL, :PT, :SE, :SI, :SK, :BG, :RO]}
# Print one "CODE: Display name" line per territory contained in :EU.
:EU
|> Cldr.Territory.children!()
|> Enum.each(fn child_code ->
  {:ok, label} = Cldr.Territory.display_name(child_code)
  IO.puts("#{child_code}: #{label}")
end)
AT: Austria
BE: Belgium
CY: Cyprus
CZ: Czechia
DE: Germany
DK: Denmark
EE: Estonia
ES: Spain
FI: Finland
FR: France
GR: Greece
HR: Croatia
HU: Hungary
IE: Ireland
IT: Italy
LT: Lithuania
LU: Luxembourg
LV: Latvia
MT: Malta
NL: Netherlands
PL: Poland
PT: Portugal
SE: Sweden
SI: Slovenia
SK: Slovakia
BG: Bulgaria
RO: Romania
:ok
# we can have a look at the sub-continent and continent that a particular territory belongs to

Cldr.territory_chain(:IN)
{:ok, [:IN, :"034", :"142", :"001"]}
{:ok, parents} = Cldr.territory_chain(:IN)

# Print the chain from the territory itself up to the world, one per line.
Enum.each(parents, fn code ->
  {:ok, label} = Cldr.Territory.display_name(code)
  IO.puts("#{code}: #{label}")
end)
IN: India
034: Southern Asia
142: Asia
001: world
:ok
# for most territories, additional info is available

info = Cldr.Territory.info!(:BR)
Map.keys(info)
[:currency, :measurement_system, :language_population, :gdp, :literacy_percent, :population]
info
%{
  currency: [
    BRL: %{from: ~D[1994-07-01]},
    BRR: %{from: ~D[1993-08-01], to: ~D[1994-07-01]},
    BRE: %{from: ~D[1990-03-16], to: ~D[1993-08-01]},
    BRN: %{from: ~D[1989-01-15], to: ~D[1990-03-16]},
    BRC: %{from: ~D[1986-02-28], to: ~D[1989-01-15]},
    BRB: %{from: ~D[1967-02-13], to: ~D[1986-02-28]},
    BRZ: %{from: ~D[1942-11-01], to: ~D[1967-02-13]}
  ],
  measurement_system: %{default: :metric, paper_size: :a4, temperature: :metric},
  language_population: %{
    "de" => %{population_percent: 0.84},
    "en" => %{population_percent: 8},
    "es" => %{population_percent: 0.036},
    "gub" => %{population_percent: 0.0084},
    "it" => %{population_percent: 0.28},
    "ja" => %{population_percent: 0.21},
    "kgp" => %{population_percent: 0.024},
    "ko" => %{population_percent: 0.021},
    "pt" => %{population_percent: 91, official_status: "official"},
    "vec" => %{population_percent: 0.24, official_status: "official_regional"},
    "xav" => %{population_percent: 0.0047},
    "yrl" => %{population_percent: 0.01}
  },
  gdp: 3248000000000,
  literacy_percent: 90.4,
  population: 211716000
}
# but this is not the case for all territories

Cldr.Territory.info(:"001")
{:ok, nil}
# territory info includes a 'language population' map 

info[:language_population]
%{
  "de" => %{population_percent: 0.84},
  "en" => %{population_percent: 8},
  "es" => %{population_percent: 0.036},
  "gub" => %{population_percent: 0.0084},
  "it" => %{population_percent: 0.28},
  "ja" => %{population_percent: 0.21},
  "kgp" => %{population_percent: 0.024},
  "ko" => %{population_percent: 0.021},
  "pt" => %{population_percent: 91, official_status: "official"},
  "vec" => %{population_percent: 0.24, official_status: "official_regional"},
  "xav" => %{population_percent: 0.0047},
  "yrl" => %{population_percent: 0.01}
}
# What is the name of the language?

"pt-BR"
|> Cldr.LanguageTag.parse!()
|> DemoApp.Cldr.Language.to_string()
{:ok, "Portuguese"}

Which languages have the most native speakers?

First, we define some helper functions for aggregating the population data in CLDR.

defmodule Helpers do
  @moduledoc """
  Helper functions for aggregating and presenting the language population
  data found in CLDR territory info maps.
  """

  # Languages whose estimated speaker count in a territory does not exceed
  # this value are left out of the summaries.
  @min_speakers 10_000_000

  @doc """
  Rounds `number` and formats it as a string using the `"en"` locale.

  Accepts a float, an integer, or a value `Decimal.round/2` can handle.

  NOTE: despite the name, the value is rounded to the nearest *thousand*
  (`Decimal.round/2` with `-3` places), producing strings such as
  `"543,697,000"`. The name is kept so existing callers keep working.
  """
  @spec round_to_nearest_million(number() | Decimal.t()) :: String.t()
  def round_to_nearest_million(number) when is_float(number) do
    number
    |> Decimal.from_float()
    |> round_to_nearest_million()
  end

  def round_to_nearest_million(number) when is_integer(number) do
    number
    |> Decimal.new()
    |> round_to_nearest_million()
  end

  def round_to_nearest_million(number) do
    number
    # -3 places: drop everything below the thousands position.
    |> Decimal.round(-3)
    |> Decimal.to_integer()
    |> DemoApp.Cldr.Number.to_string!(locale: "en")
  end

  @doc """
  Returns the display name of `language_code` as used in `territory_code`.

  Builds the locale string `"<language_code>-<territory_code>"`, parses it and
  asks the backend for the language name. If either the parse or the name
  lookup does not return `{:ok, _}`, the locale string itself is returned, so
  this function never raises on an unknown or unparsable tag.
  """
  @spec language_name(term(), term()) :: String.t()
  def language_name(territory_code, language_code) do
    locale_string = "#{language_code}-#{territory_code}"

    with {:ok, language_tag} <- Cldr.LanguageTag.parse(locale_string),
         {:ok, name} <- DemoApp.Cldr.Language.to_string(language_tag) do
      name
    else
      # Any failure (parse error or unknown language) falls back to the raw tag.
      _ -> locale_string
    end
  end

  @doc """
  Turns a territory info map into a list of `{language_code, speakers}` tuples.

  `speakers` is `population_percent * 0.01 * population` for each entry of the
  `:language_population` map. The list is sorted by `speakers`, largest first,
  and only entries with more than 10,000,000 speakers are kept.
  """
  @spec languages_from_territory_info(map()) :: [{term(), float()}]
  def languages_from_territory_info(%{language_population: language_map, population: population}) do
    language_map
    |> Enum.map(fn {language_code, %{population_percent: percent}} ->
      {language_code, percent * 0.01 * population}
    end)
    |> Enum.sort(fn {_code_a, speakers_a}, {_code_b, speakers_b} ->
      speakers_a > speakers_b
    end)
    |> Enum.filter(fn {_code, speakers} -> speakers > @min_speakers end)
  end

  @doc """
  Builds a printable language summary for a `{territory_code, info}` tuple.

  Returns a list of `{formatted_speakers, "code (Language name)"}` tuples in
  the order produced by `languages_from_territory_info/1`.
  """
  @spec language_population_summary({term(), map()}) :: [{String.t(), String.t()}]
  def language_population_summary({territory_code, info}) do
    info
    |> languages_from_territory_info()
    |> Enum.map(fn {language_code, speakers} ->
      {round_to_nearest_million(speakers),
       "#{language_code} (#{language_name(territory_code, language_code)})"}
    end)
  end
end
{:module, Helpers, <<70, 79, 82, 49, 0, 0, 18, ...>>, {:language_population_summary, 1}}

Let’s use these helpers to show a summary for an individual territory.

# Summarise a single territory: look up its display name and info map,
# then list its languages via Helpers.language_population_summary/1.
code = :IN
{:ok, name} = Cldr.Territory.display_name(code)
{:ok, info} = Cldr.Territory.info(code)
lang_populations = Helpers.language_population_summary({code, info})
# The last expression is the cell's result: {territory name, language summary}.
{name, lang_populations}
{"India",
 [
   {"543,697,000", "hi (Hindi)"},
   {"251,957,000", "en (English)"},
   {"107,413,000", "bn (Bangla)"},
   {"95,478,000", "te (Telugu)"},
   {"92,826,000", "mr (Marathi)"},
   {"78,239,000", "ta (Tamil)"},
   {"66,305,000", "ur (Urdu)"},
   {"59,674,000", "gu (Gujarati)"},
   {"49,065,000", "kn (Kannada)"},
   {"42,435,000", "or (Odia)"},
   {"42,435,000", "ml (Malayalam)"},
   {"37,131,000", "pa (Punjabi)"},
   {"30,500,000", "bho (Bhojpuri)"},
   {"25,196,000", "awa (Awadhi)"},
   {"17,239,000", "as (Assamese)"},
   {"15,913,000", "mai (Maithili)"},
   {"15,913,000", "bgc (Haryanvi)"},
   {"15,913,000", "mag (Magahi)"},
   {"15,913,000", "mwr (Marwari)"},
   {"14,587,000", "hne (hne-IN)"},
   {"13,128,000", "dcc (dcc-IN)"}
 ]}

Or, we can loop through the known territories, sorting them by population, and summarize the languages spoken in the most populous ones.

num_largest_territories = 10

language_populations =
  Cldr.known_territories()
  |> Enum.flat_map(fn territory_code ->
    # pair each territory with its info map; territories without info are dropped
    case Cldr.Territory.info!(territory_code) do
      nil -> []
      info -> [{territory_code, info}]
    end
  end)
  # most populous territories first
  |> Enum.sort(fn {_code_a, a}, {_code_b, b} -> a.population > b.population end)
  # keep only the requested number of territories
  |> Enum.take(num_largest_territories)
  |> Enum.map(fn {territory_code, info} = entry ->
    # build the per-territory summary: name, total population, languages
    {:ok, name} = Cldr.Territory.display_name(territory_code)

    %{
      name: name,
      population: Helpers.round_to_nearest_million(info.population),
      languages: Helpers.language_population_summary(entry)
    }
  end)
[
  %{
    name: "China",
    languages: [
      {"1,254,618,000", "zh (Chinese)"},
      {"83,641,000", "wuu (Wu Chinese)"},
      {"72,489,000", "yue-Hans (Cantonese)"},
      {"40,427,000", "hsn (Xiang Chinese)"},
      {"32,062,000", "hak (Hakka Chinese)"},
      {"26,486,000", "nan (Min Nan Chinese)"},
      {"23,698,000", "gan (Gan Chinese)"}
    ],
    population: "1,394,020,000"
  },
  %{
    name: "India",
    languages: [
      {"543,697,000", "hi (Hindi)"},
      {"251,957,000", "en (English)"},
      {"107,413,000", "bn (Bangla)"},
      {"95,478,000", "te (Telugu)"},
      {"92,826,000", "mr (Marathi)"},
      {"78,239,000", "ta (Tamil)"},
      {"66,305,000", "ur (Urdu)"},
      {"59,674,000", "gu (Gujarati)"},
      {"49,065,000", "kn (Kannada)"},
      {"42,435,000", "or (Odia)"},
      {"42,435,000", "ml (Malayalam)"},
      {"37,131,000", "pa (Punjabi)"},
      {"30,500,000", "bho (Bhojpuri)"},
      {"25,196,000", "awa (Awadhi)"},
      {"17,239,000", "as (Assamese)"},
      {"15,913,000", "mai (Maithili)"},
      {"15,913,000", "bgc (Haryanvi)"},
      {"15,913,000", "mag (Magahi)"},
      {"15,913,000", "mwr (Marwari)"},
      {"14,587,000", "hne (hne-IN)"},
      {"13,128,000", "dcc (dcc-IN)"}
    ],
    population: "1,326,090,000"
  },
  %{
    name: "United States",
    languages: [{"319,333,000", "en (English)"}, {"31,933,000", "es (Spanish)"}],
    population: "332,639,000"
  },
  %{
    name: "Indonesia",
    languages: [
      {"170,897,000", "id (Indonesian)"},
      {"90,789,000", "jv (Javanese)"},
      {"32,043,000", "su (Sundanese)"},
      {"16,823,000", "mad (Madurese)"}
    ],
    population: "267,026,000"
  },
  %{
    name: "Pakistan",
    languages: [
      {"221,826,000", "ur (Urdu)"},
      {"163,451,000", "pa-Arab (Punjabi)"},
      {"116,751,000", "en (English)"},
      {"93,400,000", "lah (Western Panjabi)"},
      {"37,360,000", "ps (Pashto)"},
      {"35,025,000", "sd (Sindhi)"},
      {"28,020,000", "skr (skr-PK)"}
    ],
    population: "233,501,000"
  },
  %{
    name: "Nigeria",
    languages: [
      {"113,435,000", "en (English)"},
      {"44,946,000", "pcm (Nigerian Pidgin)"},
      {"27,824,000", "yo (Yoruba)"},
      {"27,824,000", "ig (Igbo)"},
      {"27,824,000", "ha (Hausa)"},
      {"14,340,000", "fuv (fuv-NG)"}
    ],
    population: "214,028,000"
  },
  %{
    name: "Brazil",
    languages: [{"192,662,000", "pt (Portuguese)"}, {"16,937,000", "en (English)"}],
    population: "211,716,000"
  },
  %{
    name: "Bangladesh",
    languages: [
      {"159,398,000", "bn (Bangla)"},
      {"29,277,000", "en (English)"},
      {"10,572,000", "rkt (rkt-BD)"}
    ],
    population: "162,651,000"
  },
  %{name: "Russia", languages: [{"133,219,000", "ru (Russian)"}], population: "141,722,000"},
  %{
    name: "Mexico",
    languages: [{"106,780,000", "es (Spanish)"}, {"16,725,000", "en (English)"}],
    population: "128,650,000"
  }
]