Powered by AppSignal & Oban Pro
Would you like to see your link here? Contact us

Geonames

geonames.livemd

Geonames

Mix.install([
  {:req, "~> 0.5.0"},
  {:explorer, "~> 0.8.0"},
  {:kino_explorer, "~> 0.1.19"}
])

Description

We use GeoNames as the data source for the database of cities, regions, and countries in Hamster Travel.

The documentation is available in the readme.txt.

Interesting bits for this project:

  • countryInfo.txt : country information : iso codes, fips codes, languages, capital, …
  • XX.zip : features for country with iso code XX, see ‘geoname’ table for columns. ‘no-country’ for features not belonging to a country.
  • alternateNamesV2.zip : alternate names with language codes and geonameId: translations here

All downloads are available in this folder

require Explorer.DataFrame, as: DF

Countries info

File countryInfo.txt

# Fetch countryInfo.txt and drop its "#" comment lines. The single "#ISO..."
# line is kept because it is used as the CSV header row below.
csv =
  "https://download.geonames.org/export/dump/countryInfo.txt"
  |> Req.get!()
  |> Map.fetch!(:body)
  |> String.split("\n")
  |> Enum.reject(fn line ->
    String.starts_with?(line, "#") and not String.starts_with?(line, "#ISO")
  end)
  |> Enum.join("\n")

# Parse the tab-separated text and strip the leading "#" from the first header.
countries =
  csv
  |> DF.load_csv!(delimiter: "\t")
  |> DF.rename(%{"#ISO" => "ISO"})

# Rows with a positive "Population", narrowed to the columns listed here.
countries
|> DF.lazy()
|> DF.filter(col("Population") > 0)
|> DF.select(~w(ISO Country geonameid Continent CurrencyCode CurrencyName ISO3 ISO-Numeric))
|> DF.collect()

Features (DE)

# Req decodes the zip response body into a list of {filename, contents} entries.
# Select the data file by name instead of by position, so a different entry
# order or an extra file in the archive cannot silently bind `csv` to the
# wrong entry. If "DE.txt" is missing this raises a MatchError.
# `to_string/1` normalises the entry name whether it arrives as a charlist or
# as a binary.
{_, csv} =
  "https://download.geonames.org/export/dump/DE.zip"
  |> Req.get!()
  |> Map.fetch!(:body)
  |> Enum.find(fn {name, _contents} -> Path.basename(to_string(name)) == "DE.txt" end)

csv
# create test data for my fixtures
# filter out regions

# regions_de =
#   csv
#   |> String.split("\n")
#   |> Enum.map(fn r -> String.split(String.trim(r), "\t") end)
#   |> Enum.filter(fn row ->
#     case row do
#       [_, _, _, _, _, _, feature_class, feature_code, _, _, _, _, _, _, _, _, _, _, _] ->
#         feature_class == "A" && feature_code == "ADM1"

#       _ ->
#         false
#     end
#   end)
#   |> Enum.map(fn row -> Enum.join(row, "\t") end)
#   |> Enum.join("\n")
#   |> Kino.Text.new()
# Cities test data 

# csv
# |> String.split("\n")
# |> Enum.map(fn r -> String.split(String.trim(r), "\t") end)
# |> Enum.filter(fn row ->
#   case row do
#     [_, _, _, _, _, _, feature_class, _, _, _, _, _, _, _, population, _, _, _, _] ->
#       feature_class == "P" && String.to_integer(population) > 100_000

#     _ ->
#       false
#   end
# end)
# |> Enum.map(fn row -> Enum.join(row, "\t") end)
# |> Enum.join("\n")
# |> Kino.Text.new()
# Load the headerless, tab-separated feature dump and give its columns names.
# Without a header the columns are addressed as "column_1", "column_2", ...,
# so the rename map is built by pairing each target name with its 1-based
# position in the list below.
de_features =
  csv
  |> DF.load_csv!(delimiter: "\t", header: false, infer_schema_length: 10_000)
  |> DF.rename(
    [
      "geoname_id",
      "name",
      "asciiname",
      "alternatenames",
      "lat",
      "lon",
      "feature_class",
      "feature_code",
      "country_code",
      "cc2",
      "admin1 code",
      "admin2 code",
      "admin3 code",
      "admin4 code",
      "population",
      "elevation",
      "digital elevation model",
      "timezone",
      "updated_at"
    ]
    |> Enum.with_index(1)
    |> Map.new(fn {name, position} -> {"column_#{position}", name} end)
  )

Cities in Germany

# Features of class "P" with a non-negative population, narrowed to the
# columns needed for a city record.
# NOTE(review): `population >= 0` looks like it only excludes missing values;
# the countries query uses `> 0` - confirm which one is intended here.
de_features
|> DF.lazy()
|> DF.filter(feature_class == "P")
|> DF.filter(population >= 0)
|> DF.select(["name", "lat", "lon", "population", "admin1 code", "country_code", "geoname_id"])
|> DF.collect()

Regions of Germany

# Features of class "A" with code "ADM1", narrowed to the columns needed for
# a region record.
de_features
|> DF.lazy()
|> DF.filter(feature_class == "A")
|> DF.filter(feature_code == "ADM1")
|> DF.select([
  "name",
  "admin1 code",
  "geoname_id",
  "lat",
  "lon",
  "country_code"
])
|> DF.collect()

Translations for Germany

# Req decodes the zip response body into a list of {filename, contents} entries.
# Select the data file by name instead of by position, so a different entry
# order or an extra file in the archive cannot silently bind `csv` to the
# wrong entry. If "DE.txt" is missing this raises a MatchError.
# `to_string/1` normalises the entry name whether it arrives as a charlist or
# as a binary.
{_, csv} =
  "https://download.geonames.org/export/dump/alternatenames/DE.zip"
  |> Req.get!()
  |> Map.fetch!(:body)
  |> Enum.find(fn {name, _contents} -> Path.basename(to_string(name)) == "DE.txt" end)

csv
# Load the headerless, tab-separated alternate-names dump and give its columns
# names. Without a header the columns are addressed as "column_1",
# "column_2", ..., so the rename map pairs each target name with its 1-based
# position in the list below.
de_translations =
  csv
  |> DF.load_csv!(delimiter: "\t", header: false)
  |> DF.rename(
    [
      "al_id",
      "geonames_id",
      "lang",
      "altname",
      "is_preferred",
      "is_short",
      "is_colloquial",
      "is_historic",
      "from",
      "to"
    ]
    |> Enum.with_index(1)
    |> Map.new(fn {name, position} -> {"column_#{position}", name} end)
  )

# Lazy view restricted to rows whose `lang` is "ru". It stays lazy, so the
# lookups below filter it further and only then collect.
ru_de_translations =
  de_translations
  |> DF.lazy()
  |> DF.filter(lang == "ru")

# Spot checks for two hard-coded GeoNames ids.
ru_de_translations
|> DF.filter(geonames_id == 2_945_356)
|> DF.collect()

ru_de_translations
|> DF.filter(geonames_id == 2_921_044)
|> DF.collect()