Geonames
Mix.install([
{:req, "~> 0.5.0"},
{:explorer, "~> 0.8.0"},
{:kino_explorer, "~> 0.1.19"}
])
Description
We use Geonames as the data source for the database of cities, regions, and countries in Hamster Travel.
The documentation is available in the readme.txt.
Interesting bits for this project:
- countryInfo.txt : country information : iso codes, fips codes, languages, capital ,…
- XX.zip : features for country with iso code XX, see ‘geoname’ table for columns. ‘no-country’ for features not belonging to a country.
- alternateNamesV2.zip : alternate names with language codes and geonameId: translations here
All downloads are available in this folder
require Explorer.DataFrame, as: DF
Countries info
File countryInfo.txt
# Download countryInfo.txt and strip its comment preamble: every line starting
# with "#" is dropped, except the "#ISO..." line, which is kept so it can serve
# as the CSV header row.
csv =
  "https://download.geonames.org/export/dump/countryInfo.txt"
  |> Req.get!()
  |> Map.fetch!(:body)
  |> String.split("\n")
  |> Enum.reject(fn line ->
    String.starts_with?(line, "#") and not String.starts_with?(line, "#ISO")
  end)
  |> Enum.join("\n")
# Parse the tab-separated text into a dataframe and rename the first column so
# its name no longer carries the leading "#".
countries =
  csv
  |> DF.load_csv!(delimiter: "\t")
  |> DF.rename(%{"#ISO" => "ISO"})
# Countries whose Population is greater than zero, narrowed to a fixed set of
# columns. `col/1` is used because "Population" starts with an
# upper-case letter and so cannot be written as a bare variable in the query.
countries
|> DF.lazy()
|> DF.filter(col("Population") > 0)
|> DF.select(~w(ISO Country geonameid Continent CurrencyCode CurrencyName ISO3 ISO-Numeric))
|> DF.collect()
Features (DE)
# The response body is a list of {file_name, contents} entries, one per file
# in the archive. Pick the data file by name instead of by position, so this
# cell does not depend on the order (or number) of entries inside the zip.
# `to_string/1` is used so the comparison works whether the name is a charlist
# or a binary. A missing "DE.txt" entry fails the match with a MatchError.
{_name, csv} =
  "https://download.geonames.org/export/dump/DE.zip"
  |> Req.get!()
  |> Map.fetch!(:body)
  |> Enum.find(fn {name, _contents} -> to_string(name) == "DE.txt" end)

csv
# create test data for my fixtures
# filter out regions
# regions_de =
# csv
# |> String.split("\n")
# |> Enum.map(fn r -> String.split(String.trim(r), "\t") end)
# |> Enum.filter(fn row ->
# case row do
# [_, _, _, _, _, _, feature_class, feature_code, _, _, _, _, _, _, _, _, _, _, _] ->
# feature_class == "A" && feature_code == "ADM1"
# _ ->
# false
# end
# end)
# |> Enum.map(fn row -> Enum.join(row, "\t") end)
# |> Enum.join("\n")
# |> Kino.Text.new()
# Cities test data
# csv
# |> String.split("\n")
# |> Enum.map(fn r -> String.split(String.trim(r), "\t") end)
# |> Enum.filter(fn row ->
# case row do
# [_, _, _, _, _, _, feature_class, _, _, _, _, _, _, _, population, _, _, _, _] ->
# feature_class == "P" && String.to_integer(population) > 100_000
# _ ->
# false
# end
# end)
# |> Enum.map(fn row -> Enum.join(row, "\t") end)
# |> Enum.join("\n")
# |> Kino.Text.new()
# Load the headerless, tab-separated feature dump and give its columns readable
# names. The list below is in file order: the n-th entry is the new name for
# the auto-generated "column_n".
de_features =
  csv
  |> DF.load_csv!(delimiter: "\t", header: false, infer_schema_length: 10000)
  |> DF.rename(
    [
      "geoname_id",
      "name",
      "asciiname",
      "alternatenames",
      "lat",
      "lon",
      "feature_class",
      "feature_code",
      "country_code",
      "cc2",
      "admin1 code",
      "admin2 code",
      "admin3 code",
      "admin4 code",
      "population",
      "elevation",
      "digital elevation model",
      "timezone",
      "updated_at"
    ]
    |> Enum.with_index(1)
    |> Map.new(fn {new_name, position} -> {"column_#{position}", new_name} end)
  )
Cities in Germany
# Cities: rows with feature class "P". The `population >= 0` condition also
# admits rows whose population is recorded as 0.
de_features
|> DF.lazy()
|> DF.filter(feature_class == "P")
|> DF.filter(population >= 0)
|> DF.select(["name", "lat", "lon", "population", "admin1 code", "country_code", "geoname_id"])
|> DF.collect()
Regions of Germany
# Regions: rows with feature class "A" and feature code "ADM1".
de_features
|> DF.lazy()
|> DF.filter(feature_class == "A")
|> DF.filter(feature_code == "ADM1")
|> DF.select([
  "name",
  "admin1 code",
  "geoname_id",
  "lat",
  "lon",
  "country_code"
])
|> DF.collect()
Translations for Germany
# The response body is a list of {file_name, contents} entries, one per file
# in the archive. Pick the data file by name instead of by position, so this
# cell does not depend on the order (or number) of entries inside the zip.
# NOTE(review): assumes the data entry is named "DE.txt" - confirm against the
# archive; a missing entry fails the match with a MatchError.
{_name, csv} =
  "https://download.geonames.org/export/dump/alternatenames/DE.zip"
  |> Req.get!()
  |> Map.fetch!(:body)
  |> Enum.find(fn {name, _contents} -> to_string(name) == "DE.txt" end)

csv
# Load the headerless, tab-separated alternate-names dump and name its columns.
# The list below is in file order: the n-th entry is the new name for the
# auto-generated "column_n".
de_translations =
  csv
  |> DF.load_csv!(delimiter: "\t", header: false)
  |> DF.rename(
    [
      "al_id",
      "geonames_id",
      "lang",
      "altname",
      "is_preferred",
      "is_short",
      "is_colloquial",
      "is_historic",
      "from",
      "to"
    ]
    |> Enum.with_index(1)
    |> Map.new(fn {new_name, position} -> {"column_#{position}", new_name} end)
  )
# Alternate names whose language code is "ru". The result is left as a lazy
# frame; each lookup below collects its own result.
ru_de_translations = de_translations |> DF.lazy() |> DF.filter(lang == "ru")

# `ru_de_translations` is already lazy, so it is filtered directly instead of
# being passed through `DF.lazy()` a second time.
ru_de_translations |> DF.filter(geonames_id == 2_945_356) |> DF.collect()
ru_de_translations |> DF.filter(geonames_id == 2_921_044) |> DF.collect()