Linear regression

coursera/linear_regression.livemd

Raymond Boswel

@raymondboswel

elixir-machine-learning

More notebooks

Linear regression

Mix.install([
{:axon, "~> 0.6.1"},
{:nx, "~> 0.7.0"},
{:explorer, "~> 0.9.2"},
{:kino, "~> 0.14.0"},
{:scholar, "0.2.1"},
{:vega_lite, "~> 0.1.6"},
{:kino_vega_lite, "~> 0.1.11"}
])

Upload data

The file should contain two comma-separated columns, one record per line and no header row: the first column is the population of a city, and the second is the profit made by a food truck in that city.

# Create a file input labelled "Example 1 Data" and bind it to ex1_file so
# that its current value can be read with Kino.Input.read/1 further down.
ex1_file = Kino.Input.file("Example 1 Data")

# Reading and parsing the file

# Read the current value of the file input. The value is nil until a file has
# been uploaded, so fail with an explicit message instead of an opaque error
# when accessing `file_ref` on nil.
file_value =
  Kino.Input.read(ex1_file) ||
    raise "No file uploaded yet: choose a file in the \"Example 1 Data\" input and re-evaluate"

# Resolve the uploaded file reference to a path on the local file system.
file_path = Kino.Input.file_path(file_value.file_ref)

# File.read!/1 raises a File.Error that names the path and the reason, which
# is more helpful than a bare MatchError on an {:error, reason} tuple.
file_content = File.read!(file_path)

# Split file by newline, and then each line by comma

# Trim every line before dropping the empty ones: this strips the "\r" left
# behind by Windows (CRLF) line endings and discards whitespace-only lines,
# which would otherwise fail the two-column match below.
lines =
  file_content
  |> String.split("\n")
  |> Enum.map(&String.trim/1)
  |> Enum.reject(&(&1 == ""))

# Build one map per record. The values are kept as (trimmed) strings; they are
# converted to floats where numeric values are needed.
# Raises MatchError if a line does not contain exactly two comma-separated fields.
data =
  Enum.map(lines, fn line ->
    [population, profit_per_foodtruck] =
      line |> String.split(",") |> Enum.map(&String.trim/1)

    %{"population" => population, "profit_per_foodtruck" => profit_per_foodtruck}
  end)

data

# Scatter plot of the uploaded records: population on the x axis and
# profit per food truck on the y axis.
alias VegaLite, as: Vl

Vl.new(title: "Raw Data", width: 700, height: 400)
|> Vl.data_from_values(data, only: ["population", "profit_per_foodtruck"])
|> Vl.mark(:point)
|> Vl.encode_field(:x, "population", type: :quantitative)
|> Vl.encode_field(:y, "profit_per_foodtruck", type: :quantitative)

# Perform linear regression with Scholar

# Parse one numeric field. Surrounding whitespace is trimmed first because
# Float.parse/1 returns :error on a leading space (e.g. in "6.1, 17.5").
# Raises MatchError if the field is not numeric.
parse_float = fn text ->
  {value, _rest} = text |> String.trim() |> Float.parse()
  value
end

# Walk the lines once and split them into features (x) and targets (y).
# Each value is wrapped in a single-element list, so x and y are both
# lists of one-element rows.
{x, y} =
  lines
  |> Enum.map(fn line ->
    [population_string, profit_string] = String.split(line, ",")
    {[parse_float.(population_string)], [parse_float.(profit_string)]}
  end)
  |> Enum.unzip()

alias Scholar.Linear.LinearRegression

# Convert the nested feature and target lists into tensors.
[xTensor, yTensor] = Enum.map([x, y], &Nx.tensor/1)

# Fit a linear regression of the targets on the features.
model = LinearRegression.fit(xTensor, yTensor)

# Display the fitted coefficients.
model.coefficients

# Pull the fitted parameters out of the model once, as plain numbers, instead
# of converting the same tensors again for every record.
slope = Nx.to_number(model.coefficients[0][0])
intercept = Nx.to_number(model.intercept[0])

# For every record, convert the string fields to floats and add the profit
# predicted by the fitted line (slope * population + intercept).
# Raises MatchError if a field is not numeric.
data_with_regression =
  Enum.map(data, fn datum ->
    {population, _} = Float.parse(datum["population"])
    {profit, _} = Float.parse(datum["profit_per_foodtruck"])

    %{
      "population" => population,
      "profit_per_foodtruck" => profit,
      "predicted_profit_per_foodtruck" => population * slope + intercept
    }
  end)

alias VegaLite, as: Vl

# Layer 1: the observed records as points.
observed_layer =
  Vl.new()
  |> Vl.mark(:point)
  |> Vl.encode_field(:x, "population", type: :quantitative)
  |> Vl.encode_field(:y, "profit_per_foodtruck", type: :quantitative)

# Layer 2: the predicted profit drawn as a line over the same x axis.
predicted_layer =
  Vl.new()
  |> Vl.mark(:line, color: :firebrick)
  |> Vl.encode_field(:x, "population", type: :quantitative)
  |> Vl.encode_field(:y, "predicted_profit_per_foodtruck", type: :quantitative)

# Both layers share the same dataset; the chart is the cell's final expression.
Vl.new(title: "Linear Regression", width: 700, height: 400)
|> Vl.data_from_values(
  data_with_regression,
  only: ["population", "profit_per_foodtruck", "predicted_profit_per_foodtruck"]
)
|> Vl.layers([observed_layer, predicted_layer])

Back