Prepare training set for OpenAI


Fetch the Advent of Code dataset from Hugging Face

# Issue a GET for the dataset's train.json and bind the response body to
# `dataset`. `receive_timeout` is raised to 600_000 (ms per the Req docs).
# NOTE(review): presumably Req decodes the JSON body into a list of maps, since
# `dataset` is filtered with `Enum.filter/2` right after — confirm.
dataset_url = "https://huggingface.co/datasets/isavita/advent-of-code/resolve/main/train.json"
%{body: dataset} = Req.get!(dataset_url, receive_timeout: 600_000)

# Keep only the Go solutions: Go is the only language for which the dataset has every challenge solved.
dataset =
  Enum.filter(dataset, fn part ->
    part["solution_lang"] == "go"

System Constants

defmodule Constants do
  def file_ext, do: ".zig"
  def lang, do: "zig"
  def output_dir, do: ~s|#{System.get_env("HOME")}/code/advent_generated/training_data|
  def input_dir, do: ~s|#{System.get_env("HOME")}/code/advent_generated/zig|
defmodule Challenge do
  def call do
    # Read input and solve challenge
    result = 42

    # Print result


Prompt Templates with the task, but no example solution

# Text used as the "system" role content of every training example. It sets the
# persona (experienced programmer in a coding challenge) and asks for clean,
# concise code. The sigil body is emitted verbatim into the training data, so
# any edit here changes the generated dataset.
sys_msg =
  ~s|You are a highly experienced programmer with a PhD in computer science participating in a coding challenge.
Write clean, efficient code without unnecessary comments, demonstrating your advanced skills by solving problems practically and concisely.
Aim to produce optimal and concise solutions, leveraging your decade of industry experience.|

user_part1_fn = fn task ->
  Write an #{Constants.lang()} program that reads input from a file called input.txt and prints the output to standard output.
  Focus on writing clean, efficient code that demonstrates your programming skills by concisely solving the challenge.

  Coding challenge:

user_part2_fn = fn task ->
  Write an #{Constants.lang()} program that reads input from a file called input.txt and prints the output to standard output.
  Focus on writing clean, efficient code that demonstrates your programming skills by concisely solving the challenge.

  Coding challenge:

assistant_solution_fn = fn solution ->
  Here is a solution of the challenge:

Training Data Preparation

# List every entry in the solutions directory, then keep only those whose name
# ends with the configured file extension.
#
# A suffix match is used instead of a substring match so that entries which
# merely contain the extension (e.g. `foo.zig.bak` or a `.zig-cache` directory)
# are not treated as solution files and handed to `File.read!/1` further down.
# The extension is looked up once rather than on every filter iteration.
solutions = File.ls!(Constants.input_dir())
file_ext = Constants.file_ext()
solved = Enum.filter(solutions, &String.ends_with?(&1, file_ext))

data =
  Enum.reduce(dataset, [], fn day, acc ->
    case Enum.find(solved, nil, fn s -> String.starts_with?(s, day["name"]) end) do
      nil ->

      filename ->
        solution = File.read!("#{Constants.input_dir()}/#{filename}") |> String.trim()
        system_msg = %{"role" => "system", "content" => sys_msg}

        user_content =
          if String.contains?(day["name"], "part2_") do
            # this task includes both parts; part 1 and part 2.

        user_msg = %{"role" => "user", "content" => user_content}
        assistant_msg = %{"role" => "assistant", "content" => assistant_solution_fn.(solution)}

        messages = %{
          "messages" => [

        [messages | acc]
  |> Enum.reverse()

Data export to JSONL format

output_path =

file = File.open!(output_path, [:write, :create])

Enum.each(data, fn raw ->
  json = Jason.encode!(raw)
  IO.binwrite(file, json <> "\n")

Upload the training file

{resp, status} =
  System.cmd("curl", [
    "Authorization: Bearer #{System.fetch_env!("LB_OPENAI_API_KEY")}",
file_upload = Jason.decode!(resp) |> IO.inspect()
file_id = file_upload["id"]

Create a fine-tuned model

Create Training job from OpenAI UI here

Erlang Prompts

# sys_msg =
#   ~s|You are a highly experienced programmer with a PhD in computer science participating in a coding challenge.
# Write clean, efficient code without unnecessary comments, demonstrating your advanced skills by solving problems practically and concisely.
# Aim to produce optimal and concise solutions, leveraging your decade of industry experience.|

# user_part1_fn = fn task ->
#   ~s"""
#   Write an #{Constants.lang()} program that reads input from a file called input.txt and prints the output to standard output.
#   Structure the program as a module named task, with a single exported function call/0 that takes no arguments.
#   The program structure should follow this template:
#   ```#{Constants.lang()}
#   -module(task).
#   -export([call/0]).

#   call() ->
#     % Read input and solve challenge
#     Result = ...,

#     % Print result 
#     io:format("~p~n", [Result]).
#   ```

#   Aim to write clean, efficient #{Constants.lang()} code that demonstrates strong programming skills.

#   Coding challenge:
#   ```text
#   #{task}
#   ```
#   """
# end

# user_part2_fn = fn task ->
#   ~s"""
#   Write an #{Constants.lang()} program that reads input from a file called input.txt and prints the output to standard output.
#   Structure the program as a module named task, with a single exported function call/0 that takes no arguments.
#   The program structure should follow this template:
#   ```#{Constants.lang()}
#   -module(task).
#   -export([call/0]).

#   call() ->
#     % Read input and solve Part 2
#     Result2 = ..., 

#     % Print Part 2 result
#     io:format("~p~n", [Result2]).
#   ```

#   Focus on writing clean, efficient Erlang code that demonstrates your programming skills by concisely solving Part 2 of the challenge.

#   Coding challenge:
#   ```text
#   #{task}
#   ```
#   """
# end

# assistant_solution_fn = fn solution ->
#   ~s"""
#   Here is a solution of the challenge:
#   ```#{Constants.lang()}
#   #{solution}
#   ```
#   """
# end