NX PHI-3
Mix.install(
  [
    {:bumblebee, git: "https://github.com/elixir-nx/bumblebee", branch: "main"},
    {:nx, "~> 0.7.0"},
    {:exla, "~> 0.7.1"}
  ],
  config: [nx: [default_backend: EXLA.Backend]]
)
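A quick sanity check (not part of the original setup, just a suggested sketch): once Mix.install/2 finishes, the :nx config above should make EXLA the default backend, and a small tensor operation should run without raising.

# Confirm the default backend picked up from the :nx config above.
Nx.default_backend()
# Expected to return {EXLA.Backend, []}

# A tiny computation to make sure EXLA compiled and loads correctly.
Nx.tensor([1.0, 2.0, 3.0]) |> Nx.multiply(2)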
# Set environment variables
# XLA_BUILD=true
# XLA_TARGET=cuda
# asdf plugin-add bazel
# asdf install bazel 6.5.0 # bazel 7 fails
# asdf global bazel 6.5.0
# rm -rf ~/.cache
# wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-ubuntu2204.pin
# sudo mv cuda-ubuntu2204.pin /etc/apt/preferences.d/cuda-repository-pin-600
# sudo apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/3bf863cc.pub
# sudo add-apt-repository "deb http://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/ /"
# sudo apt update
# sudo apt install cuda-toolkit-12-2 nvidia-cudnn python3-numpy python-is-python3
# export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64"
# export CUDA_HOME=/usr/local/cuda
# export PATH="/usr/local/cuda/bin:$PATH"
# export XLA_FLAGS=--xla_gpu_cuda_data_dir=/usr/local/cuda
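With the CUDA toolkit installed and the variables above exported, it helps to confirm that EXLA actually sees the GPU before loading a large model. A minimal check, assuming EXLA's EXLA.Client.get_supported_platforms/0 is available (it is documented in EXLA 0.7):

# List the platforms EXLA can target; a :cuda entry means the GPU is visible.
EXLA.Client.get_supported_platforms()
# e.g. %{cuda: 1, host: 16} -- the exact map depends on the machine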
Text generation
{:ok, gpt2} = Bumblebee.load_model({:hf, "gpt2"})
{:ok, tokenizer} = Bumblebee.load_tokenizer({:hf, "gpt2"})
{:ok, generation_config} = Bumblebee.load_generation_config({:hf, "gpt2"})
generation_config = Bumblebee.configure(generation_config, max_new_tokens: 10)

serving = Bumblebee.Text.generation(gpt2, tokenizer, generation_config)

text = "Yesterday, I was reading a book and"
Nx.Serving.run(serving, text)
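The notebook title refers to Phi-3, so the same serving pipeline can be pointed at a Phi-3 checkpoint. A sketch, assuming the Bumblebee main branch installed above supports the Phi-3 architecture; microsoft/Phi-3-mini-4k-instruct is used only as an illustrative repository and is not named in the original:

repo = {:hf, "microsoft/Phi-3-mini-4k-instruct"}

# Load the parameters in bf16 to roughly halve GPU memory use.
{:ok, phi3} = Bumblebee.load_model(repo, type: :bf16)
{:ok, phi3_tokenizer} = Bumblebee.load_tokenizer(repo)
{:ok, phi3_generation_config} = Bumblebee.load_generation_config(repo)
phi3_generation_config = Bumblebee.configure(phi3_generation_config, max_new_tokens: 64)

phi3_serving = Bumblebee.Text.generation(phi3, phi3_tokenizer, phi3_generation_config)
Nx.Serving.run(phi3_serving, "Explain Nx in one sentence.")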