From fe55c600d43dd216a1df0b582a972a6e8a382be9 Mon Sep 17 00:00:00 2001 From: Foke Singh Date: Mon, 14 Oct 2024 11:27:41 +0000 Subject: [PATCH] =?UTF-8?q?Add/refresh=20how=E2=80=91to=20docs=20and=20exa?= =?UTF-8?q?mple=20loader=20for=20deployment,=20Docker,=20HuggingFace=20tok?= =?UTF-8?q?en,=20and=20getting=E2=80=91started=20tutorials.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/howtos/deploy_on_server.md | 6 +++--- docs/howtos/dockerize_models.md | 12 ++++++------ docs/howtos/huggingface_access_token.md | 4 ++-- docs/huggingface-access-token.md | 4 ++-- docs/tutorials/getting_started.md | 12 ++++++------ docs/tutorials/write_first_model.md | 6 +++--- examples/loader/main.zig | 2 +- 7 files changed, 23 insertions(+), 23 deletions(-) diff --git a/docs/howtos/deploy_on_server.md b/docs/howtos/deploy_on_server.md index 6e65e94..31bbd75 100644 --- a/docs/howtos/deploy_on_server.md +++ b/docs/howtos/deploy_on_server.md @@ -25,7 +25,7 @@ housing an NVIDIA GPU, run the following: ``` cd examples -bazel run -c opt //llama:OpenLLaMA-3B --@zml//runtimes:cuda=true +bazel run --config=release //llama:OpenLLaMA-3B --@zml//runtimes:cuda=true ``` @@ -42,7 +42,7 @@ As an example, here is how you build above OpenLLama for CUDA on Linux X86_64: ``` cd examples -bazel build -c opt //llama:OpenLLaMA-3B \ +bazel build --config=release //llama:OpenLLaMA-3B \ --@zml//runtimes:cuda=true \ --@zml//runtimes:cpu=false \ --platforms=@zml//platforms:linux_amd64 @@ -84,7 +84,7 @@ tar( ``` # cd examples -bazel build -c opt //mnist:archive \ +bazel build --config=release //mnist:archive \ --@zml//runtimes:cuda=true \ --@zml//runtimes:cpu=false \ --platforms=@zml//platforms:linux_amd64 diff --git a/docs/howtos/dockerize_models.md b/docs/howtos/dockerize_models.md index 108db2e..7bce7b0 100644 --- a/docs/howtos/dockerize_models.md +++ b/docs/howtos/dockerize_models.md @@ -134,7 +134,7 @@ And that's almost it! 
You can already build the image: ``` # cd examples -bazel build -c opt //simple_layer:image +bazel build --config=release //simple_layer:image INFO: Analyzed target //simple_layer:image (1 packages loaded, 8 targets configured). INFO: Found 1 target... @@ -169,7 +169,7 @@ oci_load( ... then we can load the image and run it with the following commands: ``` -bazel run -c opt //simple_layer:load +bazel run --config=release //simple_layer:load docker run --rm distroless/simple_layer:latest ``` @@ -194,7 +194,7 @@ This will push the `simple_layer` image with the tag `latest` (you can add more) to the docker registry: ``` -bazel run -c opt //simple_layer:push +bazel run --config=release //simple_layer:push ``` When dealing with maybe a public and a private container registry - or if you @@ -202,7 +202,7 @@ just want to try it out **right now**, you can always override the repository on the command line: ``` -bazel run -c opt //simple_layer:push -- --repository my.server.com/org/image +bazel run --config=release //simple_layer:push -- --repository my.server.com/org/image ``` @@ -216,7 +216,7 @@ We'll use the [MNIST example](https://github.com/zml/zml/tree/master/examples/mnist) to illustrate how to build Docker images that also contain data files. -You can `bazel run -c opt //mnist:push -- --repository +You can `bazel run --config=release //mnist:push -- --repository index.docker.io/my_org/zml_mnist` in the `./examples` folder if you want to try it out. 
@@ -232,7 +232,7 @@ platforms your containerized model should support.** **Example:** ``` -bazel run //mnist:push -c opt --@zml//runtimes:cuda=true -- --repository index.docker.io/my_org/zml_mnist +bazel run //mnist:push --config=release --@zml//runtimes:cuda=true -- --repository index.docker.io/my_org/zml_mnist ``` diff --git a/docs/howtos/huggingface_access_token.md b/docs/howtos/huggingface_access_token.md index 99967bb..77cbaf1 100644 --- a/docs/howtos/huggingface_access_token.md +++ b/docs/howtos/huggingface_access_token.md @@ -34,8 +34,8 @@ Now you're ready to download a gated model like `Meta-Llama-3-8b`! # requires token in $HOME/.cache/huggingface/token, as created by the # `huggingface-cli login` command, or the `HUGGINGFACE_TOKEN` environment variable. cd examples -bazel run -c opt //llama:Meta-Llama-3-8b -bazel run -c opt //llama:Meta-Llama-3-8b -- --promt="Once upon a time," +bazel run --config=release //llama:Meta-Llama-3-8b +bazel run --config=release //llama:Meta-Llama-3-8b -- --prompt="Once upon a time," ``` diff --git a/docs/huggingface-access-token.md b/docs/huggingface-access-token.md index 9451416..633a607 100644 --- a/docs/huggingface-access-token.md +++ b/docs/huggingface-access-token.md @@ -31,7 +31,7 @@ Now you're ready to download a gated model like `Meta-Llama-3-8b`! # requires token in $HOME/.cache/huggingface/token, as created by the # `huggingface-cli login` command, or the `HUGGINGFACE_TOKEN` environment variable. 
cd examples -bazel run -c opt //llama:Meta-Llama-3-8b -bazel run -c opt //llama:Meta-Llama-3-8b -- --promt="Once upon a time," +bazel run --config=release //llama:Meta-Llama-3-8b +bazel run --config=release //llama:Meta-Llama-3-8b -- --prompt="Once upon a time," ``` diff --git a/docs/tutorials/getting_started.md b/docs/tutorials/getting_started.md index 03070a9..0f98da8 100644 --- a/docs/tutorials/getting_started.md +++ b/docs/tutorials/getting_started.md @@ -50,7 +50,7 @@ On the command line: ``` cd examples -bazel run -c opt //mnist +bazel run --config=release //mnist ``` ### Llama @@ -75,8 +75,8 @@ Once you've been granted access, you're ready to download a gated model like # requires token in $HOME/.cache/huggingface/token, as created by the # `huggingface-cli login` command, or the `HUGGINGFACE_TOKEN` environment variable. cd examples -bazel run -c opt //llama:Llama-3.1-8B-Instruct -bazel run -c opt //llama:Llama-3.1-8B-Instruct -- --prompt="What is the capital of France?" +bazel run --config=release //llama:Llama-3.1-8B-Instruct +bazel run --config=release //llama:Llama-3.1-8B-Instruct -- --prompt="What is the capital of France?" ``` You can also try `Llama-3.1-70B-Instruct` if you have enough memory. @@ -88,8 +88,8 @@ Like the 8B model above, this model also requires approval. See ``` cd examples -bazel run -c opt //llama:Llama-3.2-1B-Instruct -bazel run -c opt //llama:Llama-3.2-1B-Instruct -- --prompt="What is the capital of France?" +bazel run --config=release //llama:Llama-3.2-1B-Instruct +bazel run --config=release //llama:Llama-3.2-1B-Instruct -- --prompt="What is the capital of France?" ``` For a larger 3.2 model, you can also try `Llama-3.2-3B-Instruct`. @@ -120,7 +120,7 @@ run the following: ``` cd examples -bazel run -c opt //llama:Llama-3.2-1B-Instruct \ --@zml//runtimes:cuda=true \ -- --prompt="What is the capital of France?" 
``` diff --git a/docs/tutorials/write_first_model.md b/docs/tutorials/write_first_model.md index 0433949..4d755d1 100644 --- a/docs/tutorials/write_first_model.md +++ b/docs/tutorials/write_first_model.md @@ -380,9 +380,9 @@ const asynk = @import("async"); With everything in place now, running the model is easy: ``` -# run release (-c opt) +# run release (--config=release) cd examples -bazel run -c opt //simple_layer +bazel run --config=release //simple_layer # compile and run debug version bazel run //simple_layer @@ -391,7 +391,7 @@ bazel run //simple_layer And voila! Here's the output: ``` -bazel run -c opt //simple_layer +bazel run --config=release //simple_layer INFO: Analyzed target //simple_layer:simple_layer (0 packages loaded, 0 targets configured). INFO: Found 1 target... Target //simple_layer:simple_layer up-to-date: diff --git a/examples/loader/main.zig b/examples/loader/main.zig index 6673289..0487f27 100644 --- a/examples/loader/main.zig +++ b/examples/loader/main.zig @@ -24,7 +24,7 @@ pub fn asyncMain() !void { break :blk path; } else { std.debug.print("Missing file path argument\n", .{}); - std.debug.print("Try: bazel run -c opt //loader:safetensors -- /path/to/mymodel.safetensors or /path/to/model.safetensors.index.json \n", .{}); + std.debug.print("Try: bazel run --config=release //loader:safetensors -- /path/to/mymodel.safetensors or /path/to/model.safetensors.index.json \n", .{}); std.process.exit(0); };