Add/refresh how‑to docs and example loader for deployment, Docker, HuggingFace token, and getting‑started tutorials.
This commit is contained in:
parent
59f99c4501
commit
fe55c600d4
@ -25,7 +25,7 @@ housing an NVIDIA GPU, run the following:
|
|||||||
|
|
||||||
```
|
```
|
||||||
cd examples
|
cd examples
|
||||||
bazel run -c opt //llama:OpenLLaMA-3B --@zml//runtimes:cuda=true
|
bazel run --config=release //llama:OpenLLaMA-3B --@zml//runtimes:cuda=true
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
@ -42,7 +42,7 @@ As an example, here is how you build above OpenLLama for CUDA on Linux X86_64:
|
|||||||
|
|
||||||
```
|
```
|
||||||
cd examples
|
cd examples
|
||||||
bazel build -c opt //llama:OpenLLaMA-3B \
|
bazel build --config=release //llama:OpenLLaMA-3B \
|
||||||
--@zml//runtimes:cuda=true \
|
--@zml//runtimes:cuda=true \
|
||||||
--@zml//runtimes:cpu=false \
|
--@zml//runtimes:cpu=false \
|
||||||
--platforms=@zml//platforms:linux_amd64
|
--platforms=@zml//platforms:linux_amd64
|
||||||
@ -84,7 +84,7 @@ tar(
|
|||||||
|
|
||||||
```
|
```
|
||||||
# cd examples
|
# cd examples
|
||||||
bazel build -c opt //mnist:archive \
|
bazel build --config=release //mnist:archive \
|
||||||
--@zml//runtimes:cuda=true \
|
--@zml//runtimes:cuda=true \
|
||||||
--@zml//runtimes:cpu=false \
|
--@zml//runtimes:cpu=false \
|
||||||
--platforms=@zml//platforms:linux_amd64
|
--platforms=@zml//platforms:linux_amd64
|
||||||
|
|||||||
@ -134,7 +134,7 @@ And that's almost it! You can already build the image:
|
|||||||
|
|
||||||
```
|
```
|
||||||
# cd examples
|
# cd examples
|
||||||
bazel build -c opt //simple_layer:image
|
bazel build --config=release //simple_layer:image
|
||||||
|
|
||||||
INFO: Analyzed target //simple_layer:image (1 packages loaded, 8 targets configured).
|
INFO: Analyzed target //simple_layer:image (1 packages loaded, 8 targets configured).
|
||||||
INFO: Found 1 target...
|
INFO: Found 1 target...
|
||||||
@ -169,7 +169,7 @@ oci_load(
|
|||||||
... then we can load the image and run it with the following commands:
|
... then we can load the image and run it with the following commands:
|
||||||
|
|
||||||
```
|
```
|
||||||
bazel run -c opt //simple_layer:load
|
bazel run --config=release //simple_layer:load
|
||||||
docker run --rm distroless/simple_layer:latest
|
docker run --rm distroless/simple_layer:latest
|
||||||
```
|
```
|
||||||
|
|
||||||
@ -194,7 +194,7 @@ This will push the `simple_layer` image with the tag `latest` (you can add more)
|
|||||||
to the docker registry:
|
to the docker registry:
|
||||||
|
|
||||||
```
|
```
|
||||||
bazel run -c opt //simple_layer:push
|
bazel run --config=release //simple_layer:push
|
||||||
```
|
```
|
||||||
|
|
||||||
When dealing with, say, both a public and a private container registry - or if you
|
When dealing with, say, both a public and a private container registry - or if you
|
||||||
@ -202,7 +202,7 @@ just want to try it out **right now**, you can always override the repository on
|
|||||||
the command line:
|
the command line:
|
||||||
|
|
||||||
```
|
```
|
||||||
bazel run -c opt //simple_layer:push -- --repository my.server.com/org/image
|
bazel run --config=release //simple_layer:push -- --repository my.server.com/org/image
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
@ -216,7 +216,7 @@ We'll use the [MNIST
|
|||||||
example](https://github.com/zml/zml/tree/master/examples/mnist) to illustrate
|
example](https://github.com/zml/zml/tree/master/examples/mnist) to illustrate
|
||||||
how to build Docker images that also contain data files.
|
how to build Docker images that also contain data files.
|
||||||
|
|
||||||
You can `bazel run -c opt //mnist:push -- --repository
|
You can `bazel run --config=release //mnist:push -- --repository
|
||||||
index.docker.io/my_org/zml_mnist` in the `./examples` folder if you want to try
|
index.docker.io/my_org/zml_mnist` in the `./examples` folder if you want to try
|
||||||
it out.
|
it out.
|
||||||
|
|
||||||
@ -232,7 +232,7 @@ platforms your containerized model should support.**
|
|||||||
**Example:**
|
**Example:**
|
||||||
|
|
||||||
```
|
```
|
||||||
bazel run //mnist:push -c opt --@zml//runtimes:cuda=true -- --repository index.docker.io/my_org/zml_mnist
|
bazel run //mnist:push --config=release --@zml//runtimes:cuda=true -- --repository index.docker.io/my_org/zml_mnist
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -34,8 +34,8 @@ Now you're ready to download a gated model like `Meta-Llama-3-8b`!
|
|||||||
# requires token in $HOME/.cache/huggingface/token, as created by the
|
# requires token in $HOME/.cache/huggingface/token, as created by the
|
||||||
# `huggingface-cli login` command, or the `HUGGINGFACE_TOKEN` environment variable.
|
# `huggingface-cli login` command, or the `HUGGINGFACE_TOKEN` environment variable.
|
||||||
cd examples
|
cd examples
|
||||||
bazel run -c opt //llama:Meta-Llama-3-8b
|
bazel run --config=release //llama:Meta-Llama-3-8b
|
||||||
bazel run -c opt //llama:Meta-Llama-3-8b -- --prompt="Once upon a time,"
|
bazel run --config=release //llama:Meta-Llama-3-8b -- --prompt="Once upon a time,"
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -31,7 +31,7 @@ Now you're ready to download a gated model like `Meta-Llama-3-8b`!
|
|||||||
# requires token in $HOME/.cache/huggingface/token, as created by the
|
# requires token in $HOME/.cache/huggingface/token, as created by the
|
||||||
# `huggingface-cli login` command, or the `HUGGINGFACE_TOKEN` environment variable.
|
# `huggingface-cli login` command, or the `HUGGINGFACE_TOKEN` environment variable.
|
||||||
cd examples
|
cd examples
|
||||||
bazel run -c opt //llama:Meta-Llama-3-8b
|
bazel run --config=release //llama:Meta-Llama-3-8b
|
||||||
bazel run -c opt //llama:Meta-Llama-3-8b -- --prompt="Once upon a time,"
|
bazel run --config=release //llama:Meta-Llama-3-8b -- --prompt="Once upon a time,"
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|||||||
@ -50,7 +50,7 @@ On the command line:
|
|||||||
|
|
||||||
```
|
```
|
||||||
cd examples
|
cd examples
|
||||||
bazel run -c opt //mnist
|
bazel run --config=release //mnist
|
||||||
```
|
```
|
||||||
|
|
||||||
### Llama
|
### Llama
|
||||||
@ -75,8 +75,8 @@ Once you've been granted access, you're ready to download a gated model like
|
|||||||
# requires token in $HOME/.cache/huggingface/token, as created by the
|
# requires token in $HOME/.cache/huggingface/token, as created by the
|
||||||
# `huggingface-cli login` command, or the `HUGGINGFACE_TOKEN` environment variable.
|
# `huggingface-cli login` command, or the `HUGGINGFACE_TOKEN` environment variable.
|
||||||
cd examples
|
cd examples
|
||||||
bazel run -c opt //llama:Llama-3.1-8B-Instruct
|
bazel run --config=release //llama:Llama-3.1-8B-Instruct
|
||||||
bazel run -c opt //llama:Llama-3.1-8B-Instruct -- --prompt="What is the capital of France?"
|
bazel run --config=release //llama:Llama-3.1-8B-Instruct -- --prompt="What is the capital of France?"
|
||||||
```
|
```
|
||||||
|
|
||||||
You can also try `Llama-3.1-70B-Instruct` if you have enough memory.
|
You can also try `Llama-3.1-70B-Instruct` if you have enough memory.
|
||||||
@ -88,8 +88,8 @@ Like the 8B model above, this model also requires approval. See
|
|||||||
|
|
||||||
```
|
```
|
||||||
cd examples
|
cd examples
|
||||||
bazel run -c opt //llama:Llama-3.2-1B-Instruct
|
bazel run --config=release //llama:Llama-3.2-1B-Instruct
|
||||||
bazel run -c opt //llama:Llama-3.2-1B-Instruct -- --prompt="What is the capital of France?"
|
bazel run --config=release //llama:Llama-3.2-1B-Instruct -- --prompt="What is the capital of France?"
|
||||||
```
|
```
|
||||||
|
|
||||||
For a larger 3.2 model, you can also try `Llama-3.2-3B-Instruct`.
|
For a larger 3.2 model, you can also try `Llama-3.2-3B-Instruct`.
|
||||||
@ -120,7 +120,7 @@ run the following:
|
|||||||
|
|
||||||
```
|
```
|
||||||
cd examples
|
cd examples
|
||||||
bazel run -c opt //llama:Llama-3.2-1B-Instruct \
|
bazel run --config=release //llama:Llama-3.2-1B-Instruct \
|
||||||
--@zml//runtimes:cuda=true \
|
--@zml//runtimes:cuda=true \
|
||||||
-- --prompt="What is the capital of France?"
|
-- --prompt="What is the capital of France?"
|
||||||
```
|
```
|
||||||
|
|||||||
@ -380,9 +380,9 @@ const asynk = @import("async");
|
|||||||
With everything in place now, running the model is easy:
|
With everything in place now, running the model is easy:
|
||||||
|
|
||||||
```
|
```
|
||||||
# run release (-c opt)
|
# run release (--config=release)
|
||||||
cd examples
|
cd examples
|
||||||
bazel run -c opt //simple_layer
|
bazel run --config=release //simple_layer
|
||||||
|
|
||||||
# compile and run debug version
|
# compile and run debug version
|
||||||
bazel run //simple_layer
|
bazel run //simple_layer
|
||||||
@ -391,7 +391,7 @@ bazel run //simple_layer
|
|||||||
And voila! Here's the output:
|
And voila! Here's the output:
|
||||||
|
|
||||||
```
|
```
|
||||||
bazel run -c opt //simple_layer
|
bazel run --config=release //simple_layer
|
||||||
INFO: Analyzed target //simple_layer:simple_layer (0 packages loaded, 0 targets configured).
|
INFO: Analyzed target //simple_layer:simple_layer (0 packages loaded, 0 targets configured).
|
||||||
INFO: Found 1 target...
|
INFO: Found 1 target...
|
||||||
Target //simple_layer:simple_layer up-to-date:
|
Target //simple_layer:simple_layer up-to-date:
|
||||||
|
|||||||
@ -24,7 +24,7 @@ pub fn asyncMain() !void {
|
|||||||
break :blk path;
|
break :blk path;
|
||||||
} else {
|
} else {
|
||||||
std.debug.print("Missing file path argument\n", .{});
|
std.debug.print("Missing file path argument\n", .{});
|
||||||
std.debug.print("Try: bazel run -c opt //loader:safetensors -- /path/to/mymodel.safetensors or /path/to/model.safetensors.index.json \n", .{});
|
std.debug.print("Try: bazel run --config=release //loader:safetensors -- /path/to/mymodel.safetensors or /path/to/model.safetensors.index.json \n", .{});
|
||||||
std.process.exit(0);
|
std.process.exit(0);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user