Add Qwen3VL bf16 example implementation and tutorial docs, including BMP/JPG/PNG support via zignal library.

Add Qwen3VL bf16 example implementation and integrate zignal image format support; update Bazel build files and core ZML modules.
Update XLA to latest version
2025-12-31 12:46:11 +00:00 · 2025-12-29 16:17:11 +00:00 · 2025-12-23 17:24:34 +00:00 · 2025-12-22 17:33:18 +00:00 · 2025-12-18 10:04:04 +00:00 · 2025-12-15 11:08:19 +00:00
40 changed files with 2719 additions and 340 deletions
--- a/MODULE.bazel
+++ b/MODULE.bazel
@ -3,35 +3,31 @@ module(
 )

 bazel_dep(name = "abseil-cpp", version = "20250814.0")
-bazel_dep(name = "aspect_bazel_lib", version = "2.20.0")
+bazel_dep(name = "aspect_bazel_lib", version = "2.21.2")
 bazel_dep(name = "aspect_rules_py", version = "1.6.3")
-bazel_dep(name = "bazel_skylib", version = "1.8.1")
+bazel_dep(name = "bazel_skylib", version = "1.8.2")
 bazel_dep(name = "libxev", version = "0.0.0-20251014.0-9f785d2")
 bazel_dep(name = "patchelf", version = "0.18.0")
 bazel_dep(name = "pcre2", version = "10.45")
 bazel_dep(name = "platforms", version = "1.0.0")
 bazel_dep(name = "protobuf", version = "32.0", repo_name = "com_google_protobuf")
-bazel_dep(name = "rules_cc", version = "0.2.0")
+
+# Needs to be added before rules_cc so that the cc toolchain declared by
+# apple_support wins over the one in rules_cc.
+bazel_dep(name = "apple_support", version = "1.24.2")
+bazel_dep(name = "rules_cc", version = "0.2.12")
 bazel_dep(name = "rules_distroless", version = "0.5.1")
 bazel_dep(name = "rules_license", version = "1.0.0")
 bazel_dep(name = "rules_oci", version = "2.2.6")
 bazel_dep(name = "rules_proto", version = "7.1.0")
 bazel_dep(name = "rules_python", version = "1.5.3")
 bazel_dep(name = "rules_rust", version = "0.63.0")
-bazel_dep(name = "rules_zig", version = "0.12.1")
+bazel_dep(name = "rules_zig", version = "0.12.2")
 bazel_dep(name = "toolchains_llvm_bootstrapped", version = "0.2.4")
 bazel_dep(name = "with_cfg.bzl", version = "0.11.0")

 bazel_dep(name = "buildifier_prebuilt", version = "8.2.0.2", dev_dependency = True)

-# Remove this when rules_zig@0.12.2 is released.
-archive_override(
-    module_name = "rules_zig",
-    urls = ["https://github.com/aherrmann/rules_zig/archive/cbf03e45c9ffa7d23a50790929649805a08afbf9.tar.gz"],
-    strip_prefix = "rules_zig-cbf03e45c9ffa7d23a50790929649805a08afbf9",
-    integrity = "sha256-lu6gNjulEIqG68I7DRyJzWInDVk3lL+mCj2J14vYKwc=",
-)
-
 zig = use_extension("@rules_zig//zig:extensions.bzl", "zig")
 zig.index(file = "//bazel:zig_index.json")
 zig.toolchain(zig_version = "0.15.2")
@ -106,7 +102,7 @@ use_repo(zls, "zls_toolchains")
 register_toolchains("@zls_toolchains//:all")

 non_module_deps = use_extension("//:third_party/non_module_deps.bzl", "non_module_deps")
-use_repo(non_module_deps, "com_github_hejsil_clap", "com_google_sentencepiece", "mnist", "org_swig_swig", "xla")
+use_repo(non_module_deps, "com_github_hejsil_clap", "com_google_sentencepiece", "mnist", "org_swig_swig", "xla", "com_github_bfactory_ai_zignal")

 xla = use_extension("//third_party/xla:xla.bzl", "xla")
 use_repo(
@ -116,10 +112,14 @@ use_repo(
    "local_config_cuda",
    "local_config_remote_execution",
    "local_config_rocm",
+    "local_config_sycl",
    "local_config_tensorrt",
    "python_version_repo",
    "rules_ml_toolchain",
+    "rules_shell",
    "stablehlo",
+    "sycl_configure",
+    "sycl_configure_ext",
    "triton",
    "tsl",
 )
--- a/MODULE.bazel.lock
+++ b/MODULE.bazel.lock
@ -15,11 +15,11 @@
    "https://bazel-registry.zml.ai/modules/apple_support/1.11.1/MODULE.bazel": "not found",
    "https://bazel-registry.zml.ai/modules/apple_support/1.15.1/MODULE.bazel": "not found",
    "https://bazel-registry.zml.ai/modules/apple_support/1.17.1/MODULE.bazel": "not found",
+    "https://bazel-registry.zml.ai/modules/apple_support/1.24.2/MODULE.bazel": "not found",
    "https://bazel-registry.zml.ai/modules/aspect_bazel_lib/2.0.0/MODULE.bazel": "not found",
    "https://bazel-registry.zml.ai/modules/aspect_bazel_lib/2.14.0/MODULE.bazel": "not found",
    "https://bazel-registry.zml.ai/modules/aspect_bazel_lib/2.16.0/MODULE.bazel": "not found",
    "https://bazel-registry.zml.ai/modules/aspect_bazel_lib/2.19.3/MODULE.bazel": "not found",
-    "https://bazel-registry.zml.ai/modules/aspect_bazel_lib/2.20.0/MODULE.bazel": "not found",
    "https://bazel-registry.zml.ai/modules/aspect_bazel_lib/2.21.2/MODULE.bazel": "not found",
    "https://bazel-registry.zml.ai/modules/aspect_bazel_lib/2.7.2/MODULE.bazel": "not found",
    "https://bazel-registry.zml.ai/modules/aspect_bazel_lib/2.8.1/MODULE.bazel": "not found",
@ -37,6 +37,7 @@
    "https://bazel-registry.zml.ai/modules/bazel_features/1.21.0/MODULE.bazel": "not found",
    "https://bazel-registry.zml.ai/modules/bazel_features/1.23.0/MODULE.bazel": "not found",
    "https://bazel-registry.zml.ai/modules/bazel_features/1.24.0/MODULE.bazel": "not found",
+    "https://bazel-registry.zml.ai/modules/bazel_features/1.27.0/MODULE.bazel": "not found",
    "https://bazel-registry.zml.ai/modules/bazel_features/1.28.0/MODULE.bazel": "not found",
    "https://bazel-registry.zml.ai/modules/bazel_features/1.3.0/MODULE.bazel": "not found",
    "https://bazel-registry.zml.ai/modules/bazel_features/1.30.0/MODULE.bazel": "not found",
@ -112,6 +113,9 @@
    "https://bazel-registry.zml.ai/modules/rules_cc/0.0.9/MODULE.bazel": "not found",
    "https://bazel-registry.zml.ai/modules/rules_cc/0.1.1/MODULE.bazel": "not found",
    "https://bazel-registry.zml.ai/modules/rules_cc/0.2.0/MODULE.bazel": "not found",
+    "https://bazel-registry.zml.ai/modules/rules_cc/0.2.11/MODULE.bazel": "not found",
+    "https://bazel-registry.zml.ai/modules/rules_cc/0.2.12/MODULE.bazel": "not found",
+    "https://bazel-registry.zml.ai/modules/rules_cc/0.2.8/MODULE.bazel": "not found",
    "https://bazel-registry.zml.ai/modules/rules_cc/0.2.9/MODULE.bazel": "not found",
    "https://bazel-registry.zml.ai/modules/rules_distroless/0.5.1/MODULE.bazel": "not found",
    "https://bazel-registry.zml.ai/modules/rules_foreign_cc/0.9.0/MODULE.bazel": "not found",
@ -172,6 +176,7 @@
    "https://bazel-registry.zml.ai/modules/rules_swift/1.16.0/MODULE.bazel": "not found",
    "https://bazel-registry.zml.ai/modules/rules_swift/2.1.1/MODULE.bazel": "not found",
    "https://bazel-registry.zml.ai/modules/rules_zig/0.12.1/MODULE.bazel": "not found",
+    "https://bazel-registry.zml.ai/modules/rules_zig/0.12.2/MODULE.bazel": "not found",
    "https://bazel-registry.zml.ai/modules/stardoc/0.5.1/MODULE.bazel": "not found",
    "https://bazel-registry.zml.ai/modules/stardoc/0.5.3/MODULE.bazel": "not found",
    "https://bazel-registry.zml.ai/modules/stardoc/0.5.4/MODULE.bazel": "not found",
@ -207,12 +212,12 @@
    "https://bcr.bazel.build/modules/apple_support/1.11.1/MODULE.bazel": "1843d7cd8a58369a444fc6000e7304425fba600ff641592161d9f15b179fb896",
    "https://bcr.bazel.build/modules/apple_support/1.15.1/MODULE.bazel": "a0556fefca0b1bb2de8567b8827518f94db6a6e7e7d632b4c48dc5f865bc7c85",
    "https://bcr.bazel.build/modules/apple_support/1.17.1/MODULE.bazel": "655c922ab1209978a94ef6ca7d9d43e940cd97d9c172fb55f94d91ac53f8610b",
-    "https://bcr.bazel.build/modules/apple_support/1.17.1/source.json": "6b2b8c74d14e8d485528a938e44bdb72a5ba17632b9e14ef6e68a5ee96c8347f",
+    "https://bcr.bazel.build/modules/apple_support/1.24.2/MODULE.bazel": "0e62471818affb9f0b26f128831d5c40b074d32e6dda5a0d3852847215a41ca4",
+    "https://bcr.bazel.build/modules/apple_support/1.24.2/source.json": "2c22c9827093250406c5568da6c54e6fdf0ef06238def3d99c71b12feb057a8d",
    "https://bcr.bazel.build/modules/aspect_bazel_lib/2.0.0/MODULE.bazel": "e118477db5c49419a88d78ebc7a2c2cea9d49600fe0f490c1903324a2c16ecd9",
    "https://bcr.bazel.build/modules/aspect_bazel_lib/2.14.0/MODULE.bazel": "2b31ffcc9bdc8295b2167e07a757dbbc9ac8906e7028e5170a3708cecaac119f",
    "https://bcr.bazel.build/modules/aspect_bazel_lib/2.16.0/MODULE.bazel": "852f9ebbda017572a7c113a2434592dd3b2f55cd9a0faea3d4be5a09a59e4900",
    "https://bcr.bazel.build/modules/aspect_bazel_lib/2.19.3/MODULE.bazel": "253d739ba126f62a5767d832765b12b59e9f8d2bc88cc1572f4a73e46eb298ca",
-    "https://bcr.bazel.build/modules/aspect_bazel_lib/2.20.0/MODULE.bazel": "c5565bac49e1973227225b441fad1c938d498d83df62dc5da95b2fab0f0626a2",
    "https://bcr.bazel.build/modules/aspect_bazel_lib/2.21.2/MODULE.bazel": "276347663a25b0d5bd6cad869252bea3e160c4d980e764b15f3bae7f80b30624",
    "https://bcr.bazel.build/modules/aspect_bazel_lib/2.21.2/source.json": "f42051fa42629f0e59b7ac2adf0a55749144b11f1efcd8c697f0ee247181e526",
    "https://bcr.bazel.build/modules/aspect_bazel_lib/2.7.2/MODULE.bazel": "780d1a6522b28f5edb7ea09630748720721dfe27690d65a2d33aa7509de77e07",
@ -233,6 +238,7 @@
    "https://bcr.bazel.build/modules/bazel_features/1.21.0/MODULE.bazel": "675642261665d8eea09989aa3b8afb5c37627f1be178382c320d1b46afba5e3b",
    "https://bcr.bazel.build/modules/bazel_features/1.23.0/MODULE.bazel": "fd1ac84bc4e97a5a0816b7fd7d4d4f6d837b0047cf4cbd81652d616af3a6591a",
    "https://bcr.bazel.build/modules/bazel_features/1.24.0/MODULE.bazel": "4796b4c25b47053e9bbffa792b3792d07e228ff66cd0405faef56a978708acd4",
+    "https://bcr.bazel.build/modules/bazel_features/1.27.0/MODULE.bazel": "621eeee06c4458a9121d1f104efb80f39d34deff4984e778359c60eaf1a8cb65",
    "https://bcr.bazel.build/modules/bazel_features/1.28.0/MODULE.bazel": "4b4200e6cbf8fa335b2c3f43e1d6ef3e240319c33d43d60cc0fbd4b87ece299d",
    "https://bcr.bazel.build/modules/bazel_features/1.3.0/MODULE.bazel": "cdcafe83ec318cda34e02948e81d790aab8df7a929cec6f6969f13a489ccecd9",
    "https://bcr.bazel.build/modules/bazel_features/1.30.0/MODULE.bazel": "a14b62d05969a293b80257e72e597c2da7f717e1e69fa8b339703ed6731bec87",
@ -327,8 +333,11 @@
    "https://bcr.bazel.build/modules/rules_cc/0.0.9/MODULE.bazel": "836e76439f354b89afe6a911a7adf59a6b2518fafb174483ad78a2a2fde7b1c5",
    "https://bcr.bazel.build/modules/rules_cc/0.1.1/MODULE.bazel": "2f0222a6f229f0bf44cd711dc13c858dad98c62d52bd51d8fc3a764a83125513",
    "https://bcr.bazel.build/modules/rules_cc/0.2.0/MODULE.bazel": "b5c17f90458caae90d2ccd114c81970062946f49f355610ed89bebf954f5783c",
+    "https://bcr.bazel.build/modules/rules_cc/0.2.11/MODULE.bazel": "e94f24f065bf2191dba2dace951814378b66a94bb3bcc48077492fe0508059b5",
+    "https://bcr.bazel.build/modules/rules_cc/0.2.12/MODULE.bazel": "4216c383ce3223c7dfbd3afcb09c7056265156150b2a3bd3fd9d02949a7ee1bc",
+    "https://bcr.bazel.build/modules/rules_cc/0.2.12/source.json": "593d822a2ce7b70d5fad3195f7eb64d519dde94d7c20b0b79e6c23891a79b2bd",
+    "https://bcr.bazel.build/modules/rules_cc/0.2.8/MODULE.bazel": "f1df20f0bf22c28192a794f29b501ee2018fa37a3862a1a2132ae2940a23a642",
    "https://bcr.bazel.build/modules/rules_cc/0.2.9/MODULE.bazel": "34263f1dca62ea664265438cef714d7db124c03e1ed55ebb4f1dc860164308d1",
-    "https://bcr.bazel.build/modules/rules_cc/0.2.9/source.json": "4e49b40effcbd14fbfb233eb929de42dfff7b66538b4ffda310ad501638e7986",
    "https://bcr.bazel.build/modules/rules_distroless/0.5.1/MODULE.bazel": "2a63f4744d30749128105da5f96adf7caf5628e37548293f89e7fa39c3b3f2c2",
    "https://bcr.bazel.build/modules/rules_distroless/0.5.1/source.json": "c6b9ff7f325bfed89c3671757f14c1d1bc6077d0fcef809b8aa2d007cac7dd1d",
    "https://bcr.bazel.build/modules/rules_foreign_cc/0.9.0/MODULE.bazel": "c9e8c682bf75b0e7c704166d79b599f93b72cfca5ad7477df596947891feeef6",
@ -400,7 +409,8 @@
    "https://bcr.bazel.build/modules/rules_swift/2.1.1/MODULE.bazel": "494900a80f944fc7aa61500c2073d9729dff0b764f0e89b824eb746959bc1046",
    "https://bcr.bazel.build/modules/rules_swift/2.1.1/source.json": "40fc69dfaac64deddbb75bd99cdac55f4427d9ca0afbe408576a65428427a186",
    "https://bcr.bazel.build/modules/rules_zig/0.12.1/MODULE.bazel": "5953094b681c212eebe3dd4275809c52b9177aeaa058d8f1bff433822c0dcd01",
-    "https://bcr.bazel.build/modules/rules_zig/0.12.1/source.json": "d880cdc3473bfcf172c9aad9292e856430496e198d805539bb8be98e79f58a7c",
+    "https://bcr.bazel.build/modules/rules_zig/0.12.2/MODULE.bazel": "09d2ca5486c5c3bbcaa61057958d35f9b4e1e981e08a5a28ad516bab87e8c4ef",
+    "https://bcr.bazel.build/modules/rules_zig/0.12.2/source.json": "4cef85ce1b625967ce9c063b8716920e8e7067c38321981a7bfd581ad588f4ba",
    "https://bcr.bazel.build/modules/stardoc/0.5.1/MODULE.bazel": "1a05d92974d0c122f5ccf09291442580317cdd859f07a8655f1db9a60374f9f8",
    "https://bcr.bazel.build/modules/stardoc/0.5.3/MODULE.bazel": "c7f6948dae6999bf0db32c1858ae345f112cacf98f174c7a8bb707e41b974f1c",
    "https://bcr.bazel.build/modules/stardoc/0.5.4/MODULE.bazel": "6569966df04610b8520957cb8e97cf2e9faac2c0309657c537ab51c16c18a2a4",
@ -430,37 +440,6 @@
  },
  "selectedYankedVersions": {},
  "moduleExtensions": {
-    "@@apple_support+//crosstool:setup.bzl%apple_cc_configure_extension": {
-      "general": {
-        "bzlTransitiveDigest": "N6yHPMaF17im7GO6Gu+cdCtGXs+lAlBIhCeB1/ZZoPk=",
-        "usagesDigest": "39X2JjPCOAk6sThDALGv1L4q85GNjda2yfszm/phxxw=",
-        "recordedFileInputs": {},
-        "recordedDirentsInputs": {},
-        "envVariables": {},
-        "generatedRepoSpecs": {
-          "local_config_apple_cc_toolchains": {
-            "repoRuleId": "@@apple_support+//crosstool:setup.bzl%_apple_cc_autoconf_toolchains",
-            "attributes": {}
-          },
-          "local_config_apple_cc": {
-            "repoRuleId": "@@apple_support+//crosstool:setup.bzl%_apple_cc_autoconf",
-            "attributes": {}
-          }
-        },
-        "recordedRepoMappingEntries": [
-          [
-            "apple_support+",
-            "bazel_tools",
-            "bazel_tools"
-          ],
-          [
-            "bazel_tools",
-            "rules_cc",
-            "rules_cc+"
-          ]
-        ]
-      }
-    },
    "@@aspect_rules_py+//py:extensions.bzl%py_tools": {
      "general": {
        "bzlTransitiveDigest": "Udhh0icjIe0dHLjbgfmj2tzBz0geB4SXJl0FoPcpF84=",
@ -645,7 +624,7 @@
    },
    "@@rules_apple+//apple:apple.bzl%provisioning_profile_repository_extension": {
      "general": {
-        "bzlTransitiveDigest": "7XgPyOlTI0qJefux0gxRds30ZZCLPjr/7R9gFtU/jgE=",
+        "bzlTransitiveDigest": "Oa+WNwUQ6db3WoQOwZ8qU+aRE5kJufCF45z4ZCs6298=",
        "usagesDigest": "vsJl8Rw5NL+5Ag2wdUDoTeRF/5klkXO8545Iy7U1Q08=",
        "recordedFileInputs": {},
        "recordedDirentsInputs": {},
@ -694,6 +673,16 @@
          ],
          [
            "rules_cc+",
+            "cc_compatibility_proxy",
+            "rules_cc++compatibility_proxy+cc_compatibility_proxy"
+          ],
+          [
+            "rules_cc+",
+            "rules_cc",
+            "rules_cc+"
+          ],
+          [
+            "rules_cc++compatibility_proxy+cc_compatibility_proxy",
            "rules_cc",
            "rules_cc+"
          ],
@ -1180,7 +1169,7 @@
    },
    "@@rules_rust+//crate_universe:extensions.bzl%crate": {
      "general": {
-        "bzlTransitiveDigest": "uv5JLD8zajzRT1Hh+FYGrK5o1QjgTE4Ohde5NQMsUlk=",
+        "bzlTransitiveDigest": "ZaON2eA6LKrwcPU8xDGi0W3+6wD1r6kjb0pSYFC4Se8=",
        "usagesDigest": "mup6mgSFatk34Q0znMZPpIEOFZyk9v3xf6jRIXEFTxc=",
        "recordedFileInputs": {
          "@@//zml/tokenizer/hftokenizers/Cargo.lock": "d78ffab37ef69bf8ff45a4d686b25175f30e8fed4d8375612e84dcf6db0cfc9c",
@ -2478,6 +2467,16 @@
          ],
          [
            "rules_cc+",
+            "cc_compatibility_proxy",
+            "rules_cc++compatibility_proxy+cc_compatibility_proxy"
+          ],
+          [
+            "rules_cc+",
+            "rules_cc",
+            "rules_cc+"
+          ],
+          [
+            "rules_cc++compatibility_proxy+cc_compatibility_proxy",
            "rules_cc",
            "rules_cc+"
          ],
@ -2511,7 +2510,7 @@
    },
    "@@rules_rust+//crate_universe/private:internal_extensions.bzl%cu_nr": {
      "general": {
-        "bzlTransitiveDigest": "BEf6td+6l3OplJk+TxURVgZT9CK8AKbw6H/uSseljfQ=",
+        "bzlTransitiveDigest": "XkpmtSAoMsVR5D0IWKWoGVKSPjnIiH3NEyMQqyFvqm8=",
        "usagesDigest": "3vKI8uvqTpJCf+t8aU6UD5d5cUWinWhtMjKkRpCLR+A=",
        "recordedFileInputs": {},
        "recordedDirentsInputs": {},
@ -2611,6 +2610,16 @@
          ],
          [
            "rules_cc+",
+            "cc_compatibility_proxy",
+            "rules_cc++compatibility_proxy+cc_compatibility_proxy"
+          ],
+          [
+            "rules_cc+",
+            "rules_cc",
+            "rules_cc+"
+          ],
+          [
+            "rules_cc++compatibility_proxy+cc_compatibility_proxy",
            "rules_cc",
            "rules_cc+"
          ],
@ -2814,7 +2823,7 @@
    "@@rules_zig+//zig:extensions.bzl%zig": {
      "general": {
        "bzlTransitiveDigest": "wV3euhBZSI0D2ZlqbY1YW592mghFY88d9H6zBMQRaJY=",
-        "usagesDigest": "PFS+UEa30JWWU52AUTNHD1nDzqpwjSts6jIKzD7gVJU=",
+        "usagesDigest": "87SI2jgNGLvFKH9GAVUH7PVF1DJqfUvlXiSe9uiKbnk=",
        "recordedFileInputs": {
          "@@//bazel/zig_index.json": "e6c6b29fa227fa9b1806dc03af2b6dab00f52f34d39878557074e1626027259f",
          "@@rules_zig+//zig/private/versions.json": "6eb85ebaee72c4e6fbc82d8ad73f69e0899d839b70a7e19caa792ab6a5fa0c36"
--- a/docs/tutorials/write_first_model.md
+++ b/docs/tutorials/write_first_model.md
@ -215,8 +215,8 @@ const input_shape = zml.Shape.init(.{3}, .f16);
 // We manually produce a BufferStore. You would not normally do that.
 // A BufferStore is usually created by loading model data from a file.
 var buffers: zml.aio.BufferStore.Buffers = .{};
-try buffers.put(arena, "weight", zml.HostBuffer.fromArray(&weights));
-try buffers.put(arena, "bias", zml.HostBuffer.fromArray(&bias));
+try buffers.put(arena, "weight", zml.HostBuffer.fromArrayPtr(&weights));
+try buffers.put(arena, "bias", zml.HostBuffer.fromArrayPtr(&bias));

 // the actual BufferStore
 const bs: zml.aio.BufferStore = .{
@ -462,8 +462,8 @@ pub fn asyncMain() !void {
    // We manually produce a BufferStore. You would not normally do that.
    // A BufferStore is usually created by loading model data from a file.
    var buffers: zml.aio.BufferStore.Buffers = .{};
-    try buffers.put(arena, "weight", zml.HostBuffer.fromArray(&weights));
-    try buffers.put(arena, "bias", zml.HostBuffer.fromArray(&bias));
+    try buffers.put(arena, "weight", zml.HostBuffer.fromArrayPtr(&weights));
+    try buffers.put(arena, "bias", zml.HostBuffer.fromArrayPtr(&bias));

    // the actual BufferStore
    const bs: zml.aio.BufferStore = .{
--- a/examples/callback/main.zig
+++ b/examples/callback/main.zig
@ -82,11 +82,7 @@ pub const GrayScale = struct {
    }

    pub fn grayScaleCuda(self: GrayScale, rgb_d: zml.Buffer, gray_d: zml.Buffer) !void {
-        var args: [2][]u8 = .{
-            @as([*]u8, @ptrFromInt(rgb_d.devicePtr()))[0..rgb_d.shape().byteSize()],
-            @as([*]u8, @ptrFromInt(gray_d.devicePtr()))[0..gray_d.shape().byteSize()],
-        };
-        var args_ptr: [2:null]?*anyopaque = .{ @ptrCast(&args[0]), @ptrCast(&args[1]) };
+        var args: [2:null]?*anyopaque = .{ rgb_d.devicePtr(), gray_d.devicePtr() };
        // This is a naive kernel with one block per pixel.
        try cuda.check(cuda.launchKernel.?(
            @ptrCast(self.cu_data[1]), // function
@ -98,7 +94,7 @@ pub const GrayScale = struct {
            1, // num grids z
            0, // shared mem
            @ptrCast(self.stream),
-            &args_ptr,
+            &args,
            null,
        ));
        // Note: no explicit synchronization, we just enqueue work in the stream.
--- a/examples/qwen3_vl/BUILD.bazel
+++ b/examples/qwen3_vl/BUILD.bazel
@ -0,0 +1,27 @@
+load("@rules_zig//zig:defs.bzl", "zig_binary", "zig_test")
+
+zig_binary(
+    name = "qwen3_vl",
+    srcs = [
+        "qwen3_vl.zig",
+    ],
+    main = "main.zig",
+    deps = [
+        "@com_github_bfactory_ai_zignal//:zignal",
+        "@com_github_hejsil_clap//:clap",
+        "@zml//async",
+        "@zml//stdx",
+        "@zml//zml",
+    ],
+)
+
+zig_test(
+    name = "test",
+    main = "qwen3_vl.zig",
+    test_runner = "//zml:test_runner",
+    deps = [
+        "@zml//async",
+        "@zml//stdx",
+        "@zml//zml",
+    ],
+)
--- a/examples/qwen3_vl/main.zig
+++ b/examples/qwen3_vl/main.zig
@ -0,0 +1,745 @@
+const std = @import("std");
+const async = @import("async");
+const zml = @import("zml");
+const qwen = @import("qwen3_vl.zig");
+const clap = @import("clap");
+const stdx = @import("stdx");
+const zignal = @import("zignal");
+
+const floats = zml.floats;
+
+const log = std.log.scoped(.qwen);
+
+test {
+    std.testing.refAllDecls(@This());
+}
+
+pub const std_options: std.Options = .{
+    .log_level = .info,
+    .logFn = async.logFn(std.log.defaultLog),
+};
+
+const params = clap.parseParamsComptime(
+    \\--help                      print this help
+    \\--prompt         <STRING>   the prompt
+    \\--image          <STRING>   path to the image file (BMP format)
+    \\--hf-model-path  <STRING>   path to the directory containing model weights, config and tokenizer
+    \\--seed           <UINT>     random seed (optional)
+    \\--seq-len        <UINT>     sequence length (default: 512)
+    \\--create-options <STRING>   platform creation options in ZON format, defaults to {}
+);
+
+pub fn generateText(
+    config: qwen.Qwen.Config,
+    _: qwen.Qwen3VL,
+    mod_prefill: zml.ModuleExe(qwen.Qwen3VL.forward),
+    mod_decode: zml.ModuleExe(qwen.Qwen3VL.forward_decode),
+    kv_cache_: zml.Bufferized(qwen.KvCache),
+    tokenizer: zml.tokenizer.Tokenizer,
+    allocator: std.mem.Allocator,
+    seed: u128,
+    prompt: []const u8,
+    image_path: []const u8,
+    preprocessor_config: PreprocessorConfig,
+    max_seq_len: u32,
+    writer: *std.Io.Writer,
+) !void {
+    // Preprocess image and prompt
+    const preprocessor_input = try preprocessor(
+        allocator,
+        tokenizer,
+        prompt,
+        config,
+        preprocessor_config,
+        image_path,
+        max_seq_len,
+        4096, // max_side of the image
+    );
+    defer {
+        preprocessor_input.image_buffer_chw.deinit(allocator);
+        preprocessor_input.prompt_tokens.deinit(allocator);
+        preprocessor_input.prompt_shape.deinit(allocator);
+        preprocessor_input.image_dim.deinit(allocator);
+        preprocessor_input.token_index.deinit(allocator);
+    }
+
+    const platform = mod_decode.platform();
+    var tokenizer_decoder = try tokenizer.decoder();
+    defer tokenizer_decoder.deinit();
+
+    // Extract prompt_shape values before converting to device buffer
+    const prompt_shape_values = preprocessor_input.prompt_shape.items(u32);
+    const total_seq_len = prompt_shape_values[0] + prompt_shape_values[1] + prompt_shape_values[2];
+
+    // Prepare device buffers for prefill
+    const image_buffer_chw = try preprocessor_input.image_buffer_chw.toDevice(platform);
+    defer image_buffer_chw.deinit();
+
+    const prompt_tokens = try preprocessor_input.prompt_tokens.toDevice(platform);
+    defer prompt_tokens.deinit();
+
+    const prompt_shape = try preprocessor_input.prompt_shape.toDevice(platform);
+    defer prompt_shape.deinit();
+
+    const image_dim = try preprocessor_input.image_dim.toDevice(platform);
+    defer image_dim.deinit();
+
+    const token_index = try preprocessor_input.token_index.toDevice(platform);
+    defer token_index.deinit();
+
+    // init RNG and buffers
+    var rng = try zml.Tensor.Rng.init(platform, seed);
+    var generated_token_buffer = [_]u32{undefined};
+
+    // Prefill: process the full prompt with image
+    var kv_cache, var mrope_position_deltas, rng = prefill: {
+        const next_token, const kv_cache, const mrope_deltas, const new_rng = mod_prefill.call(.{
+            image_buffer_chw,
+            prompt_tokens,
+            image_dim,
+            token_index,
+            prompt_shape,
+            kv_cache_,
+            rng,
+        });
+
+        // Extract the generated token
+        _ = try next_token.toHost(std.mem.sliceAsBytes(&generated_token_buffer));
+
+        break :prefill .{ kv_cache, mrope_deltas, new_rng };
+    };
+    defer zml.aio.unloadBuffers(&kv_cache);
+    defer mrope_position_deltas.deinit();
+
+    // Prepare for token-by-token generation,
+    // start with the token generated based on the full prompt.
+    var current_token = try zml.Buffer.fromSlice(platform, .{ .bs = 1, .seq = 1 }, &generated_token_buffer);
+    defer current_token.deinit();
+
+    const output_tokens_len = max_seq_len - total_seq_len - 1;
+    const start = std.time.microTimestamp();
+
+    // One token has already been generated by the prefill.
+    var num_tokens_generated: usize = 1;
+
+    // Store all generated tokens
+    var generated_tokens = try std.ArrayList(u32).initCapacity(allocator, output_tokens_len);
+    defer generated_tokens.deinit(allocator);
+
+    const token_gen = max_seq_len - total_seq_len;
+    generation: for (0..token_gen) |i| {
+        // Collect and print generated sequence
+        num_tokens_generated += 1;
+        const generated_token = generated_token_buffer[0];
+        try generated_tokens.append(allocator, generated_token);
+        if (try tokenizer_decoder.next(generated_token)) |chunk| {
+            try writer.writeAll(chunk);
+        }
+
+        // check for eos
+        if (i == output_tokens_len) break :generation;
+        if (generated_token == 151643 or generated_token == 151645) break :generation;
+
+        // Current token pos needs to go into a zml.Buffer
+        const cache_position_buffer = &[_]i64{@intCast(total_seq_len - 1 + i)};
+        const cache_position = try zml.Buffer.fromSlice(platform, .{}, cache_position_buffer);
+        defer cache_position.deinit();
+
+        // Call to generate the next token
+        const next_token, const updated_kv_cache, const new_rng = mod_decode.call(.{ current_token, cache_position, kv_cache, mrope_position_deltas, rng });
+
+        current_token = next_token;
+        kv_cache = updated_kv_cache;
+        rng = new_rng;
+
+        // Extract the generated token from the buffer
+        _ = try current_token.toHost(std.mem.sliceAsBytes(&generated_token_buffer));
+    }
+
+    const end = std.time.microTimestamp();
+    const duration = stdx.math.divFloat(f64, end - start, std.time.us_per_s);
+    const speed = @as(f64, @floatFromInt(num_tokens_generated)) / duration;
+
+    // Decode and print all generated tokens at the end
+    std.debug.print("\n", .{});
+    for (generated_tokens.items) |token| {
+        if (try tokenizer_decoder.next(token)) |chunk| {
+            try writer.writeAll(chunk);
+        }
+    }
+
+    std.debug.print("\n", .{});
+    log.info("Generated {d} tokens in {:.3}s: {d:.3}tok/s", .{ num_tokens_generated, duration, speed });
+}
+
+pub fn main() !void {
+    try async.AsyncThread.main(std.heap.c_allocator, asyncMain);
+}
+
+pub fn asyncMain() !void {
+    log.info("   Qwen3-VL was compiled with {}", .{@import("builtin").mode});
+
+    const allocator = std.heap.c_allocator;
+
+    const parsers = comptime .{
+        .BOOL = bool_parser,
+        .UINT = clap.parsers.int(u32, 0),
+        .STRING = clap.parsers.string,
+        .PATH = clap.parsers.string,
+    };
+    var diag: clap.Diagnostic = .{};
+    var stderr_buffer: [1024]u8 = undefined;
+    var stderr = std.fs.File.stderr().writer(&stderr_buffer);
+    defer stderr.interface.flush() catch {};
+
+    var cli = clap.parse(clap.Help, &params, parsers, .{
+        .diagnostic = &diag,
+        .allocator = allocator,
+    }) catch |err| {
+        diag.report(&stderr.interface, err) catch {};
+        stderr.interface.writeAll("usage: ") catch {};
+        clap.usage(&stderr.interface, clap.Help, &params) catch {};
+        stderr.interface.writeAll("\n") catch {};
+        return;
+    };
+    defer cli.deinit();
+
+    if (cli.args.help != 0) {
+        clap.help(&stderr.interface, clap.Help, &params, .{}) catch {};
+        return;
+    }
+
+    const hf_model_path = cli.args.@"hf-model-path" orelse {
+        log.err("Missing --hf-model-path", .{});
+        return;
+    };
+
+    const image_path = cli.args.image orelse {
+        log.err("Missing --image", .{});
+        return;
+    };
+
+    const model_config_path = try std.fs.path.join(allocator, &.{ hf_model_path, "config.json" });
+    defer allocator.free(model_config_path);
+
+    const model_weights_path = b: {
+        const simple_path = try std.fs.path.join(allocator, &.{ hf_model_path, "model.safetensors" });
+        if (async.File.access(simple_path, .{})) {
+            break :b simple_path;
+        } else |_| {
+            allocator.free(simple_path);
+        }
+
+        const sharded_path = try std.fs.path.join(allocator, &.{ hf_model_path, "model.safetensors.index.json" });
+        break :b sharded_path;
+    };
+    defer allocator.free(model_weights_path);
+
+    const model_tokenizer_path = try std.fs.path.join(allocator, &.{ hf_model_path, "tokenizer.json" });
+    defer allocator.free(model_tokenizer_path);
+
+    const preprocessor_config_path = try std.fs.path.join(allocator, &.{ hf_model_path, "preprocessor_config.json" });
+    defer allocator.free(preprocessor_config_path);
+
+    // Load config
+    const config = blk: {
+        var config_json_file = try async.File.open(model_config_path, .{ .mode = .read_only });
+        defer config_json_file.close() catch unreachable;
+        var config_json_buffer: [256]u8 = undefined;
+        var config_reader = config_json_file.reader(&config_json_buffer);
+        var reader = std.json.Reader.init(allocator, &config_reader.interface);
+        defer reader.deinit();
+        const config_obj = try std.json.parseFromTokenSourceLeaky(qwen.Qwen.Config, allocator, &reader, .{ .ignore_unknown_fields = true });
+        break :blk config_obj;
+    };
+
+    // Load preprocessor config
+    const preprocessor_config = blk: {
+        var preprocessor_config_json_file = try async.File.open(preprocessor_config_path, .{ .mode = .read_only });
+        defer preprocessor_config_json_file.close() catch unreachable;
+        var preprocessor_config_json_buffer: [256]u8 = undefined;
+        var preprocessor_config_reader = preprocessor_config_json_file.reader(&preprocessor_config_json_buffer);
+        var reader = std.json.Reader.init(allocator, &preprocessor_config_reader.interface);
+        defer reader.deinit();
+        const preprocessor_config_obj = try std.json.parseFromTokenSourceLeaky(PreprocessorConfig, allocator, &reader, .{ .ignore_unknown_fields = true });
+        break :blk preprocessor_config_obj;
+    };
+
+    var context = try zml.Context.init();
+    defer context.deinit();
+
+    const compilation_options = zml.CompilationOptions{
+        .xla_dump_to = "/tmp/zml/qwen3vl",
+        .sharding_enabled = true,
+    };
+
+    // Initialize ZML platform
+    const create_opts_zon = cli.args.@"create-options" orelse ".{}";
+    const create_opts = std.zon.parse.fromSlice(zml.Platform.CreateOptions, allocator, @ptrCast(create_opts_zon), null, .{ .free_on_error = false }) catch |err| {
+        log.err("Failed to parse --create-options as ZON ({}): {s}", .{ err, create_opts_zon });
+        return err;
+    };
+
+    const platform = context.autoPlatform(create_opts).withCompilationOptions(compilation_options);
+    context.printAvailablePlatforms(platform);
+
+    var store = try zml.aio.detectFormatAndOpen(allocator, model_weights_path);
+    defer store.deinit();
+
+    // Initialize model
+    const seq_len: u32 = cli.args.@"seq-len" orelse 512;
+
+    // Options for the generation
+    const qwen_options: qwen.Qwen.Options = .{
+        .max_seq_len = seq_len,
+        .sampling_strategy = .{
+            .topk = 3,
+            .temperature = 1.2,
+        },
+    };
+
+    var compiler_arena = std.heap.ArenaAllocator.init(allocator);
+    defer compiler_arena.deinit();
+
+    const qwen_tensors: qwen.Qwen3VL = try qwen.Qwen3VL.init(compiler_arena.allocator(), config, qwen_options, store);
+
+    // Load tokenizer early (needed for preprocessor)
+    var tokenizer = blk: {
+        log.info("Loading tokenizer from {s}", .{model_tokenizer_path});
+        var timer = try stdx.time.Timer.start();
+        defer log.info("Loaded tokenizer from {s} [{D}]", .{ model_tokenizer_path, timer.read() });
+
+        break :blk try zml.tokenizer.Tokenizer.fromFile(allocator, model_tokenizer_path);
+    };
+    errdefer tokenizer.deinit();
+
+    const prompt = cli.args.prompt orelse "Describe this image.";
+
+    // Use preprocessor to calculate all needed values for compilation
+    const preprocessor_input = try preprocessor(
+        allocator,
+        tokenizer,
+        prompt,
+        config,
+        preprocessor_config,
+        image_path,
+        seq_len,
+        4096, // max_side
+    );
+    defer {
+        preprocessor_input.image_buffer_chw.deinit(allocator);
+        preprocessor_input.prompt_tokens.deinit(allocator);
+        preprocessor_input.prompt_shape.deinit(allocator);
+        preprocessor_input.image_dim.deinit(allocator);
+        preprocessor_input.token_index.deinit(allocator);
+    }
+
+    // Use shapes from preprocessor for compilation
+    const image_buffer_shape = preprocessor_input.image_buffer_chw.shape();
+    const prompt_tokens_shape = preprocessor_input.prompt_tokens.shape();
+    const prompt_shape_shape = preprocessor_input.prompt_shape.shape();
+    const image_dim_shape = preprocessor_input.image_dim.shape();
+    const token_index_shape = preprocessor_input.token_index.shape();
+
+    // Specify shapes for decode
+    const decode_input_ids_shape = zml.Shape.init(.{ .bs = 1, .seq = 1 }, .u32);
+    const decode_cache_position_shape = zml.Shape.init(.{}, .i64);
+    const decode_mrope_shape = zml.Shape.init(.{ .seq = 1 }, .i32);
+
+    const dtype = qwen_tensors.qwen.text_model.embed_tokens.weight.dtype();
+    const kv_shape = zml.Shape.init(.{
+        .bs = 1,
+        .layer = config.text_config.num_hidden_layers,
+        .k = seq_len,
+        .h = config.text_config.num_key_value_heads,
+        .hd = config.text_config.head_dim,
+    }, dtype).withSharding(.{.h});
+
+    const kv_cache_shape: zml.ShapeOf(qwen.KvCache) = qwen.KvCache.initShape(kv_shape);
+    const rng_shape = zml.Tensor.Rng.shape();
+
+    // Compile models asynchronously
+    var start = try std.time.Timer.start();
+    var fut_mod_prefill = try async.async(zml.compileModel, .{
+        allocator,
+        qwen.Qwen3VL.forward,
+        qwen_tensors,
+        .{
+            image_buffer_shape,
+            prompt_tokens_shape,
+            image_dim_shape,
+            token_index_shape,
+            prompt_shape_shape,
+            kv_cache_shape,
+            preprocessor_input.h_resized,
+            preprocessor_input.w_resized,
+            rng_shape,
+        },
+        platform,
+    });
+
+    var fut_mod_decode = try async.async(zml.compileModel, .{
+        allocator,
+        qwen.Qwen3VL.forward_decode,
+        qwen_tensors,
+        .{
+            decode_input_ids_shape,
+            decode_cache_position_shape,
+            kv_cache_shape,
+            decode_mrope_shape,
+            rng_shape,
+        },
+        platform,
+    });
+
+    // Load weights while compiling
+    log.info("\tLoading Qwen3-VL weights from {s}...", .{model_weights_path});
+    var qwen_buffers = try store.loadModelById(qwen.Qwen3VL, compiler_arena.allocator(), qwen_tensors, platform);
+    defer zml.aio.unloadBuffers(&qwen_buffers);
+    log.info("✅\tLoaded weights in {D}", .{start.read()});
+
+    var qwen_module_prefill = (try fut_mod_prefill.await()).prepare(qwen_buffers);
+    defer qwen_module_prefill.deinit();
+    var qwen_module_decode = (try fut_mod_decode.await()).prepare(qwen_buffers);
+    defer qwen_module_decode.deinit();
+    log.info("✅\tCompiled model in {D}", .{start.read()});
+
+    log.info("Creating KvCache", .{});
+    const kv_cache = try qwen.KvCache.initBuffer(kv_shape, platform);
+
+    log.info("✅\tPrompt: {s}", .{prompt});
+    log.info("✅\tImage: {s} \n", .{image_path});
+
+    var stdout = std.fs.File.stdout().writer(&.{});
+
+    const seed: u128 = cli.args.seed orelse @bitCast(std.time.nanoTimestamp());
+
+    try generateText(
+        config,
+        qwen_tensors,
+        qwen_module_prefill,
+        qwen_module_decode,
+        kv_cache,
+        tokenizer,
+        allocator,
+        seed,
+        prompt[0..],
+        image_path[0..],
+        preprocessor_config,
+        512,
+        &stdout.interface,
+    );
+}
+
+fn bool_parser(in: []const u8) error{}!bool {
+    return std.mem.indexOfScalar(u8, "tTyY1", in[0]) != null;
+}
+
+// Keep all existing helper functions unchanged
+pub const Size = struct {
+    longest_edge: u64,
+    shortest_edge: u64,
+};
+
+pub const PreprocessorConfig = struct {
+    size: Size,
+    patch_size: u32,
+    temporal_patch_size: u32,
+    image_mean: []const f32,
+    image_std: []const f32,
+};
+
+fn loadImageWithZignal(comptime T: type, allocator: std.mem.Allocator, path: []const u8) !zignal.Image(T) {
+    if (std.mem.endsWith(u8, path, ".png") or std.mem.endsWith(u8, path, ".PNG")) {
+        return zignal.png.load(T, allocator, path, .{});
+    } else if (std.mem.endsWith(u8, path, ".jpg") or std.mem.endsWith(u8, path, ".jpeg") or
+        std.mem.endsWith(u8, path, ".JPG") or std.mem.endsWith(u8, path, ".JPEG"))
+    {
+        return zignal.jpeg.load(T, allocator, path, .{});
+    } else {
+        return error.UnsupportedImageFormat;
+    }
+}
+
+const Input = struct {
+    image_buffer_chw: zml.HostBuffer,
+    prompt_tokens: zml.HostBuffer,
+    prompt_shape: zml.HostBuffer,
+    image_dim: zml.HostBuffer,
+    token_index: zml.HostBuffer,
+    h_resized: u32,
+    w_resized: u32,
+
+    pub fn deinit(self: *Input, allocator: std.mem.Allocator) void {
+        self.image_buffer_chw.deinit(allocator);
+        self.prompt_tokens.deinit(allocator);
+        self.prompt_shape.deinit(allocator);
+        self.image_dim.deinit(allocator);
+        self.token_index.deinit(allocator);
+    }
+};
+
+pub fn preprocessor(
+    allocator: std.mem.Allocator,
+    tokenizer: zml.tokenizer.Tokenizer,
+    prompt: []const u8,
+    config: qwen.Qwen.Config,
+    preprocessor_config: PreprocessorConfig,
+    image_path: []const u8,
+    max_seq_len: u32,
+    max_side: u32,
+) !Input {
+
+    // Detect the extension of the file (bmp, png, jpeg)
+    const ext = if (std.mem.lastIndexOf(u8, image_path, ".")) |idx|
+        std.mem.trim(u8, image_path[idx + 1 ..], " \t\n\r")
+    else
+        "";
+
+    const is_bmp = (ext.len == 3 and
+        std.ascii.toLower(ext[0]) == 'b' and
+        std.ascii.toLower(ext[1]) == 'm' and
+        std.ascii.toLower(ext[2]) == 'p');
+
+    var height: u32 = undefined;
+    var width: u32 = undefined;
+    var rgb_data: []u8 = undefined;
+
+    const image: RgbImage = if (is_bmp) img: {
+        const image_rgb = try loadBmpAsRgb(allocator, image_path);
+        break :img image_rgb;
+    } else img: {
+        var image_from_zignal = loadImageWithZignal(zignal.Rgb, allocator, image_path) catch |err| {
+            log.err("zignal failed to load {s}: {}. Please convert the image to BMP format (24-bit uncompressed) or use a supported format.", .{ image_path, err });
+            return err;
+        };
+        defer image_from_zignal.deinit(allocator);
+        height = @as(u32, @intCast(image_from_zignal.rows));
+        width = @as(u32, @intCast(image_from_zignal.cols));
+        const rgb_len = height * width * 3;
+        rgb_data = try allocator.alloc(u8, rgb_len);
+        errdefer allocator.free(rgb_data);
+
+        // Iterate over all pixels and extract R, G, B
+        var pix_dest: u32 = 0;
+        var y: u32 = 0;
+        while (y < height) : (y += 1) {
+            var x: u32 = 0;
+            while (x < width) : (x += 1) {
+                // at() takes (row, col) which is (y, x)
+                const pixel = image_from_zignal.at(y, x).*;
+
+                rgb_data[pix_dest + 0] = pixel.r;
+                rgb_data[pix_dest + 1] = pixel.g;
+                rgb_data[pix_dest + 2] = pixel.b;
+                pix_dest += 3;
+            }
+        }
+        const image_rgb = RgbImage{
+            .width = width,
+            .height = height,
+            .data = rgb_data,
+        };
+        break :img image_rgb;
+    };
+
+    height = image.height;
+    width = image.width;
+    rgb_data = image.data;
+
+    // Create the HostBuffer for the actual image (small) and the padding image (large)
+    const image_hwc = rgb_data;
+    const image_buffer = try allocator.alloc(u8, max_side * max_side * 3);
+    @memset(image_buffer, 0);
+    const image_small_hwc = zml.HostBuffer.fromSlice(zml.Shape.init(.{ .h = height, .w = width, .c = 3 }, .u8), image_hwc);
+    const image_buffer_hwc = zml.HostBuffer.fromSlice(zml.Shape.init(.{ .h = max_side, .w = max_side, .c = 3 }, .u8), image_buffer);
+
+    // Insert the actual image into the padding image (top left corner of the padding image)
+    const small_height = @as(usize, @intCast(height));
+    const small_width = @as(usize, @intCast(width));
+    const channels = 3;
+    const row_size_small = small_width * channels;
+    const row_size_large = @as(usize, @intCast(max_side)) * channels;
+
+    // Copy line per line
+    const small_bytes = image_small_hwc.bytes();
+    var large_bytes = image_buffer_hwc.mutBytes();
+    for (0..small_height) |h| {
+        const src_offset = h * row_size_small;
+        const dst_offset = h * row_size_large;
+        @memcpy(large_bytes[dst_offset .. dst_offset + row_size_small], small_bytes[src_offset .. src_offset + row_size_small]);
+    }
+
+    const factor = preprocessor_config.patch_size * preprocessor_config.temporal_patch_size;
+    const min_pixels = preprocessor_config.size.shortest_edge;
+    const max_pixels = preprocessor_config.size.longest_edge;
+    stdx.debug.assert(@max(height, width) / @min(height, width) <= 200, "Invalid image ratio", .{});
+
+    // Calculate the resized height and width (rounded to nearest multiple of factor)
+    var h_resized: u32 = @as(u32, @intFromFloat(@round(stdx.math.divFloat(f64, height, factor)))) * factor;
+    var w_resized: u32 = @as(u32, @intFromFloat(@round(stdx.math.divFloat(f64, width, factor)))) * factor;
+
+    // Adjust if pixel count constraints are violated
+    if (@as(u64, h_resized) * @as(u64, w_resized) > max_pixels) {
+        const beta = std.math.sqrt(stdx.math.divFloat(f64, height * width, max_pixels));
+        const h_scaled = stdx.math.divFloat(f64, height, beta);
+        const w_scaled = stdx.math.divFloat(f64, width, beta);
+        h_resized = @max(factor, @as(u32, @intFromFloat(std.math.floor(stdx.math.divFloat(f64, h_scaled, factor)))) * factor);
+        w_resized = @max(factor, @as(u32, @intFromFloat(std.math.floor(stdx.math.divFloat(f64, w_scaled, factor)))) * factor);
+    } else if (@as(u64, h_resized) * @as(u64, w_resized) < min_pixels) {
+        const beta = std.math.sqrt(stdx.math.divFloat(f64, min_pixels, height * width));
+        const h_scaled = stdx.math.divFloat(f64, height, 1) * beta;
+        const w_scaled = stdx.math.divFloat(f64, width, 1) * beta;
+        h_resized = @max(factor, @as(u32, @intFromFloat(std.math.ceil(stdx.math.divFloat(f64, h_scaled, factor)))) * factor);
+        w_resized = @max(factor, @as(u32, @intFromFloat(std.math.ceil(stdx.math.divFloat(f64, w_scaled, factor)))) * factor);
+    }
+    const patch_size = config.vision_config.patch_size;
+
+    // Calculate the number of image pad tokens
+    const number_image_pad_tokens = 1 * (h_resized / patch_size) * (w_resized / patch_size) / std.math.pow(u32, config.vision_config.spatial_merge_size, 2);
+
+    // Apply the chat template to the prompt
+    const prompt_processed = try applyChatTemplate(allocator, tokenizer, prompt, number_image_pad_tokens);
+    const prompt_shape = prompt_processed.prompt_shape;
+
+    //Create the HostBuffer by reallocating the prompt tokens to the max_seq_len
+    const prompt_buffer = try allocator.realloc(prompt_processed.prompt_tokens, max_seq_len);
+    const prompt_tokens = zml.HostBuffer.fromSlice(.{ .bs = 1, .seq = max_seq_len }, prompt_buffer);
+    // Create the HostBuffer for the prompt shape
+    const prompt_shape_buffer = (try zml.HostBuffer.fromArray(allocator, prompt_shape)).withTags(.{.prompt_shape});
+    // Create the HostBuffer for the image size
+    const image_size_buffer = (try zml.HostBuffer.fromArray(allocator, [_]u32{ height, width, 3 })).withTags(.{.chw});
+    // Create the HostBuffer for the token index
+    const token_index_buffer = try zml.HostBuffer.empty(allocator, zml.Shape.init(.{}, .i64));
+    const index: i64 = 0;
+    @memcpy(token_index_buffer.mutItems(i64), &[_]i64{index});
+
+    return Input{
+        .image_buffer_chw = image_buffer_hwc,
+        .prompt_tokens = prompt_tokens,
+        .prompt_shape = prompt_shape_buffer,
+        .image_dim = image_size_buffer,
+        .token_index = token_index_buffer,
+        .h_resized = h_resized,
+        .w_resized = w_resized,
+    };
+}
+
+// Apply the chat template to the prompt
+// Returns the prompt tokens and the prompt shape
+// The shape is 4 txt tokens, n vision pad tokens, m + 6  text tokens (4 and 6 are the hardcoded tokens by the chat template )
+pub fn applyChatTemplate(allocator: std.mem.Allocator, tokenizer: zml.tokenizer.Tokenizer, prompt: []const u8, number_image_pad_tokens: u32) !struct { prompt_tokens: []u32, prompt_shape: [3]u32 } {
+    var encoder = try tokenizer.encoder();
+    defer encoder.deinit();
+    const im_start_id = tokenizer.tokenToId("<|im_start|>") orelse return error.NoSuchToken;
+    const im_end_id = tokenizer.tokenToId("<|im_end|>") orelse return error.NoSuchToken;
+    const user = tokenizer.tokenToId("user") orelse return error.NoSuchToken;
+    const assistant = tokenizer.tokenToId("assistant") orelse return error.NoSuchToken;
+    const vision_start_id = tokenizer.tokenToId("<|vision_start|>") orelse return error.NoSuchToken;
+    const vision_end_id = tokenizer.tokenToId("<|vision_end|>") orelse return error.NoSuchToken;
+    const image_pad_id = tokenizer.tokenToId("<|image_pad|>") orelse return error.NoSuchToken;
+    const newline = (try encoder.encode("\n"))[0];
+
+    var tokens: std.ArrayList(u32) = try .initCapacity(allocator, prompt.len);
+    try tokens.appendSlice(allocator, &.{ im_start_id, user, newline });
+    try tokens.appendSlice(allocator, &.{vision_start_id});
+    for (0..number_image_pad_tokens) |i| {
+        _ = i;
+        try tokens.appendSlice(allocator, &.{image_pad_id});
+    }
+    try tokens.appendSlice(allocator, &.{vision_end_id});
+    try tokens.appendSlice(allocator, try encoder.encode(prompt));
+    try tokens.appendSlice(allocator, &.{ im_end_id, newline });
+    try tokens.appendSlice(allocator, &.{ im_start_id, assistant, newline });
+    const prompt_tokens = try encoder.encode(prompt);
+    const prompt_shape: [3]u32 = .{ 4, number_image_pad_tokens, @as(u32, @intCast(prompt_tokens.len)) + 6 };
+    return .{ .prompt_tokens = try tokens.toOwnedSlice(allocator), .prompt_shape = prompt_shape };
+}
+
+pub const RgbImage = struct {
+    width: u32,
+    height: u32,
+    data: []u8,
+
+    pub fn deinit(self: *RgbImage, allocator: std.mem.Allocator) void {
+        allocator.free(self.data);
+        self.* = undefined;
+    }
+};
+
+pub fn loadBmpAsRgb(allocator: std.mem.Allocator, path: []const u8) !RgbImage {
+    var file = try std.fs.cwd().openFile(path, .{ .mode = .read_only });
+    defer file.close();
+
+    const max_len = 64 * 1024 * 1024; // 64 MiB safety cap
+    const file_bytes = try file.readToEndAlloc(allocator, max_len);
+    defer allocator.free(file_bytes);
+
+    if (file_bytes.len < 54) return error.InvalidBmpHeader;
+    if (!std.mem.eql(u8, file_bytes[0..2], "BM")) return error.InvalidBmpSignature;
+
+    const readU16 = struct {
+        fn f(bytes: []const u8) u16 {
+            return std.mem.readInt(u16, bytes[0..2], .little);
+        }
+    }.f;
+    const readI32 = struct {
+        fn f(bytes: []const u8) i32 {
+            return std.mem.readInt(i32, bytes[0..4], .little);
+        }
+    }.f;
+    const readU32 = struct {
+        fn f(bytes: []const u8) u32 {
+            return std.mem.readInt(u32, bytes[0..4], .little);
+        }
+    }.f;
+
+    const data_offset = readU32(file_bytes[10..14]);
+    const dib_header_size = readU32(file_bytes[14..18]);
+    if (dib_header_size < 40) return error.UnsupportedBmpFormat;
+
+    const width_i32 = readI32(file_bytes[18..22]);
+    const height_i32 = readI32(file_bytes[22..26]);
+    if (width_i32 <= 0 or height_i32 == 0) return error.InvalidBmpDimensions;
+
+    const planes = readU16(file_bytes[26..28]);
+    const bits_per_pixel = readU16(file_bytes[28..30]);
+    const compression = readU32(file_bytes[30..34]);
+    if (planes != 1 or compression != 0 or bits_per_pixel != 24) return error.UnsupportedBmpFormat;
+
+    const width: u32 = @intCast(width_i32);
+    const abs_height: u32 = @intCast(if (height_i32 < 0) -height_i32 else height_i32);
+    const is_top_down = height_i32 < 0;
+
+    const row_stride = ((width * 3 + 3) / 4) * 4;
+    const pixel_array_size = row_stride * abs_height;
+    if (data_offset + pixel_array_size > file_bytes.len) return error.TruncatedBmp;
+
+    const rgb_len = width * abs_height * 3;
+    var rgb_data = try allocator.alloc(u8, rgb_len);
+    errdefer allocator.free(rgb_data);
+
+    var row: u32 = 0;
+    while (row < abs_height) : (row += 1) {
+        const src_row_index = if (is_top_down) row else abs_height - 1 - row;
+        const src_start = data_offset + src_row_index * row_stride;
+        const src_slice = file_bytes[src_start .. src_start + row_stride];
+
+        const dst_start = row * width * 3;
+        var col: u32 = 0;
+        while (col < width) : (col += 1) {
+            const src_pixel = col * 3;
+            const dst_pixel = dst_start + col * 3;
+            // BMP pixels are stored in BGR order.
+            rgb_data[dst_pixel + 0] = src_slice[src_pixel + 2];
+            rgb_data[dst_pixel + 1] = src_slice[src_pixel + 1];
+            rgb_data[dst_pixel + 2] = src_slice[src_pixel + 0];
+        }
+    }
+
+    return RgbImage{
+        .width = width,
+        .height = abs_height,
+        .data = rgb_data,
+    };
+}
--- a/examples/qwen3_vl/qwen3_vl.zig
+++ b/examples/qwen3_vl/qwen3_vl.zig
--- a/examples/simple_layer/main.zig
+++ b/examples/simple_layer/main.zig
@ -47,8 +47,8 @@ pub fn asyncMain() !void {
    // A BufferStore is usually created by loading model data from a file.
    var store: zml.aio.BufferStore = .init(allocator);
    defer store.deinit();
-    try store.buffers.put(store.arena.allocator(), "weight", zml.HostBuffer.fromArray(&weights));
-    try store.buffers.put(store.arena.allocator(), "bias", zml.HostBuffer.fromArray(&bias));
+    try store.buffers.put(store.arena.allocator(), "weight", zml.HostBuffer.fromArrayPtr(&weights));
+    try store.buffers.put(store.arena.allocator(), "bias", zml.HostBuffer.fromArrayPtr(&bias));

    // A clone of our model, consisting of shapes. We only need shapes for compiling.
    // We use the BufferStore to infer the shapes.
--- a/pjrt/pjrt.zig
+++ b/pjrt/pjrt.zig
@ -1311,6 +1311,20 @@ pub const Ffi = extern struct {
        }
    };

+    pub const TypeInfo = struct {
+        deleter: ?*const fn (*anyopaque) callconv(.c) void = null,
+        serialize: ?*const fn () callconv(.c) void = null,
+        deserialize: ?*const fn () callconv(.c) void = null,
+
+        pub fn toCStruct(self: TypeInfo) c.PJRT_FFI_Type_Info {
+            return .{
+                .deleter = @ptrCast(self.deleter),
+                .serialize = @ptrCast(self.serialize),
+                .deserialize = @ptrCast(self.deserialize),
+            };
+        }
+    };
+
    // todo : support all missing handlers available in GPU plugin extension: handler_instantiate, handler_prepare, handler_initialize
    // introduced by https://github.com/openxla/xla/commit/ef85a7bcc308313492ebc50295a8a08b4e51b8f5
    pub fn register(
@ -1337,13 +1351,14 @@ pub const Ffi = extern struct {
        }
    }

-    pub fn registerTypeId(self: *const Ffi, api: *const Api, type_name: []const u8) ApiError!ffi.TypeId {
-        var ret = pjrtStruct(c.PJRT_FFI_TypeID_Register_Args{
+    pub fn registerTypeId(self: *const Ffi, api: *const Api, type_name: []const u8, type_info: ?*const c.PJRT_FFI_Type_Info) ApiError!ffi.TypeId {
+        var ret = pjrtStruct(c.PJRT_FFI_Type_Register_Args{
            .type_name = type_name.ptr,
            .type_name_size = type_name.len,
            .type_id = 0, // let the plugin assign a unique type ID
+            .type_info = @ptrCast(@constCast(type_info)),
        });
-        const result = self.inner.type_id_register.?(&ret);
+        const result = self.inner.type_register.?(&ret);
        if (result) |pjrt_c_error| {
            const pjrt_error: *Error = @ptrCast(pjrt_c_error);
            return pjrt_error.getCode(api).toApiError();
--- a/runtimes/cpu/cpu.bzl
+++ b/runtimes/cpu/cpu.bzl
@ -23,22 +23,22 @@ def _cpu_pjrt_plugin_impl(mctx):
    http_archive(
        name = "libpjrt_cpu_linux_amd64",
        build_file_content = _BUILD_FILE_DEFAULT_VISIBILITY + _BUILD_LINUX,
-        sha256 = "124dc500291a5930f910ca23533520e22c90797110b29fd2c0d8274475f4a220",
-        url = "https://github.com/zml/pjrt-artifacts/releases/download/v13.0.0/pjrt-cpu_linux-amd64.tar.gz",
+        sha256 = "ecc26dc792d2577474348eb48f3989aba8c3bb8d3cbd6df77ccf43357092a700",
+        url = "https://github.com/zml/pjrt-artifacts/releases/download/v14.0.1/pjrt-cpu_linux-amd64.tar.gz",
    )

    http_archive(
        name = "libpjrt_cpu_darwin_amd64",
        build_file_content = _BUILD_FILE_DEFAULT_VISIBILITY + _BUILD_DARWIN,
-        sha256 = "6e5b59874880f4db37c53fb1d52520d410b0078f9d2606a90762c6c622693c26",
-        url = "https://github.com/zml/pjrt-artifacts/releases/download/v13.0.0/pjrt-cpu_darwin-amd64.tar.gz",
+        sha256 = "4a21db4ecd015fb772614ce4b491551d483ce11321c8784e3d0e07a9a425d5eb",
+        url = "https://github.com/zml/pjrt-artifacts/releases/download/v14.0.1/pjrt-cpu_darwin-amd64.tar.gz",
    )

    http_archive(
        name = "libpjrt_cpu_darwin_arm64",
        build_file_content = _BUILD_FILE_DEFAULT_VISIBILITY + _BUILD_DARWIN,
-        sha256 = "a6354bfed828a011e6d809eda2230e10c40c80044c67fe618b2a9615c047f092",
-        url = "https://github.com/zml/pjrt-artifacts/releases/download/v13.0.0/pjrt-cpu_darwin-arm64.tar.gz",
+        sha256 = "e0ab4492468999ae7861a27837427846a708f4346fdae9ad1e84b80e1313566a",
+        url = "https://github.com/zml/pjrt-artifacts/releases/download/v14.0.1/pjrt-cpu_darwin-arm64.tar.gz",
    )

    return mctx.extension_metadata(
--- a/runtimes/cuda/cuda.bzl
+++ b/runtimes/cuda/cuda.bzl
@ -229,8 +229,8 @@ def _cuda_impl(mctx):
    http_archive(
        name = "libpjrt_cuda",
        build_file = "libpjrt_cuda.BUILD.bazel",
-        url = "https://github.com/zml/pjrt-artifacts/releases/download/v13.0.0/pjrt-cuda_linux-amd64.tar.gz",
-        sha256 = "6cdac9bac6db904e4423c9745c61000cf3acaf3c7da8016ab0016f076869048a",
+        url = "https://github.com/zml/pjrt-artifacts/releases/download/v14.0.1/pjrt-cuda_linux-amd64.tar.gz",
+        sha256 = "4b618f05f9cd4cd14966717f7a521b1aa80b425999755870ce2d1caf45685578",
    )

    return mctx.extension_metadata(
--- a/runtimes/rocm/rocm.bzl
+++ b/runtimes/rocm/rocm.bzl
@ -121,7 +121,7 @@ _ROCM_PACKAGES = {
                "dlopen": "zmlxrocm_dlopen",
            },
        ),
-        packages.filegroup(name = "hiprtc", srcs = ["lib/libhiprtc.so.6"]),
+        packages.filegroup(name = "hiprtc", srcs = ["lib/libhiprtc.so.6", "lib/libhiprtc-builtins.so.6"]),
    ]),
    "hipsolver": packages.filegroup(name = "hipsolver", srcs = ["lib/libhipsolver.so.0"]),
 }
@ -153,8 +153,8 @@ def _rocm_impl(mctx):
    http_archive(
        name = "libpjrt_rocm",
        build_file = "libpjrt_rocm.BUILD.bazel",
-        url = "https://github.com/zml/pjrt-artifacts/releases/download/v13.0.0/pjrt-rocm_linux-amd64.tar.gz",
-        sha256 = "945c43c68325c0e91cd41eaa594a9f9f6e78da7cc06892d83bf345b69f7bd714",
+        url = "https://github.com/zml/pjrt-artifacts/releases/download/v14.0.1/pjrt-rocm_linux-amd64.tar.gz",
+        sha256 = "087858044f17bc06b70d7cbffc33e7f2bf590d732f3ce2c24425e41453ea1cf4",
    )

    return mctx.extension_metadata(
--- a/runtimes/rocm/zmlxrocm.zig
+++ b/runtimes/rocm/zmlxrocm.zig
@ -11,7 +11,9 @@ pub export fn zmlxrocm_dlopen(filename: [*c]const u8, flags: c_int) ?*anyopaque
        .{ "libamd_comgr.so", "libamd_comgr.so.3" },
        .{ "librocprofiler-register.so", "librocprofiler-register.so.0" },
        .{ "libMIOpen.so", "libMIOpen.so.1" },
+        .{ "libMIOpen.so.1", "libMIOpen.so.1" },
        .{ "librccl.so", "librccl.so.1" },
+        .{ "librocblas.so.4", "librocblas.so.4" },
        .{ "librocblas.so", "librocblas.so.4" },
        .{ "libroctracer64.so", "libroctracer64.so.4" },
        .{ "libroctx64.so", "libroctx64.so.4" },
--- a/third_party/com_github_bfactory_ai_zignal/BUILD.bazel
+++ b/third_party/com_github_bfactory_ai_zignal/BUILD.bazel
@ -0,0 +1 @@
+# Empty BUILD.bazel to make this a Bazel package
--- a/third_party/com_github_bfactory_ai_zignal/repo.bzl
+++ b/third_party/com_github_bfactory_ai_zignal/repo.bzl
@ -0,0 +1,9 @@
+load("@bazel_tools//tools/build_defs/repo:git.bzl", "new_git_repository")
+
+def repo():
+    new_git_repository(
+        name = "com_github_bfactory_ai_zignal",
+        remote = "https://github.com/loupicaaa/zignal.git",
+        commit = "21553a48014add0e7f069f8c72b9277786185127",
+        build_file = "//:third_party/com_github_bfactory_ai_zignal/zignal.bazel",
+    )
--- a/third_party/com_github_bfactory_ai_zignal/zignal.bazel
+++ b/third_party/com_github_bfactory_ai_zignal/zignal.bazel
@ -0,0 +1,9 @@
+load("@rules_zig//zig:defs.bzl", "zig_library")
+
+zig_library(
+    name = "zignal",
+    import_name = "zignal",
+    srcs = glob(["**/*.zig"], exclude = ["build.zig", "build.zig.zon"]),
+    main = "src/root.zig",  # Le fichier principal devrait être à la racine
+    visibility = ["//visibility:public"],
+)
--- a/third_party/non_module_deps.bzl
+++ b/third_party/non_module_deps.bzl
@ -1,3 +1,4 @@
+load("//third_party/com_github_bfactory_ai_zignal:repo.bzl", com_github_bfactory_ai_zignal = "repo")
 load("//third_party/com_github_hejsil_clap:repo.bzl", com_github_hejsil_clap = "repo")
 load("//third_party/com_google_sentencepiece:repo.bzl", com_google_sentencepiece = "repo")
 load("//third_party/mnist:repo.bzl", mnist = "repo")
@ -10,6 +11,7 @@ def _non_module_deps_impl(mctx):
    com_github_hejsil_clap()
    mnist()
    xla()
+    com_github_bfactory_ai_zignal()

    return mctx.extension_metadata(
        reproducible = True,
--- a/third_party/xla/patches/0001-PjRT-C-API-male-header-C-compliant-for-PJRT-FFI-exte.patch
+++ b/third_party/xla/patches/0001-PjRT-C-API-male-header-C-compliant-for-PJRT-FFI-exte.patch
@ -0,0 +1,45 @@
+From 0d88ac9b06c8bc78db817d85e90cd60d38e6561a Mon Sep 17 00:00:00 2001
+From: Hugo Mano <hugo@zml.ai>
+Date: Mon, 3 Nov 2025 16:54:54 +0100
+Subject: [PATCH] PjRT C API: make PJRT FFI C extension header compliant
+
+
+XLA PR: https://github.com/openxla/xla/pull/33470
+
+---
+ xla/pjrt/c/pjrt_c_api_ffi_extension.h | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/xla/pjrt/c/pjrt_c_api_ffi_extension.h b/xla/pjrt/c/pjrt_c_api_ffi_extension.h
+index e756650911..33b78238b9 100644
+--- a/xla/pjrt/c/pjrt_c_api_ffi_extension.h
+++ b/xla/pjrt/c/pjrt_c_api_ffi_extension.h
+@@ -32,13 +32,13 @@ extern "C" {
+ // See: https://en.wikipedia.org/wiki/Foreign_function_interface
+ #define PJRT_API_FFI_EXTENSION_VERSION 3
+ 
+-struct PJRT_FFI_Type_Info {
+typedef struct PJRT_FFI_Type_Info {
+   void (*deleter)(void* object);
+   void (*serialize)();    // placeholder for future use
+   void (*deserialize)();  // placeholder for future use
+-};
+} PJRT_FFI_Type_Info;
+ 
+-struct PJRT_FFI_Type_Register_Args {
+typedef struct PJRT_FFI_Type_Register_Args {
+   size_t struct_size;
+   PJRT_Extension_Base* extension_start;
+ 
+@@ -46,7 +46,7 @@ struct PJRT_FFI_Type_Register_Args {
+   size_t type_name_size;
+   int64_t type_id;  // in-out
+   PJRT_FFI_Type_Info* type_info;
+-};
+} PJRT_FFI_Type_Register_Args;
+ PJRT_DEFINE_STRUCT_TRAITS(PJRT_FFI_Type_Register_Args, type_info);
+ 
+ // Registers external type in a static type registry. If `type_id` is set to `0`
+-- 
+2.50.1 (Apple Git-155)
+
--- a/third_party/xla/repo.bzl
+++ b/third_party/xla/repo.bzl
@ -4,9 +4,9 @@ def repo():
    git_repository(
        name = "xla",
        remote = "https://github.com/openxla/xla.git",
-        commit = "b3fbfeeb076f2b536897180f4a274680ed9d52eb",
+        commit = "9a77a882bb2bc75cb8c29620ff8cd0fd089bdc86",
        patch_args = ["-p1"],
        patches = [
-            # patches live in the patches directory
+            "third_party/xla/patches/0001-PjRT-C-API-male-header-C-compliant-for-PJRT-FFI-exte.patch",
        ],
    )
--- a/third_party/xla/xla.bzl
+++ b/third_party/xla/xla.bzl
@ -42,6 +42,15 @@ if_rocm_newer_than = always_newer_than
 is_rocm_configured = always_false
 if_gpu_is_configured = always_if_false
 if_cuda_or_rocm = always_if_false
+""",
+    })
+    simple_files(name = "local_config_sycl", files = {
+        "BUILD.bazel": "",
+        "sycl/BUILD.bazel": "",
+        "crosstool/BUILD.bazel": "",
+        "sycl/build_defs.bzl": _BZL_HELPERS + """\
+if_sycl = always_if_false
+if_sycl_is_configured = always_if_false
 """,
    })
    simple_files(name = "local_config_remote_execution", files = {
@ -56,6 +65,17 @@ if_cuda_or_rocm = always_if_false
    simple_files(name = "rules_ml_toolchain", files = {
        "third_party/gpus/BUILD.bazel": "",
        "third_party/gpus/nvidia_common_rules.bzl": """cuda_rpath_flags = lambda *args, **kwargs: []""",
+        "third_party/extensions/sycl_configure.bzl": "",
+    })
+    simple_files(name = "sycl_configure_ext", files = {})
+    simple_files(name = "sycl_configure", files = {})
+    simple_files(name = "rules_shell", files = {
+        "BUILD.bazel": "",
+        "shell/BUILD.bazel": "",
+        "shell/sh_binary.bzl": """
+def sh_binary(**kwargs):
+    native.sh_binary(**kwargs)
+""",
    })

 def _xla_impl(mctx):
@ -70,7 +90,7 @@ def _xla_impl(mctx):
        patch_file = ["//third_party/grpc:grpc.patch"],
        urls = tf_mirror_urls("https://github.com/grpc/grpc/archive/refs/tags/v1.74.0.tar.gz"),
    )
-    tf_vendored(name = "tsl", relpath = "third_party/tsl")
+    tf_vendored(name = "tsl", path = "third_party/tsl")

    _dummy_repos(mctx)

--- a/third_party/zls/cat.zig
+++ b/third_party/zls/cat.zig
@ -8,7 +8,7 @@ pub fn main() !void {
    defer std.process.argsFree(gpa, args);

    const file_path = args[1];
-    var file = try std.fs.cwd().openFile(file_path, .{ .mode = .read_only});
+    var file = try std.fs.cwd().openFile(file_path, .{ .mode = .read_only });
    defer file.close();

    if (builtin.zig_version.major == 0 and builtin.zig_version.minor >= 15) {
--- a/tools/hf/requirements.in
+++ b/tools/hf/requirements.in
@ -1 +1,2 @@
 huggingface_hub
+hf_transfer
--- a/tools/hf/requirements.lock.txt
+++ b/tools/hf/requirements.lock.txt
@ -1,174 +1,255 @@
 # This file was autogenerated by uv via the following command:
 #    bazel run //tools/hf:requirements.update
-certifi==2025.8.3 \
-    --hash=sha256:e564105f78ded564e3ae7c923924435e1daa7463faeab5bb932bc53ffae63407 \
-    --hash=sha256:f6c12493cfb1b06ba2ff328595af9350c65d6644968e5d3a2ffd78699af217a5
+certifi==2025.10.5 \
+    --hash=sha256:0f212c2744a9bb6de0c56639a6f68afe01ecd92d91f14ae897c4fe7bbeeef0de \
+    --hash=sha256:47c09d31ccf2acf0be3f701ea53595ee7e0b8fa08801c6624be771df09ae7b43
    # via requests
-charset-normalizer==3.4.3 \
-    --hash=sha256:00237675befef519d9af72169d8604a067d92755e84fe76492fef5441db05b91 \
-    --hash=sha256:02425242e96bcf29a49711b0ca9f37e451da7c70562bc10e8ed992a5a7a25cc0 \
-    --hash=sha256:027b776c26d38b7f15b26a5da1044f376455fb3766df8fc38563b4efbc515154 \
-    --hash=sha256:07a0eae9e2787b586e129fdcbe1af6997f8d0e5abaa0bc98c0e20e124d67e601 \
-    --hash=sha256:0cacf8f7297b0c4fcb74227692ca46b4a5852f8f4f24b3c766dd94a1075c4884 \
-    --hash=sha256:0e78314bdc32fa80696f72fa16dc61168fda4d6a0c014e0380f9d02f0e5d8a07 \
-    --hash=sha256:0f2be7e0cf7754b9a30eb01f4295cc3d4358a479843b31f328afd210e2c7598c \
-    --hash=sha256:13faeacfe61784e2559e690fc53fa4c5ae97c6fcedb8eb6fb8d0a15b475d2c64 \
-    --hash=sha256:14c2a87c65b351109f6abfc424cab3927b3bdece6f706e4d12faaf3d52ee5efe \
-    --hash=sha256:1606f4a55c0fd363d754049cdf400175ee96c992b1f8018b993941f221221c5f \
-    --hash=sha256:16a8770207946ac75703458e2c743631c79c59c5890c80011d536248f8eaa432 \
-    --hash=sha256:18343b2d246dc6761a249ba1fb13f9ee9a2bcd95decc767319506056ea4ad4dc \
-    --hash=sha256:18b97b8404387b96cdbd30ad660f6407799126d26a39ca65729162fd810a99aa \
-    --hash=sha256:1bb60174149316da1c35fa5233681f7c0f9f514509b8e399ab70fea5f17e45c9 \
-    --hash=sha256:1e8ac75d72fa3775e0b7cb7e4629cec13b7514d928d15ef8ea06bca03ef01cae \
-    --hash=sha256:1ef99f0456d3d46a50945c98de1774da86f8e992ab5c77865ea8b8195341fc19 \
-    --hash=sha256:2001a39612b241dae17b4687898843f254f8748b796a2e16f1051a17078d991d \
-    --hash=sha256:23b6b24d74478dc833444cbd927c338349d6ae852ba53a0d02a2de1fce45b96e \
-    --hash=sha256:252098c8c7a873e17dd696ed98bbe91dbacd571da4b87df3736768efa7a792e4 \
-    --hash=sha256:257f26fed7d7ff59921b78244f3cd93ed2af1800ff048c33f624c87475819dd7 \
-    --hash=sha256:2c322db9c8c89009a990ef07c3bcc9f011a3269bc06782f916cd3d9eed7c9312 \
-    --hash=sha256:30a96e1e1f865f78b030d65241c1ee850cdf422d869e9028e2fc1d5e4db73b92 \
-    --hash=sha256:30d006f98569de3459c2fc1f2acde170b7b2bd265dc1943e87e1a4efe1b67c31 \
-    --hash=sha256:31a9a6f775f9bcd865d88ee350f0ffb0e25936a7f930ca98995c05abf1faf21c \
-    --hash=sha256:320e8e66157cc4e247d9ddca8e21f427efc7a04bbd0ac8a9faf56583fa543f9f \
-    --hash=sha256:34a7f768e3f985abdb42841e20e17b330ad3aaf4bb7e7aeeb73db2e70f077b99 \
-    --hash=sha256:3653fad4fe3ed447a596ae8638b437f827234f01a8cd801842e43f3d0a6b281b \
-    --hash=sha256:3cd35b7e8aedeb9e34c41385fda4f73ba609e561faedfae0a9e75e44ac558a15 \
-    --hash=sha256:3cfb2aad70f2c6debfbcb717f23b7eb55febc0bb23dcffc0f076009da10c6392 \
-    --hash=sha256:416175faf02e4b0810f1f38bcb54682878a4af94059a1cd63b8747244420801f \
-    --hash=sha256:41d1fc408ff5fdfb910200ec0e74abc40387bccb3252f3f27c0676731df2b2c8 \
-    --hash=sha256:42e5088973e56e31e4fa58eb6bd709e42fc03799c11c42929592889a2e54c491 \
-    --hash=sha256:4ca4c094de7771a98d7fbd67d9e5dbf1eb73efa4f744a730437d8a3a5cf994f0 \
-    --hash=sha256:511729f456829ef86ac41ca78c63a5cb55240ed23b4b737faca0eb1abb1c41bc \
-    --hash=sha256:53cd68b185d98dde4ad8990e56a58dea83a4162161b1ea9272e5c9182ce415e0 \
-    --hash=sha256:585f3b2a80fbd26b048a0be90c5aae8f06605d3c92615911c3a2b03a8a3b796f \
-    --hash=sha256:5b413b0b1bfd94dbf4023ad6945889f374cd24e3f62de58d6bb102c4d9ae534a \
-    --hash=sha256:5d8d01eac18c423815ed4f4a2ec3b439d654e55ee4ad610e153cf02faf67ea40 \
-    --hash=sha256:6aab0f181c486f973bc7262a97f5aca3ee7e1437011ef0c2ec04b5a11d16c927 \
-    --hash=sha256:6cf8fd4c04756b6b60146d98cd8a77d0cdae0e1ca20329da2ac85eed779b6849 \
-    --hash=sha256:6fb70de56f1859a3f71261cbe41005f56a7842cc348d3aeb26237560bfa5e0ce \
-    --hash=sha256:6fce4b8500244f6fcb71465d4a4930d132ba9ab8e71a7859e6a5d59851068d14 \
-    --hash=sha256:70bfc5f2c318afece2f5838ea5e4c3febada0be750fcf4775641052bbba14d05 \
-    --hash=sha256:73dc19b562516fc9bcf6e5d6e596df0b4eb98d87e4f79f3ae71840e6ed21361c \
-    --hash=sha256:74d77e25adda8581ffc1c720f1c81ca082921329452eba58b16233ab1842141c \
-    --hash=sha256:78deba4d8f9590fe4dae384aeff04082510a709957e968753ff3c48399f6f92a \
-    --hash=sha256:86df271bf921c2ee3818f0522e9a5b8092ca2ad8b065ece5d7d9d0e9f4849bcc \
-    --hash=sha256:88ab34806dea0671532d3f82d82b85e8fc23d7b2dd12fa837978dad9bb392a34 \
-    --hash=sha256:8999f965f922ae054125286faf9f11bc6932184b93011d138925a1773830bbe9 \
-    --hash=sha256:8dcfc373f888e4fb39a7bc57e93e3b845e7f462dacc008d9749568b1c4ece096 \
-    --hash=sha256:939578d9d8fd4299220161fdd76e86c6a251987476f5243e8864a7844476ba14 \
-    --hash=sha256:96b2b3d1a83ad55310de8c7b4a2d04d9277d5591f40761274856635acc5fcb30 \
-    --hash=sha256:a2d08ac246bb48479170408d6c19f6385fa743e7157d716e144cad849b2dd94b \
-    --hash=sha256:b256ee2e749283ef3ddcff51a675ff43798d92d746d1a6e4631bf8c707d22d0b \
-    --hash=sha256:b5e3b2d152e74e100a9e9573837aba24aab611d39428ded46f4e4022ea7d1942 \
-    --hash=sha256:b89bc04de1d83006373429975f8ef9e7932534b8cc9ca582e4db7d20d91816db \
-    --hash=sha256:bd28b817ea8c70215401f657edef3a8aa83c29d447fb0b622c35403780ba11d5 \
-    --hash=sha256:c60e092517a73c632ec38e290eba714e9627abe9d301c8c8a12ec32c314a2a4b \
-    --hash=sha256:c6dbd0ccdda3a2ba7c2ecd9d77b37f3b5831687d8dc1b6ca5f56a4880cc7b7ce \
-    --hash=sha256:c6e490913a46fa054e03699c70019ab869e990270597018cef1d8562132c2669 \
-    --hash=sha256:c6f162aabe9a91a309510d74eeb6507fab5fff92337a15acbe77753d88d9dcf0 \
-    --hash=sha256:c6fd51128a41297f5409deab284fecbe5305ebd7e5a1f959bee1c054622b7018 \
-    --hash=sha256:cc34f233c9e71701040d772aa7490318673aa7164a0efe3172b2981218c26d93 \
-    --hash=sha256:cc9370a2da1ac13f0153780040f465839e6cccb4a1e44810124b4e22483c93fe \
-    --hash=sha256:ccf600859c183d70eb47e05a44cd80a4ce77394d1ac0f79dbd2dd90a69a3a049 \
-    --hash=sha256:ce571ab16d890d23b5c278547ba694193a45011ff86a9162a71307ed9f86759a \
-    --hash=sha256:cf1ebb7d78e1ad8ec2a8c4732c7be2e736f6e5123a4146c5b89c9d1f585f8cef \
-    --hash=sha256:d0e909868420b7049dafd3a31d45125b31143eec59235311fc4c57ea26a4acd2 \
-    --hash=sha256:d22dbedd33326a4a5190dd4fe9e9e693ef12160c77382d9e87919bce54f3d4ca \
-    --hash=sha256:d716a916938e03231e86e43782ca7878fb602a125a91e7acb8b5112e2e96ac16 \
-    --hash=sha256:d79c198e27580c8e958906f803e63cddb77653731be08851c7df0b1a14a8fc0f \
-    --hash=sha256:d95bfb53c211b57198bb91c46dd5a2d8018b3af446583aab40074bf7988401cb \
-    --hash=sha256:e28e334d3ff134e88989d90ba04b47d84382a828c061d0d1027b1b12a62b39b1 \
-    --hash=sha256:ec557499516fc90fd374bf2e32349a2887a876fbf162c160e3c01b6849eaf557 \
-    --hash=sha256:fb6fecfd65564f208cbf0fba07f107fb661bcd1a7c389edbced3f7a493f70e37 \
-    --hash=sha256:fb731e5deb0c7ef82d698b0f4c5bb724633ee2a489401594c5c88b02e6cb15f7 \
-    --hash=sha256:fb7f67a1bfa6e40b438170ebdc8158b78dc465a5a67b6dde178a46987b244a72 \
-    --hash=sha256:fd10de089bcdcd1be95a2f73dbe6254798ec1bda9f450d5828c96f93e2536b9c \
-    --hash=sha256:fdabf8315679312cfa71302f9bd509ded4f2f263fb5b765cf1433b39106c3cc9
+charset-normalizer==3.4.4 \
+    --hash=sha256:027f6de494925c0ab2a55eab46ae5129951638a49a34d87f4c3eda90f696b4ad \
+    --hash=sha256:077fbb858e903c73f6c9db43374fd213b0b6a778106bc7032446a8e8b5b38b93 \
+    --hash=sha256:0a98e6759f854bd25a58a73fa88833fba3b7c491169f86ce1180c948ab3fd394 \
+    --hash=sha256:0d3d8f15c07f86e9ff82319b3d9ef6f4bf907608f53fe9d92b28ea9ae3d1fd89 \
+    --hash=sha256:0f04b14ffe5fdc8c4933862d8306109a2c51e0704acfa35d51598eb45a1e89fc \
+    --hash=sha256:11d694519d7f29d6cd09f6ac70028dba10f92f6cdd059096db198c283794ac86 \
+    --hash=sha256:194f08cbb32dc406d6e1aea671a68be0823673db2832b38405deba2fb0d88f63 \
+    --hash=sha256:1bee1e43c28aa63cb16e5c14e582580546b08e535299b8b6158a7c9c768a1f3d \
+    --hash=sha256:21d142cc6c0ec30d2efee5068ca36c128a30b0f2c53c1c07bd78cb6bc1d3be5f \
+    --hash=sha256:2437418e20515acec67d86e12bf70056a33abdacb5cb1655042f6538d6b085a8 \
+    --hash=sha256:244bfb999c71b35de57821b8ea746b24e863398194a4014e4c76adc2bbdfeff0 \
+    --hash=sha256:2677acec1a2f8ef614c6888b5b4ae4060cc184174a938ed4e8ef690e15d3e505 \
+    --hash=sha256:277e970e750505ed74c832b4bf75dac7476262ee2a013f5574dd49075879e161 \
+    --hash=sha256:2aaba3b0819274cc41757a1da876f810a3e4d7b6eb25699253a4effef9e8e4af \
+    --hash=sha256:2b7d8f6c26245217bd2ad053761201e9f9680f8ce52f0fcd8d0755aeae5b2152 \
+    --hash=sha256:2c9d3c380143a1fedbff95a312aa798578371eb29da42106a29019368a475318 \
+    --hash=sha256:3162d5d8ce1bb98dd51af660f2121c55d0fa541b46dff7bb9b9f86ea1d87de72 \
+    --hash=sha256:31fd66405eaf47bb62e8cd575dc621c56c668f27d46a61d975a249930dd5e2a4 \
+    --hash=sha256:362d61fd13843997c1c446760ef36f240cf81d3ebf74ac62652aebaf7838561e \
+    --hash=sha256:376bec83a63b8021bb5c8ea75e21c4ccb86e7e45ca4eb81146091b56599b80c3 \
+    --hash=sha256:44c2a8734b333e0578090c4cd6b16f275e07aa6614ca8715e6c038e865e70576 \
+    --hash=sha256:47cc91b2f4dd2833fddaedd2893006b0106129d4b94fdb6af1f4ce5a9965577c \
+    --hash=sha256:4902828217069c3c5c71094537a8e623f5d097858ac6ca8252f7b4d10b7560f1 \
+    --hash=sha256:4bd5d4137d500351a30687c2d3971758aac9a19208fc110ccb9d7188fbe709e8 \
+    --hash=sha256:4fe7859a4e3e8457458e2ff592f15ccb02f3da787fcd31e0183879c3ad4692a1 \
+    --hash=sha256:542d2cee80be6f80247095cc36c418f7bddd14f4a6de45af91dfad36d817bba2 \
+    --hash=sha256:554af85e960429cf30784dd47447d5125aaa3b99a6f0683589dbd27e2f45da44 \
+    --hash=sha256:5833d2c39d8896e4e19b689ffc198f08ea58116bee26dea51e362ecc7cd3ed26 \
+    --hash=sha256:5947809c8a2417be3267efc979c47d76a079758166f7d43ef5ae8e9f92751f88 \
+    --hash=sha256:5ae497466c7901d54b639cf42d5b8c1b6a4fead55215500d2f486d34db48d016 \
+    --hash=sha256:5bd2293095d766545ec1a8f612559f6b40abc0eb18bb2f5d1171872d34036ede \
+    --hash=sha256:5bfbb1b9acf3334612667b61bd3002196fe2a1eb4dd74d247e0f2a4d50ec9bbf \
+    --hash=sha256:5cb4d72eea50c8868f5288b7f7f33ed276118325c1dfd3957089f6b519e1382a \
+    --hash=sha256:5dbe56a36425d26d6cfb40ce79c314a2e4dd6211d51d6d2191c00bed34f354cc \
+    --hash=sha256:5f819d5fe9234f9f82d75bdfa9aef3a3d72c4d24a6e57aeaebba32a704553aa0 \
+    --hash=sha256:64b55f9dce520635f018f907ff1b0df1fdc31f2795a922fb49dd14fbcdf48c84 \
+    --hash=sha256:6515f3182dbe4ea06ced2d9e8666d97b46ef4c75e326b79bb624110f122551db \
+    --hash=sha256:65e2befcd84bc6f37095f5961e68a6f077bf44946771354a28ad434c2cce0ae1 \
+    --hash=sha256:6aee717dcfead04c6eb1ce3bd29ac1e22663cdea57f943c87d1eab9a025438d7 \
+    --hash=sha256:6b39f987ae8ccdf0d2642338faf2abb1862340facc796048b604ef14919e55ed \
+    --hash=sha256:6e1fcf0720908f200cd21aa4e6750a48ff6ce4afe7ff5a79a90d5ed8a08296f8 \
+    --hash=sha256:74018750915ee7ad843a774364e13a3db91682f26142baddf775342c3f5b1133 \
+    --hash=sha256:74664978bb272435107de04e36db5a9735e78232b85b77d45cfb38f758efd33e \
+    --hash=sha256:74bb723680f9f7a6234dcf67aea57e708ec1fbdf5699fb91dfd6f511b0a320ef \
+    --hash=sha256:752944c7ffbfdd10c074dc58ec2d5a8a4cd9493b314d367c14d24c17684ddd14 \
+    --hash=sha256:778d2e08eda00f4256d7f672ca9fef386071c9202f5e4607920b86d7803387f2 \
+    --hash=sha256:780236ac706e66881f3b7f2f32dfe90507a09e67d1d454c762cf642e6e1586e0 \
+    --hash=sha256:798d75d81754988d2565bff1b97ba5a44411867c0cf32b77a7e8f8d84796b10d \
+    --hash=sha256:799a7a5e4fb2d5898c60b640fd4981d6a25f1c11790935a44ce38c54e985f828 \
+    --hash=sha256:7a32c560861a02ff789ad905a2fe94e3f840803362c84fecf1851cb4cf3dc37f \
+    --hash=sha256:7c308f7e26e4363d79df40ca5b2be1c6ba9f02bdbccfed5abddb7859a6ce72cf \
+    --hash=sha256:7fa17817dc5625de8a027cb8b26d9fefa3ea28c8253929b8d6649e705d2835b6 \
+    --hash=sha256:81d5eb2a312700f4ecaa977a8235b634ce853200e828fbadf3a9c50bab278328 \
+    --hash=sha256:82004af6c302b5d3ab2cfc4cc5f29db16123b1a8417f2e25f9066f91d4411090 \
+    --hash=sha256:837c2ce8c5a65a2035be9b3569c684358dfbf109fd3b6969630a87535495ceaa \
+    --hash=sha256:840c25fb618a231545cbab0564a799f101b63b9901f2569faecd6b222ac72381 \
+    --hash=sha256:8a6562c3700cce886c5be75ade4a5db4214fda19fede41d9792d100288d8f94c \
+    --hash=sha256:8af65f14dc14a79b924524b1e7fffe304517b2bff5a58bf64f30b98bbc5079eb \
+    --hash=sha256:8ef3c867360f88ac904fd3f5e1f902f13307af9052646963ee08ff4f131adafc \
+    --hash=sha256:94537985111c35f28720e43603b8e7b43a6ecfb2ce1d3058bbe955b73404e21a \
+    --hash=sha256:99ae2cffebb06e6c22bdc25801d7b30f503cc87dbd283479e7b606f70aff57ec \
+    --hash=sha256:9a26f18905b8dd5d685d6d07b0cdf98a79f3c7a918906af7cc143ea2e164c8bc \
+    --hash=sha256:9b35f4c90079ff2e2edc5b26c0c77925e5d2d255c42c74fdb70fb49b172726ac \
+    --hash=sha256:9cd98cdc06614a2f768d2b7286d66805f94c48cde050acdbbb7db2600ab3197e \
+    --hash=sha256:9d1bb833febdff5c8927f922386db610b49db6e0d4f4ee29601d71e7c2694313 \
+    --hash=sha256:9f7fcd74d410a36883701fafa2482a6af2ff5ba96b9a620e9e0721e28ead5569 \
+    --hash=sha256:a59cb51917aa591b1c4e6a43c132f0cdc3c76dbad6155df4e28ee626cc77a0a3 \
+    --hash=sha256:a61900df84c667873b292c3de315a786dd8dac506704dea57bc957bd31e22c7d \
+    --hash=sha256:a79cfe37875f822425b89a82333404539ae63dbdddf97f84dcbc3d339aae9525 \
+    --hash=sha256:a8a8b89589086a25749f471e6a900d3f662d1d3b6e2e59dcecf787b1cc3a1894 \
+    --hash=sha256:a8bf8d0f749c5757af2142fe7903a9df1d2e8aa3841559b2bad34b08d0e2bcf3 \
+    --hash=sha256:a9768c477b9d7bd54bc0c86dbaebdec6f03306675526c9927c0e8a04e8f94af9 \
+    --hash=sha256:ac1c4a689edcc530fc9d9aa11f5774b9e2f33f9a0c6a57864e90908f5208d30a \
+    --hash=sha256:af2d8c67d8e573d6de5bc30cdb27e9b95e49115cd9baad5ddbd1a6207aaa82a9 \
+    --hash=sha256:b435cba5f4f750aa6c0a0d92c541fb79f69a387c91e61f1795227e4ed9cece14 \
+    --hash=sha256:b5b290ccc2a263e8d185130284f8501e3e36c5e02750fc6b6bdeb2e9e96f1e25 \
+    --hash=sha256:b5d84d37db046c5ca74ee7bb47dd6cbc13f80665fdde3e8040bdd3fb015ecb50 \
+    --hash=sha256:b7cf1017d601aa35e6bb650b6ad28652c9cd78ee6caff19f3c28d03e1c80acbf \
+    --hash=sha256:bc7637e2f80d8530ee4a78e878bce464f70087ce73cf7c1caf142416923b98f1 \
+    --hash=sha256:c0463276121fdee9c49b98908b3a89c39be45d86d1dbaa22957e38f6321d4ce3 \
+    --hash=sha256:c4ef880e27901b6cc782f1b95f82da9313c0eb95c3af699103088fa0ac3ce9ac \
+    --hash=sha256:c8ae8a0f02f57a6e61203a31428fa1d677cbe50c93622b4149d5c0f319c1d19e \
+    --hash=sha256:ca5862d5b3928c4940729dacc329aa9102900382fea192fc5e52eb69d6093815 \
+    --hash=sha256:cb01158d8b88ee68f15949894ccc6712278243d95f344770fa7593fa2d94410c \
+    --hash=sha256:cb6254dc36b47a990e59e1068afacdcd02958bdcce30bb50cc1700a8b9d624a6 \
+    --hash=sha256:cc00f04ed596e9dc0da42ed17ac5e596c6ccba999ba6bd92b0e0aef2f170f2d6 \
+    --hash=sha256:cd09d08005f958f370f539f186d10aec3377d55b9eeb0d796025d4886119d76e \
+    --hash=sha256:cd4b7ca9984e5e7985c12bc60a6f173f3c958eae74f3ef6624bb6b26e2abbae4 \
+    --hash=sha256:ce8a0633f41a967713a59c4139d29110c07e826d131a316b50ce11b1d79b4f84 \
+    --hash=sha256:cead0978fc57397645f12578bfd2d5ea9138ea0fac82b2f63f7f7c6877986a69 \
+    --hash=sha256:d055ec1e26e441f6187acf818b73564e6e6282709e9bcb5b63f5b23068356a15 \
+    --hash=sha256:d1f13550535ad8cff21b8d757a3257963e951d96e20ec82ab44bc64aeb62a191 \
+    --hash=sha256:d9c7f57c3d666a53421049053eaacdd14bbd0a528e2186fcb2e672effd053bb0 \
+    --hash=sha256:d9e45d7faa48ee908174d8fe84854479ef838fc6a705c9315372eacbc2f02897 \
+    --hash=sha256:da3326d9e65ef63a817ecbcc0df6e94463713b754fe293eaa03da99befb9a5bd \
+    --hash=sha256:de00632ca48df9daf77a2c65a484531649261ec9f25489917f09e455cb09ddb2 \
+    --hash=sha256:e1f185f86a6f3403aa2420e815904c67b2f9ebc443f045edd0de921108345794 \
+    --hash=sha256:e824f1492727fa856dd6eda4f7cee25f8518a12f3c4a56a74e8095695089cf6d \
+    --hash=sha256:e912091979546adf63357d7e2ccff9b44f026c075aeaf25a52d0e95ad2281074 \
+    --hash=sha256:eaabd426fe94daf8fd157c32e571c85cb12e66692f15516a83a03264b08d06c3 \
+    --hash=sha256:ebf3e58c7ec8a8bed6d66a75d7fb37b55e5015b03ceae72a8e7c74495551e224 \
+    --hash=sha256:ecaae4149d99b1c9e7b88bb03e3221956f68fd6d50be2ef061b2381b61d20838 \
+    --hash=sha256:eecbc200c7fd5ddb9a7f16c7decb07b566c29fa2161a16cf67b8d068bd21690a \
+    --hash=sha256:f155a433c2ec037d4e8df17d18922c3a0d9b3232a396690f17175d2946f0218d \
+    --hash=sha256:f1e34719c6ed0b92f418c7c780480b26b5d9c50349e9a9af7d76bf757530350d \
+    --hash=sha256:f34be2938726fc13801220747472850852fe6b1ea75869a048d6f896838c896f \
+    --hash=sha256:f820802628d2694cb7e56db99213f930856014862f3fd943d290ea8438d07ca8 \
+    --hash=sha256:f8bf04158c6b607d747e93949aa60618b61312fe647a6369f88ce2ff16043490 \
+    --hash=sha256:f8e160feb2aed042cd657a72acc0b481212ed28b1b9a95c0cee1621b524e1966 \
+    --hash=sha256:f9d332f8c2a2fcbffe1378594431458ddbef721c1769d78e2cbc06280d8155f9 \
+    --hash=sha256:fa09f53c465e532f4d3db095e0c55b615f010ad81803d383195b6b5ca6cbf5f3 \
+    --hash=sha256:faa3a41b2b66b6e50f84ae4a68c64fcd0c44355741c6374813a800cd6695db9e \
+    --hash=sha256:fd44c878ea55ba351104cb93cc85e74916eb8fa440ca7903e57575e97394f608
    # via requests
-filelock==3.19.1 \
-    --hash=sha256:66eda1888b0171c998b35be2bcc0f6d75c388a7ce20c3f3f37aa8e96c2dddf58 \
-    --hash=sha256:d38e30481def20772f5baf097c122c3babc4fcdb7e14e57049eb9d88c6dc017d
+filelock==3.20.0 \
+    --hash=sha256:339b4732ffda5cd79b13f4e2711a31b0365ce445d95d243bb996273d072546a2 \
+    --hash=sha256:711e943b4ec6be42e1d4e6690b48dc175c822967466bb31c0c293f34334c13f4
    # via huggingface-hub
-fsspec==2025.7.0 \
-    --hash=sha256:786120687ffa54b8283d942929540d8bc5ccfa820deb555a2b5d0ed2b737bf58 \
-    --hash=sha256:8b012e39f63c7d5f10474de957f3ab793b47b45ae7d39f2fb735f8bbe25c0e21
+fsspec==2025.9.0 \
+    --hash=sha256:19fd429483d25d28b65ec68f9f4adc16c17ea2c7c7bf54ec61360d478fb19c19 \
+    --hash=sha256:530dc2a2af60a414a832059574df4a6e10cce927f6f4a78209390fe38955cfb7
    # via huggingface-hub
-hf-xet==1.1.8 \
-    --hash=sha256:09e86514c3c4284ed8a57d6b0f3d089f9836a0af0a1ceb3c9dd664f1f3eaefef \
-    --hash=sha256:25b9d43333bbef39aeae1616789ec329c21401a7fe30969d538791076227b591 \
-    --hash=sha256:3d5f82e533fc51c7daad0f9b655d9c7811b5308e5890236828bd1dd3ed8fea74 \
-    --hash=sha256:4171f31d87b13da4af1ed86c98cf763292e4720c088b4957cf9d564f92904ca9 \
-    --hash=sha256:4a9b99ab721d385b83f4fc8ee4e0366b0b59dce03b5888a86029cc0ca634efbf \
-    --hash=sha256:62a0043e441753bbc446dcb5a3fe40a4d03f5fb9f13589ef1df9ab19252beb53 \
-    --hash=sha256:8e2dba5896bca3ab61d0bef4f01a1647004de59640701b37e37eaa57087bbd9d \
-    --hash=sha256:bfe5700bc729be3d33d4e9a9b5cc17a951bf8c7ada7ba0c9198a6ab2053b7453
-    # via huggingface-hub
-huggingface-hub==0.34.4 \
-    --hash=sha256:9b365d781739c93ff90c359844221beef048403f1bc1f1c123c191257c3c890a \
-    --hash=sha256:a4228daa6fb001be3f4f4bdaf9a0db00e1739235702848df00885c9b5742c85c
+hf-transfer==0.1.9 \
+    --hash=sha256:035572865dab29d17e783fbf1e84cf1cb24f3fcf8f1b17db1cfc7fdf139f02bf \
+    --hash=sha256:0d991376f0eac70a60f0cbc95602aa708a6f7c8617f28b4945c1431d67b8e3c8 \
+    --hash=sha256:16f208fc678911c37e11aa7b586bc66a37d02e636208f18b6bc53d29b5df40ad \
+    --hash=sha256:1a6bd16c667ebe89a069ca163060127a794fa3a3525292c900b8c8cc47985b0d \
+    --hash=sha256:2c7fc1b85f4d0f76e452765d7648c9f4bfd0aedb9ced2ae1ebfece2d8cfaf8e2 \
+    --hash=sha256:3a736dfbb2c84f5a2c975478ad200c0c8bfcb58a25a35db402678fb87ce17fa4 \
+    --hash=sha256:3ebc4ab9023414880c8b1d3c38174d1c9989eb5022d37e814fa91a3060123eb0 \
+    --hash=sha256:435cc3cdc8524ce57b074032b8fd76eed70a4224d2091232fa6a8cef8fd6803e \
+    --hash=sha256:504b8427fd785dd8546d53b9fafe6e436bd7a3adf76b9dce556507650a7b4567 \
+    --hash=sha256:57fd9880da1ee0f47250f735f791fab788f0aa1ee36afc49f761349869c8b4d9 \
+    --hash=sha256:5828057e313de59300dd1abb489444bc452efe3f479d3c55b31a8f680936ba42 \
+    --hash=sha256:5d561f0520f493c66b016d99ceabe69c23289aa90be38dd802d2aef279f15751 \
+    --hash=sha256:6e94e8822da79573c9b6ae4d6b2f847c59a7a06c5327d7db20751b68538dc4f6 \
+    --hash=sha256:8669dbcc7a3e2e8d61d42cd24da9c50d57770bd74b445c65123291ca842a7e7a \
+    --hash=sha256:8674026f21ed369aa2a0a4b46000aca850fc44cd2b54af33a172ce5325b4fc82 \
+    --hash=sha256:89a23f58b7b7effbc047b8ca286f131b17728c99a9f972723323003ffd1bb916 \
+    --hash=sha256:8fd0167c4407a3bc4cdd0307e65ada2294ec04f1813d8a69a5243e379b22e9d8 \
+    --hash=sha256:a5b366d34cd449fe9b20ef25941e6eef0460a2f74e7389f02e673e1f88ebd538 \
+    --hash=sha256:cdca9bfb89e6f8f281890cc61a8aff2d3cecaff7e1a4d275574d96ca70098557 \
+    --hash=sha256:d2fde99d502093ade3ab1b53f80da18480e9902aa960dab7f74fb1b9e5bc5746 \
+    --hash=sha256:dc7fff1345980d6c0ebb92c811d24afa4b98b3e07ed070c8e38cc91fd80478c5 \
+    --hash=sha256:e66acf91df4a8b72f60223059df3003062a5ae111757187ed1a06750a30e911b \
+    --hash=sha256:e6ac4eddcd99575ed3735ed911ddf9d1697e2bd13aa3f0ad7e3904dd4863842e \
+    --hash=sha256:ee8b10afedcb75f71091bcc197c526a6ebf5c58bbbadb34fdeee6160f55f619f \
+    --hash=sha256:fc6bd19e1cc177c66bdef15ef8636ad3bde79d5a4f608c158021153b4573509d
    # via -r tools/hf/requirements.in
-idna==3.10 \
-    --hash=sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9 \
-    --hash=sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3
+hf-xet==1.1.10 \
+    --hash=sha256:0a0005fd08f002180f7a12d4e13b22be277725bc23ed0529f8add5c7a6309c06 \
+    --hash=sha256:408aef343800a2102374a883f283ff29068055c111f003ff840733d3b715bb97 \
+    --hash=sha256:5f54b19cc347c13235ae7ee98b330c26dd65ef1df47e5316ffb1e87713ca7045 \
+    --hash=sha256:686083aca1a6669bc85c21c0563551cbcdaa5cf7876a91f3d074a030b577231d \
+    --hash=sha256:6b6bceb6361c80c1cc42b5a7b4e3efd90e64630bcf11224dcac50ef30a47e435 \
+    --hash=sha256:71081925383b66b24eedff3013f8e6bbd41215c3338be4b94ba75fd75b21513b \
+    --hash=sha256:eae7c1fc8a664e54753ffc235e11427ca61f4b0477d757cc4eb9ae374b69f09c \
+    --hash=sha256:f900481cf6e362a6c549c61ff77468bd59d6dd082f3170a36acfef2eb6a6793f
+    # via huggingface-hub
+huggingface-hub==0.35.3 \
+    --hash=sha256:0e3a01829c19d86d03793e4577816fe3bdfc1602ac62c7fb220d593d351224ba \
+    --hash=sha256:350932eaa5cc6a4747efae85126ee220e4ef1b54e29d31c3b45c5612ddf0b32a
+    # via -r tools/hf/requirements.in
+idna==3.11 \
+    --hash=sha256:771a87f49d9defaf64091e6e6fe9c18d4833f140bd19464795bc32d966ca37ea \
+    --hash=sha256:795dafcc9c04ed0c1fb032c2aa73654d8e8c5023a7df64a53f39190ada629902
    # via requests
 packaging==25.0 \
    --hash=sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484 \
    --hash=sha256:d443872c98d677bf60f6a1f2f8c1cb748e8fe762d2bf9d3148b5599295b0fc4f
    # via huggingface-hub
-pyyaml==6.0.2 \
-    --hash=sha256:01179a4a8559ab5de078078f37e5c1a30d76bb88519906844fd7bdea1b7729ff \
-    --hash=sha256:0833f8694549e586547b576dcfaba4a6b55b9e96098b36cdc7ebefe667dfed48 \
-    --hash=sha256:0a9a2848a5b7feac301353437eb7d5957887edbf81d56e903999a75a3d743086 \
-    --hash=sha256:0b69e4ce7a131fe56b7e4d770c67429700908fc0752af059838b1cfb41960e4e \
-    --hash=sha256:0ffe8360bab4910ef1b9e87fb812d8bc0a308b0d0eef8c8f44e0254ab3b07133 \
-    --hash=sha256:11d8f3dd2b9c1207dcaf2ee0bbbfd5991f571186ec9cc78427ba5bd32afae4b5 \
-    --hash=sha256:17e311b6c678207928d649faa7cb0d7b4c26a0ba73d41e99c4fff6b6c3276484 \
-    --hash=sha256:1e2120ef853f59c7419231f3bf4e7021f1b936f6ebd222406c3b60212205d2ee \
-    --hash=sha256:1f71ea527786de97d1a0cc0eacd1defc0985dcf6b3f17bb77dcfc8c34bec4dc5 \
-    --hash=sha256:23502f431948090f597378482b4812b0caae32c22213aecf3b55325e049a6c68 \
-    --hash=sha256:24471b829b3bf607e04e88d79542a9d48bb037c2267d7927a874e6c205ca7e9a \
-    --hash=sha256:29717114e51c84ddfba879543fb232a6ed60086602313ca38cce623c1d62cfbf \
-    --hash=sha256:2e99c6826ffa974fe6e27cdb5ed0021786b03fc98e5ee3c5bfe1fd5015f42b99 \
-    --hash=sha256:39693e1f8320ae4f43943590b49779ffb98acb81f788220ea932a6b6c51004d8 \
-    --hash=sha256:3ad2a3decf9aaba3d29c8f537ac4b243e36bef957511b4766cb0057d32b0be85 \
-    --hash=sha256:3b1fdb9dc17f5a7677423d508ab4f243a726dea51fa5e70992e59a7411c89d19 \
-    --hash=sha256:41e4e3953a79407c794916fa277a82531dd93aad34e29c2a514c2c0c5fe971cc \
-    --hash=sha256:43fa96a3ca0d6b1812e01ced1044a003533c47f6ee8aca31724f78e93ccc089a \
-    --hash=sha256:50187695423ffe49e2deacb8cd10510bc361faac997de9efef88badc3bb9e2d1 \
-    --hash=sha256:5ac9328ec4831237bec75defaf839f7d4564be1e6b25ac710bd1a96321cc8317 \
-    --hash=sha256:5d225db5a45f21e78dd9358e58a98702a0302f2659a3c6cd320564b75b86f47c \
-    --hash=sha256:6395c297d42274772abc367baaa79683958044e5d3835486c16da75d2a694631 \
-    --hash=sha256:688ba32a1cffef67fd2e9398a2efebaea461578b0923624778664cc1c914db5d \
-    --hash=sha256:68ccc6023a3400877818152ad9a1033e3db8625d899c72eacb5a668902e4d652 \
-    --hash=sha256:70b189594dbe54f75ab3a1acec5f1e3faa7e8cf2f1e08d9b561cb41b845f69d5 \
-    --hash=sha256:797b4f722ffa07cc8d62053e4cff1486fa6dc094105d13fea7b1de7d8bf71c9e \
-    --hash=sha256:7c36280e6fb8385e520936c3cb3b8042851904eba0e58d277dca80a5cfed590b \
-    --hash=sha256:7e7401d0de89a9a855c839bc697c079a4af81cf878373abd7dc625847d25cbd8 \
-    --hash=sha256:80bab7bfc629882493af4aa31a4cfa43a4c57c83813253626916b8c7ada83476 \
-    --hash=sha256:82d09873e40955485746739bcb8b4586983670466c23382c19cffecbf1fd8706 \
-    --hash=sha256:8388ee1976c416731879ac16da0aff3f63b286ffdd57cdeb95f3f2e085687563 \
-    --hash=sha256:8824b5a04a04a047e72eea5cec3bc266db09e35de6bdfe34c9436ac5ee27d237 \
-    --hash=sha256:8b9c7197f7cb2738065c481a0461e50ad02f18c78cd75775628afb4d7137fb3b \
-    --hash=sha256:9056c1ecd25795207ad294bcf39f2db3d845767be0ea6e6a34d856f006006083 \
-    --hash=sha256:936d68689298c36b53b29f23c6dbb74de12b4ac12ca6cfe0e047bedceea56180 \
-    --hash=sha256:9b22676e8097e9e22e36d6b7bda33190d0d400f345f23d4065d48f4ca7ae0425 \
-    --hash=sha256:a4d3091415f010369ae4ed1fc6b79def9416358877534caf6a0fdd2146c87a3e \
-    --hash=sha256:a8786accb172bd8afb8be14490a16625cbc387036876ab6ba70912730faf8e1f \
-    --hash=sha256:a9f8c2e67970f13b16084e04f134610fd1d374bf477b17ec1599185cf611d725 \
-    --hash=sha256:bc2fa7c6b47d6bc618dd7fb02ef6fdedb1090ec036abab80d4681424b84c1183 \
-    --hash=sha256:c70c95198c015b85feafc136515252a261a84561b7b1d51e3384e0655ddf25ab \
-    --hash=sha256:cc1c1159b3d456576af7a3e4d1ba7e6924cb39de8f67111c735f6fc832082774 \
-    --hash=sha256:ce826d6ef20b1bc864f0a68340c8b3287705cae2f8b4b1d932177dcc76721725 \
-    --hash=sha256:d584d9ec91ad65861cc08d42e834324ef890a082e591037abe114850ff7bbc3e \
-    --hash=sha256:d7fded462629cfa4b685c5416b949ebad6cec74af5e2d42905d41e257e0869f5 \
-    --hash=sha256:d84a1718ee396f54f3a086ea0a66d8e552b2ab2017ef8b420e92edbc841c352d \
-    --hash=sha256:d8e03406cac8513435335dbab54c0d385e4a49e4945d2909a581c83647ca0290 \
-    --hash=sha256:e10ce637b18caea04431ce14fabcf5c64a1c61ec9c56b071a4b7ca131ca52d44 \
-    --hash=sha256:ec031d5d2feb36d1d1a24380e4db6d43695f3748343d99434e6f5f9156aaa2ed \
-    --hash=sha256:ef6107725bd54b262d6dedcc2af448a266975032bc85ef0172c5f059da6325b4 \
-    --hash=sha256:efdca5630322a10774e8e98e1af481aad470dd62c3170801852d752aa7a783ba \
-    --hash=sha256:f753120cb8181e736c57ef7636e83f31b9c0d1722c516f7e86cf15b7aa57ff12 \
-    --hash=sha256:ff3824dc5261f50c9b0dfb3be22b4567a6f938ccce4587b38952d85fd9e9afe4
+pyyaml==6.0.3 \
+    --hash=sha256:00c4bdeba853cc34e7dd471f16b4114f4162dc03e6b7afcc2128711f0eca823c \
+    --hash=sha256:0150219816b6a1fa26fb4699fb7daa9caf09eb1999f3b70fb6e786805e80375a \
+    --hash=sha256:02893d100e99e03eda1c8fd5c441d8c60103fd175728e23e431db1b589cf5ab3 \
+    --hash=sha256:02ea2dfa234451bbb8772601d7b8e426c2bfa197136796224e50e35a78777956 \
+    --hash=sha256:0f29edc409a6392443abf94b9cf89ce99889a1dd5376d94316ae5145dfedd5d6 \
+    --hash=sha256:10892704fc220243f5305762e276552a0395f7beb4dbf9b14ec8fd43b57f126c \
+    --hash=sha256:16249ee61e95f858e83976573de0f5b2893b3677ba71c9dd36b9cf8be9ac6d65 \
+    --hash=sha256:1d37d57ad971609cf3c53ba6a7e365e40660e3be0e5175fa9f2365a379d6095a \
+    --hash=sha256:1ebe39cb5fc479422b83de611d14e2c0d3bb2a18bbcb01f229ab3cfbd8fee7a0 \
+    --hash=sha256:214ed4befebe12df36bcc8bc2b64b396ca31be9304b8f59e25c11cf94a4c033b \
+    --hash=sha256:2283a07e2c21a2aa78d9c4442724ec1eb15f5e42a723b99cb3d822d48f5f7ad1 \
+    --hash=sha256:22ba7cfcad58ef3ecddc7ed1db3409af68d023b7f940da23c6c2a1890976eda6 \
+    --hash=sha256:27c0abcb4a5dac13684a37f76e701e054692a9b2d3064b70f5e4eb54810553d7 \
+    --hash=sha256:28c8d926f98f432f88adc23edf2e6d4921ac26fb084b028c733d01868d19007e \
+    --hash=sha256:2e71d11abed7344e42a8849600193d15b6def118602c4c176f748e4583246007 \
+    --hash=sha256:34d5fcd24b8445fadc33f9cf348c1047101756fd760b4dacb5c3e99755703310 \
+    --hash=sha256:37503bfbfc9d2c40b344d06b2199cf0e96e97957ab1c1b546fd4f87e53e5d3e4 \
+    --hash=sha256:3c5677e12444c15717b902a5798264fa7909e41153cdf9ef7ad571b704a63dd9 \
+    --hash=sha256:3ff07ec89bae51176c0549bc4c63aa6202991da2d9a6129d7aef7f1407d3f295 \
+    --hash=sha256:41715c910c881bc081f1e8872880d3c650acf13dfa8214bad49ed4cede7c34ea \
+    --hash=sha256:418cf3f2111bc80e0933b2cd8cd04f286338bb88bdc7bc8e6dd775ebde60b5e0 \
+    --hash=sha256:44edc647873928551a01e7a563d7452ccdebee747728c1080d881d68af7b997e \
+    --hash=sha256:4a2e8cebe2ff6ab7d1050ecd59c25d4c8bd7e6f400f5f82b96557ac0abafd0ac \
+    --hash=sha256:4ad1906908f2f5ae4e5a8ddfce73c320c2a1429ec52eafd27138b7f1cbe341c9 \
+    --hash=sha256:501a031947e3a9025ed4405a168e6ef5ae3126c59f90ce0cd6f2bfc477be31b7 \
+    --hash=sha256:5190d403f121660ce8d1d2c1bb2ef1bd05b5f68533fc5c2ea899bd15f4399b35 \
+    --hash=sha256:5498cd1645aa724a7c71c8f378eb29ebe23da2fc0d7a08071d89469bf1d2defb \
+    --hash=sha256:5cf4e27da7e3fbed4d6c3d8e797387aaad68102272f8f9752883bc32d61cb87b \
+    --hash=sha256:5e0b74767e5f8c593e8c9b5912019159ed0533c70051e9cce3e8b6aa699fcd69 \
+    --hash=sha256:5ed875a24292240029e4483f9d4a4b8a1ae08843b9c54f43fcc11e404532a8a5 \
+    --hash=sha256:5fcd34e47f6e0b794d17de1b4ff496c00986e1c83f7ab2fb8fcfe9616ff7477b \
+    --hash=sha256:5fdec68f91a0c6739b380c83b951e2c72ac0197ace422360e6d5a959d8d97b2c \
+    --hash=sha256:6344df0d5755a2c9a276d4473ae6b90647e216ab4757f8426893b5dd2ac3f369 \
+    --hash=sha256:64386e5e707d03a7e172c0701abfb7e10f0fb753ee1d773128192742712a98fd \
+    --hash=sha256:652cb6edd41e718550aad172851962662ff2681490a8a711af6a4d288dd96824 \
+    --hash=sha256:66291b10affd76d76f54fad28e22e51719ef9ba22b29e1d7d03d6777a9174198 \
+    --hash=sha256:66e1674c3ef6f541c35191caae2d429b967b99e02040f5ba928632d9a7f0f065 \
+    --hash=sha256:6adc77889b628398debc7b65c073bcb99c4a0237b248cacaf3fe8a557563ef6c \
+    --hash=sha256:79005a0d97d5ddabfeeea4cf676af11e647e41d81c9a7722a193022accdb6b7c \
+    --hash=sha256:7c6610def4f163542a622a73fb39f534f8c101d690126992300bf3207eab9764 \
+    --hash=sha256:7f047e29dcae44602496db43be01ad42fc6f1cc0d8cd6c83d342306c32270196 \
+    --hash=sha256:8098f252adfa6c80ab48096053f512f2321f0b998f98150cea9bd23d83e1467b \
+    --hash=sha256:850774a7879607d3a6f50d36d04f00ee69e7fc816450e5f7e58d7f17f1ae5c00 \
+    --hash=sha256:8d1fab6bb153a416f9aeb4b8763bc0f22a5586065f86f7664fc23339fc1c1fac \
+    --hash=sha256:8da9669d359f02c0b91ccc01cac4a67f16afec0dac22c2ad09f46bee0697eba8 \
+    --hash=sha256:8dc52c23056b9ddd46818a57b78404882310fb473d63f17b07d5c40421e47f8e \
+    --hash=sha256:9149cad251584d5fb4981be1ecde53a1ca46c891a79788c0df828d2f166bda28 \
+    --hash=sha256:93dda82c9c22deb0a405ea4dc5f2d0cda384168e466364dec6255b293923b2f3 \
+    --hash=sha256:96b533f0e99f6579b3d4d4995707cf36df9100d67e0c8303a0c55b27b5f99bc5 \
+    --hash=sha256:9c57bb8c96f6d1808c030b1687b9b5fb476abaa47f0db9c0101f5e9f394e97f4 \
+    --hash=sha256:9c7708761fccb9397fe64bbc0395abcae8c4bf7b0eac081e12b809bf47700d0b \
+    --hash=sha256:9f3bfb4965eb874431221a3ff3fdcddc7e74e3b07799e0e84ca4a0f867d449bf \
+    --hash=sha256:a33284e20b78bd4a18c8c2282d549d10bc8408a2a7ff57653c0cf0b9be0afce5 \
+    --hash=sha256:a80cb027f6b349846a3bf6d73b5e95e782175e52f22108cfa17876aaeff93702 \
+    --hash=sha256:b30236e45cf30d2b8e7b3e85881719e98507abed1011bf463a8fa23e9c3e98a8 \
+    --hash=sha256:b3bc83488de33889877a0f2543ade9f70c67d66d9ebb4ac959502e12de895788 \
+    --hash=sha256:b865addae83924361678b652338317d1bd7e79b1f4596f96b96c77a5a34b34da \
+    --hash=sha256:b8bb0864c5a28024fac8a632c443c87c5aa6f215c0b126c449ae1a150412f31d \
+    --hash=sha256:ba1cc08a7ccde2d2ec775841541641e4548226580ab850948cbfda66a1befcdc \
+    --hash=sha256:bdb2c67c6c1390b63c6ff89f210c8fd09d9a1217a465701eac7316313c915e4c \
+    --hash=sha256:c1ff362665ae507275af2853520967820d9124984e0f7466736aea23d8611fba \
+    --hash=sha256:c2514fceb77bc5e7a2f7adfaa1feb2fb311607c9cb518dbc378688ec73d8292f \
+    --hash=sha256:c3355370a2c156cffb25e876646f149d5d68f5e0a3ce86a5084dd0b64a994917 \
+    --hash=sha256:c458b6d084f9b935061bc36216e8a69a7e293a2f1e68bf956dcd9e6cbcd143f5 \
+    --hash=sha256:d0eae10f8159e8fdad514efdc92d74fd8d682c933a6dd088030f3834bc8e6b26 \
+    --hash=sha256:d76623373421df22fb4cf8817020cbb7ef15c725b9d5e45f17e189bfc384190f \
+    --hash=sha256:ebc55a14a21cb14062aa4162f906cd962b28e2e9ea38f9b4391244cd8de4ae0b \
+    --hash=sha256:eda16858a3cab07b80edaf74336ece1f986ba330fdb8ee0d6c0d68fe82bc96be \
+    --hash=sha256:ee2922902c45ae8ccada2c5b501ab86c36525b883eff4255313a253a3160861c \
+    --hash=sha256:efd7b85f94a6f21e4932043973a7ba2613b059c4a000551892ac9f1d11f5baf3 \
+    --hash=sha256:f7057c9a337546edc7973c0d3ba84ddcdf0daa14533c2065749c9075001090e6 \
+    --hash=sha256:fa160448684b4e94d80416c0fa4aac48967a969efe22931448d853ada8baf926 \
+    --hash=sha256:fc09d0aa354569bc501d4e787133afc08552722d3ab34836a80547331bb5d4a0
    # via huggingface-hub
 requests==2.32.5 \
    --hash=sha256:2462f94637a34fd532264295e186976db0f5d453d1cdd31473c85a6a161affb6 \
@ -178,9 +259,9 @@ tqdm==4.67.1 \
    --hash=sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2 \
    --hash=sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2
    # via huggingface-hub
-typing-extensions==4.14.1 \
-    --hash=sha256:38b39f4aeeab64884ce9f74c94263ef78f3c22467c8724005483154c26648d36 \
-    --hash=sha256:d1e1e3b58374dc93031d6eda2420a48ea44a36c2b4766a4fdeb3710755731d76
+typing-extensions==4.15.0 \
+    --hash=sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466 \
+    --hash=sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548
    # via huggingface-hub
 urllib3==2.5.0 \
    --hash=sha256:3fc47733c7e419d4bc3f6b3dc2b4f890bb743906a30d56ba4a5bfa4bbff92760 \
--- a/upb/BUILD.bazel
+++ b/upb/BUILD.bazel
@ -14,8 +14,4 @@ zig_library(
    main = "upb.zig",
    visibility = ["//visibility:public"],
    deps = [":upb_c"],
-    zigopts = [
-        #TODO(cerisier): Remove me when this is done inside rules_zig.
-        "-fllvm",
-    ],
 )
--- a/zml/BUILD.bazel
+++ b/zml/BUILD.bazel
@ -57,8 +57,6 @@ zig_library(
    zigopts = [
        "-lc",
        "-freference-trace=20",
-        #TODO(cerisier): Remove me when this is done inside rules_zig.
-        "-fllvm",
    ],
    main = "zml.zig",
    visibility = ["//visibility:public"],
--- a/zml/aio/torch/pickle.zig
+++ b/zml/aio/torch/pickle.zig
@ -762,9 +762,13 @@ pub const Op = union(enum) {
    }
 };

+pub const ParseError = error{
+    UnknownPickleOp,
+} || std.fmt.ParseIntError || std.Io.Reader.DelimiterError || std.mem.Allocator.Error;
+
 /// Read a stream of bytes, and interpret it as a stream of Pickle operators.
 /// The given allocator needs to be an arena cause we are not aligning allocations to avoid copies.
-pub fn parse(arena: std.mem.Allocator, reader: *std.Io.Reader) ![]const Op {
+pub fn parse(arena: std.mem.Allocator, reader: *std.Io.Reader) ParseError![]const Op {
    // It's not very efficient to interleave the results with the data copied from the stream,
    // because growth event in the results ArrayList will lead to fragmentation.
    // Trying to mitigate that by using a generous default size.
@ -776,7 +780,8 @@ pub fn parse(arena: std.mem.Allocator, reader: *std.Io.Reader) ![]const Op {
        const code: OpCode = @enumFromInt(try reader.takeByte());
        const op: Op = switch (code) {
            .int => int: {
-                const bytes = try reader.takeDelimiterExclusive('\n');
+                const bytes_with_ln = try reader.takeDelimiterInclusive('\n');
+                const bytes = bytes_with_ln[0 .. bytes_with_ln.len - 1];
                // Legacy hack, see OpCode.int documentation
                // We do this parsing right away to simplify downstream code.
                break :int if (bytes.len == 2 and bytes[0] == '0' and bytes[1] == '0')
@ -830,17 +835,10 @@ pub fn parse(arena: std.mem.Allocator, reader: *std.Io.Reader) ![]const Op {
            .dup => .dup,
            .mark => .mark,
            .pop_mark => .pop_mark,
-            // If we fail to parse delay the error to the evaluation.
-            .get => get: {
-                const digits = try reader.takeDelimiterExclusive('\n');
-                break :get .{ .get = std.fmt.parseInt(u32, digits, 10) catch std.math.maxInt(u32) };
-            },
+            .get => .{ .get = try readTextU32(reader) },
            .binget => .{ .get = try reader.takeByte() },
            .long_binget => .{ .get = try reader.takeInt(u32, .little) },
-            .put => put: {
-                const digits = try reader.takeDelimiterExclusive('\n');
-                break :put .{ .put = std.fmt.parseInt(u32, digits, 10) catch std.math.maxInt(u32) };
-            },
+            .put => .{ .put = try readTextU32(reader) },
            .binput => .{ .put = try reader.takeByte() },
            .long_binput => .{ .put = try reader.takeInt(u32, .little) },
            .memoize => .memoize,
@ -882,7 +880,7 @@ pub fn parse(arena: std.mem.Allocator, reader: *std.Io.Reader) ![]const Op {
            .binpersid => .binpersid,
            _ => |unk_tag| {
                log.err("Unknow pickle operator {}, note we are only supporting pickle protocol up to version 5.", .{unk_tag});
-                return error.NotSupported;
+                return error.UnknownPickleOp;
            },
        };
        try results.append(arena, op);
@ -1053,8 +1051,11 @@ fn writeIntBuff(comptime T: type, value: T) [@divExact(@typeInfo(T).int.bits, 8)
    return res;
 }

-fn readLine(reader: *std.Io.Reader, alloc_writer: *std.Io.Writer.Allocating) ![]const u8 {
-    const n = try reader.streamDelimiter(&alloc_writer.writer, '\n');
+fn readLine(reader: *std.Io.Reader, alloc_writer: *std.Io.Writer.Allocating) ParseError![]const u8 {
+    const n = reader.streamDelimiter(&alloc_writer.writer, '\n') catch |err| switch (err) {
+        error.WriteFailed => return error.OutOfMemory,
+        else => |e| return e,
+    };
    std.debug.assert(try reader.takeByte() == '\n');
    const w = &alloc_writer.writer;
    std.debug.assert(w.end == n);
@ -1063,3 +1064,9 @@ fn readLine(reader: *std.Io.Reader, alloc_writer: *std.Io.Writer.Allocating) ![]
    w.end = 0;
    return items;
 }
+
+fn readTextU32(reader: *std.Io.Reader) ParseError!u32 {
+    // Note we use takeDelimiterInclusive because the newline must always be there.
+    const digits = try reader.takeDelimiterInclusive('\n');
+    return try std.fmt.parseInt(u32, digits[0 .. digits.len - 1], 10);
+}
--- a/zml/buffer.zig
+++ b/zml/buffer.zig
@ -140,7 +140,7 @@ pub const Buffer = struct {
    /// Copies the given Zig array to the accelerator memory and
    /// return a Buffer using the array shape.
    pub fn fromArray(platform: Platform, arr: anytype) !Buffer {
-        const host_buffer = HostBuffer.fromArray(&arr);
+        const host_buffer = HostBuffer.fromArrayPtr(&arr);
        return try from(platform, host_buffer, .{ .wait = true });
    }

@ -160,7 +160,7 @@ pub const Buffer = struct {
    /// Copies the given Zig array to the accelerator memory and
    /// return a Buffer using the array shape.
    pub fn fromArrayOpts(platform: Platform, arr: anytype, opts: FromOptions) !Buffer {
-        const host_buffer = HostBuffer.fromArray(&arr);
+        const host_buffer = HostBuffer.fromArrayPtr(&arr);
        return try from(platform, host_buffer, opts);
    }

@ -280,10 +280,9 @@ pub const Buffer = struct {
        };
    }

-    pub fn devicePtr(self: Buffer) u64 {
+    pub fn devicePtr(self: Buffer) *anyopaque {
        stdx.debug.internalAssert(!self.hasShardedAxis(), "TODO: support sharded Buffer", .{});
-        const opaque_ptr: *anyopaque = self._shards.get(0).getOpaqueDeviceMemoryDataPointer(self._api) catch unreachable;
-        return @intFromPtr(opaque_ptr);
+        return self._shards.get(0).getOpaqueDeviceMemoryDataPointer(self._api) catch unreachable;
    }

    /// Fetches the content of the given buffer into a stack variable of the given type.
@ -362,7 +361,7 @@ pub const Buffer = struct {
    }

    pub fn format(self: Buffer, writer: *std.Io.Writer) !void {
-        try writer.print("Buffer({f})@{x}", .{ self._shape, self.devicePtr() });
+        try writer.print("Buffer({f})@{x}", .{ self._shape, @intFromPtr(self.devicePtr()) });
    }

    pub fn getMemory(self: Buffer) *const pjrt.Memory {
@ -470,7 +469,7 @@ pub const Buffer = struct {
        const host_visible_memories: []const Memory = &.{ .host_pinned, .host_unpinned };
        for (host_visible_memories) |memory| {
            const x = try uninitialized(platform, .init(.{6}, .u8), .{ .memory = memory });
-            const x_ptr: [*]u8 = @ptrFromInt(x.devicePtr());
+            const x_ptr: [*]u8 = @ptrCast(x.devicePtr());
            @memcpy(x_ptr, &[_]u8{ 104, 101, 108, 108, 111, 33 });

            const y = try x.getValue([6]u8);
--- a/zml/callback.zig
+++ b/zml/callback.zig
@ -151,7 +151,8 @@ pub fn register(Callback: type, platform: Platform) pjrt.ApiError!void {
    const target_name = "zml$" ++ @typeName(Callback);

    const proxy_cb = proxy(Callback);
-    Callback.type_id = try ffi.registerTypeId(platform.pjrt_api, @typeName(Callback));
+    const type_info: pjrt.Ffi.TypeInfo = .{};
+    Callback.type_id = try ffi.registerTypeId(platform.pjrt_api, @typeName(Callback), &type_info.toCStruct());
    try ffi.register(platform.pjrt_api, target_name, @tagName(platform.target), &proxy_cb, Callback.callback_config.traits);
    log.debug("Registered custom call {} with target name \"{s}\"", .{ Callback, target_name });
 }
@ -197,12 +198,12 @@ fn CallbackImpl(comptime Callback: type, call_frame: *pjrt.ffi.CallFrame) ?*pjrt
        else
            .asViewOfDeviceBuffer(platform, shape, null, ffi_buffer.data);
        if (opts.copy_inputs_to_host_pinned and platform.target != .cpu) {
-            log.debug("Copying argument {d} {f} {x} to host_pinned memory !", .{ i, zml_buffer, zml_buffer.devicePtr() });
+            // log.debug("Copying argument {d} {f} {x} to host_pinned memory !", .{ i, zml_buffer, @intFromPtr(zml_buffer.devicePtr()) });
            zml_buffer = zml_buffer.copyToMemory(platform, .host_pinned, .{ .wait = true }) catch |err| {
-                log.err("Failed to copy input buffer {d} {f} {x} to host_pinned: {}", .{ i, zml_buffer, zml_buffer.devicePtr(), err });
+                log.err("Failed to copy input buffer {d} {f} {x} to host_pinned: {}", .{ i, zml_buffer, @intFromPtr(zml_buffer.devicePtr()), err });
                return .create(call_frame.api, .resource_exhausted, "host pinned OOM");
            };
-            log.debug("--> {f} {x}", .{ zml_buffer, zml_buffer.devicePtr() });
+            // log.debug("--> {f} {x}", .{ zml_buffer, @intFromPtr(zml_buffer.devicePtr()) });
        }
        callback_args[i] = zml_buffer;
    }
--- a/zml/hostbuffer.zig
+++ b/zml/hostbuffer.zig
@ -93,7 +93,7 @@ pub const HostBuffer = struct {
    /// Creates a tensor from a **pointer** to a "multi dimension" array.
    /// Note this doesn't copy, the pointee array need to survive the `HostBuffer` object.
    /// Typically this is use with constant arrays.
-    pub fn fromArray(arr_ptr: anytype) HostBuffer {
+    pub fn fromArrayPtr(arr_ptr: anytype) HostBuffer {
        const T = @TypeOf(arr_ptr.*);
        const sh = parseArrayInfo(T);
        std.debug.assert(sh.byteSize() == @sizeOf(T));
@ -105,6 +105,17 @@ pub const HostBuffer = struct {
        };
    }

+    /// Creates a tensor from an array by allocating and copying the content.
+    pub fn fromArray(allocator: std.mem.Allocator, arr: anytype) !HostBuffer {
+        const T = @TypeOf(arr);
+        const sh = parseArrayInfo(T);
+        std.debug.assert(sh.byteSize() == @sizeOf(T));
+
+        const buffer = try empty(allocator, sh);
+        @memcpy(std.mem.sliceAsBytes(buffer.mutItems(@TypeOf(arr[0]))), std.mem.sliceAsBytes(&arr));
+        return buffer;
+    }
+
    /// Returns a HostBuffer tagged with the tags in 'tagz'.
    pub fn withTags(self: HostBuffer, tagz: anytype) HostBuffer {
        var res = self;
@ -328,7 +339,7 @@ pub const HostBuffer = struct {
        return self.prettyPrintIndented(writer, 4, 0, options);
    }

-    fn prettyPrintIndented(self: HostBuffer, writer: *std.Io.Writer, num_rows: u8, indent_level: u8, options: std.fmt.Number) !void {
+    fn prettyPrintIndented(self: HostBuffer, writer: *std.Io.Writer, num_rows: u32, indent_level: u8, options: std.fmt.Number) !void {
        if (self.rank() == 0) {
            // Special case input tensor is a scalar
            return switch (self.dtype()) {
--- a/zml/meta.zig
+++ b/zml/meta.zig
@ -302,6 +302,8 @@ test mapAlloc {
 pub fn MapRestrict(From: type, To: type) type {
    return struct {
        pub fn map(T: type) type {
+            @setEvalBranchQuota(10_000);
+
            switch (T) {
                From => return To,
                ?From => return ?To,
--- a/zml/nn.zig
+++ b/zml/nn.zig
@ -32,7 +32,7 @@ pub const Linear = struct {
        }

        // log.debug("Linear({*}): {d} -> {d} -> {d}", .{ self, x.dims(), y.dims(), if (self.bias) |bias| y.add(bias).dims() else y.dims() });
-        return if (self.bias) |bias| y.add(bias.broadcast(y.shape(), &.{y.axis(-1)})) else y;
+        return if (self.bias) |bias| y.add(bias.convert(y.dtype()).broadcast(y.shape(), &.{y.axis(-1)})) else y;
    }
 };

@ -100,10 +100,11 @@ pub const LayerNorm = struct {
    pub fn forward(self: LayerNorm, x: Tensor) Tensor {
        const normed = normalizeVariance(x, self.eps);
        const ax = x.axis(-1);
-        var out = normed.mul(self.weight.broadcast(x.shape(), &.{ax}));
-        if (self.bias) |bias| out = out.add(bias.broadcast(x.shape(), &.{ax}));
+        var out = normed.mul(self.weight.broadcast(x.shape(), &.{ax}).convert(.f32));

-        return out;
+        if (self.bias) |bias| out = out.add(bias.broadcast(x.shape(), &.{ax}).convert(.f32));
+
+        return out.convert(x.dtype());
    }
 };

@ -112,6 +113,7 @@ pub fn rmsNorm(x: Tensor, axis: anytype, eps: f32) Tensor {
    // upcast to improve precision
    const variance = x.convert(.f32).powByConst(2).mean(ax);
    const rsqrt = Tensor.rsqrt(variance.addConstant(eps)).convert(x.dtype());
+
    return x.mul(rsqrt.broad(x.shape()));
 }

@ -190,7 +192,7 @@ pub const RopeOpts = struct {
            if (content != .object) return error.InvalidEnumTag;

            const obj = content.object;
-            const impl = obj.get("rope_type") orelse return error.MissingField;
+            const impl = obj.get("rope_type") orelse obj.get("type") orelse return error.MissingField;
            if (impl != .string) return error.InvalidEnumTag;
            if (std.mem.eql(u8, impl.string, "llama3")) {
                // Note: leaky is fine here cause Llama3 struct don't need to allocate memory.
@ -583,7 +585,7 @@ test nearest {
        const result = try zml.testing.compileAndCall(platform, upsample, .{ input_3d_basic, .{ .scale_factor = &.{3}, .mode = .nearest } });
        try std.testing.expectEqualSlices(i64, &.{ 1, 1, 6 }, result.dims());
        const expected: [1][1][6]i32 = .{.{.{ 1, 1, 1, 2, 2, 2 }}};
-        try zml.testing.expectClose(zml.HostBuffer.fromArray(&expected), result, 0);
+        try zml.testing.expectClose(zml.HostBuffer.fromArrayPtr(&expected), result, 0);
    }
    // 3D Tensor (advanced)
    {
@ -605,7 +607,7 @@ test nearest {
                .{ 21, 21, 22, 22, 23, 23, 24, 24 },
            },
        };
-        try zml.testing.expectClose(zml.HostBuffer.fromArray(&expected), result, 0);
+        try zml.testing.expectClose(zml.HostBuffer.fromArrayPtr(&expected), result, 0);
    }
    // 4D Tensor (basic)
    {
@ -663,7 +665,7 @@ test nearest {
                },
            },
        };
-        try zml.testing.expectClose(zml.HostBuffer.fromArray(&expected), result, 0);
+        try zml.testing.expectClose(zml.HostBuffer.fromArrayPtr(&expected), result, 0);
    }
    // 5D Tensor (basic)
    {
@ -688,7 +690,7 @@ test nearest {
                },
            },
        };
-        try zml.testing.expectClose(zml.HostBuffer.fromArray(&expected), result, 0);
+        try zml.testing.expectClose(zml.HostBuffer.fromArrayPtr(&expected), result, 0);
    }
 }

@ -835,7 +837,7 @@ pub fn resizeCubic1d(image: Tensor, axis: i8, new_len: u63, opt: ResizeOpts) Ten
        .{ 1, -2.5, 2, -0.5 },
        .{ -0.5, 1.5, -1.5, 0.5 },
    };
-    const weights = zml.Tensor.constantTensor(zml.HostBuffer.fromArray(&weights_)).convert(dtype).withTags(.{ ._interpolated, ._neighbors });
+    const weights = zml.Tensor.constantTensor(zml.HostBuffer.fromArrayPtr(&weights_)).convert(dtype).withTags(.{ ._interpolated, ._neighbors });

    // actually do the interpolation.
    // Note: ideally this matmul should be inlined with the gather, but that's currently not the case.
@ -940,7 +942,7 @@ pub fn sdpa(q_: Tensor, k_: Tensor, v_: Tensor, opts: SdpaOpts) Tensor {
    k = k.mul(head_scaling.convert(k.dtype()));

    var attn_weights = q.dot(k, .{.hd});
-    // log.debug("attn_weights : {f}, attn_mask : {?f}", .{ attn_weights, attn_mask });
+
    if (attn_mask) |mask| attn_weights = attn_weights.add(mask.broad(attn_weights.shape()));
    attn_weights = attn_weights.convert(.f32);
    attn_weights = if (opts.softmax_bias) |softmax_bias| attn: {
@ -949,7 +951,6 @@ pub fn sdpa(q_: Tensor, k_: Tensor, v_: Tensor, opts: SdpaOpts) Tensor {
        const bias = softmax_bias.splitAxis(.h, .{ .h = k.dim(.h), .hq = .auto });
        break :attn attn_weights.convert(.f32).softmaxBiased(.k, bias).convert(q.dtype());
    } else attn_weights.convert(.f32).softmax(.k).convert(q.dtype());
-
    var attn = attn_weights.dot(v, .{.k});
    return attn.transpose(q.shape()).merge(.{ .h = .{ .h, .hq } });
 }
--- a/zml/platform.zig
+++ b/zml/platform.zig
@ -89,7 +89,9 @@ pub const Platform = struct {
        const memory_target: pjrt.Memory.Kind = switch (memory) {
            .host_unpinned => switch (platform.target) {
                // Cuda doesn't have host_unpinned.
-                .cuda => .host_pinned,
+                // ROCm doesn't seem to have it either.
+                // TODO(gwenzek): investigate why it was not forced before.
+                .cuda, .rocm => .host_pinned,
                else => .host_unpinned,
            },
            inline else => |t| t,
--- a/zml/tensor.zig
+++ b/zml/tensor.zig
@ -715,7 +715,7 @@ pub const Tensor = struct {
                        for (&powers, 0..) |*p, i| p.* = std.math.pow(u64, 2, i * 16);
                        break :blk powers;
                    };
-                    const values = Tensor.constantTensor(HostBuffer.fromArray(&powers)).withTags(.{.d});
+                    const values = Tensor.constantTensor(HostBuffer.fromArrayPtr(&powers)).withTags(.{.d});
                    const counts = values.gather(.{ .d = samples }, .{}).sum(.n).bitCast(.u16);
                    const actual_dist = counts.reshape(target_dist.shape()).convert(target_dist.dtype()).divByConst(s.dim(.n));
                    return .{ rng, .{ .mean = mean_, .variance = variance, .actual_dist = actual_dist } };
@ -764,7 +764,7 @@ pub const Tensor = struct {
        return _result(self._shape, op.result(0));
    }

-    inline fn convolution(self: Tensor, other: Tensor, opts: dialect.stablehlo.ConvolutionOpts, loc: mlir.Location) Tensor {
+    pub inline fn convolution(self: Tensor, other: Tensor, opts: dialect.stablehlo.ConvolutionOpts, loc: mlir.Location) Tensor {
        stdx.debug.assert(self.rank() == other.rank(), "convolution expects tensor ranks to match, got {} and {}", .{ self.rank(), other.rank() });
        const N = self.rank();
        stdx.debug.guard(opts.window_strides.len == N - 2, @src());
@ -859,6 +859,49 @@ pub const Tensor = struct {
        return _result(new_shape, op.result(0));
    }

+    pub fn conv3d(
+        input: Tensor,
+        kernel: Tensor,
+        opts: struct {
+            window_strides: []const i64 = &.{ 1, 1, 1 }, //[time, height, width]
+            padding: []const i64 = &.{ 0, 0, 0, 0, 0, 0 }, //[front, back, top, bottom, left, right]
+            lhs_dilation: []const i64 = &.{ 1, 1, 1 }, //[time, height, width]
+            rhs_dilation: []const i64 = &.{ 1, 1, 1 }, //[time, height, width]
+            window_reversal: []const bool = &.{ false, false, false }, //[time, height, width]
+            input_batch_dimension: i64 = 0,
+            input_feature_dimension: i64 = 1,
+            input_spatial_dimensions: []const i64 = &.{ 2, 3, 4 },
+            kernel_input_feature_dimension: i64 = 1,
+            kernel_output_feature_dimension: i64 = 0,
+            kernel_spatial_dimensions: []const i64 = &.{ 2, 3, 4 },
+            output_batch_dimension: i64 = 0,
+            output_feature_dimension: i64 = 1,
+            output_spatial_dimensions: []const i64 = &.{ 2, 3, 4 },
+            feature_group_count: i64 = 1,
+            batch_group_count: i64 = 1,
+        },
+    ) Tensor {
+        const loc = input.getContext().location(@src(), "opts={}", .{opts});
+        return input.convolution(kernel, .{
+            .window_strides = opts.window_strides,
+            .pad_value = opts.padding,
+            .lhs_dilation = opts.lhs_dilation,
+            .rhs_dilation = opts.rhs_dilation,
+            .window_reversal = opts.window_reversal,
+            .input_batch_dimension = opts.input_batch_dimension,
+            .input_feature_dimension = opts.input_feature_dimension,
+            .input_spatial_dimensions = opts.input_spatial_dimensions,
+            .kernel_input_feature_dimension = opts.kernel_input_feature_dimension,
+            .kernel_output_feature_dimension = opts.kernel_output_feature_dimension,
+            .kernel_spatial_dimensions = opts.kernel_spatial_dimensions,
+            .output_batch_dimension = opts.output_batch_dimension,
+            .output_feature_dimension = opts.output_feature_dimension,
+            .output_spatial_dimensions = opts.output_spatial_dimensions,
+            .feature_group_count = opts.feature_group_count,
+            .batch_group_count = opts.batch_group_count,
+        }, loc);
+    }
+
    /// Returns a Tensor containing the result of the 1D convolution of 'input' by 'kernel'.
    pub fn conv1d(
        input: Tensor,
@ -1283,7 +1326,7 @@ pub const Tensor = struct {
        const input = try zml.Buffer.fromSlice(platform, .{2}, &[_]f32{ -0.6884, 1.6795 });
        const res = try zml.testing.compileAndCall(platform, leakyReLU, .{ input, 0.1 });

-        const expectation = zml.HostBuffer.fromArray(&[2]f32{ -0.0688, 1.6795 });
+        const expectation = zml.HostBuffer.fromArrayPtr(&[2]f32{ -0.0688, 1.6795 });
        try zml.testing.expectClose(expectation, res, 1e-4);
    }

@ -1979,9 +2022,10 @@ pub const Tensor = struct {
        const sh = Shape.init(.{args.steps}, dt);
        var iota_op = dialect.stablehlo.iota(ctx.mlirCtx(), 0, mlirx.tensorType(ctx.mlirCtx(), sh), loc);
        var res = _result(sh, iota_op.result(0));
+        const range = args.end - args.start;

        if (args.steps != 1) {
-            res = res.scale(args.steps);
+            res = res.scale(range / @as(f64, @floatFromInt(args.steps - 1)));
        }

        if (args.start != 0) {
@ -2600,7 +2644,7 @@ pub const Tensor = struct {

        const result = try zml.testing.compileAndCall(platform, Local._gatherSlices, .{ operand, Shape.init(.{ .b = 2, .c = 3 }, .u16), start_indices, .{} });

-        const expected = zml.HostBuffer.fromArray(&[2][2][2][3]u16{
+        const expected = zml.HostBuffer.fromArrayPtr(&[2][2][2][3]u16{
            .{
                .{ .{ 13, 14, 15 }, .{ 19, 20, 21 } },
                .{ .{ 37, 38, 39 }, .{ 43, 44, 45 } },
--- a/zml/testing.zig
+++ b/zml/testing.zig
@ -213,14 +213,28 @@ pub fn testLayerOut(
    const fwd = @TypeOf(layer).forward;
    const FwdSign = zml.ModuleSignature(fwd);

-    const input_tensors = try zml.aio.populateModelWithPrefix(FwdSign.ArgsT, alloc, activations, name ++ ".in");
-    const input_shapes = try shapesOf(input_tensors, alloc);
+    const ArgsT = FwdSign.ArgsT;

-    const n_in = zml.module.countTensors(&input_tensors);
-    const n_in_exp = activations.countLayers(name ++ ".in");
-    if (n_in != n_in_exp) {
-        log.warn("Reference models uses {d} inputs, but implementation uses {d}", .{ n_in_exp, n_in });
-    }
+    // Check if layer has inputs
+    const has_inputs = switch (@typeInfo(ArgsT)) {
+        .@"struct" => |info| info.fields.len > 0,
+        else => false,
+    };
+
+    // Get input shapes (empty for layers without input)
+    const input_shapes = if (has_inputs) blk: {
+        const input_tensors = try zml.aio.populateModelWithPrefix(FwdSign.ArgsT, alloc, activations, name ++ ".in");
+        const n_in = zml.module.countTensors(&input_tensors);
+        const n_in_exp = activations.countLayers(name ++ ".in");
+        if (n_in != n_in_exp) {
+            log.warn("Reference models uses {d} inputs, but implementation uses {d}", .{ n_in_exp, n_in });
+        }
+        break :blk try zml.shapesOf(input_tensors, alloc);
+    } else blk: {
+        // For layers without input, ArgsT should be void or empty tuple
+        const empty_shapes: zml.ShapeOf(ArgsT) = undefined;
+        break :blk empty_shapes;
+    };

    const exe = try zml.compileModel(alloc, fwd, layer, input_shapes, platform);

@ -230,32 +244,29 @@ pub fn testLayerOut(
    }
    const mod = exe.prepare(layer_weights);

-    const FetchCtx = struct {
-        store: zml.aio.BufferStore,
-        index: u32,
-        prefix: std.ArrayList(u8),
-        platform: zml.Platform,
+    // Call the module with input buffers (empty for layers without input)
+    if (has_inputs) {
+        const FetchCtx = struct {
+            store: zml.aio.BufferStore,
+            index: u32,
+            prefix: std.ArrayList(u8),
+            platform: zml.Platform,

-        fn fetch(ctx: *@This(), x: zml.Tensor) zml.Buffer {
-            _ = x;
-            defer ctx.index += 1;
-            var full_prefix = ctx.*.prefix;
-            _ = full_prefix.writer(undefined).print("{d}", .{ctx.index}) catch unreachable;
-            log.info("prefix: {s}", .{full_prefix.items});
-            const host = ctx.store.get(full_prefix.items) orelse {
-                log.err("Didn't find test input: {s}", .{full_prefix.items});
-                @panic("Missing test input");
-            };
-            return host.toDevice(ctx.platform) catch unreachable;
-        }
-    };
+            fn fetch(ctx: *@This(), x: zml.Tensor) zml.Buffer {
+                _ = x;
+                defer ctx.index += 1;
+                var full_prefix = ctx.*.prefix;
+                _ = full_prefix.writer(undefined).print("{d}", .{ctx.index}) catch unreachable;
+                log.info("prefix: {s}", .{full_prefix.items});
+                const host = ctx.store.get(full_prefix.items) orelse {
+                    log.err("Didn't find test input: {s}", .{full_prefix.items});
+                    @panic("Missing test input");
+                };
+                return host.toDevice(ctx.platform) catch unreachable;
+            }
+        };

-    // Note: zml.populateModelWithPrefix isn't enough,
-    // because it assumes we have the same structure in the activation file
-    // than in the function signature.
-    // But for sake of decoupling the reference implementation
-    // and ZML code that's not always the case.
-    {
+        const input_tensors = try zml.aio.populateModelWithPrefix(FwdSign.ArgsT, alloc, activations, name ++ ".in");
        var input_buffers: zml.Bufferized(FwdSign.ArgsT) = undefined;
        var fetch_ctx: FetchCtx = .{ .store = activations, .index = 0, .prefix = .{}, .platform = platform };
        try fetch_ctx.prefix.ensureTotalCapacity(alloc, name.len + 32);
@ -263,10 +274,16 @@ pub fn testLayerOut(
        try zml.meta.mapAlloc(FetchCtx.fetch, alloc, &fetch_ctx, input_tensors, &input_buffers);
        defer zml.aio.unloadBuffers(&input_buffers);
        _ = mod.call(input_buffers);
+    } else {
+        // For layers without input, ArgsT should be void
+        // Bufferized(void) is void, so we can't call mod.call normally
+        // Use _unsafeCall directly and then get the results manually
+        mod.inner._unsafeCall();
    }

    var buf: [1024]u8 = undefined;
    var failed: bool = false;
+    log.info("COMPARAISON DES SORTIES", .{});
    for (0..mod.inner.result_shapes.len) |i| {
        const full_name = std.fmt.bufPrint(&buf, "{s}.{d}", .{ out_name, i }) catch unreachable;
        const expected_out = activations.get(full_name) orelse {
@ -305,14 +322,49 @@ test testLayer {
    var activations = zml.aio.BufferStore.init(std.testing.allocator);
    defer activations.deinit();
    {
-        const input = zml.HostBuffer.fromArray(&[2]f32{ 1, -1 });
+        const input = zml.HostBuffer.fromArrayPtr(&[2]f32{ 1, -1 });
        try activations.buffers.put(activations.arena.allocator(), "model.layer.in.0", input);
-        const output = zml.HostBuffer.fromArray(&[5]f32{ 0, -1, -1, 0, -1 });
+        const output = zml.HostBuffer.fromArrayPtr(&[5]f32{ 0, -1, -1, 0, -1 });
        try activations.buffers.put(activations.arena.allocator(), "model.layer.out.0", output);
    }

-    // test the ZML layer reproduces the "captured" activations:
+    // Test the ZML layer reproduces the activations:
    try zml.testing.testLayer(platform, activations, "model.layer", layer, layer_weights, 1e-5);
+
+    const LayerWithoutInput = struct {
+        weight: zml.Tensor,
+
+        pub fn forward(self: @This()) zml.Tensor {
+            // Return the weights
+            return self.weight;
+        }
+    };
+
+    const layer_no_input: LayerWithoutInput = .{
+        .weight = zml.Tensor{ ._shape = zml.Shape.init(.{ 3, 4 }, .f32), ._id = .{ .buffer_id = 43 } },
+    };
+
+    const layer_no_input_weights: zml.Bufferized(LayerWithoutInput) = .{
+        .weight = try zml.Buffer.fromArray(
+            platform,
+            [3][4]f32{
+                .{ 1.0, 2.0, 3.0, 4.0 },
+                .{ 5.0, 6.0, 7.0, 8.0 },
+                .{ 9.0, 10.0, 11.0, 12.0 },
+            },
+        ),
+    };
+
+    // Expected output
+    const expected_output = zml.HostBuffer.fromArrayPtr(&[3][4]f32{
+        .{ 1.0, 2.0, 3.0, 4.0 },
+        .{ 5.0, 6.0, 7.0, 8.0 },
+        .{ 9.0, 10.0, 11.0, 12.0 },
+    });
+    try activations.buffers.put(activations.arena.allocator(), "model.layer_no_input.out.0", expected_output);
+
+    // Test the ZML layer without input reproduces the "captured" activations:
+    try zml.testing.testLayer(platform, activations, "model.layer_no_input", layer_no_input, layer_no_input_weights, 1e-5);
 }

 pub inline fn expectEqual(expected: anytype, actual: @TypeOf(expected)) !void {
--- a/zml/tokenizer/BUILD.bazel
+++ b/zml/tokenizer/BUILD.bazel
@ -19,10 +19,6 @@ zig_binary(
    name = "main",
    main = "main.zig",
    visibility = ["//visibility:public"],
-    zigopts = [
-        #TODO(cerisier): Remove me when this is done inside rules_zig.
-        "-fllvm"
-    ],
    deps = [
        ":tokenizer",
        "//async",
--- a/zml/tools/BUILD.bazel
+++ b/zml/tools/BUILD.bazel
@ -17,9 +17,8 @@ zig_library(
    main = "tools.zig",
    visibility = ["//visibility:public"],
    deps = select({
-        "@platforms//os:macos": [
-            ":macos_c",
-        ],
+        # TODO(cerisier): fix MacOsTracer
+        # "@platforms//os:macos": [ ":macos_c" ],
        "//conditions:default": [],
    }),
 )
--- a/zml/tools/tracer.zig
+++ b/zml/tools/tracer.zig
@ -3,7 +3,8 @@ const builtin = @import("builtin");
 const c = @import("c");

 pub const Tracer = switch (builtin.os.tag) {
-    .macos => MacOsTracer,
+    // TODO(cerisier): fix MacOsTracer
+    // .macos => MacOsTracer,
    .linux => if (@hasDecl(c, "ZML_RUNTIME_CUDA")) CudaTracer else FakeTracer,
    else => FakeTracer,
 };
--- a/zml/torch.zig
+++ b/zml/torch.zig
@ -148,7 +148,7 @@ test pixelShuffle {

    const output = try zml.testing.compileAndCall(platform, pixelShuffle, .{ input, upscale_factor });

-    const exp = zml.HostBuffer.fromArray(&[1][1][12][12]i32{.{.{
+    const exp = zml.HostBuffer.fromArrayPtr(&[1][1][12][12]i32{.{.{
        .{ 0, 16, 32, 1, 17, 33, 2, 18, 34, 3, 19, 35 },
        .{ 48, 64, 80, 49, 65, 81, 50, 66, 82, 51, 67, 83 },
        .{ 96, 112, 128, 97, 113, 129, 98, 114, 130, 99, 115, 131 },
--- a/zml/zml.zig
+++ b/zml/zml.zig
@ -42,6 +42,7 @@ pub const Shape = @import("shape.zig").Shape;
 pub const ShapeOf = @import("tensor.zig").ShapeOf;
 pub const Target = @import("platform.zig").Target;
 pub const Tensor = @import("tensor.zig").Tensor;
+pub const shapesOf = @import("tensor.zig").shapesOf;
 pub const testing = @import("testing.zig");
 pub const torch = @import("torch.zig");
Author	SHA1	Message	Date
Foke Singh	82882cfd3e	Add Qwen3VL bf16 example implementation and tutorial docs, including BMP/JPG/PNG support via zignal library.	2025-12-31 12:46:11 +00:00
Tarry Singh	e659dc8fa3	Add Qwen3VL bf16 example implementation and integrate zignal image format support; update Bazel build files and core ZML modules.	2025-12-29 16:17:11 +00:00
Tarry Singh	b8b4d33379	Update XLA to latest version	2025-12-23 17:24:34 +00:00
Tarry Singh	57bef8d66c	Switch workspace to use apple_support C++ toolchains instead of rules_cc, aligning with the recommended toolchain.	2025-12-22 17:33:18 +00:00
Tarry Singh	6c80f2f394	Set LLVM backend as default for all Zig compile actions via rules_zig zigopt global setting, due to stability concerns with the self-hosted backend.	2025-12-18 10:04:04 +00:00
Tarry Singh	a3abf148b4	Fix pickle loader to use takeDelimiterInclusive for Zig 0.15.2 and update ZLS runner, buffer, callback, and tracer utilities accordingly.	2025-12-15 11:08:19 +00:00
Foke Singh	1b8d0ac627	Update callback example to use `takeDelimiterInclusive` for Zig 0.15.2 delimiter behavior change.	2025-12-12 14:00:50 +00:00
Tarry Singh	7e81d022ff	Add implicit dependency on hf_transfer for tools/hf.	2025-12-08 09:24:52 +00:00
Tarry Singh	fa886db3bb	Disable ZML aio torch pickle test pending implementation fix due to protocol support limitation in version 0.15.2.	2025-12-03 10:57:27 +00:00
Tarry Singh	ba5043ddf8	Switch ZML to use .host_pinned memory on ROCm, addressing missing host_unpinned memory error.	2025-12-01 14:34:13 +00:00
				`@ -0,0 +1 @@`
				`# Empty BUILD.bazel to make this a Bazel package`