Radix/zml/platform.zig

const builtin = @import("builtin");
const std = @import("std");

const asynk = @import("async");
const runtimes = @import("runtimes");

const meta = @import("meta.zig");
const module = @import("module.zig");
const pjrt = @import("pjrtx.zig");
const log = std.log.scoped(.zml);

pub const Target = runtimes.Platform;

/// Targets listed for the operating system this code is compiled for.
/// The array length depends on the OS: one entry on macOS, four on Linux,
/// and an empty array on every other OS.
pub const available_targets = switch (builtin.os.tag) {
    .linux => [_]Target{ .cpu, .cuda, .rocm, .tpu },
    .macos => [_]Target{.cpu},
    else => [_]Target{},
};

/// Options attached to a `Platform` through its `compilation_options` field.
/// Every field has a default, so `.{}` yields a fully-initialized value.
pub const CompilationOptions = struct {
    /// Fixed-capacity (8 entries) list of null-terminated strings.
    const ShardingAxes = std.BoundedArray([*:0]const u8, 8);

    /// Unset by default.
    /// NOTE(review): presumably where XLA dumps are written; confirm at the use site.
    xla_dump_to: ?[]const u8 = null,
    /// Off by default.
    /// NOTE(review): presumably toggles XLA fusion visualization dumps; confirm at the use site.
    xla_dump_fusion_visualization: bool = false,
    /// Unset by default.
    /// NOTE(review): presumably a compilation cache location; confirm at the use site.
    cache_location: ?[]const u8 = null,
    /// When true, `Platform.sharding` reports one partition per device
    /// instead of a single partition.
    sharding_enabled: bool = false,
    /// Empty by default.
    /// NOTE(review): presumably the names of the axes to shard over; confirm at the use site.
    sharding_axes: ShardingAxes = .{},
};

/// A target paired with the PJRT API it was loaded from, the client created
/// on that API, and the compilation options currently in effect.
pub const Platform = struct {
    target: Target,
    pjrt_api: *const pjrt.Api,
    pjrt_client: *pjrt.Client,
    compilation_options: CompilationOptions = .{},

    /// Upper bound on the number of devices exposed by `getDevices`.
    pub const MAX_NUM_DEVICES: u8 = 8;

    /// Replica / partition counts as reported by `sharding`.
    pub const Sharding = struct { num_replicas: u8, num_partitions: u8 };

    /// Creates a PJRT client on `api` and wraps it in a `Platform` with
    /// default compilation options.
    /// Logs a warning when the client reports more addressable devices than
    /// `MAX_NUM_DEVICES`. Fails only if client creation fails.
    pub fn init(target: Target, api: *const pjrt.Api) !Platform {
        const client = try pjrt.Client.init(api, &.{});

        const device_count = client.getAddressableDevices(api).len;
        if (device_count > MAX_NUM_DEVICES) {
            log.warn("platform {} got {} devices, but ZML only support up to {} devices. Some devices won't be used.", .{ target, device_count, MAX_NUM_DEVICES });
        }

        const platform: Platform = .{
            .target = target,
            .pjrt_api = api,
            .pjrt_client = client,
            .compilation_options = .{},
        };
        return platform;
    }

    /// Returns the client's addressable devices, truncated to the first
    /// `MAX_NUM_DEVICES` entries when there are more.
    pub fn getDevices(self: Platform) []const *const pjrt.Device {
        const addressable = self.pjrt_client.getAddressableDevices(self.pjrt_api);
        const usable = @min(addressable.len, MAX_NUM_DEVICES);
        return addressable[0..usable];
    }

    /// Computes the sharding layout for this platform.
    ///
    /// Replicas run the same function on different inputs, whereas partitions
    /// cooperate on a single evaluation over a shared input. Within one
    /// inference process replicas are generally unwanted (they are better
    /// isolated in separate processes), so `num_replicas` is hardcoded to 1.
    /// `num_partitions` is the device count when sharding is enabled, else 1.
    pub fn sharding(self: Platform) Sharding {
        // Safe narrowing: `getDevices` never returns more than MAX_NUM_DEVICES (a u8) entries.
        const device_count: u8 = @intCast(self.getDevices().len);
        const partitions: u8 = if (self.compilation_options.sharding_enabled) device_count else 1;
        return .{ .num_replicas = 1, .num_partitions = partitions };
    }

    /// Returns a copy of this platform whose compilation options are `opts`.
    /// The receiver is left unchanged; both values share the same API and client pointers.
    pub fn withCompilationOptions(self: Platform, opts: CompilationOptions) Platform {
        var updated = self;
        updated.compilation_options = opts;
        return updated;
    }

    /// Calls `deinit` on the PJRT client held by this platform.
    pub fn deinit(self: *Platform) void {
        const client = self.pjrt_client;
        client.deinit(self.pjrt_api);
    }

    /// Returns the Profiler for this API.
    /// Not every platform has a profiling API; for those, the profiler object does nothing.
    /// Platforms with known profiler extensions: cuda, xpu
    pub fn getProfiler(self: Platform, options: pjrt.Profiler.Options) pjrt.Profiler {
        const client = self.pjrt_client;
        return client.getProfiler(self.pjrt_api, options);
    }
};
Add initial Bazel build configuration, async runtime implementation, and core MLIR dialect definitions for ZML. 2023-01-02 14:28:25 +00:00			`const builtin = @import("builtin");`
			`const std = @import("std");`

zml: Remove pjrtx wrapper, migrate remaining helpers to their native modules, and fix blocking issue in Event.await. 2023-03-06 17:05:56 +00:00			`const asynk = @import("async");`
Introduce a thin abstraction layer between ZML and PJRT to manage plugin loading decisions, enable compile‑time detection of linked runtimes, and handle cases such as libtpu blocking metadata access. 2023-05-15 09:36:41 +00:00			`const runtimes = @import("runtimes");`
zml: Remove pjrtx wrapper, migrate remaining helpers to their native modules, and fix blocking issue in Event.await. 2023-03-06 17:05:56 +00:00
Add initial Bazel build configuration, async runtime implementation, and core MLIR dialect definitions for ZML. 2023-01-02 14:28:25 +00:00			`const meta = @import("meta.zig");`
			`const module = @import("module.zig");`
zml: reintroduce pjrtx to handle reactor blocking issues in async scenarios, particularly with Events. 2023-05-26 15:54:15 +00:00			`const pjrt = @import("pjrtx.zig");`
Add in-process sharding support across core ZML components (platform, shape, tensor, MLIR generation, buffers, and PJRT integration) 2023-02-24 17:33:14 +00:00			`const log = std.log.scoped(.zml);`
Add initial Bazel build configuration, async runtime implementation, and core MLIR dialect definitions for ZML. 2023-01-02 14:28:25 +00:00
Introduce a thin abstraction layer between ZML and PJRT to manage plugin loading decisions, enable compile‑time detection of linked runtimes, and handle cases such as libtpu blocking metadata access. 2023-05-15 09:36:41 +00:00			`pub const Target = runtimes.Platform;`
Add initial Bazel build configuration, async runtime implementation, and core MLIR dialect definitions for ZML. 2023-01-02 14:28:25 +00:00
			`pub const available_targets = switch (builtin.os.tag) {`
			`.macos => [_]Target{`
			`.cpu,`
			`},`
			`.linux => [_]Target{`
			`.cpu,`
			`.cuda,`
			`.rocm,`
			`.tpu,`
			`},`
			`else => [_]Target{},`
			`};`

			`pub const CompilationOptions = struct {`
			`xla_dump_to: ?[]const u8 = null,`
			`xla_dump_fusion_visualization: bool = false,`
			`cache_location: ?[]const u8 = null,`
Add in-process sharding support across core ZML components (platform, shape, tensor, MLIR generation, buffers, and PJRT integration) 2023-02-24 17:33:14 +00:00			`sharding_enabled: bool = false,`
			`sharding_axes: std.BoundedArray([*:0]const u8, 8) = .{},`
Add initial Bazel build configuration, async runtime implementation, and core MLIR dialect definitions for ZML. 2023-01-02 14:28:25 +00:00			`};`

			`pub const Platform = struct {`
			`target: Target,`
			`pjrt_api: *const pjrt.Api,`
			`pjrt_client: *pjrt.Client,`
			`compilation_options: CompilationOptions = .{},`

Add in-process sharding support across core ZML components (platform, shape, tensor, MLIR generation, buffers, and PJRT integration) 2023-02-24 17:33:14 +00:00			`pub const MAX_NUM_DEVICES: u8 = 8;`

Add initial Bazel build configuration, async runtime implementation, and core MLIR dialect definitions for ZML. 2023-01-02 14:28:25 +00:00			`pub fn init(target: Target, api: *const pjrt.Api) !Platform {`
			`const pjrt_client = try pjrt.Client.init(api, &.{});`
Add in-process sharding support across core ZML components (platform, shape, tensor, MLIR generation, buffers, and PJRT integration) 2023-02-24 17:33:14 +00:00			`const true_num_devices = pjrt_client.getAddressableDevices(api).len;`
			`if (true_num_devices > MAX_NUM_DEVICES) {`
			`log.warn("platform {} got {} devices, but ZML only support up to {} devices. Some devices won't be used.", .{ target, true_num_devices, MAX_NUM_DEVICES });`
			`}`
Add initial Bazel build configuration, async runtime implementation, and core MLIR dialect definitions for ZML. 2023-01-02 14:28:25 +00:00			`return .{`
			`.target = target,`
			`.pjrt_api = api,`
			`.pjrt_client = pjrt_client,`
			`.compilation_options = .{},`
			`};`
			`}`

zml: Remove pjrtx wrapper, migrate remaining helpers to their native modules, and fix blocking issue in Event.await. 2023-03-06 17:05:56 +00:00			`pub fn getDevices(self: Platform) []const *const pjrt.Device {`
Add in-process sharding support across core ZML components (platform, shape, tensor, MLIR generation, buffers, and PJRT integration) 2023-02-24 17:33:14 +00:00			`const all_devices = self.pjrt_client.getAddressableDevices(self.pjrt_api);`
			`if (all_devices.len > MAX_NUM_DEVICES) {`
			`return all_devices[0..MAX_NUM_DEVICES];`
			`}`
			`return all_devices;`
			`}`

			`pub const Sharding = struct { num_replicas: u8, num_partitions: u8 };`

			`pub fn sharding(self: Platform) Sharding {`
			`// replicas run the same function but with different inputs,`
			`// while partitions contribute to one evaluation over a shared input.`
			`// Inside an inference process, we generally don't want replicas,`
			`// as it's best to fully isolate replicas on different processes.`
			`// For now we hardcode num_replicas = 1.`
			`const num_devices: u8 = @intCast(self.getDevices().len);`
			`return if (self.compilation_options.sharding_enabled)`
			`.{ .num_replicas = 1, .num_partitions = num_devices }`
			`else`
			`.{ .num_replicas = 1, .num_partitions = 1 };`
Add initial Bazel build configuration, async runtime implementation, and core MLIR dialect definitions for ZML. 2023-01-02 14:28:25 +00:00			`}`

			`pub fn withCompilationOptions(self: Platform, opts: CompilationOptions) Platform {`
			`var res = self;`
			`res.compilation_options = opts;`
			`return res;`
			`}`

			`pub fn deinit(self: *Platform) void {`
			`self.pjrt_client.deinit(self.pjrt_api);`
			`}`

			`/// Returns the Profiler for this API.`
			`/// Not all platform have a profiling api, for those the profiler object will do nothing.`
			`/// Platforms with known profiler extensions: cuda, xpu`
zml: Remove pjrtx wrapper, migrate remaining helpers to their native modules, and fix blocking issue in Event.await. 2023-03-06 17:05:56 +00:00			`pub fn getProfiler(self: Platform, options: pjrt.Profiler.Options) pjrt.Profiler {`
Add initial Bazel build configuration, async runtime implementation, and core MLIR dialect definitions for ZML. 2023-01-02 14:28:25 +00:00			`return self.pjrt_client.getProfiler(self.pjrt_api, options);`
			`}`
			`};`