diff --git a/docs/tutorials/write_first_model.md b/docs/tutorials/write_first_model.md
index 7cea735..ebaf6c2 100644
--- a/docs/tutorials/write_first_model.md
+++ b/docs/tutorials/write_first_model.md
@@ -99,10 +99,7 @@ Let's start by writing some Zig code, importing ZML and often-used modules:
 ```zig
 const std = @import("std");
 const zml = @import("zml");
 const asynk = @import("async");
-
-// shortcut to the asyncc function in the asynk module
-const asyncc = asynk.asyncc;
 ```
 
 You will use above lines probably in all ZML projects. Also, note that **ZML is
@@ -251,7 +248,7 @@ const model_shapes = try zml.aio.populateModel(Layer, allocator, bs);
 
 // Start compiling. This uses the inferred shapes from the BufferStore.
 // The shape of the input tensor, we have to pass in manually.
-var compilation = try asyncc(
+var compilation = try asynk.asyncc(
     zml.compileModel,
     .{ allocator, Layer.forward, model_shapes, .{input_shape}, platform },
 );
@@ -263,11 +260,11 @@
 var model_weights = try zml.aio.loadBuffers(Layer, .{}, bs, arena, platform);
 defer zml.aio.unloadBuffers(&model_weights); // for good practice
 
 // Wait for compilation to finish
 const compiled = try compilation.awaitt();
 ```
 
-Compiling is happening in the background via the `asyncc` function. We call
-`asyncc` with the `zml.compileModel` function and its arguments
+Compiling is happening in the background via the `asynk.asyncc` function. We
+call `asynk.asyncc` with the `zml.compileModel` function and its arguments
 separately. The arguments themselves are basically the shapes of the weights
@@ -418,9 +415,7 @@ You can access the complete source code of this walkthrough here:
 ```zig
 const std = @import("std");
 const zml = @import("zml");
 const asynk = @import("async");
-
-const asyncc = asynk.asyncc;
 
 /// Model definition
 const Layer = struct {
@@ -482,7 +477,7 @@ pub fn asyncMain() !void {
 
     // Start compiling. This uses the inferred shapes from the BufferStore.
     // The shape of the input tensor, we have to pass in manually.
-    var compilation = try asyncc(zml.compileModel, .{ allocator, Layer.forward, model_shapes, .{input_shape}, platform });
+    var compilation = try asynk.asyncc(zml.compileModel, .{ allocator, Layer.forward, model_shapes, .{input_shape}, platform });
 
     // Produce a bufferized weights struct from the fake BufferStore.
     // This is like the inferred shapes, but with actual values.