
Optimization Guide

This guide covers optimization strategies for getting the best performance from compressionz.

Performance differs dramatically between debug and release builds:

# Debug build (default)
zig build
# LZ4: ~500 MB/s
# Release build
zig build -Doptimize=ReleaseFast
# LZ4: ~36 GB/s (72× faster!)
| Build Mode | LZ4 Speed | Use Case |
| --- | --- | --- |
| Debug | ~500 MB/s | Development |
| ReleaseSafe | ~20 GB/s | Production with checks |
| ReleaseFast | ~36 GB/s | Maximum performance |
| ReleaseSmall | ~25 GB/s | Minimal binary size |
  • Development: Debug (fast compilation)
  • Testing: ReleaseSafe (catches bugs)
  • Production: ReleaseFast (maximum speed)
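The optimize mode set on your top-level build flows into the compressionz dependency as well. A minimal build.zig sketch using the Zig 0.13/0.14 build API ("app", "src/main.zig", and the dependency/module name "compressionz" are assumptions for illustration):

const std = @import("std");

pub fn build(b: *std.Build) void {
    const target = b.standardTargetOptions(.{});
    // `zig build -Doptimize=ReleaseFast` is picked up here.
    const optimize = b.standardOptimizeOption(.{});

    const exe = b.addExecutable(.{
        .name = "app",
        .root_source_file = b.path("src/main.zig"),
        .target = target,
        .optimize = optimize,
    });

    // Pass the same optimize mode to the dependency so it is built ReleaseFast too.
    const cz_dep = b.dependency("compressionz", .{
        .target = target,
        .optimize = optimize,
    });
    exe.root_module.addImport("compressionz", cz_dep.module("compressionz"));

    b.installArtifact(exe);
}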
Choose the codec that matches your workload:

// Fastest compression
const result = try cz.compress(.lz4_raw, data, allocator);

// Fast with self-describing format
const result = try cz.compress(.snappy, data, allocator);

// Best balance of speed and ratio
const result = try cz.compress(.zstd, data, allocator);

// Best ratio for one-time compression
const result = try cz.compressWithOptions(.brotli, data, allocator, .{
    .level = .best,
});

// Best ratio with reasonable speed
const result = try cz.compressWithOptions(.zstd, data, allocator, .{
    .level = .best,
});
| Scenario | Codec | Level | Throughput |
| --- | --- | --- | --- |
| Real-time | LZ4 Raw | default | 36 GB/s |
| Messaging | Snappy | default | 31 GB/s |
| General | Zstd | default | 12 GB/s |
| Archival | Zstd | best | 1.3 GB/s |
| Web assets | Brotli | best | 86 MB/s |
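The table above can be encoded directly as a switch. This is an illustrative sketch only; the Scenario enum and codecFor helper are ours, not part of the library:

const cz = @import("compressionz");

const Scenario = enum { realtime, messaging, general, archival, web_assets };

// Hypothetical helper: maps a workload onto the codec recommendations above.
fn codecFor(scenario: Scenario) cz.Codec {
    return switch (scenario) {
        .realtime => .lz4_raw,
        .messaging => .snappy,
        .general, .archival => .zstd,
        .web_assets => .brotli,
    };
}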

Avoid allocation overhead with pre-allocated buffers:

// Standard API (allocates each time)
for (items) |item| {
    const compressed = try cz.compress(.lz4, item, allocator);
    defer allocator.free(compressed); // Freed at the end of each iteration
    try process(compressed);
}

// Zero-copy (no allocations)
var buffer: [65536]u8 = undefined;
for (items) |item| {
    const compressed = try cz.compressInto(.lz4, item, &buffer, .{});
    try process(compressed);
}
A reusable compressor can own its scratch buffer:

const Compressor = struct {
    buffer: []u8,

    pub fn init(allocator: std.mem.Allocator, max_input_size: usize) !Compressor {
        const buffer_size = cz.maxCompressedSize(.lz4, max_input_size);
        return .{
            .buffer = try allocator.alloc(u8, buffer_size),
        };
    }

    pub fn compress(self: *Compressor, data: []const u8) ![]u8 {
        return cz.compressInto(.lz4, data, self.buffer, .{});
    }
};
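Usage sketch (assumes the items and process from the loop above; the caller frees the buffer when done):

var compressor = try Compressor.init(allocator, 64 * 1024);
defer allocator.free(compressor.buffer);

for (items) |item| {
    const compressed = try compressor.compress(item);
    try process(compressed);
}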
For batch jobs, an arena allocator releases every temporary allocation in one call:

pub fn processBatch(items: []const []const u8, backing: std.mem.Allocator) !void {
    var arena = std.heap.ArenaAllocator.init(backing);
    defer arena.deinit(); // One free for all allocations
    for (items) |item| {
        const compressed = try cz.compress(.zstd, item, arena.allocator());
        try sendData(compressed);
        // No individual frees needed
    }
}

Zstd:

| Level | Compress Speed | Ratio | Notes |
| --- | --- | --- | --- |
| fast | 12 GB/s | 99.9% | Recommended |
| default | 12 GB/s | 99.9% | Same as fast |
| best | 1.3 GB/s | 99.9% | 9× slower, marginal gain |

Brotli:

| Level | Compress Speed | Ratio | Notes |
| --- | --- | --- | --- |
| fast | 1.3 GB/s | 99.9% | Dynamic content |
| default | 1.3 GB/s | 99.9% | Same as fast |
| best | 86 MB/s | 99.9%+ | Only for static content |

Use .default unless you have a specific reason:

  • .fast rarely helps (often same as default)
  • .best has diminishing returns for most data

Larger chunks give better throughput at the cost of more memory:

// Small chunks (more overhead)
var buf: [4096]u8 = undefined;
// Large chunks (better throughput)
var buf: [65536]u8 = undefined; // Recommended
// Very large (diminishing returns)
var buf: [1048576]u8 = undefined;

Process data as it arrives:

pub fn streamProcess(input: anytype, output: anytype, allocator: std.mem.Allocator) !void {
    var decomp = try cz.decompressor(.gzip, allocator, input);
    defer decomp.deinit();

    var comp = try cz.compressor(.zstd, allocator, output, .{});
    defer comp.deinit();

    var buf: [65536]u8 = undefined;
    while (true) {
        const n = try decomp.reader().read(&buf);
        if (n == 0) break;
        try comp.writer().writeAll(buf[0..n]);
    }
    try comp.finish();
}
Whether a dictionary helps depends mostly on input size:

| Data Size | Without Dict | With Dict | Use Dict? |
| --- | --- | --- | --- |
| 100 B | 105 B | 45 B | ✅ Yes |
| 1 KB | 780 B | 380 B | ✅ Yes |
| 10 KB | 3 KB | 1.9 KB | ✅ Yes |
| 100 KB | 28 KB | 24 KB | Maybe |
| 1 MB | 684 B | 680 B | ❌ No |

Rule of thumb: Use dictionaries for data < 10 KB with known patterns.

Recommended dictionary sizes by use case:

| Use Case | Size | Notes |
| --- | --- | --- |
| JSON APIs | 16-32 KB | Common field names |
| Log messages | 32-64 KB | Common log patterns |
| Protocol buffers | 8-16 KB | Schema patterns |

Larger dictionaries have diminishing returns.
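How a dictionary is supplied depends on the compressionz API; the sketch below assumes a .dictionary option on compressWithOptions, which is a hypothetical name to verify against the library's dictionary documentation:

// Hypothetical: `.dictionary` and "json_api.dict" are placeholders, not confirmed API.
const dict = @embedFile("json_api.dict");

const compressed = try cz.compressWithOptions(.zstd, small_json, allocator, .{
    .dictionary = dict,
});
defer allocator.free(compressed);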

Compress multiple items in parallel:

const std = @import("std");
const cz = @import("compressionz");

pub fn compressParallel(items: []const []const u8, allocator: std.mem.Allocator) ![][]u8 {
    const results = try allocator.alloc([]u8, items.len);

    var pool: std.Thread.Pool = undefined;
    try pool.init(.{ .allocator = allocator });
    defer pool.deinit();

    var wait_group: std.Thread.WaitGroup = .{};
    for (items, 0..) |item, i| {
        pool.spawnWg(&wait_group, compressOne, .{ item, allocator, &results[i] });
    }
    pool.waitAndWork(&wait_group);

    return results;
}

fn compressOne(item: []const u8, allocator: std.mem.Allocator, result: *[]u8) void {
    // Production code should record failures instead of asserting they cannot happen.
    result.* = cz.compress(.zstd, item, allocator) catch unreachable;
}
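The caller owns both the outer slice and each compressed item (a usage sketch, assuming items is in scope):

const results = try compressParallel(items, allocator);
defer {
    for (results) |r| allocator.free(r);
    allocator.free(results);
}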

Split into chunks:

pub fn compressLargeFile(data: []const u8, chunk_size: usize, allocator: std.mem.Allocator) ![][]u8 {
    const num_chunks = (data.len + chunk_size - 1) / chunk_size;
    const chunks = try allocator.alloc([]u8, num_chunks);
    // Compress chunks in parallel...
}
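A minimal sequential completion of that skeleton; the per-chunk loop body is exactly what you would hand to compressParallel above:

pub fn compressChunks(data: []const u8, chunk_size: usize, allocator: std.mem.Allocator) ![][]u8 {
    const num_chunks = (data.len + chunk_size - 1) / chunk_size;
    const chunks = try allocator.alloc([]u8, num_chunks);
    for (chunks, 0..) |*chunk, i| {
        const start = i * chunk_size;
        const end = @min(start + chunk_size, data.len);
        chunk.* = try cz.compress(.zstd, data[start..end], allocator);
    }
    return chunks;
}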

Test with your actual data:

const std = @import("std");
const cz = @import("compressionz");

pub fn benchmark(data: []const u8, allocator: std.mem.Allocator) !void {
    const codecs = [_]cz.Codec{ .lz4, .snappy, .zstd, .gzip, .brotli };

    std.debug.print("Input size: {d} bytes\n\n", .{data.len});
    std.debug.print("{s:<12} {s:>10} {s:>10} {s:>10}\n", .{
        "Codec", "Size", "Compress", "Decompress",
    });

    inline for (codecs) |codec| {
        var timer = try std.time.Timer.start();
        const compressed = try cz.compress(codec, data, allocator);
        const compress_ns = timer.read();

        timer.reset();
        const decompressed = try cz.decompress(codec, compressed, allocator);
        const decompress_ns = timer.read();

        allocator.free(compressed);
        allocator.free(decompressed);

        std.debug.print("{s:<12} {d:>10} {d:>9}µs {d:>9}µs\n", .{
            codec.name(),
            compressed.len,
            compress_ns / 1000,
            decompress_ns / 1000,
        });
    }
}
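For example, wired up to a file read with Zig 0.13/0.14 std APIs ("sample.bin" is a placeholder; benchmark with data representative of your workload):

pub fn main() !void {
    const allocator = std.heap.page_allocator;

    const data = try std.fs.cwd().readFileAlloc(allocator, "sample.bin", 64 * 1024 * 1024);
    defer allocator.free(data);

    try benchmark(data, allocator);
}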
Common mistakes to avoid:

# Wrong: 72× slower
$ zig build && ./app

# Right: Full speed
$ zig build -Doptimize=ReleaseFast && ./app
// Wrong: Compressing already compressed data
const gzip_data = try cz.compress(.gzip, image_data, allocator);
const zstd_data = try cz.compress(.zstd, gzip_data, allocator); // Waste of CPU!

// Right: Compress once
const compressed = try cz.compress(.zstd, raw_data, allocator);

// Wrong: Brotli best for real-time data
const compressed = try cz.compressWithOptions(.brotli, message, allocator, .{
    .level = .best, // 86 MB/s is too slow for real-time!
});

// Right: Use LZ4 or Snappy for real-time
const compressed = try cz.compress(.lz4_raw, message, allocator);
// Wrong: Allocation per iteration
while (hasData()) {
    const compressed = try cz.compress(.lz4, getData(), allocator);
    defer allocator.free(compressed);
    try send(compressed);
}

// Right: Reuse buffer
var buffer: [65536]u8 = undefined;
while (hasData()) {
    const compressed = try cz.compressInto(.lz4, getData(), &buffer, .{});
    try send(compressed);
}
  1. Use ReleaseFast for production
  2. Choose the right codec for your use case
  3. Use .default level unless you have specific needs
  4. Reuse buffers in hot paths
  5. Use dictionaries for small, structured data
  6. Benchmark with your actual data