Skip to content

Codec Detection

compressionz can automatically detect compression formats by examining the first few bytes of data. This enables handling unknown compressed data without prior knowledge of the format.

const std = @import("std");
const cz = @import("compressionz");

/// Decompresses `data` after detecting its codec from the leading bytes.
///
/// Returns `error.UnknownFormat` when `cz.Codec.detect` does not recognise the
/// input; any error from `cz.decompress` is passed through unchanged.
/// The output is produced by `cz.decompress` using `allocator`
/// (presumably caller-owned; confirm against the compressionz API).
pub fn decompressAuto(data: []const u8, allocator: std.mem.Allocator) ![]u8 {
    const codec = cz.Codec.detect(data) orelse return error.UnknownFormat;
    return cz.decompress(codec, data, allocator);
}
pub fn detect(data: []const u8) ?Codec

Returns the detected codec or null if the format is unknown.

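| Codec     | Detectable | Magic Bytes         |
|-----------|------------|---------------------|
| LZ4 Frame | Yes        | 0x04 0x22 0x4D 0x18 |
| Zstd      | Yes        | 0x28 0xB5 0x2F 0xFD |
| Gzip      | Yes        | 0x1F 0x8B           |
| Zlib      | Yes        | CMF/FLG check       |
| Snappy    | Yes        | sNaPpY              |
| LZ4 Raw   | No         | No magic            |
| Brotli    | No         | No magic            |
| Deflate   | No         | No magic            |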
Bytes 0-3: 0x04 0x22 0x4D 0x18 (little-endian 0x184D2204)
// LZ4 frame magic, listed in the order the bytes appear in the stream.
const lz4_frame_magic = [_]u8{ 0x04, 0x22, 0x4D, 0x18 };
// `startsWith` is false for inputs shorter than the magic, so no separate
// length check is needed.
if (std.mem.startsWith(u8, data, &lz4_frame_magic)) {
    // LZ4 Frame
}
Bytes 0-3: 0x28 0xB5 0x2F 0xFD (little-endian 0xFD2FB528)
// Zstd frame magic, listed in the order the bytes appear in the stream.
const zstd_magic = [_]u8{ 0x28, 0xB5, 0x2F, 0xFD };
// `startsWith` is false for inputs shorter than the magic, so no separate
// length check is needed.
if (std.mem.startsWith(u8, data, &zstd_magic)) {
    // Zstd
}
Bytes 0-1: 0x1F 0x8B
Byte 2: Compression method (0x08 = deflate)
// Only the two identification bytes are tested; the compression-method byte
// is not part of this check.
const gzip_magic = [_]u8{ 0x1F, 0x8B };
if (std.mem.startsWith(u8, data, &gzip_magic)) {
    // Gzip
}

Zlib has no fixed magic number, so detection relies on a checksum over its two header bytes:

Byte 0 (CMF): Compression method (low 4 bits = 8 for deflate)
Byte 1 (FLG): Flags
Check: (CMF * 256 + FLG) % 31 == 0
if (data.len >= 2) {
    // CMF is the high byte and FLG the low byte of a big-endian 16-bit header;
    // (cmf << 8) | flg equals cmf * 256 + flg.
    const header: u16 = (@as(u16, data[0]) << 8) | data[1];
    // Low nibble of CMF is the compression method; 8 means deflate.
    const is_deflate = (data[0] & 0x0F) == 8;
    if (is_deflate and header % 31 == 0) {
        // Zlib
    }
}
Bytes 0-5: "sNaPpY" (stream identifier)
// Matches when the input begins with the six ASCII bytes "sNaPpY";
// shorter inputs never match.
// NOTE(review): the Snappy framing format puts this identifier after a
// 4-byte chunk header (0xFF 0x06 0x00 0x00) - confirm the offset the library
// actually checks.
if (std.mem.startsWith(u8, data, "sNaPpY")) {
    // Snappy framed format
}
const cz = @import("compressionz");
const std = @import("std");
/// Detects the codec of `data` and decompresses it using `allocator`.
///
/// Fails with `error.UnknownFormat` when no codec is detected; errors from
/// `cz.decompress` are passed through unchanged.
pub fn decompress(data: []const u8, allocator: std.mem.Allocator) ![]u8 {
    if (cz.Codec.detect(data)) |codec| {
        return cz.decompress(codec, data, allocator);
    }
    // Might be uncompressed or undetectable format
    return error.UnknownFormat;
}
/// Reads the file at `path` (relative to the current working directory, at
/// most 100 MiB) and returns its contents, decompressing them when a codec is
/// detected. Undetected content is returned exactly as read.
pub fn readFile(allocator: std.mem.Allocator, path: []const u8) ![]u8 {
    const max_bytes = 100 * 1024 * 1024;
    const raw = try std.fs.cwd().readFileAlloc(allocator, path, max_bytes);

    // Return as-is if not compressed
    const codec = cz.Codec.detect(raw) orelse return raw;

    // Try to decompress if compressed. The raw buffer is released on both
    // outcomes: after a successful decode it is no longer needed, and on
    // failure nothing else owns it.
    defer allocator.free(raw);
    return cz.decompress(codec, raw, allocator);
}
const std = @import("std");
const cz = @import("compressionz");

/// Content codings recognised by this example, named after the tokens used in
/// an HTTP `Content-Encoding` header.
pub const ContentEncoding = enum {
    none,
    gzip,
    zstd,
    br, // Brotli

    /// Maps a `Content-Encoding` header value to an encoding.
    ///
    /// A missing header, or one naming no recognised coding, yields `.none`.
    /// Matching is by substring and ignores ASCII case, because HTTP content
    /// codings are case-insensitive. When several codings are present the
    /// first match in the order zstd, br, gzip wins.
    pub fn fromHeader(header: ?[]const u8) ContentEncoding {
        const value = header orelse return .none;
        if (std.ascii.indexOfIgnoreCase(value, "zstd") != null) return .zstd;
        if (std.ascii.indexOfIgnoreCase(value, "br") != null) return .br;
        if (std.ascii.indexOfIgnoreCase(value, "gzip") != null) return .gzip;
        return .none;
    }

    /// Returns the compressionz codec for this encoding, or `null` for `.none`.
    pub fn toCodec(self: ContentEncoding) ?cz.Codec {
        return switch (self) {
            .none => null,
            .gzip => .gzip,
            .zstd => .zstd,
            .br => .brotli,
        };
    }
};
/// Decodes an HTTP response body.
///
/// The declared `encoding` is tried first, then auto-detection on `body`.
/// If neither yields a codec, the body is treated as uncompressed and a copy
/// allocated with `allocator` is returned.
pub fn decodeResponse(encoding: ContentEncoding, body: []const u8, allocator: std.mem.Allocator) ![]u8 {
    // Declared encoding first, auto-detect as fallback, otherwise return uncompressed.
    const codec = encoding.toCodec() orelse
        cz.Codec.detect(body) orelse
        return allocator.dupe(u8, body);

    return cz.decompress(codec, body, allocator);
}

For formats without magic bytes, use context or file extensions:

/// Guesses a codec from the file extension of `path`.
///
/// The extension is taken from `std.fs.path.extension` and compared exactly
/// (case-sensitively) against the known suffixes. Returns `null` for an
/// unknown or missing extension.
pub fn codecFromExtension(path: []const u8) ?cz.Codec {
    // A plain table scan is used instead of `std.ComptimeStringMap`, which was
    // removed from the standard library in Zig 0.13. For six entries the
    // lookup cost is negligible, and this compiles on older and newer compilers.
    const Entry = struct { ext: []const u8, codec: cz.Codec };
    const known_extensions = [_]Entry{
        .{ .ext = ".gz", .codec = .gzip },
        .{ .ext = ".zst", .codec = .zstd },
        .{ .ext = ".lz4", .codec = .lz4 },
        .{ .ext = ".br", .codec = .brotli },
        .{ .ext = ".snappy", .codec = .snappy },
        .{ .ext = ".zz", .codec = .zlib },
    };

    const ext = std.fs.path.extension(path);
    for (known_extensions) |entry| {
        if (std.mem.eql(u8, ext, entry.ext)) return entry.codec;
    }
    return null;
}
/// Guesses a codec from a content-type string.
///
/// "gzip", "zstd" and "brotli" match anywhere in the string. The short token
/// "br" matches only when it stands alone (not next to a letter or digit), so
/// unrelated types such as "application/x-cbr" are not mistaken for Brotli.
/// Returns `null` when nothing matches.
pub fn codecFromContentType(content_type: []const u8) ?cz.Codec {
    if (std.mem.indexOf(u8, content_type, "gzip") != null) return .gzip;
    if (std.mem.indexOf(u8, content_type, "zstd") != null) return .zstd;
    if (std.mem.indexOf(u8, content_type, "brotli") != null) return .brotli;

    // Check every occurrence of "br" and accept the first one that is
    // delimited on both sides.
    var search_from: usize = 0;
    while (std.mem.indexOfPos(u8, content_type, search_from, "br")) |start| {
        const end = start + 2;
        const starts_token = start == 0 or !std.ascii.isAlphanumeric(content_type[start - 1]);
        const ends_token = end == content_type.len or !std.ascii.isAlphanumeric(content_type[end]);
        if (starts_token and ends_token) return .brotli;
        search_from = start + 1;
    }
    return null;
}

Beyond detection, query codec capabilities:

// Example: capability and metadata queries on a codec value. The trailing
// comments show the results this page lists for `.zstd`.
const codec: cz.Codec = .zstd;
// Feature queries
codec.supportsStreaming(); // true
codec.supportsDictionary(); // true
codec.hasBuiltinChecksum(); // true
codec.isFramed(); // true
codec.requiresExpectedSize(); // false
// Metadata
codec.name(); // "Zstandard"
codec.extension(); // ".zst"
/// Picks a codec from two feature requirements.
///
/// Any request for dictionary support selects Zstd, with or without
/// streaming. Streaming alone selects Gzip. Neither selects LZ4.
pub fn selectCodec(needs_streaming: bool, needs_dictionary: bool) cz.Codec {
    // Zstd covers both dictionary cases: it is the only codec with dictionary
    // plus streaming, and has the best dictionary support on its own.
    if (needs_dictionary) return .zstd;

    return if (needs_streaming)
        .gzip // Widely compatible streaming
    else
        .lz4; // Fastest for simple cases
}
const result = blk: {
    // Fast path: the codec was recognised from the leading bytes.
    if (cz.Codec.detect(data)) |codec| {
        break :blk cz.decompress(codec, data, allocator);
    }

    // Unknown format - might be:
    //   1. Uncompressed data
    //   2. Brotli or Deflate (no magic)
    //   3. Corrupted data
    // Try common undetectable formats. `tryBrotli` and `tryDeflate` are
    // caller-supplied helpers not shown here; presumably each returns null on
    // failure.
    if (tryBrotli(data, allocator)) |d| break :blk d;
    if (tryDeflate(data, allocator)) |d| break :blk d;
    return error.UnknownFormat;
};

Detection is O(1) — it only examines the first few bytes:

// Detection is essentially free: only the first few bytes are examined, so
// the cost does not grow with the size of the input.
const codec = cz.Codec.detect(gigabyte_of_data); // Instant

Always safe to call on any data, regardless of size.