Codec Detection
compressionz can automatically detect compression formats by examining the first few bytes of data. This enables handling unknown compressed data without prior knowledge of the format.
Basic Detection
Section titled “Basic Detection”
const std = @import("std");
const cz = @import("compressionz");

/// Decompresses `data` after detecting its format from the leading bytes.
///
/// Returns `error.UnknownFormat` when `cz.Codec.detect` finds no match;
/// otherwise returns the result of `cz.decompress` for the detected codec.
/// The returned slice is presumably allocated with `allocator` and owned by
/// the caller — confirm against the `cz.decompress` documentation.
pub fn decompressAuto(data: []const u8, allocator: std.mem.Allocator) ![]u8 {
    const codec = cz.Codec.detect(data) orelse return error.UnknownFormat;
    return cz.decompress(codec, data, allocator);
}
Codec.detect()
Section titled “Codec.detect()”
pub fn detect(data: []const u8) ?Codec
Returns the detected codec, or null if the format is unknown.
Detection Support
Section titled “Detection Support”
| Codec | Detectable | Magic Bytes |
|---|---|---|
| LZ4 Frame | ✅ | 0x04 0x22 0x4D 0x18 |
| Zstd | ✅ | 0x28 0xB5 0x2F 0xFD |
| Gzip | ✅ | 0x1F 0x8B |
| Zlib | ✅ | CMF/FLG check |
| Snappy | ✅ | sNaPpY |
| LZ4 Raw | ❌ | No magic |
| Brotli | ❌ | No magic |
| Deflate | ❌ | No magic |
Magic Bytes Reference
Section titled “Magic Bytes Reference”LZ4 Frame
Section titled “LZ4 Frame”
Bytes 0-3: 0x04 0x22 0x4D 0x18 (little-endian 0x184D2204)
if (data.len >= 4 and
    data[0] == 0x04 and data[1] == 0x22 and
    data[2] == 0x4D and data[3] == 0x18)
{
    // LZ4 Frame
}
Zstd
Bytes 0-3: 0x28 0xB5 0x2F 0xFD (little-endian 0xFD2FB528)
if (data.len >= 4 and
    data[0] == 0x28 and data[1] == 0xB5 and
    data[2] == 0x2F and data[3] == 0xFD)
{
    // Zstd
}
Gzip
Bytes 0-1: 0x1F 0x8B
Byte 2: Compression method (0x08 = deflate)
if (data.len >= 2 and data[0] == 0x1F and data[1] == 0x8B) {
    // Gzip
}
Zlib
Zlib uses a checksum-based detection:
Byte 0 (CMF): Compression method (low 4 bits = 8 for deflate)
Byte 1 (FLG): Flags
Check: (CMF * 256 + FLG) % 31 == 0
if (data.len >= 2) {
    const cmf = data[0];
    const flg = data[1];
    if ((cmf & 0x0F) == 8 and (@as(u16, cmf) * 256 + flg) % 31 == 0) {
        // Zlib
    }
}
Snappy (Framed)
Section titled “Snappy (Framed)”
Bytes 0-5: "sNaPpY" (stream identifier)
if (data.len >= 6 and std.mem.eql(u8, data[0..6], "sNaPpY")) {
    // Snappy framed format
}
Use Cases
Section titled “Use Cases”Generic Decompressor
Section titled “Generic Decompressor”
const cz = @import("compressionz");
const std = @import("std");

/// Generic decompressor: detects the codec of `data` and decompresses it.
///
/// Returns `error.UnknownFormat` when no codec is detected, which can mean
/// the input is uncompressed or uses a format that cannot be detected.
pub fn decompress(data: []const u8, allocator: std.mem.Allocator) ![]u8 {
    if (cz.Codec.detect(data)) |codec| {
        return cz.decompress(codec, data, allocator);
    }

    // Might be uncompressed or undetectable format
    return error.UnknownFormat;
}
File Handler
Section titled “File Handler”
/// Reads the file at `path` (at most 100 MiB) and transparently decompresses
/// it when a codec is detected from its leading bytes.
///
/// Returns the raw file contents unchanged when no codec is detected. When a
/// codec is detected but decompression fails, the error is propagated and
/// the raw buffer is freed; there is no fallback to the raw bytes.
pub fn readFile(allocator: std.mem.Allocator, path: []const u8) ![]u8 {
    const max_bytes = 100 * 1024 * 1024;
    const raw = try std.fs.cwd().readFileAlloc(allocator, path, max_bytes);
    // Covers every error exit below; success paths release or return `raw`.
    errdefer allocator.free(raw);

    // Return as-is if not compressed
    const codec = cz.Codec.detect(raw) orelse return raw;

    // Free the raw buffer only after decompression has succeeded, so a
    // failure is handled by the errdefer and never double-frees.
    const decompressed = try cz.decompress(codec, raw, allocator);
    allocator.free(raw);
    return decompressed;
}
Multi-Format API
Section titled “Multi-Format API”
const std = @import("std");
const cz = @import("compressionz");

/// HTTP content codings this example knows how to map to a codec.
pub const ContentEncoding = enum {
    none,
    gzip,
    zstd,
    br, // Brotli

    /// Maps a `Content-Encoding` header value to a `ContentEncoding`.
    ///
    /// The value is split on commas; each token has any `;parameter` suffix
    /// and surrounding whitespace removed and is then compared exactly,
    /// ignoring ASCII case. Exact token matching keeps a short name such as
    /// "br" from matching inside unrelated text. When several known codings
    /// are listed, the priority is zstd, then br, then gzip, regardless of
    /// their order in the header. `x-gzip` is accepted as an alias of gzip.
    /// Returns `.none` for a null header or when no known coding is present.
    pub fn fromHeader(header: ?[]const u8) ContentEncoding {
        const value = header orelse return .none;
        const eql = std.ascii.eqlIgnoreCase;

        var saw_br = false;
        var saw_gzip = false;
        var tokens = std.mem.tokenizeScalar(u8, value, ',');
        while (tokens.next()) |raw| {
            // Drop parameters (e.g. ";q=0.8") before trimming whitespace.
            const end = std.mem.indexOfScalar(u8, raw, ';') orelse raw.len;
            const token = std.mem.trim(u8, raw[0..end], " \t");

            // zstd has the highest priority, so it can return immediately.
            if (eql(token, "zstd")) return .zstd;
            if (eql(token, "br")) saw_br = true;
            if (eql(token, "gzip") or eql(token, "x-gzip")) saw_gzip = true;
        }

        if (saw_br) return .br;
        if (saw_gzip) return .gzip;
        return .none;
    }

    /// Returns the matching `cz.Codec`, or null for `.none`.
    pub fn toCodec(self: ContentEncoding) ?cz.Codec {
        return switch (self) {
            .none => null,
            .gzip => .gzip,
            .zstd => .zstd,
            .br => .brotli,
        };
    }
};
/// Decodes a response `body` into a newly returned buffer.
///
/// Codec selection, in order:
///   1. the codec named by `encoding`, when it maps to one;
///   2. otherwise the codec auto-detected from the body's leading bytes;
///   3. otherwise none, and a copy of `body` made with `allocator` is returned.
pub fn decodeResponse(encoding: ContentEncoding, body: []const u8, allocator: std.mem.Allocator) ![]u8 {
    // Auto-detect as fallback; return uncompressed when detection fails too.
    const codec = encoding.toCodec() orelse
        (cz.Codec.detect(body) orelse return allocator.dupe(u8, body));

    return cz.decompress(codec, body, allocator);
}
Handling Undetectable Formats
Section titled “Handling Undetectable Formats”For formats without magic bytes, use context or file extensions:
By Extension
Section titled “By Extension”
/// Maps the file extension of `path` to a codec.
///
/// The extension is taken from `std.fs.path.extension` and looked up exactly
/// (case-sensitively) in a fixed table. Returns null for any extension that
/// is not in the table.
pub fn codecFromExtension(path: []const u8) ?cz.Codec {
    // `std.ComptimeStringMap` was replaced by `std.StaticStringMap` in
    // Zig 0.13; `initComptime` builds the same compile-time lookup table.
    const map = std.StaticStringMap(cz.Codec).initComptime(.{
        .{ ".gz", .gzip },
        .{ ".zst", .zstd },
        .{ ".lz4", .lz4 },
        .{ ".br", .brotli },
        .{ ".snappy", .snappy },
        .{ ".zz", .zlib },
    });

    return map.get(std.fs.path.extension(path));
}
By Content-Type
Section titled “By Content-Type”
/// Guesses a codec from a `Content-Type` header value.
///
/// Only the media type (the text before the first `;`) is searched, so
/// parameters such as a multipart boundary or a charset cannot trigger a
/// match. The search ignores ASCII case. It is still a substring match
/// ("gzip", then "zstd", then "br"), so it remains a heuristic. Returns null
/// when none of the names is found.
pub fn codecFromContentType(content_type: []const u8) ?cz.Codec {
    const end = std.mem.indexOfScalar(u8, content_type, ';') orelse content_type.len;
    const media_type = content_type[0..end];

    if (std.ascii.indexOfIgnoreCase(media_type, "gzip") != null) return .gzip;
    if (std.ascii.indexOfIgnoreCase(media_type, "zstd") != null) return .zstd;
    if (std.ascii.indexOfIgnoreCase(media_type, "br") != null) return .brotli;
    return null;
}
Codec Capabilities Query
Section titled “Codec Capabilities Query”Beyond detection, query codec capabilities:
const codec: cz.Codec = .zstd;
// Feature queries
codec.supportsStreaming(); // true
codec.supportsDictionary(); // true
codec.hasBuiltinChecksum(); // true
codec.isFramed(); // true
codec.requiresExpectedSize(); // false

// Metadata
codec.name(); // "Zstandard"
codec.extension(); // ".zst"
Example: Feature-Based Selection
Section titled “Example: Feature-Based Selection”
/// Picks a codec from two feature requirements.
///
///   needs_dictionary (with or without streaming) -> .zstd
///   needs_streaming only                         -> .gzip
///   neither                                      -> .lz4
pub fn selectCodec(needs_streaming: bool, needs_dictionary: bool) cz.Codec {
    // Zstd is the only choice with both features and also the best
    // dictionary support, so a dictionary requirement alone decides it.
    if (needs_dictionary) return .zstd;

    // Widely compatible streaming
    if (needs_streaming) return .gzip;

    // Fastest for simple cases
    return .lz4;
}
Error Handling
Section titled “Error Handling”
// Decompress `data`, trying undetectable formats when detection fails.
const result = blk: {
    // Known format: hand straight to the matching decompressor.
    if (cz.Codec.detect(data)) |codec| {
        break :blk cz.decompress(codec, data, allocator);
    }

    // Unknown format - might be:
    // 1. Uncompressed data
    // 2. Brotli or Deflate (no magic)
    // 3. Corrupted data

    // Try common undetectable formats
    if (tryBrotli(data, allocator)) |d| break :blk d;
    if (tryDeflate(data, allocator)) |d| break :blk d;

    return error.UnknownFormat;
};
Performance Note
Section titled “Performance Note”Detection is O(1) — it only examines the first few bytes:
// Detection is essentially free
const codec = cz.Codec.detect(gigabyte_of_data); // Instant
Always safe to call on any data, regardless of size.