Skip to content

Commit

Permalink
add std.zip and support zip files in build.zig.zon
Browse files Browse the repository at this point in the history
fixes #17408

Co-authored-by: Joel Gustafson <joelg@mit.edu>
  • Loading branch information
marler8997 and joeltg committed Apr 22, 2024
1 parent c7ffdbc commit 7f40093
Show file tree
Hide file tree
Showing 6 changed files with 606 additions and 0 deletions.
4 changes: 4 additions & 0 deletions lib/std/io.zig
Original file line number Diff line number Diff line change
Expand Up @@ -344,6 +344,10 @@ pub fn GenericWriter(
return @errorCast(self.any().writeStruct(value));
}

/// Forwards `value` and `endian` to `writeStructEndian` on the type-erased
/// writer returned by `any()`, then narrows the error it reports to this
/// writer's own `Error` set with `@errorCast`.
pub inline fn writeStructEndian(self: Self, value: anytype, endian: std.builtin.Endian) Error!void {
return @errorCast(self.any().writeStructEndian(value, endian));
}

pub inline fn any(self: *const Self) AnyWriter {
return .{
.context = @ptrCast(&self.context),
Expand Down
12 changes: 12 additions & 0 deletions lib/std/io/Writer.zig
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
const std = @import("../std.zig");
const assert = std.debug.assert;
const mem = std.mem;
const native_endian = @import("builtin").target.cpu.arch.endian();

context: *const anyopaque,
writeFn: *const fn (context: *const anyopaque, bytes: []const u8) anyerror!usize,
Expand Down Expand Up @@ -59,6 +60,17 @@ pub fn writeStruct(self: Self, value: anytype) anyerror!void {
return self.writeAll(mem.asBytes(&value));
}

/// Writes `value` through `writeStruct` with its fields in `endian` byte order.
///
/// When `endian` already matches the target's native byte order the value is
/// written unchanged; otherwise a local copy has all of its fields
/// byte-swapped with `mem.byteSwapAllFields` and that copy is written instead.
/// The caller's `value` is never modified.
pub fn writeStructEndian(self: Self, value: anytype, endian: std.builtin.Endian) anyerror!void {
    // TODO: make sure this value is not a reference type
    if (native_endian != endian) {
        // Swap on a private copy so the argument itself stays untouched.
        var swapped = value;
        mem.byteSwapAllFields(@TypeOf(value), &swapped);
        return self.writeStruct(swapped);
    }
    return self.writeStruct(value);
}

pub fn writeFile(self: Self, file: std.fs.File) anyerror!void {
// TODO: figure out how to adjust std lib abstractions so that this ends up
// doing sendfile or maybe even copy_file_range under the right conditions.
Expand Down
1 change: 1 addition & 0 deletions lib/std/std.zig
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ pub const unicode = @import("unicode.zig");
pub const valgrind = @import("valgrind.zig");
pub const wasm = @import("wasm.zig");
pub const zig = @import("zig.zig");
pub const zip = @import("zip.zig");
pub const start = @import("start.zig");

const root = @import("root");
Expand Down
352 changes: 352 additions & 0 deletions lib/std/zip.zig
Original file line number Diff line number Diff line change
@@ -0,0 +1,352 @@
//! The .ZIP File Format Specification is found here:
//! https://pkwaredownloads.blob.core.windows.net/pem/APPNOTE.txt
const builtin = @import("builtin");
const std = @import("std");
const testing = std.testing;

pub const File = @import("zip/test.zig").File;
pub const FileCache = @import("zip/test.zig").FileCache;
pub const writeFile = @import("zip/test.zig").writeFile;

/// Compression method identifier as stored in the 16-bit `compression_method`
/// field of the ZIP headers. The enum is non-exhaustive, so identifiers other
/// than the two named here can still be represented.
pub const CompressionMethod = enum(u16) {
    /// Entry data is copied verbatim.
    store = 0,
    /// Entry data is a DEFLATE stream.
    deflate = 8,
    _,
};

/// Signature that starts every central directory file header.
pub const central_file_header_sig = [4]u8{ 'P', 'K', 1, 2 };
/// Signature that starts every local file header.
pub const local_file_header_sig = [4]u8{ 'P', 'K', 3, 4 };
/// Signature that starts the end of central directory record.
pub const end_of_central_directory_sig = [4]u8{ 'P', 'K', 5, 6 };

/// Fixed-size part of a local file header, laid out exactly as it appears in
/// the archive. In the file it is followed by `filename_len` bytes of file
/// name and `extra_len` bytes of extra data, and then the entry's data.
///
/// Every field is `align(1)` so the struct has no padding and an alignment
/// of 1, matching the other on-disk header structs in this file.
pub const LocalFileHeader = extern struct {
    /// Expected to equal `local_file_header_sig`.
    signature: [4]u8 align(1),
    minimum_version: u16 align(1),
    flags: u16 align(1),
    compression_method: CompressionMethod align(1),
    last_modification_time: u16 align(1),
    last_modification_date: u16 align(1),
    /// CRC-32 of the uncompressed data.
    crc32: u32 align(1),
    compressed_size: u32 align(1),
    uncompressed_size: u32 align(1),
    /// Number of file name bytes that follow this header.
    filename_len: u16 align(1),
    /// Number of extra-field bytes that follow the file name.
    extra_len: u16 align(1),
};

/// Fixed-size part of a central directory file header, laid out as it
/// appears in the archive (every field is `align(1)`, so there is no padding).
/// In the file it is followed by `filename_len`, `extra_len` and
/// `comment_len` bytes of variable-length data; `Iterator.next` skips over
/// those to reach the following header.
pub const CentralDirectoryFileHeader = extern struct {
/// Expected to equal `central_file_header_sig`.
signature: [4]u8 align(1),
version: u16 align(1),
minimum_version: u16 align(1),
flags: u16 align(1),
compression_method: CompressionMethod align(1),
last_modification_time: u16 align(1),
last_modification_date: u16 align(1),
/// Expected CRC-32 of the entry's uncompressed data.
crc32: u32 align(1),
compressed_size: u32 align(1),
uncompressed_size: u32 align(1),
/// Number of file name bytes that follow this header.
filename_len: u16 align(1),
extra_len: u16 align(1),
comment_len: u16 align(1),
/// Must be 0; entries on other disks are rejected as multi-disk.
disk_number: u16 align(1),
internal_file_attributes: u16 align(1),
external_file_attributes: u32 align(1),
/// Absolute file offset of the entry's `LocalFileHeader`.
local_file_header_offset: u32 align(1),
};

/// Fixed-size part of the end of central directory record, laid out as it
/// appears in the archive (every field is `align(1)`, so there is no padding).
/// In the file it is followed by `comment_len` bytes of archive comment.
pub const EndOfCentralDirectoryRecord = extern struct {
/// Expected to equal `end_of_central_directory_sig`.
signature: [4]u8 align(1),
disk_number: u16 align(1),
central_directory_disk_number: u16 align(1),
record_count_disk: u16 align(1),
/// Number of central directory headers that `Iterator` will yield.
record_count_total: u16 align(1),
central_directory_size: u32 align(1),
/// Absolute file offset of the first central directory file header.
central_directory_offset: u32 align(1),
/// Length of the trailing comment; `findEocdr` uses it to validate a candidate record.
comment_len: u16 align(1),
};

/// Locates the end of central directory record of `file` and returns it with
/// its fields in native byte order.
///
/// The search starts with the last `@sizeOf(EndOfCentralDirectoryRecord)`
/// bytes of the file and slides that window towards the start of the file one
/// byte at a time. A window is accepted when it begins with
/// `end_of_central_directory_sig` and its little-endian `comment_len` field
/// (bytes 20..22) equals the number of bytes that follow the window.
///
/// Returns `error.ZipTruncated` when the file is shorter than one record or a
/// read comes up short, and `error.ZipMissingEocdr` when no record is found
/// within the largest possible comment length or before the start of the
/// file. Errors from the underlying file operations are passed through.
pub fn findEocdr(file: std.fs.File) !EndOfCentralDirectoryRecord {
// The EOCD record can contain a variable-length comment at the end,
// which makes ZIP file parsing ambiguous in general, since a valid
// comment could contain the bytes of another valid EOCD record.
// Here we just search backwards for the first instance of the EOCD
// signature, and return an error if a valid EOCD record doesn't follow.

// TODO: make this more efficient
// we need a backward_buffered_reader
const file_size = try file.getEndPos();

const record_len = @sizeOf(EndOfCentralDirectoryRecord);
var record_value: EndOfCentralDirectoryRecord = undefined;
// Byte view of `record_value`; the window is read directly into the result.
const record_bytes: *[record_len]u8 = @ptrCast(&record_value);
if (file_size < record_len)
return error.ZipTruncated;
// First candidate: the record sits at the very end (no comment).
try file.seekFromEnd(-record_len);
{
const len = try file.readAll(record_bytes);
if (len != record_len)
return error.ZipTruncated;
}

// Number of bytes between the end of the current window and the end of the file.
var comment_len: u16 = 0;
while (true) {
if (std.mem.eql(u8, record_bytes[0..4], &end_of_central_directory_sig) and
std.mem.readInt(u16, record_bytes[20..22], .little) == comment_len)
{
break;
}

// The comment length field is 16 bits, so the record cannot start any earlier than this.
if (comment_len == std.math.maxInt(u16))
return error.ZipMissingEocdr;
// Shift the window contents one byte towards the end, freeing index 0
// for the byte that precedes the window in the file.
std.mem.copyBackwards(u8, record_bytes[1..], record_bytes[0 .. record_bytes.len - 1]);
comment_len += 1;

// The window would now start before the beginning of the file.
if (@as(u64, record_len) + @as(u64, comment_len) > file_size)
return error.ZipMissingEocdr;

try file.seekFromEnd(-record_len - @as(i64, comment_len));
{
const len = try file.readAll(record_bytes[0..1]);
if (len != 1)
return error.ZipTruncated;
}
}

// The bytes were read verbatim from the little-endian on-disk record;
// convert the fields on hosts that are not little-endian.
if (builtin.target.cpu.arch.endian() != .little) {
std.mem.byteSwapAllFields(@TypeOf(record_value), &record_value);
}
return record_value;
}

/// Transfers one entry's data from `reader` to `writer`, decoding it
/// according to `method`.
///
/// `decompress` returns the actual CRC-32 of the decompressed bytes,
/// which should be validated against the expected entry.crc32 value.
/// `writer` can be anything with a `writeAll(self: *Self, chunk: []const u8) anyerror!void` method.
///
/// `reader` must end where the entry's data ends (for example a limited
/// reader); it is consumed until it reports end of stream.
///
/// Errors:
/// * `error.ZipUncompressSizeMismatch` when the number of bytes produced
///   differs from `uncompressed_size`. This is checked for every method, and
///   output stops as soon as it exceeds `uncompressed_size`.
/// * `error.ZipDeflateTruncated` when input bytes remain buffered after the
///   DEFLATE stream has ended.
/// * `error.UnsupportedCompressionMethod` for any other `method`.
/// * Any error from `reader`, `writer` or the DEFLATE decoder.
pub fn decompress(
    method: CompressionMethod,
    uncompressed_size: u32,
    reader: anytype,
    writer: anytype,
) !u32 {
    var hash = std.hash.Crc32.init();
    // Counted in 64 bits: the data may be larger than the 32-bit size field
    // claims, and the counter itself must not overflow while detecting that.
    var total_uncompressed: u64 = 0;

    switch (method) {
        .store => {
            var buf: [std.mem.page_size]u8 = undefined;
            while (true) {
                const len = try reader.read(&buf);
                if (len == 0) break;
                total_uncompressed += len;
                // Refuse to write more than the header promised.
                if (total_uncompressed > uncompressed_size)
                    return error.ZipUncompressSizeMismatch;
                try writer.writeAll(buf[0..len]);
                hash.update(buf[0..len]);
            }
        },
        .deflate => {
            var br = std.io.bufferedReader(reader);
            var decompressor = std.compress.flate.decompressor(br.reader());
            while (try decompressor.next()) |chunk| {
                total_uncompressed += chunk.len;
                // Refuse to write more than the header promised.
                if (total_uncompressed > uncompressed_size)
                    return error.ZipUncompressSizeMismatch;
                try writer.writeAll(chunk);
                hash.update(chunk);
            }
            // Buffered input left over after the stream ended does not
            // belong to the DEFLATE data.
            if (br.end != br.start)
                return error.ZipDeflateTruncated;
        },
        _ => return error.UnsupportedCompressionMethod,
    }

    if (total_uncompressed != uncompressed_size)
        return error.ZipUncompressSizeMismatch;

    return hash.final();
}

/// Walks the central directory of a single-disk ZIP archive, yielding one
/// `Entry` per central directory file header.
pub const Iterator = struct {
    /// Archive being read; `next` seeks within it.
    file: std.fs.File,
    /// End of central directory record located by `init`.
    eocdr: EndOfCentralDirectoryRecord,
    /// Number of central directory headers already consumed by `next`.
    next_central_header_index: u16,
    /// Offset of the next header to read, relative to
    /// `eocdr.central_directory_offset`.
    next_central_header_offset: u64,

    /// Finds the end of central directory record of `file` and prepares to
    /// iterate from the first central directory header.
    ///
    /// Returns `error.ZipUnsupportedMultiDisk` when the record describes an
    /// archive spanning more than one disk, or any error from `findEocdr`.
    pub fn init(file: std.fs.File) !Iterator {
        const eocdr = try findEocdr(file);

        // Don't support multi-disk archives.
        if (eocdr.disk_number != 0 or
            eocdr.central_directory_disk_number != 0 or
            eocdr.record_count_disk != eocdr.record_count_total)
        {
            return error.ZipUnsupportedMultiDisk;
        }

        return .{
            .file = file,
            .eocdr = eocdr,
            .next_central_header_offset = 0,
            .next_central_header_index = 0,
        };
    }

    /// Reads the next central directory file header.
    ///
    /// Returns `null` once `eocdr.record_count_total` headers have been
    /// consumed, `error.ZipHeader` when the header signature is wrong and
    /// `error.ZipUnsupportedMultiDisk` when the entry lives on another disk.
    /// File errors are passed through.
    pub fn next(self: *Iterator) !?Entry {
        if (self.next_central_header_index >= self.eocdr.record_count_total) {
            return null;
        }

        const header_file_offset: u64 = @as(u64, self.eocdr.central_directory_offset) + self.next_central_header_offset;
        const header = blk: {
            try self.file.seekTo(header_file_offset);
            break :blk try self.file.reader().readStructEndian(CentralDirectoryFileHeader, .little);
        };
        if (!std.mem.eql(u8, &header.signature, &central_file_header_sig))
            return error.ZipHeader;

        self.next_central_header_index += 1;
        // Widen every term first: the three lengths are u16 values taken
        // from the archive, and adding them in u16 arithmetic would overflow
        // for long names, extra fields or comments.
        self.next_central_header_offset += @as(u64, @sizeOf(CentralDirectoryFileHeader)) +
            @as(u64, header.filename_len) +
            @as(u64, header.extra_len) +
            @as(u64, header.comment_len);

        if (header.disk_number != 0)
            return error.ZipUnsupportedMultiDisk;
        return .{
            .header_file_offset = header_file_offset,
            .header = header,
        };
    }

    /// One central directory entry as returned by `Iterator.next`.
    pub const Entry = struct {
        /// Absolute file offset of this entry's central directory header.
        header_file_offset: u64,
        /// The header itself, with fields in native byte order.
        header: CentralDirectoryFileHeader,

        /// Extracts this entry from `zip_file` into `dest` and returns the
        /// CRC-32 of the extracted bytes; the caller is expected to compare
        /// it with `header.crc32`.
        ///
        /// `filename_buf` is scratch space for the entry's file name and must
        /// hold at least `header.filename_len` bytes
        /// (`error.ZipInsufficientBuffer` otherwise).
        ///
        /// Names ending in '/' create a directory and yield the CRC-32 of
        /// empty input. Any other name creates a new file, making parent
        /// directories as needed. The file is created exclusively, so an
        /// already existing file makes the call fail.
        ///
        /// Errors include `error.ZipTruncated`, `error.ZipHeader`,
        /// `error.ZipRedundancyFail` (local and central headers disagree),
        /// `error.ZipBadFilename`, `error.ZipBadDirectorySize`,
        /// `error.ZipDecompressTruncated`, everything `decompress` returns,
        /// and file system errors.
        pub fn extract(self: Entry, zip_file: std.fs.File, filename_buf: []u8, dest: std.fs.Dir) !u32 {
            if (filename_buf.len < self.header.filename_len)
                return error.ZipInsufficientBuffer;
            const filename = filename_buf[0..self.header.filename_len];

            // The file name directly follows the fixed-size central header.
            try zip_file.seekTo(self.header_file_offset + @sizeOf(CentralDirectoryFileHeader));
            {
                const len = try zip_file.readAll(filename);
                if (len != filename.len)
                    return error.ZipTruncated;
            }

            // Length of the variable-size data (file name and extra field)
            // between the fixed-size local header and the entry's data.
            const local_data_header_offset: u64 = local_data_header_offset: {
                const local_header = blk: {
                    try zip_file.seekTo(self.header.local_file_header_offset);
                    break :blk try zip_file.reader().readStructEndian(LocalFileHeader, .little);
                };
                if (!std.mem.eql(u8, &local_header.signature, &local_file_header_sig))
                    return error.ZipHeader;
                // TODO: verify minimum_version
                // TODO: verify flags
                // TODO: verify compression method
                // TODO: verify last_mod_time
                // TODO: verify last_mod_date
                // TODO: verify filename_len and filename?
                // TODO: extra?

                // A zero in the local header is tolerated; any other value
                // has to agree with the central directory.
                if (local_header.crc32 != 0 and local_header.crc32 != self.header.crc32)
                    return error.ZipRedundancyFail;
                if (local_header.compressed_size != 0 and
                    local_header.compressed_size != self.header.compressed_size)
                    return error.ZipRedundancyFail;
                if (local_header.uncompressed_size != 0 and
                    local_header.uncompressed_size != self.header.uncompressed_size)
                    return error.ZipRedundancyFail;

                break :local_data_header_offset @as(u64, local_header.filename_len) +
                    @as(u64, local_header.extra_len);
            };

            // The name comes from the archive and is untrusted: it must not
            // be able to address anything outside of `dest`.
            if (isBadFilename(filename))
                return error.ZipBadFilename;

            // All entries that end in '/' are directories
            if (filename[filename.len - 1] == '/') {
                if (self.header.uncompressed_size != 0)
                    return error.ZipBadDirectorySize;
                try dest.makePath(filename[0 .. filename.len - 1]);
                return std.hash.Crc32.hash(&.{});
            }

            const out_file = blk: {
                if (std.fs.path.dirname(filename)) |dirname| {
                    var parent_dir = try dest.makeOpenPath(dirname, .{});
                    defer parent_dir.close();

                    const basename = std.fs.path.basename(filename);
                    break :blk try parent_dir.createFile(basename, .{ .exclusive = true });
                }
                break :blk try dest.createFile(filename, .{ .exclusive = true });
            };
            defer out_file.close();
            const local_data_file_offset: u64 =
                @as(u64, self.header.local_file_header_offset) +
                @as(u64, @sizeOf(LocalFileHeader)) +
                local_data_header_offset;
            try zip_file.seekTo(local_data_file_offset);
            // Restrict the decoder to this entry's compressed bytes.
            var limited_reader = std.io.limitedReader(zip_file.reader(), self.header.compressed_size);
            const crc = try decompress(
                self.header.compression_method,
                self.header.uncompressed_size,
                limited_reader.reader(),
                out_file.writer(),
            );
            // The decoder finished without using all of the entry's bytes.
            if (limited_reader.bytes_left != 0)
                return error.ZipDecompressTruncated;
            return crc;
        }

        /// Reports whether `filename` is unusable as a path relative to the
        /// extraction directory: it is empty, starts with '/', or contains
        /// a ".." component. Both '/' and '\\' are treated as separators for
        /// the ".." check because '\\' separates path components on Windows.
        fn isBadFilename(filename: []const u8) bool {
            if (filename.len == 0 or filename[0] == '/')
                return true;
            var components = std.mem.splitAny(u8, filename, "/\\");
            while (components.next()) |component| {
                if (std.mem.eql(u8, component, ".."))
                    return true;
            }
            return false;
        }
    };
};

/// Extracts every entry of the ZIP archive `file` into the directory `dest`.
///
/// Entries are visited in central directory order. Extraction stops at the
/// first failure; `error.ZipCrcMismatch` is returned when the CRC-32 computed
/// while extracting an entry differs from the one recorded in its central
/// directory header.
pub fn pipeToFileSystem(dest: std.fs.Dir, file: std.fs.File) !void {
    // Scratch space that receives each entry's file name in turn.
    var name_storage: [std.fs.MAX_PATH_BYTES]u8 = undefined;

    var entries = try Iterator.init(file);
    while (try entries.next()) |item| {
        const actual_crc = try item.extract(file, &name_storage, dest);
        if (actual_crc != item.header.crc32) return error.ZipCrcMismatch;
    }
}

/// Test helper: runs `testZipWithCache` for `files`, supplying a stack array
/// with one `FileCache` slot per file. `files` is comptime so that its length
/// can size that array.
fn testZip(comptime files: []const File) !void {
    var cache_slots: [files.len]FileCache = undefined;
    try testZipWithCache(files, cache_slots[0..]);
}
/// Test helper: writes `files` into an archive named "zip" inside a temporary
/// directory, extracts that archive into the same directory with
/// `pipeToFileSystem`, and checks that every extracted file has the expected
/// content. `cache` is passed through to `writeFile`.
fn testZipWithCache(files: []const File, cache: []FileCache) !void {
    var tmp = testing.tmpDir(.{ .no_follow = true });
    defer tmp.cleanup();
    const scratch = tmp.dir;

    // Create the archive and close it again before reading it back.
    {
        var archive_out = try scratch.createFile("zip", .{});
        defer archive_out.close();
        try writeFile(archive_out, files, cache);
    }

    var archive_in = try scratch.openFile("zip", .{});
    defer archive_in.close();
    try pipeToFileSystem(scratch, archive_in);

    // Every test file is expected to fit in this buffer.
    var contents: [4096]u8 = undefined;
    for (files) |expected| {
        var extracted = try scratch.openFile(expected.name, .{});
        defer extracted.close();
        const len = try extracted.reader().readAll(&contents);
        try testing.expectEqualStrings(expected.content, contents[0..len]);
    }
}

// Round-trips an archive holding a single stored (uncompressed) file.
test "zip one file" {
try testZip(&[_]File{
.{ .name = "onefile.txt", .content = "Just a single file\n", .compression = .store },
});
}
// Round-trips several stored files, including names with one and two levels
// of subdirectories that extraction has to create.
test "zip multiple files" {
try testZip(&[_]File{
.{ .name = "foo", .content = "a foo file\n", .compression = .store },
.{ .name = "subdir/bar", .content = "bar is this right?\nanother newline\n", .compression = .store },
.{ .name = "subdir/another/baz", .content = "bazzy mc bazzerson", .compression = .store },
});
}
// Round-trips an archive that mixes a deflate-compressed entry with a stored one.
test "zip deflated" {
try testZip(&[_]File{
.{ .name = "deflateme", .content = "This is a deflated file.\nIt should be smaller in the Zip file1\n", .compression = .deflate },
// TODO: re-enable this if/when we add support for deflate64
//.{ .name = "deflateme64", .content = "The 64k version of deflate!\n", .compression = .deflate64 },
.{ .name = "raw", .content = "Not all files need to be deflated in the same Zip.\n", .compression = .store },
});
}

0 comments on commit 7f40093

Please sign in to comment.