Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

macho: add support for merging cstrings and literals #137

Merged
merged 28 commits into from
May 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
c66e151
macho: start adding string deduping and merging
kubkon May 10, 2024
dc33403
macho: parse literals into atoms (subsections)
kubkon May 11, 2024
cfb1392
macho: dedup literal atoms
kubkon May 11, 2024
6180669
macho: kill duped merge subsections and reroute relocs
kubkon May 11, 2024
e8a4f2f
macho: always sort literal atoms deterministically
kubkon May 11, 2024
dbe3d43
macho: check if atom actually has relocs when resolving merge sections
kubkon May 12, 2024
c3c1e33
macho: test merging literals for images
kubkon May 12, 2024
3a47020
test/macho: verify contents of output literal sections
kubkon May 13, 2024
170903f
macho: fix emitting objects in -r mode and merging literals
kubkon May 14, 2024
39c618a
macho: emit relocs in -r mode for S_LITERAL_POINTERS
kubkon May 16, 2024
c5b1711
macho: split splitting literals into separate steps
kubkon May 17, 2024
e6bfa03
macho: rename MergeSection to LiteralSection
kubkon May 17, 2024
0e3528b
macho: do not keep LiteralSections along
kubkon May 17, 2024
2788bd0
macho: dedup pointer literals
kubkon May 18, 2024
83400d6
macho: dedup literals in internal object also
kubkon May 19, 2024
6420dba
macho: revert changes to how we init output sections
kubkon May 19, 2024
08eca03
macho: do not put useless temp names for synthetic atoms
kubkon May 19, 2024
ed153a9
macho: revert more obsolete changes
kubkon May 19, 2024
9164318
macho: remove unused strtab from internal object
kubkon May 19, 2024
67b5dea
macho: unify literals pool across different section types
kubkon May 19, 2024
dcec9ed
macho: simplify Literals interface
kubkon May 19, 2024
69b5b17
macho: rename Literals to LiteralPool
kubkon May 19, 2024
d26c60d
macho: do not duplicate __DATA,__data section for no reason
kubkon May 19, 2024
4b891bb
macho: make sure we do not ensure too much capacity in -r mode for re…
kubkon May 19, 2024
7ecfcc0
macho: make sure we run literals dedup after we resolve everywhere first
kubkon May 21, 2024
ce1f05f
macho: skip merge-literals-2 smoke test if not aarch64
kubkon May 21, 2024
1d974fb
macho: store index into LiteralPool in atom rather than new target di…
kubkon May 21, 2024
03be748
macho: actually kill deduped literal atoms and test for that
kubkon May 22, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
109 changes: 106 additions & 3 deletions src/MachO.zig
Original file line number Diff line number Diff line change
Expand Up @@ -360,6 +360,7 @@ pub fn flush(self: *MachO) !void {

try self.convertTentativeDefinitions();
try self.createObjcSections();
try self.dedupLiterals();
try self.claimUnresolved();

if (self.options.dead_strip) {
Expand Down Expand Up @@ -1148,9 +1149,8 @@ fn initOutputSections(self: *MachO) !void {
atom.out_n_sect = try Atom.initOutputSection(atom.getInputSection(self), self);
}
}
if (self.data_sect_index == null) {
self.data_sect_index = try self.addSection("__DATA", "__data", .{});
}
self.data_sect_index = self.getSectionByName("__DATA", "__data") orelse
try self.addSection("__DATA", "__data", .{});
}

fn resolveSyntheticSymbols(self: *MachO) !void {
Expand Down Expand Up @@ -1230,6 +1230,27 @@ fn createObjcSections(self: *MachO) !void {
const name = eatPrefix(sym.getName(self), "_objc_msgSend$").?;
const selrefs_index = try internal.addObjcMsgsendSections(name, self);
try sym.addExtra(.{ .objc_selrefs = selrefs_index }, self);
sym.flags.objc_stubs = true;
}
}

/// Deduplicates literals across every input object and the internal object.
///
/// Runs in two passes over the same set of files, sharing one temporary
/// `LiteralPool` that is released before returning:
///   1. `resolveLiterals` is called on each file with a mutable pool.
///   2. `dedupLiterals` is called on each file with the completed pool.
/// The first pass must finish for all files before the second pass starts.
pub fn dedupLiterals(self: *MachO) !void {
    const allocator = self.base.allocator;
    var pool: LiteralPool = .{};
    defer pool.deinit(allocator);

    // Pass 1: populate the pool from the input objects, then the internal object.
    for (self.objects.items) |file_index| {
        try self.getFile(file_index).?.object.resolveLiterals(&pool, self);
    }
    if (self.getInternalObject()) |internal| {
        try internal.resolveLiterals(&pool, self);
    }

    // Pass 2: let every file act on the now-complete pool, in the same order.
    for (self.objects.items) |file_index| {
        self.getFile(file_index).?.object.dedupLiterals(pool, self);
    }
    if (self.getInternalObject()) |internal| {
        internal.dedupLiterals(pool, self);
    }
}

Expand Down Expand Up @@ -3149,6 +3170,87 @@ pub const LinkObject = struct {
/// start of __TEXT segment.
const default_pagezero_vmsize: u64 = 0x100000000;

/// Pool of unique literals, used to pick one representative atom per distinct
/// literal.
///
/// The bytes of every distinct literal are stored back-to-back in `data`.
/// `keys[i]` describes the i-th distinct literal (offset and size into `data`
/// plus its type) and `values[i]` holds the atom index registered for it.
/// `table` stores neither keys nor values; it only serves as a hash index that
/// is probed through `Adapter`, and its entry indices line up with `keys` and
/// `values`.
pub const LiteralPool = struct {
    table: std.AutoArrayHashMapUnmanaged(void, void) = .{},
    keys: std.ArrayListUnmanaged(Key) = .{},
    values: std.ArrayListUnmanaged(Atom.Index) = .{},
    data: std.ArrayListUnmanaged(u8) = .{},

    /// Releases all memory owned by the pool.
    pub fn deinit(lp: *LiteralPool, allocator: Allocator) void {
        lp.table.deinit(allocator);
        lp.keys.deinit(allocator);
        lp.values.deinit(allocator);
        lp.data.deinit(allocator);
    }

    /// Result of `insert`.
    const InsertResult = struct {
        /// True when an equal literal of the same type was already in the pool.
        found_existing: bool,
        /// Index of the pool entry; valid input for `getAtom`.
        index: Index,
        /// Pointer to the entry's slot in `values`. When `found_existing` is
        /// false the slot is uninitialized and the caller must write the
        /// representative atom index through it. The pointer refers to list
        /// storage and must not be held across another `insert`.
        atom: *Atom.Index,
    };

    /// Returns the atom registered for pool entry `index`.
    /// Asserts that `index` is in bounds and that the stored atom index
    /// resolves to an atom in `macho_file`.
    pub fn getAtom(lp: LiteralPool, index: Index, macho_file: *MachO) *Atom {
        assert(index < lp.values.items.len);
        return macho_file.getAtom(lp.values.items[index]).?;
    }

    /// Looks up the literal `string` of section type `@"type"`, adding a new
    /// entry when no equal literal of the same type exists yet.
    ///
    /// On allocation failure the pool is left unchanged.
    pub fn insert(lp: *LiteralPool, allocator: Allocator, @"type": u8, string: []const u8) !InsertResult {
        const size: u32 = @intCast(string.len);

        // Reserve everything up front so that nothing can fail once `table`
        // has been modified; otherwise `table` could gain an entry that has no
        // matching element in `keys`/`values`.
        try lp.data.ensureUnusedCapacity(allocator, size);
        try lp.keys.ensureUnusedCapacity(allocator, 1);
        try lp.values.ensureUnusedCapacity(allocator, 1);

        // The candidate bytes have to live in `data` for the lookup because
        // `Key` addresses its contents by offset.
        const off: u32 = @intCast(lp.data.items.len);
        lp.data.appendSliceAssumeCapacity(string);
        errdefer lp.data.shrinkRetainingCapacity(off);

        const adapter = Adapter{ .lp = lp };
        const key = Key{ .off = off, .size = size, .seed = @"type" };
        const gop = try lp.table.getOrPutAdapted(allocator, key, adapter);
        if (gop.found_existing) {
            // The candidate key is not stored anywhere, so its bytes are dead
            // weight; drop them instead of accumulating every duplicate.
            lp.data.shrinkRetainingCapacity(off);
        } else {
            lp.keys.appendAssumeCapacity(key);
            _ = lp.values.addOneAssumeCapacity();
        }
        return .{
            .found_existing = gop.found_existing,
            .index = @intCast(gop.index),
            .atom = &lp.values.items[gop.index],
        };
    }

    /// Describes one literal: a byte range inside `LiteralPool.data` plus the
    /// literal's section type, which also seeds the hash.
    const Key = struct {
        off: u32,
        size: u32,
        seed: u8,

        fn getData(key: Key, lp: *const LiteralPool) []const u8 {
            return lp.data.items[key.off..][0..key.size];
        }

        fn eql(key: Key, other: Key, lp: *const LiteralPool) bool {
            // The type must take part in equality, not only in the hash:
            // otherwise two literals with identical bytes but different types
            // would be merged whenever their truncated hashes collide.
            if (key.seed != other.seed) return false;
            return mem.eql(u8, key.getData(lp), other.getData(lp));
        }

        fn hash(key: Key, lp: *const LiteralPool) u32 {
            const data = key.getData(lp);
            return @truncate(Hash.hash(key.seed, data));
        }
    };

    /// Hash-map context that resolves a table entry index to its `Key` in
    /// `LiteralPool.keys`.
    const Adapter = struct {
        lp: *const LiteralPool,

        pub fn eql(ctx: @This(), key: Key, b_void: void, b_map_index: usize) bool {
            _ = b_void;
            const other = ctx.lp.keys.items[b_map_index];
            return key.eql(other, ctx.lp);
        }

        pub fn hash(ctx: @This(), key: Key) u32 {
            return key.hash(ctx.lp);
        }
    };

    pub const Index = u32;
};

const Section = struct {
header: macho.section_64,
segment_id: u8,
Expand Down Expand Up @@ -3212,6 +3314,7 @@ const DwarfInfo = @import("MachO/DwarfInfo.zig");
const ExportTrieSection = synthetic.ExportTrieSection;
const File = @import("MachO/file.zig").File;
const GotSection = synthetic.GotSection;
const Hash = std.hash.Wyhash;
const Indsymtab = synthetic.Indsymtab;
const InternalObject = @import("MachO/InternalObject.zig");
const MachO = @This();
Expand Down
25 changes: 22 additions & 3 deletions src/MachO/Atom.zig
Original file line number Diff line number Diff line change
Expand Up @@ -117,12 +117,18 @@ pub fn getThunk(self: Atom, macho_file: *MachO) *Thunk {
return macho_file.getThunk(extra.thunk);
}

/// Returns the `literal_index` stored in this atom's extra data, or null when
/// the atom's `literal_pool` flag is not set.
pub fn getLiteralPoolIndex(self: Atom, macho_file: *MachO) ?MachO.LiteralPool.Index {
    return if (self.flags.literal_pool)
        self.getExtra(macho_file).?.literal_index
    else
        null;
}

/// Options accepted by `addExtra`. Every field is optional and defaults to null.
/// NOTE(review): the field names match those of `Extra`; presumably only
/// non-null fields are written back - confirm against the body of `addExtra`.
const AddExtraOpts = struct {
/// Thunk index.
thunk: ?u32 = null,
/// Index of the atom's first relocation.
rel_index: ?u32 = null,
/// Number of relocations belonging to the atom.
rel_count: ?u32 = null,
/// Index of the atom's first unwind record (presumably; mirrors `rel_index`).
unwind_index: ?u32 = null,
/// Number of unwind records belonging to the atom (presumably; mirrors `rel_count`).
unwind_count: ?u32 = null,
/// Index of the atom's entry in the LiteralPool.
literal_index: ?u32 = null,
};

pub fn addExtra(atom: *Atom, opts: AddExtraOpts, macho_file: *MachO) !void {
Expand All @@ -147,6 +153,16 @@ pub inline fn setExtra(atom: Atom, extra: Extra, macho_file: *MachO) void {
}

pub fn initOutputSection(sect: macho.section_64, macho_file: *MachO) !u8 {
if (macho_file.options.relocatable) {
const osec = macho_file.getSectionByName(sect.segName(), sect.sectName()) orelse
try macho_file.addSection(
sect.segName(),
sect.sectName(),
.{ .flags = sect.flags },
);
return osec;
}

const segname, const sectname, const flags = blk: {
if (sect.isCode()) break :blk .{
"__TEXT",
Expand Down Expand Up @@ -209,9 +225,6 @@ pub fn initOutputSection(sect: macho.section_64, macho_file: *MachO) !u8 {
sectname,
.{ .flags = flags },
);
if (mem.eql(u8, segname, "__DATA") and mem.eql(u8, sectname, "__data")) {
macho_file.data_sect_index = osec;
}
return osec;
}

Expand Down Expand Up @@ -890,6 +903,9 @@ pub const Flags = packed struct {

/// Whether this atom has any unwind records.
unwind: bool = false,

/// Whether this atom has a LiteralPool entry.
literal_pool: bool = false,
};

pub const Extra = struct {
Expand All @@ -907,6 +923,9 @@ pub const Extra = struct {

/// Count of unwind records belonging to this atom.
unwind_count: u32 = 0,

/// Index into LiteralPool entry for this atom.
literal_index: u32 = 0,
};

const aarch64 = @import("../aarch64.zig");
Expand Down
124 changes: 98 additions & 26 deletions src/MachO/InternalObject.zig
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ index: File.Index,
sections: std.MultiArrayList(Section) = .{},
atoms: std.ArrayListUnmanaged(Atom.Index) = .{},
symbols: std.ArrayListUnmanaged(Symbol.Index) = .{},
strtab: std.ArrayListUnmanaged(u8) = .{},

objc_methnames: std.ArrayListUnmanaged(u8) = .{},
objc_selrefs: [@sizeOf(u64)]u8 = [_]u8{0} ** @sizeOf(u64),
Expand All @@ -18,7 +17,6 @@ pub fn deinit(self: *InternalObject, allocator: Allocator) void {
self.sections.deinit(allocator);
self.atoms.deinit(allocator);
self.symbols.deinit(allocator);
self.strtab.deinit(allocator);
self.objc_methnames.deinit(allocator);
}

Expand All @@ -38,21 +36,18 @@ pub fn addSymbol(self: *InternalObject, name: [:0]const u8, macho_file: *MachO)
}

/// Creates fake input sections __TEXT,__objc_methname and __DATA,__objc_selrefs.
pub fn addObjcMsgsendSections(self: *InternalObject, sym_name: []const u8, macho_file: *MachO) !u32 {
pub fn addObjcMsgsendSections(self: *InternalObject, sym_name: []const u8, macho_file: *MachO) !Atom.Index {
const methname_atom_index = try self.addObjcMethnameSection(sym_name, macho_file);
return try self.addObjcSelrefsSection(sym_name, methname_atom_index, macho_file);
return try self.addObjcSelrefsSection(methname_atom_index, macho_file);
}

fn addObjcMethnameSection(self: *InternalObject, methname: []const u8, macho_file: *MachO) !Atom.Index {
const gpa = macho_file.base.allocator;
const atom_index = try macho_file.addAtom();
try self.atoms.append(gpa, atom_index);

const name = try std.fmt.allocPrintZ(gpa, "__TEXT$__objc_methname${s}", .{methname});
defer gpa.free(name);
const atom = macho_file.getAtom(atom_index).?;
atom.atom_index = atom_index;
atom.name = try self.addString(gpa, name);
atom.file = self.index;
atom.size = methname.len + 1;
atom.alignment = 0;
Expand All @@ -72,21 +67,13 @@ fn addObjcMethnameSection(self: *InternalObject, methname: []const u8, macho_fil
return atom_index;
}

fn addObjcSelrefsSection(
self: *InternalObject,
methname: []const u8,
methname_atom_index: Atom.Index,
macho_file: *MachO,
) !Atom.Index {
fn addObjcSelrefsSection(self: *InternalObject, methname_atom_index: Atom.Index, macho_file: *MachO) !Atom.Index {
const gpa = macho_file.base.allocator;
const atom_index = try macho_file.addAtom();
try self.atoms.append(gpa, atom_index);

const name = try std.fmt.allocPrintZ(gpa, "__DATA$__objc_selrefs${s}", .{methname});
defer gpa.free(name);
const atom = macho_file.getAtom(atom_index).?;
atom.atom_index = atom_index;
atom.name = try self.addString(gpa, name);
atom.file = self.index;
atom.size = @sizeOf(u64);
atom.alignment = 3;
Expand Down Expand Up @@ -122,6 +109,98 @@ fn addObjcSelrefsSection(
return atom_index;
}

/// Registers every literal atom of the internal object in `lp`.
///
/// Sections and atoms are walked in lockstep (one atom per section):
///   * cstring and fixed-size literal sections are keyed by the section data;
///   * pointer-literal sections are keyed by the code of the atom their single
///     local relocation targets, starting at the relocation addend.
/// The first atom to register a given literal becomes the pool's atom for that
/// entry. Every visited literal atom gets its `literal_pool` flag set and the
/// entry index stored in its extra data. Sections of any other kind are ignored.
///
/// Returns `error.Overflow` when a pointer literal's addend does not fit in a
/// `u32`, and propagates errors from `lp.insert`, `getCode` and `addExtra`.
pub fn resolveLiterals(self: InternalObject, lp: *MachO.LiteralPool, macho_file: *MachO) !void {
    const gpa = macho_file.base.allocator;

    // Scratch space for the pointee's code; always empty between iterations.
    var scratch = std.ArrayList(u8).init(gpa);
    defer scratch.deinit();

    const headers = self.sections.slice().items(.header);
    for (headers, self.atoms.items, 0..) |header, atom_index, n_sect| {
        if (Object.isCstringLiteral(header) or Object.isFixedSizeLiteral(header)) {
            const bytes = self.getSectionData(@intCast(n_sect));
            const literal_atom = macho_file.getAtom(atom_index).?;
            const entry = try lp.insert(gpa, header.type(), bytes);
            if (!entry.found_existing) entry.atom.* = atom_index;
            literal_atom.flags.literal_pool = true;
            try literal_atom.addExtra(.{ .literal_index = entry.index }, macho_file);
            continue;
        }

        if (!Object.isPtrLiteral(header)) continue;

        const ptr_atom = macho_file.getAtom(atom_index).?;
        const relocs = ptr_atom.getRelocs(macho_file);
        assert(relocs.len == 1);
        const rel = relocs[0];
        assert(rel.tag == .local);
        const pointee = macho_file.getAtom(rel.target).?;
        const addend = std.math.cast(u32, rel.addend) orelse return error.Overflow;

        // `scratch` is empty here, so resizing reserves and exposes exactly
        // `pointee.size` bytes for `getCode` to fill.
        try scratch.resize(pointee.size);
        try pointee.getCode(macho_file, scratch.items);
        const entry = try lp.insert(gpa, header.type(), scratch.items[addend..]);
        scratch.clearRetainingCapacity();

        if (!entry.found_existing) entry.atom.* = atom_index;
        ptr_atom.flags.literal_pool = true;
        try ptr_atom.addExtra(.{ .literal_index = entry.index }, macho_file);
    }
}

/// Reroutes references to duplicate literal atoms onto the atom registered for
/// the same entry in `lp`, and marks the duplicates as not alive.
///
/// Two kinds of references are rewritten:
///   * relocations of every alive atom of this object that has relocations:
///     `.local` relocations get a new `target`, while for `.extern`
///     relocations the target symbol's `atom` is replaced;
///   * the `objc_selrefs` extra of every symbol flagged `objc_stubs`.
/// References whose target has no pool index, or already is the pool's atom,
/// are left untouched.
pub fn dedupLiterals(self: InternalObject, lp: MachO.LiteralPool, macho_file: *MachO) void {
    for (self.atoms.items) |atom_index| {
        const atom = macho_file.getAtom(atom_index) orelse continue;
        if (!(atom.flags.alive and atom.flags.relocs)) continue;

        // Slice of the section's relocations that belongs to this atom.
        const atom_extra = atom.getExtra(macho_file).?;
        const section_relocs = self.sections.items(.relocs)[atom.n_sect].items;
        const atom_relocs = section_relocs[atom_extra.rel_index..][0..atom_extra.rel_count];

        for (atom_relocs) |*rel| {
            switch (rel.tag) {
                .local => {
                    const target = macho_file.getAtom(rel.target).?;
                    const lp_index = target.getLiteralPoolIndex(macho_file) orelse continue;
                    const canonical = lp.getAtom(lp_index, macho_file);
                    if (target.atom_index == canonical.atom_index) continue;
                    target.flags.alive = false;
                    rel.target = canonical.atom_index;
                },
                .@"extern" => {
                    const target_sym = rel.getTargetSymbol(macho_file);
                    const target_atom = target_sym.getAtom(macho_file) orelse continue;
                    const lp_index = target_atom.getLiteralPoolIndex(macho_file) orelse continue;
                    const canonical = lp.getAtom(lp_index, macho_file);
                    if (target_atom.atom_index == canonical.atom_index) continue;
                    target_atom.flags.alive = false;
                    target_sym.atom = canonical.atom_index;
                },
            }
        }
    }

    for (self.symbols.items) |sym_index| {
        const sym = macho_file.getSymbol(sym_index);
        if (!sym.flags.objc_stubs) continue;

        var sym_extra = sym.getExtra(macho_file).?;
        const selrefs_atom = macho_file.getAtom(sym_extra.objc_selrefs).?;
        const lp_index = selrefs_atom.getLiteralPoolIndex(macho_file) orelse continue;
        const canonical = lp.getAtom(lp_index, macho_file);
        if (selrefs_atom.atom_index == canonical.atom_index) continue;

        selrefs_atom.flags.alive = false;
        sym_extra.objc_selrefs = canonical.atom_index;
        sym.setExtra(sym_extra, macho_file);
    }
}

pub fn calcSymtabSize(self: *InternalObject, macho_file: *MachO) !void {
for (self.symbols.items) |sym_index| {
const sym = macho_file.getSymbol(sym_index);
Expand Down Expand Up @@ -179,17 +258,10 @@ pub fn getSectionData(self: *const InternalObject, index: u32) []const u8 {
} else @panic("ref to non-existent section");
}

fn addString(self: *InternalObject, allocator: Allocator, name: [:0]const u8) error{OutOfMemory}!u32 {
const off: u32 = @intCast(self.strtab.items.len);
try self.strtab.ensureUnusedCapacity(allocator, name.len + 1);
self.strtab.appendSliceAssumeCapacity(name);
self.strtab.appendAssumeCapacity(0);
return off;
}

pub fn getString(self: InternalObject, off: u32) [:0]const u8 {
assert(off < self.strtab.items.len);
return mem.sliceTo(@as([*:0]const u8, @ptrCast(self.strtab.items.ptr + off)), 0);
_ = self;
_ = off;
return "";
}

pub fn asFile(self: *InternalObject) File {
Expand Down
Loading
Loading