kubkon · kubkon · May 22, 2024 · May 10, 2024 · May 11, 2024 · May 11, 2024
diff --git a/src/MachO.zig b/src/MachO.zig
@@ -360,6 +360,7 @@ pub fn flush(self: *MachO) !void {
 
     try self.convertTentativeDefinitions();
     try self.createObjcSections();
+    try self.dedupLiterals();
     try self.claimUnresolved();
 
     if (self.options.dead_strip) {
@@ -1148,9 +1149,8 @@ fn initOutputSections(self: *MachO) !void {
             atom.out_n_sect = try Atom.initOutputSection(atom.getInputSection(self), self);
         }
     }
-    if (self.data_sect_index == null) {
-        self.data_sect_index = try self.addSection("__DATA", "__data", .{});
-    }
+    self.data_sect_index = self.getSectionByName("__DATA", "__data") orelse
+        try self.addSection("__DATA", "__data", .{});
 }
 
 fn resolveSyntheticSymbols(self: *MachO) !void {
@@ -1230,6 +1230,27 @@ fn createObjcSections(self: *MachO) !void {
         const name = eatPrefix(sym.getName(self), "_objc_msgSend$").?;
         const selrefs_index = try internal.addObjcMsgsendSections(name, self);
         try sym.addExtra(.{ .objc_selrefs = selrefs_index }, self);
+        sym.flags.objc_stubs = true;
+    }
+}
+
+pub fn dedupLiterals(self: *MachO) !void {
+    const gpa = self.base.allocator;
+    var lp: LiteralPool = .{};
+    defer lp.deinit(gpa);
+
+    for (self.objects.items) |index| {
+        try self.getFile(index).?.object.resolveLiterals(&lp, self);
+    }
+    if (self.getInternalObject()) |object| {
+        try object.resolveLiterals(&lp, self);
+    }
+
+    for (self.objects.items) |index| {
+        self.getFile(index).?.object.dedupLiterals(lp, self);
+    }
+    if (self.getInternalObject()) |object| {
+        object.dedupLiterals(lp, self);
     }
 }
 
@@ -3149,6 +3170,87 @@ pub const LinkObject = struct {
 /// start of __TEXT segment.
 const default_pagezero_vmsize: u64 = 0x100000000;
 
+pub const LiteralPool = struct {
+    table: std.AutoArrayHashMapUnmanaged(void, void) = .{},
+    keys: std.ArrayListUnmanaged(Key) = .{},
+    values: std.ArrayListUnmanaged(Atom.Index) = .{},
+    data: std.ArrayListUnmanaged(u8) = .{},
+
+    pub fn deinit(lp: *LiteralPool, allocator: Allocator) void {
+        lp.table.deinit(allocator);
+        lp.keys.deinit(allocator);
+        lp.values.deinit(allocator);
+        lp.data.deinit(allocator);
+    }
+
+    const InsertResult = struct {
+        found_existing: bool,
+        index: Index,
+        atom: *Atom.Index,
+    };
+
+    pub fn getAtom(lp: LiteralPool, index: Index, macho_file: *MachO) *Atom {
+        assert(index < lp.values.items.len);
+        return macho_file.getAtom(lp.values.items[index]).?;
+    }
+
+    pub fn insert(lp: *LiteralPool, allocator: Allocator, @"type": u8, string: []const u8) !InsertResult {
+        const size: u32 = @intCast(string.len);
+        try lp.data.ensureUnusedCapacity(allocator, size);
+        const off: u32 = @intCast(lp.data.items.len);
+        lp.data.appendSliceAssumeCapacity(string);
+        const adapter = Adapter{ .lp = lp };
+        const key = Key{ .off = off, .size = size, .seed = @"type" };
+        const gop = try lp.table.getOrPutAdapted(allocator, key, adapter);
+        if (!gop.found_existing) {
+            try lp.keys.append(allocator, key);
+            _ = try lp.values.addOne(allocator);
+        }
+        return .{
+            .found_existing = gop.found_existing,
+            .index = @intCast(gop.index),
+            .atom = &lp.values.items[gop.index],
+        };
+    }
+
+    const Key = struct {
+        off: u32,
+        size: u32,
+        seed: u8,
+
+        fn getData(key: Key, lp: *const LiteralPool) []const u8 {
+            return lp.data.items[key.off..][0..key.size];
+        }
+
+        fn eql(key: Key, other: Key, lp: *const LiteralPool) bool {
+            const key_data = key.getData(lp);
+            const other_data = other.getData(lp);
+            return mem.eql(u8, key_data, other_data);
+        }
+
+        fn hash(key: Key, lp: *const LiteralPool) u32 {
+            const data = key.getData(lp);
+            return @truncate(Hash.hash(key.seed, data));
+        }
+    };
+
+    const Adapter = struct {
+        lp: *const LiteralPool,
+
+        pub fn eql(ctx: @This(), key: Key, b_void: void, b_map_index: usize) bool {
+            _ = b_void;
+            const other = ctx.lp.keys.items[b_map_index];
+            return key.eql(other, ctx.lp);
+        }
+
+        pub fn hash(ctx: @This(), key: Key) u32 {
+            return key.hash(ctx.lp);
+        }
+    };
+
+    pub const Index = u32;
+};
+
 const Section = struct {
     header: macho.section_64,
     segment_id: u8,
@@ -3212,6 +3314,7 @@ const DwarfInfo = @import("MachO/DwarfInfo.zig");
 const ExportTrieSection = synthetic.ExportTrieSection;
 const File = @import("MachO/file.zig").File;
 const GotSection = synthetic.GotSection;
+const Hash = std.hash.Wyhash;
 const Indsymtab = synthetic.Indsymtab;
 const InternalObject = @import("MachO/InternalObject.zig");
 const MachO = @This();

diff --git a/src/MachO/Atom.zig b/src/MachO/Atom.zig
@@ -117,12 +117,18 @@ pub fn getThunk(self: Atom, macho_file: *MachO) *Thunk {
     return macho_file.getThunk(extra.thunk);
 }
 
+pub fn getLiteralPoolIndex(self: Atom, macho_file: *MachO) ?MachO.LiteralPool.Index {
+    if (!self.flags.literal_pool) return null;
+    return self.getExtra(macho_file).?.literal_index;
+}
+
 const AddExtraOpts = struct {
     thunk: ?u32 = null,
     rel_index: ?u32 = null,
     rel_count: ?u32 = null,
     unwind_index: ?u32 = null,
     unwind_count: ?u32 = null,
+    literal_index: ?u32 = null,
 };
 
 pub fn addExtra(atom: *Atom, opts: AddExtraOpts, macho_file: *MachO) !void {
@@ -147,6 +153,16 @@ pub inline fn setExtra(atom: Atom, extra: Extra, macho_file: *MachO) void {
 }
 
 pub fn initOutputSection(sect: macho.section_64, macho_file: *MachO) !u8 {
+    if (macho_file.options.relocatable) {
+        const osec = macho_file.getSectionByName(sect.segName(), sect.sectName()) orelse
+            try macho_file.addSection(
+            sect.segName(),
+            sect.sectName(),
+            .{ .flags = sect.flags },
+        );
+        return osec;
+    }
+
     const segname, const sectname, const flags = blk: {
         if (sect.isCode()) break :blk .{
             "__TEXT",
@@ -209,9 +225,6 @@ pub fn initOutputSection(sect: macho.section_64, macho_file: *MachO) !u8 {
         sectname,
         .{ .flags = flags },
     );
-    if (mem.eql(u8, segname, "__DATA") and mem.eql(u8, sectname, "__data")) {
-        macho_file.data_sect_index = osec;
-    }
     return osec;
 }
 
@@ -890,6 +903,9 @@ pub const Flags = packed struct {
 
     /// Whether this atom has any unwind records.
     unwind: bool = false,
+
+    /// Whether this atom has LiteralPool entry.
+    literal_pool: bool = false,
 };
 
 pub const Extra = struct {
@@ -907,6 +923,9 @@ pub const Extra = struct {
 
     /// Count of relocations belonging to this atom.
     unwind_count: u32 = 0,
+
+    /// Index into LiteralPool entry for this atom.
+    literal_index: u32 = 0,
 };
 
 const aarch64 = @import("../aarch64.zig");

diff --git a/src/MachO/InternalObject.zig b/src/MachO/InternalObject.zig
@@ -3,7 +3,6 @@ index: File.Index,
 sections: std.MultiArrayList(Section) = .{},
 atoms: std.ArrayListUnmanaged(Atom.Index) = .{},
 symbols: std.ArrayListUnmanaged(Symbol.Index) = .{},
-strtab: std.ArrayListUnmanaged(u8) = .{},
 
 objc_methnames: std.ArrayListUnmanaged(u8) = .{},
 objc_selrefs: [@sizeOf(u64)]u8 = [_]u8{0} ** @sizeOf(u64),
@@ -18,7 +17,6 @@ pub fn deinit(self: *InternalObject, allocator: Allocator) void {
     self.sections.deinit(allocator);
     self.atoms.deinit(allocator);
     self.symbols.deinit(allocator);
-    self.strtab.deinit(allocator);
     self.objc_methnames.deinit(allocator);
 }
 
@@ -38,21 +36,18 @@ pub fn addSymbol(self: *InternalObject, name: [:0]const u8, macho_file: *MachO)
 }
 
 /// Creates a fake input sections __TEXT,__objc_methname and __DATA,__objc_selrefs.
-pub fn addObjcMsgsendSections(self: *InternalObject, sym_name: []const u8, macho_file: *MachO) !u32 {
+pub fn addObjcMsgsendSections(self: *InternalObject, sym_name: []const u8, macho_file: *MachO) !Atom.Index {
     const methname_atom_index = try self.addObjcMethnameSection(sym_name, macho_file);
-    return try self.addObjcSelrefsSection(sym_name, methname_atom_index, macho_file);
+    return try self.addObjcSelrefsSection(methname_atom_index, macho_file);
 }
 
 fn addObjcMethnameSection(self: *InternalObject, methname: []const u8, macho_file: *MachO) !Atom.Index {
     const gpa = macho_file.base.allocator;
     const atom_index = try macho_file.addAtom();
     try self.atoms.append(gpa, atom_index);
 
-    const name = try std.fmt.allocPrintZ(gpa, "__TEXT$__objc_methname${s}", .{methname});
-    defer gpa.free(name);
     const atom = macho_file.getAtom(atom_index).?;
     atom.atom_index = atom_index;
-    atom.name = try self.addString(gpa, name);
     atom.file = self.index;
     atom.size = methname.len + 1;
     atom.alignment = 0;
@@ -72,21 +67,13 @@ fn addObjcMethnameSection(self: *InternalObject, methname: []const u8, macho_fil
     return atom_index;
 }
 
-fn addObjcSelrefsSection(
-    self: *InternalObject,
-    methname: []const u8,
-    methname_atom_index: Atom.Index,
-    macho_file: *MachO,
-) !Atom.Index {
+fn addObjcSelrefsSection(self: *InternalObject, methname_atom_index: Atom.Index, macho_file: *MachO) !Atom.Index {
     const gpa = macho_file.base.allocator;
     const atom_index = try macho_file.addAtom();
     try self.atoms.append(gpa, atom_index);
 
-    const name = try std.fmt.allocPrintZ(gpa, "__DATA$__objc_selrefs${s}", .{methname});
-    defer gpa.free(name);
     const atom = macho_file.getAtom(atom_index).?;
     atom.atom_index = atom_index;
-    atom.name = try self.addString(gpa, name);
     atom.file = self.index;
     atom.size = @sizeOf(u64);
     atom.alignment = 3;
@@ -122,6 +109,98 @@ fn addObjcSelrefsSection(
     return atom_index;
 }
 
+pub fn resolveLiterals(self: InternalObject, lp: *MachO.LiteralPool, macho_file: *MachO) !void {
+    const gpa = macho_file.base.allocator;
+
+    var buffer = std.ArrayList(u8).init(gpa);
+    defer buffer.deinit();
+
+    const slice = self.sections.slice();
+    for (slice.items(.header), self.atoms.items, 0..) |header, atom_index, n_sect| {
+        if (Object.isCstringLiteral(header) or Object.isFixedSizeLiteral(header)) {
+            const data = self.getSectionData(@intCast(n_sect));
+            const atom = macho_file.getAtom(atom_index).?;
+            const res = try lp.insert(gpa, header.type(), data);
+            if (!res.found_existing) {
+                res.atom.* = atom_index;
+            }
+            atom.flags.literal_pool = true;
+            try atom.addExtra(.{ .literal_index = res.index }, macho_file);
+        } else if (Object.isPtrLiteral(header)) {
+            const atom = macho_file.getAtom(atom_index).?;
+            const relocs = atom.getRelocs(macho_file);
+            assert(relocs.len == 1);
+            const rel = relocs[0];
+            assert(rel.tag == .local);
+            const target = macho_file.getAtom(rel.target).?;
+            const addend = std.math.cast(u32, rel.addend) orelse return error.Overflow;
+            try buffer.ensureUnusedCapacity(target.size);
+            buffer.resize(target.size) catch unreachable;
+            try target.getCode(macho_file, buffer.items);
+            const res = try lp.insert(gpa, header.type(), buffer.items[addend..]);
+            buffer.clearRetainingCapacity();
+            if (!res.found_existing) {
+                res.atom.* = atom_index;
+            }
+            atom.flags.literal_pool = true;
+            try atom.addExtra(.{ .literal_index = res.index }, macho_file);
+        }
+    }
+}
+
+pub fn dedupLiterals(self: InternalObject, lp: MachO.LiteralPool, macho_file: *MachO) void {
+    for (self.atoms.items) |atom_index| {
+        const atom = macho_file.getAtom(atom_index) orelse continue;
+        if (!atom.flags.alive) continue;
+        if (!atom.flags.relocs) continue;
+
+        const relocs = blk: {
+            const extra = atom.getExtra(macho_file).?;
+            const relocs = self.sections.items(.relocs)[atom.n_sect].items;
+            break :blk relocs[extra.rel_index..][0..extra.rel_count];
+        };
+        for (relocs) |*rel| switch (rel.tag) {
+            .local => {
+                const target = macho_file.getAtom(rel.target).?;
+                if (target.getLiteralPoolIndex(macho_file)) |lp_index| {
+                    const lp_atom = lp.getAtom(lp_index, macho_file);
+                    if (target.atom_index != lp_atom.atom_index) {
+                        target.flags.alive = false;
+                        rel.target = lp_atom.atom_index;
+                    }
+                }
+            },
+            .@"extern" => {
+                const target_sym = rel.getTargetSymbol(macho_file);
+                if (target_sym.getAtom(macho_file)) |target_atom| {
+                    if (target_atom.getLiteralPoolIndex(macho_file)) |lp_index| {
+                        const lp_atom = lp.getAtom(lp_index, macho_file);
+                        if (target_atom.atom_index != lp_atom.atom_index) {
+                            target_atom.flags.alive = false;
+                            target_sym.atom = lp_atom.atom_index;
+                        }
+                    }
+                }
+            },
+        };
+    }
+
+    for (self.symbols.items) |sym_index| {
+        const sym = macho_file.getSymbol(sym_index);
+        if (!sym.flags.objc_stubs) continue;
+        var extra = sym.getExtra(macho_file).?;
+        const atom = macho_file.getAtom(extra.objc_selrefs).?;
+        if (atom.getLiteralPoolIndex(macho_file)) |lp_index| {
+            const lp_atom = lp.getAtom(lp_index, macho_file);
+            if (atom.atom_index != lp_atom.atom_index) {
+                atom.flags.alive = false;
+                extra.objc_selrefs = lp_atom.atom_index;
+                sym.setExtra(extra, macho_file);
+            }
+        }
+    }
+}
+
 pub fn calcSymtabSize(self: *InternalObject, macho_file: *MachO) !void {
     for (self.symbols.items) |sym_index| {
         const sym = macho_file.getSymbol(sym_index);
@@ -179,17 +258,10 @@ pub fn getSectionData(self: *const InternalObject, index: u32) []const u8 {
     } else @panic("ref to non-existent section");
 }
 
-fn addString(self: *InternalObject, allocator: Allocator, name: [:0]const u8) error{OutOfMemory}!u32 {
-    const off: u32 = @intCast(self.strtab.items.len);
-    try self.strtab.ensureUnusedCapacity(allocator, name.len + 1);
-    self.strtab.appendSliceAssumeCapacity(name);
-    self.strtab.appendAssumeCapacity(0);
-    return off;
-}
-
 pub fn getString(self: InternalObject, off: u32) [:0]const u8 {
-    assert(off < self.strtab.items.len);
-    return mem.sliceTo(@as([*:0]const u8, @ptrCast(self.strtab.items.ptr + off)), 0);
+    _ = self;
+    _ = off;
+    return "";
 }
 
 pub fn asFile(self: *InternalObject) File {