Skip to content

Commit d581fda

Browse files
gbaraldi and pull[bot]
authored and committed
Save a couple loads/stores in sweep pages (JuliaLang#49263)
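Previously the sweep loop did a load and a store of the age bits on every iteration. Keep the current age word in a temporary (which can live in a register) instead, and write it back only when advancing to the next word and once after the loop. It's a very small difference, but it's visible in VTune.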
1 parent 19efb8f commit d581fda

File tree

3 files changed

+21
-17
lines changed

3 files changed

+21
-17
lines changed

src/gc-debug.c

+2-2
Original file line numberDiff line numberDiff line change
@@ -563,11 +563,11 @@ JL_NO_ASAN static void gc_scrub_range(char *low, char *high)
563563
// Find the age bit
564564
char *page_begin = gc_page_data(tag) + GC_PAGE_OFFSET;
565565
int obj_id = (((char*)tag) - page_begin) / osize;
566-
uint8_t *ages = pg->ages + obj_id / 8;
566+
uint32_t *ages = pg->ages + obj_id / 32;
567567
// Force this to be a young object to save some memory
568568
// (especially on 32bit where it's more likely to have pointer-like
569569
// bit patterns)
570-
*ages &= ~(1 << (obj_id % 8));
570+
*ages &= ~(1 << (obj_id % 32));
571571
memset(tag, 0xff, osize);
572572
// set mark to GC_MARKED (young and marked)
573573
tag->bits.gc = GC_MARKED;

src/gc.c

+17-14
Original file line numberDiff line numberDiff line change
@@ -976,8 +976,8 @@ STATIC_INLINE void gc_setmark_pool_(jl_ptls_t ptls, jl_taggedvalue_t *o,
976976
page->has_young = 1;
977977
char *page_begin = gc_page_data(o) + GC_PAGE_OFFSET;
978978
int obj_id = (((char*)o) - page_begin) / page->osize;
979-
uint8_t *ages = page->ages + obj_id / 8;
980-
jl_atomic_fetch_and_relaxed((_Atomic(uint8_t)*)ages, ~(1 << (obj_id % 8)));
979+
uint32_t *ages = page->ages + obj_id / 32;
980+
jl_atomic_fetch_and_relaxed((_Atomic(uint32_t)*)ages, ~(1 << (obj_id % 32)));
981981
}
982982
}
983983
objprofile_count(jl_typeof(jl_valueof(o)),
@@ -1406,7 +1406,7 @@ static NOINLINE jl_taggedvalue_t *add_page(jl_gc_pool_t *p) JL_NOTSAFEPOINT
14061406
jl_ptls_t ptls = jl_current_task->ptls;
14071407
jl_gc_pagemeta_t *pg = jl_gc_alloc_page();
14081408
pg->osize = p->osize;
1409-
pg->ages = (uint8_t*)malloc_s(GC_PAGE_SZ / 8 / p->osize + 1);
1409+
pg->ages = (uint32_t*)malloc_s(LLT_ALIGN(GC_PAGE_SZ / 8 / p->osize + 1, sizeof(uint32_t)));
14101410
pg->thread_n = ptls->tid;
14111411
jl_taggedvalue_t *fl = reset_page(ptls, p, pg, NULL);
14121412
p->newpages = fl;
@@ -1506,7 +1506,7 @@ int64_t lazy_freed_pages = 0;
15061506
static jl_taggedvalue_t **sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t *pg, jl_taggedvalue_t **pfl, int sweep_full, int osize) JL_NOTSAFEPOINT
15071507
{
15081508
char *data = pg->data;
1509-
uint8_t *ages = pg->ages;
1509+
uint32_t *ages = pg->ages;
15101510
jl_taggedvalue_t *v = (jl_taggedvalue_t*)(data + GC_PAGE_OFFSET);
15111511
char *lim = (char*)v + GC_PAGE_SZ - GC_PAGE_OFFSET - osize;
15121512
size_t old_nfree = pg->nfree;
@@ -1557,18 +1557,25 @@ static jl_taggedvalue_t **sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t *pg, jl_t
15571557
int16_t prev_nold = 0;
15581558
int pg_nfree = 0;
15591559
jl_taggedvalue_t **pfl_begin = NULL;
1560-
uint8_t msk = 1; // mask for the age bit in the current age byte
1560+
uint32_t msk = 1; // mask for the age bit in the current 32-bit age word
1561+
uint32_t age = *ages;
15611562
while ((char*)v <= lim) {
1563+
if (!msk) {
1564+
msk = 1;
1565+
*ages = age;
1566+
ages++;
1567+
age = *ages;
1568+
}
15621569
int bits = v->bits.gc;
15631570
if (!gc_marked(bits)) {
15641571
*pfl = v;
15651572
pfl = &v->next;
15661573
pfl_begin = pfl_begin ? pfl_begin : pfl;
15671574
pg_nfree++;
1568-
*ages &= ~msk;
1575+
age &= ~msk;
15691576
}
15701577
else { // marked young or old
1571-
if (*ages & msk || bits == GC_OLD_MARKED) { // old enough
1578+
if (age & msk || bits == GC_OLD_MARKED) { // old enough
15721579
// `!age && bits == GC_OLD_MARKED` is possible for
15731580
// non-first-class objects like array buffers
15741581
// (they may get promoted by jl_gc_wb_buf for example,
@@ -1584,17 +1591,13 @@ static jl_taggedvalue_t **sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t *pg, jl_t
15841591
has_young = 1;
15851592
}
15861593
has_marked |= gc_marked(bits);
1587-
*ages |= msk;
1594+
age |= msk;
15881595
freedall = 0;
15891596
}
15901597
v = (jl_taggedvalue_t*)((char*)v + osize);
15911598
msk <<= 1;
1592-
if (!msk) {
1593-
msk = 1;
1594-
ages++;
1595-
}
15961599
}
1597-
1600+
*ages = age;
15981601
assert(!freedall);
15991602
pg->has_marked = has_marked;
16001603
pg->has_young = has_young;
@@ -4017,7 +4020,7 @@ JL_DLLEXPORT jl_value_t *jl_gc_internal_obj_base_ptr(void *p)
40174020
goto valid_object;
40184021
// We know now that the age bit reflects liveness status during
40194022
// the last sweep and that the cell has not been reused since.
4020-
if (!(meta->ages[obj_id / 8] & (1 << (obj_id % 8)))) {
4023+
if (!(meta->ages[obj_id / 32] & (1 << (obj_id % 32)))) {
40214024
return NULL;
40224025
}
40234026
// Not a freelist entry, therefore a valid object.

src/gc.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#ifndef JL_GC_H
1010
#define JL_GC_H
1111

12+
#include <stddef.h>
1213
#include <stdlib.h>
1314
#include <string.h>
1415
#include <strings.h>
@@ -170,7 +171,7 @@ typedef struct {
170171
uint16_t fl_end_offset; // offset of last free object in this page
171172
uint16_t thread_n; // thread id of the heap that owns this page
172173
char *data;
173-
uint8_t *ages;
174+
uint32_t *ages;
174175
} jl_gc_pagemeta_t;
175176

176177
// Page layout:

0 commit comments

Comments
 (0)