Skip to content

Commit 61452e5

Browse files
committed
Auto merge of #47269 - michaelwoerister:mangled-cgu-names, r=alexcrichton
Shorten names of some compiler-generated artifacts. This PR makes the compiler mangle codegen unit names by default. The name of every codegen unit will now be a random-looking string of up to 16 characters, derived from a hash of the human-readable name. It also makes the file extensions of some intermediate compiler products shorter. Hopefully, these changes will reduce the pressure on tools with path-length restrictions like buildbot. The change should also solve problems with case-insensitive file systems. cc #47186 and #47222 r? @alexcrichton
2 parents 2e33c89 + 94f3037 commit 61452e5

File tree

10 files changed

+70
-28
lines changed

10 files changed

+70
-28
lines changed

src/librustc/mir/mono.rs

+12
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,11 @@ use syntax::ast::NodeId;
1212
use syntax::symbol::InternedString;
1313
use ty::Instance;
1414
use util::nodemap::FxHashMap;
15+
use rustc_data_structures::base_n;
1516
use rustc_data_structures::stable_hasher::{HashStable, StableHasherResult,
1617
StableHasher};
1718
use ich::{Fingerprint, StableHashingContext, NodeIdHashingMode};
19+
use std::hash::Hash;
1820

1921
#[derive(PartialEq, Eq, Clone, Copy, Debug, Hash)]
2022
pub enum MonoItem<'tcx> {
@@ -119,6 +121,16 @@ impl<'tcx> CodegenUnit<'tcx> {
119121
{
120122
&mut self.items
121123
}
124+
125+
/// Turns a human-readable codegen unit name into a short, mangled name.
///
/// The name is fed into a `StableHasher`, the resulting 128-bit value is
/// masked down to its low 80 bits, and those bits are encoded with
/// `base_n::CASE_INSENSITIVE` (base 36) and returned as a `String`.
pub fn mangle_name(human_readable_name: &str) -> String {
126+
// We generate an 80-bit hash from the name. This should be enough to
127+
// avoid collisions and is still reasonably short for filenames.
128+
let mut hasher = StableHasher::new();
129+
human_readable_name.hash(&mut hasher);
130+
let hash: u128 = hasher.finish();
131+
let hash = hash & ((1u128 << 80) - 1);
132+
base_n::encode(hash, base_n::CASE_INSENSITIVE)
133+
}
122134
}
123135

124136
impl<'tcx> HashStable<StableHashingContext<'tcx>> for CodegenUnit<'tcx> {

src/librustc/session/config.rs

+2
Original file line numberDiff line numberDiff line change
@@ -1234,6 +1234,8 @@ options! {DebuggingOptions, DebuggingSetter, basic_debugging_options,
12341234
"rewrite operators on i128 and u128 into lang item calls (typically provided \
12351235
by compiler-builtins) so translation doesn't need to support them,
12361236
overriding the default for the current target"),
1237+
human_readable_cgu_names: bool = (false, parse_bool, [TRACKED],
1238+
"generate human-readable, predictable names for codegen units"),
12371239
}
12381240

12391241
pub fn default_lib_output() -> CrateType {

src/librustc_data_structures/base_n.rs

+13-9
Original file line numberDiff line numberDiff line change
@@ -13,18 +13,21 @@
1313
1414
use std::str;
1515

16-
pub const MAX_BASE: u64 = 64;
17-
pub const ALPHANUMERIC_ONLY: u64 = 62;
16+
pub const MAX_BASE: usize = 64;
17+
pub const ALPHANUMERIC_ONLY: usize = 62;
18+
pub const CASE_INSENSITIVE: usize = 36;
1819

1920
const BASE_64: &'static [u8; MAX_BASE as usize] =
2021
b"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ@$";
2122

2223
#[inline]
23-
pub fn push_str(mut n: u64, base: u64, output: &mut String) {
24+
pub fn push_str(mut n: u128, base: usize, output: &mut String) {
2425
debug_assert!(base >= 2 && base <= MAX_BASE);
25-
let mut s = [0u8; 64];
26+
let mut s = [0u8; 128];
2627
let mut index = 0;
2728

29+
let base = base as u128;
30+
2831
loop {
2932
s[index] = BASE_64[(n % base) as usize];
3033
index += 1;
@@ -39,16 +42,16 @@ pub fn push_str(mut n: u64, base: u64, output: &mut String) {
3942
}
4043

4144
#[inline]
42-
pub fn encode(n: u64, base: u64) -> String {
43-
let mut s = String::with_capacity(13);
45+
pub fn encode(n: u128, base: usize) -> String {
46+
let mut s = String::new();
4447
push_str(n, base, &mut s);
4548
s
4649
}
4750

4851
#[test]
4952
fn test_encode() {
50-
fn test(n: u64, base: u64) {
51-
assert_eq!(Ok(n), u64::from_str_radix(&encode(n, base), base as u32));
53+
fn test(n: u128, base: usize) {
54+
assert_eq!(Ok(n), u128::from_str_radix(&encode(n, base), base as u32));
5255
}
5356

5457
for base in 2..37 {
@@ -57,7 +60,8 @@ fn test_encode() {
5760
test(35, base);
5861
test(36, base);
5962
test(37, base);
60-
test(u64::max_value(), base);
63+
test(u64::max_value() as u128, base);
64+
test(u128::max_value(), base);
6165

6266
for i in 0 .. 1_000 {
6367
test(i * 983, base);

src/librustc_incremental/persist/fs.rs

+6-5
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,7 @@ const QUERY_CACHE_FILENAME: &'static str = "query-cache.bin";
137137
// or hexadecimal numbers (we want short file and directory names). Since these
138138
// numbers will be used in file names, we choose an encoding that is not
139139
// case-sensitive (as opposed to base64, for example).
140-
const INT_ENCODE_BASE: u64 = 36;
140+
const INT_ENCODE_BASE: usize = base_n::CASE_INSENSITIVE;
141141

142142
pub fn dep_graph_path(sess: &Session) -> PathBuf {
143143
in_incr_comp_dir_sess(sess, DEP_GRAPH_FILENAME)
@@ -357,7 +357,7 @@ pub fn finalize_session_directory(sess: &Session, svh: Svh) {
357357
let mut new_sub_dir_name = String::from(&old_sub_dir_name[.. dash_indices[2] + 1]);
358358

359359
// Append the svh
360-
base_n::push_str(svh.as_u64(), INT_ENCODE_BASE, &mut new_sub_dir_name);
360+
base_n::push_str(svh.as_u64() as u128, INT_ENCODE_BASE, &mut new_sub_dir_name);
361361

362362
// Create the full path
363363
let new_path = incr_comp_session_dir.parent().unwrap().join(new_sub_dir_name);
@@ -465,7 +465,7 @@ fn generate_session_dir_path(crate_dir: &Path) -> PathBuf {
465465

466466
let directory_name = format!("s-{}-{}-working",
467467
timestamp,
468-
base_n::encode(random_number as u64,
468+
base_n::encode(random_number as u128,
469469
INT_ENCODE_BASE));
470470
debug!("generate_session_dir_path: directory_name = {}", directory_name);
471471
let directory_path = crate_dir.join(directory_name);
@@ -599,7 +599,7 @@ fn timestamp_to_string(timestamp: SystemTime) -> String {
599599
let duration = timestamp.duration_since(UNIX_EPOCH).unwrap();
600600
let micros = duration.as_secs() * 1_000_000 +
601601
(duration.subsec_nanos() as u64) / 1000;
602-
base_n::encode(micros, INT_ENCODE_BASE)
602+
base_n::encode(micros as u128, INT_ENCODE_BASE)
603603
}
604604

605605
fn string_to_timestamp(s: &str) -> Result<SystemTime, ()> {
@@ -626,7 +626,8 @@ fn crate_path(sess: &Session,
626626
// The full crate disambiguator is really long. 64 bits of it should be
627627
// sufficient.
628628
let crate_disambiguator = crate_disambiguator.to_fingerprint().to_smaller_hash();
629-
let crate_disambiguator = base_n::encode(crate_disambiguator, INT_ENCODE_BASE);
629+
let crate_disambiguator = base_n::encode(crate_disambiguator as u128,
630+
INT_ENCODE_BASE);
630631

631632
let crate_name = format!("{}-{}", crate_name, crate_disambiguator);
632633
incr_dir.join(crate_name)

src/librustc_incremental/persist/work_product.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -35,9 +35,9 @@ pub fn save_trans_partition(sess: &Session,
3535
let extension = match kind {
3636
WorkProductFileKind::Object => "o",
3737
WorkProductFileKind::Bytecode => "bc",
38-
WorkProductFileKind::BytecodeCompressed => "bc-compressed",
38+
WorkProductFileKind::BytecodeCompressed => "bc.z",
3939
};
40-
let file_name = format!("cgu-{}.{}", cgu_name, extension);
40+
let file_name = format!("{}.{}", cgu_name, extension);
4141
let path_in_incr_dir = in_incr_comp_dir_sess(sess, &file_name);
4242
match link_or_copy(path, &path_in_incr_dir) {
4343
Ok(_) => Some((kind, file_name)),

src/librustc_mir/monomorphize/partitioning.rs

+24-9
Original file line numberDiff line numberDiff line change
@@ -200,7 +200,16 @@ impl<'tcx> CodegenUnitExt<'tcx> for CodegenUnit<'tcx> {
200200
}
201201

202202
// Anything we can't find a proper codegen unit for goes into this.
203-
const FALLBACK_CODEGEN_UNIT: &'static str = "__rustc_fallback_codegen_unit";
203+
/// Returns the interned name of the fallback codegen unit.
///
/// When the `human_readable_cgu_names` debugging option is set, this is the
/// literal `__rustc_fallback_codegen_unit`; otherwise that literal is first
/// passed through `CodegenUnit::mangle_name` and the mangled form is interned.
fn fallback_cgu_name(tcx: TyCtxt) -> InternedString {
204+
const FALLBACK_CODEGEN_UNIT: &'static str = "__rustc_fallback_codegen_unit";
205+
206+
if tcx.sess.opts.debugging_opts.human_readable_cgu_names {
207+
Symbol::intern(FALLBACK_CODEGEN_UNIT).as_str()
208+
} else {
209+
Symbol::intern(&CodegenUnit::mangle_name(FALLBACK_CODEGEN_UNIT)).as_str()
210+
}
211+
}
212+
204213

205214
pub fn partition<'a, 'tcx, I>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
206215
trans_items: I,
@@ -297,7 +306,7 @@ fn place_root_translation_items<'a, 'tcx, I>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
297306

298307
let codegen_unit_name = match characteristic_def_id {
299308
Some(def_id) => compute_codegen_unit_name(tcx, def_id, is_volatile),
300-
None => Symbol::intern(FALLBACK_CODEGEN_UNIT).as_str(),
309+
None => fallback_cgu_name(tcx),
301310
};
302311

303312
let make_codegen_unit = || {
@@ -381,7 +390,7 @@ fn place_root_translation_items<'a, 'tcx, I>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
381390
// always ensure we have at least one CGU; otherwise, if we have a
382391
// crate with just types (for example), we could wind up with no CGU
383392
if codegen_units.is_empty() {
384-
let codegen_unit_name = Symbol::intern(FALLBACK_CODEGEN_UNIT).as_str();
393+
let codegen_unit_name = fallback_cgu_name(tcx);
385394
codegen_units.insert(codegen_unit_name.clone(),
386395
CodegenUnit::new(codegen_unit_name.clone()));
387396
}
@@ -630,10 +639,10 @@ fn compute_codegen_unit_name<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
630639
// Unfortunately we cannot just use the `ty::item_path` infrastructure here
631640
// because we need paths to modules and the DefIds of those are not
632641
// available anymore for external items.
633-
let mut mod_path = String::with_capacity(64);
642+
let mut cgu_name = String::with_capacity(64);
634643

635644
let def_path = tcx.def_path(def_id);
636-
mod_path.push_str(&tcx.crate_name(def_path.krate).as_str());
645+
cgu_name.push_str(&tcx.crate_name(def_path.krate).as_str());
637646

638647
for part in tcx.def_path(def_id)
639648
.data
@@ -644,15 +653,21 @@ fn compute_codegen_unit_name<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
644653
_ => false,
645654
}
646655
}) {
647-
mod_path.push_str("-");
648-
mod_path.push_str(&part.data.as_interned_str());
656+
cgu_name.push_str("-");
657+
cgu_name.push_str(&part.data.as_interned_str());
649658
}
650659

651660
if volatile {
652-
mod_path.push_str(".volatile");
661+
cgu_name.push_str(".volatile");
653662
}
654663

655-
return Symbol::intern(&mod_path[..]).as_str();
664+
let cgu_name = if tcx.sess.opts.debugging_opts.human_readable_cgu_names {
665+
cgu_name
666+
} else {
667+
CodegenUnit::mangle_name(&cgu_name)
668+
};
669+
670+
Symbol::intern(&cgu_name[..]).as_str()
656671
}
657672

658673
fn numbered_codegen_unit_name(crate_name: &str, index: usize) -> InternedString {

src/librustc_trans/assert_module_sources.rs

+5-1
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,10 @@
2828
//! perturb the reuse results.
2929
3030
use rustc::dep_graph::{DepNode, DepConstructor};
31+
use rustc::mir::mono::CodegenUnit;
3132
use rustc::ty::TyCtxt;
3233
use syntax::ast;
34+
use syntax_pos::symbol::Symbol;
3335
use rustc::ich::{ATTR_PARTITION_REUSED, ATTR_PARTITION_TRANSLATED};
3436

3537
const MODULE: &'static str = "module";
@@ -71,9 +73,11 @@ impl<'a, 'tcx> AssertModuleSource<'a, 'tcx> {
7173
}
7274

7375
let mname = self.field(attr, MODULE);
76+
let mangled_cgu_name = CodegenUnit::mangle_name(&mname.as_str());
77+
let mangled_cgu_name = Symbol::intern(&mangled_cgu_name).as_str();
7478

7579
let dep_node = DepNode::new(self.tcx,
76-
DepConstructor::CompileCodegenUnit(mname.as_str()));
80+
DepConstructor::CompileCodegenUnit(mangled_cgu_name));
7781

7882
if let Some(loaded_from_cache) = self.tcx.dep_graph.was_loaded_from_cache(&dep_node) {
7983
match (disposition, loaded_from_cache) {

src/librustc_trans/back/bytecode.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ pub const RLIB_BYTECODE_OBJECT_MAGIC: &'static [u8] = b"RUST_OBJECT";
4747
// The version number this compiler will write to bytecode objects in rlibs
4848
pub const RLIB_BYTECODE_OBJECT_VERSION: u8 = 2;
4949

50-
pub const RLIB_BYTECODE_EXTENSION: &str = "bytecode.encoded";
50+
pub const RLIB_BYTECODE_EXTENSION: &str = "bc.z";
5151

5252
pub fn encode(identifier: &str, bytecode: &[u8]) -> Vec<u8> {
5353
let mut encoded = Vec::new();

src/librustc_trans/context.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -572,7 +572,7 @@ impl<'b, 'tcx> CrateContext<'b, 'tcx> {
572572
let mut name = String::with_capacity(prefix.len() + 6);
573573
name.push_str(prefix);
574574
name.push_str(".");
575-
base_n::push_str(idx as u64, base_n::ALPHANUMERIC_ONLY, &mut name);
575+
base_n::push_str(idx as u128, base_n::ALPHANUMERIC_ONLY, &mut name);
576576
name
577577
}
578578

src/tools/compiletest/src/runtest.rs

+4
Original file line numberDiff line numberDiff line change
@@ -1520,6 +1520,10 @@ impl<'test> TestCx<'test> {
15201520
rustc.args(&["-Z", "incremental-queries"]);
15211521
}
15221522

1523+
if self.config.mode == CodegenUnits {
1524+
rustc.args(&["-Z", "human_readable_cgu_names"]);
1525+
}
1526+
15231527
match self.config.mode {
15241528
CompileFail | ParseFail | Incremental => {
15251529
// If we are extracting and matching errors in the new

0 commit comments

Comments
 (0)