Skip to content

Commit b769e35

Browse files
authored Nov 18, 2024
[clang][serialization] Blobify IMPORTS strings and signatures (#116095)
This PR changes a part of the PCM format to store string-like things in the blob attached to a record instead of VBR6-encoding them into the record itself. Applied to the `IMPORTS` section (which is very hot), this speeds up dependency scanning by 2.8%.
1 parent f14e1a8 commit b769e35

File tree

6 files changed

+227
-186
lines changed

6 files changed

+227
-186
lines changed
 

‎clang/include/clang/Serialization/ASTBitCodes.h

+3-4
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ namespace serialization {
4444
/// Version 4 of AST files also requires that the version control branch and
4545
/// revision match exactly, since there is no backward compatibility of
4646
/// AST files at this time.
47-
const unsigned VERSION_MAJOR = 33;
47+
const unsigned VERSION_MAJOR = 34;
4848

4949
/// AST file minor version number supported by this version of
5050
/// Clang.
@@ -350,9 +350,8 @@ enum ControlRecordTypes {
350350
/// and information about the compiler used to build this AST file.
351351
METADATA = 1,
352352

353-
/// Record code for the list of other AST files imported by
354-
/// this AST file.
355-
IMPORTS,
353+
/// Record code for another AST file imported by this AST file.
354+
IMPORT,
356355

357356
/// Record code for the original file that was used to
358357
/// generate the AST file, including both its file ID and its

‎clang/include/clang/Serialization/ASTReader.h

+4-10
Original file line numberDiff line numberDiff line change
@@ -2389,23 +2389,17 @@ class ASTReader
23892389

23902390
// Read a string
23912391
static std::string ReadString(const RecordDataImpl &Record, unsigned &Idx);
2392-
2393-
// Skip a string
2394-
static void SkipString(const RecordData &Record, unsigned &Idx) {
2395-
Idx += Record[Idx] + 1;
2396-
}
2392+
static StringRef ReadStringBlob(const RecordDataImpl &Record, unsigned &Idx,
2393+
StringRef &Blob);
23972394

23982395
// Read a path
23992396
std::string ReadPath(ModuleFile &F, const RecordData &Record, unsigned &Idx);
24002397

24012398
// Read a path
24022399
std::string ReadPath(StringRef BaseDirectory, const RecordData &Record,
24032400
unsigned &Idx);
2404-
2405-
// Skip a path
2406-
static void SkipPath(const RecordData &Record, unsigned &Idx) {
2407-
SkipString(Record, Idx);
2408-
}
2401+
std::string ReadPathBlob(StringRef BaseDirectory, const RecordData &Record,
2402+
unsigned &Idx, StringRef &Blob);
24092403

24102404
/// Read a version tuple.
24112405
static VersionTuple ReadVersionTuple(const RecordData &Record, unsigned &Idx);

‎clang/include/clang/Serialization/ASTWriter.h

+4
Original file line numberDiff line numberDiff line change
@@ -769,13 +769,17 @@ class ASTWriter : public ASTDeserializationListener,
769769

770770
/// Add a string to the given record.
771771
void AddString(StringRef Str, RecordDataImpl &Record);
772+
void AddStringBlob(StringRef Str, RecordDataImpl &Record,
773+
SmallVectorImpl<char> &Blob);
772774

773775
/// Convert a path from this build process into one that is appropriate
774776
/// for emission in the module file.
775777
bool PreparePathForOutput(SmallVectorImpl<char> &Path);
776778

777779
/// Add a path to the given record.
778780
void AddPath(StringRef Path, RecordDataImpl &Record);
781+
void AddPathBlob(StringRef Str, RecordDataImpl &Record,
782+
SmallVectorImpl<char> &Blob);
779783

780784
/// Emit the current record with the given path as a blob.
781785
void EmitRecordWithPath(unsigned Abbrev, RecordDataRef Record,

‎clang/lib/Serialization/ASTReader.cpp

+120-104
Original file line numberDiff line numberDiff line change
@@ -3092,98 +3092,97 @@ ASTReader::ReadControlBlock(ModuleFile &F,
30923092
break;
30933093
}
30943094

3095-
case IMPORTS: {
3095+
case IMPORT: {
30963096
// Validate the AST before processing any imports (otherwise, untangling
30973097
// them can be error-prone and expensive). A module will have a name and
30983098
// will already have been validated, but this catches the PCH case.
30993099
if (ASTReadResult Result = readUnhashedControlBlockOnce())
31003100
return Result;
31013101

3102-
// Load each of the imported PCH files.
3103-
unsigned Idx = 0, N = Record.size();
3104-
while (Idx < N) {
3105-
// Read information about the AST file.
3106-
ModuleKind ImportedKind = (ModuleKind)Record[Idx++];
3107-
// Whether we're importing a standard c++ module.
3108-
bool IsImportingStdCXXModule = Record[Idx++];
3109-
// The import location will be the local one for now; we will adjust
3110-
// all import locations of module imports after the global source
3111-
// location info are setup, in ReadAST.
3112-
auto [ImportLoc, ImportModuleFileIndex] =
3113-
ReadUntranslatedSourceLocation(Record[Idx++]);
3114-
// The import location must belong to the current module file itself.
3115-
assert(ImportModuleFileIndex == 0);
3116-
off_t StoredSize = !IsImportingStdCXXModule ? (off_t)Record[Idx++] : 0;
3117-
time_t StoredModTime =
3118-
!IsImportingStdCXXModule ? (time_t)Record[Idx++] : 0;
3119-
3120-
ASTFileSignature StoredSignature;
3121-
if (!IsImportingStdCXXModule) {
3122-
auto FirstSignatureByte = Record.begin() + Idx;
3123-
StoredSignature = ASTFileSignature::create(
3124-
FirstSignatureByte, FirstSignatureByte + ASTFileSignature::size);
3125-
Idx += ASTFileSignature::size;
3126-
}
3102+
unsigned Idx = 0;
3103+
// Read information about the AST file.
3104+
ModuleKind ImportedKind = (ModuleKind)Record[Idx++];
3105+
3106+
// The import location will be the local one for now; we will adjust
3107+
// all import locations of module imports after the global source
3108+
// location info are setup, in ReadAST.
3109+
auto [ImportLoc, ImportModuleFileIndex] =
3110+
ReadUntranslatedSourceLocation(Record[Idx++]);
3111+
// The import location must belong to the current module file itself.
3112+
assert(ImportModuleFileIndex == 0);
3113+
3114+
StringRef ImportedName = ReadStringBlob(Record, Idx, Blob);
3115+
3116+
bool IsImportingStdCXXModule = Record[Idx++];
3117+
3118+
off_t StoredSize = 0;
3119+
time_t StoredModTime = 0;
3120+
ASTFileSignature StoredSignature;
3121+
std::string ImportedFile;
3122+
3123+
// For prebuilt and explicit modules first consult the file map for
3124+
// an override. Note that here we don't search prebuilt module
3125+
// directories if we're not importing standard c++ module, only the
3126+
// explicit name to file mappings. Also, we will still verify the
3127+
// size/signature making sure it is essentially the same file but
3128+
// perhaps in a different location.
3129+
if (ImportedKind == MK_PrebuiltModule || ImportedKind == MK_ExplicitModule)
3130+
ImportedFile = PP.getHeaderSearchInfo().getPrebuiltModuleFileName(
3131+
ImportedName, /*FileMapOnly*/ !IsImportingStdCXXModule);
3132+
3133+
if (IsImportingStdCXXModule && ImportedFile.empty()) {
3134+
Diag(diag::err_failed_to_find_module_file) << ImportedName;
3135+
return Missing;
3136+
}
31273137

3128-
std::string ImportedName = ReadString(Record, Idx);
3129-
std::string ImportedFile;
3130-
3131-
// For prebuilt and explicit modules first consult the file map for
3132-
// an override. Note that here we don't search prebuilt module
3133-
// directories if we're not importing standard c++ module, only the
3134-
// explicit name to file mappings. Also, we will still verify the
3135-
// size/signature making sure it is essentially the same file but
3136-
// perhaps in a different location.
3137-
if (ImportedKind == MK_PrebuiltModule || ImportedKind == MK_ExplicitModule)
3138-
ImportedFile = PP.getHeaderSearchInfo().getPrebuiltModuleFileName(
3139-
ImportedName, /*FileMapOnly*/ !IsImportingStdCXXModule);
3140-
3141-
// For C++20 Modules, we won't record the path to the imported modules
3142-
// in the BMI
3143-
if (!IsImportingStdCXXModule) {
3144-
if (ImportedFile.empty()) {
3145-
// Use BaseDirectoryAsWritten to ensure we use the same path in the
3146-
// ModuleCache as when writing.
3147-
ImportedFile = ReadPath(BaseDirectoryAsWritten, Record, Idx);
3148-
} else
3149-
SkipPath(Record, Idx);
3150-
} else if (ImportedFile.empty()) {
3151-
Diag(clang::diag::err_failed_to_find_module_file) << ImportedName;
3152-
return Missing;
3153-
}
3138+
if (!IsImportingStdCXXModule) {
3139+
StoredSize = (off_t)Record[Idx++];
3140+
StoredModTime = (time_t)Record[Idx++];
31543141

3155-
// If our client can't cope with us being out of date, we can't cope with
3156-
// our dependency being missing.
3157-
unsigned Capabilities = ClientLoadCapabilities;
3158-
if ((ClientLoadCapabilities & ARR_OutOfDate) == 0)
3159-
Capabilities &= ~ARR_Missing;
3160-
3161-
// Load the AST file.
3162-
auto Result = ReadASTCore(ImportedFile, ImportedKind, ImportLoc, &F,
3163-
Loaded, StoredSize, StoredModTime,
3164-
StoredSignature, Capabilities);
3165-
3166-
// If we diagnosed a problem, produce a backtrace.
3167-
bool recompilingFinalized =
3168-
Result == OutOfDate && (Capabilities & ARR_OutOfDate) &&
3169-
getModuleManager().getModuleCache().isPCMFinal(F.FileName);
3170-
if (isDiagnosedResult(Result, Capabilities) || recompilingFinalized)
3171-
Diag(diag::note_module_file_imported_by)
3172-
<< F.FileName << !F.ModuleName.empty() << F.ModuleName;
3173-
if (recompilingFinalized)
3174-
Diag(diag::note_module_file_conflict);
3175-
3176-
switch (Result) {
3177-
case Failure: return Failure;
3178-
// If we have to ignore the dependency, we'll have to ignore this too.
3179-
case Missing:
3180-
case OutOfDate: return OutOfDate;
3181-
case VersionMismatch: return VersionMismatch;
3182-
case ConfigurationMismatch: return ConfigurationMismatch;
3183-
case HadErrors: return HadErrors;
3184-
case Success: break;
3142+
StringRef SignatureBytes = Blob.substr(0, ASTFileSignature::size);
3143+
StoredSignature = ASTFileSignature::create(SignatureBytes.begin(),
3144+
SignatureBytes.end());
3145+
Blob = Blob.substr(ASTFileSignature::size);
3146+
3147+
if (ImportedFile.empty()) {
3148+
// Use BaseDirectoryAsWritten to ensure we use the same path in the
3149+
// ModuleCache as when writing.
3150+
ImportedFile =
3151+
ReadPathBlob(BaseDirectoryAsWritten, Record, Idx, Blob);
31853152
}
31863153
}
3154+
3155+
// If our client can't cope with us being out of date, we can't cope with
3156+
// our dependency being missing.
3157+
unsigned Capabilities = ClientLoadCapabilities;
3158+
if ((ClientLoadCapabilities & ARR_OutOfDate) == 0)
3159+
Capabilities &= ~ARR_Missing;
3160+
3161+
// Load the AST file.
3162+
auto Result = ReadASTCore(ImportedFile, ImportedKind, ImportLoc, &F,
3163+
Loaded, StoredSize, StoredModTime,
3164+
StoredSignature, Capabilities);
3165+
3166+
// If we diagnosed a problem, produce a backtrace.
3167+
bool recompilingFinalized =
3168+
Result == OutOfDate && (Capabilities & ARR_OutOfDate) &&
3169+
getModuleManager().getModuleCache().isPCMFinal(F.FileName);
3170+
if (isDiagnosedResult(Result, Capabilities) || recompilingFinalized)
3171+
Diag(diag::note_module_file_imported_by)
3172+
<< F.FileName << !F.ModuleName.empty() << F.ModuleName;
3173+
if (recompilingFinalized)
3174+
Diag(diag::note_module_file_conflict);
3175+
3176+
switch (Result) {
3177+
case Failure: return Failure;
3178+
// If we have to ignore the dependency, we'll have to ignore this too.
3179+
case Missing:
3180+
case OutOfDate: return OutOfDate;
3181+
case VersionMismatch: return VersionMismatch;
3182+
case ConfigurationMismatch: return ConfigurationMismatch;
3183+
case HadErrors: return HadErrors;
3184+
case Success: break;
3185+
}
31873186
break;
31883187
}
31893188

@@ -5624,36 +5623,38 @@ bool ASTReader::readASTFileControlBlock(
56245623
break;
56255624
}
56265625

5627-
case IMPORTS: {
5626+
case IMPORT: {
56285627
if (!NeedsImports)
56295628
break;
56305629

5631-
unsigned Idx = 0, N = Record.size();
5632-
while (Idx < N) {
5633-
// Read information about the AST file.
5630+
unsigned Idx = 0;
5631+
// Read information about the AST file.
5632+
5633+
// Skip Kind
5634+
Idx++;
56345635

5635-
// Skip Kind
5636-
Idx++;
5637-
bool IsStandardCXXModule = Record[Idx++];
5636+
// Skip ImportLoc
5637+
Idx++;
56385638

5639-
// Skip ImportLoc
5640-
Idx++;
5639+
StringRef ModuleName = ReadStringBlob(Record, Idx, Blob);
56415640

5642-
// In C++20 Modules, we don't record the path to imported
5643-
// modules in the BMI files.
5644-
if (IsStandardCXXModule) {
5645-
std::string ModuleName = ReadString(Record, Idx);
5646-
Listener.visitImport(ModuleName, /*Filename=*/"");
5647-
continue;
5648-
}
5641+
bool IsStandardCXXModule = Record[Idx++];
56495642

5650-
// Skip Size, ModTime and Signature
5651-
Idx += 1 + 1 + ASTFileSignature::size;
5652-
std::string ModuleName = ReadString(Record, Idx);
5653-
std::string FilenameStr = ReadString(Record, Idx);
5654-
auto Filename = ResolveImportedPath(PathBuf, FilenameStr, ModuleDir);
5655-
Listener.visitImport(ModuleName, *Filename);
5643+
// In C++20 Modules, we don't record the path to imported
5644+
// modules in the BMI files.
5645+
if (IsStandardCXXModule) {
5646+
Listener.visitImport(ModuleName, /*Filename=*/"");
5647+
continue;
56565648
}
5649+
5650+
// Skip Size and ModTime.
5651+
Idx += 1 + 1;
5652+
// Skip signature.
5653+
Blob = Blob.substr(ASTFileSignature::size);
5654+
5655+
StringRef FilenameStr = ReadStringBlob(Record, Idx, Blob);
5656+
auto Filename = ResolveImportedPath(PathBuf, FilenameStr, ModuleDir);
5657+
Listener.visitImport(ModuleName, *Filename);
56575658
break;
56585659
}
56595660

@@ -9602,6 +9603,14 @@ std::string ASTReader::ReadString(const RecordDataImpl &Record, unsigned &Idx) {
96029603
return Result;
96039604
}
96049605

9606+
/// Read a length-prefixed string whose bytes are stored in the record's blob
/// rather than in the record itself.
///
/// The length is taken from Record[Idx], and Idx is advanced past it. The
/// first Len bytes of Blob are returned, and Blob is re-pointed at whatever
/// follows them, so successive calls consume successive strings.
///
/// NOTE(review): the result is a StringRef view into Blob, not a copy;
/// presumably it is only valid while the blob's backing storage is alive --
/// confirm at call sites.
/// NOTE(review): there is no explicit check here that Len fits within Blob;
/// this relies on how StringRef::substr treats out-of-range lengths -- confirm.
StringRef ASTReader::ReadStringBlob(const RecordDataImpl &Record, unsigned &Idx,
9607+
StringRef &Blob) {
9608+
unsigned Len = Record[Idx++];
9609+
StringRef Result = Blob.substr(0, Len);
9610+
Blob = Blob.substr(Len);
9611+
return Result;
9612+
}
9613+
96059614
std::string ASTReader::ReadPath(ModuleFile &F, const RecordData &Record,
96069615
unsigned &Idx) {
96079616
return ReadPath(F.BaseDirectory, Record, Idx);
@@ -9613,6 +9622,13 @@ std::string ASTReader::ReadPath(StringRef BaseDirectory,
96139622
return ResolveImportedPathAndAllocate(PathBuf, Filename, BaseDirectory);
96149623
}
96159624

9625+
/// Read a path stored in the record's blob and resolve it against
/// BaseDirectory.
///
/// The raw filename is pulled out with ReadStringBlob, which advances both Idx
/// and Blob. It is then handed, together with BaseDirectory and PathBuf, to
/// ResolveImportedPathAndAllocate, whose result is returned as a std::string.
///
/// NOTE(review): PathBuf is declared outside this hunk; presumably it is a
/// scratch buffer used during resolution -- confirm.
std::string ASTReader::ReadPathBlob(StringRef BaseDirectory,
9626+
const RecordData &Record, unsigned &Idx,
9627+
StringRef &Blob) {
9628+
StringRef Filename = ReadStringBlob(Record, Idx, Blob);
9629+
return ResolveImportedPathAndAllocate(PathBuf, Filename, BaseDirectory);
9630+
}
9631+
96169632
VersionTuple ASTReader::ReadVersionTuple(const RecordData &Record,
96179633
unsigned &Idx) {
96189634
unsigned Major = Record[Idx++];

0 commit comments

Comments
 (0)
Please sign in to comment.