Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[vulkan] Implement offline cache cleaning on vulkan #5968

Merged
merged 5 commits into from
Sep 6, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
150 changes: 146 additions & 4 deletions taichi/cache/gfx/cache_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,22 +2,116 @@
#include "taichi/analysis/offline_cache_util.h"
#include "taichi/codegen/spirv/snode_struct_compiler.h"
#include "taichi/common/cleanup.h"
#include "taichi/common/version.h"
#include "taichi/program/kernel.h"
#include "taichi/runtime/gfx/aot_module_loader_impl.h"
#include "taichi/runtime/gfx/snode_tree_manager.h"
#include "taichi/util/lock.h"
#include "taichi/util/offline_cache.h"

namespace taichi {
namespace lang {
namespace gfx {

namespace {

// File names below are relative to the cache directory; the code in this file
// joins them with the cache path via taichi::join_path before use.

// Lock file acquired (lock_with_file) before metadata in the cache directory
// is read or rewritten; also handed to the cache cleaner as its lock name.
constexpr char kMetadataFileLockName[] = "metadata.lock";
// Binary AOT metadata file. Its presence is checked when a CacheManager is
// constructed; the cache cleaner rewrites it in save_metadata and deletes it
// in remove_other_files.
constexpr char kAotMetadataFilename[] = "metadata.tcb";
// JSON sibling of the AOT metadata file (presumably a human-readable dump for
// debugging — confirm against the AOT module builder). Deleted in
// remove_other_files.
constexpr char kDebuggingAotMetadataFilename[] = "metadata.json";
// Graph metadata file. Its presence is checked alongside the AOT metadata
// file when a CacheManager is constructed; deleted in remove_other_files.
constexpr char kGraphMetadataFilename[] = "graphs.tcb";
// Offline-cache bookkeeping file written by dump_with_merging and used as the
// cache cleaner's metadata file.
constexpr char kOfflineCacheMetadataFilename[] = "offline_cache_metadata.tcb";
// Shorthand for the compiled-kernel payload type used throughout this file.
using CompiledKernelData = gfx::GfxRuntime::RegisterParams;

// Builds the offline-cache bookkeeping record for one compiled kernel.
//
// @param key       Cache key of the kernel; stored as `kernel_key`.
// @param compiled  Compiled kernel whose `task_spirv_source_codes` are
//                  measured.
// @return A record whose `size` is the total byte size of all task SPIR-V
//         buffers, whose `num_files` is the number of those buffers, and
//         whose `created_at` and `last_used_at` both hold the current time.
inline gfx::CacheManager::Metadata::KernelMetadata make_kernel_metadata(
    const std::string &key,
    const gfx::GfxRuntime::RegisterParams &compiled) {
  const auto &task_codes = compiled.task_spirv_source_codes;

  std::size_t codes_size = 0;
  for (const auto &code : task_codes) {
    // Element count times element width gives this buffer's size in bytes.
    codes_size += code.size() * sizeof(*code.data());
  }

  // Sample the clock exactly once: two separate std::time() calls could
  // straddle a second boundary and leave a brand-new record with
  // created_at != last_used_at.
  const auto now = std::time(nullptr);

  gfx::CacheManager::Metadata::KernelMetadata res;
  res.kernel_key = key;
  res.size = codes_size;
  res.created_at = now;
  res.last_used_at = now;
  res.num_files = task_codes.size();
  return res;
}

} // namespace

namespace offline_cache {

template <>
struct CacheCleanerUtils<gfx::CacheManager::Metadata> {
using MetadataType = gfx::CacheManager::Metadata;
using KernelMetaData = MetadataType::KernelMetadata;

// To load metadata from file
static bool load_metadata(const CacheCleanerConfig &config,
MetadataType &result) {
return read_from_binary_file(
result, taichi::join_path(config.path, config.metadata_filename));
}

// To save metadata as file
static bool save_metadata(const CacheCleanerConfig &config,
const MetadataType &data) {
// Update AOT metadata
gfx::TaichiAotData old_aot_data, new_aot_data;
auto aot_metadata_path =
taichi::join_path(config.path, kAotMetadataFilename);
if (read_from_binary_file(old_aot_data, aot_metadata_path)) {
const auto &kernels = data.kernels;
for (auto &k : old_aot_data.kernels) {
if (kernels.count(k.name)) {
new_aot_data.kernels.push_back(std::move(k));
}
}
write_to_binary_file(new_aot_data, aot_metadata_path);
}
write_to_binary_file(
data, taichi::join_path(config.path, config.metadata_filename));
return true;
}

static bool save_debugging_metadata(const CacheCleanerConfig &config,
const MetadataType &data) {
// Do nothing
return true;
}

// To check version
static bool check_version(const CacheCleanerConfig &config,
const Version &version) {
return version[0] == TI_VERSION_MAJOR && version[1] == TI_VERSION_MINOR &&
version[2] == TI_VERSION_PATCH;
}

// To get cache files name
static std::vector<std::string> get_cache_files(
const CacheCleanerConfig &config,
const KernelMetaData &kernel_meta) {
std::vector<std::string> result;
for (std::size_t i = 0; i < kernel_meta.num_files; ++i) {
result.push_back(kernel_meta.kernel_key + std::to_string(i) + ".spv");
}
return result;
}

// To remove other files except cache files and offline cache metadta files
static void remove_other_files(const CacheCleanerConfig &config) {
taichi::remove(taichi::join_path(config.path, kAotMetadataFilename));
taichi::remove(
taichi::join_path(config.path, kDebuggingAotMetadataFilename));
taichi::remove(taichi::join_path(config.path, kGraphMetadataFilename));
}
};

} // namespace offline_cache

namespace gfx {

CacheManager::CacheManager(Params &&init_params)
: mode_(init_params.mode),
runtime_(init_params.runtime),
Expand All @@ -28,8 +122,8 @@ CacheManager::CacheManager(Params &&init_params)
path_ = offline_cache::get_cache_path_by_arch(init_params.cache_path,
init_params.arch);

if (taichi::path_exists(taichi::join_path(path_, "metadata.tcb")) &&
taichi::path_exists(taichi::join_path(path_, "graphs.tcb"))) {
if (taichi::path_exists(taichi::join_path(path_, kAotMetadataFilename)) &&
taichi::path_exists(taichi::join_path(path_, kGraphMetadataFilename))) {
auto lock_path = taichi::join_path(path_, kMetadataFileLockName);
if (lock_with_file(lock_path)) {
auto _ = make_cleanup([&lock_path]() {
Expand All @@ -47,6 +141,10 @@ CacheManager::CacheManager(Params &&init_params)
caching_module_builder_ = std::make_unique<gfx::AotModuleBuilderImpl>(
compiled_structs_, init_params.arch,
std::move(init_params.target_device));

offline_cache_metadata_.version[0] = TI_VERSION_MAJOR;
offline_cache_metadata_.version[1] = TI_VERSION_MINOR;
offline_cache_metadata_.version[2] = TI_VERSION_PATCH;
}

CompiledKernelData CacheManager::load_or_compile(CompileConfig *config,
Expand All @@ -66,7 +164,7 @@ CompiledKernelData CacheManager::load_or_compile(CompileConfig *config,
}

void CacheManager::dump_with_merging() const {
if (mode_ == MemAndDiskCache) {
if (mode_ == MemAndDiskCache && !offline_cache_metadata_.kernels.empty()) {
taichi::create_directories(path_);
auto *cache_builder =
static_cast<gfx::AotModuleBuilderImpl *>(caching_module_builder_.get());
Expand All @@ -79,12 +177,50 @@ void CacheManager::dump_with_merging() const {
TI_WARN("Unlock {} failed", lock_path);
}
});

// Update metadata.{tcb,json}
cache_builder->merge_with_old_meta_data(path_);
cache_builder->dump(path_, "");

// Update offline_cache_metadata.tcb
Metadata old_data;
const auto filename =
taichi::join_path(path_, kOfflineCacheMetadataFilename);
if (read_from_binary_file(old_data, filename)) {
for (auto &[k, v] : offline_cache_metadata_.kernels) {
auto iter = old_data.kernels.find(k);
if (iter != old_data.kernels.end()) { // Update
iter->second.last_used_at = v.last_used_at;
} else { // Add new
old_data.size += v.size;
old_data.kernels[k] = std::move(v);
}
}
write_to_binary_file(old_data, filename);
} else {
write_to_binary_file(offline_cache_metadata_, filename);
}
}
}
}

// Runs the offline-cache cleaner over this manager's cache directory.
//
// Does nothing unless the manager is in MemAndDiskCache mode.
//
// @param policy           Cleaning policy forwarded to the cleaner.
// @param max_bytes        Forwarded as the cleaner's `max_size`.
// @param cleaning_factor  Forwarded as the cleaner's `cleaning_factor`.
void CacheManager::clean_offline_cache(offline_cache::CleanCachePolicy policy,
                                       int max_bytes,
                                       double cleaning_factor) const {
  if (mode_ != MemAndDiskCache) {
    return;
  }

  offline_cache::CacheCleanerConfig cleaner_config;
  cleaner_config.path = path_;
  cleaner_config.policy = policy;
  cleaner_config.cleaning_factor = cleaning_factor;
  cleaner_config.max_size = max_bytes;
  cleaner_config.metadata_filename = kOfflineCacheMetadataFilename;
  // This cache has no debugging metadata file.
  cleaner_config.debugging_metadata_filename = "";
  cleaner_config.metadata_lock_name = kMetadataFileLockName;

  offline_cache::CacheCleaner<Metadata>::run(cleaner_config);
}

std::optional<CompiledKernelData> CacheManager::try_load_cached_kernel(
Kernel *kernel,
const std::string &key) {
Expand Down Expand Up @@ -113,6 +249,9 @@ std::optional<CompiledKernelData> CacheManager::try_load_cached_kernel(
auto compiled = aot_kernel_impl->params();
// TODO: Support multiple SNodeTrees in AOT.
compiled.num_snode_trees = compiled_structs_.size();
auto kmetadata = make_kernel_metadata(key, compiled);
offline_cache_metadata_.size += kmetadata.size;
offline_cache_metadata_.kernels[key] = std::move(kmetadata);
return compiled;
}
}
Expand All @@ -132,6 +271,9 @@ CompiledKernelData CacheManager::compile_and_cache_kernel(
TI_ASSERT(params_opt.has_value());
// TODO: Support multiple SNodeTrees in AOT.
params_opt->num_snode_trees = compiled_structs_.size();
auto kmetadata = make_kernel_metadata(key, *params_opt);
offline_cache_metadata_.size += kmetadata.size;
offline_cache_metadata_.kernels[key] = std::move(kmetadata);
return *params_opt;
}

Expand Down
10 changes: 8 additions & 2 deletions taichi/cache/gfx/cache_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,17 @@
#include "taichi/aot/module_builder.h"
#include "taichi/aot/module_loader.h"
#include "taichi/runtime/gfx/runtime.h"
#include "taichi/util/offline_cache.h"

namespace taichi {
namespace lang {
namespace gfx {

class CacheManager {
using CompiledKernelData = gfx::GfxRuntime::RegisterParams;

public:
using Metadata = offline_cache::Metadata;
enum Mode { NotCache, MemCache, MemAndDiskCache };

struct Params {
Expand All @@ -21,12 +25,13 @@ class CacheManager {
const std::vector<spirv::CompiledSNodeStructs> *compiled_structs;
};

using CompiledKernelData = gfx::GfxRuntime::RegisterParams;

CacheManager(Params &&init_params);

CompiledKernelData load_or_compile(CompileConfig *config, Kernel *kernel);
void dump_with_merging() const;
void clean_offline_cache(offline_cache::CleanCachePolicy policy,
int max_bytes,
double cleaning_factor) const;

private:
std::optional<CompiledKernelData> try_load_cached_kernel(
Expand All @@ -40,6 +45,7 @@ class CacheManager {
std::string path_;
GfxRuntime *runtime_{nullptr};
const std::vector<spirv::CompiledSNodeStructs> &compiled_structs_;
Metadata offline_cache_metadata_;
std::unique_ptr<AotModuleBuilder> caching_module_builder_{nullptr};
std::unique_ptr<aot::Module> cached_module_{nullptr};
};
Expand Down
48 changes: 30 additions & 18 deletions taichi/runtime/llvm/llvm_offline_cache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include "taichi/runtime/llvm/llvm_context.h"
#include "taichi/util/io.h"
#include "taichi/util/lock.h"
#include "taichi/util/offline_cache.h"

namespace taichi {
namespace lang {
Expand Down Expand Up @@ -60,32 +61,38 @@ struct CacheCleanerUtils<LlvmOfflineCache> {
using KernelMetaData = typename MetadataType::KernelMetadata;

// To load metadata from file
static bool load_metadata(MetadataType &result, const std::string &filepath) {
return read_from_binary_file(result, filepath);
static bool load_metadata(const CacheCleanerConfig &config,
MetadataType &result) {
return read_from_binary_file(
result, taichi::join_path(config.path, config.metadata_filename));
}

// To save metadata as file
static bool save_metadata(const MetadataType &data,
const std::string &filepath) {
write_to_binary_file(data, filepath);
static bool save_metadata(const CacheCleanerConfig &config,
const MetadataType &data) {
write_to_binary_file(
data, taichi::join_path(config.path, config.metadata_filename));
return true;
}

static bool save_debugging_metadata(const MetadataType &data,
const std::string &filepath) {
static bool save_debugging_metadata(const CacheCleanerConfig &config,
const MetadataType &data) {
TextSerializer ts;
ts.serialize_to_json("cache", data);
ts.write_to_file(get_llvm_cache_metadata_json_file_path(filepath));
ts.write_to_file(
taichi::join_path(config.path, config.debugging_metadata_filename));
return true;
}

// To check version
static bool check_version(const Version &version) {
static bool check_version(const CacheCleanerConfig &config,
const Version &version) {
return is_current_llvm_cache_version(version);
}

// To get cache files name
static std::vector<std::string> get_cache_files(
const CacheCleanerConfig &config,
const KernelMetaData &kernel_meta) {
std::vector<std::string> result;
for (int i = 0; i < kernel_meta.compiled_data_list.size(); i++) {
Expand All @@ -96,6 +103,11 @@ struct CacheCleanerUtils<LlvmOfflineCache> {
}
return result;
}

// To remove other files except cache files and offline cache metadata files
static void remove_other_files(const CacheCleanerConfig &config) {
// Do nothing
}
};

} // namespace offline_cache
Expand Down Expand Up @@ -414,15 +426,15 @@ void LlvmOfflineCacheFileWriter::clean_cache(const std::string &path,
int max_bytes,
double cleaning_factor) {
using CacheCleaner = offline_cache::CacheCleaner<LlvmOfflineCache>;
CacheCleaner::Params params;
params.path = path;
params.policy = policy;
params.cleaning_factor = cleaning_factor;
params.max_size = max_bytes;
params.metadata_filename = std::string(kMetadataFilename) + ".tcb";
params.debugging_metadata_filename = std::string(kMetadataFilename) + ".json";
params.metadata_lock_name = kMetadataFileLockName;
CacheCleaner::run(params);
offline_cache::CacheCleanerConfig config;
config.path = path;
config.policy = policy;
config.cleaning_factor = cleaning_factor;
config.max_size = max_bytes;
config.metadata_filename = std::string(kMetadataFilename) + ".tcb";
config.debugging_metadata_filename = std::string(kMetadataFilename) + ".json";
config.metadata_lock_name = kMetadataFileLockName;
CacheCleaner::run(config);
}

LlvmOfflineCache::KernelCacheData LlvmOfflineCache::KernelCacheData::clone()
Expand Down
10 changes: 7 additions & 3 deletions taichi/runtime/program_impls/vulkan/vulkan_program.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include "taichi/runtime/gfx/aot_module_builder_impl.h"
#include "taichi/runtime/gfx/snode_tree_manager.h"
#include "taichi/runtime/gfx/aot_module_loader_impl.h"
#include "taichi/util/offline_cache.h"

#if !defined(ANDROID)
#include "GLFW/glfw3.h"
Expand Down Expand Up @@ -207,9 +208,12 @@ std::unique_ptr<aot::Kernel> VulkanProgramImpl::make_aot_kernel(
}

void VulkanProgramImpl::dump_cache_data_to_disk() {
if (offline_cache::enabled_wip_offline_cache(config->offline_cache)) {
get_cache_manager()->dump_with_merging();
}
const auto &mgr = get_cache_manager();
mgr->clean_offline_cache(offline_cache::string_to_clean_cache_policy(
config->offline_cache_cleaning_policy),
config->offline_cache_max_size_of_files,
config->offline_cache_cleaning_factor);
mgr->dump_with_merging();
}

const std::unique_ptr<gfx::CacheManager>
Expand Down
Loading