diff --git a/taichi/cache/gfx/cache_manager.cpp b/taichi/cache/gfx/cache_manager.cpp index 1228c8ec9e71f..bcd054b1ec93c 100644 --- a/taichi/cache/gfx/cache_manager.cpp +++ b/taichi/cache/gfx/cache_manager.cpp @@ -2,22 +2,116 @@ #include "taichi/analysis/offline_cache_util.h" #include "taichi/codegen/spirv/snode_struct_compiler.h" #include "taichi/common/cleanup.h" +#include "taichi/common/version.h" #include "taichi/program/kernel.h" #include "taichi/runtime/gfx/aot_module_loader_impl.h" #include "taichi/runtime/gfx/snode_tree_manager.h" #include "taichi/util/lock.h" +#include "taichi/util/offline_cache.h" namespace taichi { namespace lang { -namespace gfx { namespace { constexpr char kMetadataFileLockName[] = "metadata.lock"; +constexpr char kAotMetadataFilename[] = "metadata.tcb"; +constexpr char kDebuggingAotMetadataFilename[] = "metadata.json"; +constexpr char kGraphMetadataFilename[] = "graphs.tcb"; +constexpr char kOfflineCacheMetadataFilename[] = "offline_cache_metadata.tcb"; using CompiledKernelData = gfx::GfxRuntime::RegisterParams; +inline gfx::CacheManager::Metadata::KernelMetadata make_kernel_metadata( + const std::string &key, + const gfx::GfxRuntime::RegisterParams &compiled) { + std::size_t codes_size = 0; + for (const auto &e : compiled.task_spirv_source_codes) { + codes_size += e.size() * sizeof(*e.data()); + } + + gfx::CacheManager::Metadata::KernelMetadata res; + res.kernel_key = key; + res.size = codes_size; + res.created_at = std::time(nullptr); + res.last_used_at = std::time(nullptr); + res.num_files = compiled.task_spirv_source_codes.size(); + return res; +} + } // namespace +namespace offline_cache { + +template <> +struct CacheCleanerUtils { + using MetadataType = gfx::CacheManager::Metadata; + using KernelMetaData = MetadataType::KernelMetadata; + + // To load metadata from file + static bool load_metadata(const CacheCleanerConfig &config, + MetadataType &result) { + return read_from_binary_file( + result, taichi::join_path(config.path, config.metadata_filename)); + } + + // To save metadata as file + static bool save_metadata(const CacheCleanerConfig &config, + const MetadataType &data) { + // Update AOT metadata + gfx::TaichiAotData old_aot_data, new_aot_data; + auto aot_metadata_path = + taichi::join_path(config.path, kAotMetadataFilename); + if (read_from_binary_file(old_aot_data, aot_metadata_path)) { + const auto &kernels = data.kernels; + for (auto &k : old_aot_data.kernels) { + if (kernels.count(k.name)) { + new_aot_data.kernels.push_back(std::move(k)); + } + } + write_to_binary_file(new_aot_data, aot_metadata_path); + } + write_to_binary_file( + data, taichi::join_path(config.path, config.metadata_filename)); + return true; + } + + static bool save_debugging_metadata(const CacheCleanerConfig &config, + const MetadataType &data) { + // Do nothing + return true; + } + + // To check version + static bool check_version(const CacheCleanerConfig &config, + const Version &version) { + return version[0] == TI_VERSION_MAJOR && version[1] == TI_VERSION_MINOR && + version[2] == TI_VERSION_PATCH; + } + + // To get cache files name + static std::vector get_cache_files( + const CacheCleanerConfig &config, + const KernelMetaData &kernel_meta) { + std::vector result; + for (std::size_t i = 0; i < kernel_meta.num_files; ++i) { + result.push_back(kernel_meta.kernel_key + std::to_string(i) + ".spv"); + } + return result; + } + + // To remove other files except cache files and offline cache metadta files + static void remove_other_files(const CacheCleanerConfig &config) { + taichi::remove(taichi::join_path(config.path, kAotMetadataFilename)); + taichi::remove( + taichi::join_path(config.path, kDebuggingAotMetadataFilename)); + taichi::remove(taichi::join_path(config.path, kGraphMetadataFilename)); + } +}; + +} // namespace offline_cache + +namespace gfx { + CacheManager::CacheManager(Params &&init_params) : mode_(init_params.mode), runtime_(init_params.runtime), @@ -28,8 +122,8 @@ CacheManager::CacheManager(Params &&init_params) path_ = offline_cache::get_cache_path_by_arch(init_params.cache_path, init_params.arch); - if (taichi::path_exists(taichi::join_path(path_, "metadata.tcb")) && - taichi::path_exists(taichi::join_path(path_, "graphs.tcb"))) { + if (taichi::path_exists(taichi::join_path(path_, kAotMetadataFilename)) && + taichi::path_exists(taichi::join_path(path_, kGraphMetadataFilename))) { auto lock_path = taichi::join_path(path_, kMetadataFileLockName); if (lock_with_file(lock_path)) { auto _ = make_cleanup([&lock_path]() { @@ -47,6 +141,10 @@ CacheManager::CacheManager(Params &&init_params) caching_module_builder_ = std::make_unique( compiled_structs_, init_params.arch, std::move(init_params.target_device)); + + offline_cache_metadata_.version[0] = TI_VERSION_MAJOR; + offline_cache_metadata_.version[1] = TI_VERSION_MINOR; + offline_cache_metadata_.version[2] = TI_VERSION_PATCH; } CompiledKernelData CacheManager::load_or_compile(CompileConfig *config, @@ -66,7 +164,7 @@ CompiledKernelData CacheManager::load_or_compile(CompileConfig *config, } void CacheManager::dump_with_merging() const { - if (mode_ == MemAndDiskCache) { + if (mode_ == MemAndDiskCache && !offline_cache_metadata_.kernels.empty()) { taichi::create_directories(path_); auto *cache_builder = static_cast(caching_module_builder_.get()); @@ -79,12 +177,50 @@ void CacheManager::dump_with_merging() const { TI_WARN("Unlock {} failed", lock_path); } }); + + // Update metadata.{tcb,json} cache_builder->merge_with_old_meta_data(path_); cache_builder->dump(path_, ""); + + // Update offline_cache_metadata.tcb + Metadata old_data; + const auto filename = + taichi::join_path(path_, kOfflineCacheMetadataFilename); + if (read_from_binary_file(old_data, filename)) { + for (auto &[k, v] : offline_cache_metadata_.kernels) { + auto iter = old_data.kernels.find(k); + if (iter != old_data.kernels.end()) { // Update + iter->second.last_used_at = v.last_used_at; + } else { // Add new + old_data.size += v.size; + old_data.kernels[k] = std::move(v); + } + } + write_to_binary_file(old_data, filename); + } else { + write_to_binary_file(offline_cache_metadata_, filename); + } } } } +void CacheManager::clean_offline_cache(offline_cache::CleanCachePolicy policy, + int max_bytes, + double cleaning_factor) const { + if (mode_ == MemAndDiskCache) { + using CacheCleaner = offline_cache::CacheCleaner; + offline_cache::CacheCleanerConfig params; + params.path = path_; + params.policy = policy; + params.cleaning_factor = cleaning_factor; + params.max_size = max_bytes; + params.metadata_filename = kOfflineCacheMetadataFilename; + params.debugging_metadata_filename = ""; // No debugging file + params.metadata_lock_name = kMetadataFileLockName; + CacheCleaner::run(params); + } +} + std::optional CacheManager::try_load_cached_kernel( Kernel *kernel, const std::string &key) { @@ -113,6 +249,9 @@ std::optional CacheManager::try_load_cached_kernel( auto compiled = aot_kernel_impl->params(); // TODO: Support multiple SNodeTrees in AOT. compiled.num_snode_trees = compiled_structs_.size(); + auto kmetadata = make_kernel_metadata(key, compiled); + offline_cache_metadata_.size += kmetadata.size; + offline_cache_metadata_.kernels[key] = std::move(kmetadata); return compiled; } } @@ -132,6 +271,9 @@ CompiledKernelData CacheManager::compile_and_cache_kernel( TI_ASSERT(params_opt.has_value()); // TODO: Support multiple SNodeTrees in AOT. params_opt->num_snode_trees = compiled_structs_.size(); + auto kmetadata = make_kernel_metadata(key, *params_opt); + offline_cache_metadata_.size += kmetadata.size; + offline_cache_metadata_.kernels[key] = std::move(kmetadata); return *params_opt; } diff --git a/taichi/cache/gfx/cache_manager.h b/taichi/cache/gfx/cache_manager.h index 3197f5a5679c5..b99e2f888e05c 100644 --- a/taichi/cache/gfx/cache_manager.h +++ b/taichi/cache/gfx/cache_manager.h @@ -3,13 +3,17 @@ #include "taichi/aot/module_builder.h" #include "taichi/aot/module_loader.h" #include "taichi/runtime/gfx/runtime.h" +#include "taichi/util/offline_cache.h" namespace taichi { namespace lang { namespace gfx { class CacheManager { + using CompiledKernelData = gfx::GfxRuntime::RegisterParams; + public: + using Metadata = offline_cache::Metadata; enum Mode { NotCache, MemCache, MemAndDiskCache }; struct Params { @@ -21,12 +25,13 @@ class CacheManager { const std::vector *compiled_structs; }; - using CompiledKernelData = gfx::GfxRuntime::RegisterParams; - CacheManager(Params &&init_params); CompiledKernelData load_or_compile(CompileConfig *config, Kernel *kernel); void dump_with_merging() const; + void clean_offline_cache(offline_cache::CleanCachePolicy policy, + int max_bytes, + double cleaning_factor) const; private: std::optional try_load_cached_kernel( @@ -40,6 +45,7 @@ class CacheManager { std::string path_; GfxRuntime *runtime_{nullptr}; const std::vector &compiled_structs_; + Metadata offline_cache_metadata_; std::unique_ptr caching_module_builder_{nullptr}; std::unique_ptr cached_module_{nullptr}; }; diff --git a/taichi/runtime/llvm/llvm_offline_cache.cpp b/taichi/runtime/llvm/llvm_offline_cache.cpp index 63550c226fd5a..e61a1640ede89 100644 --- a/taichi/runtime/llvm/llvm_offline_cache.cpp +++ b/taichi/runtime/llvm/llvm_offline_cache.cpp @@ -17,6 +17,7 @@ #include "taichi/runtime/llvm/llvm_context.h" #include "taichi/util/io.h" #include "taichi/util/lock.h" +#include "taichi/util/offline_cache.h" namespace taichi { namespace lang { @@ -60,32 +61,38 @@ struct CacheCleanerUtils { using KernelMetaData = typename MetadataType::KernelMetadata; // To load metadata from file - static bool load_metadata(MetadataType &result, const std::string &filepath) { - return read_from_binary_file(result, filepath); + static bool load_metadata(const CacheCleanerConfig &config, + MetadataType &result) { + return read_from_binary_file( + result, taichi::join_path(config.path, config.metadata_filename)); } // To save metadata as file - static bool save_metadata(const MetadataType &data, - const std::string &filepath) { - write_to_binary_file(data, filepath); + static bool save_metadata(const CacheCleanerConfig &config, + const MetadataType &data) { + write_to_binary_file( + data, taichi::join_path(config.path, config.metadata_filename)); return true; } - static bool save_debugging_metadata(const MetadataType &data, - const std::string &filepath) { + static bool save_debugging_metadata(const CacheCleanerConfig &config, + const MetadataType &data) { TextSerializer ts; ts.serialize_to_json("cache", data); - ts.write_to_file(get_llvm_cache_metadata_json_file_path(filepath)); + ts.write_to_file( + taichi::join_path(config.path, config.debugging_metadata_filename)); return true; } // To check version - static bool check_version(const Version &version) { + static bool check_version(const CacheCleanerConfig &config, + const Version &version) { return is_current_llvm_cache_version(version); } // To get cache files name static std::vector get_cache_files( + const CacheCleanerConfig &config, const KernelMetaData &kernel_meta) { std::vector result; for (int i = 0; i < kernel_meta.compiled_data_list.size(); i++) { @@ -96,6 +103,11 @@ struct CacheCleanerUtils { } return result; } + + // To remove other files except cache files and offline cache metadta files + static void remove_other_files(const CacheCleanerConfig &config) { + // Do nothing + } }; } // namespace offline_cache @@ -414,15 +426,15 @@ void LlvmOfflineCacheFileWriter::clean_cache(const std::string &path, int max_bytes, double cleaning_factor) { using CacheCleaner = offline_cache::CacheCleaner; - CacheCleaner::Params params; - params.path = path; - params.policy = policy; - params.cleaning_factor = cleaning_factor; - params.max_size = max_bytes; - params.metadata_filename = std::string(kMetadataFilename) + ".tcb"; - params.debugging_metadata_filename = std::string(kMetadataFilename) + ".json"; - params.metadata_lock_name = kMetadataFileLockName; - CacheCleaner::run(params); + offline_cache::CacheCleanerConfig config; + config.path = path; + config.policy = policy; + config.cleaning_factor = cleaning_factor; + config.max_size = max_bytes; + config.metadata_filename = std::string(kMetadataFilename) + ".tcb"; + config.debugging_metadata_filename = std::string(kMetadataFilename) + ".json"; + config.metadata_lock_name = kMetadataFileLockName; + CacheCleaner::run(config); } LlvmOfflineCache::KernelCacheData LlvmOfflineCache::KernelCacheData::clone() diff --git a/taichi/runtime/program_impls/vulkan/vulkan_program.cpp b/taichi/runtime/program_impls/vulkan/vulkan_program.cpp index 544c18173f616..f256c73128b81 100644 --- a/taichi/runtime/program_impls/vulkan/vulkan_program.cpp +++ b/taichi/runtime/program_impls/vulkan/vulkan_program.cpp @@ -5,6 +5,7 @@ #include "taichi/runtime/gfx/aot_module_builder_impl.h" #include "taichi/runtime/gfx/snode_tree_manager.h" #include "taichi/runtime/gfx/aot_module_loader_impl.h" +#include "taichi/util/offline_cache.h" #if !defined(ANDROID) #include "GLFW/glfw3.h" @@ -207,9 +208,12 @@ std::unique_ptr VulkanProgramImpl::make_aot_kernel( } void VulkanProgramImpl::dump_cache_data_to_disk() { - if (offline_cache::enabled_wip_offline_cache(config->offline_cache)) { - get_cache_manager()->dump_with_merging(); - } + const auto &mgr = get_cache_manager(); + mgr->clean_offline_cache(offline_cache::string_to_clean_cache_policy( + config->offline_cache_cleaning_policy), + config->offline_cache_max_size_of_files, + config->offline_cache_cleaning_factor); + mgr->dump_with_merging(); } const std::unique_ptr diff --git a/taichi/util/offline_cache.h b/taichi/util/offline_cache.h index c2391d28baccc..dfe330e23cccf 100644 --- a/taichi/util/offline_cache.h +++ b/taichi/util/offline_cache.h @@ -45,36 +45,74 @@ inline CleanCachePolicy string_to_clean_cache_policy(const std::string &str) { return Never; } +struct KernelMetadata { + std::string kernel_key; + std::size_t size{0}; // byte + std::time_t created_at{0}; // sec + std::time_t last_used_at{0}; // sec + std::size_t num_files{0}; + + TI_IO_DEF(kernel_key, size, created_at, last_used_at, num_files); +}; + +struct Metadata { + using KernelMetadata = struct KernelMetadata; + + Version version{}; + std::size_t size{0}; // byte + std::unordered_map kernels; + + TI_IO_DEF(version, size, kernels); +}; + +struct CacheCleanerConfig { + std::string path; + CleanCachePolicy policy; + int max_size{0}; + double cleaning_factor{0.f}; + std::string metadata_filename; + std::string debugging_metadata_filename; + std::string metadata_lock_name; +}; + template struct CacheCleanerUtils { using KernelMetaData = typename MetadataType::KernelMetadata; // To load metadata from file - static bool load_metadata(MetadataType &result, const std::string &filepath) { + static bool load_metadata(const CacheCleanerConfig &config, + MetadataType &result) { TI_NOT_IMPLEMENTED; } // To save metadata as file - static bool save_metadata(const MetadataType &data, - const std::string &filepath) { + static bool save_metadata(const CacheCleanerConfig &config, + const MetadataType &data) { TI_NOT_IMPLEMENTED; } - static bool save_debugging_metadata(const MetadataType &data, - const std::string &filepath) { + static bool save_debugging_metadata(const CacheCleanerConfig &config, + const MetadataType &data) { TI_NOT_IMPLEMENTED; } // To check version - static bool check_version(const Version &version) { + static bool check_version(const CacheCleanerConfig &config, + const Version &version) { TI_NOT_IMPLEMENTED; } // To get cache files name static std::vector get_cache_files( + const CacheCleanerConfig &config, const KernelMetaData &kernel_meta) { TI_NOT_IMPLEMENTED; } + + // To remove other files except cache files and offline cache metadta files + static void remove_other_files(const CacheCleanerConfig &config) { + TI_NOT_IMPLEMENTED; + } }; template @@ -83,17 +121,7 @@ class CacheCleaner { using KernelMetadata = typename MetadataType::KernelMetadata; public: - struct Params { - std::string path; - CleanCachePolicy policy; - int max_size{0}; - double cleaning_factor{0.f}; - std::string metadata_filename; - std::string debugging_metadata_filename; - std::string metadata_lock_name; - }; - - static void run(const Params &config) { + static void run(const CacheCleanerConfig &config) { TI_ASSERT(!config.path.empty()); TI_ASSERT(config.max_size > 0); TI_ASSERT(!config.metadata_filename.empty()); @@ -132,16 +160,17 @@ class CacheCleaner { }); TI_DEBUG("Start cleaning cache"); - if (!Utils::load_metadata(cache_data, metadata_file)) { + if (!Utils::load_metadata(config, cache_data)) { return; } if ((policy & CleanOldVersion) && - !Utils::check_version(cache_data.version)) { + !Utils::check_version(config, cache_data.version)) { if (taichi::remove(metadata_file)) { taichi::remove(debugging_metadata_file); + Utils::remove_other_files(config); for (const auto &[k, v] : cache_data.kernels) { - for (const auto &f : Utils::get_cache_files(v)) { + for (const auto &f : Utils::get_cache_files(config, v)) { taichi::remove(taichi::join_path(path, f)); } } @@ -188,7 +217,7 @@ class CacheCleaner { TI_ASSERT(q.size() <= cnt); while (!q.empty()) { const auto *e = q.top(); - for (const auto &f : Utils::get_cache_files(e->second)) { + for (const auto &f : Utils::get_cache_files(config, e->second)) { files_to_rm.push_back(f); } cache_data.size -= e->second.size; @@ -199,8 +228,9 @@ class CacheCleaner { if (cache_data.kernels.empty()) { // Remove ok_rm_meta = taichi::remove(metadata_file); taichi::remove(debugging_metadata_file); + Utils::remove_other_files(config); } else { // Update - Utils::save_metadata(cache_data, metadata_file); + Utils::save_metadata(config, cache_data); ok_rm_meta = true; } } @@ -210,7 +240,7 @@ class CacheCleaner { if (ok_rm_meta) { if (!cache_data.kernels.empty()) { // For debugging (Not safe: without locking) - Utils::save_debugging_metadata(cache_data, debugging_metadata_file); + Utils::save_debugging_metadata(config, cache_data); } for (const auto &f : files_to_rm) { auto file_path = taichi::join_path(path, f); diff --git a/tests/python/test_offline_cache.py b/tests/python/test_offline_cache.py index 0d043d3500598..f6c6abcec4bf6 100644 --- a/tests/python/test_offline_cache.py +++ b/tests/python/test_offline_cache.py @@ -4,7 +4,7 @@ import os import shutil import threading -from os import listdir, remove, rmdir, stat +from os import listdir, rmdir, stat from os.path import join from tempfile import mkdtemp from time import sleep @@ -26,7 +26,7 @@ def is_offline_cache_file(filename): - suffixes = ('.ll', '.bc') + suffixes = ('.ll', '.bc', '.spv') return filename.endswith(suffixes) @@ -46,7 +46,8 @@ def expected_num_cache_files(arch, num_offloads: List[int] = None) -> int: if arch in [ti.cpu, ti.cuda]: result += 2 # metadata.{json, tcb} elif arch in [ti.vulkan]: - result += 3 # metadata.{json, tcb}, graphs.tcb + # metadata.{json, tcb}, graphs.tcb, offline_cache_metadata.tcb + result += 4 return result @@ -472,9 +473,7 @@ def helper(): curr_arch, [2, 2]) -# FIXME: Currently, the Vulkan offline cache doesn't support cache cleaning -@pytest.mark.parametrize( - 'curr_arch', list(set(supported_archs_offline_cache) - {ti.vulkan})) +@pytest.mark.parametrize('curr_arch', supported_archs_offline_cache) @pytest.mark.parametrize('factor', [0.0, 0.25, 0.85, 1.0]) @pytest.mark.parametrize('policy', ['never', 'version', 'lru', 'fifo']) @_test_offline_cache_dec @@ -495,10 +494,6 @@ def run_simple_kernels(max_size): sleep(1) # make sure the kernels are not used in the same second kernel_count = len(simple_kernels_to_test) - rem_factor = 1 if policy in [ - 'never', 'version' - ] else (kernel_count - int(factor * kernel_count)) / kernel_count - count_of_cache_file = cache_files_cnt(curr_arch) def added_files(arch): @@ -522,13 +517,13 @@ def added_files(arch): ti.reset() rem = 0 if policy in ['never', 'version']: - rem = sum([kern[3] for kern in simple_kernels_to_test]) + rem = expected_num_cache_files( + curr_arch, [kern[3] for kern in simple_kernels_to_test]) else: - for i in range( - min(kernel_count - int(factor * kernel_count), kernel_count)): - rem += simple_kernels_to_test[kernel_count - i - 1][3] - if rem > 0: - rem += 2 + lo = -min(kernel_count - int(factor * kernel_count), kernel_count) + lo = kernel_count if lo == 0 else lo + rem = expected_num_cache_files( + curr_arch, [kern[3] for kern in simple_kernels_to_test[lo:]]) assert added_files(curr_arch) == rem