Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[llvm] [aot] Added CGraph tests for LLVM backend #5305

Merged
merged 17 commits into from
Jul 6, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion cmake/TaichiTests.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,9 @@ endif()
file(GLOB_RECURSE TAICHI_TESTS_SOURCE
"tests/cpp/analysis/*.cpp"
"tests/cpp/aot/*.cpp"
"tests/cpp/aot/llvm/*.cpp"
"tests/cpp/aot/vulkan/*.cpp"
"tests/cpp/backends/*.cpp"
"tests/cpp/backends/llvm/*.cpp"
"tests/cpp/codegen/*.cpp"
"tests/cpp/common/*.cpp"
"tests/cpp/ir/*.cpp"
Expand Down
9 changes: 4 additions & 5 deletions taichi/aot/graph_data.cpp
Original file line number Diff line number Diff line change
@@ -1,19 +1,17 @@
#include "taichi/aot/graph_data.h"
#include "taichi/program/ndarray.h"
#define TI_RUNTIME_HOST
#include "taichi/program/context.h"
#undef TI_RUNTIME_HOST

namespace taichi {
namespace lang {
namespace aot {

void CompiledGraph::run(
const std::unordered_map<std::string, IValue> &args) const {
RuntimeContext ctx;
for (const auto &dispatch : dispatches) {
memset(&ctx, 0, sizeof(RuntimeContext));
RuntimeContext ctx = ctx_;

TI_ASSERT(dispatch.compiled_kernel);

// Populate args metadata into RuntimeContext
const auto &symbolic_args_ = dispatch.symbolic_args;
for (int i = 0; i < symbolic_args_.size(); ++i) {
Expand All @@ -27,6 +25,7 @@ void CompiledGraph::run(
TI_ERROR_IF(arr->element_shape != symbolic_arg.element_shape,
"Mismatched shape information for argument {}",
symbolic_arg.name);

set_runtime_ctx_ndarray(&ctx, i, arr);
} else if (ival.tag == aot::ArgKind::kScalar) {
ctx.set_arg(i, ival.val);
Expand Down
6 changes: 5 additions & 1 deletion taichi/aot/graph_data.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@
#include <unordered_map>
#include "taichi/ir/type.h"
#include "taichi/aot/module_data.h"
#define TI_RUNTIME_HOST
#include "taichi/program/context.h"
#undef TI_RUNTIME_HOST

template <typename T, typename G>
T taichi_union_cast_with_different_sizes(G g);
Expand All @@ -12,7 +15,7 @@ namespace taichi {
namespace lang {
class AotModuleBuilder;
class Ndarray;
struct RuntimeContext;

namespace aot {
// Currently only scalar, matrix and ndarray are supported.
enum class ArgKind { kScalar, kMatrix, kNdarray, kUnknown };
Expand Down Expand Up @@ -112,6 +115,7 @@ struct CompiledDispatch {

struct TI_DLL_EXPORT CompiledGraph {
std::vector<CompiledDispatch> dispatches;
RuntimeContext ctx_;

void run(const std::unordered_map<std::string, IValue> &args) const;

Expand Down
8 changes: 8 additions & 0 deletions taichi/program/ndarray.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,14 @@ void set_runtime_ctx_ndarray(RuntimeContext *ctx,
int arg_id,
Ndarray *ndarray) {
ctx->set_arg_devalloc(arg_id, ndarray->ndarray_alloc_, ndarray->shape);

uint64_t total_array_size = 1;
for (const auto &dim : ndarray->total_shape()) {
total_array_size *= dim;
}
total_array_size *= data_type_size(ndarray->dtype);

ctx->set_array_runtime_size(arg_id, total_array_size);
}

} // namespace lang
Expand Down
9 changes: 6 additions & 3 deletions taichi/runtime/llvm/llvm_aot_module_loader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ LlvmOfflineCache::KernelCacheData LlvmAotModule::load_kernel_from_cache(
// Builds an executable aot::Kernel for `name` from the offline cache.
// NOTE: the scraped diff contained both the old and new return statements;
// only the new one (empty KernelCacheData) is kept here.
std::unique_ptr<aot::Kernel> LlvmAotModule::make_new_kernel(
    const std::string &name) {
  // Load the cached kernel data and lower it into a callable function.
  auto fn = convert_module_to_function(name, load_kernel_from_cache(name));
  // Pass an empty cache entry to KernelImpl — presumably the kernel data was
  // already consumed by convert_module_to_function above, so re-loading it
  // (as the old code did) would be wasted work. TODO(review): confirm
  // KernelImpl does not need the real KernelCacheData here.
  return std::make_unique<llvm_aot::KernelImpl>(
      fn, name, LlvmOfflineCache::KernelCacheData());
}

std::unique_ptr<aot::Field> LlvmAotModule::make_new_field(
Expand Down Expand Up @@ -47,7 +47,10 @@ std::unique_ptr<aot::CompiledGraph> LlvmAotModule::get_graph(std::string name) {
dispatches.push_back({dispatch.kernel_name, dispatch.symbolic_args,
get_kernel(dispatch.kernel_name)});
}
aot::CompiledGraph graph{dispatches};

aot::CompiledGraph graph = aot::CompiledGraph({dispatches});
executor_->prepare_runtime_context(&graph.ctx_);

return std::make_unique<aot::CompiledGraph>(std::move(graph));
}

Expand Down
4 changes: 2 additions & 2 deletions taichi/runtime/llvm/llvm_runtime_executor.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,8 @@ class LlvmRuntimeExecutor {

LLVMRuntime *get_llvm_runtime();

void prepare_runtime_context(RuntimeContext *ctx);

private:
/* ----------------------- */
/* ------ Allocation ----- */
Expand All @@ -85,8 +87,6 @@ class LlvmRuntimeExecutor {
std::vector<std::unique_ptr<SNodeTree>> &snode_trees_,
uint64 *result_buffer);

void prepare_runtime_context(RuntimeContext *ctx);

template <typename T, typename... Args>
T runtime_query(const std::string &key,
uint64 *result_buffer,
Expand Down
4 changes: 2 additions & 2 deletions taichi/runtime/program_impls/llvm/llvm_program.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@
#include "llvm/IR/Module.h"

#include "taichi/program/program.h"
#include "taichi/runtime/llvm/aot_graph_data.h"
#include "taichi/codegen/codegen.h"
#include "taichi/codegen/llvm/struct_llvm.h"
#include "taichi/runtime/llvm/aot_graph_data.h"
#include "taichi/runtime/llvm/llvm_offline_cache.h"
#include "taichi/codegen/codegen.h"
#include "taichi/runtime/cpu/aot_module_builder_impl.h"

#if defined(TI_WITH_CUDA)
Expand Down
17 changes: 17 additions & 0 deletions tests/cpp/aot/llvm/field_aot_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import argparse

from utils import compile_field_aot

import taichi as ti


def _parse_args() -> argparse.Namespace:
    """Parse the --arch flag selecting the target backend (cpu or cuda)."""
    parser = argparse.ArgumentParser()
    # required=True: fail with a clear usage error instead of falling through
    # to the unsupported-arch branch when the flag is missing.
    parser.add_argument("--arch", type=str, required=True)
    return parser.parse_args()


if __name__ == "__main__":
    # Parse inside the guard so importing this module has no side effects.
    args = _parse_args()
    if args.arch == "cpu":
        compile_field_aot(arch=ti.cpu)
    elif args.arch == "cuda":
        compile_field_aot(arch=ti.cuda)
    else:
        # A raise (unlike `assert False`) still fires under `python -O`
        # and reports which value was rejected.
        raise RuntimeError(f"Unsupported arch: {args.arch}")
17 changes: 17 additions & 0 deletions tests/cpp/aot/llvm/kernel_aot_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import argparse

from utils import compile_kernel_aot

import taichi as ti


def _parse_args() -> argparse.Namespace:
    """Parse the --arch flag selecting the target backend (cpu or cuda)."""
    parser = argparse.ArgumentParser()
    # required=True: fail with a clear usage error instead of falling through
    # to the unsupported-arch branch when the flag is missing.
    parser.add_argument("--arch", type=str, required=True)
    return parser.parse_args()


if __name__ == "__main__":
    # Parse inside the guard so importing this module has no side effects.
    args = _parse_args()
    if args.arch == "cpu":
        compile_kernel_aot(arch=ti.cpu)
    elif args.arch == "cuda":
        compile_kernel_aot(arch=ti.cuda)
    else:
        # A raise (unlike `assert False`) still fires under `python -O`
        # and reports which value was rejected.
        raise RuntimeError(f"Unsupported arch: {args.arch}")
199 changes: 199 additions & 0 deletions tests/cpp/aot/llvm/mpm88_graph_aot_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,199 @@
#include "gtest/gtest.h"
#include "taichi/ir/ir_builder.h"
#include "taichi/ir/statements.h"
#include "taichi/inc/constants.h"
#include "taichi/program/program.h"
#include "tests/cpp/ir/ndarray_kernel.h"
#include "tests/cpp/program/test_program.h"
#include "taichi/aot/graph_data.h"
#include "taichi/program/graph_builder.h"
#include "taichi/runtime/gfx/aot_module_loader_impl.h"
#include "taichi/rhi/device.h"

#include "taichi/program/kernel_profiler.h"
#include "taichi/runtime/program_impls/llvm/llvm_program.h"
#include "taichi/system/memory_pool.h"
#include "taichi/runtime/cpu/aot_module_loader_impl.h"
#include "taichi/runtime/cuda/aot_module_loader_impl.h"
#include "taichi/rhi/cuda/cuda_driver.h"
#include "taichi/platform/cuda/detect_cuda.h"

#define TI_RUNTIME_HOST
#include "taichi/program/context.h"
#undef TI_RUNTIME_HOST

using namespace taichi;
using namespace lang;

// Problem size for the MPM88 demo: particle count and (square) grid
// resolution. These must match the sizes the AOT module was compiled for.
constexpr int NR_PARTICLES = 8192;
constexpr int N_GRID = 128;

// End-to-end test: load the AOT-compiled MPM88 compute graphs ("init" and
// "update") from TAICHI_AOT_FOLDER_PATH and run them on the CPU (x64)
// LLVM backend.
TEST(LlvmCGraph, Mpm88Cpu) {
  CompileConfig cfg;
  cfg.arch = Arch::x64;
  cfg.kernel_profiler = false;
  constexpr KernelProfilerBase *kNoProfiler = nullptr;
  LlvmProgramImpl prog{cfg, kNoProfiler};
  auto *compute_device = prog.get_compute_device();
  // Must have handled all the arch fallback logic by this point.
  auto memory_pool = std::make_unique<MemoryPool>(cfg.arch, compute_device);
  prog.initialize_host();
  uint64 *result_buffer{nullptr};
  prog.materialize_runtime(memory_pool.get(), kNoProfiler, &result_buffer);

  /* AOTLoader */
  cpu::AotModuleParams aot_params;
  const auto folder_dir = getenv("TAICHI_AOT_FOLDER_PATH");
  // Streaming a null char* into a stringstream is undefined behavior; fail
  // fast with a clear message when the environment variable is unset.
  ASSERT_NE(folder_dir, nullptr)
      << "TAICHI_AOT_FOLDER_PATH environment variable is not set";
  aot_params.module_path = folder_dir;
  aot_params.executor_ = prog.get_runtime_executor();
  auto mod = cpu::make_aot_module(aot_params);

  // Prepare & Run "init" Graph
  auto g_init = mod->get_graph("init");

  /* Prepare arguments */
  // x: per-particle position, 2-component f32 vectors.
  constexpr int kArrBytes_x = NR_PARTICLES * 2 * sizeof(float);
  auto devalloc_x = prog.allocate_memory_ndarray(kArrBytes_x, result_buffer);
  auto x = taichi::lang::Ndarray(devalloc_x, taichi::lang::PrimitiveType::f32,
                                 {NR_PARTICLES}, {2});

  // v: per-particle velocity, 2-component f32 vectors.
  constexpr int kArrBytes_v = NR_PARTICLES * 2 * sizeof(float);
  auto devalloc_v = prog.allocate_memory_ndarray(kArrBytes_v, result_buffer);
  auto v = taichi::lang::Ndarray(devalloc_v, taichi::lang::PrimitiveType::f32,
                                 {NR_PARTICLES}, {2});

  // J: per-particle scalar (volume ratio).
  constexpr int kArrBytes_J = NR_PARTICLES * sizeof(float);
  auto devalloc_J = prog.allocate_memory_ndarray(kArrBytes_J, result_buffer);
  auto J = taichi::lang::Ndarray(devalloc_J, taichi::lang::PrimitiveType::f32,
                                 {NR_PARTICLES});

  std::unordered_map<std::string, taichi::lang::aot::IValue> args;
  args.insert({"x", taichi::lang::aot::IValue::create(x)});
  args.insert({"v", taichi::lang::aot::IValue::create(v)});
  args.insert({"J", taichi::lang::aot::IValue::create(J)});

  g_init->run(args);
  prog.synchronize();

  // Prepare & Run "update" Graph
  auto g_update = mod->get_graph("update");

  // grid_v: grid velocity field, 2-component f32 vectors on an N_GRID^2 grid.
  constexpr int kArrBytes_grid_v = N_GRID * N_GRID * 2 * sizeof(float);
  auto devalloc_grid_v =
      prog.allocate_memory_ndarray(kArrBytes_grid_v, result_buffer);
  auto grid_v = taichi::lang::Ndarray(
      devalloc_grid_v, taichi::lang::PrimitiveType::f32, {N_GRID, N_GRID}, {2});

  // grid_m: grid mass field, scalar f32 on an N_GRID^2 grid.
  constexpr int kArrBytes_grid_m = N_GRID * N_GRID * sizeof(float);
  auto devalloc_grid_m =
      prog.allocate_memory_ndarray(kArrBytes_grid_m, result_buffer);
  auto grid_m = taichi::lang::Ndarray(
      devalloc_grid_m, taichi::lang::PrimitiveType::f32, {N_GRID, N_GRID});

  // pos: per-particle render position, 3-component f32 vectors.
  constexpr int kArrBytes_pos = NR_PARTICLES * 3 * sizeof(float);
  auto devalloc_pos =
      prog.allocate_memory_ndarray(kArrBytes_pos, result_buffer);
  auto pos = taichi::lang::Ndarray(
      devalloc_pos, taichi::lang::PrimitiveType::f32, {NR_PARTICLES}, {3});

  // C: per-particle 2x2 affine velocity matrix.
  constexpr int kArrBytes_C = NR_PARTICLES * sizeof(float) * 2 * 2;
  auto devalloc_C = prog.allocate_memory_ndarray(kArrBytes_C, result_buffer);
  auto C = taichi::lang::Ndarray(devalloc_C, taichi::lang::PrimitiveType::f32,
                                 {NR_PARTICLES}, {2, 2});

  args.insert({"C", taichi::lang::aot::IValue::create(C)});
  args.insert({"grid_v", taichi::lang::aot::IValue::create(grid_v)});
  args.insert({"grid_m", taichi::lang::aot::IValue::create(grid_m)});
  args.insert({"pos", taichi::lang::aot::IValue::create(pos)});

  g_update->run(args);
  prog.synchronize();
}

// End-to-end test: load the AOT-compiled MPM88 compute graphs ("init" and
// "update") and run them on the CUDA LLVM backend. Silently passes when no
// CUDA driver is available on the machine.
TEST(LlvmCGraph, Mpm88Cuda) {
  if (is_cuda_api_available()) {
    CompileConfig cfg;
    cfg.arch = Arch::cuda;
    cfg.kernel_profiler = false;
    constexpr KernelProfilerBase *kNoProfiler = nullptr;
    LlvmProgramImpl prog{cfg, kNoProfiler};
    prog.initialize_host();
    uint64 *result_buffer{nullptr};
    prog.materialize_runtime(nullptr, kNoProfiler, &result_buffer);

    /* AOTLoader */
    cuda::AotModuleParams aot_params;
    const auto folder_dir = getenv("TAICHI_AOT_FOLDER_PATH");
    // Streaming a null char* into a stringstream is undefined behavior; fail
    // fast with a clear message when the environment variable is unset.
    ASSERT_NE(folder_dir, nullptr)
        << "TAICHI_AOT_FOLDER_PATH environment variable is not set";
    aot_params.module_path = folder_dir;
    aot_params.executor_ = prog.get_runtime_executor();
    auto mod = cuda::make_aot_module(aot_params);

    // Prepare & Run "init" Graph
    auto g_init = mod->get_graph("init");

    /* Prepare arguments */
    // x: per-particle position, 2-component f32 vectors.
    constexpr int kArrBytes_x = NR_PARTICLES * 2 * sizeof(float);
    auto devalloc_x = prog.allocate_memory_ndarray(kArrBytes_x, result_buffer);
    auto x = taichi::lang::Ndarray(devalloc_x, taichi::lang::PrimitiveType::f32,
                                   {NR_PARTICLES}, {2});

    // v: per-particle velocity, 2-component f32 vectors.
    constexpr int kArrBytes_v = NR_PARTICLES * 2 * sizeof(float);
    auto devalloc_v = prog.allocate_memory_ndarray(kArrBytes_v, result_buffer);
    auto v = taichi::lang::Ndarray(devalloc_v, taichi::lang::PrimitiveType::f32,
                                   {NR_PARTICLES}, {2});

    // J: per-particle scalar (volume ratio).
    constexpr int kArrBytes_J = NR_PARTICLES * sizeof(float);
    auto devalloc_J = prog.allocate_memory_ndarray(kArrBytes_J, result_buffer);
    auto J = taichi::lang::Ndarray(devalloc_J, taichi::lang::PrimitiveType::f32,
                                   {NR_PARTICLES});

    std::unordered_map<std::string, taichi::lang::aot::IValue> args;
    args.insert({"x", taichi::lang::aot::IValue::create(x)});
    args.insert({"v", taichi::lang::aot::IValue::create(v)});
    args.insert({"J", taichi::lang::aot::IValue::create(J)});

    g_init->run(args);
    prog.synchronize();

    // Prepare & Run "update" Graph
    auto g_update = mod->get_graph("update");

    // grid_v: grid velocity field, 2-component f32 vectors on N_GRID^2 cells.
    constexpr int kArrBytes_grid_v = N_GRID * N_GRID * 2 * sizeof(float);
    auto devalloc_grid_v =
        prog.allocate_memory_ndarray(kArrBytes_grid_v, result_buffer);
    auto grid_v =
        taichi::lang::Ndarray(devalloc_grid_v, taichi::lang::PrimitiveType::f32,
                              {N_GRID, N_GRID}, {2});

    // grid_m: grid mass field, scalar f32 on N_GRID^2 cells.
    constexpr int kArrBytes_grid_m = N_GRID * N_GRID * sizeof(float);
    auto devalloc_grid_m =
        prog.allocate_memory_ndarray(kArrBytes_grid_m, result_buffer);
    auto grid_m = taichi::lang::Ndarray(
        devalloc_grid_m, taichi::lang::PrimitiveType::f32, {N_GRID, N_GRID});

    // pos: per-particle render position, 3-component f32 vectors.
    constexpr int kArrBytes_pos = NR_PARTICLES * 3 * sizeof(float);
    auto devalloc_pos =
        prog.allocate_memory_ndarray(kArrBytes_pos, result_buffer);
    auto pos = taichi::lang::Ndarray(
        devalloc_pos, taichi::lang::PrimitiveType::f32, {NR_PARTICLES}, {3});

    // C: per-particle 2x2 affine velocity matrix.
    constexpr int kArrBytes_C = NR_PARTICLES * sizeof(float) * 2 * 2;
    auto devalloc_C = prog.allocate_memory_ndarray(kArrBytes_C, result_buffer);
    auto C = taichi::lang::Ndarray(devalloc_C, taichi::lang::PrimitiveType::f32,
                                   {NR_PARTICLES}, {2, 2});

    args.insert({"C", taichi::lang::aot::IValue::create(C)});
    args.insert({"grid_v", taichi::lang::aot::IValue::create(grid_v)});
    args.insert({"grid_m", taichi::lang::aot::IValue::create(grid_m)});
    args.insert({"pos", taichi::lang::aot::IValue::create(pos)});

    g_update->run(args);
    prog.synchronize();
  }
}
File renamed without changes.
Loading