Skip to content

Commit d123e98

Browse files
committed Jun 5, 2021
[Polly] Move MatMul optimization into its own file. NFC.
Functions shared between generalized matrix-multiplication optimization and other post-reschedule optimizations (tiling, prevect) are moved into the schedule tree transformation utility ScheduleTreeTransform.
1 parent d8a4a2c commit d123e98

9 files changed

+1244
-1192
lines changed
 

‎polly/include/polly/MatmulOptimizer.h

+74
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
//===- MatmulOptimizer.h -------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef POLLY_MATMULOPTIMIZER_H
10+
#define POLLY_MATMULOPTIMIZER_H
11+
12+
#include "isl/isl-noexceptions.h"
13+
14+
namespace llvm {
15+
class TargetTransformInfo;
16+
}
17+
18+
namespace polly {
19+
struct Dependences;
20+
21+
/// Apply the BLIS matmul optimization pattern if possible.
22+
///
23+
/// Make the loops containing the matrix multiplication be the innermost
24+
/// loops and apply the BLIS matmul optimization pattern. BLIS implements
25+
/// gemm as three nested loops around a macro-kernel, plus two packing
26+
/// routines. The macro-kernel is implemented in terms of two additional
27+
/// loops around a micro-kernel. The micro-kernel is a loop around a rank-1
28+
/// (i.e., outer product) update.
29+
///
30+
/// For a detailed description please see [1].
31+
///
32+
/// The order of the loops defines the data reused in the BLIS implementation
33+
/// of gemm ([1]). In particular, elements of the matrix B, the second
34+
/// operand of matrix multiplication, are reused between iterations of the
35+
/// innermost loop. To keep the reused data in cache, only elements of matrix
36+
/// A, the first operand of matrix multiplication, should be evicted during
37+
/// an iteration of the innermost loop. To provide such a cache replacement
38+
/// policy, elements of the matrix A can, in particular, be loaded first and,
39+
/// consequently, be least-recently-used.
40+
///
41+
/// In our case matrices are stored in row-major order instead of
42+
/// column-major order used in the BLIS implementation ([1]). It affects only
43+
/// the form of the BLIS micro kernel and the computation of its
44+
/// parameters. In particular, reused elements of the matrix B are
45+
/// successively multiplied by specific elements of the matrix A.
46+
///
47+
/// Refs.:
48+
/// [1] - Analytical Modeling is Enough for High Performance BLIS
49+
/// Tze Meng Low, Francisco D Igual, Tyler M Smith, Enrique S Quintana-Orti
50+
/// Technical Report, 2014
51+
/// http://www.cs.utexas.edu/users/flame/pubs/TOMS-BLIS-Analytical.pdf
52+
///
53+
/// @see ScheduleTreeOptimizer::createMicroKernel
54+
/// @see ScheduleTreeOptimizer::createMacroKernel
55+
/// @see getMicroKernelParams
56+
/// @see getMacroKernelParams
57+
///
58+
/// TODO: Implement the packing transformation.
59+
///
60+
/// @param Node The node that contains a band to be optimized. The node
61+
/// is required to successfully pass
62+
/// ScheduleTreeOptimizer::isMatrMultPattern.
63+
/// @param TTI Target Transform Info.
64+
/// @param D The dependencies.
65+
///
66+
/// @returns The transformed schedule or nullptr if the optimization
67+
/// cannot be applied.
68+
isl::schedule_node
69+
tryOptimizeMatMulPattern(isl::schedule_node Node,
70+
const llvm::TargetTransformInfo *TTI,
71+
const Dependences *D);
72+
73+
} // namespace polly
74+
#endif // POLLY_MATMULOPTIMIZER_H

‎polly/include/polly/ScheduleOptimizer.h

-20
Original file line numberDiff line numberDiff line change
@@ -37,26 +37,6 @@ struct IslScheduleOptimizerPrinterPass
3737
private:
3838
llvm::raw_ostream &OS;
3939
};
40-
41-
/// Build the desired set of partial tile prefixes.
42-
///
43-
/// We build a set of partial tile prefixes, which are prefixes of the vector
44-
/// loop that have exactly VectorWidth iterations.
45-
///
46-
/// 1. Drop all constraints involving the dimension that represents the
47-
/// vector loop.
48-
/// 2. Constrain the last dimension to get a set, which has exactly VectorWidth
49-
/// iterations.
50-
/// 3. Subtract loop domain from it, project out the vector loop dimension and
51-
/// get a set that contains prefixes, which do not have exactly VectorWidth
52-
/// iterations.
53-
/// 4. Project out the vector loop dimension of the set that was build on the
54-
/// first step and subtract the set built on the previous step to get the
55-
/// desired set of prefixes.
56-
///
57-
/// @param ScheduleRange A range of a map, which describes a prefix schedule
58-
/// relation.
59-
isl::set getPartialTilePrefixes(isl::set ScheduleRange, int VectorWidth);
6040
} // namespace polly
6141

6242
namespace llvm {

‎polly/include/polly/ScheduleTreeTransform.h

+60
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#ifndef POLLY_SCHEDULETREETRANSFORM_H
1414
#define POLLY_SCHEDULETREETRANSFORM_H
1515

16+
#include "llvm/ADT/ArrayRef.h"
1617
#include "llvm/Support/ErrorHandling.h"
1718
#include "isl/isl-noexceptions.h"
1819
#include <cassert>
@@ -164,6 +165,65 @@ isl::schedule applyFullUnroll(isl::schedule_node BandToUnroll);
164165
/// Replace the AST band @p BandToUnroll by a partially unrolled equivalent.
165166
isl::schedule applyPartialUnroll(isl::schedule_node BandToUnroll, int Factor);
166167

168+
/// Build the desired set of partial tile prefixes.
169+
///
170+
/// We build a set of partial tile prefixes, which are prefixes of the vector
171+
/// loop that have exactly VectorWidth iterations.
172+
///
173+
/// 1. Drop all constraints involving the dimension that represents the
174+
/// vector loop.
175+
/// 2. Constrain the last dimension to get a set, which has exactly VectorWidth
176+
/// iterations.
177+
/// 3. Subtract loop domain from it, project out the vector loop dimension and
178+
/// get a set that contains prefixes, which do not have exactly VectorWidth
179+
/// iterations.
180+
/// 4. Project out the vector loop dimension of the set that was built on the
181+
/// first step and subtract the set built on the previous step to get the
182+
/// desired set of prefixes.
183+
///
184+
/// @param ScheduleRange A range of a map, which describes a prefix schedule
185+
/// relation.
186+
isl::set getPartialTilePrefixes(isl::set ScheduleRange, int VectorWidth);
187+
188+
/// Create an isl::union_set, which describes the isolate option based on
189+
/// IsolateDomain.
190+
///
191+
/// @param IsolateDomain An isl::set whose @p OutDimsNum last dimensions should
192+
/// belong to the current band node.
193+
/// @param OutDimsNum A number of dimensions that should belong to
194+
/// the current band node.
195+
isl::union_set getIsolateOptions(isl::set IsolateDomain, isl_size OutDimsNum);
196+
197+
/// Create an isl::union_set, which describes the specified option for the
198+
/// dimension of the current node.
199+
///
200+
/// @param Ctx An isl::ctx, which is used to create the isl::union_set.
201+
/// @param Option The name of the option.
202+
isl::union_set getDimOptions(isl::ctx Ctx, const char *Option);
203+
204+
/// Tile a schedule node.
205+
///
206+
/// @param Node The node to tile.
207+
/// @param Identifier A name that identifies this kind of tiling and
208+
/// that is used to mark the tiled loops in the
209+
/// generated AST.
210+
/// @param TileSizes A vector of tile sizes that should be used for
211+
/// tiling.
212+
/// @param DefaultTileSize A default tile size that is used for dimensions
213+
/// that are not covered by the TileSizes vector.
214+
isl::schedule_node tileNode(isl::schedule_node Node, const char *Identifier,
215+
llvm::ArrayRef<int> TileSizes, int DefaultTileSize);
216+
217+
/// Tile a schedule node and unroll point loops.
218+
///
219+
/// @param Node The node to register tile.
220+
/// @param TileSizes A vector of tile sizes that should be used for
221+
/// tiling.
222+
/// @param DefaultTileSize A default tile size that is used for dimensions that are not covered by the TileSizes vector.
223+
isl::schedule_node applyRegisterTiling(isl::schedule_node Node,
224+
llvm::ArrayRef<int> TileSizes,
225+
int DefaultTileSize);
226+
167227
} // namespace polly
168228

169229
#endif // POLLY_SCHEDULETREETRANSFORM_H

‎polly/lib/CMakeLists.txt

+1
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,7 @@ add_llvm_pass_plugin(Polly
9999
Transform/RewriteByReferenceParameters.cpp
100100
Transform/ScopInliner.cpp
101101
Transform/ManualOptimizer.cpp
102+
Transform/MatmulOptimizer.cpp
102103
${POLLY_HEADER_FILES}
103104

104105
LINK_COMPONENTS

0 commit comments

Comments
 (0)