Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

【CINN】Add pass to fold consecutive IfThenElse. #70142

Merged
merged 25 commits into from
Jan 2, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion paddle/cinn/optim/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@ gather_srcs(
rearrange_load_instruction.cc
check_tensor_buffer_map.cc
longlong2int_pass.cc
vectorize_for_trans.cc)
vectorize_for_trans.cc
if_fold_pass.cc)

if(WITH_CUDA OR WITH_ROCM)
gather_srcs(cinnapi_src SRCS transform_gpu_forloop.cc)
Expand Down
147 changes: 147 additions & 0 deletions paddle/cinn/optim/if_fold_pass.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once
#include "paddle/cinn/optim/if_fold_pass.h"
#include <vector>
#include "paddle/cinn/common/ir_util.h"
#include "paddle/cinn/common/simplify_special_pattern.h"
#include "paddle/cinn/ir/ir_printer.h"
#include "paddle/cinn/ir/op/ir_operators.h"
#include "paddle/cinn/ir/utils/ir_compare.h"
#include "paddle/cinn/ir/utils/ir_copy.h"

namespace cinn {
namespace optim {
using ir::stmt::IfThenElse;
using ir::stmt::StmtRef;

// Returns true only when `stmt` is an `IfThenElse` that can act as one link in
// a chain of foldable nested ifs, i.e. when all of the following hold:
//   - the false branch contains no statements;
//   - the true branch contains exactly one statement;
//   - the condition is an `ir::EQ` node;
//   - the right-hand operand of that `EQ` is a constant equal to 0.
// Every other statement (including non-`IfThenElse` ones) yields false.
bool JudgeIfStmt(const StmtRef& stmt) {
  if (!stmt.isa<IfThenElse>()) {
    return false;
  }

  auto if_node = stmt.as<IfThenElse>();
  auto eq_cond = if_node->condition().As<ir::EQ>();

  // Branch shape first: empty `else`, single-statement `then`.
  const bool has_expected_shape =
      if_node->false_case()->stmts().size() == 0 &&
      if_node->true_case()->stmts().size() == 1;
  if (!has_expected_shape) {
    return false;
  }

  // `eq_cond` is null when the condition is not an `ir::EQ`; otherwise its
  // right-hand side must be the constant 0.
  return eq_cond && eq_cond->b().is_constant() &&
         eq_cond->b().get_constant() == 0;
}

// Reports whether `stmt` is an `IfThenElse` whose condition has the form
// `<expr> == 0` (an `ir::EQ` whose right-hand operand is the constant 0).
// Unlike `JudgeIfStmt`, the true/false branches are not inspected.
bool IsIfWithEqCond(const StmtRef& stmt) {
  if (!stmt.isa<IfThenElse>()) {
    return false;
  }

  auto if_node = stmt.as<IfThenElse>();
  auto eq_cond = if_node->condition().As<ir::EQ>();
  if (!eq_cond) {
    return false;
  }

  return eq_cond->b().is_constant() && eq_cond->b().get_constant() == 0;
}

// Walks a chain of directly nested `IfThenElse` statements starting at `stmt`
// and collects the left-hand side of each `... == 0` condition it meets.
//
// stmt:     current statement in the chain.
// cond_vec: output; one normalized index expression is appended per collected
//           condition, outermost first.
// inner_op: output; set to the first statement that fails `JudgeIfStmt`, i.e.
//           the statement at which the chain ends.
//
// NOTE(review): a condition whose left-hand side is not an index expression
// is not appended to `cond_vec`, yet the walk still descends past (or stops
// at) that `IfThenElse` — confirm callers cannot lose such a condition.
void AppendContinuousIfCond(const StmtRef& stmt,
std::vector<ir::IndexExpr>* cond_vec,
StmtRef* inner_op) {
if (!JudgeIfStmt(stmt)) {
// The chain ends here. If the terminating statement is itself an
// `IfThenElse` with a `... == 0` condition, its condition is collected too.
if (IsIfWithEqCond(stmt)) {
auto eq_lhs = stmt.as<IfThenElse>()->condition().As<ir::EQ>()->a();
if (eq_lhs.is_index()) {
// Work on a copy so the original condition expression is not modified.
cond_vec->push_back(common::ChangeSeqOfDivMod(
ir::ir_utils::IRCopy(eq_lhs).as_index().Normalize()));
}
}
// Whatever the terminating statement is, report it as the innermost op.
*inner_op = stmt;
return;
}

// `stmt` is a foldable link: record its condition, then recurse into the
// single statement of its true branch.
auto if_stmt = stmt.as<IfThenElse>();
auto eq_lhs = if_stmt->condition().As<ir::EQ>()->a();
if (eq_lhs.is_index())
cond_vec->push_back(common::ChangeSeqOfDivMod(
ir::ir_utils::IRCopy(eq_lhs).as_index().Normalize()));
Comment on lines +80 to +82
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if加{}

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

// `JudgeIfStmt` guarantees the true branch holds exactly one statement.
AppendContinuousIfCond(
if_stmt->true_case()->stmts().at(0), cond_vec, inner_op);
}

// Tries to collapse the chain of nested `... == 0` ifs rooted at `stmt` into a
// single `IfThenElse` by replacing the condition and branches of `stmt`.
// Always returns `LogicalResult::success()`; a statement that does not match,
// or whose combined condition is not shorter, is left untouched.
LogicalResult IfFoldPass::Run(StmtRef stmt) {
// Only start from an `IfThenElse` accepted by `JudgeIfStmt`.
if (!JudgeIfStmt(stmt)) return LogicalResult::success();

std::vector<ir::IndexExpr> cond_vec;
StmtRef inner_op;

// Gather the conditions of the whole chain and the statement that ends it.
AppendContinuousIfCond(stmt, &cond_vec, &inner_op);

// `expr` accumulates the combined condition; `min_len` tracks the shortest
// individual condition seen.
ir::IndexExpr expr(0);
int32_t min_len = INT32_MAX;

VLOG(6) << "-------------cond_vec start--------------";
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

这是调试log吗?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

是预留的调试信息,之后方便调试

for (auto v : cond_vec) {
VLOG(6) << "v: " << v;
// Record the minimum length over all conditions: the fold is only applied
// when the simplified result is shorter than every one of them.
min_len = std::min(v.length(), min_len);
// Each normalized condition is expected to have the form `a % b / c`
// (modulo first, then division). The conditions are recombined as follows:
// origin:
// ((256*j)+((1024*i)+k))/3136==0
// ((256*j)+((1024*i)+k))%3136/56==0
// ((256*j)+((1024*i)+k))%56==0
// Mul and Sum:
// ((256*j)+((1024*i)+k))/3136*3136
// +((256*j)+((1024*i)+k))%3136/56*56
// +((256*j)+((1024*i)+k))%56==0
// A `Div` condition is multiplied by its second operand before being added;
// any other condition is added unchanged.
if (v.node_type() == ir::IrNodeTy::Div) {
expr = expr + v * v.operand(1);
} else {
expr = expr + v;
}
Comment on lines +114 to +118
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

加一些注释说明这里处理的原理

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

}
VLOG(6) << "-------------cond_vec end----------------";

// Normalize the multiplied-and-summed expression so it can simplify.
expr = expr.Normalize(ir::IndexExpr::OptLevel::Level2);

// Rewrite only when the combined expression compares unequal to
// `ir::IndexExpr(0)`, is strictly shorter than the shortest collected
// condition, and an innermost statement was recorded.
if (expr != ir::IndexExpr(0) && expr.length() < min_len &&
inner_op.defined()) {
VLOG(6) << "old stmt: " << stmt;
auto stmt_if = stmt.as<IfThenElse>();
stmt_if->set_condition(ir::EQ::Make(expr, ir::IndexExpr(0)));
// NOTE(review): this branch discards `inner_op`'s own condition. That is
// only sound if `AppendContinuousIfCond` added it to `cond_vec`, which it
// does only when the left-hand side is an index expression — TODO confirm.
// NOTE(review): the hoisted false branch now runs whenever the folded
// condition is non-zero, whereas in the nested form it was reachable only
// when every enclosing condition held — confirm this is intended.
if (IsIfWithEqCond(inner_op)) {
stmt_if->set_true_case(inner_op.as<IfThenElse>()->true_case());
stmt_if->set_false_case(inner_op.as<IfThenElse>()->false_case());
} else {
// Any other innermost statement becomes the sole content of the true
// branch.
stmt_if->set_true_case(
ir::stmt::BlockRef(std::vector<StmtRef>{inner_op}));
}
VLOG(6) << "new stmt: " << stmt;
}

return LogicalResult::success();
}

// Factory: returns a newly created `IfFoldPass`, owned by the caller through
// a `std::unique_ptr<StmtPass>`.
std::unique_ptr<StmtPass> CreateIfFoldPass() {
return std::make_unique<IfFoldPass>();
}
} // namespace optim
} // namespace cinn
94 changes: 94 additions & 0 deletions paddle/cinn/optim/if_fold_pass.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once
#include "paddle/cinn/pass/pass.h"

namespace cinn {
namespace optim {
// Statement-level pass (derives from `StmtPass`) that folds consecutively
// nested `IfThenElse` statements.
class IfFoldPass : public StmtPass {
public:
// Passes the name "if_fold" to the `StmtPass` base constructor.
IfFoldPass() : StmtPass("if_fold") {}
// Runs the pass on a single statement; defined out of line.
LogicalResult Run(ir::stmt::StmtRef stmt) override;
};

/**
* Fold several consecutively nested `IfThenElse` statements whose conditions
* all compare against 0 (`... == 0`) into a single `IfThenElse` with a
* simplified condition.
*
* This pass is used when there are nested `IfThenElse` statements in a block,
* their conditions are all `== 0` comparisons, and those conditions can be
* mathematically proven to be simplifiable.
*
* When applied, the consecutively nested `IfThenElse` statements are converted
* into a single `IfThenElse` in the IR.
*
* Performance impact: This pass primarily addresses code size and readability.
* By reducing the number of redundant condition checks, it may also slightly
* improve branch prediction and reduce instruction cache pressure.
*
* Examples:
* case1: All ifs can be simplified.
* if ((((((256 * j) + ((1024 * i) + k)) / 56) / 56) == 0)) {
* if ((((((256 * j) + ((1024 * i) + k)) / 56) % 56) == 0)) {
* if (((((256 * j) + ((1024 * i) + k)) % 56) == 0)) {
* int32 a = 1
* }
* }
* }
* can be simplified to:
* if (((((i * 1024ll) + k) + (j * 256ll)) == 0)) {
* int32 a = 1
* }
*
* case2: All ifs can be simplified and the innermost one has a false branch.
* if ((((((256 * j) + ((1024 * i) + k)) / 56) / 56) == 0)) {
* if ((((((256 * j) + ((1024 * i) + k)) / 56) % 56) == 0)) {
* if (((((256 * j) + ((1024 * i) + k)) % 56) == 0)) {
* int32 a = 1
* int32 b = 1
* } else {
* int32 c = 1
* }
* }
* }
* can be simplified to:
* if (((((i * 1024ll) + k) + (j * 256ll)) == 0)) {
* int32 a = 1
* int32 b = 1
* } else {
* int32 c = 1
* }
* NOTE(review): after folding, the `else` branch is attached to the combined
* condition, so it is taken whenever any of the original conditions is
* non-zero; in the nested form it was reachable only when both outer
* conditions held. Confirm this is the intended semantics.
*
* case3: The innermost `if` cannot be folded.
* if ((((((256 * j) + ((1024 * i) + k)) / 56) / 56) == 0)) {
* if ((((((256 * j) + ((1024 * i) + k)) / 56) % 56) == 0)) {
* if (((((256 * j) + ((1024 * i) + k)) % 56) == 0)) {
* if ((l <= 0)) {
* int32 a = 1
* }
* }
* }
* }
* can be simplified to:
* if (((((i * 1024ll) + k) + (j * 256ll)) == 0)) {
* if ((l <= 0)) {
* int32 a = 1
* }
* }
*
* @return a newly created pass instance, owned by the caller.
*/
std::unique_ptr<StmtPass> CreateIfFoldPass();

} // namespace optim
} // namespace cinn
6 changes: 6 additions & 0 deletions paddle/cinn/optim/optimize.cc
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#include "paddle/cinn/optim/eliminate_invariant_loop.h"
#include "paddle/cinn/optim/extern_call_process_pass.h"
#include "paddle/cinn/optim/fold_cinn_call_arguments.h"
#include "paddle/cinn/optim/if_fold_pass.h"
#include "paddle/cinn/optim/if_fusion_pass.h"
#include "paddle/cinn/optim/insert_debug_log_callee.h"
#include "paddle/cinn/optim/ir_simplify.h"
Expand Down Expand Up @@ -132,6 +133,11 @@ ir::LoweredFunc Optimize(ir::LoweredFunc fn,
pass_manager.Run(copied);
VLOG(10) << "After RemoveScheduleBlock:" << copied;

StmtPassManager stmt_pass_manager;
stmt_pass_manager.AddPass(CreateIfFoldPass());
stmt_pass_manager.Run(copied);
VLOG(10) << "After IfFoldPass:" << copied;

LowerIntrin(&copied->body, target);
VLOG(10) << "After LowerIntrin:" << copied;

Expand Down
2 changes: 2 additions & 0 deletions test/cpp/pir/cinn/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ if(WITH_TESTING AND WITH_CINN)
paddle_test(replace_cross_block_reduction_test SRCS
replace_cross_block_reduction_test.cc)

paddle_test(ir_simplify_test SRCS ir_simplify_test.cc)

# DO NOT forget add test name here, otherwise it will not be executed in
# CINN CI.
set(cinn_unit_tests
Expand Down
Loading
Loading