Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 0 additions & 46 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -118,58 +118,12 @@ function(set_halide_compiler_warnings NAME)
# GCC warns when these warnings are given to plain-C sources
$<$<COMPILE_LANG_AND_ID:CXX,GNU,Clang,AppleClang>:-Woverloaded-virtual>
$<$<COMPILE_LANG_AND_ID:CXX,GNU>:-Wsuggest-override>
$<$<COMPILE_LANG_AND_ID:CXX,GNU,Clang,AppleClang>:-Wno-old-style-cast>

$<$<CXX_COMPILER_ID:Clang,AppleClang>:-Winconsistent-missing-destructor-override>
$<$<CXX_COMPILER_ID:Clang,AppleClang>:-Winconsistent-missing-override>
$<$<CXX_COMPILER_ID:GNU,Clang,AppleClang>:-Wdeprecated-declarations>

$<$<CXX_COMPILER_ID:GNU,Clang,AppleClang>:-Wno-double-promotion>
$<$<CXX_COMPILER_ID:GNU,Clang,AppleClang>:-Wno-float-conversion>
$<$<CXX_COMPILER_ID:GNU,Clang,AppleClang>:-Wno-float-equal>
$<$<CXX_COMPILER_ID:GNU,Clang,AppleClang>:-Wno-missing-field-initializers>
$<$<CXX_COMPILER_ID:GNU,Clang,AppleClang>:-Wno-shadow>
$<$<CXX_COMPILER_ID:GNU,Clang,AppleClang>:-Wno-sign-conversion>
$<$<CXX_COMPILER_ID:GNU,Clang,AppleClang>:-Wno-switch-enum>
$<$<CXX_COMPILER_ID:GNU,Clang,AppleClang>:-Wno-undef>
$<$<CXX_COMPILER_ID:GNU,Clang,AppleClang>:-Wno-unused-function>
$<$<CXX_COMPILER_ID:GNU,Clang,AppleClang>:-Wno-unused-macros>
$<$<CXX_COMPILER_ID:GNU,Clang,AppleClang>:-Wno-unused-parameter>

$<$<CXX_COMPILER_ID:Clang,AppleClang>:-Wno-c++98-compat-pedantic>
$<$<CXX_COMPILER_ID:Clang,AppleClang>:-Wno-c++98-compat>
$<$<CXX_COMPILER_ID:Clang,AppleClang>:-Wno-cast-align>
$<$<CXX_COMPILER_ID:Clang,AppleClang>:-Wno-comma>
$<$<CXX_COMPILER_ID:Clang,AppleClang>:-Wno-covered-switch-default>
$<$<CXX_COMPILER_ID:Clang,AppleClang>:-Wno-documentation-unknown-command>
$<$<CXX_COMPILER_ID:Clang,AppleClang>:-Wno-documentation>
$<$<CXX_COMPILER_ID:Clang,AppleClang>:-Wno-exit-time-destructors>
$<$<CXX_COMPILER_ID:Clang,AppleClang>:-Wno-global-constructors>
$<$<CXX_COMPILER_ID:Clang,AppleClang>:-Wno-implicit-float-conversion>
$<$<CXX_COMPILER_ID:Clang,AppleClang>:-Wno-implicit-int-conversion>
$<$<CXX_COMPILER_ID:Clang,AppleClang>:-Wno-implicit-int-float-conversion>
$<$<CXX_COMPILER_ID:Clang,AppleClang>:-Wno-missing-prototypes>
$<$<CXX_COMPILER_ID:Clang,AppleClang>:-Wno-nonportable-system-include-path>
$<$<CXX_COMPILER_ID:Clang,AppleClang>:-Wno-reserved-id-macro>
$<$<CXX_COMPILER_ID:Clang,AppleClang>:-Wno-shadow-field-in-constructor>
$<$<CXX_COMPILER_ID:Clang,AppleClang>:-Wno-shadow-field>
$<$<CXX_COMPILER_ID:Clang,AppleClang>:-Wno-shorten-64-to-32>
$<$<CXX_COMPILER_ID:Clang,AppleClang>:-Wno-undefined-func-template>
$<$<CXX_COMPILER_ID:Clang,AppleClang>:-Wno-unused-member-function>
$<$<CXX_COMPILER_ID:Clang,AppleClang>:-Wno-unused-template>

$<$<CXX_COMPILER_ID:MSVC>:/W3>
$<$<CXX_COMPILER_ID:MSVC>:/wd4018> # 4018: disable "signed/unsigned mismatch"
$<$<CXX_COMPILER_ID:MSVC>:/wd4141> # 4141: 'inline' used more than once
$<$<CXX_COMPILER_ID:MSVC>:/wd4146> # 4146: unary minus applied to unsigned type
$<$<CXX_COMPILER_ID:MSVC>:/wd4244> # 4244: conversion, possible loss of data
$<$<CXX_COMPILER_ID:MSVC>:/wd4267> # 4267: conversion from 'size_t' to 'int', possible loss of data
$<$<CXX_COMPILER_ID:MSVC>:/wd4291> # 4291: No matching operator delete found
$<$<CXX_COMPILER_ID:MSVC>:/wd4503> # 4503: disable "decorated name length exceeded, name was truncated"
$<$<CXX_COMPILER_ID:MSVC>:/wd4800> # 4800: forcing value to bool 'true' or 'false' (performance warning)

# No: enable deprecation warnings
# $<$<CXX_COMPILER_ID:MSVC>:/wd4996> # 4996: compiler encountered deprecated declaration
)
endfunction()

Expand Down
24 changes: 10 additions & 14 deletions src/AlignLoads.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,10 @@ namespace {
class AlignLoads : public IRMutator {
public:
AlignLoads(int alignment, int min_bytes)
: alignment_analyzer(alignment), required_alignment(alignment), min_bytes_to_align(min_bytes) {
: required_alignment(alignment), min_bytes_to_align(min_bytes) {
}

private:
HexagonAlignmentAnalyzer alignment_analyzer;

// Loads and stores should ideally be aligned to the vector width in bytes.
int required_alignment;

Expand Down Expand Up @@ -82,12 +80,10 @@ class AlignLoads : public IRMutator {
return IRMutator::visit(op);
}

int64_t aligned_offset = 0;
bool is_aligned =
alignment_analyzer.is_aligned(op, &aligned_offset);
int aligned_offset = 0;
bool is_aligned = is_hexagon_aligned(op, required_alignment, &aligned_offset);
// We know the alignment_analyzer has been able to reason about alignment
// if the following is true.
bool known_alignment = is_aligned || (!is_aligned && aligned_offset != 0);
int lanes = ramp->lanes;
int native_lanes = required_alignment / op->type.bytes();
int stride = static_cast<int>(*const_stride);
Expand All @@ -99,7 +95,7 @@ class AlignLoads : public IRMutator {
// without requiring more vectors from the dense
// load. This makes loads like f(2*x + 1) into an aligned
// load of double length, with a single shuffle.
int shift = known_alignment && aligned_offset < stride ? aligned_offset : 0;
int shift = aligned_offset < stride ? aligned_offset : 0;

// Load a dense vector covering all of the addresses in the load.
Expr dense_base = simplify(ramp->base - shift);
Expand All @@ -122,8 +118,8 @@ class AlignLoads : public IRMutator {

// If load is smaller than a native vector and can fully fit inside of it and offset is known,
// we can simply offset the native load and slice.
if (!is_aligned && aligned_offset != 0 && Int(32).can_represent(aligned_offset) && (aligned_offset + lanes <= native_lanes)) {
ramp_base = simplify(ramp_base - (int)aligned_offset);
if (!is_aligned && aligned_offset != 0 && (aligned_offset + lanes <= native_lanes)) {
ramp_base = simplify(ramp_base - aligned_offset);
alignment = alignment - aligned_offset;
slice_offset = aligned_offset;
}
Expand All @@ -147,15 +143,15 @@ class AlignLoads : public IRMutator {
return Shuffle::make_concat(slices);
}

if (!is_aligned && aligned_offset != 0 && Int(32).can_represent(aligned_offset)) {
if (!is_aligned && aligned_offset != 0) {
// We know the offset of this load from an aligned
// address. Rewrite this as an aligned load of two
// native vectors, followed by a shuffle.
Expr aligned_base = simplify(ramp->base - (int)aligned_offset);
ModulusRemainder alignment = op->alignment - (int)aligned_offset;
Expr aligned_base = simplify(ramp->base - aligned_offset);
ModulusRemainder alignment = op->alignment - aligned_offset;
Expr aligned_load = make_load(op, Ramp::make(aligned_base, 1, lanes * 2), alignment);

return Shuffle::make_slice(aligned_load, (int)aligned_offset, 1, lanes);
return Shuffle::make_slice(aligned_load, aligned_offset, 1, lanes);
}

return IRMutator::visit(op);
Expand Down
2 changes: 1 addition & 1 deletion src/CodeGen_PTX_Dev.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -551,7 +551,7 @@ void CodeGen_PTX_Dev::codegen_vector_reduce(const VectorReduce *op, const Expr &
CodeGen_LLVM::codegen_vector_reduce(op, init);
}

string CodeGen_PTX_Dev::march() const {
[[maybe_unused]] string CodeGen_PTX_Dev::march() const {
return "nvptx64";
}

Expand Down
2 changes: 1 addition & 1 deletion src/CodeGen_Vulkan_Dev.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -476,7 +476,7 @@ void CodeGen_Vulkan_Dev::SPIRV_Emitter::scalarize(const Expr &e) {
builder.update_id(result_id);
}

SpvId CodeGen_Vulkan_Dev::SPIRV_Emitter::map_type_to_pair(const Type &t) {
[[maybe_unused]] SpvId CodeGen_Vulkan_Dev::SPIRV_Emitter::map_type_to_pair(const Type &t) {
debug(2) << "CodeGen_Vulkan_Dev::SPIRV_Emitter::map_type_to_pair(): " << t << "\n";
SpvId base_type_id = builder.declare_type(t);
SpvBuilder::StructMemberTypes member_type_ids = {base_type_id, base_type_id};
Expand Down
71 changes: 26 additions & 45 deletions src/HexagonAlignment.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,59 +10,40 @@
namespace Halide {
namespace Internal {

// TODO: This class is barely stateful, and could probably be replaced with free functions.
class HexagonAlignmentAnalyzer {
const int required_alignment;

public:
HexagonAlignmentAnalyzer(int required_alignment)
: required_alignment(required_alignment) {
internal_assert(required_alignment != 0);
template<typename T>
bool is_hexagon_aligned(const T *op, int required_alignment, int *aligned_offset) {
int native_lanes;
if constexpr (std::is_same_v<T, Load>) {
native_lanes = required_alignment / op->type.bytes();
} else {
native_lanes = required_alignment / op->value.type().bytes();
}

/** Analyze the index of a load/store instruction for alignment
* Returns true if it can determine that the address of the store or load is aligned, false otherwise.
*/
template<typename T>
bool is_aligned_impl(const T *op, int native_lanes, int64_t *aligned_offset) {
debug(3) << "HexagonAlignmentAnalyzer: Check if " << op->index << " is aligned to a "
<< required_alignment << " byte boundary\n"
<< "native_lanes: " << native_lanes << "\n";
Expr index = op->index;
const Ramp *ramp = index.as<Ramp>();
if (ramp) {
index = ramp->base;
} else if (index.type().is_vector()) {
debug(3) << "Is Unaligned\n";
return false;
}

internal_assert(native_lanes != 0) << "Type is larger than required alignment of " << required_alignment << " bytes\n";
debug(3) << "HexagonAlignmentAnalyzer: Check if " << op->index << " is aligned to a "
<< required_alignment << " byte boundary\n"
<< "native_lanes: " << native_lanes << "\n";

// If this is a parameter, the base_alignment should be
// host_alignment. Otherwise, this is an internal buffer,
// which we assume has been aligned to the required alignment.
if (op->param.defined() && ((op->param.host_alignment() % required_alignment) != 0)) {
return false;
}

bool known_alignment = (op->alignment.modulus % native_lanes) == 0;
if (known_alignment) {
*aligned_offset = op->alignment.remainder % native_lanes;
}
return known_alignment && (*aligned_offset == 0);
if (Expr index = op->index; !index.as<Ramp>() && index.type().is_vector()) {
debug(3) << "Is Unaligned\n";
return false;
}

bool is_aligned(const Load *op, int64_t *aligned_offset) {
int native_lanes = required_alignment / op->type.bytes();
return is_aligned_impl<Load>(op, native_lanes, aligned_offset);
internal_assert(native_lanes != 0) << "Type is larger than required alignment of " << required_alignment << " bytes\n";

// If this is a parameter, the base_alignment should be
// host_alignment. Otherwise, this is an internal buffer,
// which we assume has been aligned to the required alignment.
if (op->param.defined() && ((op->param.host_alignment() % required_alignment) != 0)) {
return false;
}

bool is_aligned(const Store *op, int64_t *aligned_offset) {
int native_lanes = required_alignment / op->value.type().bytes();
return is_aligned_impl<Store>(op, native_lanes, aligned_offset);
bool known_alignment = (op->alignment.modulus % native_lanes) == 0;
int64_t remainder = op->alignment.remainder % native_lanes;
if (known_alignment && aligned_offset != nullptr) {
*aligned_offset = static_cast<int>(remainder);
}
};
return known_alignment && remainder == 0;
}

} // namespace Internal
} // namespace Halide
Expand Down
12 changes: 3 additions & 9 deletions src/HexagonOptimize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1404,9 +1404,6 @@ class EliminateInterleaves : public IRMutator {
// We need to know when loads are a multiple of 2 native vectors.
int native_vector_bits;

// Alignment analyzer for loads and stores
HexagonAlignmentAnalyzer alignment_analyzer;

// Check if x is an expression that is either an interleave, or
// transitively is an interleave.
bool yields_removable_interleave(const Expr &x) {
Expand Down Expand Up @@ -1920,9 +1917,8 @@ class EliminateInterleaves : public IRMutator {
}
bool *aligned_accesses = aligned_buffer_access.shallow_find(op->name);
internal_assert(aligned_accesses) << "Buffer not found in scope";
int64_t aligned_offset = 0;

if (!alignment_analyzer.is_aligned(op, &aligned_offset)) {
if (!is_hexagon_aligned(op, native_vector_bits / 8, nullptr)) {
*aligned_accesses = false;
}
}
Expand Down Expand Up @@ -1955,9 +1951,7 @@ class EliminateInterleaves : public IRMutator {
bool *aligned_accesses = aligned_buffer_access.shallow_find(op->name);
internal_assert(aligned_accesses) << "Buffer not found in scope";

int64_t aligned_offset = 0;

if (!alignment_analyzer.is_aligned(op, &aligned_offset)) {
if (!is_hexagon_aligned(op, native_vector_bits / 8, nullptr)) {
*aligned_accesses = false;
}
} else {
Expand All @@ -1977,7 +1971,7 @@ class EliminateInterleaves : public IRMutator {

public:
EliminateInterleaves(const Target &t, int native_vector_bytes)
: native_vector_bits(native_vector_bytes * 8), alignment_analyzer(native_vector_bytes) {
: native_vector_bits(native_vector_bytes * 8) {
if (t.features_any_of({Target::HVX_v65})) {
hvx_target = HvxTarget::v65orLater;
} else if (t.features_any_of({Target::HVX_v66})) {
Expand Down
4 changes: 2 additions & 2 deletions src/Target.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -223,11 +223,11 @@ std::optional<T> getsysctl(const char *name) {
return std::make_optional(value);
}

bool sysctl_is_set(const char *name) {
[[maybe_unused]] bool sysctl_is_set(const char *name) {
return getsysctl<int>(name).value_or(0);
}

bool is_armv7s() {
[[maybe_unused]] bool is_armv7s() {
return getsysctl<cpu_type_t>("hw.cputype") == CPU_TYPE_ARM &&
getsysctl<cpu_subtype_t>("hw.cpusubtype") == CPU_SUBTYPE_ARM_V7S;
}
Expand Down
4 changes: 2 additions & 2 deletions test/correctness/compute_with.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1016,7 +1016,7 @@ int multi_tile_mixed_tile_factor_test() {
return 0;
}

int only_some_are_tiled_test() {
[[maybe_unused]] int only_some_are_tiled_test() {
const int size = 256;
Buffer<int> f_im(size, size), g_im(size / 2, size / 2), h_im(size / 2, size / 2);
Buffer<int> f_im_ref(size, size), g_im_ref(size / 2, size / 2), h_im_ref(size / 2, size / 2);
Expand Down Expand Up @@ -1527,7 +1527,7 @@ int update_stage_pairwise_test() {
return 0;
}

int update_stage_pairwise_zigzag_test() {
[[maybe_unused]] int update_stage_pairwise_zigzag_test() {
const int f_size = 128;
const int g_size = 128;
const int base = 31;
Expand Down
8 changes: 4 additions & 4 deletions test/correctness/simd_op_check.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,16 +18,16 @@ using namespace Halide;
// Builds an extern call expression named "input" with result type `t` and
// the single argument `arg`. The typed `in_*` helpers below forward to this
// with a fixed type.
Expr input(const Type &t, const Expr &arg) {
return Internal::Call::make(t, "input", {arg}, Internal::Call::Extern);
}
Expr in_f16(const Expr &arg) {
[[maybe_unused]] Expr in_f16(const Expr &arg) {
return input(Float(16), arg);
}
Expr in_bf16(const Expr &arg) {
[[maybe_unused]] Expr in_bf16(const Expr &arg) {
return input(BFloat(16), arg);
}
Expr in_f32(const Expr &arg) {
[[maybe_unused]] Expr in_f32(const Expr &arg) {
return input(Float(32), arg);
}
Expr in_f64(const Expr &arg) {
[[maybe_unused]] Expr in_f64(const Expr &arg) {
return input(Float(64), arg);
}
Expr in_i8(const Expr &arg) {
Expand Down
1 change: 1 addition & 0 deletions test/generator/metadata_tester_aottest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -340,6 +340,7 @@ const halide_scalar_value_t *make_scalar(double v) {
}

template<>
[[maybe_unused]]
const halide_scalar_value_t *make_scalar(void *v) {
halide_scalar_value_t *s = new halide_scalar_value_t();
s->u.handle = v;
Expand Down
2 changes: 1 addition & 1 deletion test/runtime/block_allocator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ int deallocate_block(void *user_context, MemoryBlock *block) {
return halide_error_code_success;
}

int conform_block(void *user_context, MemoryRequest *request) {
[[maybe_unused]] int conform_block(void *user_context, MemoryRequest *request) {

debug(user_context) << "Test : conform_block ("
<< "request_size=" << int32_t(request->size) << " "
Expand Down
Loading