-
Notifications
You must be signed in to change notification settings - Fork 1.5k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Canonicalize away bit width and embed small integers into `IntId`s
The first change here is to canonicalize away bit width when tracking integers in our shared value store. This lets us have a more definitive model of "what is the mathematical value". It also frees us to use more efficient bit widths when available, such as bits inside the ID itself. For canonicalizing, we try to minimize the width adjustments and maximize the use of the SSO in APInt, and so we never shrink below 64 bits and grow in multiples of the word bit width in the implementation. We also canonicalize to the signed two's complement representation so we can represent negative numbers in an intuitive way. The canonicalizing requires getting the bit width out of the type and adjusting to it within the toolchain when doing any kind of math, and this PR updates various places to do that, as well as adding some convenience APIs to assist. Then we take advantage of the canonical form and embed small integers into the ID itself rather than allocating storage for them and referencing them with an index. This is especially helpful for the pervasive small integers such as the sizes of types, arrays, etc. Those no longer require indirection at all. Various short-cut APIs to take advantage of this have also been added. This PR improves lexing by about 5% when there are lots of `i32` types.
- Loading branch information
Showing
20 changed files
with
713 additions
and
86 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
// Part of the Carbon Language project, under the Apache License v2.0 with LLVM | ||
// Exceptions. See /LICENSE for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
|
||
#include "toolchain/base/int_store.h" | ||
|
||
namespace Carbon { | ||
|
||
auto IntStore::CanonicalBitWidth(int significant_bits) -> int { | ||
// For larger integers, we store them in as a signed APInt with a canonical | ||
// width that is the smallest multiple of the word type's bits, but no | ||
// smaller than a minimum of 64 bits to avoid spurious resizing of the most | ||
// common cases (<= 64 bits). | ||
static constexpr int WordWidth = llvm::APInt::APINT_BITS_PER_WORD; | ||
|
||
return std::max<int>( | ||
MinAPWidth, ((significant_bits + WordWidth - 1) / WordWidth) * WordWidth); | ||
} | ||
|
||
auto IntStore::CanonicalizeSigned(llvm::APInt value) -> llvm::APInt { | ||
  // Sign extend or truncate to the canonical width for the number of bits | ||
  // needed to represent the value as a signed integer. | ||
  const int canonical_width = CanonicalBitWidth(value.getSignificantBits()); | ||
  return value.sextOrTrunc(canonical_width); | ||
} | ||
|
||
auto IntStore::CanonicalizeUnsigned(llvm::APInt value) -> llvm::APInt { | ||
  // The canonical form is signed, so reserve one extra bit beyond the active | ||
  // bits to hold a zero sign bit before zero extending or truncating. | ||
  const int canonical_width = CanonicalBitWidth(value.getActiveBits() + 1); | ||
  return value.zextOrTrunc(canonical_width); | ||
} | ||
|
||
auto IntStore::AddLarge(int64_t value) -> IntId { | ||
  // Build a signed `APInt` at the canonical width for 64 significant bits, | ||
  // add it to the value store, and wrap the resulting index in an `IntId`. | ||
  const int width = CanonicalBitWidth(64); | ||
  return IntId::MakeIndexOrInvalid( | ||
      values_.Add(llvm::APInt(width, value, /*isSigned=*/true)).index); | ||
} | ||
|
||
auto IntStore::AddSignedLarge(llvm::APInt value) -> IntId { | ||
  // Canonicalize as a signed value, add it to the value store, and wrap the | ||
  // resulting index in an `IntId`. | ||
  return IntId::MakeIndexOrInvalid( | ||
      values_.Add(CanonicalizeSigned(value)).index); | ||
} | ||
|
||
auto IntStore::AddUnsignedLarge(llvm::APInt value) -> IntId { | ||
  // Canonicalize as an unsigned value, add it to the value store, and wrap | ||
  // the resulting index in an `IntId`. | ||
  return IntId::MakeIndexOrInvalid( | ||
      values_.Add(CanonicalizeUnsigned(value)).index); | ||
} | ||
|
||
auto IntStore::LookupSignedLarge(llvm::APInt value) const -> IntId { | ||
  // Canonicalize as a signed value and look it up in the value store without | ||
  // adding it; the lookup result's index is wrapped in an `IntId`. | ||
  return IntId::MakeIndexOrInvalid( | ||
      values_.Lookup(CanonicalizeSigned(value)).index); | ||
} | ||
|
||
// Produces the YAML description by forwarding to the underlying value store, | ||
// so only the integers held in `values_` are described. | ||
auto IntStore::OutputYaml() const -> Yaml::OutputMapping { | ||
return values_.OutputYaml(); | ||
} | ||
|
||
// Reports memory usage by forwarding `mem_usage` and `label` to the | ||
// underlying value store. | ||
auto IntStore::CollectMemUsage(MemUsage& mem_usage, llvm::StringRef label) const | ||
-> void { | ||
values_.CollectMemUsage(mem_usage, label); | ||
} | ||
|
||
} // namespace Carbon |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,186 @@ | ||
// Part of the Carbon Language project, under the Apache License v2.0 with LLVM | ||
// Exceptions. See /LICENSE for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
|
||
#ifndef CARBON_TOOLCHAIN_BASE_INT_STORE_H_ | ||
#define CARBON_TOOLCHAIN_BASE_INT_STORE_H_ | ||
|
||
#include "common/check.h" | ||
#include "llvm/ADT/APFloat.h" | ||
#include "llvm/ADT/APInt.h" | ||
#include "llvm/ADT/SmallVector.h" | ||
#include "toolchain/base/mem_usage.h" | ||
#include "toolchain/base/value_ids.h" | ||
#include "toolchain/base/value_store.h" | ||
#include "toolchain/base/yaml.h" | ||
|
||
namespace Carbon { | ||
|
||
// Forward declare a testing peer so we can friend it. | ||
namespace Testing { | ||
struct IntStoreTestPeer; | ||
} // namespace Testing | ||
|
||
// A canonicalizing value store with deep optimizations for integers. | ||
// | ||
// This stores integers as abstract, signed mathematical integers. The bit width | ||
// of specific `APInt` values, either as inputs or outputs, is disregarded for | ||
// the purpose of canonicalization and the returned integer may use a very | ||
// different bit width `APInt` than was used when adding. There are also | ||
// optimized paths for adding integer values representable using native integer | ||
// types. | ||
// | ||
// Because the integers in the store are canonicalized without a specific bit | ||
// width there are helper functions to coerce them to a specific desired bit | ||
// width for use. | ||
// | ||
// This leverages a significant optimization for small integer values -- rather | ||
// than canonicalizing and uniquing them in a `ValueStore`, they are directly | ||
// embedded in the `IntId` itself. Only larger integers are stored in an array | ||
// of `APInt` values and represented as an index in the ID. | ||
class IntStore { | ||
 public: | ||
  // Adds a host `int64_t`, taken as a signed mathematical integer, and returns | ||
  // the ID for that value. | ||
  // | ||
  // Handy when the integer was computed without ever building an `APInt`. An | ||
  // `APInt` is only stored when the value cannot be turned into an ID directly. | ||
  auto Add(int64_t value) -> IntId { | ||
    // Fast path: try to make the ID directly from the value. | ||
    IntId embedded = IntId::TryMakeValue(value); | ||
    if (embedded.is_valid()) [[likely]] { | ||
      return embedded; | ||
    } | ||
|
||
    // Slow path for larger values. | ||
    return AddLarge(value); | ||
  } | ||
|
||
  // Adds a signed `APInt` value, storing a canonical copy when needed, and | ||
  // returns its ID. | ||
  auto AddSigned(llvm::APInt value) -> IntId { | ||
    // Fast path: try to make the ID directly from the value. | ||
    IntId embedded = IntId::TryMakeSignedValue(value); | ||
    if (embedded.is_valid()) [[likely]] { | ||
      return embedded; | ||
    } | ||
|
||
    // Slow path for larger values. | ||
    return AddSignedLarge(std::move(value)); | ||
  } | ||
|
||
  // Adds an unsigned `APInt` value, storing a canonical copy when needed, and | ||
  // returns its ID. | ||
  auto AddUnsigned(llvm::APInt value) -> IntId { | ||
    // Fast path: try to make the ID directly from the value. | ||
    IntId embedded = IntId::TryMakeUnsignedValue(value); | ||
    if (embedded.is_valid()) [[likely]] { | ||
      return embedded; | ||
    } | ||
|
||
    // Slow path for larger values. | ||
    return AddUnsignedLarge(std::move(value)); | ||
  } | ||
|
||
  // Returns the integer value for an ID. | ||
  // | ||
  // The result is always a signed `APInt` with a canonical bit width for the | ||
  // specific integer value in question. | ||
  auto Get(IntId id) const -> llvm::APInt { | ||
    // Uncommon case: the ID is an index into the stored `APInt` values. | ||
    if (!id.is_value()) [[unlikely]] { | ||
      return values_.Get(APIntId(id.AsIndex())); | ||
    } | ||
    // Common case: the value is embedded in the ID itself. | ||
    return llvm::APInt(MinAPWidth, id.AsValue(), /*isSigned=*/true); | ||
  } | ||
|
||
  // Returns the integer value for an ID, adjusted to `bit_width` bits. | ||
  // | ||
  // Values are stored as canonical signed integers, so the adjustment is | ||
  // always a sign extension or a truncation. The caller may then treat the | ||
  // result as either signed or unsigned. | ||
  auto GetAtWidth(IntId id, int bit_width) const -> llvm::APInt { | ||
    llvm::APInt canonical = Get(id); | ||
    if (static_cast<int>(canonical.getBitWidth()) == bit_width) { | ||
      // Already at the requested width. | ||
      return canonical; | ||
    } | ||
    return canonical.sextOrTrunc(bit_width); | ||
  } | ||
|
||
  // Returns the integer value for an ID, adjusted to the bit width given by | ||
  // another integer ID. | ||
  // | ||
  // The width ID is looked up first and checked to be strictly positive and | ||
  // representable in an `int`; the `int` overload of `GetAtWidth` then does | ||
  // the actual adjustment. | ||
  auto GetAtWidth(IntId id, IntId bit_width_id) const -> llvm::APInt { | ||
    const llvm::APInt width_value = Get(bit_width_id); | ||
    CARBON_CHECK(width_value.isStrictlyPositive() && | ||
                     width_value.isSignedIntN(sizeof(int) * 8), | ||
                 "Invalid bit width value: {0}", width_value); | ||
    return GetAtWidth(id, width_value.getSExtValue()); | ||
  } | ||
|
||
  // Finds the canonical ID for a signed value without adding it. Returns an | ||
  // invalid ID when the value is not in the store. | ||
  auto LookupSigned(llvm::APInt value) const -> IntId { | ||
    // Fast path: try to make the ID directly from the value. | ||
    IntId embedded = IntId::TryMakeSignedValue(value); | ||
    if (embedded.is_valid()) [[likely]] { | ||
      return embedded; | ||
    } | ||
|
||
    // Slow path for larger values. | ||
    return LookupSignedLarge(std::move(value)); | ||
  } | ||
|
||
  // Outputs a YAML description of this data structure. Only the integers that | ||
  // required storing are included; those embedded into the ID space are not. | ||
  auto OutputYaml() const -> Yaml::OutputMapping; | ||
|
||
  // Returns a view of the separately stored `APInt` values. | ||
  auto array_ref() const -> llvm::ArrayRef<llvm::APInt> { | ||
    return values_.array_ref(); | ||
  } | ||
|
||
  // Returns the number of separately stored `APInt` values. | ||
  auto size() const -> size_t { return values_.size(); } | ||
|
||
  // Collects the memory usage of the separately stored integers. | ||
  auto CollectMemUsage(MemUsage& mem_usage, llvm::StringRef label) const | ||
      -> void; | ||
|
||
 private: | ||
  friend struct Testing::IntStoreTestPeer; | ||
|
||
  // ID type used to index the `APInt` values held in `values_`. | ||
  struct APIntId : IdBase, Printable<APIntId> { | ||
    using ValueType = llvm::APInt; | ||
    static const APIntId Invalid; | ||
    using IdBase::IdBase; | ||
    auto Print(llvm::raw_ostream& out) const -> void { | ||
      out << "ap-int"; | ||
      IdBase::Print(out); | ||
    } | ||
  }; | ||
|
||
  // Smallest bit width used for any `APInt` produced by this store. | ||
  static constexpr int MinAPWidth = 64; | ||
|
||
  // Picks the canonical bit width for the given number of significant bits. | ||
  static auto CanonicalBitWidth(int significant_bits) -> int; | ||
|
||
  // Adjusts an incoming signed `APInt` to its canonical bit width. | ||
  static auto CanonicalizeSigned(llvm::APInt value) -> llvm::APInt; | ||
|
||
  // Adjusts an incoming unsigned `APInt` to its canonical bit width. | ||
  static auto CanonicalizeUnsigned(llvm::APInt value) -> llvm::APInt; | ||
|
||
  // Out-of-line fallbacks used when a value cannot be made into an ID | ||
  // directly. | ||
  auto AddLarge(int64_t value) -> IntId; | ||
  auto AddSignedLarge(llvm::APInt value) -> IntId; | ||
  auto AddUnsignedLarge(llvm::APInt value) -> IntId; | ||
|
||
  auto LookupSignedLarge(llvm::APInt value) const -> IntId; | ||
|
||
  // Canonicalizing storage for the integers that are not embedded in an ID. | ||
  CanonicalValueStore<APIntId> values_; | ||
}; | ||
|
||
// Defines the invalid `APIntId` with the same index that `IntId::Invalid` | ||
// reports from `AsIndex()`. | ||
// NOTE(review): presumably this lets `IntId::MakeIndexOrInvalid` map an | ||
// invalid `APIntId` index back to `IntId::Invalid` -- confirm against `IntId`. | ||
constexpr IntStore::APIntId IntStore::APIntId::Invalid( | ||
IntId::Invalid.AsIndex()); | ||
|
||
} // namespace Carbon | ||
|
||
#endif // CARBON_TOOLCHAIN_BASE_INT_STORE_H_ |
Oops, something went wrong.