Skip to content

Commit

Permalink
Engine: speed up String.Chars[] iteration in unicode mode
Browse files Browse the repository at this point in the history
This speeds up UTF-8 string iteration by remembering the last requested character index and its buffer offset. This seems to be the only solution in the absence of proper ScriptString iterators in the script API.
Costs 4 extra bytes per managed String object.
I intentionally limit these two indexes to uint16_t (64k chars/bytes) to save a bit of memory. On average, Strings are not that long; in the worst case, things may be sped up by splitting them into substrings in the script.

This change does not impose any dependencies, does not change save format, and may be safely reverted anytime.
  • Loading branch information
ivan-mogilko committed Oct 26, 2023
1 parent e2beb30 commit 2b42ffd
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 3 deletions.
2 changes: 2 additions & 0 deletions Engine/ac/dynobj/scriptstring.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,8 @@ ScriptString::Buffer ScriptString::CreateBuffer(size_t len, size_t ulen)
auto *header = reinterpret_cast<Header*>(buf.get());
header->Length = len;
header->ULength = ulen;
header->LastCharIdx = 0;
header->LastCharOff = 0;
return Buffer(std::move(buf), len + 1 + MemHeaderSz);
}

Expand Down
13 changes: 12 additions & 1 deletion Engine/ac/dynobj/scriptstring.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,13 @@ struct ScriptString final : AGSCCDynamicObject
// Per-string bookkeeping record: lengths of the string plus a small cache
// of the last requested character position.
struct Header
{
    uint32_t Length = 0u; // string length in bytes (not counting 0)
    uint32_t ULength = 0u; // Unicode compatible length in characters
    // Saved last requested character index and buffer offset;
    // significantly speeds up Unicode string iteration, but adds 4 bytes
    // per ScriptString object. Replace with a proper str iterator later!
    // NOTE: intentionally limited to 64k chars/bytes to save bit of mem;
    // positions that do not fit in 16 bits cannot be represented here.
    uint16_t LastCharIdx = 0u; // character index of the cached position
    uint16_t LastCharOff = 0u; // byte offset of the cached position
};

struct Buffer
Expand Down Expand Up @@ -55,6 +61,11 @@ struct ScriptString final : AGSCCDynamicObject
return reinterpret_cast<const Header&>(*(static_cast<const uint8_t*>(address) - MemHeaderSz));
}

// Mutable counterpart of the header accessor: yields a writable reference
// to the Header located MemHeaderSz bytes before the given address.
inline static Header &GetHeader(void *address)
{
    uint8_t *header_start = static_cast<uint8_t*>(address) - MemHeaderSz;
    return *reinterpret_cast<Header*>(header_start);
}

// Allocates a ScriptString-compatible buffer large enough to accomodate
// given length in bytes (len). This buffer may be filled by the caller
// and then passed into Create(). If ulen is left eq 0, then it will be
Expand Down
10 changes: 8 additions & 2 deletions Engine/ac/string.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -224,10 +224,16 @@ const char* String_UpperCase(const char *thisString) {
}

// Returns the character found at the given character index of a script
// string, or 0 if the index lies outside of [0, ULength).
//
// To speed up sequential access the string's header remembers the last
// requested character index together with its buffer offset; when the new
// index is at or past the remembered one, the offset search resumes from
// there instead of starting at the beginning of the string.
int String_GetChars(const char *thisString, int index) {
    // The header is updated with the lookup cache, hence the mutable access.
    // NOTE(review): assumes thisString points into a writable ScriptString
    // buffer (never a truly const object) -- confirm against callers.
    void *mutable_str = const_cast<char*>(thisString);
    auto &header = ScriptString::GetHeader(mutable_str);
    if ((index < 0) || (static_cast<uint32_t>(index) >= header.ULength))
        return 0;
    // uoffset is used here as "offset of the Nth character counting from
    // the given pointer"; the cached offset is added back to make the
    // result relative to the string's start.
    const int off = (header.LastCharIdx <= index) ?
        (uoffset(thisString + header.LastCharOff, index - header.LastCharIdx) + header.LastCharOff) :
        uoffset(thisString, index);
    // The cache fields are 16-bit (intentionally, to save a bit of mem).
    // Only store positions that fit: a truncated index/offset pair would no
    // longer refer to the same character and would make following lookups
    // return wrong results. Otherwise the previous (still valid) cache entry
    // is kept.
    constexpr int MaxCachedPos = 0xFFFF;
    if ((index <= MaxCachedPos) && (off <= MaxCachedPos))
    {
        header.LastCharIdx = static_cast<uint16_t>(index);
        header.LastCharOff = static_cast<uint16_t>(off);
    }
    return ugetc(thisString + off);
}

int StringToInt(const char*stino) {
Expand Down

0 comments on commit 2b42ffd

Please sign in to comment.