Skip to content

Commit

Permalink
liblzma: Optimize the loop conditions in BCJ filters
Browse files Browse the repository at this point in the history
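Compilers cannot optimize the addition "i + 4" out of the loop condition
because, in theory, the unsigned addition could wrap around. Adjusting
"size" once before the loop (rounding it down to a multiple of the step,
or subtracting the look-ahead after a minimum-size check) lets the
condition compare "i" against "size" directly.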
  • Loading branch information
Larhzu committed Nov 26, 2024
1 parent 9f69e71 commit c15115f
Show file tree
Hide file tree
Showing 6 changed files with 21 additions and 7 deletions.
4 changes: 3 additions & 1 deletion src/liblzma/simple/arm.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,10 @@ arm_code(void *simple lzma_attribute((__unused__)),
uint32_t now_pos, bool is_encoder,
uint8_t *buffer, size_t size)
{
size &= ~(size_t)3;

size_t i;
for (i = 0; i + 4 <= size; i += 4) {
for (i = 0; i < size; i += 4) {
if (buffer[i + 3] == 0xEB) {
uint32_t src = ((uint32_t)(buffer[i + 2]) << 16)
| ((uint32_t)(buffer[i + 1]) << 8)
Expand Down
4 changes: 3 additions & 1 deletion src/liblzma/simple/arm64.c
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ arm64_code(void *simple lzma_attribute((__unused__)),
uint32_t now_pos, bool is_encoder,
uint8_t *buffer, size_t size)
{
size &= ~(size_t)3;

size_t i;

// Clang 14.0.6 on x86-64 makes this four times bigger and 40 % slower
Expand All @@ -37,7 +39,7 @@ arm64_code(void *simple lzma_attribute((__unused__)),
#ifdef __clang__
# pragma clang loop vectorize(disable)
#endif
for (i = 0; i + 4 <= size; i += 4) {
for (i = 0; i < size; i += 4) {
uint32_t pc = (uint32_t)(now_pos + i);
uint32_t instr = read32le(buffer + i);

Expand Down
7 changes: 6 additions & 1 deletion src/liblzma/simple/armthumb.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,13 @@ armthumb_code(void *simple lzma_attribute((__unused__)),
uint32_t now_pos, bool is_encoder,
uint8_t *buffer, size_t size)
{
if (size < 4)
return 0;

size -= 4;

size_t i;
for (i = 0; i + 4 <= size; i += 2) {
for (i = 0; i <= size; i += 2) {
if ((buffer[i + 1] & 0xF8) == 0xF0
&& (buffer[i + 3] & 0xF8) == 0xF8) {
uint32_t src = (((uint32_t)(buffer[i + 1]) & 7) << 19)
Expand Down
4 changes: 3 additions & 1 deletion src/liblzma/simple/ia64.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,10 @@ ia64_code(void *simple lzma_attribute((__unused__)),
4, 4, 0, 0, 4, 4, 0, 0
};

size &= ~(size_t)15;

size_t i;
for (i = 0; i + 16 <= size; i += 16) {
for (i = 0; i < size; i += 16) {
const uint32_t instr_template = buffer[i] & 0x1F;
const uint32_t mask = BRANCH_TABLE[instr_template];
uint32_t bit_pos = 5;
Expand Down
4 changes: 3 additions & 1 deletion src/liblzma/simple/powerpc.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,10 @@ powerpc_code(void *simple lzma_attribute((__unused__)),
uint32_t now_pos, bool is_encoder,
uint8_t *buffer, size_t size)
{
size &= ~(size_t)3;

size_t i;
for (i = 0; i + 4 <= size; i += 4) {
for (i = 0; i < size; i += 4) {
// PowerPC branch 6(48) 24(Offset) 1(Abs) 1(Link)
if ((buffer[i] >> 2) == 0x12
&& ((buffer[i + 3] & 3) == 1)) {
Expand Down
5 changes: 3 additions & 2 deletions src/liblzma/simple/sparc.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,10 @@ sparc_code(void *simple lzma_attribute((__unused__)),
uint32_t now_pos, bool is_encoder,
uint8_t *buffer, size_t size)
{
size_t i;
for (i = 0; i + 4 <= size; i += 4) {
size &= ~(size_t)3;

size_t i;
for (i = 0; i < size; i += 4) {
if ((buffer[i] == 0x40 && (buffer[i + 1] & 0xC0) == 0x00)
|| (buffer[i] == 0x7F
&& (buffer[i + 1] & 0xC0) == 0xC0)) {
Expand Down

0 comments on commit c15115f

Please sign in to comment.