fix shortening of file names (fixes #3943)

sumatrapdfreader · Dec 28, 2023 · 9edf2f9 · 9edf2f9
1 parent 4d2972e
commit 9edf2f9
Show file tree

Hide file tree

Showing 5 changed files with 147 additions and 56 deletions.
diff --git a/src/Menu.cpp b/src/Menu.cpp
@@ -1003,25 +1003,73 @@ static bool __cmdIdInList(UINT_PTR cmdId, UINT_PTR* idsList, int n) {
 
 #define cmdIdInList(name) __cmdIdInList(md.idOrSubmenu, name, dimof(name))
 
-// TODO: write it in a way that handles unicode
-static TempStr ShortenString(char* menuString, size_t maxLen) {
-    size_t menuStrLen = str::Len(menuString);
-    if (menuStrLen <= maxLen) {
-        return menuString;
-    }
-    char* newStr = AllocArrayTemp<char>(maxLen);
+// Shorten s by keeping its head and tail and putting "..." in the middle.
+// Byte-based (ascii) version: it does not understand UTF-8, so it can cut
+// a multi-byte sequence in half.
+// Returns s itself when its length is <= maxLen, otherwise a new string
+// obtained from AllocArrayTemp. The result holds 2 * (maxLen / 2) - 1
+// characters because the tail copy includes the terminating \0 of s.
+// NOTE(review): for maxLen < 4 the index half - 2 wraps around (half is
+// size_t) and writes out of bounds; callers must pass maxLen >= 4.
+static TempStr ShortenStringTemp(char* s, int maxLen) {
+    size_t sLen = str::Len(s);
+    if (sLen <= maxLen) {
+        return s;
+    }
+    char* ret = AllocArrayTemp<char>(maxLen + 2);
     const size_t half = maxLen / 2;
-    const size_t strSize = menuStrLen + 1; // size()+1 because wcslen() doesn't include \0
-    // Copy first N/2 characters, move last N/2 characters to the halfway point
+    const size_t strSize = sLen + 1; // +1 for terminating \0
+    // copy first N/2 characters, move last N/2 characters to the halfway point
+    // (the last byte copied into the second half is the terminating \0 of s)
     for (size_t i = 0; i < half; i++) {
-        newStr[i] = menuString[i];
-        newStr[i + half] = menuString[strSize - half + i];
-    }
-    // Add ellipsis
-    newStr[half - 2] = newStr[half - 1] = newStr[half] = '.';
-    // Ensure null-terminated string
-    newStr[maxLen - 1] = '\0';
-    return newStr;
+        ret[i] = s[i];
+        ret[i + half] = s[strSize - half + i];
+    }
+    // add ellipsis in the middle, overwriting 3 of the copied characters
+    ret[half - 2] = ret[half - 1] = ret[half] = '.';
+    return ret;
+}
+
+// Shorten s to maxRunes runes (code points) by cutting runes out of the
+// middle and putting "..." in their place. Works on UTF-8: a multi-byte
+// sequence is either copied whole or dropped whole, never split.
+// Returns:
+// - s itself if it already has at most maxRunes runes
+// - the result of ShortenStringTemp() if s is not valid UTF-8
+// - otherwise a new NUL-terminated string obtained from AllocArrayTemp
+// NOTE(review): assumes maxRunes >= 3 so that the "..." fits; the only
+// caller visible here passes 70.
+static TempStr ShortenStringUtf8Temp(char* s, int maxRunes) {
+    int nRunes = utf8StrLen((u8*)s);
+    if (nRunes < 0) {
+        // not valid UTF-8, fall back to byte-based shortening
+        return ShortenStringTemp(s, maxRunes);
+    }
+    if (nRunes <= maxRunes) {
+        return s;
+    }
+    int toRemove = (nRunes - maxRunes) + 3; // 3 for "..."
+    int removeStartingAt = (nRunes / 2) - (toRemove / 2);
+    int removeEnd = removeStartingAt + toRemove;
+    // utf8StrLen() succeeded so every rune is 1-4 bytes long, which makes
+    // maxRunes * 4 bytes (+1 for the terminating \0) always enough
+    char* ret = AllocArrayTemp<char>(maxRunes * 4 + 1);
+    char* tmp = ret;
+    for (int i = 0; i < nRunes; i++) {
+        int n = utf8CharLen((u8*)s);
+        CrashIf(n < 1 || n > 4);
+        if (i == removeStartingAt) {
+            // first removed rune: emit the ellipsis in its place
+            *tmp++ = '.';
+            *tmp++ = '.';
+            *tmp++ = '.';
+        }
+        if (i < removeStartingAt || i >= removeEnd) {
+            // rune is outside of the removed range: copy all of its bytes
+            for (int j = 0; j < n; j++) {
+                *tmp++ = s[j];
+            }
+        }
+        s += n;
+    }
+    // terminate explicitly instead of relying on the allocation being zero-filled
+    *tmp = 0;
+    return ret;
 }
 
 static void AddFileMenuItem(HMENU menuFile, const char* filePath, int index) {
@@ -1031,10 +1079,9 @@ static void AddFileMenuItem(HMENU menuFile, const char* filePath, int index) {
     }
 
     TempStr menuString = path::GetBaseNameTemp(filePath);
-    // If the name is too long, save only the ends glued together
-    // E.g. 'Very Long PDF Name (3).pdf' -> 'Very Long...e (3).pdf'
-    const size_t MAX_LEN = 70;
-    menuString = ShortenString(menuString, MAX_LEN);
+    // shorten very long file names so that menu isn't too wide
+    const size_t kMaxRunes = 70;
+    menuString = ShortenStringUtf8Temp(menuString, kMaxRunes);
 
     TempStr fileName = MenuToSafeStringTemp(menuString);
     int menuIdx = (int)((index + 1) % 10);

diff --git a/src/utils/StrUtil.cpp b/src/utils/StrUtil.cpp
@@ -63,28 +63,39 @@ static const u8 trailingBytesForUTF8[256] = {
  * definition of UTF-8 goes up to 4-byte sequences.
  */
 
-static bool isLegalUTF8(const u8* source, int length) {
+static bool isLegalUTF8(const u8* src, int length) {
     u8 a;
-    const u8* srcptr = source + length;
+    for (int i = 0; i < length; i++) {
+        a = src[i];
+        if (a == 0) {
+            return false;
+        }
+    }
+    const u8* end = src + length;
 
     switch (length) {
         default:
             return false;
         /* Everything else falls through when "true"... */
         case 4:
-            if ((a = (*--srcptr)) < 0x80 || a > 0xBF) {
+            a = (*--end);
+            if (a < 0x80 || a > 0xBF) {
                 return false;
             }
+            __fallthrough;
         case 3:
-            if ((a = (*--srcptr)) < 0x80 || a > 0xBF) {
+            a = (*--end);
+            if (a < 0x80 || a > 0xBF) {
                 return false;
             }
+            __fallthrough;
         case 2:
-            if ((a = (*--srcptr)) > 0xBF) {
+            a = (*--end);
+            if (a > 0xBF) {
                 return false;
             }
 
-            switch (*source) {
+            switch (*src) {
                 /* no fall-through in this inner switch */
                 case 0xE0:
                     if (a < 0xA0) {
@@ -111,45 +122,64 @@ static bool isLegalUTF8(const u8* source, int length) {
                         return false;
                     }
             }
-
+            __fallthrough;
         case 1:
-            if (*source >= 0x80 && *source < 0xC2) {
+            if (*src >= 0x80 && *src < 0xC2) {
                 return false;
             }
     }
 
-    return *source <= 0xF4;
+    return *src <= 0xF4;
 }
 
 /* --------------------------------------------------------------------- */
 
+// Return the number of bytes in the UTF-8 sequence whose lead byte is *s,
+// i.e. trailingBytesForUTF8[*s] + 1. Only the lead byte is looked at, the
+// sequence itself is not validated.
+// Deliberately not inline: it is declared (without inline) in StrUtil.h and
+// called from other translation units (e.g. Menu.cpp), so an out-of-line
+// definition must always be emitted here.
+int utf8CharLen(const u8* s) {
+    int n = trailingBytesForUTF8[*s] + 1;
+    return n;
+}
+
 /*
- * Exported function to return whether a UTF-8 sequence is legal or not.
- * This is not used here; it's just exported.
+ * Return true if the single UTF-8 sequence starting at source is legal
+ * and fits entirely before sourceEnd.
+ * The lead byte *source is read (to get the sequence length) before the
+ * bounds check, so source must point at a readable byte.
  */
 bool isLegalUTF8Sequence(const u8* source, const u8* sourceEnd) {
-    int n = trailingBytesForUTF8[*source] + 1;
+    int n = utf8CharLen(source);
     if (source + n > sourceEnd) {
         return false;
     }
     return isLegalUTF8(source, n);
 }
 
 /*
- * Exported function to return whether a UTF-8 string is legal or not.
- * This is not used here; it's just exported.
+ * Return true if every UTF-8 sequence in [*source, sourceEnd) is legal.
+ * On success *source is set to sourceEnd.
+ * On failure *source is left unchanged; note that the code this replaces
+ * advanced *source up to the offending sequence.
  */
 bool isLegalUTF8String(const u8** source, const u8* sourceEnd) {
-    while (*source != sourceEnd) {
-        int n = trailingBytesForUTF8[**source] + 1;
-        if (n > sourceEnd - *source || !isLegalUTF8(*source, n)) {
+    const u8* s = *source;
+    while (s != sourceEnd) {
+        int n = utf8CharLen(s);
+        // reject a sequence that would run past sourceEnd or is malformed
+        if (n > sourceEnd - s || !isLegalUTF8(s, n)) {
             return false;
         }
-        *source += n;
+        s += n;
     }
+    *source = s;
     return true;
 }
 
+// Count the runes (UTF-8 sequences) in the NUL-terminated string s.
+// Returns -1 as soon as a sequence fails isLegalUTF8().
+int utf8StrLen(const u8* s) {
+    int nRunes = 0;
+    for (const u8* curr = s; *curr != 0; nRunes++) {
+        const int seqLen = utf8CharLen(curr);
+        if (!isLegalUTF8(curr, seqLen)) {
+            return -1;
+        }
+        curr += seqLen;
+    }
+    return nRunes;
+}
+
 // --- end of Unicode, Inc. utf8 code
 
 bool IsEqual(const ByteSlice& d1, const ByteSlice& d2) {
@@ -1440,10 +1470,13 @@ char& Str::Last() const {
 // without duplicate allocation. Note: since Vec over-allocates, this
 // is likely to use more memory than strictly necessary, but in most cases
 // it doesn't matter
-char* Str::StealData() {
+char* Str::StealData(Allocator* a) {
+    if (a == nullptr) {
+        a = this->allocator;
+    }
     char* res = els;
     if (els == buf) {
-        res = (char*)Allocator::MemDup(allocator, buf, len + kPadding);
+        res = (char*)Allocator::MemDup(a, buf, len + kPadding);
     }
     els = buf;
     Reset();

diff --git a/src/utils/StrUtil.h b/src/utils/StrUtil.h
@@ -9,6 +9,8 @@
 
 bool isLegalUTF8Sequence(const u8* source, const u8* sourceEnd);
 bool isLegalUTF8String(const u8** source, const u8* sourceEnd);
+int utf8StrLen(const u8* s);
+int utf8CharLen(const u8* s);
 
 struct ByteSlice {
     u8* d = nullptr;
@@ -299,7 +301,7 @@ struct Str {
     char RemoveAt(size_t idx, size_t count = 1);
     char RemoveLast();
     char& Last() const;
-    char* StealData();
+    char* StealData(Allocator* a = nullptr);
     char* LendData() const;
     bool Contains(const char* s, size_t sLen = 0);
     bool IsEmpty() const;

diff --git a/src/utils/TempAllocator.cpp b/src/utils/TempAllocator.cpp
@@ -70,27 +70,40 @@ TempStr ReplaceTemp(const char* s, const char* toReplace, const char* replaceWit
         return nullptr;
     }
 
+    const char* curr = s;
+    const char* end = str::Find(curr, toReplace);
+    if (!end) {
+        // optimization: nothing to replace so do nothing
+        return (TempStr)s;
+    }
+
+    size_t findLen = str::Len(toReplace);
+    size_t replLen = str::Len(replaceWith);
+    size_t lenDiff = 0;
+    if (replLen > findLen) {
+        lenDiff = replLen - findLen;
+    }
+    // heuristic: allow 6 replacements without reallocating
+    size_t capHint = str::Len(s) + 1 + (lenDiff * 6);
+    str::Str result(capHint);
     bool ok;
-    str::Str result(str::Len(s));
-    size_t findLen = str::Len(toReplace), replLen = str::Len(replaceWith);
-    const char *start = s, *end;
-    while ((end = str::Find(start, toReplace)) != nullptr) {
-        ok = result.Append(start, end - start);
+    while (end != nullptr) {
+        ok = result.Append(curr, end - curr);
         if (!ok) {
             return nullptr;
         }
         ok = result.Append(replaceWith, replLen);
         if (!ok) {
             return nullptr;
         }
-        start = end + findLen;
+        curr = end + findLen;
+        end = str::Find(curr, toReplace);
     }
-    ok = result.Append(start);
+    ok = result.Append(curr);
     if (!ok) {
         return nullptr;
     }
-    char* res = DupTemp(result.Get());
-    return res;
+    return result.StealData(GetTempAllocator());
 }
 
 } // namespace str

diff --git a/src/utils/WinUtil.cpp b/src/utils/WinUtil.cpp
@@ -1690,11 +1690,7 @@ void MenuSetText(HMENU m, int id, const char* s) {
    if no change is needed, the string is returned as is,
    else it's also saved in newResult for automatic freeing */
 TempStr MenuToSafeStringTemp(const char* s) {
-    auto str = str::DupTemp(s);
-    if (!str::FindChar(str, '&')) {
-        return str;
-    }
-    TempStr safe = str::ReplaceTemp(str, "&", "&&");
+    // replace every "&" with "&&"
+    // NOTE: ReplaceTemp returns s itself (cast to TempStr) when s contains
+    // no "&", so the result can alias the caller's string and is no longer
+    // always a fresh temp copy
+    TempStr safe = str::ReplaceTemp(s, "&", "&&");
     return safe;
 }