From d8ca6ff0be902b09009f703e3cad671e38146761 Mon Sep 17 00:00:00 2001 From: Jared Van Bortel Date: Tue, 14 Jan 2025 18:03:19 -0500 Subject: [PATCH 01/14] give Windows ARM build the -win64-arm suffix Signed-off-by: Jared Van Bortel --- gpt4all-chat/CMakeLists.txt | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/gpt4all-chat/CMakeLists.txt b/gpt4all-chat/CMakeLists.txt index b69a9610898b..3084bd2942ba 100644 --- a/gpt4all-chat/CMakeLists.txt +++ b/gpt4all-chat/CMakeLists.txt @@ -513,7 +513,13 @@ elseif(${CMAKE_SYSTEM_NAME} MATCHES Windows) set(CPACK_PRE_BUILD_SCRIPTS ${CMAKE_BINARY_DIR}/cmake/deploy-qt-windows.cmake) set(CPACK_IFW_ROOT "C:/Qt/Tools/QtInstallerFramework/4.6") set(CPACK_IFW_PACKAGE_ICON "${CMAKE_CURRENT_SOURCE_DIR}/resources/gpt4all.ico") - set(CPACK_PACKAGE_FILE_NAME "${COMPONENT_NAME_MAIN}-installer-win64") + if (CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|AMD64|amd64)$") + set(CPACK_PACKAGE_FILE_NAME "${COMPONENT_NAME_MAIN}-installer-win64") + elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|AARCH64|arm64|ARM64)$") + set(CPACK_PACKAGE_FILE_NAME "${COMPONENT_NAME_MAIN}-installer-win64-arm") + else() + message(FATAL_ERROR "Unrecognized processor: ${CMAKE_SYSTEM_PROCESSOR}") + endif() set(CPACK_IFW_TARGET_DIRECTORY "@HomeDir@\\${COMPONENT_NAME_MAIN}") elseif(${CMAKE_SYSTEM_NAME} MATCHES Darwin) find_program(MACDEPLOYQT macdeployqt HINTS ${_qt_bin_dir}) From ba896d08c50c7fc428c59753d960f7e8a84e0fad Mon Sep 17 00:00:00 2001 From: Jared Van Bortel Date: Tue, 14 Jan 2025 12:41:35 -0500 Subject: [PATCH 02/14] disable QtPdf for Windows ARM build Signed-off-by: Jared Van Bortel --- gpt4all-chat/CMakeLists.txt | 14 ++++++++++++-- gpt4all-chat/qml/LocalDocsSettings.qml | 7 +++++-- gpt4all-chat/src/database.cpp | 15 ++++++++++++--- gpt4all-chat/src/mysettings.cpp | 19 ++++++++++++++++++- gpt4all-chat/src/mysettings.h | 3 +++ 5 files changed, 50 insertions(+), 8 deletions(-) diff --git a/gpt4all-chat/CMakeLists.txt 
b/gpt4all-chat/CMakeLists.txt index 3084bd2942ba..59597cee35ca 100644 --- a/gpt4all-chat/CMakeLists.txt +++ b/gpt4all-chat/CMakeLists.txt @@ -80,7 +80,13 @@ configure_file( ) set(CMAKE_FIND_PACKAGE_TARGETS_GLOBAL ON) -find_package(Qt6 6.5 COMPONENTS Core HttpServer LinguistTools Pdf Quick QuickDialogs2 Sql Svg REQUIRED) +set(GPT4ALL_QT_COMPONENTS Core HttpServer LinguistTools Quick QuickDialogs2 Sql Svg) +set(GPT4ALL_HAVE_QTPDF OFF) +if (NOT (CMAKE_SYSTEM_NAME MATCHES Windows AND CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|AARCH64|arm64|ARM64)$")) + set(GPT4ALL_HAVE_QTPDF ON) + list(APPEND GPT4ALL_QT_COMPONENTS Pdf) +endif() +find_package(Qt6 6.5 COMPONENTS ${GPT4ALL_QT_COMPONENTS} REQUIRED) if (QT_KNOWN_POLICY_QTP0004) qt_policy(SET QTP0004 NEW) # generate extra qmldir files on Qt 6.8+ @@ -404,7 +410,11 @@ target_include_directories(chat PRIVATE deps/usearch/include deps/usearch/fp16/include) target_link_libraries(chat - PRIVATE Qt6::Core Qt6::HttpServer Qt6::Pdf Qt6::Quick Qt6::Sql Qt6::Svg) + PRIVATE Qt6::Core Qt6::HttpServer Qt6::Quick Qt6::Sql Qt6::Svg) +if (GPT4ALL_HAVE_QTPDF) + target_compile_definitions(chat PRIVATE GPT4ALL_HAVE_QTPDF) + target_link_libraries(chat PRIVATE Qt6::Pdf) +endif() target_link_libraries(chat PRIVATE llmodel SingleApplication fmt::fmt duckx::duckx QXlsx jinja2cpp) diff --git a/gpt4all-chat/qml/LocalDocsSettings.qml b/gpt4all-chat/qml/LocalDocsSettings.qml index 95124c9c822d..e1628c2a5348 100644 --- a/gpt4all-chat/qml/LocalDocsSettings.qml +++ b/gpt4all-chat/qml/LocalDocsSettings.qml @@ -69,7 +69,7 @@ MySettingsTab { exts = Array.from(new Set(exts)); /* Blacklist common unsupported file extensions. We only support plain text and PDFs, and although we * reject binary data, we don't want to waste time trying to index files that we don't support. 
*/ - exts = exts.filter(e => ![ + let extBlacklist = [ /* Microsoft documents */ "rtf", "ppt", "pptx", "xls", "xlsx", /* OpenOffice */ "odt", "ods", "odp", "odg", /* photos */ "jpg", "jpeg", "png", "gif", "bmp", "tif", "tiff", "webp", @@ -80,7 +80,10 @@ MySettingsTab { /* archives */ "zip", "jar", "apk", "rar", "7z", "tar", "gz", "xz", "bz2", "tar.gz", "tgz", "tar.xz", "tar.bz2", /* misc */ "bin", - ].includes(e)); + ]; + if (!MySettings.haveQtPdf) + extBlacklist.push("pdf"); // PDF not supported in this build + exts = exts.filter(e => !extBlacklist.includes(e)); MySettings.localDocsFileExtensions = exts; extsField.text = exts.join(','); focus = false; diff --git a/gpt4all-chat/src/database.cpp b/gpt4all-chat/src/database.cpp index 5ea2fd173639..a2bb1f213a46 100644 --- a/gpt4all-chat/src/database.cpp +++ b/gpt4all-chat/src/database.cpp @@ -13,8 +13,10 @@ #include #include #include -#include -#include +#ifdef GPT4ALL_HAVE_QTPDF +# include +# include +#endif #include #include #include @@ -1133,6 +1135,7 @@ class DocumentReader { namespace { +#ifdef GPT4ALL_HAVE_QTPDF class PdfDocumentReader final : public DocumentReader { public: explicit PdfDocumentReader(const DocumentInfo &info) @@ -1173,6 +1176,7 @@ class PdfDocumentReader final : public DocumentReader { QString m_pageText; std::optional m_stream; }; +#endif // GPT4ALL_HAVE_QTPDF class WordDocumentReader final : public DocumentReader { public: @@ -1313,8 +1317,13 @@ class TxtDocumentReader final : public DocumentReader { std::unique_ptr DocumentReader::fromDocument(const DocumentInfo &doc) { - if (doc.isPdf()) + if (doc.isPdf()) { +#ifdef GPT4ALL_HAVE_QTPDF return std::make_unique(doc); +#else + throw std::invalid_argument("fromDocument() passed a PDF file but GPT4All was built without PDF support"); +#endif + } if (doc.isDocx()) return std::make_unique(doc); return std::make_unique(doc); diff --git a/gpt4all-chat/src/mysettings.cpp b/gpt4all-chat/src/mysettings.cpp index ffccc912dade..faff959f5ab9 100644 --- 
a/gpt4all-chat/src/mysettings.cpp +++ b/gpt4all-chat/src/mysettings.cpp @@ -63,7 +63,15 @@ static const QVariantMap basicDefaults { { "localdocs/chunkSize", 512 }, { "localdocs/retrievalSize", 3 }, { "localdocs/showReferences", true }, - { "localdocs/fileExtensions", QStringList { "docx", "pdf", "txt", "md", "rst" } }, + { "localdocs/fileExtensions", + QStringList { + "docx", +#ifdef GPT4ALL_HAVE_QTPDF + "pdf", +#endif + "txt", "md", "rst", + }, + }, { "localdocs/useRemoteEmbed", false }, { "localdocs/nomicAPIKey", "" }, { "localdocs/embedDevice", "Auto" }, @@ -836,3 +844,12 @@ void MySettings::setLanguageAndLocale(const QString &bcp47Name) QLocale::setDefault(locale); emit languageAndLocaleChanged(); } + +bool MySettings::haveQtPdf() noexcept +{ +#ifdef GPT4ALL_HAVE_QTPDF + return true; +#else + return false; +#endif +} diff --git a/gpt4all-chat/src/mysettings.h b/gpt4all-chat/src/mysettings.h index a1a61e0618e0..a2a6b1f77ddd 100644 --- a/gpt4all-chat/src/mysettings.h +++ b/gpt4all-chat/src/mysettings.h @@ -214,6 +214,9 @@ public Q_SLOTS: int networkPort() const; void setNetworkPort(int value); + // Compile-time configuration constants + static bool haveQtPdf() noexcept; + Q_SIGNALS: void nameChanged(const ModelInfo &info); void filenameChanged(const ModelInfo &info); From 4e656976e6ac3033aec1b5313f61f83898ead3d3 Mon Sep 17 00:00:00 2001 From: Jared Van Bortel Date: Tue, 14 Jan 2025 17:43:41 -0500 Subject: [PATCH 03/14] ci: add Windows ARM build Signed-off-by: Jared Van Bortel --- .circleci/continue_config.yml | 267 ++++++++++++++++++++++++++++++++++ 1 file changed, 267 insertions(+) diff --git a/.circleci/continue_config.yml b/.circleci/continue_config.yml index 6f2ff4314ae9..7ff859f0133a 100644 --- a/.circleci/continue_config.yml +++ b/.circleci/continue_config.yml @@ -721,6 +721,259 @@ jobs: - store_artifacts: path: build/upload + build-offline-chat-installer-windows-arm: + machine: + # we use 2024.04.01 because nvcc complains about the MSVC ver if we use 
anything newer + image: windows-server-2022-gui:2024.04.1 + resource_class: windows.large + shell: powershell.exe -ExecutionPolicy Bypass + steps: + - checkout + - run: + name: Update Submodules + command: | + git submodule sync + git submodule update --init --recursive + - restore_cache: + keys: + - ccache-gpt4all-win-aarch64- + - run: + name: Install dependencies + command: choco install -y ccache wget + - run: + name: Installing Qt + command: | + wget.exe "https://gpt4all.io/ci/qt-unified-windows-x64-4.6.0-online.exe" + # note: need x86_64 toolset for windeployqt.exe, which isn't provided with the ARM64 cross toolset + & .\qt-unified-windows-x64-4.6.0-online.exe --no-force-installations --no-default-installations ` + --no-size-checking --default-answer --accept-licenses --confirm-command --accept-obligations ` + --email ${Env:QT_EMAIL} --password ${Env:QT_PASSWORD} install ` + qt.tools.cmake qt.tools.ifw.48 qt.tools.ninja qt.qt6.651.win64_msvc2019_64 ` + qt.qt6.651.win64_msvc2019_arm64 qt.qt6.651.qt5compat.win64_msvc2019_arm64 ` + qt.qt6.651.debug_info.win64_msvc2019_arm64 qt.qt6.651.addons.qthttpserver.win64_msvc2019_arm64 + - run: + name: "Install Dotnet 8" + command: | + mkdir dotnet + cd dotnet + $dotnet_url="https://download.visualstudio.microsoft.com/download/pr/5af098e1-e433-4fda-84af-3f54fd27c108/6bd1c6e48e64e64871957289023ca590/dotnet-sdk-8.0.302-win-x64.zip" + wget.exe "$dotnet_url" + Expand-Archive -LiteralPath .\dotnet-sdk-8.0.302-win-x64.zip + $Env:DOTNET_ROOT="$($(Get-Location).Path)\dotnet-sdk-8.0.302-win-x64" + $Env:PATH="$Env:DOTNET_ROOT;$Env:PATH" + $Env:DOTNET_SKIP_FIRST_TIME_EXPERIENCE=$true + dotnet tool install --global AzureSignTool + - run: + name: Build + no_output_timeout: 30m + command: | + $vsInstallPath = & "C:\Program Files (x86)\Microsoft Visual Studio\Installer\vswhere.exe" -property installationpath + Import-Module "${vsInstallPath}\Common7\Tools\Microsoft.VisualStudio.DevShell.dll" + Enter-VsDevShell -VsInstallPath 
"$vsInstallPath" -SkipAutomaticLocation -DevCmdArguments '-arch=arm64 -no_logo' + + $Env:PATH = "${Env:PATH};C:\Qt\Tools\QtInstallerFramework\4.8\bin" + $Env:DOTNET_ROOT="$($(Get-Location).Path)\dotnet\dotnet-sdk-8.0.302-win-x64" + $Env:PATH="$Env:DOTNET_ROOT;$Env:PATH" + ccache -o "cache_dir=${pwd}\..\.ccache" -o max_size=500M -p -z + mkdir build + cd build + & "C:\Qt\Tools\CMake_64\bin\cmake.exe" ` + -S ..\gpt4all-chat -B . -G Ninja ` + -DCMAKE_BUILD_TYPE=Release ` + "-DCMAKE_PREFIX_PATH:PATH=C:\Qt\6.5.1\msvc2019_arm64" ` + "-DCMAKE_MAKE_PROGRAM:FILEPATH=C:\Qt\Tools\Ninja\ninja.exe" ` + -DCMAKE_C_COMPILER_LAUNCHER=ccache ` + -DCMAKE_CXX_COMPILER_LAUNCHER=ccache ` + -DLLMODEL_CUDA=OFF ` + -DLLMODEL_KOMPUTE=OFF ` + -DWINDEPLOYQT='C:\Qt\6.5.1\msvc2019_64\bin\windeployqt.exe;--qtpaths;C:\Qt\6.5.1\msvc2019_arm64\bin\qtpaths.bat' ` + -DGPT4ALL_OFFLINE_INSTALLER=ON + & "C:\Qt\Tools\Ninja\ninja.exe" + & "C:\Qt\Tools\Ninja\ninja.exe" install + & "C:\Qt\Tools\Ninja\ninja.exe" package + ccache -s + mkdir upload + copy gpt4all-installer-win64-arm.exe upload + - store_artifacts: + path: build/upload + # add workspace so signing jobs can connect & obtain dmg + - save_cache: + key: ccache-gpt4all-win-aarch64-{{ epoch }} + when: always + paths: + - ..\.ccache + - persist_to_workspace: + root: build + # specify path to only include components we want to persist + # accross builds + paths: + - upload + + sign-offline-chat-installer-windows-arm: + machine: + image: windows-server-2022-gui:2024.04.1 + resource_class: windows.large + shell: powershell.exe -ExecutionPolicy Bypass + steps: + - checkout + - attach_workspace: + at: build + - run: + name: Install dependencies + command: choco install -y wget + - run: + name: "Install Dotnet 8 && Azure Sign Tool" + command: | + mkdir dotnet + cd dotnet + $dotnet_url="https://download.visualstudio.microsoft.com/download/pr/5af098e1-e433-4fda-84af-3f54fd27c108/6bd1c6e48e64e64871957289023ca590/dotnet-sdk-8.0.302-win-x64.zip" + wget.exe 
"$dotnet_url" + Expand-Archive -LiteralPath .\dotnet-sdk-8.0.302-win-x64.zip + $Env:DOTNET_ROOT="$($(Get-Location).Path)\dotnet-sdk-8.0.302-win-x64" + $Env:PATH="$Env:DOTNET_ROOT;$Env:PATH" + $Env:DOTNET_SKIP_FIRST_TIME_EXPERIENCE=$true + dotnet tool install --global AzureSignTool + - run: + name: "Sign Windows Installer With AST" + command: | + $Env:DOTNET_ROOT="$($(Get-Location).Path)\dotnet\dotnet-sdk-8.0.302-win-x64" + $Env:PATH="$Env:DOTNET_ROOT;$Env:PATH" + AzureSignTool.exe sign -du "https://gpt4all.io/index.html" -kvu https://gpt4all.vault.azure.net -kvi "$Env:AZSignGUID" -kvs "$Env:AZSignPWD" -kvc "$Env:AZSignCertName" -kvt "$Env:AZSignTID" -tr http://timestamp.digicert.com -v "$($(Get-Location).Path)\build\upload\gpt4all-installer-win64-arm.exe" + - store_artifacts: + path: build/upload + + build-online-chat-installer-windows-arm: + machine: + image: windows-server-2022-gui:2024.04.1 + resource_class: windows.large + shell: powershell.exe -ExecutionPolicy Bypass + steps: + - checkout + - run: + name: Update Submodules + command: | + git submodule sync + git submodule update --init --recursive + - restore_cache: + keys: + - ccache-gpt4all-win-aarch64- + - run: + name: Install dependencies + command: choco install -y ccache wget + - run: + name: Installing Qt + command: | + wget.exe "https://gpt4all.io/ci/qt-unified-windows-x64-4.6.0-online.exe" + # note: need x86_64 toolset for windeployqt.exe, which isn't provided with the ARM64 cross toolset + & .\qt-unified-windows-x64-4.6.0-online.exe --no-force-installations --no-default-installations ` + --no-size-checking --default-answer --accept-licenses --confirm-command --accept-obligations ` + --email ${Env:QT_EMAIL} --password ${Env:QT_PASSWORD} install ` + qt.tools.cmake qt.tools.ifw.48 qt.tools.ninja qt.qt6.651.win64_msvc2019_64 ` + qt.qt6.651.win64_msvc2019_arm64 qt.qt6.651.qt5compat.win64_msvc2019_arm64 ` + qt.qt6.651.debug_info.win64_msvc2019_arm64 qt.qt6.651.addons.qthttpserver.win64_msvc2019_arm64 + - 
run: + name: "Install Dotnet 8" + command: | + mkdir dotnet + cd dotnet + $dotnet_url="https://download.visualstudio.microsoft.com/download/pr/5af098e1-e433-4fda-84af-3f54fd27c108/6bd1c6e48e64e64871957289023ca590/dotnet-sdk-8.0.302-win-x64.zip" + wget.exe "$dotnet_url" + Expand-Archive -LiteralPath .\dotnet-sdk-8.0.302-win-x64.zip + $Env:DOTNET_ROOT="$($(Get-Location).Path)\dotnet-sdk-8.0.302-win-x64" + $Env:PATH="$Env:DOTNET_ROOT;$Env:PATH" + - run: + name: "Setup Azure SignTool" + command: | + $Env:DOTNET_ROOT="$($(Get-Location).Path)\dotnet\dotnet-sdk-8.0.302-win-x64" + $Env:PATH="$Env:DOTNET_ROOT;$Env:PATH" + $Env:DOTNET_SKIP_FIRST_TIME_EXPERIENCE=$true + dotnet tool install --global AzureSignTool + - run: + name: Build + no_output_timeout: 30m + command: | + $vsInstallPath = & "C:\Program Files (x86)\Microsoft Visual Studio\Installer\vswhere.exe" -property installationpath + Import-Module "${vsInstallPath}\Common7\Tools\Microsoft.VisualStudio.DevShell.dll" + Enter-VsDevShell -VsInstallPath "$vsInstallPath" -SkipAutomaticLocation -DevCmdArguments '-arch=arm64 -no_logo' + + $Env:PATH = "${Env:PATH};C:\Qt\Tools\QtInstallerFramework\4.8\bin" + $Env:DOTNET_ROOT="$($(Get-Location).Path)\dotnet\dotnet-sdk-8.0.302-win-x64" + $Env:PATH="$Env:DOTNET_ROOT;$Env:PATH" + ccache -o "cache_dir=${pwd}\..\.ccache" -o max_size=500M -p -z + mkdir build + cd build + & "C:\Qt\Tools\CMake_64\bin\cmake.exe" ` + -S ..\gpt4all-chat -B . 
-G Ninja ` + -DCMAKE_BUILD_TYPE=Release ` + "-DCMAKE_PREFIX_PATH:PATH=C:\Qt\6.5.1\msvc2019_64" ` + "-DCMAKE_MAKE_PROGRAM:FILEPATH=C:\Qt\Tools\Ninja\ninja.exe" ` + -DCMAKE_C_COMPILER_LAUNCHER=ccache ` + -DCMAKE_CXX_COMPILER_LAUNCHER=ccache ` + -DLLMODEL_CUDA=OFF ` + -DLLMODEL_KOMPUTE=OFF ` + -DWINDEPLOYQT='C:\Qt\6.5.1\msvc2019_64\bin\windeployqt.exe;--qtpaths;C:\Qt\6.5.1\msvc2019_arm64\bin\qtpaths.bat' ` + -DGPT4ALL_OFFLINE_INSTALLER=OFF + & "C:\Qt\Tools\Ninja\ninja.exe" + & "C:\Qt\Tools\Ninja\ninja.exe" install + & "C:\Qt\Tools\Ninja\ninja.exe" package + ccache -s + mkdir upload + copy gpt4all-installer-win64-arm.exe upload + Set-Location -Path "_CPack_Packages/win64/IFW/gpt4all-installer-win64-arm" + Compress-Archive -Path 'repository' -DestinationPath '..\..\..\..\upload\repository.zip' + - store_artifacts: + path: build/upload + - save_cache: + key: ccache-gpt4all-win-aarch64-{{ epoch }} + when: always + paths: + - ..\.ccache + # add workspace so signing jobs can connect & obtain dmg + - persist_to_workspace: + root: build + # specify path to only include components we want to persist + # accross builds + paths: + - upload + + sign-online-chat-installer-windows-arm: + machine: + image: windows-server-2022-gui:2024.04.1 + resource_class: windows.large + shell: powershell.exe -ExecutionPolicy Bypass + steps: + - checkout + - attach_workspace: + at: build + - run: + name: Install dependencies + command: choco install -y wget + - run: + name: "Install Dotnet 8" + command: | + mkdir dotnet + cd dotnet + $dotnet_url="https://download.visualstudio.microsoft.com/download/pr/5af098e1-e433-4fda-84af-3f54fd27c108/6bd1c6e48e64e64871957289023ca590/dotnet-sdk-8.0.302-win-x64.zip" + wget.exe "$dotnet_url" + Expand-Archive -LiteralPath .\dotnet-sdk-8.0.302-win-x64.zip + $Env:DOTNET_ROOT="$($(Get-Location).Path)\dotnet-sdk-8.0.302-win-x64" + $Env:PATH="$Env:DOTNET_ROOT;$Env:PATH" + - run: + name: "Setup Azure SignTool" + command: | + 
$Env:DOTNET_ROOT="$($(Get-Location).Path)\dotnet\dotnet-sdk-8.0.302-win-x64" + $Env:PATH="$Env:DOTNET_ROOT;$Env:PATH" + $Env:DOTNET_SKIP_FIRST_TIME_EXPERIENCE=$true + dotnet tool install --global AzureSignTool + - run: + name: "Sign Windows Installer With AST" + command: | + $Env:DOTNET_ROOT="$($(Get-Location).Path)\dotnet\dotnet-sdk-8.0.302-win-x64" + $Env:PATH="$Env:DOTNET_ROOT;$Env:PATH" + AzureSignTool.exe sign -du "https://gpt4all.io/index.html" -kvu https://gpt4all.vault.azure.net -kvi "$Env:AZSignGUID" -kvs "$Env:AZSignPWD" -kvc "$Env:AZSignCertName" -kvt "$Env:AZSignTID" -tr http://timestamp.digicert.com -v "$($(Get-Location).Path)/build/upload/gpt4all-installer-win64-arm.exe" + - store_artifacts: + path: build/upload + build-gpt4all-chat-linux: machine: image: ubuntu-2204:current @@ -1509,6 +1762,12 @@ workflows: - sign-offline-chat-installer-windows: requires: - build-offline-chat-installer-windows + - build-offline-chat-installer-windows-arm: + requires: + - hold + - sign-offline-chat-installer-windows-arm: + requires: + - build-offline-chat-installer-windows-arm - build-offline-chat-installer-linux: requires: - hold @@ -1541,6 +1800,14 @@ workflows: <<: *main_only requires: - build-online-chat-installer-windows + - build-online-chat-installer-windows-arm: + <<: *main_only + requires: + - hold + - sign-online-chat-installer-windows-arm: + <<: *main_only + requires: + - build-online-chat-installer-windows-arm - build-online-chat-installer-linux: <<: *main_only requires: From 33b44b1f5232d0bc2d8376c06096f45b2e6fd132 Mon Sep 17 00:00:00 2001 From: Jared Van Bortel Date: Tue, 14 Jan 2025 18:27:51 -0500 Subject: [PATCH 04/14] fix Enter-VsDevShell arguments Signed-off-by: Jared Van Bortel --- .circleci/continue_config.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.circleci/continue_config.yml b/.circleci/continue_config.yml index 7ff859f0133a..e00166c0295b 100644 --- a/.circleci/continue_config.yml +++ 
b/.circleci/continue_config.yml @@ -769,7 +769,7 @@ jobs: command: | $vsInstallPath = & "C:\Program Files (x86)\Microsoft Visual Studio\Installer\vswhere.exe" -property installationpath Import-Module "${vsInstallPath}\Common7\Tools\Microsoft.VisualStudio.DevShell.dll" - Enter-VsDevShell -VsInstallPath "$vsInstallPath" -SkipAutomaticLocation -DevCmdArguments '-arch=arm64 -no_logo' + Enter-VsDevShell -VsInstallPath "$vsInstallPath" -SkipAutomaticLocation -Arch arm64 -HostArch amd64 -DevCmdArguments '-no_logo' $Env:PATH = "${Env:PATH};C:\Qt\Tools\QtInstallerFramework\4.8\bin" $Env:DOTNET_ROOT="$($(Get-Location).Path)\dotnet\dotnet-sdk-8.0.302-win-x64" @@ -894,7 +894,7 @@ jobs: command: | $vsInstallPath = & "C:\Program Files (x86)\Microsoft Visual Studio\Installer\vswhere.exe" -property installationpath Import-Module "${vsInstallPath}\Common7\Tools\Microsoft.VisualStudio.DevShell.dll" - Enter-VsDevShell -VsInstallPath "$vsInstallPath" -SkipAutomaticLocation -DevCmdArguments '-arch=arm64 -no_logo' + Enter-VsDevShell -VsInstallPath "$vsInstallPath" -SkipAutomaticLocation -Arch arm64 -HostArch amd64 -DevCmdArguments '-no_logo' $Env:PATH = "${Env:PATH};C:\Qt\Tools\QtInstallerFramework\4.8\bin" $Env:DOTNET_ROOT="$($(Get-Location).Path)\dotnet\dotnet-sdk-8.0.302-win-x64" From 7f2d093e727ebe0f05ecd978c41021bbcdb51f56 Mon Sep 17 00:00:00 2001 From: Jared Van Bortel Date: Tue, 14 Jan 2025 18:41:41 -0500 Subject: [PATCH 05/14] do not attempt to install virtual components directly Signed-off-by: Jared Van Bortel --- .circleci/continue_config.yml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/.circleci/continue_config.yml b/.circleci/continue_config.yml index e00166c0295b..03a71817702b 100644 --- a/.circleci/continue_config.yml +++ b/.circleci/continue_config.yml @@ -749,8 +749,7 @@ jobs: --no-size-checking --default-answer --accept-licenses --confirm-command --accept-obligations ` --email ${Env:QT_EMAIL} --password ${Env:QT_PASSWORD} install ` 
qt.tools.cmake qt.tools.ifw.48 qt.tools.ninja qt.qt6.651.win64_msvc2019_64 ` - qt.qt6.651.win64_msvc2019_arm64 qt.qt6.651.qt5compat.win64_msvc2019_arm64 ` - qt.qt6.651.debug_info.win64_msvc2019_arm64 qt.qt6.651.addons.qthttpserver.win64_msvc2019_arm64 + qt.qt6.651.win64_msvc2019_arm64 qt.qt6.651.qt5compat qt.qt6.651.debug_info qt.qt6.651.addons.qthttpserver - run: name: "Install Dotnet 8" command: | @@ -869,8 +868,7 @@ jobs: --no-size-checking --default-answer --accept-licenses --confirm-command --accept-obligations ` --email ${Env:QT_EMAIL} --password ${Env:QT_PASSWORD} install ` qt.tools.cmake qt.tools.ifw.48 qt.tools.ninja qt.qt6.651.win64_msvc2019_64 ` - qt.qt6.651.win64_msvc2019_arm64 qt.qt6.651.qt5compat.win64_msvc2019_arm64 ` - qt.qt6.651.debug_info.win64_msvc2019_arm64 qt.qt6.651.addons.qthttpserver.win64_msvc2019_arm64 + qt.qt6.651.win64_msvc2019_arm64 qt.qt6.651.qt5compat qt.qt6.651.debug_info qt.qt6.651.addons.qthttpserver - run: name: "Install Dotnet 8" command: | From 3efc160cc8502338310506e6bc6bf3ecaa67371e Mon Sep 17 00:00:00 2001 From: Jared Van Bortel Date: Wed, 15 Jan 2025 11:24:38 -0500 Subject: [PATCH 06/14] cmake: set toolchain file to fix CMAKE_SYSTEM_PROCESSOR Signed-off-by: Jared Van Bortel --- .circleci/continue_config.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.circleci/continue_config.yml b/.circleci/continue_config.yml index 03a71817702b..667745a8a510 100644 --- a/.circleci/continue_config.yml +++ b/.circleci/continue_config.yml @@ -781,11 +781,12 @@ jobs: -DCMAKE_BUILD_TYPE=Release ` "-DCMAKE_PREFIX_PATH:PATH=C:\Qt\6.5.1\msvc2019_arm64" ` "-DCMAKE_MAKE_PROGRAM:FILEPATH=C:\Qt\Tools\Ninja\ninja.exe" ` + "-DCMAKE_TOOLCHAIN_FILE=C:\Qt\6.5.1\msvc2019_arm64\lib\cmake\Qt6\qt.toolchain.cmake" ` -DCMAKE_C_COMPILER_LAUNCHER=ccache ` -DCMAKE_CXX_COMPILER_LAUNCHER=ccache ` -DLLMODEL_CUDA=OFF ` -DLLMODEL_KOMPUTE=OFF ` - 
-DWINDEPLOYQT='C:\Qt\6.5.1\msvc2019_64\bin\windeployqt.exe;--qtpaths;C:\Qt\6.5.1\msvc2019_arm64\bin\qtpaths.bat' ` + "-DWINDEPLOYQT=C:\Qt\6.5.1\msvc2019_64\bin\windeployqt.exe;--qtpaths;C:\Qt\6.5.1\msvc2019_arm64\bin\qtpaths.bat" ` -DGPT4ALL_OFFLINE_INSTALLER=ON & "C:\Qt\Tools\Ninja\ninja.exe" & "C:\Qt\Tools\Ninja\ninja.exe" install @@ -905,11 +906,12 @@ jobs: -DCMAKE_BUILD_TYPE=Release ` "-DCMAKE_PREFIX_PATH:PATH=C:\Qt\6.5.1\msvc2019_64" ` "-DCMAKE_MAKE_PROGRAM:FILEPATH=C:\Qt\Tools\Ninja\ninja.exe" ` + "-DCMAKE_TOOLCHAIN_FILE=C:\Qt\6.5.1\msvc2019_arm64\lib\cmake\Qt6\qt.toolchain.cmake" ` -DCMAKE_C_COMPILER_LAUNCHER=ccache ` -DCMAKE_CXX_COMPILER_LAUNCHER=ccache ` -DLLMODEL_CUDA=OFF ` -DLLMODEL_KOMPUTE=OFF ` - -DWINDEPLOYQT='C:\Qt\6.5.1\msvc2019_64\bin\windeployqt.exe;--qtpaths;C:\Qt\6.5.1\msvc2019_arm64\bin\qtpaths.bat' ` + "-DWINDEPLOYQT=C:\Qt\6.5.1\msvc2019_64\bin\windeployqt.exe;--qtpaths;C:\Qt\6.5.1\msvc2019_arm64\bin\qtpaths.bat" ` -DGPT4ALL_OFFLINE_INSTALLER=OFF & "C:\Qt\Tools\Ninja\ninja.exe" & "C:\Qt\Tools\Ninja\ninja.exe" install From fd9d46c3d17f7b51f6c87a6be803d22dc26ac858 Mon Sep 17 00:00:00 2001 From: Jared Van Bortel Date: Wed, 15 Jan 2025 11:31:46 -0500 Subject: [PATCH 07/14] cmake: use a different installer repo for ARM build Signed-off-by: Jared Van Bortel --- gpt4all-chat/CMakeLists.txt | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/gpt4all-chat/CMakeLists.txt b/gpt4all-chat/CMakeLists.txt index 59597cee35ca..beafa0bad4b1 100644 --- a/gpt4all-chat/CMakeLists.txt +++ b/gpt4all-chat/CMakeLists.txt @@ -581,15 +581,19 @@ cpack_ifw_configure_component(${COMPONENT_NAME_MAIN} REPLACES "gpt4all-chat") #W if (GPT4ALL_LOCALHOST) cpack_ifw_add_repository("GPT4AllRepository" URL "http://localhost/repository") -elseif(GPT4ALL_OFFLINE_INSTALLER) - add_compile_definitions(GPT4ALL_OFFLINE_INSTALLER) +elseif (GPT4ALL_OFFLINE_INSTALLER) + add_compile_definitions(GPT4ALL_OFFLINE_INSTALLER) else() - 
if(${CMAKE_SYSTEM_NAME} MATCHES Linux) - cpack_ifw_add_repository("GPT4AllRepository" URL "https://gpt4all.io/installer_repos/linux/repository") - elseif(${CMAKE_SYSTEM_NAME} MATCHES Windows) - #To sign the target on windows have to create a batch script add use it as a custom target and then use CPACK_IFW_EXTRA_TARGETS to set this extra target - cpack_ifw_add_repository("GPT4AllRepository" URL "https://gpt4all.io/installer_repos/windows/repository") - elseif(${CMAKE_SYSTEM_NAME} MATCHES Darwin) - cpack_ifw_add_repository("GPT4AllRepository" URL "https://gpt4all.io/installer_repos/mac/repository") - endif() + if (CMAKE_SYSTEM_NAME MATCHES Linux) + cpack_ifw_add_repository("GPT4AllRepository" URL "https://gpt4all.io/installer_repos/linux/repository") + elseif (CMAKE_SYSTEM_NAME MATCHES Windows) + # To sign the target on Windows, we have to create a batch script and use it as a custom target, and then use CPACK_IFW_EXTRA_TARGETS to set this extra target + if (CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|AMD64|amd64)$") + cpack_ifw_add_repository("GPT4AllRepository" URL "https://gpt4all.io/installer_repos/windows/repository") + elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|AARCH64|arm64|ARM64)$") + cpack_ifw_add_repository("GPT4AllRepository" URL "https://gpt4all.io/installer_repos/windows_arm/repository") + else() + elseif (CMAKE_SYSTEM_NAME MATCHES Darwin) + cpack_ifw_add_repository("GPT4AllRepository" URL "https://gpt4all.io/installer_repos/mac/repository") + endif() endif() From 67152add68b3cf8dd8da82686487e15a2360db4d Mon Sep 17 00:00:00 2001 From: Jared Van Bortel Date: Wed, 15 Jan 2025 12:59:27 -0500 Subject: [PATCH 08/14] cmake: fix typo Signed-off-by: Jared Van Bortel --- gpt4all-chat/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gpt4all-chat/CMakeLists.txt b/gpt4all-chat/CMakeLists.txt index beafa0bad4b1..1cb4e5c03bd8 100644 --- a/gpt4all-chat/CMakeLists.txt +++ b/gpt4all-chat/CMakeLists.txt @@ -592,7 +592,7 @@ else() 
cpack_ifw_add_repository("GPT4AllRepository" URL "https://gpt4all.io/installer_repos/windows/repository") elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|AARCH64|arm64|ARM64)$") cpack_ifw_add_repository("GPT4AllRepository" URL "https://gpt4all.io/installer_repos/windows_arm/repository") - else() + endif() elseif (CMAKE_SYSTEM_NAME MATCHES Darwin) cpack_ifw_add_repository("GPT4AllRepository" URL "https://gpt4all.io/installer_repos/mac/repository") endif() From c8ec46bac16acccc18e6b42c39f42505c2dc13bd Mon Sep 17 00:00:00 2001 From: Jared Van Bortel Date: Wed, 15 Jan 2025 14:29:44 -0500 Subject: [PATCH 09/14] database: use PDFium when QtPDF is not available Signed-off-by: Jared Van Bortel --- gpt4all-chat/CMakeLists.txt | 29 ++++-- gpt4all-chat/deps/CMakeLists.txt | 37 ++++++++ gpt4all-chat/qml/LocalDocsSettings.qml | 7 +- gpt4all-chat/src/database.cpp | 117 ++++++++++++++++++++++--- gpt4all-chat/src/main.cpp | 8 ++ gpt4all-chat/src/mysettings.cpp | 19 +--- gpt4all-chat/src/mysettings.h | 3 - 7 files changed, 177 insertions(+), 43 deletions(-) diff --git a/gpt4all-chat/CMakeLists.txt b/gpt4all-chat/CMakeLists.txt index 1cb4e5c03bd8..7d23102cb686 100644 --- a/gpt4all-chat/CMakeLists.txt +++ b/gpt4all-chat/CMakeLists.txt @@ -28,6 +28,8 @@ option(GPT4ALL_TEST "Build the tests" ${Python3_FOUND}) option(GPT4ALL_LOCALHOST "Build installer for localhost repo" OFF) option(GPT4ALL_OFFLINE_INSTALLER "Build an offline installer" OFF) option(GPT4ALL_SIGN_INSTALL "Sign installed binaries and installers (requires signing identities)" OFF) +set(GPT4ALL_USE_QTPDF "AUTO" CACHE STRING "Whether to Use QtPDF for LocalDocs. 
If OFF or not available on this platform, PDFium is used.") +set_property(CACHE GPT4ALL_USE_QTPDF PROPERTY STRINGS AUTO ON OFF) set(CMAKE_EXPORT_COMPILE_COMMANDS ON) @@ -81,9 +83,14 @@ configure_file( set(CMAKE_FIND_PACKAGE_TARGETS_GLOBAL ON) set(GPT4ALL_QT_COMPONENTS Core HttpServer LinguistTools Quick QuickDialogs2 Sql Svg) -set(GPT4ALL_HAVE_QTPDF OFF) -if (NOT (CMAKE_SYSTEM_NAME MATCHES Windows AND CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|AARCH64|arm64|ARM64)$")) - set(GPT4ALL_HAVE_QTPDF ON) +set(GPT4ALL_USING_QTPDF OFF) +if (CMAKE_SYSTEM_NAME MATCHES Windows AND CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|AARCH64|arm64|ARM64)$") + # QtPDF is not available. + if (GPT4ALL_USE_QTPDF STREQUAL "ON") + message(FATAL_ERROR "QtPDF is not available on Windows ARM64.") + endif() +elseif (GPT4ALL_USE_QTPDF MATCHES "^(ON|AUTO)$") + set(GPT4ALL_USING_QTPDF ON) list(APPEND GPT4ALL_QT_COMPONENTS Pdf) endif() find_package(Qt6 6.5 COMPONENTS ${GPT4ALL_QT_COMPONENTS} REQUIRED) @@ -411,9 +418,12 @@ target_include_directories(chat PRIVATE deps/usearch/include target_link_libraries(chat PRIVATE Qt6::Core Qt6::HttpServer Qt6::Quick Qt6::Sql Qt6::Svg) -if (GPT4ALL_HAVE_QTPDF) - target_compile_definitions(chat PRIVATE GPT4ALL_HAVE_QTPDF) +if (GPT4ALL_USING_QTPDF) + target_compile_definitions(chat PRIVATE GPT4ALL_USE_QTPDF) target_link_libraries(chat PRIVATE Qt6::Pdf) +else() + # Link PDFium + target_link_libraries(chat PRIVATE pdfium) endif() target_link_libraries(chat PRIVATE llmodel SingleApplication fmt::fmt duckx::duckx QXlsx jinja2cpp) @@ -498,6 +508,15 @@ if (LLMODEL_CUDA) endif() endif() +if (NOT GPT4ALL_USING_QTPDF) + # Install PDFium + if (WIN32) + install(FILES "${PDFium_LIBRARY}" DESTINATION bin COMPONENT ${COMPONENT_NAME_MAIN}) + else() + install(FILES "${PDFium_LIBRARY}" DESTINATION lib COMPONENT ${COMPONENT_NAME_MAIN}) + endif() +endif() + if (NOT APPLE) install(FILES "${LOCAL_EMBEDDING_MODEL_PATH}" DESTINATION resources diff --git a/gpt4all-chat/deps/CMakeLists.txt 
b/gpt4all-chat/deps/CMakeLists.txt index 04b0e7616bbb..6c946eed1db0 100644 --- a/gpt4all-chat/deps/CMakeLists.txt +++ b/gpt4all-chat/deps/CMakeLists.txt @@ -1,3 +1,6 @@ +include(FetchContent) + + set(BUILD_SHARED_LIBS OFF) set(FMT_INSTALL OFF) @@ -20,3 +23,37 @@ set(RAPIDJSON_ENABLE_INSTRUMENTATION_OPT OFF) add_subdirectory(rapidjson) add_subdirectory(Jinja2Cpp) + +if (NOT GPT4ALL_USING_QTPDF) + # If we do not use QtPDF, we need to get PDFium. + if (CMAKE_SYSTEM_NAME MATCHES Linux) + FetchContent_Declare( + pdfium + URL "https://github.com/bblanchon/pdfium-binaries/releases/download/chromium/6954/pdfium-linux-x64.tgz" + URL_HASH "SHA256=69917fd9543befc6c806254aff6c8a604d9e7cd3999a3e70fc32b8690d372da2" + ) + elseif (CMAKE_SYSTEM_NAME MATCHES Windows) + if (CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|AMD64|amd64)$") + FetchContent_Declare( + pdfium + URL "https://github.com/bblanchon/pdfium-binaries/releases/download/chromium/6954/pdfium-win-x64.tgz" + URL_HASH "SHA256=62ecac78fbaf658457beaffcc05eb147f493d435a2e1309e6a731808b4e80d38" + ) + elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|AARCH64|arm64|ARM64)$") + FetchContent_Declare( + pdfium + URL "https://github.com/bblanchon/pdfium-binaries/releases/download/chromium/6954/pdfium-win-arm64.tgz" + URL_HASH "SHA256=a0b69014467f2b9824776c064920bc95359c9ba0d88793bdda1894a0f22206f8" + ) + endif() + elseif (CMAKE_SYSTEM_NAME MATCHES Darwin) + FetchContent_Declare( + pdfium + URL "https://github.com/bblanchon/pdfium-binaries/releases/download/chromium/6954/pdfium-mac-univ.tgz" + URL_HASH "SHA256=7442f1dc6bef90898b2b7bd38dbec369ddd81bbf66c1c5aac3a1b60e107098f9" + ) + endif() + + FetchContent_MakeAvailable(pdfium) + find_package(PDFium REQUIRED PATHS "${pdfium_SOURCE_DIR}" NO_DEFAULT_PATH) +endif() diff --git a/gpt4all-chat/qml/LocalDocsSettings.qml b/gpt4all-chat/qml/LocalDocsSettings.qml index e1628c2a5348..95124c9c822d 100644 --- a/gpt4all-chat/qml/LocalDocsSettings.qml +++ b/gpt4all-chat/qml/LocalDocsSettings.qml @@ 
-69,7 +69,7 @@ MySettingsTab { exts = Array.from(new Set(exts)); /* Blacklist common unsupported file extensions. We only support plain text and PDFs, and although we * reject binary data, we don't want to waste time trying to index files that we don't support. */ - let extBlacklist = [ + exts = exts.filter(e => ![ /* Microsoft documents */ "rtf", "ppt", "pptx", "xls", "xlsx", /* OpenOffice */ "odt", "ods", "odp", "odg", /* photos */ "jpg", "jpeg", "png", "gif", "bmp", "tif", "tiff", "webp", @@ -80,10 +80,7 @@ MySettingsTab { /* archives */ "zip", "jar", "apk", "rar", "7z", "tar", "gz", "xz", "bz2", "tar.gz", "tgz", "tar.xz", "tar.bz2", /* misc */ "bin", - ]; - if (!MySettings.haveQtPdf) - extBlacklist.push("pdf"); // PDF not supported in this build - exts = exts.filter(e => !extBlacklist.includes(e)); + ].includes(e)); MySettings.localDocsFileExtensions = exts; extsField.text = exts.join(','); focus = false; diff --git a/gpt4all-chat/src/database.cpp b/gpt4all-chat/src/database.cpp index a2bb1f213a46..4b1184f71949 100644 --- a/gpt4all-chat/src/database.cpp +++ b/gpt4all-chat/src/database.cpp @@ -7,16 +7,13 @@ #include #include +#include #include #include #include #include #include #include -#ifdef GPT4ALL_HAVE_QTPDF -# include -# include -#endif #include #include #include @@ -33,6 +30,15 @@ #include #include +#ifdef GPT4ALL_USE_QTPDF +# include +# include +#else +# include +# include +# include +#endif + using namespace Qt::Literals::StringLiterals; namespace ranges = std::ranges; namespace us = unum::usearch; @@ -1135,7 +1141,7 @@ class DocumentReader { namespace { -#ifdef GPT4ALL_HAVE_QTPDF +#ifdef GPT4ALL_USE_QTPDF class PdfDocumentReader final : public DocumentReader { public: explicit PdfDocumentReader(const DocumentInfo &info) @@ -1176,7 +1182,99 @@ class PdfDocumentReader final : public DocumentReader { QString m_pageText; std::optional m_stream; }; -#endif // GPT4ALL_HAVE_QTPDF +#else +class PdfDocumentReader final : public DocumentReader { +public: + 
explicit PdfDocumentReader(const DocumentInfo &info) + : DocumentReader(info) + { + QString path = info.file.canonicalFilePath(); + m_doc = FPDF_LoadDocument(path.toUtf8().constData(), nullptr); + if (!m_doc) + throw std::runtime_error(fmt::format("Failed to load PDF: {}", path)); + + // Extract metadata + Metadata metadata { + .title = getMetadata("Title" ), + .author = getMetadata("Author" ), + .subject = getMetadata("Subject" ), + .keywords = getMetadata("Keywords"), + }; + postInit(std::move(metadata)); + } + + ~PdfDocumentReader() override + { + if (m_page) + FPDF_ClosePage(m_page); + if (m_doc) + FPDF_CloseDocument(m_doc); + FPDF_DestroyLibrary(); + } + + int page() const override { return m_currentPage; } + +private: + std::optional advance() override + { + QString word; + do { + while (!m_stream || m_stream->atEnd()) { + if (m_currentPage >= FPDF_GetPageCount(m_doc)) + return std::nullopt; + + if (m_page) + FPDF_ClosePage(m_page); + m_page = FPDF_LoadPage(m_doc, m_currentPage++); + if (!m_page) + throw std::runtime_error("Failed to load page."); + + m_pageText = extractTextFromPage(m_page); + m_stream.emplace(&m_pageText); + } + *m_stream >> word; + } while (word.isEmpty()); + return word; + } + + QString getMetadata(FPDF_BYTESTRING key) + { + // FPDF_GetMetaText includes a 2-byte null teminator + ulong nBytes = FPDF_GetMetaText(m_doc, key, nullptr, 0); + if (nBytes <= sizeof (FPDF_WCHAR)) + return { "" }; + QByteArray buffer(nBytes, Qt::Uninitialized); + ulong nResultBytes = FPDF_GetMetaText(m_doc, key, buffer.data(), buffer.size()); + Q_ASSERT(nResultBytes % 2 == 0); + Q_ASSERT(nResultBytes <= nBytes); + return QString::fromUtf16(reinterpret_cast(buffer.data()), nResultBytes / 2 - 1); + } + + QString extractTextFromPage(FPDF_PAGE page) + { + FPDF_TEXTPAGE textPage = FPDFText_LoadPage(page); + if (!textPage) + throw std::runtime_error("Failed to load text page."); + + int nChars = FPDFText_CountChars(textPage); + if (!nChars) + return {}; + // 
FPDFText_GetText includes a 2-byte null teminator + QByteArray buffer((nChars + 1) * sizeof (FPDF_WCHAR), Qt::Uninitialized); + int nResultChars = FPDFText_GetText(textPage, 0, nChars, reinterpret_cast(buffer.data())); + Q_ASSERT(nResultChars <= nChars + 1); + + FPDFText_ClosePage(textPage); + return QString::fromUtf16(reinterpret_cast(buffer.data()), nResultChars - 1); + } + + FPDF_DOCUMENT m_doc = nullptr; + FPDF_PAGE m_page = nullptr; + int m_currentPage = 0; + QString m_pageText; + std::optional m_stream; +}; +#endif // !defined(GPT4ALL_USE_QTPDF) class WordDocumentReader final : public DocumentReader { public: @@ -1317,13 +1415,8 @@ class TxtDocumentReader final : public DocumentReader { std::unique_ptr DocumentReader::fromDocument(const DocumentInfo &doc) { - if (doc.isPdf()) { -#ifdef GPT4ALL_HAVE_QTPDF + if (doc.isPdf()) return std::make_unique(doc); -#else - throw std::invalid_argument("fromDocument() passed a PDF file but GPT4All was built without PDF support"); -#endif - } if (doc.isDocx()) return std::make_unique(doc); return std::make_unique(doc); diff --git a/gpt4all-chat/src/main.cpp b/gpt4all-chat/src/main.cpp index 1050e590879d..22b441693e62 100644 --- a/gpt4all-chat/src/main.cpp +++ b/gpt4all-chat/src/main.cpp @@ -25,6 +25,10 @@ #include #include +#ifndef GPT4ALL_USE_QTPDF +# include +#endif + #ifdef Q_OS_LINUX # include #endif @@ -58,6 +62,10 @@ static void raiseWindow(QWindow *window) int main(int argc, char *argv[]) { +#ifndef GPT4ALL_USE_QTPDF + FPDF_InitLibrary(); +#endif + QCoreApplication::setOrganizationName("nomic.ai"); QCoreApplication::setOrganizationDomain("gpt4all.io"); QCoreApplication::setApplicationName("GPT4All"); diff --git a/gpt4all-chat/src/mysettings.cpp b/gpt4all-chat/src/mysettings.cpp index faff959f5ab9..ffccc912dade 100644 --- a/gpt4all-chat/src/mysettings.cpp +++ b/gpt4all-chat/src/mysettings.cpp @@ -63,15 +63,7 @@ static const QVariantMap basicDefaults { { "localdocs/chunkSize", 512 }, { "localdocs/retrievalSize", 3 }, { 
"localdocs/showReferences", true }, - { "localdocs/fileExtensions", - QStringList { - "docx", -#ifdef GPT4ALL_HAVE_QTPDF - "pdf", -#endif - "txt", "md", "rst", - }, - }, + { "localdocs/fileExtensions", QStringList { "docx", "pdf", "txt", "md", "rst" } }, { "localdocs/useRemoteEmbed", false }, { "localdocs/nomicAPIKey", "" }, { "localdocs/embedDevice", "Auto" }, @@ -844,12 +836,3 @@ void MySettings::setLanguageAndLocale(const QString &bcp47Name) QLocale::setDefault(locale); emit languageAndLocaleChanged(); } - -bool MySettings::haveQtPdf() noexcept -{ -#ifdef GPT4ALL_HAVE_QTPDF - return true; -#else - return false; -#endif -} diff --git a/gpt4all-chat/src/mysettings.h b/gpt4all-chat/src/mysettings.h index a2a6b1f77ddd..a1a61e0618e0 100644 --- a/gpt4all-chat/src/mysettings.h +++ b/gpt4all-chat/src/mysettings.h @@ -214,9 +214,6 @@ public Q_SLOTS: int networkPort() const; void setNetworkPort(int value); - // Compile-time configuration constants - static bool haveQtPdf() noexcept; - Q_SIGNALS: void nameChanged(const ModelInfo &info); void filenameChanged(const ModelInfo &info); From ebc7bd3da5e1b9c2e6ec2fc3801a38e98f3812ff Mon Sep 17 00:00:00 2001 From: Jared Van Bortel Date: Wed, 15 Jan 2025 14:32:51 -0500 Subject: [PATCH 10/14] cmake: do not build tests by default when cross compiling Since we don't install these, it's not useful to attempt to build them when we are cross-compiling. 
Signed-off-by: Jared Van Bortel --- gpt4all-chat/CMakeLists.txt | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/gpt4all-chat/CMakeLists.txt b/gpt4all-chat/CMakeLists.txt index 7d23102cb686..03e771e2dba5 100644 --- a/gpt4all-chat/CMakeLists.txt +++ b/gpt4all-chat/CMakeLists.txt @@ -24,7 +24,13 @@ endif() find_package(Python3 3.12 QUIET COMPONENTS Interpreter) -option(GPT4ALL_TEST "Build the tests" ${Python3_FOUND}) +if (NOT CMAKE_CROSS_COMPILING AND Python3_FOUND) + set(GPT4ALL_TEST_DEFAULT ON) +else() + set(GPT4ALL_TEST_DEFAULT OFF) +endif() + +option(GPT4ALL_TEST "Build the tests" ${GPT4ALL_TEST_DEFAULT}) option(GPT4ALL_LOCALHOST "Build installer for localhost repo" OFF) option(GPT4ALL_OFFLINE_INSTALLER "Build an offline installer" OFF) option(GPT4ALL_SIGN_INSTALL "Sign installed binaries and installers (requires signing identities)" OFF) From 68693ea5a84a223fca848e6ef8074727352b85df Mon Sep 17 00:00:00 2001 From: Jared Van Bortel Date: Wed, 15 Jan 2025 14:46:43 -0500 Subject: [PATCH 11/14] cmake: actually disable tests for ARM64 build, fix cmake prefix Signed-off-by: Jared Van Bortel --- .circleci/continue_config.yml | 4 +++- gpt4all-chat/CMakeLists.txt | 8 +------- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/.circleci/continue_config.yml b/.circleci/continue_config.yml index 667745a8a510..a1901fcdf3cd 100644 --- a/.circleci/continue_config.yml +++ b/.circleci/continue_config.yml @@ -787,6 +787,7 @@ jobs: -DLLMODEL_CUDA=OFF ` -DLLMODEL_KOMPUTE=OFF ` "-DWINDEPLOYQT=C:\Qt\6.5.1\msvc2019_64\bin\windeployqt.exe;--qtpaths;C:\Qt\6.5.1\msvc2019_arm64\bin\qtpaths.bat" ` + -DGPT4ALL_TEST=OFF ` -DGPT4ALL_OFFLINE_INSTALLER=ON & "C:\Qt\Tools\Ninja\ninja.exe" & "C:\Qt\Tools\Ninja\ninja.exe" install @@ -904,7 +905,7 @@ jobs: & "C:\Qt\Tools\CMake_64\bin\cmake.exe" ` -S ..\gpt4all-chat -B . 
-G Ninja ` -DCMAKE_BUILD_TYPE=Release ` - "-DCMAKE_PREFIX_PATH:PATH=C:\Qt\6.5.1\msvc2019_64" ` + "-DCMAKE_PREFIX_PATH:PATH=C:\Qt\6.5.1\msvc2019_arm64" ` "-DCMAKE_MAKE_PROGRAM:FILEPATH=C:\Qt\Tools\Ninja\ninja.exe" ` "-DCMAKE_TOOLCHAIN_FILE=C:\Qt\6.5.1\msvc2019_arm64\lib\cmake\Qt6\qt.toolchain.cmake" ` -DCMAKE_C_COMPILER_LAUNCHER=ccache ` @@ -912,6 +913,7 @@ jobs: -DLLMODEL_CUDA=OFF ` -DLLMODEL_KOMPUTE=OFF ` "-DWINDEPLOYQT=C:\Qt\6.5.1\msvc2019_64\bin\windeployqt.exe;--qtpaths;C:\Qt\6.5.1\msvc2019_arm64\bin\qtpaths.bat" ` + -DGPT4ALL_TEST=OFF ` -DGPT4ALL_OFFLINE_INSTALLER=OFF & "C:\Qt\Tools\Ninja\ninja.exe" & "C:\Qt\Tools\Ninja\ninja.exe" install diff --git a/gpt4all-chat/CMakeLists.txt b/gpt4all-chat/CMakeLists.txt index 03e771e2dba5..7d23102cb686 100644 --- a/gpt4all-chat/CMakeLists.txt +++ b/gpt4all-chat/CMakeLists.txt @@ -24,13 +24,7 @@ endif() find_package(Python3 3.12 QUIET COMPONENTS Interpreter) -if (NOT CMAKE_CROSS_COMPILING AND Python3_FOUND) - set(GPT4ALL_TEST_DEFAULT ON) -else() - set(GPT4ALL_TEST_DEFAULT OFF) -endif() - -option(GPT4ALL_TEST "Build the tests" ${GPT4ALL_TEST_DEFAULT}) +option(GPT4ALL_TEST "Build the tests" ${Python3_FOUND}) option(GPT4ALL_LOCALHOST "Build installer for localhost repo" OFF) option(GPT4ALL_OFFLINE_INSTALLER "Build an offline installer" OFF) option(GPT4ALL_SIGN_INSTALL "Sign installed binaries and installers (requires signing identities)" OFF) From 686c6983eda7882bc56c0a69a06f32fb82b8d5be Mon Sep 17 00:00:00 2001 From: Jared Van Bortel Date: Wed, 15 Jan 2025 15:10:39 -0500 Subject: [PATCH 12/14] cmake: don't copy-paste the tag name Signed-off-by: Jared Van Bortel --- gpt4all-chat/deps/CMakeLists.txt | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/gpt4all-chat/deps/CMakeLists.txt b/gpt4all-chat/deps/CMakeLists.txt index 6c946eed1db0..e41dc1c2aaa1 100644 --- a/gpt4all-chat/deps/CMakeLists.txt +++ b/gpt4all-chat/deps/CMakeLists.txt @@ -26,30 +26,31 @@ add_subdirectory(Jinja2Cpp) if (NOT 
GPT4ALL_USING_QTPDF) # If we do not use QtPDF, we need to get PDFium. + set(GPT4ALL_PDFIUM_TAG "chromium/6954") if (CMAKE_SYSTEM_NAME MATCHES Linux) FetchContent_Declare( pdfium - URL "https://github.com/bblanchon/pdfium-binaries/releases/download/chromium/6954/pdfium-linux-x64.tgz" + URL "https://github.com/bblanchon/pdfium-binaries/releases/download/${GPT4ALL_PDFIUM_TAG}/pdfium-linux-x64.tgz" URL_HASH "SHA256=69917fd9543befc6c806254aff6c8a604d9e7cd3999a3e70fc32b8690d372da2" ) elseif (CMAKE_SYSTEM_NAME MATCHES Windows) if (CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|AMD64|amd64)$") FetchContent_Declare( pdfium - URL "https://github.com/bblanchon/pdfium-binaries/releases/download/chromium/6954/pdfium-win-x64.tgz" + URL "https://github.com/bblanchon/pdfium-binaries/releases/download/${GPT4ALL_PDFIUM_TAG}/pdfium-win-x64.tgz" URL_HASH "SHA256=62ecac78fbaf658457beaffcc05eb147f493d435a2e1309e6a731808b4e80d38" ) elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|AARCH64|arm64|ARM64)$") FetchContent_Declare( pdfium - URL "https://github.com/bblanchon/pdfium-binaries/releases/download/chromium/6954/pdfium-win-arm64.tgz" + URL "https://github.com/bblanchon/pdfium-binaries/releases/download/${GPT4ALL_PDFIUM_TAG}/pdfium-win-arm64.tgz" URL_HASH "SHA256=a0b69014467f2b9824776c064920bc95359c9ba0d88793bdda1894a0f22206f8" ) endif() elseif (CMAKE_SYSTEM_NAME MATCHES Darwin) FetchContent_Declare( pdfium - URL "https://github.com/bblanchon/pdfium-binaries/releases/download/chromium/6954/pdfium-mac-univ.tgz" + URL "https://github.com/bblanchon/pdfium-binaries/releases/download/${GPT4ALL_PDFIUM_TAG}/pdfium-mac-univ.tgz" URL_HASH "SHA256=7442f1dc6bef90898b2b7bd38dbec369ddd81bbf66c1c5aac3a1b60e107098f9" ) endif() From fa01af44d7f26fa875279977ec98f36b46d78b2e Mon Sep 17 00:00:00 2001 From: Jared Van Bortel Date: Wed, 15 Jan 2025 15:33:25 -0500 Subject: [PATCH 13/14] appease codespell Signed-off-by: Jared Van Bortel --- gpt4all-chat/src/database.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 
deletions(-) diff --git a/gpt4all-chat/src/database.cpp b/gpt4all-chat/src/database.cpp index 4b1184f71949..b7b509c676e2 100644 --- a/gpt4all-chat/src/database.cpp +++ b/gpt4all-chat/src/database.cpp @@ -1239,7 +1239,7 @@ class PdfDocumentReader final : public DocumentReader { QString getMetadata(FPDF_BYTESTRING key) { - // FPDF_GetMetaText includes a 2-byte null teminator + // FPDF_GetMetaText includes a 2-byte null terminator ulong nBytes = FPDF_GetMetaText(m_doc, key, nullptr, 0); if (nBytes <= sizeof (FPDF_WCHAR)) return { "" }; @@ -1259,7 +1259,7 @@ class PdfDocumentReader final : public DocumentReader { int nChars = FPDFText_CountChars(textPage); if (!nChars) return {}; - // FPDFText_GetText includes a 2-byte null teminator + // FPDFText_GetText includes a 2-byte null terminator QByteArray buffer((nChars + 1) * sizeof (FPDF_WCHAR), Qt::Uninitialized); int nResultChars = FPDFText_GetText(textPage, 0, nChars, reinterpret_cast(buffer.data())); Q_ASSERT(nResultChars <= nChars + 1); From 88ae57ba7c4eacfe75791ce6c7dedd5d1755de47 Mon Sep 17 00:00:00 2001 From: Jared Van Bortel Date: Wed, 15 Jan 2025 15:36:30 -0500 Subject: [PATCH 14/14] add changelog entry Signed-off-by: Jared Van Bortel --- gpt4all-chat/CHANGELOG.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/gpt4all-chat/CHANGELOG.md b/gpt4all-chat/CHANGELOG.md index 0703907655e5..f2e5c69c7a86 100644 --- a/gpt4all-chat/CHANGELOG.md +++ b/gpt4all-chat/CHANGELOG.md @@ -6,7 +6,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). 
## [Unreleased] -## Fixed +### Added +- Add support for the Windows ARM64 target platform (CPU-only) ([#3385](https://github.com/nomic-ai/gpt4all/pull/3385)) + +### Fixed - Fix the timeout error in code interpreter ([#3369](https://github.com/nomic-ai/gpt4all/pull/3369)) - Fix code interpreter console.log not accepting multiple arguments ([#3371](https://github.com/nomic-ai/gpt4all/pull/3371)) - Remove 'X is defined' checks from templates as they work incorrectly with Jinja2Cpp ([#3372](https://github.com/nomic-ai/gpt4all/pull/3372))