# Builds the offline GPT4All chat installer for Windows ARM64 (GPT4ALL_OFFLINE_INSTALLER=ON).
# The job runs on a Windows machine executor and cross-compiles: the VS dev shell is
# entered with -Arch arm64 -HostArch amd64.
build-offline-chat-installer-windows-arm:
  machine:
    # we use 2024.04.01 because nvcc complains about the MSVC ver if we use anything newer
    # NOTE(review): this job configures with LLMODEL_CUDA=OFF, so the nvcc constraint is
    # presumably kept only for parity with the x64 Windows job -- confirm.
    image: windows-server-2022-gui:2024.04.1
    resource_class: windows.large
    shell: powershell.exe -ExecutionPolicy Bypass
  steps:
    - checkout
    - run:
        name: Update Submodules
        command: |
          git submodule sync
          git submodule update --init --recursive
    # Key prefix shared with the epoch-suffixed key written by save_cache below.
    - restore_cache:
        keys:
          - ccache-gpt4all-win-aarch64-
    - run:
        name: Install dependencies
        command: choco install -y ccache wget
    # Installs both the x64 and the ARM64 MSVC 2019 Qt 6.5.1 kits, plus CMake, Ninja and IFW 4.8.
    - run:
        name: Installing Qt
        command: |
          wget.exe "https://gpt4all.io/ci/qt-unified-windows-x64-4.6.0-online.exe"
          # note: need x86_64 toolset for windeployqt.exe, which isn't provided with the ARM64 cross toolset
          & .\qt-unified-windows-x64-4.6.0-online.exe --no-force-installations --no-default-installations `
            --no-size-checking --default-answer --accept-licenses --confirm-command --accept-obligations `
            --email ${Env:QT_EMAIL} --password ${Env:QT_PASSWORD} install `
            qt.tools.cmake qt.tools.ifw.48 qt.tools.ninja qt.qt6.651.win64_msvc2019_64 `
            qt.qt6.651.win64_msvc2019_arm64 qt.qt6.651.qt5compat qt.qt6.651.debug_info qt.qt6.651.addons.qthttpserver
    # Unpacks the .NET 8 SDK into .\dotnet and installs AzureSignTool as a global tool.
    # NOTE(review): nothing in this job invokes AzureSignTool (signing is done by
    # sign-offline-chat-installer-windows-arm) -- confirm whether this install is still needed.
    # NOTE(review): the Build step sets DOTNET_ROOT/PATH again, presumably because $Env:
    # assignments do not carry over between run steps -- confirm.
    - run:
        name: "Install Dotnet 8"
        command: |
          mkdir dotnet
          cd dotnet
          $dotnet_url="https://download.visualstudio.microsoft.com/download/pr/5af098e1-e433-4fda-84af-3f54fd27c108/6bd1c6e48e64e64871957289023ca590/dotnet-sdk-8.0.302-win-x64.zip"
          wget.exe "$dotnet_url"
          Expand-Archive -LiteralPath .\dotnet-sdk-8.0.302-win-x64.zip
          $Env:DOTNET_ROOT="$($(Get-Location).Path)\dotnet-sdk-8.0.302-win-x64"
          $Env:PATH="$Env:DOTNET_ROOT;$Env:PATH"
          $Env:DOTNET_SKIP_FIRST_TIME_EXPERIENCE=$true
          dotnet tool install --global AzureSignTool
    # Configures with the ARM64 Qt toolchain file, CUDA and Kompute disabled, and ccache as
    # compiler launcher; WINDEPLOYQT points at the x64 windeployqt.exe with --qtpaths set to
    # the ARM64 kit's qtpaths.bat. Then builds, installs, packages, and copies the resulting
    # gpt4all-installer-win64-arm.exe into build\upload.
    - run:
        name: Build
        no_output_timeout: 30m
        command: |
          $vsInstallPath = & "C:\Program Files (x86)\Microsoft Visual Studio\Installer\vswhere.exe" -property installationpath
          Import-Module "${vsInstallPath}\Common7\Tools\Microsoft.VisualStudio.DevShell.dll"
          Enter-VsDevShell -VsInstallPath "$vsInstallPath" -SkipAutomaticLocation -Arch arm64 -HostArch amd64 -DevCmdArguments '-no_logo'

          $Env:PATH = "${Env:PATH};C:\Qt\Tools\QtInstallerFramework\4.8\bin"
          $Env:DOTNET_ROOT="$($(Get-Location).Path)\dotnet\dotnet-sdk-8.0.302-win-x64"
          $Env:PATH="$Env:DOTNET_ROOT;$Env:PATH"
          ccache -o "cache_dir=${pwd}\..\.ccache" -o max_size=500M -p -z
          mkdir build
          cd build
          & "C:\Qt\Tools\CMake_64\bin\cmake.exe" `
            -S ..\gpt4all-chat -B . -G Ninja `
            -DCMAKE_BUILD_TYPE=Release `
            "-DCMAKE_PREFIX_PATH:PATH=C:\Qt\6.5.1\msvc2019_arm64" `
            "-DCMAKE_MAKE_PROGRAM:FILEPATH=C:\Qt\Tools\Ninja\ninja.exe" `
            "-DCMAKE_TOOLCHAIN_FILE=C:\Qt\6.5.1\msvc2019_arm64\lib\cmake\Qt6\qt.toolchain.cmake" `
            -DCMAKE_C_COMPILER_LAUNCHER=ccache `
            -DCMAKE_CXX_COMPILER_LAUNCHER=ccache `
            -DLLMODEL_CUDA=OFF `
            -DLLMODEL_KOMPUTE=OFF `
            "-DWINDEPLOYQT=C:\Qt\6.5.1\msvc2019_64\bin\windeployqt.exe;--qtpaths;C:\Qt\6.5.1\msvc2019_arm64\bin\qtpaths.bat" `
            -DGPT4ALL_TEST=OFF `
            -DGPT4ALL_OFFLINE_INSTALLER=ON
          & "C:\Qt\Tools\Ninja\ninja.exe"
          & "C:\Qt\Tools\Ninja\ninja.exe" install
          & "C:\Qt\Tools\Ninja\ninja.exe" package
          ccache -s
          mkdir upload
          copy gpt4all-installer-win64-arm.exe upload
    - store_artifacts:
        path: build/upload
    # Save the ccache directory even when an earlier step failed (when: always).
    - save_cache:
        key: ccache-gpt4all-win-aarch64-{{ epoch }}
        when: always
        paths:
          - ..\.ccache
    # add workspace so the signing job can attach it and obtain the installer .exe
    - persist_to_workspace:
        root: build
        # specify path to only include components we want to persist
        # across builds
        paths:
          - upload
# Builds the online GPT4All chat installer for Windows ARM64 (GPT4ALL_OFFLINE_INSTALLER=OFF)
# and additionally zips the generated IFW "repository" directory into build\upload.
# The job cross-compiles: the VS dev shell is entered with -Arch arm64 -HostArch amd64.
build-online-chat-installer-windows-arm:
  machine:
    image: windows-server-2022-gui:2024.04.1
    resource_class: windows.large
    shell: powershell.exe -ExecutionPolicy Bypass
  steps:
    - checkout
    - run:
        name: Update Submodules
        command: |
          git submodule sync
          git submodule update --init --recursive
    # Key prefix shared with the epoch-suffixed key written by save_cache below.
    - restore_cache:
        keys:
          - ccache-gpt4all-win-aarch64-
    - run:
        name: Install dependencies
        command: choco install -y ccache wget
    # Installs both the x64 and the ARM64 MSVC 2019 Qt 6.5.1 kits, plus CMake, Ninja and IFW 4.8.
    - run:
        name: Installing Qt
        command: |
          wget.exe "https://gpt4all.io/ci/qt-unified-windows-x64-4.6.0-online.exe"
          # note: need x86_64 toolset for windeployqt.exe, which isn't provided with the ARM64 cross toolset
          & .\qt-unified-windows-x64-4.6.0-online.exe --no-force-installations --no-default-installations `
            --no-size-checking --default-answer --accept-licenses --confirm-command --accept-obligations `
            --email ${Env:QT_EMAIL} --password ${Env:QT_PASSWORD} install `
            qt.tools.cmake qt.tools.ifw.48 qt.tools.ninja qt.qt6.651.win64_msvc2019_64 `
            qt.qt6.651.win64_msvc2019_arm64 qt.qt6.651.qt5compat qt.qt6.651.debug_info qt.qt6.651.addons.qthttpserver
    # Unpacks the .NET 8 SDK into .\dotnet.
    # NOTE(review): later steps set DOTNET_ROOT/PATH again, presumably because $Env:
    # assignments do not carry over between run steps -- confirm.
    - run:
        name: "Install Dotnet 8"
        command: |
          mkdir dotnet
          cd dotnet
          $dotnet_url="https://download.visualstudio.microsoft.com/download/pr/5af098e1-e433-4fda-84af-3f54fd27c108/6bd1c6e48e64e64871957289023ca590/dotnet-sdk-8.0.302-win-x64.zip"
          wget.exe "$dotnet_url"
          Expand-Archive -LiteralPath .\dotnet-sdk-8.0.302-win-x64.zip
          $Env:DOTNET_ROOT="$($(Get-Location).Path)\dotnet-sdk-8.0.302-win-x64"
          $Env:PATH="$Env:DOTNET_ROOT;$Env:PATH"
    # Installs AzureSignTool as a global dotnet tool.
    # NOTE(review): nothing in this job invokes AzureSignTool (signing is done by
    # sign-online-chat-installer-windows-arm) -- confirm whether this step is still needed.
    - run:
        name: "Setup Azure SignTool"
        command: |
          $Env:DOTNET_ROOT="$($(Get-Location).Path)\dotnet\dotnet-sdk-8.0.302-win-x64"
          $Env:PATH="$Env:DOTNET_ROOT;$Env:PATH"
          $Env:DOTNET_SKIP_FIRST_TIME_EXPERIENCE=$true
          dotnet tool install --global AzureSignTool
    # Configures with the ARM64 Qt toolchain file, CUDA and Kompute disabled, and ccache as
    # compiler launcher; WINDEPLOYQT points at the x64 windeployqt.exe with --qtpaths set to
    # the ARM64 kit's qtpaths.bat. Then builds, installs, packages, copies the installer into
    # build\upload, and compresses the IFW repository directory to build\upload\repository.zip.
    - run:
        name: Build
        no_output_timeout: 30m
        command: |
          $vsInstallPath = & "C:\Program Files (x86)\Microsoft Visual Studio\Installer\vswhere.exe" -property installationpath
          Import-Module "${vsInstallPath}\Common7\Tools\Microsoft.VisualStudio.DevShell.dll"
          Enter-VsDevShell -VsInstallPath "$vsInstallPath" -SkipAutomaticLocation -Arch arm64 -HostArch amd64 -DevCmdArguments '-no_logo'

          $Env:PATH = "${Env:PATH};C:\Qt\Tools\QtInstallerFramework\4.8\bin"
          $Env:DOTNET_ROOT="$($(Get-Location).Path)\dotnet\dotnet-sdk-8.0.302-win-x64"
          $Env:PATH="$Env:DOTNET_ROOT;$Env:PATH"
          ccache -o "cache_dir=${pwd}\..\.ccache" -o max_size=500M -p -z
          mkdir build
          cd build
          & "C:\Qt\Tools\CMake_64\bin\cmake.exe" `
            -S ..\gpt4all-chat -B . -G Ninja `
            -DCMAKE_BUILD_TYPE=Release `
            "-DCMAKE_PREFIX_PATH:PATH=C:\Qt\6.5.1\msvc2019_arm64" `
            "-DCMAKE_MAKE_PROGRAM:FILEPATH=C:\Qt\Tools\Ninja\ninja.exe" `
            "-DCMAKE_TOOLCHAIN_FILE=C:\Qt\6.5.1\msvc2019_arm64\lib\cmake\Qt6\qt.toolchain.cmake" `
            -DCMAKE_C_COMPILER_LAUNCHER=ccache `
            -DCMAKE_CXX_COMPILER_LAUNCHER=ccache `
            -DLLMODEL_CUDA=OFF `
            -DLLMODEL_KOMPUTE=OFF `
            "-DWINDEPLOYQT=C:\Qt\6.5.1\msvc2019_64\bin\windeployqt.exe;--qtpaths;C:\Qt\6.5.1\msvc2019_arm64\bin\qtpaths.bat" `
            -DGPT4ALL_TEST=OFF `
            -DGPT4ALL_OFFLINE_INSTALLER=OFF
          & "C:\Qt\Tools\Ninja\ninja.exe"
          & "C:\Qt\Tools\Ninja\ninja.exe" install
          & "C:\Qt\Tools\Ninja\ninja.exe" package
          ccache -s
          mkdir upload
          copy gpt4all-installer-win64-arm.exe upload
          Set-Location -Path "_CPack_Packages/win64/IFW/gpt4all-installer-win64-arm"
          Compress-Archive -Path 'repository' -DestinationPath '..\..\..\..\upload\repository.zip'
    - store_artifacts:
        path: build/upload
    # Save the ccache directory even when an earlier step failed (when: always).
    - save_cache:
        key: ccache-gpt4all-win-aarch64-{{ epoch }}
        when: always
        paths:
          - ..\.ccache
    # add workspace so the signing job can attach it and obtain the installer .exe
    - persist_to_workspace:
        root: build
        # specify path to only include components we want to persist
        # across builds
        paths:
          - upload
$Env:PATH="$Env:DOTNET_ROOT;$Env:PATH" + - run: + name: "Setup Azure SignTool" + command: | + $Env:DOTNET_ROOT="$($(Get-Location).Path)\dotnet\dotnet-sdk-8.0.302-win-x64" + $Env:PATH="$Env:DOTNET_ROOT;$Env:PATH" + $Env:DOTNET_SKIP_FIRST_TIME_EXPERIENCE=$true + dotnet tool install --global AzureSignTool + - run: + name: "Sign Windows Installer With AST" + command: | + $Env:DOTNET_ROOT="$($(Get-Location).Path)\dotnet\dotnet-sdk-8.0.302-win-x64" + $Env:PATH="$Env:DOTNET_ROOT;$Env:PATH" + AzureSignTool.exe sign -du "https://gpt4all.io/index.html" -kvu https://gpt4all.vault.azure.net -kvi "$Env:AZSignGUID" -kvs "$Env:AZSignPWD" -kvc "$Env:AZSignCertName" -kvt "$Env:AZSignTID" -tr http://timestamp.digicert.com -v "$($(Get-Location).Path)/build/upload/gpt4all-installer-win64-arm.exe" + - store_artifacts: + path: build/upload + build-gpt4all-chat-linux: machine: image: ubuntu-2204:current @@ -1516,6 +1771,12 @@ workflows: context: gpt4all requires: - build-offline-chat-installer-windows + - build-offline-chat-installer-windows-arm: + requires: + - hold + - sign-offline-chat-installer-windows-arm: + requires: + - build-offline-chat-installer-windows-arm - build-offline-chat-installer-linux: context: gpt4all requires: @@ -1554,6 +1815,14 @@ workflows: context: gpt4all requires: - build-online-chat-installer-windows + - build-online-chat-installer-windows-arm: + <<: *main_only + requires: + - hold + - sign-online-chat-installer-windows-arm: + <<: *main_only + requires: + - build-online-chat-installer-windows-arm - build-online-chat-installer-linux: <<: *main_only context: gpt4all diff --git a/gpt4all-chat/CHANGELOG.md b/gpt4all-chat/CHANGELOG.md index 93a52ff57e8d..46936f21b448 100644 --- a/gpt4all-chat/CHANGELOG.md +++ b/gpt4all-chat/CHANGELOG.md @@ -6,6 +6,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). 
# Decide whether LocalDocs uses QtPDF (GPT4ALL_USING_QTPDF=ON) or falls back to PDFium.
# QtPDF is treated as unavailable on Windows ARM64: there an explicit ON is a hard error,
# while AUTO and OFF both select PDFium. Everywhere else, ON and AUTO select QtPDF.
set(_gpt4all_qtpdf_unavailable OFF)
if (CMAKE_SYSTEM_NAME MATCHES Windows AND CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|AARCH64|arm64|ARM64)$")
    set(_gpt4all_qtpdf_unavailable ON)
endif()

set(GPT4ALL_USING_QTPDF OFF)
if (_gpt4all_qtpdf_unavailable)
    if (GPT4ALL_USE_QTPDF STREQUAL "ON")
        message(FATAL_ERROR "QtPDF is not available on Windows ARM64.")
    endif()
elseif (GPT4ALL_USE_QTPDF MATCHES "^(ON|AUTO)$")
    set(GPT4ALL_USING_QTPDF ON)
endif()
unset(_gpt4all_qtpdf_unavailable)

# Qt modules that are always required; Pdf is requested last and only when QtPDF is in use.
set(GPT4ALL_QT_COMPONENTS Core HttpServer LinguistTools Quick QuickDialogs2 Sql Svg)
if (GPT4ALL_USING_QTPDF)
    list(APPEND GPT4ALL_QT_COMPONENTS Pdf)
endif()
find_package(Qt6 6.5 COMPONENTS ${GPT4ALL_QT_COMPONENTS} REQUIRED)
# Name the Windows installer after the target CPU: "<main>-installer-win64" for x86-64 and
# "<main>-installer-win64-arm" for ARM64. Any other processor aborts configuration.
# The two patterns are anchored and mutually exclusive, so the test order does not matter.
set(_gpt4all_installer_stem "${COMPONENT_NAME_MAIN}-installer-win64")
if (CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|AARCH64|arm64|ARM64)$")
    set(CPACK_PACKAGE_FILE_NAME "${_gpt4all_installer_stem}-arm")
elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|AMD64|amd64)$")
    set(CPACK_PACKAGE_FILE_NAME "${_gpt4all_installer_stem}")
else()
    message(FATAL_ERROR "Unrecognized processor: ${CMAKE_SYSTEM_PROCESSOR}")
endif()
unset(_gpt4all_installer_stem)
"SHA256=7442f1dc6bef90898b2b7bd38dbec369ddd81bbf66c1c5aac3a1b60e107098f9" + ) + endif() + + FetchContent_MakeAvailable(pdfium) + find_package(PDFium REQUIRED PATHS "${pdfium_SOURCE_DIR}" NO_DEFAULT_PATH) +endif() diff --git a/gpt4all-chat/src/database.cpp b/gpt4all-chat/src/database.cpp index 5ea2fd173639..b7b509c676e2 100644 --- a/gpt4all-chat/src/database.cpp +++ b/gpt4all-chat/src/database.cpp @@ -7,14 +7,13 @@ #include #include +#include #include #include #include #include #include #include -#include -#include #include #include #include @@ -31,6 +30,15 @@ #include #include +#ifdef GPT4ALL_USE_QTPDF +# include +# include +#else +# include +# include +# include +#endif + using namespace Qt::Literals::StringLiterals; namespace ranges = std::ranges; namespace us = unum::usearch; @@ -1133,6 +1141,7 @@ class DocumentReader { namespace { +#ifdef GPT4ALL_USE_QTPDF class PdfDocumentReader final : public DocumentReader { public: explicit PdfDocumentReader(const DocumentInfo &info) @@ -1173,6 +1182,99 @@ class PdfDocumentReader final : public DocumentReader { QString m_pageText; std::optional m_stream; }; +#else +class PdfDocumentReader final : public DocumentReader { +public: + explicit PdfDocumentReader(const DocumentInfo &info) + : DocumentReader(info) + { + QString path = info.file.canonicalFilePath(); + m_doc = FPDF_LoadDocument(path.toUtf8().constData(), nullptr); + if (!m_doc) + throw std::runtime_error(fmt::format("Failed to load PDF: {}", path)); + + // Extract metadata + Metadata metadata { + .title = getMetadata("Title" ), + .author = getMetadata("Author" ), + .subject = getMetadata("Subject" ), + .keywords = getMetadata("Keywords"), + }; + postInit(std::move(metadata)); + } + + ~PdfDocumentReader() override + { + if (m_page) + FPDF_ClosePage(m_page); + if (m_doc) + FPDF_CloseDocument(m_doc); + FPDF_DestroyLibrary(); + } + + int page() const override { return m_currentPage; } + +private: + std::optional advance() override + { + QString word; + do { + while 
(!m_stream || m_stream->atEnd()) { + if (m_currentPage >= FPDF_GetPageCount(m_doc)) + return std::nullopt; + + if (m_page) + FPDF_ClosePage(m_page); + m_page = FPDF_LoadPage(m_doc, m_currentPage++); + if (!m_page) + throw std::runtime_error("Failed to load page."); + + m_pageText = extractTextFromPage(m_page); + m_stream.emplace(&m_pageText); + } + *m_stream >> word; + } while (word.isEmpty()); + return word; + } + + QString getMetadata(FPDF_BYTESTRING key) + { + // FPDF_GetMetaText includes a 2-byte null terminator + ulong nBytes = FPDF_GetMetaText(m_doc, key, nullptr, 0); + if (nBytes <= sizeof (FPDF_WCHAR)) + return { "" }; + QByteArray buffer(nBytes, Qt::Uninitialized); + ulong nResultBytes = FPDF_GetMetaText(m_doc, key, buffer.data(), buffer.size()); + Q_ASSERT(nResultBytes % 2 == 0); + Q_ASSERT(nResultBytes <= nBytes); + return QString::fromUtf16(reinterpret_cast(buffer.data()), nResultBytes / 2 - 1); + } + + QString extractTextFromPage(FPDF_PAGE page) + { + FPDF_TEXTPAGE textPage = FPDFText_LoadPage(page); + if (!textPage) + throw std::runtime_error("Failed to load text page."); + + int nChars = FPDFText_CountChars(textPage); + if (!nChars) + return {}; + // FPDFText_GetText includes a 2-byte null terminator + QByteArray buffer((nChars + 1) * sizeof (FPDF_WCHAR), Qt::Uninitialized); + int nResultChars = FPDFText_GetText(textPage, 0, nChars, reinterpret_cast(buffer.data())); + Q_ASSERT(nResultChars <= nChars + 1); + + FPDFText_ClosePage(textPage); + return QString::fromUtf16(reinterpret_cast(buffer.data()), nResultChars - 1); + } + + FPDF_DOCUMENT m_doc = nullptr; + FPDF_PAGE m_page = nullptr; + int m_currentPage = 0; + QString m_pageText; + std::optional m_stream; +}; +#endif // !defined(GPT4ALL_USE_QTPDF) class WordDocumentReader final : public DocumentReader { public: diff --git a/gpt4all-chat/src/main.cpp b/gpt4all-chat/src/main.cpp index 1050e590879d..22b441693e62 100644 --- a/gpt4all-chat/src/main.cpp +++ b/gpt4all-chat/src/main.cpp @@ -25,6 +25,10 @@ 
#include #include +#ifndef GPT4ALL_USE_QTPDF +# include +#endif + #ifdef Q_OS_LINUX # include #endif @@ -58,6 +62,10 @@ static void raiseWindow(QWindow *window) int main(int argc, char *argv[]) { +#ifndef GPT4ALL_USE_QTPDF + FPDF_InitLibrary(); +#endif + QCoreApplication::setOrganizationName("nomic.ai"); QCoreApplication::setOrganizationDomain("gpt4all.io"); QCoreApplication::setApplicationName("GPT4All");