diff --git a/.asf.yaml b/.asf.yaml
index 0b72df504c..a0a6be587b 100644
--- a/.asf.yaml
+++ b/.asf.yaml
@@ -6,24 +6,24 @@ notifications:
   pullrequests_comment: issues@sedona.apache.org
   jira_options: link label worklog
 github:
-    description: "A cluster computing framework for processing large-scale geospatial data"
-    homepage: https://sedona.apache.org/
-    autolink_jira:
-    - SEDONA
-    labels:
-    - geospatial
-    - cluster-computing
-    - spatial-query
-    - spatial-analysis
-    - spatial-sql
-    - scala
-    - java
-    - python
-    features:
-        # Enable wiki for documentation
-        wiki: false
-        # Enable issue management
-        issues: true
-        # Enable projects for project management boards
-        projects: false
-    ghp_branch:
+  description: "A cluster computing framework for processing large-scale geospatial data"
+  homepage: https://sedona.apache.org/
+  autolink_jira:
+    - SEDONA
+  labels:
+    - geospatial
+    - cluster-computing
+    - spatial-query
+    - spatial-analysis
+    - spatial-sql
+    - scala
+    - java
+    - python
+  features:
+    # Enable wiki for documentation
+    wiki: false
+    # Enable issue management
+    issues: true
+    # Enable projects for project management boards
+    projects: false
+  ghp_branch:
diff --git a/.github/linters/.yaml-lint.yml b/.github/linters/.yaml-lint.yml
new file mode 100644
index 0000000000..117e78e6b3
--- /dev/null
+++ b/.github/linters/.yaml-lint.yml
@@ -0,0 +1,11 @@
+---
+# https://yamllint.readthedocs.io/en/stable/
+extends: default
+
+rules:
+  colons: disable
+  comments: disable
+  comments-indentation: disable
+  document-start: disable
+  line-length: disable
+  truthy: false
diff --git a/.github/linters/codespell.txt b/.github/linters/codespell.txt
index b828d8b767..a20350263c 100644
--- a/.github/linters/codespell.txt
+++ b/.github/linters/codespell.txt
@@ -1,7 +1,9 @@
 actualy
 afterall
+atmost
 bu
 celle
+checkin
 eiter
 errorprone
 fpt
diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml
index 4aa8108d41..16c3a32081 100644
--- a/.github/workflows/docker-build.yml
+++ b/.github/workflows/docker-build.yml
@@ -40,30 +40,29 @@ jobs:
     defaults:
       run:
         shell: bash
-
     steps:
-    - uses: actions/checkout@v4
-    - uses: actions/setup-java@v4
-      with:
-        distribution: 'zulu'
-        java-version: 11
-    - name: Cache Maven packages
-      uses: actions/cache@v3
-      with:
-        path: ~/.m2
-        key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }}
-        restore-keys: ${{ runner.os }}-m2
-    - name: Setup docker (missing on macOS)
-      if: runner.os == 'macos'
-      run: |
-        brew install docker
-        colima start
-        DOCKER_CONFIG=${DOCKER_CONFIG:-$HOME/.docker}
-        mkdir -p $DOCKER_CONFIG/cli-plugins
-        curl -SL https://github.com/docker/buildx/releases/download/v0.14.1/buildx-v0.14.1.darwin-amd64 -o $DOCKER_CONFIG/cli-plugins/docker-buildx
-        chmod +x $DOCKER_CONFIG/cli-plugins/docker-buildx
-    - env:
-        SPARK_VERSION: ${{ matrix.spark }}
-        SEDONA_VERSION: ${{ matrix.sedona }}
-        GEOTOOLS_VERSION: ${{ matrix.geotools }}
-      run: ./docker/sedona-spark-jupyterlab/build.sh ${SPARK_VERSION} ${SEDONA_VERSION} local ${GEOTOOLS_VERSION}
+      - uses: actions/checkout@v4
+      - uses: actions/setup-java@v4
+        with:
+          distribution: 'zulu'
+          java-version: 11
+      - name: Cache Maven packages
+        uses: actions/cache@v3
+        with:
+          path: ~/.m2
+          key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }}
+          restore-keys: ${{ runner.os }}-m2
+      - name: Setup docker (missing on macOS)
+        if: runner.os == 'macos'
+        run: |
+          brew install docker
+          colima start
+          DOCKER_CONFIG=${DOCKER_CONFIG:-$HOME/.docker}
+          mkdir -p $DOCKER_CONFIG/cli-plugins
+          curl -SL https://github.com/docker/buildx/releases/download/v0.14.1/buildx-v0.14.1.darwin-amd64 -o $DOCKER_CONFIG/cli-plugins/docker-buildx
+          chmod +x $DOCKER_CONFIG/cli-plugins/docker-buildx
+      - env:
+          SPARK_VERSION: ${{ matrix.spark }}
+          SEDONA_VERSION: ${{ matrix.sedona }}
+          GEOTOOLS_VERSION: ${{ matrix.geotools }}
+        run: ./docker/sedona-spark-jupyterlab/build.sh ${SPARK_VERSION} ${SEDONA_VERSION} local ${GEOTOOLS_VERSION}
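The new `.github/linters/.yaml-lint.yml` above extends yamllint's bundled `default` preset and switches off the rules the existing files do not yet satisfy (`colons`, `comments`, `comments-indentation`, `document-start`, `line-length`). A minimal sketch of exercising the same config by hand — the standalone step below is illustrative and not part of this change; in this diff the config is actually wired up through the pre-commit hook added further down:

```yaml
# Hypothetical one-off lint step, not in any workflow in this diff.
- name: Lint workflow and config YAML
  run: |
    pip install yamllint
    # --strict makes warnings fail the run, matching the hook's args below
    yamllint --strict -c .github/linters/.yaml-lint.yml .
```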
diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
index c5e6e78003..3f528c52d3 100644
--- a/.github/workflows/docs.yml
+++ b/.github/workflows/docs.yml
@@ -15,62 +15,62 @@ jobs:
   build:
     runs-on: ubuntu-22.04
     steps:
-    - uses: actions/checkout@v4
-      with:
-        fetch-depth: 0
-    - name: Set up Java
-      uses: actions/setup-java@v4
-      with:
-        distribution: 'zulu'
-        java-version: '8'
-    - name: Compile JavaDoc
-      run: mvn -q clean install -DskipTests && mkdir -p docs/api/javadoc/spark && cp -r spark/common/target/apidocs/* docs/api/javadoc/spark/
-    - name: Compile ScalaDoc
-      run: mvn scala:doc && mkdir -p docs/api/scaladoc/spark && cp -r spark/common/target/site/scaladocs/* docs/api/scaladoc/spark
-    - uses: actions/setup-python@v5
-      with:
-        python-version: 3.x
-    - run: pip install mkdocs-jupyter
-    - run: pip install mkdocs-material
-    - run: pip install mkdocs-macros-plugin
-    - run: pip install mkdocs-git-revision-date-localized-plugin
-    - run: pip install mike
-    - run: sudo apt update
-    - uses: r-lib/actions/setup-r@v2
-      with:
-        r-version: release
-        use-public-rspm: true
-    - name: Query R dependencies
-      uses: r-lib/actions/setup-r-dependencies@v2
-      with:
-        cache: true
-        extra-packages: |
-          any::pkgdown
-        working-directory : './R'
-    - run: Rscript -e 'pkgdown::build_site(pkg = "./R", preview = FALSE, override = list(destination = "../docs/api/rdocs"))'
-    - run: git config --global user.name = "GitHub Action"
-    - run: git config --global user.email = "test@abc.com"
-    - run: mkdocs build
-    - run: git fetch origin website --depth=1
-    - name: Deploy the doc to the website branch
-      if: ${{ github.event_name != 'pull_request' && github.repository == 'apache/sedona' }}
-      run: mike deploy latest-snapshot -b website -p
-    - run: mkdir staging
-    - run: cp -r site/* staging/
-    - uses: actions/upload-artifact@v3
-      with:
-        name: generated-docs
-        path: staging
-    - name: Cache Python packages
-      uses: actions/cache@v3
-      with:
-        path: ~/.cache/pip
-        key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}
-        restore-keys: |
-          ${{ runner.os }}-pip-
-    - name: Cache Maven packages
-      uses: actions/cache@v3
-      with:
-        path: ~/.m2
-        key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }}
-        restore-keys: ${{ runner.os }}-m2
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+      - name: Set up Java
+        uses: actions/setup-java@v4
+        with:
+          distribution: 'zulu'
+          java-version: '8'
+      - name: Compile JavaDoc
+        run: mvn -q clean install -DskipTests && mkdir -p docs/api/javadoc/spark && cp -r spark/common/target/apidocs/* docs/api/javadoc/spark/
+      - name: Compile ScalaDoc
+        run: mvn scala:doc && mkdir -p docs/api/scaladoc/spark && cp -r spark/common/target/site/scaladocs/* docs/api/scaladoc/spark
+      - uses: actions/setup-python@v5
+        with:
+          python-version: 3.x
+      - run: pip install mkdocs-jupyter
+      - run: pip install mkdocs-material
+      - run: pip install mkdocs-macros-plugin
+      - run: pip install mkdocs-git-revision-date-localized-plugin
+      - run: pip install mike
+      - run: sudo apt update
+      - uses: r-lib/actions/setup-r@v2
+        with:
+          r-version: release
+          use-public-rspm: true
+      - name: Query R dependencies
+        uses: r-lib/actions/setup-r-dependencies@v2
+        with:
+          cache: true
+          extra-packages: |
+            any::pkgdown
+          working-directory: './R'
+      - run: Rscript -e 'pkgdown::build_site(pkg = "./R", preview = FALSE, override = list(destination = "../docs/api/rdocs"))'
+      - run: git config --global user.name "GitHub Action"
+      - run: git config --global user.email "test@abc.com"
+      - run: mkdocs build
+      - run: git fetch origin website --depth=1
+      - name: Deploy the doc to the website branch
+        if: ${{ github.event_name != 'pull_request' && github.repository == 'apache/sedona' }}
+        run: mike deploy latest-snapshot -b website -p
+      - run: mkdir staging
+      - run: cp -r site/* staging/
+      - uses: actions/upload-artifact@v3
+        with:
+          name: generated-docs
+          path: staging
+      - name: Cache Python packages
+        uses: actions/cache@v3
+        with:
+          path: ~/.cache/pip
+          key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}
+          restore-keys: |
+            ${{ runner.os }}-pip-
+      - name: Cache Maven packages
+        uses: actions/cache@v3
+        with:
+          path: ~/.m2
+          key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }}
+          restore-keys: ${{ runner.os }}-m2
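docs.yml above builds the Javadoc/Scaladoc, the pkgdown R reference, and the mkdocs site, then publishes with mike, which commits the rendered site to the `website` branch under a named version (`latest-snapshot`) and pushes it (`-p`). A hedged sketch of checking the same deployment without pushing — the preview step is illustrative and not in the workflow:

```yaml
# Sketch only: inspect the mike deployment locally.
- name: Preview the versioned site locally
  run: |
    # Same command as the deploy step, minus -p, so the result is only
    # a commit on the local website branch and nothing is pushed.
    mike deploy latest-snapshot -b website
    mike serve -b website   # serve the versioned site for inspection
```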
diff --git a/.github/workflows/example.yml b/.github/workflows/example.yml
index a0f11c119e..c8f9d66657 100644
--- a/.github/workflows/example.yml
+++ b/.github/workflows/example.yml
@@ -18,7 +18,6 @@ permissions:
 
 jobs:
   build:
-
     runs-on: ubuntu-22.04
     strategy:
       fail-fast: false
@@ -33,37 +32,36 @@ jobs:
         - spark: 3.3.4
          spark-compat: '3.0'
          sedona: 1.5.1
-
    steps:
-    - uses: actions/checkout@v4
-    - uses: actions/setup-java@v4
-      with:
-        distribution: 'zulu'
-        java-version: '8'
-    - run: sudo apt-get remove scala-library scala
-    - run: sudo wget www.scala-lang.org/files/archive/scala-2.12.11.deb
-    - run: sudo dpkg -i scala-2.12.11.deb
-    - run: scala -version
-    - run: echo "deb https://repo.scala-sbt.org/scalasbt/debian /" | sudo tee -a /etc/apt/sources.list.d/sbt.list
-    - run: sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 2EE0EA64E40A89B84B2DF73499E82A75642AC823
-    - run: sudo apt-get update
-    - run: sudo apt-get install sbt
-    - name: Cache SBT
-      uses: actions/cache@v3
-      with:
-        path: |
-          ~/.ivy2/cache
-          ~/.sbt
-        key: ${{ runner.os }}-sbt-${{ hashFiles('**/build.sbt') }}
-    - name: Cache Maven packages
-      uses: actions/cache@v3
-      with:
-        path: ~/.m2
-        key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }}
-        restore-keys: ${{ runner.os }}-m2
-    - env:
-        SPARK_VERSION: ${{ matrix.spark }}
-        SPARK_COMPAT_VERSION: ${{ matrix.spark-compat }}
-        SEDONA_VERSION: ${{ matrix.sedona }}
-      run: (cd examples/spark-sql;mvn clean install -Dspark.version=${SPARK_VERSION} -Dspark.compat.version=${SPARK_COMPAT_VERSION} -Dsedona.version=${SEDONA_VERSION};java -jar target/sedona-spark-example-1.6.0.jar)
-    - run: (cd examples/flink-sql;mvn clean install;java -jar target/sedona-flink-example-1.6.0.jar)
+      - uses: actions/checkout@v4
+      - uses: actions/setup-java@v4
+        with:
+          distribution: 'zulu'
+          java-version: '8'
+      - run: sudo apt-get remove scala-library scala
+      - run: sudo wget www.scala-lang.org/files/archive/scala-2.12.11.deb
+      - run: sudo dpkg -i scala-2.12.11.deb
+      - run: scala -version
+      - run: echo "deb https://repo.scala-sbt.org/scalasbt/debian /" | sudo tee -a /etc/apt/sources.list.d/sbt.list
+      - run: sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 2EE0EA64E40A89B84B2DF73499E82A75642AC823
+      - run: sudo apt-get update
+      - run: sudo apt-get install sbt
+      - name: Cache SBT
+        uses: actions/cache@v3
+        with:
+          path: |
+            ~/.ivy2/cache
+            ~/.sbt
+          key: ${{ runner.os }}-sbt-${{ hashFiles('**/build.sbt') }}
+      - name: Cache Maven packages
+        uses: actions/cache@v3
+        with:
+          path: ~/.m2
+          key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }}
+          restore-keys: ${{ runner.os }}-m2
+      - env:
+          SPARK_VERSION: ${{ matrix.spark }}
+          SPARK_COMPAT_VERSION: ${{ matrix.spark-compat }}
+          SEDONA_VERSION: ${{ matrix.sedona }}
+        run: (cd examples/spark-sql;mvn clean install -Dspark.version=${SPARK_VERSION} -Dspark.compat.version=${SPARK_COMPAT_VERSION} -Dsedona.version=${SEDONA_VERSION};java -jar target/sedona-spark-example-1.6.0.jar)
+      - run: (cd examples/flink-sql;mvn clean install;java -jar target/sedona-flink-example-1.6.0.jar)
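example.yml above compiles and runs the `examples/spark-sql` and `examples/flink-sql` projects once per matrix entry; each `include` item pins a mutually compatible Spark/compat/Sedona triple instead of building a full cross product. A sketch of how one more pinned combination would look — the 3.4.x values below are illustrative, not part of this change:

```yaml
strategy:
  fail-fast: false
  matrix:
    include:
      - spark: 3.5.1
        spark-compat: '3.5'
        sedona: 1.5.1
      # hypothetical additional entry; versions are illustrative
      - spark: 3.4.3
        spark-compat: '3.4'
        sedona: 1.5.1
```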
diff --git a/.github/workflows/java.yml b/.github/workflows/java.yml
index 546d887cf0..36e91ed46a 100644
--- a/.github/workflows/java.yml
+++ b/.github/workflows/java.yml
@@ -35,7 +35,6 @@ permissions:
 
 jobs:
   build:
-
     runs-on: ubuntu-22.04
     strategy:
       fail-fast: true
@@ -81,37 +80,36 @@ jobs:
          scala: 2.12.15
          jdk: '8'
          skipTests: ''
-
    steps:
-    - uses: actions/checkout@v4
-    - uses: actions/setup-java@v4
-      with:
-        distribution: 'zulu'
-        java-version: ${{ matrix.jdk }}
-    - uses: actions/setup-python@v5
-      with:
-        python-version: '3.7'
-    - name: Cache Maven packages
-      uses: actions/cache@v3
-      with:
-        path: ~/.m2
-        key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }}
-        restore-keys: ${{ runner.os }}-m2
-    - env:
-        SPARK_VERSION: ${{ matrix.spark }}
-        SCALA_VERSION: ${{ matrix.scala }}
-        SKIP_TESTS: ${{ matrix.skipTests }}
-      run: |
-        SPARK_COMPAT_VERSION="3.0"
-        if [ ${SPARK_VERSION:2:1} -gt "3" ]; then
-          SPARK_COMPAT_VERSION=${SPARK_VERSION:0:3}
-        fi
-        mvn -q clean install -Dspark=${SPARK_COMPAT_VERSION} -Dscala=${SCALA_VERSION:0:4} -Dspark.version=${SPARK_VERSION} ${SKIP_TESTS}
-    - run: mkdir staging
-    - run: cp spark-shaded/target/sedona-*.jar staging
-    - run: |
-        [ -d "flink-shaded/target/" ] && cp flink-shaded/target/sedona-*.jar staging 2>/dev/null || true
-    - uses: actions/upload-artifact@v3
-      with:
-        name: generated-jars ${{ matrix.spark }} ${{ matrix.scala }} ${{ matrix.jdk }}
-        path: staging
+      - uses: actions/checkout@v4
+      - uses: actions/setup-java@v4
+        with:
+          distribution: 'zulu'
+          java-version: ${{ matrix.jdk }}
+      - uses: actions/setup-python@v5
+        with:
+          python-version: '3.7'
+      - name: Cache Maven packages
+        uses: actions/cache@v3
+        with:
+          path: ~/.m2
+          key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }}
+          restore-keys: ${{ runner.os }}-m2
+      - env:
+          SPARK_VERSION: ${{ matrix.spark }}
+          SCALA_VERSION: ${{ matrix.scala }}
+          SKIP_TESTS: ${{ matrix.skipTests }}
+        run: |
+          SPARK_COMPAT_VERSION="3.0"
+          if [ ${SPARK_VERSION:2:1} -gt "3" ]; then
+            SPARK_COMPAT_VERSION=${SPARK_VERSION:0:3}
+          fi
+          mvn -q clean install -Dspark=${SPARK_COMPAT_VERSION} -Dscala=${SCALA_VERSION:0:4} -Dspark.version=${SPARK_VERSION} ${SKIP_TESTS}
+      - run: mkdir staging
+      - run: cp spark-shaded/target/sedona-*.jar staging
+      - run: |
+          [ -d "flink-shaded/target/" ] && cp flink-shaded/target/sedona-*.jar staging 2>/dev/null || true
+      - uses: actions/upload-artifact@v3
+        with:
+          name: generated-jars ${{ matrix.spark }} ${{ matrix.scala }} ${{ matrix.jdk }}
+          path: staging
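The build step in java.yml derives the Maven profile from the Spark version with bash substring expansion: `${SPARK_VERSION:2:1}` is the minor-version digit and `${SPARK_VERSION:0:3}` the `major.minor` prefix, so 3.4.x/3.5.x select their own compat profile while 3.0.x through 3.3.x all share the `3.0` profile. A worked sketch of the same logic as a standalone step — illustrative only, and note the trick assumes a single-digit minor version:

```yaml
- name: Show the derived Spark compat version (illustrative only)
  run: |
    SPARK_VERSION=3.5.1
    SPARK_COMPAT_VERSION="3.0"
    # ${SPARK_VERSION:2:1} -> "5"; 5 > 3, so take the "3.5" prefix.
    # For SPARK_VERSION=3.3.4 the digit is "3", so "3.0" is kept.
    if [ ${SPARK_VERSION:2:1} -gt "3" ]; then
      SPARK_COMPAT_VERSION=${SPARK_VERSION:0:3}
    fi
    echo "SPARK_COMPAT_VERSION=${SPARK_COMPAT_VERSION}"   # prints 3.5
```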
diff --git a/.github/workflows/python-extension.yml b/.github/workflows/python-extension.yml
index 5536e8929e..698f41b8d9 100644
--- a/.github/workflows/python-extension.yml
+++ b/.github/workflows/python-extension.yml
@@ -31,47 +31,45 @@ jobs:
       matrix:
        os: ['ubuntu-latest', 'windows-latest', 'macos-13']
        python: ['3.10', '3.9', '3.8']
-
    runs-on: ${{ matrix.os }}
    defaults:
      run:
        shell: bash
-
    steps:
-    - uses: actions/checkout@v4
-    - uses: actions/setup-python@v5
-      with:
-        python-version: ${{ matrix.python }}
-    - name: Install pipenv
-      run: pip install -U pipenv
-    - name: Install dependencies
-      run: |
-        cd python
-        pipenv --python ${{ matrix.python }}
-        pipenv install --dev
-    - name: Build extension
-      run: |
-        cd python
-        pipenv run python setup.py build_ext --inplace
-    - name: Run tests
-      run: |
-        cd python
-        pipenv run pytest tests/utils/test_geomserde_speedup.py
-    - name: Run tests on Shapely 2.0
-      run: |
-        cd python
-        pipenv install shapely~=2.0
-        pipenv run pytest tests/utils/test_geomserde_speedup.py
-    - name: Run tests on Shapley 1.7
-      # Shapely 1.7 only provides wheels for cp36 ~ cp39, so we'll skip running
-      # this test for recent python versions.
-      if: ${{ matrix.python == '3.9' || matrix.python == '3.8' }}
-      run: |
-        cd python
-        pipenv install shapely~=1.7
-        pipenv run pytest tests/utils/test_geomserde_speedup.py
-    - name: Install from sdist
-      run: |
-        cd python
-        pipenv run python setup.py sdist
-        pipenv run python -m pip install dist/*sedona-*.tar.gz
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python }}
+      - name: Install pipenv
+        run: pip install -U pipenv
+      - name: Install dependencies
+        run: |
+          cd python
+          pipenv --python ${{ matrix.python }}
+          pipenv install --dev
+      - name: Build extension
+        run: |
+          cd python
+          pipenv run python setup.py build_ext --inplace
+      - name: Run tests
+        run: |
+          cd python
+          pipenv run pytest tests/utils/test_geomserde_speedup.py
+      - name: Run tests on Shapely 2.0
+        run: |
+          cd python
+          pipenv install shapely~=2.0
+          pipenv run pytest tests/utils/test_geomserde_speedup.py
+      - name: Run tests on Shapely 1.7
+        # Shapely 1.7 only provides wheels for cp36 ~ cp39, so we'll skip running
+        # this test for recent python versions.
+        if: ${{ matrix.python == '3.9' || matrix.python == '3.8' }}
+        run: |
+          cd python
+          pipenv install shapely~=1.7
+          pipenv run pytest tests/utils/test_geomserde_speedup.py
+      - name: Install from sdist
+        run: |
+          cd python
+          pipenv run python setup.py sdist
+          pipenv run python -m pip install dist/*sedona-*.tar.gz
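python-extension.yml above builds the native geomserde speedup in place and runs its test file against both Shapely major lines, skipping Shapely 1.7 where no wheel exists for the interpreter. A condensed local equivalent of those steps — a sketch assuming a checkout, Python 3.9, and a working C toolchain:

```yaml
- name: Build and test the extension locally (sketch)
  run: |
    cd python
    pipenv --python 3.9
    pipenv install --dev
    pipenv run python setup.py build_ext --inplace
    # exercise the serde speedup against both supported Shapely lines
    pipenv install "shapely~=2.0"
    pipenv run pytest tests/utils/test_geomserde_speedup.py
    pipenv install "shapely~=1.7"
    pipenv run pytest tests/utils/test_geomserde_speedup.py
```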
diff --git a/.github/workflows/python-wheel.yml b/.github/workflows/python-wheel.yml
index 5dddb92326..a89d3a2043 100644
--- a/.github/workflows/python-wheel.yml
+++ b/.github/workflows/python-wheel.yml
@@ -26,28 +26,26 @@ jobs:
     strategy:
       matrix:
        os: ['ubuntu-latest', 'windows-latest', 'macos-latest']
-
    runs-on: ${{ matrix.os }}
    defaults:
      run:
        shell: bash
-
    steps:
-    - uses: actions/checkout@v4
-    - name: Set up QEMU
-      if: runner.os == 'Linux'
-      uses: docker/setup-qemu-action@v3
-      with:
-        platforms: all
-    - name: Build wheels
-      uses: pypa/cibuildwheel@v2.18.1
-      env:
-        CIBW_SKIP: 'pp* *musl*'
-        CIBW_ARCHS_LINUX: 'x86_64 aarch64'
-        CIBW_ARCHS_WINDOWS: 'AMD64 ARM64'
-        CIBW_ARCHS_MACOS: 'x86_64 arm64'
-      with:
-        package-dir: python
-    - uses: actions/upload-artifact@v3
-      with:
-        path: ./wheelhouse/*.whl
+      - uses: actions/checkout@v4
+      - name: Set up QEMU
+        if: runner.os == 'Linux'
+        uses: docker/setup-qemu-action@v3
+        with:
+          platforms: all
+      - name: Build wheels
+        uses: pypa/cibuildwheel@v2.18.1
+        env:
+          CIBW_SKIP: 'pp* *musl*'
+          CIBW_ARCHS_LINUX: 'x86_64 aarch64'
+          CIBW_ARCHS_WINDOWS: 'AMD64 ARM64'
+          CIBW_ARCHS_MACOS: 'x86_64 arm64'
+        with:
+          package-dir: python
+      - uses: actions/upload-artifact@v3
+        with:
+          path: ./wheelhouse/*.whl
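In python-wheel.yml the cibuildwheel step is driven entirely by `CIBW_*` variables: `CIBW_SKIP` drops PyPy and musllinux targets, and the per-OS `CIBW_ARCHS_*` lists add aarch64 Linux wheels (built under the QEMU emulation set up in the previous step) plus ARM64 Windows and arm64 macOS cross-builds. A rough local equivalent — a sketch assuming a Linux host with Docker and binfmt support:

```yaml
- name: Build the Linux wheels locally (sketch)
  run: |
    pip install cibuildwheel==2.18.1
    # same selection as the workflow: skip PyPy/musllinux, both arches
    CIBW_SKIP='pp* *musl*' CIBW_ARCHS_LINUX='x86_64 aarch64' \
      python -m cibuildwheel python --output-dir wheelhouse
```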
diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml
index b37541d5a7..b712f0c357 100644
--- a/.github/workflows/python.yml
+++ b/.github/workflows/python.yml
@@ -34,7 +34,6 @@ permissions:
 
 jobs:
   build:
-
     runs-on: ubuntu-22.04
     strategy:
       matrix:
@@ -93,68 +92,67 @@ jobs:
          scala: '2.12.8'
          python: '3.7'
          hadoop: '2.7'
-
    steps:
-    - uses: actions/checkout@v4
-    - uses: actions/setup-java@v4
-      with:
-        distribution: 'zulu'
-        java-version: '8'
-    - uses: actions/setup-python@v5
-      with:
-        python-version: ${{ matrix.python }}
-    - name: Cache Maven packages
-      uses: actions/cache@v3
-      with:
-        path: ~/.m2
-        key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }}
-        restore-keys: ${{ runner.os }}-m2
-    - env:
-        SPARK_VERSION: ${{ matrix.spark }}
-        SCALA_VERSION: ${{ matrix.scala }}
-      run: |
-        SPARK_COMPAT_VERSION="3.0"
-        if [ ${SPARK_VERSION:2:1} -gt "3" ]; then
-          SPARK_COMPAT_VERSION=${SPARK_VERSION:0:3}
+      - uses: actions/checkout@v4
+      - uses: actions/setup-java@v4
+        with:
+          distribution: 'zulu'
+          java-version: '8'
+      - uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python }}
+      - name: Cache Maven packages
+        uses: actions/cache@v3
+        with:
+          path: ~/.m2
+          key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }}
+          restore-keys: ${{ runner.os }}-m2
+      - env:
+          SPARK_VERSION: ${{ matrix.spark }}
+          SCALA_VERSION: ${{ matrix.scala }}
+        run: |
+          SPARK_COMPAT_VERSION="3.0"
+          if [ ${SPARK_VERSION:2:1} -gt "3" ]; then
+            SPARK_COMPAT_VERSION=${SPARK_VERSION:0:3}
+          fi
+          mvn -q clean install -DskipTests -Dspark=${SPARK_COMPAT_VERSION} -Dscala=${SCALA_VERSION:0:4} -Dgeotools
+      - env:
+          SPARK_VERSION: ${{ matrix.spark }}
+          HADOOP_VERSION: ${{ matrix.hadoop }}
+        run: |
+          wget https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz
+          wget https://repo.osgeo.org/repository/release/javax/media/jai_core/${JAI_CORE_VERSION}/jai_core-${JAI_CORE_VERSION}.jar
+          wget https://repo.osgeo.org/repository/release/javax/media/jai_codec/${JAI_CODEC_VERSION}/jai_codec-${JAI_CODEC_VERSION}.jar
+          wget https://repo.osgeo.org/repository/release/javax/media/jai_imageio/${JAI_IMAGEIO_VERSION}/jai_imageio-${JAI_IMAGEIO_VERSION}.jar
+          tar -xzf spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz
+          mv -v jai_core-${JAI_CORE_VERSION}.jar spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}/jars/
+          mv -v jai_codec-${JAI_CODEC_VERSION}.jar spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}/jars/
+          mv -v jai_imageio-${JAI_IMAGEIO_VERSION}.jar spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}/jars/
+      - run: sudo apt-get -y install python3-pip python-dev-is-python3
+      - run: sudo pip3 install -U setuptools
+      - run: sudo pip3 install -U wheel
+      - run: sudo pip3 install -U virtualenvwrapper
+      - run: python3 -m pip install pipenv
+      - run: cd python; python3 setup.py build_ext --inplace
+      - env:
+          SPARK_VERSION: ${{ matrix.spark }}
+          PYTHON_VERSION: ${{ matrix.python }}
+          SHAPELY_VERSION: ${{ matrix.shapely }}
+        run: |
+          cd python
+          if [ "${SHAPELY_VERSION}" == "1" ]; then
+            echo "Patching Pipfile to use Shapely 1.x"
+            sed -i 's/^shapely.*$/shapely="<2.0.0"/g' Pipfile
           fi
-        mvn -q clean install -DskipTests -Dspark=${SPARK_COMPAT_VERSION} -Dscala=${SCALA_VERSION:0:4} -Dgeotools
-    - env:
-        SPARK_VERSION: ${{ matrix.spark }}
-        HADOOP_VERSION: ${{ matrix.hadoop }}
-      run: |
-        wget https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz
-        wget https://repo.osgeo.org/repository/release/javax/media/jai_core/${JAI_CORE_VERSION}/jai_core-${JAI_CORE_VERSION}.jar
-        wget https://repo.osgeo.org/repository/release/javax/media/jai_codec/${JAI_CODEC_VERSION}/jai_codec-${JAI_CODEC_VERSION}.jar
-        wget https://repo.osgeo.org/repository/release/javax/media/jai_imageio/${JAI_IMAGEIO_VERSION}/jai_imageio-${JAI_IMAGEIO_VERSION}.jar
-        tar -xzf spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz
-        mv -v jai_core-${JAI_CORE_VERSION}.jar spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}/jars/
-        mv -v jai_codec-${JAI_CODEC_VERSION}.jar spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}/jars/
-        mv -v jai_imageio-${JAI_IMAGEIO_VERSION}.jar spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}/jars/
-    - run: sudo apt-get -y install python3-pip python-dev-is-python3
-    - run: sudo pip3 install -U setuptools
-    - run: sudo pip3 install -U wheel
-    - run: sudo pip3 install -U virtualenvwrapper
-    - run: python3 -m pip install pipenv
-    - run: cd python; python3 setup.py build_ext --inplace
-    - env:
-        SPARK_VERSION: ${{ matrix.spark }}
-        PYTHON_VERSION: ${{ matrix.python }}
-        SHAPELY_VERSION: ${{ matrix.shapely }}
-      run: |
-        cd python
-        if [ "${SHAPELY_VERSION}" == "1" ]; then
-          echo "Patching Pipfile to use Shapely 1.x"
-          sed -i 's/^shapely.*$/shapely="<2.0.0"/g' Pipfile
-        fi
-        pipenv --python ${PYTHON_VERSION}
-        pipenv install pyspark==${SPARK_VERSION}
-        pipenv install --dev
-        pipenv graph
-    - env:
-        SPARK_VERSION: ${{ matrix.spark }}
-        HADOOP_VERSION: ${{ matrix.hadoop }}
-      run: find spark-shaded/target -name sedona-*.jar -exec cp {} spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}/jars/ \;
-    - env:
-        SPARK_VERSION: ${{ matrix.spark }}
-        HADOOP_VERSION: ${{ matrix.hadoop }}
-      run: (export SPARK_HOME=$PWD/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION};export PYTHONPATH=$SPARK_HOME/python;cd python;pipenv run pytest tests)
+          pipenv --python ${PYTHON_VERSION}
+          pipenv install pyspark==${SPARK_VERSION}
+          pipenv install --dev
+          pipenv graph
+      - env:
+          SPARK_VERSION: ${{ matrix.spark }}
+          HADOOP_VERSION: ${{ matrix.hadoop }}
+        run: find spark-shaded/target -name sedona-*.jar -exec cp {} spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}/jars/ \;
+      - env:
+          SPARK_VERSION: ${{ matrix.spark }}
+          HADOOP_VERSION: ${{ matrix.hadoop }}
+        run: (export SPARK_HOME=$PWD/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION};export PYTHONPATH=$SPARK_HOME/python;cd python;pipenv run pytest tests)
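The last two steps of python.yml drop the shaded Sedona jar into the unpacked Spark distribution and run pytest against it, with the whole environment set up in a single parenthesized one-liner. The same commands, unrolled for readability as a sketch:

```yaml
- name: Run the Python tests against the local Spark (unrolled sketch)
  run: |
    export SPARK_HOME=$PWD/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}
    export PYTHONPATH=$SPARK_HOME/python   # use the bundled pyspark sources
    cd python
    pipenv run pytest tests
```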
diff --git a/.github/workflows/r.yml b/.github/workflows/r.yml
index 6de07770ef..2ec23c7706 100644
--- a/.github/workflows/r.yml
+++ b/.github/workflows/r.yml
@@ -28,7 +28,6 @@ env:
 
 jobs:
   build:
-
     runs-on: ubuntu-22.04
     strategy:
       fail-fast: true
@@ -37,7 +36,6 @@ jobs:
         hadoop: [3]
         scala: [2.12.15]
        r: [oldrel, release]
-
    env:
      SPARK_VERSION: ${{ matrix.spark }}
      HADOOP_VERSION: ${{ matrix.hadoop }}
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index d40341a5f8..a32de8be9f 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -11,7 +11,7 @@ repos:
       - id: identity
       - id: check-hooks-apply
   - repo: https://github.com/codespell-project/codespell
-    rev: v2.2.6
+    rev: v2.3.0
    hooks:
      - id: codespell
        name: Run codespell
@@ -19,12 +19,12 @@ repos:
        args: [--ignore-words=.github/linters/codespell.txt]
        exclude: ^docs/image|^spark/common/src/test/resources|^docs/usecases|^tools/maven/scalafmt
  - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.4.2
+    rev: v0.4.10
    hooks:
      - id: ruff
        args: [--config=.github/linters/ruff.toml, --fix]
  - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.5.0
+    rev: v4.6.0
    hooks:
      - id: check-ast
      - id: check-builtin-literals
@@ -57,7 +57,7 @@ repos:
        args: [--markdown-linebreak-ext=md]
        exclude: ^docs-overrides/main\.html$|\.Rd$
  - repo: https://github.com/igorshubovych/markdownlint-cli
-    rev: v0.38.0
+    rev: v0.41.0
    hooks:
      - id: markdownlint
        name: Run markdownlint
@@ -66,3 +66,13 @@ repos:
        exclude: ^\.github/.*$
        types: [markdown]
        files: \.(md|mdown|markdown)$
+  - repo: https://github.com/adrienverge/yamllint
+    rev: v1.35.1
+    hooks:
+      - id: yamllint
+        name: Run yamllint
+        description: Check YAML files with yamllint
+        args: [--strict, -c=.github/linters/.yaml-lint.yml]
+        exclude: ^mkdocs\.yml$
+        types: [yaml]
+        files: \.ya?ml$
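The new yamllint hook at the end of .pre-commit-config.yaml runs with `--strict` (warnings fail the check) against the repo's own `.yaml-lint.yml`, targeting every `*.yml`/`*.yaml` file except `mkdocs.yml`. Running the hook on demand looks like this — local commands, expressed as a step for symmetry with the workflows above:

```yaml
- name: Run the new yamllint hook on the whole tree
  run: |
    pip install pre-commit
    pre-commit run yamllint --all-files
```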
diff --git a/R/_pkgdown.yml b/R/_pkgdown.yml
index 74ce0d0e6a..4280bf60d2 100644
--- a/R/_pkgdown.yml
+++ b/R/_pkgdown.yml
@@ -25,43 +25,43 @@ home:
   [Homepage](https://sedona.apache.org/)
 
 reference:
-- title: "Reading and Writing Spatial DataFrames"
-  desc: "Functions for reading and writing Spark DataFrames."
-  contents:
-  - starts_with("spark_read")
-  - starts_with("spark_write")
-  - sedona_save_spatial_rdd
-- title: "Reading and Writing Spatial RDDs"
-  desc: "Functions for reading and writing Spatial RDDs."
-  contents:
-  - starts_with("sedona_read")
-  - starts_with("sedona_write")
-- title: "Conversion"
-  desc: "Functions to convert between Spark DataFrames and Spatial RDDs."
-  contents:
-  - contains("sdf_register")
-  - to_spatial_rdd
-- title: "RDD functions"
-- subtitle: "Visualization"
-  contents:
-  - starts_with("sedona_visualization_routines")
-  - starts_with("sedona_render")
-- subtitle: "Joins"
-  contents:
-  - sedona_spatial_join
-  - sedona_spatial_join_count_by_key
-- subtitle: "Query"
-  contents:
-  - sedona_knn_query
-  - sedona_range_query
-- subtitle: "Others"
-  contents:
-  - sedona_apply_spatial_partitioner
-  - sedona_build_index
-  - approx_count
-  - crs_transform
-  - minimum_bounding_box
-  - new_bounding_box
+  - title: "Reading and Writing Spatial DataFrames"
+    desc: "Functions for reading and writing Spark DataFrames."
+    contents:
+      - starts_with("spark_read")
+      - starts_with("spark_write")
+      - sedona_save_spatial_rdd
+  - title: "Reading and Writing Spatial RDDs"
+    desc: "Functions for reading and writing Spatial RDDs."
+    contents:
+      - starts_with("sedona_read")
+      - starts_with("sedona_write")
+  - title: "Conversion"
+    desc: "Functions to convert between Spark DataFrames and Spatial RDDs."
+    contents:
+      - contains("sdf_register")
+      - to_spatial_rdd
+  - title: "RDD functions"
+  - subtitle: "Visualization"
+    contents:
+      - starts_with("sedona_visualization_routines")
+      - starts_with("sedona_render")
+  - subtitle: "Joins"
+    contents:
+      - sedona_spatial_join
+      - sedona_spatial_join_count_by_key
+  - subtitle: "Query"
+    contents:
+      - sedona_knn_query
+      - sedona_range_query
+  - subtitle: "Others"
+    contents:
+      - sedona_apply_spatial_partitioner
+      - sedona_build_index
+      - approx_count
+      - crs_transform
+      - minimum_bounding_box
+      - new_bounding_box
 repo:
   url:
     home: https://github.com/apache/sedona/
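The re-indented `reference` index in `R/_pkgdown.yml` groups the package's documented functions with pkgdown's tidyselect-style helpers (`starts_with()`, `contains()`) alongside literal topic names; pkgdown reports documented topics that no selector matches when the site is built. The grouping can be previewed with the same pkgdown call docs.yml uses — a sketch, with `preview = TRUE` opening the result locally:

```yaml
- name: Build the R reference site locally (sketch)
  run: Rscript -e 'pkgdown::build_site(pkg = "./R", preview = TRUE)'
```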