diff --git a/.github/actions/build/action.yml b/.github/actions/build/action.yml index 8203e8a7..e74f78dc 100644 --- a/.github/actions/build/action.yml +++ b/.github/actions/build/action.yml @@ -43,9 +43,13 @@ runs: - name: Build run: | + mvn --batch-mode -Dspotless.check.skip --update-snapshots dependency:go-offline mvn --batch-mode -Dspotless.check.skip --update-snapshots clean compile test-compile mvn --batch-mode -Dspotless.check.skip -DskipTests -Dmaven.test.skip=true package mvn --batch-mode -Dspotless.check.skip -DskipTests -Dmaven.test.skip=true -Dgpg.skip install + cd examples/scala + mvn --batch-mode -Dspotless.check.skip clean compile test-compile + mvn --batch-mode -Dspotless.check.skip -DskipTests -Dmaven.test.skip=true package shell: bash - name: Upload Binaries @@ -58,6 +62,12 @@ runs: !target/*-javadoc.jar !target/site + - name: Upload Dependencies + uses: actions/upload-artifact@v4 + with: + name: Dependencies-${{ inputs.spark-compat-version }}-${{ inputs.scala-compat-version }} + path: ~/.m2/repository + branding: icon: 'check-circle' color: 'green' diff --git a/.github/actions/test-integrate/action.yml b/.github/actions/test-integrate/action.yml index 69abc4c0..f0f609ec 100644 --- a/.github/actions/test-integrate/action.yml +++ b/.github/actions/test-integrate/action.yml @@ -40,20 +40,21 @@ runs: name: Binaries-${{ inputs.spark-compat-version }}-${{ inputs.scala-compat-version }} path: . 
- - name: Cache Maven packages - uses: actions/cache@v4 + - name: Fetch Dependencies Artifact + uses: actions/download-artifact@v4 with: + name: Dependencies-${{ inputs.spark-compat-version }}-${{ inputs.scala-compat-version }} path: ~/.m2/repository - key: ${{ runner.os }}-mvn-integrate-${{ inputs.spark-version }}-${{ inputs.scala-compat-version }}-${{ hashFiles('pom.xml') }} - restore-keys: | - ${{ runner.os }}-mvn-integrate-${{ inputs.spark-version }}-${{ inputs.scala-compat-version }}- - ${{ runner.os }}-mvn-build-${{ inputs.spark-version }}-${{ inputs.scala-compat-version }}- - - name: Cache Spark Binaries - uses: actions/cache@v4 + - name: Fetch Spark Binaries Artifact + uses: actions/download-artifact@v4 with: + name: Spark-Binaries-${{ inputs.spark-version }}-${{ inputs.hadoop-version }} path: ~/spark - key: ${{ runner.os }}-spark-binaries-${{ inputs.spark-version }}-${{ inputs.scala-compat-version }} + + - name: Change file permissions + run: chmod u+x ~/spark/bin/* ~/spark/sbin/* + shell: bash - name: Setup JDK uses: actions/setup-java@v4 @@ -61,17 +62,6 @@ runs: java-version: ${{ inputs.java-version }} distribution: 'zulu' - - name: Setup Spark Binaries - env: - SPARK_PACKAGE: spark-${{ inputs.spark-version }}/spark-${{ inputs.spark-version }}-bin-hadoop${{ inputs.hadoop-version }}.tgz - run: | - if [[ ! 
-e ~/spark ]] - then - wget --progress=dot:giga "https://www.apache.org/dyn/closer.lua/spark/${SPARK_PACKAGE}?action=download" -O - | tar -xzC "${{ runner.temp }}" - archive=$(basename "${SPARK_PACKAGE}") bash -c "mv -v "${{ runner.temp }}/\${archive/%.tgz/}" ~/spark" - fi - shell: bash - - name: Parametrize id: params run: | @@ -84,14 +74,27 @@ runs: - name: Prepare Integration Tests run: | mvn --batch-mode -Dspotless.check.skip -DskipTests install - cd examples/scala - mvn --batch-mode -Dspotless.check.skip package + (cd examples/scala && mvn --batch-mode -Dspotless.check.skip package) # spark-submit is not capable of downloading these dependencies, fetching them through mvn - mvn --batch-mode -Dspotless.check.skip dependency:get -DgroupId=com.google.errorprone -DartifactId=error_prone_annotations -Dversion=2.3.3 - mvn --batch-mode -Dspotless.check.skip dependency:get -DgroupId=com.google.code.findbugs -DartifactId=jsr305 -Dversion=3.0.2 - mvn --batch-mode -Dspotless.check.skip dependency:get -DgroupId=org.codehaus.mojo -DartifactId=animal-sniffer-annotations -Dversion=1.17 - mvn --batch-mode -Dspotless.check.skip dependency:get -DgroupId=com.google.code.gson -DartifactId=gson -Dversion=2.8.9 - mvn --batch-mode -Dspotless.check.skip dependency:get -DgroupId=org.slf4j -DartifactId=slf4j-api -Dversion=1.7.16 + for dep in "org.slf4j#slf4j-api;2.0.16" \ + "com.google.protobuf#protobuf-java;4.29.1" \ + "io.netty#netty-all;4.1.110.Final" \ + "com.google.guava#guava;33.3.1-jre" \ + "com.google.guava#failureaccess;1.0.2" \ + "com.google.guava#listenablefuture;9999.0-empty-to-avoid-conflict-with-guava" \ + "org.checkerframework#checker-qual;3.43.0" \ + "com.google.j2objc#j2objc-annotations;3.0.0"; do + IFS="#;" read group artifact version <<< "$dep" + mvn --batch-mode -Dspotless.check.skip dependency:get -DgroupId="$group" -DartifactId="$artifact" -Dversion="$version" + done + if [[ "${{ inputs.spark-compat-version }}" == "3.0" ]] + then + # spark-submit 3.0 cannot 
resolve the dgraph4j dependency that has classifier "shaded" + # copying it into .ivy2 cache without classifier + mkdir -p ~/.ivy2/jars/ + dgraph4j_version="$(grep -A3 -B2 "dgraph4j" pom.xml | grep "<version>" | sed -e "s/[^>]*>//" -e "s/<.*//")" + cp -v ~/.m2/repository/io/dgraph/dgraph4j/${dgraph4j_version}/dgraph4j-${dgraph4j_version}-shaded.jar ~/.ivy2/jars/io.dgraph_dgraph4j-${dgraph4j_version}.jar + fi shell: bash - name: Start Dgraph cluster (Small) diff --git a/.github/actions/test-python/action.yml b/.github/actions/test-python/action.yml index 753e8e90..e6f8220c 100644 --- a/.github/actions/test-python/action.yml +++ b/.github/actions/test-python/action.yml @@ -37,15 +37,11 @@ runs: name: Binaries-${{ inputs.spark-compat-version }}-${{ inputs.scala-compat-version }} path: . - - name: Cache Maven packages - uses: actions/cache@v4 + - name: Fetch Dependencies Artifact + uses: actions/download-artifact@v4 with: + name: Dependencies-${{ inputs.spark-compat-version }}-${{ inputs.scala-compat-version }} path: ~/.m2/repository - key: ${{ runner.os }}-mvn-python-test-${{ inputs.spark-version }}-${{ inputs.scala-compat-version }}-${{ hashFiles('pom.xml') }} - restore-keys: | - ${{ runner.os }}-mvn-python-test-${{ inputs.spark-version }}-${{ inputs.scala-compat-version }}- - ${{ runner.os }}-mvn-test-${{ inputs.spark-version }}-${{ inputs.scala-compat-version }}- - ${{ runner.os }}-mvn-build-${{ inputs.spark-version }}-${{ inputs.scala-compat-version }}- - name: Setup JDK 11 uses: actions/setup-java@v4 diff --git a/.github/actions/test-scala/action.yml b/.github/actions/test-scala/action.yml index 7e61a216..1d6eea32 100644 --- a/.github/actions/test-scala/action.yml +++ b/.github/actions/test-scala/action.yml @@ -37,14 +37,11 @@ runs: name: Binaries-${{ inputs.spark-compat-version }}-${{ inputs.scala-compat-version }} path: . 
- - name: Cache Maven packages - uses: actions/cache@v4 + - name: Fetch Dependencies Artifact + uses: actions/download-artifact@v4 with: + name: Dependencies-${{ inputs.spark-compat-version }}-${{ inputs.scala-compat-version }} path: ~/.m2/repository - key: ${{ runner.os }}-mvn-test-${{ inputs.spark-version }}-${{ inputs.scala-compat-version }}-${{ hashFiles('pom.xml') }} - restore-keys: | - ${{ runner.os }}-mvn-test-${{ inputs.spark-version }}-${{ inputs.scala-compat-version }}- - ${{ runner.os }}-mvn-build-${{ inputs.spark-version }}-${{ inputs.scala-compat-version }}- - name: Setup JDK uses: actions/setup-java@v4 diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 58638722..04f7f220 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -54,6 +54,10 @@ jobs: git diff shell: bash + download-spark: + name: "Spark" + uses: "./.github/workflows/download-spark.yml" + build: name: "Build" uses: "./.github/workflows/build.yml" @@ -129,15 +133,17 @@ jobs: test-integration: name: "Test Integration" - needs: [test-dgraph, test-spark] + needs: [download-spark, test-dgraph, test-spark] uses: "./.github/workflows/test-integration.yml" - delete_binaries: - name: "Delete Binaries" + delete_artifacts: + name: "Delete Artifacts" runs-on: ubuntu-latest needs: [test-dgraph, test-spark, test-scala, test-python, test-integration] steps: - - name: Delete Binaries Artifact + - name: Delete Artifacts uses: geekyeggo/delete-artifact@v5 with: - name: "Binaries-*" + name: | + Binaries-* + Dependencies-* diff --git a/.github/workflows/download-spark.yml b/.github/workflows/download-spark.yml new file mode 100644 index 00000000..3dd98ab3 --- /dev/null +++ b/.github/workflows/download-spark.yml @@ -0,0 +1,54 @@ +name: Download Spark + +on: + workflow_call: + +jobs: + download: + name: Download (Spark ${{ matrix.spark-version }} Hadoop ${{ matrix.hadoop-version }}) + runs-on: ubuntu-latest + + strategy: + fail-fast: false + matrix: + # use spark versions from 
test-integration.yaml workflow + include: + - spark-version: '3.0.3' + hadoop-version: '2.7' + - spark-version: '3.1.3' + hadoop-version: '2.7' + - spark-version: '3.2.4' + hadoop-version: '2.7' + - spark-version: '3.3.4' + hadoop-version: '3' + - spark-version: '3.4.3' + hadoop-version: '3' + - spark-version: '3.5.3' + hadoop-version: '3' + - spark-version: '4.0.0-preview2' + hadoop-version: '3' + + steps: + - name: Cache Spark Binaries + uses: actions/cache@v4 + with: + path: ~/spark + key: ${{ runner.os }}-spark-binaries-${{ matrix.spark-version }}-${{ matrix.hadoop-version }} + + - name: Setup Spark Binaries + env: + SPARK_PACKAGE: spark-${{ matrix.spark-version }}/spark-${{ matrix.spark-version }}-bin-hadoop${{ matrix.hadoop-version }}.tgz + run: | + if [[ ! -e ~/spark ]] + then + wget --progress=dot:giga "https://www.apache.org/dyn/closer.lua/spark/${SPARK_PACKAGE}?action=download" -O - | tar -xzC "${{ runner.temp }}" + archive=$(basename "${SPARK_PACKAGE}") bash -c "mv -v "${{ runner.temp }}/\${archive/%.tgz/}" ~/spark" + fi + shell: bash + + - name: Upload Spark Binaries + uses: actions/upload-artifact@v4 + with: + name: Spark-Binaries-${{ matrix.spark-version }}-${{ matrix.hadoop-version }} + path: ~/spark + diff --git a/pom.xml b/pom.xml index 270545f8..f135f51c 100644 --- a/pom.xml +++ b/pom.xml @@ -115,9 +115,10 @@ - <groupId>uk.co.gresearch.dgraph</groupId> - <artifactId>dgraph4j-shaded</artifactId> - <version>21.12.0-0</version> + <groupId>io.dgraph</groupId> + <artifactId>dgraph4j</artifactId> + <version>24.1.1</version> + <classifier>shaded</classifier>