diff --git a/.github/workflows/java.yml b/.github/workflows/java.yml index 5c87ac265c..3d780c7ae1 100644 --- a/.github/workflows/java.yml +++ b/.github/workflows/java.yml @@ -26,6 +26,7 @@ on: env: MAVEN_OPTS: -Dmaven.wagon.httpconnectionManager.ttlSeconds=60 + DO_NOT_TRACK: true permissions: contents: read diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index 0ebba2e701..9e5546677e 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -25,6 +25,7 @@ env: JAI_CORE_VERSION: "1.1.3" JAI_CODEC_VERSION: "1.1.3" JAI_IMAGEIO_VERSION: "1.1" + DO_NOT_TRACK: true permissions: contents: read diff --git a/.github/workflows/r.yml b/.github/workflows/r.yml index 3ffdda98b4..199e030774 100644 --- a/.github/workflows/r.yml +++ b/.github/workflows/r.yml @@ -22,6 +22,7 @@ on: env: MAVEN_OPTS: -Dmaven.wagon.httpconnectionManager.ttlSeconds=60 + DO_NOT_TRACK: true jobs: build: diff --git a/R/R/dependencies.R b/R/R/dependencies.R index 68cf4e4980..b0c2741d11 100644 --- a/R/R/dependencies.R +++ b/R/R/dependencies.R @@ -60,7 +60,8 @@ sedona_initialize_spark_connection <- function(sc) { sc, "org.apache.sedona.sql.utils.SedonaSQLRegistrator", "registerAll", - spark_session(sc) + spark_session(sc), + "r" ) # Instantiate all enum objects and store them immutably under diff --git a/common/src/main/java/org/apache/sedona/common/utils/TelemetryCollector.java b/common/src/main/java/org/apache/sedona/common/utils/TelemetryCollector.java new file mode 100644 index 0000000000..8b17fdedc6 --- /dev/null +++ b/common/src/main/java/org/apache/sedona/common/utils/TelemetryCollector.java @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.sedona.common.utils; + +import java.net.HttpURLConnection; +import java.net.URL; +import java.net.URLEncoder; + +public class TelemetryCollector { + + private static final String BASE_URL = "https://sedona.gateway.scarf.sh/packages/"; + + public static String send(String engineName, String language) { + HttpURLConnection conn = null; + String telemetrySubmitted = ""; + try { + String arch = URLEncoder.encode(System.getProperty("os.arch").replaceAll(" ", "_"), "UTF-8"); + String os = URLEncoder.encode(System.getProperty("os.name").replaceAll(" ", "_"), "UTF-8"); + String jvm = URLEncoder.encode(System.getProperty("java.version").replaceAll(" ", "_"), "UTF-8"); + + // Construct URL + telemetrySubmitted = BASE_URL + language + "/" + engineName + "/" + arch + "/" + os + "/" + jvm; + + // Check for user opt-out + if (System.getenv("SCARF_NO_ANALYTICS") != null && System.getenv("SCARF_NO_ANALYTICS").equals("true") || + System.getenv("DO_NOT_TRACK") != null && System.getenv("DO_NOT_TRACK").equals("true") || + System.getProperty("SCARF_NO_ANALYTICS") != null && System.getProperty("SCARF_NO_ANALYTICS").equals("true") || + System.getProperty("DO_NOT_TRACK") != null && System.getProperty("DO_NOT_TRACK").equals("true")){ + return telemetrySubmitted; + } + + // Send GET request + URL url = new URL(telemetrySubmitted); + conn = (HttpURLConnection) url.openConnection(); + conn.setRequestMethod("GET"); + conn.connect(); + int responseCode = conn.getResponseCode(); + // Optionally check the response for successful execution + if (responseCode != 200) { + // Silent handling, no output or log + } + } catch (Exception e) { + // Silent catch block + } finally { + if (conn != null) { + conn.disconnect(); + } + } + return telemetrySubmitted; + } +} diff --git a/common/src/test/java/org/apache/sedona/common/telemetry/TelemetryTest.java b/common/src/test/java/org/apache/sedona/common/telemetry/TelemetryTest.java new file mode 100644 index 0000000000..4d518d9468 --- /dev/null +++ b/common/src/test/java/org/apache/sedona/common/telemetry/TelemetryTest.java @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.sedona.common.telemetry; + +import org.apache.sedona.common.utils.TelemetryCollector; +import org.junit.Test; + +public class TelemetryTest +{ + @Test + public void testTelemetryCollector() + { + assert TelemetryCollector.send("test", "java").contains("https://sedona.gateway.scarf.sh/packages/java/test"); + } +} diff --git a/docs/asf/telemetry.md b/docs/asf/telemetry.md new file mode 100644 index 0000000000..ef12147349 --- /dev/null +++ b/docs/asf/telemetry.md @@ -0,0 +1,3 @@ +Apache Sedona uses Scarf to collect anonymous usage data to help us understand how the software is being used and how we can improve it. You can opt out of telemetry collection by setting the environment variable `SCARF_NO_ANALYTICS` or `DO_NOT_TRACK` to `true` on your local machine, or the driver machine of your cluster. + +Scarf fully supports the GDPR and is allowed by [the Apache Software Foundation privacy policy](https://privacy.apache.org/faq/committers.html). The privacy policy of Scarf is available at [https://about.scarf.sh/privacy-policy](https://about.scarf.sh/privacy-policy). diff --git a/flink/src/main/java/org/apache/sedona/flink/SedonaContext.java b/flink/src/main/java/org/apache/sedona/flink/SedonaContext.java index 01eda47f49..4d3511dc09 100644 --- a/flink/src/main/java/org/apache/sedona/flink/SedonaContext.java +++ b/flink/src/main/java/org/apache/sedona/flink/SedonaContext.java @@ -23,6 +23,7 @@ import org.apache.sedona.common.geometryObjects.Circle; import org.apache.sedona.common.geometrySerde.GeometrySerde; import org.apache.sedona.common.geometrySerde.SpatialIndexSerde; +import org.apache.sedona.common.utils.TelemetryCollector; import org.locationtech.jts.geom.Envelope; import org.locationtech.jts.geom.GeometryCollection; import org.locationtech.jts.geom.LineString; @@ -46,6 +47,7 @@ public class SedonaContext */ public static StreamTableEnvironment create(StreamExecutionEnvironment env, StreamTableEnvironment tblEnv) { + TelemetryCollector.send("flink", "java"); GeometrySerde serializer = new GeometrySerde(); SpatialIndexSerde indexSerializer = new SpatialIndexSerde(serializer); env.getConfig().registerTypeWithKryoSerializer(Point.class, serializer); diff --git a/mkdocs.yml b/mkdocs.yml index 2b3896843a..369d31dc26 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -123,6 +123,7 @@ nav: - Thanks: https://www.apache.org/foundation/thanks.html" target="_blank - Security: https://www.apache.org/security/" target="_blank - Privacy: https://privacy.apache.org/policies/privacy-policy-public.html" target="_blank + - Telemetry: asf/telemetry.md repo_url: https://github.com/apache/sedona repo_name: apache/sedona theme: diff --git a/python/sedona/spark/SedonaContext.py b/python/sedona/spark/SedonaContext.py index 76be9576f0..cda98a60f5 100644 --- a/python/sedona/spark/SedonaContext.py +++ b/python/sedona/spark/SedonaContext.py @@ -35,7 +35,7 @@ def create(cls, spark: SparkSession) -> SparkSession: """ spark.sql("SELECT 1 as geom").count() PackageImporter.import_jvm_lib(spark._jvm) - spark._jvm.SedonaContext.create(spark._jsparkSession) + spark._jvm.SedonaContext.create(spark._jsparkSession, "python") return spark @classmethod diff --git a/spark/common/src/main/scala/org/apache/sedona/spark/SedonaContext.scala b/spark/common/src/main/scala/org/apache/sedona/spark/SedonaContext.scala index d9339193ee..6b262ed162 100644 --- a/spark/common/src/main/scala/org/apache/sedona/spark/SedonaContext.scala +++ b/spark/common/src/main/scala/org/apache/sedona/spark/SedonaContext.scala @@ -18,6 +18,7 
@@ */ package org.apache.sedona.spark +import org.apache.sedona.common.utils.TelemetryCollector import org.apache.sedona.core.serde.SedonaKryoRegistrator import org.apache.sedona.sql.UDF.UdfRegistrator import org.apache.sedona.sql.UDT.UdtRegistrator @@ -26,8 +27,11 @@ import org.apache.spark.sql.sedona_sql.optimization.SpatialFilterPushDownForGeoP import org.apache.spark.sql.sedona_sql.strategy.join.JoinQueryDetector import org.apache.spark.sql.{SQLContext, SparkSession} +import scala.annotation.StaticAnnotation import scala.util.Try +class InternalApi(description: String = "This method is for internal use only and may change without notice.") extends StaticAnnotation + object SedonaContext { def create(sqlContext: SQLContext): SQLContext = { create(sqlContext.sparkSession) @@ -40,6 +44,12 @@ object SedonaContext { * @return */ def create(sparkSession: SparkSession):SparkSession = { + create(sparkSession, "java") + } + + @InternalApi + def create(sparkSession: SparkSession, language: String):SparkSession = { + TelemetryCollector.send("spark", language) if (!sparkSession.experimental.extraStrategies.exists(_.isInstanceOf[JoinQueryDetector])) { sparkSession.experimental.extraStrategies ++= Seq(new JoinQueryDetector(sparkSession)) } diff --git a/spark/common/src/main/scala/org/apache/sedona/sql/utils/SedonaSQLRegistrator.scala b/spark/common/src/main/scala/org/apache/sedona/sql/utils/SedonaSQLRegistrator.scala index 6673054e23..91a712fedf 100644 --- a/spark/common/src/main/scala/org/apache/sedona/sql/utils/SedonaSQLRegistrator.scala +++ b/spark/common/src/main/scala/org/apache/sedona/sql/utils/SedonaSQLRegistrator.scala @@ -26,12 +26,21 @@ import org.apache.spark.sql.{SQLContext, SparkSession} object SedonaSQLRegistrator { @deprecated("Use SedonaContext.create instead", "1.4.1") def registerAll(sqlContext: SQLContext): Unit = { - SedonaContext.create(sqlContext.sparkSession) + registerAll(sqlContext, "java") } @deprecated("Use SedonaContext.create instead", "1.4.1") def registerAll(sparkSession: SparkSession): Unit = - SedonaContext.create(sparkSession) + registerAll(sparkSession, "java") + + @deprecated("Use SedonaContext.create instead", "1.4.1") + def registerAll(sqlContext: SQLContext, language: String): Unit = { + SedonaContext.create(sqlContext.sparkSession, language) + } + + @deprecated("Use SedonaContext.create instead", "1.4.1") + def registerAll(sparkSession: SparkSession, language: String): Unit = + SedonaContext.create(sparkSession, language) def dropAll(sparkSession: SparkSession): Unit = { UdfRegistrator.dropAll(sparkSession)
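
A note on the opt-out path added in `TelemetryCollector.send`: the reporting URL is built first, the `SCARF_NO_ANALYTICS` / `DO_NOT_TRACK` checks (environment variable or JVM system property) short-circuit before the HTTP GET, and the constructed URL is returned either way. The sketch below is illustrative only and not part of the patch; it assumes JUnit 4, as in the existing `TelemetryTest`, and uses a hypothetical class name. It exercises the opt-out branch from a test by setting the system property, since environment variables cannot be set from within the JVM.

```java
package org.apache.sedona.common.telemetry;

import static org.junit.Assert.assertTrue;

import org.apache.sedona.common.utils.TelemetryCollector;
import org.junit.After;
import org.junit.Test;

public class TelemetryOptOutTest {

    @After
    public void cleanUp() {
        // Remove the property so other tests in the same JVM are not affected.
        System.clearProperty("DO_NOT_TRACK");
    }

    @Test
    public void testOptOutSkipsRequestButStillReturnsUrl() {
        // With DO_NOT_TRACK=true, send() takes the opt-out branch: it skips the
        // HTTP GET entirely but still returns the URL it would have requested.
        System.setProperty("DO_NOT_TRACK", "true");
        String reported = TelemetryCollector.send("test", "java");
        assertTrue(reported.startsWith("https://sedona.gateway.scarf.sh/packages/java/test"));
    }
}
```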
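
The language tag threaded through the new `create(SparkSession, String)` overload is what distinguishes callers: the Python binding passes `"python"`, the R binding passes `"r"` via the new `registerAll` overloads, and the original one-argument `create` delegates to `create(sparkSession, "java")`, which in turn calls `TelemetryCollector.send("spark", language)`. The sketch below is a hedged usage example, not part of the patch (the class and application names are illustrative); it shows how a JVM driver would initialize Sedona and opt out of telemetry programmatically through the system property that `TelemetryCollector` checks.

```java
import org.apache.sedona.spark.SedonaContext;
import org.apache.spark.sql.SparkSession;

public class SedonaDriverExample {
    public static void main(String[] args) {
        // Opt out via the system property checked by TelemetryCollector;
        // setting the DO_NOT_TRACK or SCARF_NO_ANALYTICS environment variable
        // on the driver machine has the same effect.
        System.setProperty("DO_NOT_TRACK", "true");

        SparkSession spark = SparkSession.builder()
                .master("local[*]")
                .appName("sedona-telemetry-example") // illustrative app name
                .getOrCreate();

        // The one-argument overload delegates to create(spark, "java"),
        // so plain JVM callers are reported with the "java" language tag.
        SedonaContext.create(spark);
    }
}
```

Because the deprecated `SedonaSQLRegistrator.registerAll` overloads forward to the same `create(sparkSession, language)` entry point, existing callers keep working unchanged and are simply tagged as `"java"` unless a binding passes its own language string.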