Skip to content

Commit

Permalink
Merge pull request #779 from ImageMarkup/upgrade-google-analytics-reporting
Browse files Browse the repository at this point in the history

Upgrade google analytics reporting to use the v1 data api
  • Loading branch information
danlamanna authored Oct 26, 2023
2 parents 54ce4e4 + 3fcea7b commit 93a021b
Show file tree
Hide file tree
Showing 4 changed files with 37 additions and 57 deletions.
10 changes: 6 additions & 4 deletions isic/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,10 +120,12 @@ def mutate_configuration(configuration: ComposedConfiguration) -> None:
ISIC_DATACITE_API_URL = values.Value("https://api.test.datacite.org")
ISIC_DATACITE_USERNAME = values.Value(None)
ISIC_DATACITE_PASSWORD = values.SecretValue(None)
ISIC_GOOGLE_ANALYTICS_VIEW_IDS = [
"183845203", # ISIC Gallery
"217814783", # ISIC Challenge 2020
"199577101", # ISIC Challenge
ISIC_GOOGLE_ANALYTICS_PROPERTY_IDS = [
"360152967", # ISIC Gallery
"368050084", # ISIC Challenge 2020
"360125792", # ISIC Challenge
"265191179", # ISIC API
"265233311", # ISDIS
]
# This is technically a secret, but it's unset in sandbox so we don't want to make
# it required.
Expand Down
78 changes: 28 additions & 50 deletions isic/stats/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,26 +7,23 @@
from types import SimpleNamespace
import urllib.parse

from apiclient.discovery import build
import boto3
from botocore.config import Config
from celery import shared_task
from celery.utils.log import get_task_logger
from django.conf import settings
from django.db import transaction
from django.utils import timezone
from googleapiclient.errors import HttpError
from google.analytics.data_v1beta import BetaAnalyticsDataClient
from google.analytics.data_v1beta.types import DateRange, Dimension, Metric, RunReportRequest
from google.oauth2 import service_account
from more_itertools.more import chunked
from oauth2client.service_account import ServiceAccountCredentials
import pandas as pd
import pycountry

from isic.core.models.image import Image
from isic.stats.models import GaMetrics, ImageDownload

SCOPES = ["https://www.googleapis.com/auth/analytics.readonly"]


logger = get_task_logger(__name__)


Expand All @@ -36,52 +33,33 @@ def _s3_client():
)


def _initialize_analyticsreporting():
credentials = ServiceAccountCredentials.from_json_keyfile_dict(
json.loads(settings.ISIC_GOOGLE_API_JSON_KEY), SCOPES
def _get_analytics_client():
json_acct_info = json.loads(settings.ISIC_GOOGLE_API_JSON_KEY)
credentials = service_account.Credentials.from_service_account_info(json_acct_info)
scoped_credentials = credentials.with_scopes(
["https://www.googleapis.com/auth/analytics.readonly"]
)
analytics = build("analyticsreporting", "v4", credentials=credentials)
return analytics
return BetaAnalyticsDataClient(credentials=scoped_credentials)


def _get_google_analytics_report(analytics, view_id: str) -> dict:
def _get_google_analytics_report(client, property_id: str) -> dict:
results = {
"num_sessions": 0,
"sessions_per_country": defaultdict(int),
}
response = (
analytics.reports()
.batchGet(
body={
"reportRequests": [
{
"viewId": view_id,
"dateRanges": [{"startDate": "30daysAgo", "endDate": "today"}],
"metrics": [{"expression": "ga:sessions"}],
"dimensions": [{"name": "ga:countryIsoCode"}],
}
]
}
)
.execute()
)

for report in response.get("reports", []):
column_header = report.get("columnHeader", {})
metric_headers = column_header.get("metricHeader", {}).get("metricHeaderEntries", [])

for row in report.get("data", {}).get("rows", []):
dimensions = row.get("dimensions", [])
date_range_values = row.get("metrics", [])

for _, values in enumerate(date_range_values):
for _, value in zip(metric_headers, values.get("values")):
if dimensions[0] != "ZZ": # unknown country
results["sessions_per_country"][dimensions[0]] += int(value)
request = RunReportRequest(
property=f"properties/{property_id}",
dimensions=[Dimension(name="countryId")],
metrics=[Metric(name="sessions")],
date_ranges=[DateRange(start_date="30daysAgo", end_date="today")],
)
response = client.run_report(request)

results["num_sessions"] += int(
report.get("data", {}).get("totals", [{}])[0].get("values", ["0"])[0]
)
for row in response.rows:
country_id, sessions = row.dimension_values[0].value, row.metric_values[0].value
results["sessions_per_country"][country_id] += int(sessions)
results["num_sessions"] += int(sessions)

return results

Expand All @@ -106,9 +84,6 @@ def _country_from_iso_code(iso_code: str) -> dict:
@shared_task(
soft_time_limit=60,
time_limit=120,
# Figuring out retries within googleapiclient is a bit cumbersome, use celery.
autoretry_for=(HttpError,),
retry_backoff=True,
)
def collect_google_analytics_metrics_task():
if not settings.ISIC_GOOGLE_API_JSON_KEY:
Expand All @@ -117,19 +92,22 @@ def collect_google_analytics_metrics_task():
)
return

analytics = _initialize_analyticsreporting()
client = _get_analytics_client()
num_sessions = 0
sessions_per_country = []
sessions_per_iso_code: dict[str, int] = defaultdict(int)

for view_id in settings.ISIC_GOOGLE_ANALYTICS_VIEW_IDS:
results = _get_google_analytics_report(analytics, view_id)
for property_id in settings.ISIC_GOOGLE_ANALYTICS_PROPERTY_IDS:
results = _get_google_analytics_report(client, property_id)
num_sessions += results["num_sessions"]
for key, value in results["sessions_per_country"].items():
sessions_per_iso_code[key] += value

for iso_code, sessions in sessions_per_iso_code.items():
sessions_per_country.append({**{"sessions": sessions}, **_country_from_iso_code(iso_code)})
if iso_code != "(not set)":
sessions_per_country.append(
{**{"sessions": sessions}, **_country_from_iso_code(iso_code)}
)

GaMetrics.objects.create(
range_start=timezone.now() - timedelta(days=30),
Expand Down
4 changes: 2 additions & 2 deletions isic/stats/tests/test_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,10 @@
@pytest.mark.django_db
def test_collect_google_analytics_task(mocker, settings):
# only have one property ID, otherwise the counts will be multiplied
settings.ISIC_GOOGLE_ANALYTICS_VIEW_IDS = ["just_one"]
settings.ISIC_GOOGLE_ANALYTICS_PROPERTY_IDS = ["just_one"]
settings.ISIC_GOOGLE_API_JSON_KEY = "something"

mocker.patch("isic.stats.tasks._initialize_analyticsreporting", mocker.MagicMock)
mocker.patch("isic.stats.tasks._get_analytics_client", mocker.MagicMock)
mocker.patch(
"isic.stats.tasks._get_google_analytics_report",
return_value={
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@
"django-spurl",
"django-storages>=1.14",
"django-widget-tweaks",
"google-api-python-client",
"google-analytics-data",
"hashids",
"isic-metadata>=0.2.0",
"jaro-winkler",
Expand Down

0 comments on commit 93a021b

Please sign in to comment.