Skip to content

Commit

Permalink
[Issue 740] current sprint report (#760)
Browse files Browse the repository at this point in the history
Enables users to calculate sprint burndown for the current sprint
by passing `@current` to the `sprint` argument
  • Loading branch information
widal001 committed Nov 28, 2023
1 parent 32a11ac commit 633bd03
Show file tree
Hide file tree
Showing 9 changed files with 213 additions and 57 deletions.
8 changes: 4 additions & 4 deletions analytics/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -75,13 +75,13 @@ poetry run analytics export gh_project_data --owner HHS --project 13 --output-fi
Once you've exported the sprint and issue data from GitHub, you can start calculating metrics. We'll begin with sprint burndown:

```bash
poetry run analytics calculate sprint_burndown --sprint-file data/sprint-data.json --issue-file data/issue-data.json --sprint "Sprint 10" --show-results
poetry run analytics calculate sprint_burndown --sprint-file data/sprint-data.json --issue-file data/issue-data.json --sprint @current --show-results
```

A couple of important notes about this command:

- `--sprint "Sprint 10"` In order to calculate burndown, you'll need to specify a sprint by name. We're currently working to add an option to use `@current` and other date-relative sprint flags
- `--show-results` In order to see the output in a browser you'll need to pass this flag.
- `--sprint @current` In order to calculate burndown, you'll need to specify either `@current` for the current sprint or the name of another sprint, e.g. `"Sprint 10"`
- `--show-results` In order to see the output in a browser you'll need to pass this flag.

![Screenshot of burndown for sprint 10](static/reporting-notebook-screenshot.png)

Expand All @@ -97,7 +97,7 @@ poetry run analytics calculate sprint_burndown --sprint-file data/sprint-data.js

### Calculating deliverable percent complete

Another key metric you can report is the percentage of tasks or points completed per 30k deliverable.
Another key metric you can report is the percentage of tasks or points completed per 30k deliverable.
You can specify the unit you want to use for percent complete (e.g. points or tasks) using the `--unit` flag.

For example, here we're calculating percentage completion based on the number of tickets under each deliverable.
Expand Down
2 changes: 1 addition & 1 deletion analytics/src/analytics/datasets/deliverable_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
Implements the DeliverableTasks dataset.
This is a sub-class of BaseDataset that groups 30k ft deliverables with the
tasks needed to complete those delivearable
tasks needed to complete those deliverables
"""
from typing import Self

Expand Down
26 changes: 26 additions & 0 deletions analytics/src/analytics/datasets/sprint_board.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,32 @@ def sprint_end(self, sprint: str) -> pd.Timestamp:
sprint_end = self.df.loc[sprint_mask, self.sprint_end_col].max()
return sprint_end.tz_localize("UTC")

@property
def sprints(self) -> pd.DataFrame:
    """Return one row per sprint with its start and end dates."""
    # Project just the sprint-identifying columns, then collapse repeated
    # rows so each sprint appears exactly once.
    columns = [self.sprint_col, self.sprint_start_col, self.sprint_end_col]
    unique_sprints = self.df[columns].drop_duplicates()
    return unique_sprints

@property
def current_sprint(self) -> str | None:
"""Return the name of the current sprint, if a sprint is currently active."""
return self.get_sprint_name_from_date(pd.Timestamp.today())

def get_sprint_name_from_date(self, date: pd.Timestamp) -> str | None:
"""Get the name of a sprint from a given date, if that date falls in a sprint."""
# fmt: off
date_filter = (
(self.sprints[self.sprint_start_col] <= date) # after sprint start
& (self.sprints[self.sprint_end_col] > date) # before sprint end
)
# fmt: on
matching_sprints = self.sprints.loc[date_filter, self.sprint_col]
# if there aren't any sprints return None
if len(matching_sprints) == 0:
return None
# if there are, return the first value as a string
return str(matching_sprints.squeeze())

@classmethod
def load_from_json_files(
cls,
Expand Down
16 changes: 15 additions & 1 deletion analytics/src/analytics/metrics/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,26 @@ class BaseMetric:
def __init__(self) -> None:
    """Initialize and calculate the metric from the input dataset."""
    # results are computed eagerly at construction time via the subclass's
    # calculate() implementation
    self.results = self.calculate()
    # the chart is rendered lazily; stays None until `chart` is first read
    self._chart: Figure | None = None

def calculate(self) -> pd.DataFrame:
    """Calculate the metric and return the resulting dataset."""
    # abstract hook: concrete metric subclasses must override this
    raise NotImplementedError

@property
def chart(self) -> Figure:
    """
    Return a chart visualizing the results, rendering it on first access.

    Note:
    ----
    By deferring the self.plot_results() method invocation until the chart is
    needed, we decrease the amount of time required to instantiate the class

    """
    # BUGFIX: test identity against None rather than truthiness -- a valid
    # but "falsy" Figure would otherwise be re-rendered on every access.
    if self._chart is None:
        self._chart = self.plot_results()
    return self._chart

def plot_results(self) -> Figure:
    """Create a plotly chart that visually represents the results."""
    # abstract hook: concrete metric subclasses must override this
    raise NotImplementedError
Expand Down
19 changes: 17 additions & 2 deletions analytics/src/analytics/metrics/burndown.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,11 @@ class SprintBurndown(BaseMetric):

def __init__(self, dataset: SprintBoard, sprint: str) -> None:
    """Initialize the SprintBurndown metric."""
    # NOTE: self.dataset must be assigned *before* sprint validation, because
    # _get_and_validate_sprint_name() reads self.dataset to resolve the
    # "@current" alias and to check the name against the dataset's sprints
    self.dataset = dataset
    self.sprint = self._get_and_validate_sprint_name(sprint)
    self.date_col = "date"
    self.opened_col = dataset.opened_col  # type: ignore[attr-defined]
    self.closed_col = dataset.closed_col  # type: ignore[attr-defined]
    self.unit = "tickets"
    # BaseMetric.__init__ triggers calculate(), which uses the fields above
    super().__init__()

Expand Down Expand Up @@ -98,6 +98,21 @@ def post_results_to_slack(self, slackbot: SlackBot, channel_id: str) -> None:
message=message,
)

def _get_and_validate_sprint_name(self, sprint: str | None) -> str:
    """Resolve the sprint to calculate burndown for, or raise a ValueError.

    The special value "@current" is translated to the sprint containing
    today's date before validation against the dataset's sprints.
    """
    dataset = self.dataset
    # translate the @current alias into a concrete sprint name; this may
    # yield None when no sprint is currently active
    resolved = dataset.current_sprint if sprint == "@current" else sprint
    # the resolved name must appear in the dataset; the explicit None check
    # also narrows the type for mypy
    known_sprints = list(dataset.sprints[dataset.sprint_col])
    if resolved is None or resolved not in known_sprints:
        msg = "Sprint value doesn't match one of the available sprints"
        raise ValueError(msg)
    return resolved

def _get_daily_tix_counts_by_status(
self,
df: pd.DataFrame,
Expand Down
5 changes: 4 additions & 1 deletion analytics/src/analytics/metrics/percent_complete.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Calculate and visualizes percent completion by deliverable."""
import datetime as dt
from enum import Enum
from typing import Literal

Expand Down Expand Up @@ -62,6 +63,8 @@ def calculate(self) -> pd.DataFrame:

def plot_results(self) -> Figure:
"""Create a bar chart of percent completion from the data in self.results."""
# get the current date in YYYY-MM-DD format
today = dt.datetime.now(tz=dt.timezone.utc).strftime("%Y-%m-%d")
# reshape the dataframe in self.results for plotly
df = self._prepare_result_dataframe_for_plotly()
# create a stacked bar chart from the data
Expand All @@ -73,7 +76,7 @@ def plot_results(self) -> Figure:
text="percent_of_total",
color_discrete_map={"open": "#aacde3", "closed": "#06508f"},
orientation="h",
title=f"Deliverable Percent Complete by {self.unit}",
title=f"Deliverable percent complete by {self.unit} as of {today}",
height=800,
)

Expand Down
49 changes: 49 additions & 0 deletions analytics/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,19 @@
import json
from pathlib import Path

import pandas as pd

# skips the integration tests in tests/integrations/
# to run the integration tests, invoke them directly: pytest tests/integrations/
collect_ignore = ["integrations"]

DAY_0 = "2023-10-31"
DAY_1 = "2023-11-01"
DAY_2 = "2023-11-02"
DAY_3 = "2023-11-03"
DAY_4 = "2023-11-04"
DAY_5 = "2023-11-05"


def write_test_data_to_file(data: dict, output_file: str):
"""Write test JSON data to a file for use in a test."""
Expand Down Expand Up @@ -72,3 +81,43 @@ def json_sprint_row(
"story Points": points,
"title": "Test issue 1",
}


def sprint_row(
    issue: int,
    created: str = DAY_1,
    closed: str | None = None,
    status: str = "In Progress",
    points: int = 1,
    sprint: int = 1,
    sprint_start: str = DAY_1,
    sprint_length: int = 2,
) -> dict:
    """Create a sample row of the SprintBoard dataset."""
    # derive the timestamp and timedelta fields from the raw inputs
    start_ts = pd.Timestamp(sprint_start)
    duration = pd.Timedelta(days=sprint_length)
    end_ts = start_ts + duration
    opened_ts = pd.Timestamp(created, tz="UTC")
    closed_ts = pd.Timestamp(closed, tz="UTC") if closed else None
    # closed issues are always reported with status "Done"
    effective_status = "Done" if closed else status
    # assemble and return the sample record
    return {
        "issue_number": issue,
        "issue_title": f"Issue {issue}",
        "type": "issue",
        "issue_body": f"Description of issue {issue}",
        "status": effective_status,
        "assignees": "mickeymouse",
        "labels": [],
        "url": f"https://github.com/HHS/simpler-grants-gov/issues/{issue}",
        "points": points,
        "milestone": "Milestone 1",
        "milestone_due_date": end_ts,
        "milestone_description": "Milestone 1 description",
        "sprint": f"Sprint {sprint}",
        "sprint_start_date": start_ts,
        "sprint_end_date": end_ts,
        "sprint_duration": duration,
        "created_date": opened_ts,
        "closed_date": closed_ts,
    }
50 changes: 48 additions & 2 deletions analytics/tests/datasets/test_sprint_board.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,17 @@
"""Tests for analytics/datasets/sprint_board.py."""
import pandas as pd # noqa: I001
import pytest

import pandas as pd
import pytest
from analytics.datasets.sprint_board import SprintBoard

from tests.conftest import (
DAY_1,
DAY_2,
DAY_4,
DAY_5,
json_issue_row,
json_sprint_row,
sprint_row,
write_test_data_to_file,
)

Expand Down Expand Up @@ -94,3 +100,43 @@ def test_extract_parent_issue_correctly(self, parent_number: int):
df = df.set_index("issue_number")
# validation -- check that issue 111's parent_issue_number is 222
assert df.loc[111]["parent_issue_number"] == parent_number


class TestGetSprintNameFromDate:
    """Test the SprintBoard.get_sprint_name_from_date() method."""

    @pytest.mark.parametrize(
        ("date", "expected"),
        [
            (DAY_1, "Sprint 1"),
            (DAY_2, "Sprint 1"),
            (DAY_4, "Sprint 2"),
            (DAY_5, "Sprint 2"),
        ],
    )
    def test_return_name_if_matching_sprint_exists(self, date: str, expected: str):
        """Test that correct sprint is returned if date exists in a sprint."""
        # setup - two sprints: Sprint 1 starting DAY_1, Sprint 2 starting DAY_4
        rows = [
            sprint_row(issue=1, sprint=1, sprint_start=DAY_1),
            sprint_row(issue=2, sprint=1, sprint_start=DAY_1),
            sprint_row(issue=3, sprint=2, sprint_start=DAY_4),
        ]
        board = SprintBoard.from_dict(rows)
        # execution
        result = board.get_sprint_name_from_date(pd.Timestamp(date))
        # validation - the date resolves to the expected sprint name
        assert result == expected

    def test_return_none_if_no_matching_sprint(self):
        """The method should return None if no sprint contains the date."""
        # setup - create sample dataset with two sprints
        board = SprintBoard.from_dict(
            [
                sprint_row(issue=1, sprint=1, sprint_start=DAY_1),
                sprint_row(issue=2, sprint=2, sprint_start=DAY_4),
            ]
        )
        # validation - a date far outside any sprint window yields None
        result = board.get_sprint_name_from_date(pd.Timestamp("1900-01-01"))
        assert result is None
95 changes: 49 additions & 46 deletions analytics/tests/metrics/test_burndown.py
Original file line number Diff line number Diff line change
@@ -1,53 +1,18 @@
"""Test the analytics.metrics.burndown module."""
import pandas as pd # noqa: I001
from datetime import datetime, timedelta, timezone

import pandas as pd
import pytest
from analytics.datasets.sprint_board import SprintBoard
from analytics.metrics.burndown import SprintBurndown

DAY_0 = "2023-10-31"
DAY_1 = "2023-11-01"
DAY_2 = "2023-11-02"
DAY_3 = "2023-11-03"


def sprint_row(
issue: int,
created: str = DAY_1,
closed: str | None = None,
status: str = "In Progress",
points: int = 1,
sprint: int = 1,
sprint_start: str = DAY_1,
sprint_length: int = 2,
) -> dict:
"""Create a sample row of the SprintBoard dataset."""
# create timestamp and time delta fields
sprint_start_ts = pd.Timestamp(sprint_start)
sprint_duration = pd.Timedelta(days=sprint_length)
sprint_end_ts = sprint_start_ts + sprint_duration
created_date = pd.Timestamp(created, tz="UTC")
closed_date = pd.Timestamp(closed, tz="UTC") if closed else None
# return the sample record
return {
"issue_number": issue,
"issue_title": f"Issue {issue}",
"type": "issue",
"issue_body": f"Description of issue {issue}",
"status": "Done" if closed else status,
"assignees": "mickeymouse",
"labels": [],
"url": f"https://github.com/HHS/simpler-grants-gov/issues/{issue}",
"points": points,
"milestone": "Milestone 1",
"milestone_due_date": sprint_end_ts,
"milestone_description": "Milestone 1 description",
"sprint": f"Sprint {sprint}",
"sprint_start_date": sprint_start_ts,
"sprint_end_date": sprint_end_ts,
"sprint_duration": sprint_duration,
"created_date": created_date,
"closed_date": closed_date,
}
from tests.conftest import (
DAY_0,
DAY_1,
DAY_2,
DAY_3,
sprint_row,
)


def result_row(
Expand Down Expand Up @@ -118,11 +83,49 @@ def test_include_all_sprint_days_if_tix_closed_early(self):
# setup - create test data
sprint_data = [
sprint_row(issue=1, sprint_start=DAY_1, created=DAY_0, closed=DAY_1),
sprint_row(issue=1, sprint_start=DAY_1, created=DAY_0),
sprint_row(issue=1, sprint_start=DAY_1, created=DAY_0, closed=DAY_1),
]
test_data = SprintBoard.from_dict(sprint_data)
# execution
output = SprintBurndown(test_data, sprint="Sprint 1")
df = output.results
# validation - check max date is end of sprint not last closed date
assert df[output.date_col].max() == pd.Timestamp(DAY_3, tz="UTC")

def test_raise_value_error_if_sprint_arg_not_in_dataset(self):
    """A ValueError should be raised if the sprint argument isn't valid."""
    # setup - create a board whose only sprint is "Sprint 1"
    board = SprintBoard.from_dict(
        [
            sprint_row(issue=1, sprint_start=DAY_1, created=DAY_0, closed=DAY_1),
            sprint_row(issue=1, sprint_start=DAY_1, created=DAY_0),
        ]
    )
    # validation - an unknown sprint name must be rejected
    with pytest.raises(
        ValueError,
        match="Sprint value doesn't match one of the available sprints",
    ):
        SprintBurndown(board, sprint="Fake sprint")

def test_calculate_burndown_for_current_sprint(self):
    """Passing `@current` should calculate burndown for the active sprint."""
    # setup - build a sprint that starts today so that it is "current"
    now = datetime.now(tz=timezone.utc)
    day_1 = now.strftime("%Y-%m-%d")
    day_2 = (now + timedelta(days=1)).strftime("%Y-%m-%d")
    day_3 = (now + timedelta(days=2)).strftime("%Y-%m-%d")
    sprint_data = [
        sprint_row(issue=1, sprint_start=day_1, created=day_1, closed=day_2),
        sprint_row(issue=2, sprint_start=day_1, created=day_1),
    ]
    test_data = SprintBoard.from_dict(sprint_data)
    # execution - BUGFIX: use the "@current" alias this test is named for,
    # instead of hard-coding "Sprint 1"; the sprint starts today, so
    # "@current" must resolve to it
    output = SprintBurndown(test_data, sprint="@current")
    df = output.results
    # validation - check burndown output
    expected = [
        result_row(day=day_1, opened=2, closed=0, delta=2, total=2),
        result_row(day=day_2, opened=0, closed=1, delta=-1, total=1),
        result_row(day=day_3, opened=0, closed=0, delta=0, total=1),
    ]
    assert df.to_dict("records") == expected

0 comments on commit 633bd03

Please sign in to comment.