Skip to content

Commit

Permalink
[Issue 740] current sprint report (#760)
Browse files Browse the repository at this point in the history
Enables users to calculate sprint burndown for the current sprint
by passing `@current` to the `sprint` argument
  • Loading branch information
widal001 committed Nov 28, 2023
1 parent 32a11ac commit 633bd03
Show file tree
Hide file tree
Showing 9 changed files with 213 additions and 57 deletions.
8 changes: 4 additions & 4 deletions analytics/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -75,13 +75,13 @@ poetry run analytics export gh_project_data --owner HHS --project 13 --output-fi
Once you've exported the sprint and issue data from GitHub, you can start calculating metrics. We'll begin with sprint burndown:

```bash
poetry run analytics calculate sprint_burndown --sprint-file data/sprint-data.json --issue-file data/issue-data.json --sprint "Sprint 10" --show-results
poetry run analytics calculate sprint_burndown --sprint-file data/sprint-data.json --issue-file data/issue-data.json --sprint @current --show-results
```

A couple of important notes about this command:

- `--sprint "Sprint 10"` In order to calculate burndown, you'll need to specify a sprint by name. We're currently working to add an option to use `@current` and other date-relative sprint flags
- `--show-results` In order to see the output in a browser you'll need to pass this flag.
- `--sprint @current` In order to calculate burndown, you'll need to specify either `@current` for the current sprint or the name of another sprint, e.g. `"Sprint 10"`
- `--show-results` In order to see the output in a browser you'll need to pass this flag.

![Screenshot of burndown for sprint 10](static/reporting-notebook-screenshot.png)

Expand All @@ -97,7 +97,7 @@ poetry run analytics calculate sprint_burndown --sprint-file data/sprint-data.js

### Calculating deliverable percent complete

Another key metric you can report is the percentage of tasks or points completed per 30k deliverable.
Another key metric you can report is the percentage of tasks or points completed per 30k deliverable.
You can specify the unit you want to use for percent complete (e.g. points or tasks) using the `--unit` flag.

For example, here we're calculating percentage completion based on the number of tickets under each deliverable.
Expand Down
2 changes: 1 addition & 1 deletion analytics/src/analytics/datasets/deliverable_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
Implements the DeliverableTasks dataset.
This is a sub-class of BaseDataset that groups 30k ft deliverables with the
tasks needed to complete those delivearable
tasks needed to complete those deliverables
"""
from typing import Self

Expand Down
26 changes: 26 additions & 0 deletions analytics/src/analytics/datasets/sprint_board.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,32 @@ def sprint_end(self, sprint: str) -> pd.Timestamp:
sprint_end = self.df.loc[sprint_mask, self.sprint_end_col].max()
return sprint_end.tz_localize("UTC")

@property
def sprints(self) -> pd.DataFrame:
    """Return one row per sprint with its start and end dates."""
    # Project just the sprint-identifying columns, then collapse repeated
    # rows so each sprint appears exactly once.
    columns = [self.sprint_col, self.sprint_start_col, self.sprint_end_col]
    unique_sprints = self.df[columns].drop_duplicates()
    return unique_sprints

@property
def current_sprint(self) -> str | None:
"""Return the name of the current sprint, if a sprint is currently active."""
return self.get_sprint_name_from_date(pd.Timestamp.today())

def get_sprint_name_from_date(self, date: pd.Timestamp) -> str | None:
"""Get the name of a sprint from a given date, if that date falls in a sprint."""
# fmt: off
date_filter = (
(self.sprints[self.sprint_start_col] <= date) # after sprint start
& (self.sprints[self.sprint_end_col] > date) # before sprint end
)
# fmt: on
matching_sprints = self.sprints.loc[date_filter, self.sprint_col]
# if there aren't any sprints return None
if len(matching_sprints) == 0:
return None
# if there are, return the first value as a string
return str(matching_sprints.squeeze())

@classmethod
def load_from_json_files(
cls,
Expand Down
16 changes: 15 additions & 1 deletion analytics/src/analytics/metrics/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,26 @@ class BaseMetric:
def __init__(self) -> None:
    """Initialize and calculate the metric from the input dataset."""
    # results are computed eagerly at construction time via the subclass's
    # calculate() implementation
    self.results = self.calculate()
    # the chart is rendered lazily; stays None until `chart` is first read
    self._chart: Figure | None = None

def calculate(self) -> pd.DataFrame:
    """Calculate the metric and return the resulting dataset."""
    # abstract hook: concrete metric subclasses must override this
    raise NotImplementedError

@property
def chart(self) -> Figure:
    """
    Return a chart visualizing the results, rendering it on first access.

    Note:
    ----
    By deferring the self.plot_results() method invocation until the chart is
    needed, we decrease the amount of time required to instantiate the class

    """
    # BUGFIX: test identity against None rather than truthiness -- a valid
    # but "falsy" Figure would otherwise be re-rendered on every access.
    if self._chart is None:
        self._chart = self.plot_results()
    return self._chart

def plot_results(self) -> Figure:
    """Create a plotly chart that visually represents the results."""
    # abstract hook: concrete metric subclasses must override this
    raise NotImplementedError
Expand Down
19 changes: 17 additions & 2 deletions analytics/src/analytics/metrics/burndown.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,11 @@ class SprintBurndown(BaseMetric):

def __init__(self, dataset: SprintBoard, sprint: str) -> None:
    """Initialize the SprintBurndown metric."""
    # NOTE: self.dataset must be assigned *before* sprint validation, because
    # _get_and_validate_sprint_name() reads self.dataset to resolve the
    # "@current" alias and to check the name against the dataset's sprints
    self.dataset = dataset
    self.sprint = self._get_and_validate_sprint_name(sprint)
    self.date_col = "date"
    self.opened_col = dataset.opened_col  # type: ignore[attr-defined]
    self.closed_col = dataset.closed_col  # type: ignore[attr-defined]
    self.unit = "tickets"
    # BaseMetric.__init__ triggers calculate(), which uses the fields above
    super().__init__()

Expand Down Expand Up @@ -98,6 +98,21 @@ def post_results_to_slack(self, slackbot: SlackBot, channel_id: str) -> None:
message=message,
)

def _get_and_validate_sprint_name(self, sprint: str | None) -> str:
    """Resolve the sprint to calculate burndown for, or raise a ValueError.

    The special value "@current" is translated to the sprint containing
    today's date before validation against the dataset's sprints.
    """
    dataset = self.dataset
    # translate the @current alias into a concrete sprint name; this may
    # yield None when no sprint is currently active
    resolved = dataset.current_sprint if sprint == "@current" else sprint
    # the resolved name must appear in the dataset; the explicit None check
    # also narrows the type for mypy
    known_sprints = list(dataset.sprints[dataset.sprint_col])
    if resolved is None or resolved not in known_sprints:
        msg = "Sprint value doesn't match one of the available sprints"
        raise ValueError(msg)
    return resolved

def _get_daily_tix_counts_by_status(
self,
df: pd.DataFrame,
Expand Down
5 changes: 4 additions & 1 deletion analytics/src/analytics/metrics/percent_complete.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Calculate and visualizes percent completion by deliverable."""
import datetime as dt
from enum import Enum
from typing import Literal

Expand Down Expand Up @@ -62,6 +63,8 @@ def calculate(self) -> pd.DataFrame:

def plot_results(self) -> Figure:
"""Create a bar chart of percent completion from the data in self.results."""
# get the current date in YYYY-MM-DD format
today = dt.datetime.now(tz=dt.timezone.utc).strftime("%Y-%m-%d")
# reshape the dataframe in self.results for plotly
df = self._prepare_result_dataframe_for_plotly()
# create a stacked bar chart from the data
Expand All @@ -73,7 +76,7 @@ def plot_results(self) -> Figure:
text="percent_of_total",
color_discrete_map={"open": "#aacde3", "closed": "#06508f"},
orientation="h",
title=f"Deliverable Percent Complete by {self.unit}",
title=f"Deliverable percent complete by {self.unit} as of {today}",
height=800,
)

Expand Down
49 changes: 49 additions & 0 deletions analytics/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,19 @@
import json
from pathlib import Path

import pandas as pd

# skips the integration tests in tests/integrations/
# to run the integration tests, invoke them directly: pytest tests/integrations/
collect_ignore = ["integrations"]

DAY_0 = "2023-10-31"
DAY_1 = "2023-11-01"
DAY_2 = "2023-11-02"
DAY_3 = "2023-11-03"
DAY_4 = "2023-11-04"
DAY_5 = "2023-11-05"


def write_test_data_to_file(data: dict, output_file: str):
"""Write test JSON data to a file for use in a test."""
Expand Down Expand Up @@ -72,3 +81,43 @@ def json_sprint_row(
"story Points": points,
"title": "Test issue 1",
}


def sprint_row(
    issue: int,
    created: str = DAY_1,
    closed: str | None = None,
    status: str = "In Progress",
    points: int = 1,
    sprint: int = 1,
    sprint_start: str = DAY_1,
    sprint_length: int = 2,
) -> dict:
    """Create a sample row of the SprintBoard dataset."""
    # derive the timestamp and timedelta fields from the raw inputs
    start_ts = pd.Timestamp(sprint_start)
    duration = pd.Timedelta(days=sprint_length)
    end_ts = start_ts + duration
    opened_ts = pd.Timestamp(created, tz="UTC")
    closed_ts = pd.Timestamp(closed, tz="UTC") if closed else None
    # closed issues are always reported with status "Done"
    effective_status = "Done" if closed else status
    # assemble and return the sample record
    return {
        "issue_number": issue,
        "issue_title": f"Issue {issue}",
        "type": "issue",
        "issue_body": f"Description of issue {issue}",
        "status": effective_status,
        "assignees": "mickeymouse",
        "labels": [],
        "url": f"https://github.com/HHS/simpler-grants-gov/issues/{issue}",
        "points": points,
        "milestone": "Milestone 1",
        "milestone_due_date": end_ts,
        "milestone_description": "Milestone 1 description",
        "sprint": f"Sprint {sprint}",
        "sprint_start_date": start_ts,
        "sprint_end_date": end_ts,
        "sprint_duration": duration,
        "created_date": opened_ts,
        "closed_date": closed_ts,
    }
50 changes: 48 additions & 2 deletions analytics/tests/datasets/test_sprint_board.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,17 @@
"""Tests for analytics/datasets/sprint_board.py."""
import pandas as pd # noqa: I001
import pytest

import pandas as pd
import pytest
from analytics.datasets.sprint_board import SprintBoard

from tests.conftest import (
DAY_1,
DAY_2,
DAY_4,
DAY_5,
json_issue_row,
json_sprint_row,
sprint_row,
write_test_data_to_file,
)

Expand Down Expand Up @@ -94,3 +100,43 @@ def test_extract_parent_issue_correctly(self, parent_number: int):
df = df.set_index("issue_number")
# validation -- check that issue 111's parent_issue_number is 222
assert df.loc[111]["parent_issue_number"] == parent_number


class TestGetSprintNameFromDate:
    """Test the SprintBoard.get_sprint_name_from_date() method."""

    @pytest.mark.parametrize(
        ("date", "expected"),
        [
            (DAY_1, "Sprint 1"),
            (DAY_2, "Sprint 1"),
            (DAY_4, "Sprint 2"),
            (DAY_5, "Sprint 2"),
        ],
    )
    def test_return_name_if_matching_sprint_exists(self, date: str, expected: str):
        """Test that correct sprint is returned if date exists in a sprint."""
        # setup - two sprints: Sprint 1 starting DAY_1, Sprint 2 starting DAY_4
        rows = [
            sprint_row(issue=1, sprint=1, sprint_start=DAY_1),
            sprint_row(issue=2, sprint=1, sprint_start=DAY_1),
            sprint_row(issue=3, sprint=2, sprint_start=DAY_4),
        ]
        board = SprintBoard.from_dict(rows)
        # execution
        result = board.get_sprint_name_from_date(pd.Timestamp(date))
        # validation - the date resolves to the expected sprint name
        assert result == expected

    def test_return_none_if_no_matching_sprint(self):
        """The method should return None if no sprint contains the date."""
        # setup - create sample dataset with two sprints
        board = SprintBoard.from_dict(
            [
                sprint_row(issue=1, sprint=1, sprint_start=DAY_1),
                sprint_row(issue=2, sprint=2, sprint_start=DAY_4),
            ]
        )
        # validation - a date far outside any sprint window yields None
        result = board.get_sprint_name_from_date(pd.Timestamp("1900-01-01"))
        assert result is None
95 changes: 49 additions & 46 deletions analytics/tests/metrics/test_burndown.py
Original file line number Diff line number Diff line change
@@ -1,53 +1,18 @@
"""Test the analytics.metrics.burndown module."""
import pandas as pd # noqa: I001
from datetime import datetime, timedelta, timezone

import pandas as pd
import pytest
from analytics.datasets.sprint_board import SprintBoard
from analytics.metrics.burndown import SprintBurndown

DAY_0 = "2023-10-31"
DAY_1 = "2023-11-01"
DAY_2 = "2023-11-02"
DAY_3 = "2023-11-03"


def sprint_row(
issue: int,
created: str = DAY_1,
closed: str | None = None,
status: str = "In Progress",
points: int = 1,
sprint: int = 1,
sprint_start: str = DAY_1,
sprint_length: int = 2,
) -> dict:
"""Create a sample row of the SprintBoard dataset."""
# create timestamp and time delta fields
sprint_start_ts = pd.Timestamp(sprint_start)
sprint_duration = pd.Timedelta(days=sprint_length)
sprint_end_ts = sprint_start_ts + sprint_duration
created_date = pd.Timestamp(created, tz="UTC")
closed_date = pd.Timestamp(closed, tz="UTC") if closed else None
# return the sample record
return {
"issue_number": issue,
"issue_title": f"Issue {issue}",
"type": "issue",
"issue_body": f"Description of issue {issue}",
"status": "Done" if closed else status,
"assignees": "mickeymouse",
"labels": [],
"url": f"https://github.com/HHS/simpler-grants-gov/issues/{issue}",
"points": points,
"milestone": "Milestone 1",
"milestone_due_date": sprint_end_ts,
"milestone_description": "Milestone 1 description",
"sprint": f"Sprint {sprint}",
"sprint_start_date": sprint_start_ts,
"sprint_end_date": sprint_end_ts,
"sprint_duration": sprint_duration,
"created_date": created_date,
"closed_date": closed_date,
}
from tests.conftest import (
DAY_0,
DAY_1,
DAY_2,
DAY_3,
sprint_row,
)


def result_row(
Expand Down Expand Up @@ -118,11 +83,49 @@ def test_include_all_sprint_days_if_tix_closed_early(self):
# setup - create test data
sprint_data = [
sprint_row(issue=1, sprint_start=DAY_1, created=DAY_0, closed=DAY_1),
sprint_row(issue=1, sprint_start=DAY_1, created=DAY_0),
sprint_row(issue=1, sprint_start=DAY_1, created=DAY_0, closed=DAY_1),
]
test_data = SprintBoard.from_dict(sprint_data)
# execution
output = SprintBurndown(test_data, sprint="Sprint 1")
df = output.results
# validation - check max date is end of sprint not last closed date
assert df[output.date_col].max() == pd.Timestamp(DAY_3, tz="UTC")

def test_raise_value_error_if_sprint_arg_not_in_dataset(self):
    """A ValueError should be raised if the sprint argument isn't valid."""
    # setup - create a board whose only sprint is "Sprint 1"
    board = SprintBoard.from_dict(
        [
            sprint_row(issue=1, sprint_start=DAY_1, created=DAY_0, closed=DAY_1),
            sprint_row(issue=1, sprint_start=DAY_1, created=DAY_0),
        ]
    )
    # validation - an unknown sprint name must be rejected
    with pytest.raises(
        ValueError,
        match="Sprint value doesn't match one of the available sprints",
    ):
        SprintBurndown(board, sprint="Fake sprint")

def test_calculate_burndown_for_current_sprint(self):
    """Passing `@current` should calculate burndown for the active sprint."""
    # setup - build a sprint that starts today so that it is "current"
    now = datetime.now(tz=timezone.utc)
    day_1 = now.strftime("%Y-%m-%d")
    day_2 = (now + timedelta(days=1)).strftime("%Y-%m-%d")
    day_3 = (now + timedelta(days=2)).strftime("%Y-%m-%d")
    sprint_data = [
        sprint_row(issue=1, sprint_start=day_1, created=day_1, closed=day_2),
        sprint_row(issue=2, sprint_start=day_1, created=day_1),
    ]
    test_data = SprintBoard.from_dict(sprint_data)
    # execution - BUGFIX: use the "@current" alias this test is named for,
    # instead of hard-coding "Sprint 1"; the sprint starts today, so
    # "@current" must resolve to it
    output = SprintBurndown(test_data, sprint="@current")
    df = output.results
    # validation - check burndown output
    expected = [
        result_row(day=day_1, opened=2, closed=0, delta=2, total=2),
        result_row(day=day_2, opened=0, closed=1, delta=-1, total=1),
        result_row(day=day_3, opened=0, closed=0, delta=0, total=1),
    ]
    assert df.to_dict("records") == expected

0 comments on commit 633bd03

Please sign in to comment.