Skip to content

Commit

Permalink
Merge branch 'main' into joseph/rewrite-test-results
Browse files Browse the repository at this point in the history
  • Loading branch information
joseph-sentry authored Oct 21, 2024
2 parents 0ebdd15 + 2ae6c6d commit e8e0b92
Show file tree
Hide file tree
Showing 22 changed files with 2,122 additions and 2,186 deletions.
16 changes: 8 additions & 8 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,11 @@ jobs:
lint:
name: Run Lint

uses: codecov/gha-workflows/.github/workflows/[email protected].23
uses: codecov/gha-workflows/.github/workflows/[email protected].24

build:
name: Build Worker
uses: codecov/gha-workflows/.github/workflows/[email protected].23
uses: codecov/gha-workflows/.github/workflows/[email protected].24

secrets: inherit
with:
Expand All @@ -35,14 +35,14 @@ jobs:
codecovstartup:
name: Codecov Startup
needs: build
uses: codecov/gha-workflows/.github/workflows/[email protected].23
uses: codecov/gha-workflows/.github/workflows/[email protected].24

secrets: inherit

test:
name: Test
needs: [build]
uses: codecov/gha-workflows/.github/workflows/[email protected].23
uses: codecov/gha-workflows/.github/workflows/[email protected].24

secrets: inherit
with:
Expand All @@ -52,7 +52,7 @@ jobs:
name: Build Self Hosted Worker
needs: [build, test]

uses: codecov/gha-workflows/.github/workflows/[email protected].23
uses: codecov/gha-workflows/.github/workflows/[email protected].24
secrets: inherit
with:
repo: ${{ vars.CODECOV_IMAGE_V2 || 'codecov/self-hosted-worker' }}
Expand All @@ -61,7 +61,7 @@ jobs:
name: Push Staging Image
needs: [build, test]
if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/main' || github.event.ref == 'refs/heads/staging') && github.repository_owner == 'codecov' }}
uses: codecov/gha-workflows/.github/workflows/[email protected].23
uses: codecov/gha-workflows/.github/workflows/[email protected].24
secrets: inherit
with:
environment: staging
Expand All @@ -71,7 +71,7 @@ jobs:
name: Push Production Image
needs: [build, test]
if: ${{ github.event_name == 'push' && github.event.ref == 'refs/heads/main' && github.repository_owner == 'codecov' }}
uses: codecov/gha-workflows/.github/workflows/[email protected].23
uses: codecov/gha-workflows/.github/workflows/[email protected].24
secrets: inherit
with:
environment: production
Expand All @@ -82,7 +82,7 @@ jobs:
needs: [build-self-hosted, test]
secrets: inherit
if: ${{ github.event_name == 'push' && github.event.ref == 'refs/heads/main' && github.repository_owner == 'codecov' }}
uses: codecov/gha-workflows/.github/workflows/[email protected].23
uses: codecov/gha-workflows/.github/workflows/[email protected].24
with:
push_rolling: true
repo: ${{ vars.CODECOV_IMAGE_V2 || 'codecov/self-hosted-worker' }}
10 changes: 5 additions & 5 deletions database/models/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from helpers.config import should_write_data_to_storage_config_check


class User(CodecovBaseModel):
class User(CodecovBaseModel, MixinBaseClass):
__tablename__ = "users"
id_ = Column("id", types.BigInteger, primary_key=True)

Expand Down Expand Up @@ -136,7 +136,7 @@ class Repository(CodecovBaseModel):
updatestamp = Column(types.DateTime)
yaml = Column(postgresql.JSON)
deleted = Column(types.Boolean, nullable=False, default=False)
branch = Column(types.Text)
branch = Column(types.Text, default="main")
image_token = Column(
types.Text,
default=lambda: "".join(
Expand Down Expand Up @@ -184,7 +184,7 @@ class GithubAppInstallation(CodecovBaseModel, MixinBaseClass):
# replacement for owner.integration_id
# installation id GitHub sends us in the installation-related webhook events
installation_id = Column(types.Integer, server_default=FetchedValue())
name = Column(types.Text, server_default=FetchedValue())
name = Column(types.Text, default=GITHUB_APP_INSTALLATION_DEFAULT_NAME)
# if null, all repos are covered by this installation
# otherwise, it's a list of repo.id values
repository_service_ids = Column(
Expand All @@ -199,7 +199,7 @@ class GithubAppInstallation(CodecovBaseModel, MixinBaseClass):
Owner, foreign_keys=[ownerid], back_populates="github_app_installations"
)

is_suspended = Column(types.Boolean, server_default=FetchedValue())
is_suspended = Column(types.Boolean, default=False)

def repository_queryset(self, dbsession: Session):
"""Returns a query set of repositories covered by this installation"""
Expand Down Expand Up @@ -364,7 +364,7 @@ class Branch(CodecovBaseModel):
__tablename__ = "branches"

repoid = Column(types.Integer, ForeignKey("repos.repoid"), primary_key=True)
updatestamp = Column(types.DateTime)
updatestamp = Column(types.DateTime, default=datetime.now)
branch = Column(types.Text, nullable=False, primary_key=True)
base = Column(types.Text)
head = Column(types.Text, nullable=False)
Expand Down
2 changes: 1 addition & 1 deletion database/tests/factories/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ class Meta:
).hexdigest()
)
ci_passed = True
pullid = 1
pullid = None
timestamp = datetime(2019, 2, 1, 17, 59, 47)
author = factory.SubFactory(OwnerFactory)
repository = factory.SubFactory(RepositoryFactory)
Expand Down
83 changes: 13 additions & 70 deletions helpers/checkpoint_logger/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,79 +17,22 @@
)

import sentry_sdk
from shared.metrics import Counter, Histogram, metrics
from shared.metrics import metrics

logger = logging.getLogger(__name__)
from helpers.checkpoint_logger.prometheus import PROMETHEUS_HANDLER

logger = logging.getLogger(__name__)

T = TypeVar("T", bound="BaseFlow")
TSubflows: TypeAlias = Mapping[T, Iterable[tuple[str, T]]]


CHECKPOINTS_TOTAL_BEGUN = Counter(
"worker_checkpoints_begun",
"Total number of times a flow's first checkpoint was logged.",
["flow"],
)
CHECKPOINTS_TOTAL_SUCCEEDED = Counter(
"worker_checkpoints_succeeded",
"Total number of times one of a flow's success checkpoints was logged.",
["flow"],
)
CHECKPOINTS_TOTAL_FAILED = Counter(
"worker_checkpoints_failed",
"Total number of times one of a flow's failure checkpoints was logged.",
["flow"],
)
CHECKPOINTS_TOTAL_ENDED = Counter(
"worker_checkpoints_ended",
"Total number of times one of a flow's terminal checkpoints (success or failure) was logged.",
["flow"],
)
CHECKPOINTS_ERRORS = Counter(
"worker_checkpoints_errors",
"Total number of errors while trying to log checkpoints",
["flow"],
)
CHECKPOINTS_EVENTS = Counter(
"worker_checkpoints_events",
"Total number of checkpoints logged.",
["flow", "checkpoint"],
)

CHECKPOINTS_SUBFLOW_DURATION = Histogram(
"worker_checkpoints_subflow_duration_seconds",
"Duration of subflows in seconds.",
["flow", "subflow"],
buckets=[
0.05,
0.1,
0.5,
1,
2,
5,
10,
30,
60,
120,
180,
300,
600,
900,
1200,
1800,
2400,
3600,
],
)


def _error(msg, flow, strict=False):
# When a new version of worker rolls out, it will pick up tasks that
# may have been enqueued by the old worker and be missing checkpoints
# data. At least for that reason, we want to allow failing softly.
metrics.incr("worker.checkpoint_logger.error")
CHECKPOINTS_ERRORS.labels(flow=flow.__name__).inc()
PROMETHEUS_HANDLER.log_errors(flow=flow.__name__)
if strict:
raise ValueError(msg)
else:
Expand Down Expand Up @@ -331,12 +274,12 @@ class MyEnum(str, Enum):

def log_counters(obj: T) -> None:
metrics.incr(f"{klass.__name__}.events.{obj.name}")
CHECKPOINTS_EVENTS.labels(flow=klass.__name__, checkpoint=obj.name).inc()
PROMETHEUS_HANDLER.log_checkpoints(flow=klass.__name__, checkpoint=obj.name)

# If this is the first checkpoint, increment the number of flows we've begun
if obj == next(iter(klass.__members__.values())):
metrics.incr(f"{klass.__name__}.total.begun")
CHECKPOINTS_TOTAL_BEGUN.labels(flow=klass.__name__).inc()
PROMETHEUS_HANDLER.log_begun(flow=klass.__name__)
return

is_failure = hasattr(obj, "is_failure") and obj.is_failure()
Expand All @@ -345,14 +288,14 @@ def log_counters(obj: T) -> None:

if is_failure:
metrics.incr(f"{klass.__name__}.total.failed")
CHECKPOINTS_TOTAL_FAILED.labels(flow=klass.__name__).inc()
PROMETHEUS_HANDLER.log_failure(flow=klass.__name__)
elif is_success:
metrics.incr(f"{klass.__name__}.total.succeeded")
CHECKPOINTS_TOTAL_SUCCEEDED.labels(flow=klass.__name__).inc()
PROMETHEUS_HANDLER.log_success(flow=klass.__name__)

if is_terminal:
metrics.incr(f"{klass.__name__}.total.ended")
CHECKPOINTS_TOTAL_ENDED.labels(flow=klass.__name__).inc()
PROMETHEUS_HANDLER.log_total_ended(flow=klass.__name__)

klass.log_counters = log_counters
return klass
Expand All @@ -373,7 +316,7 @@ class CheckpointLogger(Generic[T]):
reconstructed from its serialized data allowing you to begin a flow on one host
and log its completion on another (as long as clock drift is marginal).
See `UploadFlow` for an example of defining a flow. It's recomended that you
See `UploadFlow` for an example of defining a flow. It's recommended that you
define your flow with the decorators in this file:
- `@success_events()`, `@failure_events()`: designate some events as terminal
success/fail states of your flow.
Expand Down Expand Up @@ -489,9 +432,9 @@ def submit_subflow(self: _Self, metric: str, start: T, end: T) -> _Self:
if duration:
sentry_sdk.set_measurement(metric, duration, "milliseconds")
duration_in_seconds = duration / 1000
CHECKPOINTS_SUBFLOW_DURATION.labels(
flow=self.cls.__name__, subflow=metric
).observe(duration_in_seconds)
PROMETHEUS_HANDLER.log_subflow(
flow=self.cls.__name__, subflow=metric, duration=duration_in_seconds
)

return self

Expand Down
94 changes: 94 additions & 0 deletions helpers/checkpoint_logger/prometheus.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
from shared.metrics import Counter, Histogram

# Prometheus metric definitions for checkpoint-flow instrumentation.
# Every counter is labeled by "flow" (the flow class name) so each flow
# gets its own time series; CHECKPOINTS_EVENTS is additionally labeled
# by the individual checkpoint name.
CHECKPOINTS_TOTAL_BEGUN = Counter(
    "worker_checkpoints_begun",
    "Total number of times a flow's first checkpoint was logged.",
    ["flow"],
)
CHECKPOINTS_TOTAL_SUCCEEDED = Counter(
    "worker_checkpoints_succeeded",
    "Total number of times one of a flow's success checkpoints was logged.",
    ["flow"],
)
CHECKPOINTS_TOTAL_FAILED = Counter(
    "worker_checkpoints_failed",
    "Total number of times one of a flow's failure checkpoints was logged.",
    ["flow"],
)
CHECKPOINTS_TOTAL_ENDED = Counter(
    "worker_checkpoints_ended",
    "Total number of times one of a flow's terminal checkpoints (success or failure) was logged.",
    ["flow"],
)
CHECKPOINTS_ERRORS = Counter(
    "worker_checkpoints_errors",
    "Total number of errors while trying to log checkpoints",
    ["flow"],
)
CHECKPOINTS_EVENTS = Counter(
    "worker_checkpoints_events",
    "Total number of checkpoints logged.",
    ["flow", "checkpoint"],
)
CHECKPOINTS_SUBFLOW_DURATION = Histogram(
    "worker_checkpoints_subflow_duration_seconds",
    "Duration of subflows in seconds.",
    ["flow", "subflow"],
    # Buckets range from 50ms up to 1 hour, covering both fast subflows
    # and long-running ones.
    buckets=[
        0.05,
        0.1,
        0.5,
        1,
        2,
        5,
        10,
        30,
        60,
        120,
        180,
        300,
        600,
        900,
        1200,
        1800,
        2400,
        3600,
    ],
)


class PrometheusCheckpointLoggerHandler:
    """
    PrometheusCheckpointLoggerHandler is a class that is responsible for all
    Prometheus related logs. This checkpoint logic is responsible for logging
    metrics to any checkpoints we define. This class is made with the intent
    of extending different checkpoints for metrics for different needs. The
    methods in this class are mainly used by the CheckpointLogger class.
    """

    def log_begun(self, flow: str) -> None:
        """Record that a flow's first checkpoint was logged."""
        CHECKPOINTS_TOTAL_BEGUN.labels(flow=flow).inc()

    def log_failure(self, flow: str) -> None:
        """Record that one of a flow's failure checkpoints was logged."""
        CHECKPOINTS_TOTAL_FAILED.labels(flow=flow).inc()

    def log_success(self, flow: str) -> None:
        """Record that one of a flow's success checkpoints was logged."""
        CHECKPOINTS_TOTAL_SUCCEEDED.labels(flow=flow).inc()

    def log_total_ended(self, flow: str) -> None:
        """Record that a flow reached a terminal checkpoint (success or failure)."""
        CHECKPOINTS_TOTAL_ENDED.labels(flow=flow).inc()

    def log_checkpoints(self, flow: str, checkpoint: str) -> None:
        """Record a single checkpoint event for the given flow."""
        CHECKPOINTS_EVENTS.labels(flow=flow, checkpoint=checkpoint).inc()

    def log_errors(self, flow: str) -> None:
        """Record an error encountered while trying to log checkpoints."""
        CHECKPOINTS_ERRORS.labels(flow=flow).inc()

    def log_subflow(self, flow: str, subflow: str, duration: float) -> None:
        """Observe a subflow's duration.

        ``duration`` is in seconds and is fractional: the caller
        (CheckpointLogger.submit_subflow) computes it as milliseconds / 1000,
        so the previous ``int`` annotation was incorrect.
        """
        CHECKPOINTS_SUBFLOW_DURATION.labels(flow=flow, subflow=subflow).observe(
            duration
        )


# Module-level singleton used by CheckpointLogger; Prometheus metric objects
# are process-global, so a single shared handler is sufficient.
PROMETHEUS_HANDLER = PrometheusCheckpointLoggerHandler()
Loading

0 comments on commit e8e0b92

Please sign in to comment.