Skip to content

Commit

Permalink
feat(app): long term metric storage (#173)
Browse files Browse the repository at this point in the history
Co-authored-by: Ralf Grubenmann <[email protected]>
  • Loading branch information
olevski and Panaetius authored Jul 4, 2022
1 parent 5024dc3 commit 6335ec5
Show file tree
Hide file tree
Showing 17 changed files with 1,167 additions and 466 deletions.
2 changes: 2 additions & 0 deletions Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ expiringdict = "*"
jinja2 = "*"
pytz = "*"
prometheus_client = "*"
dataconf = "*"
boto3 = "*"

[dev-packages]
black = "*"
Expand Down
563 changes: 310 additions & 253 deletions Pipfile.lock

Large diffs are not rendered by default.

10 changes: 3 additions & 7 deletions controller/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import os
import yaml

from controller.utils import sanitize_prometheus_metric_label_name
from controller.config_types import AuditlogConfig, PrometheusMetricsConfig

api_group = os.getenv("CRD_API_GROUP", "amalthea.dev")
api_version = os.getenv("CRD_API_VERSION", "v1alpha1")
Expand Down Expand Up @@ -72,9 +72,5 @@
CHILD_KEY_LABEL_KEY = f"{api_group}/child-key"
MAIN_POD_LABEL_KEY = f"{api_group}/main-pod"

METRICS_ENABLED = os.environ.get("METRICS_ENABLED", "false").lower() == "true"
METRICS_EXTRA_LABELS = json.loads(os.environ.get("METRICS_EXTRA_LABELS", "[]"))
METRICS_EXTRA_LABELS_SANITIZED = tuple([
sanitize_prometheus_metric_label_name(i) for i in METRICS_EXTRA_LABELS
])
METRICS_PORT = int(os.environ.get("METRICS_PORT", 8765))
METRICS = PrometheusMetricsConfig.dataconf_from_env()
AUDITLOG = AuditlogConfig.dataconf_from_env()
62 changes: 62 additions & 0 deletions controller/config_types.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
from dataclasses import dataclass, field
import dataconf
import json
from typing import Optional, Union, List


@dataclass
class S3Config:
"""The configuration needed to upload metrics to S3."""
endpoint: str
bucket: str
path_prefix: str
access_key_id: str
secret_access_key: str
rotation_period_seconds: Union[str, int] = 86400

def __post_init__(self):
if type(self.rotation_period_seconds) is str:
self.rotation_period_seconds = int(self.rotation_period_seconds)


@dataclass
class MetricsBaseConfig:
"""Base metrics/auditlog configuration."""
enabled: Union[str, bool] = False
extra_labels: Union[str, List[str]] = field(default_factory=list)

def __post_init__(self):
if type(self.enabled) is str:
self.enabled = self.enabled.lower() == "true"
if type(self.extra_labels) is str:
self.extra_labels = json.loads(self.extra_labels)


@dataclass
class AuditlogConfig(MetricsBaseConfig):
"""The configuration used for the auditlogs."""
s3: Optional[S3Config] = None

def __post_init__(self):
super().__post_init__()
if self.enabled and not self.s3:
raise ValueError("If auditlog is enabled then the S3 configuration has to be provided.")

@classmethod
def dataconf_from_env(cls, prefix="AUDITLOG_"):
return dataconf.env(prefix, cls)


@dataclass
class PrometheusMetricsConfig(MetricsBaseConfig):
"""The configuration for prometheus metrics"""
port: Union[str, int] = 8765

def __post_init__(self):
super().__post_init__()
if type(self.port) is str:
self.port = int(self.port)

@classmethod
def dataconf_from_env(cls, prefix="METRICS_"):
return dataconf.env(prefix, cls)
95 changes: 0 additions & 95 deletions controller/metrics.py

This file was deleted.

Empty file added controller/metrics/__init__.py
Empty file.
45 changes: 45 additions & 0 deletions controller/metrics/events.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
from abc import ABC, abstractmethod
from dataclasses import dataclass
from datetime import datetime
from dateutil import parser
from typing import Any, Dict, Optional

from controller.server_status_enum import ServerStatusEnum


@dataclass
class MetricEvent:
"""Every element in the metrics queue that is created by
amalthea and consumed by the metrics handlers conforms to this
structure."""
event_timestamp: datetime
session: Dict[str, Any]
sessionCreationTimestamp: Optional[datetime] = None
old_status: Optional[ServerStatusEnum] = None
status: Optional[ServerStatusEnum] = None

def __post_init__(self):
if self.status and type(self.status) is str:
self.status = ServerStatusEnum(self.status)
if self.old_status and type(self.old_status) is str:
self.old_status = ServerStatusEnum(self.old_status)
if self.session.get("metadata", {}).get("creationTimestamp"):
self.sessionCreationTimestamp = parser.isoparse(
self.session.get("metadata", {}).get("creationTimestamp")
)

def __repr__(self) -> str:
return (
f"MetricEvent(event_timestamp={self.event_timestamp}, old_status={self.old_status}, "
f"status={self.status}, sessionCreationTimestamp={self.sessionCreationTimestamp}, "
"session=<redacted>)"
)


class MetricEventHandler(ABC):
"""Abstract class for the queue workers that will
be doing the final publishing or persisting of any metrics
that are generated by amalthea in the metrics queue."""
@abstractmethod
def publish(self, metric_event: MetricEvent):
pass
Loading

0 comments on commit 6335ec5

Please sign in to comment.