diff --git a/deploy/helm/apps/charts/prometheus/charts/alertmanager/.helmignore b/deploy/helm/apps/charts/prometheus/charts/alertmanager/.helmignore new file mode 100644 index 00000000..f0c13194 --- /dev/null +++ b/deploy/helm/apps/charts/prometheus/charts/alertmanager/.helmignore @@ -0,0 +1,21 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*~ +# Various IDEs +.project +.idea/ +*.tmproj diff --git a/deploy/helm/apps/charts/prometheus/charts/alertmanager/Chart.yaml b/deploy/helm/apps/charts/prometheus/charts/alertmanager/Chart.yaml new file mode 100644 index 00000000..c2594869 --- /dev/null +++ b/deploy/helm/apps/charts/prometheus/charts/alertmanager/Chart.yaml @@ -0,0 +1,5 @@ +apiVersion: v1 +appVersion: "1.0" +description: A Helm chart for Kubernetes +name: alertmanager +version: 0.1.0 diff --git a/deploy/helm/apps/charts/prometheus/charts/alertmanager/templates/config.yaml b/deploy/helm/apps/charts/prometheus/charts/alertmanager/templates/config.yaml new file mode 100644 index 00000000..554d93e3 --- /dev/null +++ b/deploy/helm/apps/charts/prometheus/charts/alertmanager/templates/config.yaml @@ -0,0 +1,38 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: alertmanager-config + namespace: vortex +data: + alertmanager.yml: |- + global: + smtp_smarthost: '{{ .Values.config.smtpHost }}' + smtp_from: '{{ .Values.config.smtpFrom }}' + smtp_auth_username: '{{ .Values.config.smtpUsername }}' + smtp_auth_password: '{{ .Values.config.smtpPassword }}' + + route: + group_by: ['admin', 'email_to'] + receiver: admin + routes: + - match: + admin: true + receiver: admin + - match: + admin: false + receiver: email_router + + receivers: + - name: admin + email_configs: + - to: "{{ .Values.config.adminEmail }}" + 
headers: { Subject: "[WARN] Alert Email From Vortex" } + html: '{{ "{{" }} template "email.html" . {{ "}}" }}' + - name: email_router + email_configs: + - to: "{{ "{{" }} .GroupLabels.email_to {{ "}}" }}@gmail.com" + headers: { Subject: "[WARN] Alert Email From Vortex" } + html: '{{ "{{" }} template "email.html" . {{ "}}" }}' + + templates: + - '/etc/email-template/email.tmpl' diff --git a/deploy/helm/apps/charts/prometheus/charts/alertmanager/templates/deployment.yaml b/deploy/helm/apps/charts/prometheus/charts/alertmanager/templates/deployment.yaml new file mode 100644 index 00000000..e3c67c97 --- /dev/null +++ b/deploy/helm/apps/charts/prometheus/charts/alertmanager/templates/deployment.yaml @@ -0,0 +1,67 @@ +apiVersion: extensions/v1beta1 +kind: Deployment +metadata: + name: alertmanager + namespace: vortex + labels: + app: alertmanager +spec: + replicas: 1 + template: + metadata: + labels: + app: alertmanager + spec: + containers: + - name: prometheus-alertmanager + image: prom/alertmanager:{{ .Values.controller.imageTag }} + imagePullPolicy: "IfNotPresent" + args: + - --config.file=/etc/config/alertmanager.yml + - --storage.path=/data + - --web.external-url=/ + ports: + - containerPort: 9093 + readinessProbe: + httpGet: + path: /#/status + port: 9093 + initialDelaySeconds: 30 + timeoutSeconds: 30 + volumeMounts: + - name: config-volume + mountPath: /etc/config + - name: storage-volume + mountPath: "/data" + subPath: "" + - name: email-template + mountPath: /etc/email-template + resources: + limits: + cpu: 10m + memory: 50Mi + requests: + cpu: 10m + memory: 50Mi + - name: prometheus-alertmanager-configmap-reload + image: "jimmidyson/configmap-reload:v0.1" + imagePullPolicy: "IfNotPresent" + args: + - --volume-dir=/etc/config + - --webhook-url=http://localhost:9093/-/reload + volumeMounts: + - name: config-volume + mountPath: /etc/config + readOnly: true + resources: + requests: + cpu: {{ .Values.controller.cpu }} + volumes: + - name: storage-volume + emptyDir: 
{} + - name: config-volume + configMap: + name: alertmanager-config + - name: email-template + configMap: + name: alertmanager-email-template \ No newline at end of file diff --git a/deploy/helm/apps/charts/prometheus/charts/alertmanager/templates/email-tempalte.yaml b/deploy/helm/apps/charts/prometheus/charts/alertmanager/templates/email-tempalte.yaml new file mode 100644 index 00000000..87e83d18 --- /dev/null +++ b/deploy/helm/apps/charts/prometheus/charts/alertmanager/templates/email-tempalte.yaml @@ -0,0 +1,365 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: alertmanager-email-template + namespace: vortex +data: + email.tmpl: |- + {{ "{{" }} define "email.html" {{ "}}" }} + + + + + + + {{ "{{" }} template "__subject" . {{ "}}" }} + + + + + + + + + + + +
+
+ + + {{ "{{" }} if gt (len .Alerts.Firing) 0 {{ "}}" }} + + + + + +
+ {{ "{{" }} else {{ "}}" }} + + {{ "{{" }} end {{ "}}" }} + {{ "{{" }} .Alerts | len {{ "}}" }} alert{{ "{{" }} if gt (len .Alerts) 1 {{ "}}" }}s{{ "{{" }} end {{ "}}" }} +
+ + {{ "{{" }} if gt (len .Alerts.Firing) 0 {{ "}}" }} + + + + {{ "{{" }} end {{ "}}" }} + {{ "{{" }} range .Alerts.Firing {{ "}}" }} + + + + {{ "{{" }} end {{ "}}" }} + + {{ "{{" }} if gt (len .Alerts.Resolved) 0 {{ "}}" }} + {{ "{{" }} if gt (len .Alerts.Firing) 0 {{ "}}" }} + + + + {{ "{{" }} end {{ "}}" }} + + + + {{ "{{" }} end {{ "}}" }} + {{ "{{" }} range .Alerts.Resolved {{ "}}" }} + + + + {{ "{{" }} end {{ "}}" }} +
+ [{{ "{{" }} .Alerts.Firing | len {{ "}}" }}] Firing +
+ Labels
+ {{ "{{" }} range .Labels.SortedPairs {{ "}}" }}{{ "{{" }} .Name {{ "}}" }} = {{ "{{" }} .Value {{ "}}" }}
{{ "{{" }} end {{ "}}" }} + {{ "{{" }} if gt (len .Annotations) 0 {{ "}}" }}Annotations
{{ "{{" }} end {{ "}}" }} + {{ "{{" }} range .Annotations.SortedPairs {{ "}}" }}{{ "{{" }} .Name {{ "}}" }} = {{ "{{" }} .Value {{ "}}" }}
{{ "{{" }} end {{ "}}" }} +
+
+
+
+
+ [{{ "{{" }} .Alerts.Resolved | len {{ "}}" }}] Resolved +
+ Labels
+ {{ "{{" }} range .Labels.SortedPairs {{ "}}" }}{{ "{{" }} .Name {{ "}}" }} = {{ "{{" }} .Value {{ "}}" }}
{{ "{{" }} end {{ "}}" }} + {{ "{{" }} if gt (len .Annotations) 0 {{ "}}" }}Annotations
{{ "{{" }} end {{ "}}" }} + {{ "{{" }} range .Annotations.SortedPairs {{ "}}" }}{{ "{{" }} .Name {{ "}}" }} = {{ "{{" }} .Value {{ "}}" }}
{{ "{{" }} end {{ "}}" }} +
+
+ +
+
+ + + + {{ "{{" }} end {{ "}}" }} \ No newline at end of file diff --git a/deploy/helm/apps/charts/prometheus/charts/alertmanager/templates/service-external.yaml b/deploy/helm/apps/charts/prometheus/charts/alertmanager/templates/service-external.yaml new file mode 100644 index 00000000..3139a61c --- /dev/null +++ b/deploy/helm/apps/charts/prometheus/charts/alertmanager/templates/service-external.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Service +metadata: + name: alertmanager-external + namespace: vortex + labels: + app: alertmanager +spec: + type: NodePort + ports: + - port: 9093 + targetPort: 9093 + nodePort: 30004 + selector: + app: alertmanager \ No newline at end of file diff --git a/deploy/helm/apps/charts/prometheus/charts/alertmanager/templates/service.yaml b/deploy/helm/apps/charts/prometheus/charts/alertmanager/templates/service.yaml new file mode 100644 index 00000000..44b6574a --- /dev/null +++ b/deploy/helm/apps/charts/prometheus/charts/alertmanager/templates/service.yaml @@ -0,0 +1,13 @@ +apiVersion: v1 +kind: Service +metadata: + name: alertmanager + namespace: vortex + labels: + app: alertmanager +spec: + ports: + - port: 9093 + targetPort: 9093 + selector: + app: alertmanager \ No newline at end of file diff --git a/deploy/helm/apps/charts/prometheus/templates/alert-rules.yaml b/deploy/helm/apps/charts/prometheus/templates/alert-rules.yaml new file mode 100644 index 00000000..6fa60f48 --- /dev/null +++ b/deploy/helm/apps/charts/prometheus/templates/alert-rules.yaml @@ -0,0 +1,30 @@ +apiVersion: v1 +data: + rules.yml: | + groups: + - name: pod-alert-rule + rules: + - alert: Deployment_Not_Running + expr: (max(kube_deployment_status_replicas_available{namespace!~"vortex|kube-system" }) by (deployment,namespace) == 0) * on(deployment,namespace) group_left(label_email_to)(kube_deployment_labels) + for: 2m + labels: + admin: false + email_to: "{{ "{{" }}$labels.label_email_to{{ "}}" }}" + annotations: + summary: "Deployment \"{{ "{{" 
}}$labels.deployment{{ "}}" }}\" in namespace \"{{ "{{" }}$labels.namespace{{ "}}" }}\" is not running." + description: "There is no available pod for the deployment \"{{ "{{" }}$labels.deployment{{ "}}" }}\" in namespace \"{{ "{{" }}$labels.namespace{{ "}}" }}\"." + - alert: Vortex_Not_Running + expr: (max(kube_deployment_status_replicas_available{namespace=~"vortex|kube-system" }) by (deployment,namespace) == 0) + for: 2m + labels: + admin: true + email_to: admin + annotations: + summary: "Deployment \"{{ "{{" }}$labels.deployment{{ "}}" }}\" in namespace \"{{ "{{" }}$labels.namespace{{ "}}" }}\" is not running." + description: "There is no available pod for the deployment \"{{ "{{" }}$labels.deployment{{ "}}" }}\" in namespace \"{{ "{{" }}$labels.namespace{{ "}}" }}\"." +kind: ConfigMap +metadata: + name: prometheus-rules + namespace: vortex + + diff --git a/deploy/helm/apps/charts/prometheus/templates/config.yaml b/deploy/helm/apps/charts/prometheus/templates/config.yaml index 5b569b17..cfdbcdde 100644 --- a/deploy/helm/apps/charts/prometheus/templates/config.yaml +++ b/deploy/helm/apps/charts/prometheus/templates/config.yaml @@ -17,14 +17,20 @@ data: global: scrape_interval: {{ .Values.controller.scrapeInterval }} scrape_timeout: 5s - scrape_configs: - + rule_files: + - /etc/prometheus-rules/rules.yml + + alerting: + alertmanagers: + - static_configs: + - targets: ["alertmanager.vortex.svc.cluster.local:9093"] + + scrape_configs: - job_name: 'prometheus' static_configs: - targets: ['localhost:9090'] - - job_name: 'kubernetes-service-endpoints' kubernetes_sd_configs: - role: endpoints @@ -57,7 +63,6 @@ data: action: replace target_label: node - - job_name: 'kubernetes-pods' kubernetes_sd_configs: - role: pod diff --git a/deploy/helm/apps/charts/prometheus/templates/deployment.yaml b/deploy/helm/apps/charts/prometheus/templates/deployment.yaml index a7795218..17982e90 100644 --- a/deploy/helm/apps/charts/prometheus/templates/deployment.yaml +++ 
b/deploy/helm/apps/charts/prometheus/templates/deployment.yaml @@ -26,10 +26,12 @@ spec: - containerPort: 9090 protocol: TCP volumeMounts: - - mountPath: "/prometheus" - name: data - - mountPath: "/etc/prometheus" - name: config-volume + - name: data + mountPath: "/prometheus" + - name: config-volume + mountPath: "/etc/prometheus" + - name: rules-volume + mountPath: /etc/prometheus-rules resources: requests: cpu: {{ .Values.controller.cpu }} @@ -39,3 +41,6 @@ spec: - configMap: name: prometheus-config name: config-volume + - name: rules-volume + configMap: + name: prometheus-rules diff --git a/deploy/helm/config/development.yaml b/deploy/helm/config/development.yaml index 16bb17c7..bc55620c 100644 --- a/deploy/helm/config/development.yaml +++ b/deploy/helm/config/development.yaml @@ -30,6 +30,17 @@ apps: cpu: 50m service: nodePort: true + # vortex/deploy/helm/apps/charts/prometheus/charts/alertmanager + alertmanager: + controller: + imageTag: v0.14.0 + cpu: 10m + config: + smtpHost: smtp.sendgrid.net:587 + smtpFrom: vortex@linkernetworks.com + smtpUsername: apikey + smtpPassword: "you need to replace this token manually" + adminEmail: vortex@linkernetworks.com # vortex/deploy/helm/apps/charts/prometheus/charts/cadvisor cadvisor: controller: diff --git a/deploy/helm/config/production.yaml b/deploy/helm/config/production.yaml index f4c7f534..e01574a3 100644 --- a/deploy/helm/config/production.yaml +++ b/deploy/helm/config/production.yaml @@ -30,6 +30,17 @@ apps: cpu: 100m service: nodePort: false + # vortex/deploy/helm/apps/charts/prometheus/charts/alertmanager + alertmanager: + controller: + imageTag: v0.14.0 + cpu: 10m + config: + smtpHost: smtp.sendgrid.net:587 + smtpFrom: vortex@linkernetworks.com + smtpUsername: apikey + smtpPassword: "you need to replace this token manually" + adminEmail: vortex@linkernetworks.com # vortex/deploy/helm/apps/charts/prometheus/charts/cadvisor cadvisor: controller: diff --git a/deploy/helm/config/testing.yaml 
b/deploy/helm/config/testing.yaml index 719f3cf4..a54c8e2b 100644 --- a/deploy/helm/config/testing.yaml +++ b/deploy/helm/config/testing.yaml @@ -30,6 +30,17 @@ apps: cpu: 50m service: nodePort: true + # vortex/deploy/helm/apps/charts/prometheus/charts/alertmanager + alertmanager: + controller: + imageTag: v0.14.0 + cpu: 10m + config: + smtpHost: smtp.sendgrid.net:587 + smtpFrom: vortex@linkernetworks.com + smtpUsername: apikey + smtpPassword: "you need to replace this token manually" + adminEmail: vortex@linkernetworks.com # vortex/deploy/helm/apps/charts/prometheus/charts/cadvisor cadvisor: controller: diff --git a/deploy/kubernetes/apps/monitoring/alertmanager/config.yaml b/deploy/kubernetes/apps/monitoring/alertmanager/config.yaml index b2d19f91..22d5a5b1 100644 --- a/deploy/kubernetes/apps/monitoring/alertmanager/config.yaml +++ b/deploy/kubernetes/apps/monitoring/alertmanager/config.yaml @@ -12,10 +12,22 @@ data: smtp_auth_password: 'password' route: - group_by: [email_to] - receiver: email_router + group_by: [admin, email_to]  # email_to must stay a grouping label: email_router builds its recipient from .GroupLabels.email_to + receiver: admin + routes: + - match: + admin: true + receiver: admin + - match: + admin: false + receiver: email_router receivers: + - name: admin + email_configs: + - to: "c20712983@gmail.com" + headers: { Subject: "[WARN] Alert Email From Vortex" } + html: '{{ template "email.html" . 
}}' - name: email_router email_configs: - to: "{{ .GroupLabels.email_to }}@gmail.com" diff --git a/deploy/kubernetes/apps/monitoring/prometheus/alert-rules.yaml b/deploy/kubernetes/apps/monitoring/prometheus/alert-rules.yaml index b1d0b8de..09145f27 100644 --- a/deploy/kubernetes/apps/monitoring/prometheus/alert-rules.yaml +++ b/deploy/kubernetes/apps/monitoring/prometheus/alert-rules.yaml @@ -5,15 +5,26 @@ data: - name: pod-alert-rule rules: - alert: Deployment_Not_Running - expr: (max(kube_deployment_status_replicas_available{}) by (deployment,namespace) == 0) * on(deployment,namespace) group_left(label_email_to)(kube_deployment_labels) + expr: (max(kube_deployment_status_replicas_available{namespace!~"vortex|kube-system" }) by (deployment,namespace) == 0) * on(deployment,namespace) group_left(label_email_to)(kube_deployment_labels) for: 2m labels: - team: node + admin: false email_to: "{{$labels.label_email_to}}" annotations: - summary: "Deployment \"{{$labels.deployment}}\" is not running" - description: "There is no avaiable pod for the deployment {\"{$labels.deployment}}\"" + summary: "Deployment \"{{$labels.deployment}}\" in namespace \"{{$labels.namespace}}\" is not running." + description: "There is no available pod for the deployment \"{{$labels.deployment}}\" in namespace \"{{$labels.namespace}}\"." + - alert: Vortex_Not_Running + expr: (max(kube_deployment_status_replicas_available{namespace=~"vortex|kube-system" }) by (deployment,namespace) == 0) + for: 2m + labels: + admin: true + email_to: admin + annotations: + summary: "Deployment \"{{$labels.deployment}}\" in namespace \"{{$labels.namespace}}\" is not running." + description: "There is no available pod for the deployment \"{{$labels.deployment}}\" in namespace \"{{$labels.namespace}}\"." kind: ConfigMap metadata: name: prometheus-rules namespace: vortex + +