Sample configuration for AWS EC2 handling 10k metrics/sec (small) with NGINX Ingress controller

Download this sample configuration for AWS EC2 handling 10k metrics/sec (small), provided by ITRS.

# Example Obcerv configuration for AWS EC2 handling 10k metrics/sec.
#
# Nodes: (3) c5.4xlarge (16CPU, 32GB)
#
# The resource requests total ~32 cores and ~87GiB memory (assuming collection-agent DaemonSet runs on 3 nodes)
# and includes Linkerd resources.
#
# Disk requirements:
# - Timescale:
#   - 8 TiB data disk for each replica (x3)
#   - 30 GiB WAL disk for each replica (x3)
# - Kafka: 140 GiB for each replica (x3)
# - Loki: 30 GiB for each replica (x1)
# - Zookeeper: 1 GiB for each replica (x3)
# - etcd: 1 GiB for each replica (x3)
# - Downsampled Metrics:
#   - Raw: 5 GiB for each replica (x2)
#   - Bucketed: 5 GiB for each replica (x2)
#
# The configuration references a StorageClass named `io1-25` which uses io1 with 25 iopsPerGB - you can create
# this class or change the config to use a class of your own, but it should be similar in performance.
#
# This configuration is based upon a certain number of Obcerv entities, average metrics per entity, and
# average metrics collection interval. The following formula can be used to estimate the load to expect:
#
# metrics/sec = (Obcerv entities * metrics/entity) / average metrics collection interval
#
# In this example configuration, we have the following:
#
# 10,000 metrics/sec = (25,000 Obcerv entities * 4 metrics/entity) / 10 seconds average metrics collection interval
#

# Default StorageClass name. Presumably applies to volumes whose section does not
# set its own storage class (sections below that need faster disks name "io1-25"
# explicitly) - confirm against the chart.
defaultStorageClass: "gp2"
# Apps endpoint: external hostname plus Ingress annotations for the NGINX
# Ingress controller. The hostname is an example value; substitute your own.
apps:
  externalHostname: "obcerv.mydomain.internal"
  ingress:
    annotations:
      kubernetes.io/ingress.class: "nginx"
      # Declares this Ingress as the "master" of a mergeable Ingress set; the
      # iam section in this file carries the matching "minion" annotation.
      nginx.org/mergeable-ingress-type: "master"
# Ingestion endpoint: its own external hostname, 2 replicas, and an Ingress
# handled by the NGINX Ingress controller. The hostname is an example value.
ingestion:
  externalHostname: "obcerv-ingestion.mydomain.internal"
  replicas: 2
  ingress:
    annotations:
      kubernetes.io/ingress.class: "nginx"
      # Asks the ingress controller to talk gRPC to the backend service.
      nginx.ingress.kubernetes.io/backend-protocol: "GRPC"
  # Requests and limits are set to the same values for both memory and CPU.
  resources:
    requests:
      memory: "512Mi"
      cpu: "500m"
    limits:
      memory: "512Mi"
      cpu: "500m"
# IAM Ingress annotations. No hostname is set in this section.
iam:
  ingress:
    annotations:
      kubernetes.io/ingress.class: "nginx"
      # "minion" half of a mergeable Ingress set; presumably merged under the
      # "master" Ingress declared in the apps section - confirm against the chart.
      nginx.org/mergeable-ingress-type: "minion"
# Zookeeper: 3 replicas. The memory limit is double the request; the CPU limit
# equals the CPU request.
zookeeper:
  replicas: 3
  resources:
    requests:
      memory: "256Mi"
      cpu: "200m"
    limits:
      memory: "512Mi"
      cpu: "200m"
# Kafka: 3 replicas, each with a 140 GiB disk on the "io1-25" StorageClass.
kafka:
  replicas: 3
  diskSize: "140Gi"
  storageClass: "io1-25"
  # Consumer fetch tuning. The names suggest Kafka's fetch.max.wait.ms and
  # fetch.min.bytes consumer settings - TODO confirm how the chart applies them.
  consumer:
    fetchMaxWaitMs: 500
    # 524288 bytes = 512 KiB.
    fetchMinBytes: 524288
  # Memory request equals the limit; CPU may burst from 1 to 2 cores.
  resources:
    requests:
      memory: "3Gi"
      cpu: "1"
    limits:
      memory: "3Gi"
      cpu: "2"
# Timescale: cluster of 3, each member with an 8 TiB data volume and a 30 GiB
# WAL volume, both on the "io1-25" StorageClass.
timescale:
  clusterSize: 3
  dataDiskSize: "8Ti"
  dataStorageClass: "io1-25"
  walDiskSize: "30Gi"
  walStorageClass: "io1-25"
  # Memory request equals the limit; CPU may burst from 2 to 3 cores.
  resources:
    requests:
      memory: "14Gi"
      cpu: "2"
    limits:
      memory: "14Gi"
      cpu: "3"
  # Duration values are quoted like every other string value in this file.
  # The m/h/d suffixes presumably mean minutes/hours/days - confirm against
  # the chart.
  compressAfter: "1h"
  # Chunk size per table.
  retention:
    entity_attributes:
      chunkSize: "2d"
    metrics:
      chunkSize: "20m"
    metrics_5m:
      chunkSize: "1h"
    metrics_15m:
      chunkSize: "2h"
    metrics_1h:
      chunkSize: "6h"
    metrics_3h:
      chunkSize: "12h"
    metrics_12h:
      chunkSize: "2d"
    metrics_1d:
      chunkSize: "3d"
    statuses:
      chunkSize: "7d"
# Loki: 30 GiB disk on the "io1-25" StorageClass. No replica count is set here.
loki:
  diskSize: "30Gi"
  storageClass: "io1-25"
# sinkd: one replica and one "raw" replica, each with its own resource block.
sinkd:
  replicas: 1
  rawReplicas: 1
  resources:
    requests:
      memory: "1Gi"
      cpu: "250m"
    limits:
      memory: "1Gi"
      cpu: "400m"
  # Same values as `resources` above; kept separate so the raw replicas can be
  # sized independently.
  rawResources:
    requests:
      memory: "1Gi"
      cpu: "250m"
    limits:
      memory: "1Gi"
      cpu: "400m"
# platformd: 2 replicas. Requests 1.5 GiB (1536Mi) memory and 1 CPU each, with
# limits of 2 GiB and 1.5 CPU.
platformd:
  replicas: 2
  resources:
    requests:
      memory: "1536Mi"
      cpu: "1"
    limits:
      memory: "2Gi"
      cpu: "1500m"
# dpd: single replica with explicit JVM options and multiplexer tuning.
dpd:
  replicas: 1
  # -Xmx2G caps the JVM heap at 2G, below the 3Gi memory request further down;
  # -XX:NewSize=1G sets the initial young-generation size.
  jvmOpts: "-Xmx2G -XX:NewSize=1G"
  metricsMultiplexer:
    maxFilterResultCacheSize: 200000
    maxConcurrentOps: 100
    localParallelism: 6
  # Warn/critical thresholds for metrics partition lag; the critical value is
  # twice the warning value. Units are not stated here - TODO confirm.
  selfMonitoringThresholds:
    metrics_partition_lag_warn: 5000
    metrics_partition_lag_critical: 10000
  resources:
    requests:
      memory: "3Gi"
      cpu: "2"
    limits:
      memory: "3500Mi"
      cpu: "3"
# metricForecastd: resource sizing only; no replica count is set in this file.
metricForecastd:
  resources:
    requests:
      memory: "512Mi"
      cpu: "250m"
    limits:
      memory: "768Mi"
      cpu: "500m"
# Downsampled metrics stream: 2 replicas and 2 "bucketed" replicas, with
# storage on the "io1-25" StorageClass. No disk size is set in this section.
downsampledMetricsStream:
  replicas: 2
  storageClass: "io1-25"
  bucketedReplicas: 2
  # Sizing for the non-bucketed replicas.
  resources:
    requests:
      memory: "1Gi"
      cpu: "750m"
    limits:
      memory: "1536Mi"
      cpu: "1"
  # Sizing for the bucketed replicas; memory request equals the limit.
  bucketedResources:
    requests:
      memory: "1536Mi"
      cpu: "1"
    limits:
      memory: "1536Mi"
      cpu: "1500m"
# Entity stream: separate resource sizing for the "intermediate" and "final"
# components. Both have the same CPU request and limit; memory differs.
entityStream:
  intermediate:
    resources:
      requests:
        memory: "768Mi"
        cpu: "300m"
      limits:
        memory: "1Gi"
        cpu: "500m"
  final:
    resources:
      requests:
        memory: "512Mi"
        cpu: "300m"
      limits:
        memory: "1500Mi"
        cpu: "500m"
# etcd: 3 replicas. No disk size or resources are set in this file.
etcd:
  replicas: 3
# Collection: resource sizing for the metrics collection component.
collection:
  metrics:
    resources:
      requests:
        memory: "768Mi"
        cpu: "200m"
      limits:
        memory: "1Gi"
        cpu: "250m"
["Obcerv"] ["User Guide", "Technical Reference"]

Was this topic helpful?