
Collection Agent configuration reference

Overview

The Collection Agent configuration reference contains details about setting up collectors, reporters, workflows, and plug-ins.

Configuration reference

Below is an annotated example YAML file; adjust it as needed for your project's configuration:

# Collection Agent Configuration Reference

# Directory containing plugin artifacts. Required.
plugin-directory: /usr/local/lib/geneos/plugins

# Agent monitoring and metrics settings.
monitoring:

  # Health and metrics reporting interval in milliseconds. Defaults to 10 seconds.
  reporting-interval: 10000

  # The agent will listen on an HTTP port so that an external system can check its health.
  # In Kubernetes, this can be used in conjunction with the readiness/liveness probes.
  # 200 is returned if the agent is started, 500 otherwise.
  health-check:

    # Defaults to true.
    enabled: true

    # HTTP listen port, defaults to 8080.
    listen-port: 8080
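
    # Illustration only (not part of this file): in Kubernetes, the health-check port can back a
    # liveness or readiness probe. A minimal sketch of a probe on the agent's container is shown
    # below using standard Kubernetes probe fields; the request path '/' is an assumption, since
    # this reference does not specify a path:
    #
    #   livenessProbe:
    #     httpGet:
    #       path: /
    #       port: 8080
    #     initialDelaySeconds: 10
    #     periodSeconds: 30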

  # Agent metrics.
  # See the documentation for a full list of metrics that are collected.
  metrics:

    # Whether metric reporting is enabled. Defaults to true.
    enabled: true

    # Whether to disable windowing (the resetting of counter values at each reporting interval). Defaults to false.
    disable-windowing: false

    # Dimensions added to all metrics.
    dimensions:
      namespace: ${env:NAMESPACE}
      pod_name: ${env:POD_NAME}
      container: ${env:CONTAINER_NAME}

    # Properties added to all metrics.
    properties:
      prop1: value
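
    # For example, if the agent's container runs with NAMESPACE=finance, POD_NAME=web-1 and
    # CONTAINER_NAME=agent (hypothetical values), every self-monitoring metric it emits carries
    # the dimensions namespace=finance, pod_name=web-1 and container=agent, plus the property
    # prop1=value.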

#
# Collector definitions
#
collectors:

  # Collector type (all collectors are of type 'plugin').
  - type: plugin

    # Optional name used in logging.  If omitted, an auto-generated name will be assigned.
    name: statsd

    # Simple class name of the collector in the plugin jar.
    class-name: StatsdServer

    # Data point processors applied to data points published from this collector.
    # This optional processing chain allows for manipulating and/or filtering data points prior
    # to workflow publication.  This is the recommended way to perform edge processing, when applicable, so that
    # unneeded data can be dropped before incurring workflow overhead.
    processors:
      # For example, drop all events collected from statsd.  See the "workflow -> common -> processors" section for
      # details on each type of processor.
      - type: drop-filter
        matchers:
          - type: kind
            kind: generic-event

    # Additional properties are specific to each collector type.  See plugin's configuration reference for details.
    listen-port: 8125
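
    # For illustration, a StatsD client would send standard line-protocol messages such as the
    # following to port 8125 (metric names are hypothetical; see the plugin's own configuration
    # reference for the transports and metric types it accepts):
    #
    #   web.requests:1|c
    #   web.response_time:320|ms
    #   queue.depth:42|g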

#
# Reporter definitions
#
reporters:

  # Kafka Reporter
  - type: kafka

    # Reporter name. Referenced from a pipeline's 'reporter' setting.
    name: kafka-metrics

    # Remaining settings are specific to each reporter type.

    # Comma-separated list of Kafka broker addresses and ports.
    brokers: ${env:KAFKA_BROKERS}

    # Topic name/format.  Can be a literal string, an environment/message variable, or a combination thereof.
    # Valid variables:
    #
    #   Metric name:           ${datapoint.name}
    #   Dimension value:       ${datapoint.dimension:DIMENSION_NAME}
    #   Property value:        ${datapoint.property:PROPERTY_NAME}
    #   Environment variable:  ${env:SOME_VAR}
    #
    topic: metrics-${datapoint.dimension:namespace}
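
    # For example, with the setting above a data point carrying the dimension namespace=finance
    # is published to the topic 'metrics-finance'.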

    # Retry/consistency behavior.  Valid values:
    #
    #   exactly-once:  The kafka producer will operate with idempotence enabled.  This method is recommended
    #                  for logs and events pipelines.
    #
    #   at-most-once:  Messages are published once and no retries are made if unsuccessful.  This method is recommended
    #                  for the metrics pipeline.
    delivery-method: at-most-once

  - type: kafka
    name: kafka-logs
    brokers: ${env:KAFKA_BROKERS}
    topic: logs-${datapoint.dimension:namespace}
    delivery-method: exactly-once

  - type: kafka
    name: kafka-events
    brokers: ${env:KAFKA_BROKERS}
    topic: events-${datapoint.dimension:namespace}
    delivery-method: exactly-once

  # Logging reporter that simply logs each data point. This is intended only for testing purposes.
  - type: logging

    # Reporter name. Referenced from a pipeline's 'reporter' setting.
    name: stdout

    # Log level at which each data point is logged.  Can be: error, info (default), warn, debug or trace.
    level: info

  # TCP reporter sends internal DataModel messages over a TCP connection.
  - type: tcp

    # Reporter name. Referenced from a pipeline's 'reporter' setting.
    name: my-tcp-reporter

    # The TCP server hostname. Default value shown.
    hostname: localhost

    # The TCP server port. Default value shown.
    port: 7137

    # The TCP server connection timeout in milliseconds. Default value shown.
    connection-timeout-millis: 10000

    # The TCP server write timeout in milliseconds. Default value shown.
    write-timeout-millis: 10000

    # Maximum message length in bytes. Default value shown.
    max-message-length: 65536

  # External/custom reporters are defined using the 'plugin' type.
  - type: plugin

    # Reporter name. Referenced from a pipeline's 'reporter' setting.
    name: my-custom-reporter

    # Simple class name of the reporter in the plugin jar.
    class-name: CustomReporter

    # Additional properties are specific to each reporter type.  See plugin's configuration reference for details.
    custom-prop: asdf

#
# Workflow settings for controlling the flow of data points from plugins to reporters.
#
workflow:

  # Directory to store pipeline persistence.  Required only if at least one pipeline is configured to buffer data
  # points on disk.  The directory must be writable.
  store-directory: /var/lib/geneos/collection-agent

  # Pipelines.
  #
  # A pipeline exists for each class of data (metrics/logs/events)
  #
  # At least one pipeline must be configured.  A runtime error will occur if a plugin attempts delivery to a pipeline
  # that is not configured.
  #

  # Metrics pipeline.
  metrics:

    # Reporter to which all data points on this pipeline are sent.
    # This property is optional if there is only one reporter configured.  Otherwise the value is required and
    # must correspond to the 'name' of a reporter defined above.
    reporter: kafka-metrics

    # Store settings.
    #
    # Data points are stored either in memory or on disk before delivery to a reporter.
    #
    # If a reporter's target becomes unavailable, data points are queued until either the store is full or
    # the reporter target becomes available again.
    #
    # Plugins are informed when a store becomes full and are free to handle the situation in a way that makes
    # sense for that plugin (for example, dropping the message if not critical, or waiting for the store to re-open before
    # collecting any more data).
    store:

      # Store type.
      #
      # Permitted values:
      # 'memory':  A circular, fixed-size, in-memory store that provides no persistence.  The oldest data point
      #            is removed when adding to a full store, therefore this store never rejects new data points
      #            and will begin to drop data if a slow reporter cannot keep up.
      #
      # 'disk':    A fixed-size store that is persisted to disk.  Requires the workflow 'store-directory' setting
      #            to be configured.
      #
      # For the metrics pipeline, it is recommended (and the default) to use a memory store, as metric data is
      # generally non-critical and loses relevance if delayed.
      #
      type: memory

      # Maximum number of data points to hold before the store is considered full and new data points are rejected.
      # The default capacity is 8192 data points for a memory store and 10,000,000 data points for a disk store.
      capacity: 8192

    # Number of retries after initial delivery fails.  Defaults to 3.  For infinite retries set to -1.
    # The interval between consecutive retries for the same message increases from 1 second up to 120 seconds.
    max-retries: 3

    # Custom processing of data points on this pipeline.  Processors can manipulate, enrich and/or filter
    # data points before reporting.
    #
    # See the 'common' pipeline for more details.
    processors:
      - type: enrichment
        name: metrics-enricher
        dimensions:
          custom_dimension: value

  # Logs pipeline.
  logs:
    reporter: kafka-logs
    store:
      # For logs, it is recommended (and the default) to use a disk store if data loss is not tolerable.
      type: disk

    # For logs, it is recommended (and the default) to retry infinitely if data loss is not tolerable.
    max-retries: -1

  # Events pipeline.
  events:
    reporter: kafka-events
    store:
      # For events, it is recommended (and the default) to use a disk store if data loss is not tolerable.
      type: disk

    # For events, it is recommended (and the default) to retry infinitely if data loss is not tolerable.
    max-retries: -1

  # Common pipeline.
  #
  # This is a unique pipeline that only has data-point processors (there is no reporter). The processors are applied
  # to data points on all pipelines, before any pipeline-specific processors are applied.
  common:

    # Data-point processors.
    #
    # Processors can manipulate, enrich and/or filter data points before reporting.  They are applied before
    # a data point is saved in the pipeline's store.
    #
    processors:

      # Enrichment processor.  Adds dimensions and/or properties to all data points.
      - type: enrichment

        # Optional name used in logging.  If omitted, an auto-generated name will be assigned.
        name: enricher

        # Whether to overwrite an existing dimension or property with the same name (defaults to false)
        overwrite: false

        # Dimensions to add
        dimensions:
          node_name: ${env:NODE_NAME}

        # Properties to add
        properties:
          prop: value
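
        # For example, with the settings above a hypothetical gauge named 'cpu_usage' arriving with
        # the dimension pod_name=web-1 is reported with the dimensions pod_name=web-1 and
        # node_name=<value of NODE_NAME>, plus the property prop=value.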

      # Drop filter processor.  Drops data points that match the configured criteria.
      - type: drop-filter

        # One or more match criteria.
        # For a data point to be dropped, all configured criteria must match; otherwise the data point
        # will be forwarded.  If no matchers are configured, all data points will be forwarded.
        matchers:

          # Match by data point name, either exactly or via regex.
          - type: name

            # Exact match
            name: kubernetes_node_cpu_usage

            # Regex match (only one of 'name' or 'name-pattern' can be configured)
            name-pattern: kubernetes_.*

          # Match by data point dimension key and either an exact value or a regex pattern.
          - type: dimension
            key: namespace

            # Exact value match
            value: finance

            # Regex match (only one of 'value' or 'value-pattern' can be configured)
            value-pattern: ns.*

          # Match by data point property key and either an exact value or a regex pattern.
          - type: property
            key: someProperty

            # Exact value match
            value: someValue

            # Regex match (only one of 'value' or 'value-pattern' can be configured)
            value-pattern: value.*

          # Match by data point kind. Valid kinds are: [attribute|counter|gauge|generic-event|log-event|timer-histogram]
          - type: kind
            kind: counter
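
        # With the matchers above (taking the exact-match form of each), a data point is dropped
        # only if every criterion matches: for example, a counter named 'kubernetes_node_cpu_usage'
        # carrying the dimension namespace=finance and the property someProperty=someValue.
        # The same counter with namespace=dev fails the dimension matcher and is forwarded.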

      # Forward filter processor.  Forwards data points that match the configured criteria.
      # This behaves inversely to "drop-filter" above but is configured identically.
      - type: forward-filter

        # One or more match criteria.
        # For a data point to be forwarded, all configured criteria must match; otherwise the data point
        # will be dropped.  If no matchers are configured, all data points will be dropped.
        # See "drop-filter" for details on each type of matcher.
        matchers:
          - type: name
            name: myCounter
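
        # With the matcher above, only data points named exactly 'myCounter' are forwarded;
        # everything else is dropped.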

      # Normalize processor.  Normalizes dimension names for consistency in subsequent processing and reporting.
      - type: normalize

        # Optional name used in logging.  If omitted, an auto-generated name will be assigned.
        name: normalize

        # Dimension normalization settings.
        dimensions:

          # Default overwrite behavior; can be overridden per mapping.  Defaults to false.
          overwrite: false

          # Dimension mappings.
          mappings:

              # Old dimension name.
            - from: project

              # New dimension name.
              to: namespace

              # Whether to overwrite if a dimension already exists with the same name.  Defaults to parent setting.
              overwrite: false
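
          # For example, with the mapping above a data point arriving with the dimension
          # project=finance is reported with namespace=finance instead. If a 'namespace' dimension
          # is already present and 'overwrite' is false, the existing value is kept.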

      # External/custom processors are defined using the 'plugin' type.
      - type: plugin

        # Optional name used in logging.  If omitted, an auto-generated name will be assigned.
        name: kube-enricher

        # Simple class name of the processor in the plugin jar.
        class-name: KubernetesEnricher

        # Additional properties are specific to each processor type.  See plugin's configuration reference for details.
        custom-prop: abc
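
For orientation, below is a minimal sketch of a working configuration assembled from the settings shown above: one StatsD collector, a logging reporter for testing, and a metrics pipeline. The path and port values are examples; all omitted settings fall back to their documented defaults, and the 'reporter' setting could even be dropped here because only one reporter is defined.

# Minimal example configuration (sketch)
plugin-directory: /usr/local/lib/geneos/plugins

collectors:
  - type: plugin
    name: statsd
    class-name: StatsdServer
    listen-port: 8125

reporters:
  - type: logging
    name: stdout

workflow:
  metrics:
    reporter: stdout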