diff --git a/README.md b/README.md index f378c412e62659a7ae38bbf67f34ec8f28b0370a..90be894097f470bf7485a8ed040789c37841ad15 100644 --- a/README.md +++ b/README.md @@ -14,6 +14,159 @@ Loading dashboards - next press import -Variables which must be changed/verified from values.yml during instalation on other kubernetes cluster: - - metricbeat.dropEvent - set correct namespace. ( system is mandatory ) +========================================================================================================= += = += Performance parameters = += = +========================================================================================================= + +===================================== Logstash performance parameters ================================== + +In file values/dev/observability/values.yml are following logstash performance parameters: + +- logstash.env.ls_java_opts: "-Xms3g -Xmx3g" + Set heap memory for logstash process inside container. + Logstash statefulsets restart is required. + +- logstash.resources.requests.memory: 4Gi + logstash.resources.limits.memory: 4Gi + Set memory allocation (request/limit) for logstash pod. + +- logstash.resources.requests.cpu: 300m + logstash.resources.limits.cpu: 300m + Set CPU allocation (request/limit) for logstash pod. + +- pipelines_yml_config.pipeline.workers: 1 + Set number of workers for logstash pipeline. + Logstash statefulsets restart is required. + +- pipelines_yml_config.pipeline.pipeline.batch.size: 125 + Set batch_size for logstash pipeline. + Logstash statefulsets restart is required. + +===================================== Elasticsearch performance parameters ================================== + +- elasticsearch.diskSpace: 60Gi + Set disk size to store indices in elasticsearch pods + +- elasticsearch.count: 3 + Number of elasticsearch pods in stack + +- elasticsearch.resources.requests.memory: 4Gi + elasticsearch.resources.limits.memory: 4Gi + Set memory allocation (request/limit) for elasticsearch pod. 
+ +- elasticsearch.resources.requests.cpu: 300m + elasticsearch.resources.limits.cpu: 300m + Set CPU allocation (request/limit) for elasticsearch pod. + + +===================================== Kibana performance parameters ================================== + +- kibana.resources.requests.memory: 1Gi + kibana.resources.limits.memory: 1Gi + Set memory allocation (request/limit) for kibana pod. + +- kibana.resources.requests.cpu: 300m + kibana.resources.limits.cpu: 300m + Set CPU allocation (request/limit) for kibana pod. + +- kibana.count: 1 + Number of kibana pods in stack + +===================================== Filebeat performance parameters ================================== + +- filebeat4agents.resources.requests.memory: 1Gi + filebeat4agents.resources.limits.memory: 1Gi + Set memory allocation (request/limit) for filebeat pod. + +- filebeat4agents.resources.requests.cpu: 100m + filebeat4agents.resources.limits.cpu: 100m + Set CPU allocation (request/limit) for filebeat pod. + +========================================================================================================= += = += ILM = += = +========================================================================================================= + +On each monitoring ELK stack there are 5 ILM policies: +- business-ilm - responsible for index rotation with business logs +- technical-ilm - responsible for index rotation with technical logs +- metricbeat-ilm - responsible for index rotation with metrics collected from agents +- filebeat - responsible for index rotation with ELK stack logs +- heartbeat-ilm - responsible for index rotation with services heartbeats + + +Apply changes on heartbeat-ilm: + 1) Modify values.yml file and set new values: + .... 
+ heartbeat: + ilm: + hot: + max_age: 30d <- set max age for documents in hot phase + max_primary_shard_size: 50gb <- set max size for primary shard in hot phase + delete: + min_age: 365d <- set age after which index will be deleted + services: + heartbeat.monitors: + .... + 2) Restart heartbeat by command: + + kubectl rollout restart deployment heartbeat-beat-heartbeat + + +Apply changes on metricbeat-ilm: + 1) Modify values.yml file and set new values: + .... + metricbeat: + ilm: + hot: + max_age: 30d <- set max age for documents in hot phase + max_primary_shard_size: 50gb <- set max size for primary shard in hot phase + delete: + min_age: 365d <- set age after which index will be deleted + resources: + requests: + memory: 500Mi + limits: + .... + 2) Restart metricbeat by command: + + kubectl rollout restart daemonset metricbeat-beat-metricbeat + +Apply changes on filebeat: + 1) Log in to Kibana and go to: Stack Management -> Index Lifecycle Policies. + 2) Click on the filebeat policy + 3) Modify the "Hot phase" advanced settings after disabling "Use recommended defaults" and/or modify the Delete phase if needed. + 4) Press "Save policy". + + +Apply changes on business-ilm and technical-ilm: + 1) Modify values.yml file and set new values: + .... + logstash: + ilm: + business: + hot: + max_age: 30d <- set max age for business documents in hot phase + max_primary_shard_size: 1gb <- set max size for business primary shard in hot phase + delete: + min_age: 30d <- set age after which business index will be deleted + technical: + hot: + max_age: 30d <- set max age for technical documents in hot phase + max_primary_shard_size: 1gb <- set max size for technical primary shard in hot phase + delete: + min_age: 30d <- set age after which technical index will be deleted + count_beats: 1 + count_syslog: 0 + .... 
+ 2) Restart logstash statefulsets by command: + + kubectl rollout restart sts logstash-beats-ls + + + + diff --git a/charts/Chart.yaml b/charts/Chart.yaml index dc19d20d41bda1fa1fb33423af3684922cfea10b..06235c20c79aed4a3f0213d13e808eaa73c44d88 100644 --- a/charts/Chart.yaml +++ b/charts/Chart.yaml @@ -1,6 +1,6 @@ name: eck-monitoring version: ${PROJECT_RELEASE_VERSION} appVersion: "${PROJECT_RELEASE_VERSION}" -#version: 0.1.3 +#version: 0.1.4 diff --git a/charts/templates/filebeat-agents.yaml b/charts/templates/filebeat-agents.yaml index d706d3431bdb935d5133f1f2285c99f4503dc6c3..1c17dfaa9bfce09a8309ab63e7d7696884cfd670 100644 --- a/charts/templates/filebeat-agents.yaml +++ b/charts/templates/filebeat-agents.yaml @@ -16,6 +16,10 @@ spec: hostNetwork: true # Allows to provide richer host metadata containers: - name: filebeat + {{- with .Values.filebeat4agents.resources }} + resources: + {{- toYaml . | nindent 12 }} + {{- end }} command: ['sh', '-c', 'exec /usr/share/filebeat/filebeat -e -c /usr/share/filebeat/filebeat.yml'] securityContext: runAsUser: 0 diff --git a/charts/templates/heartbeat.yaml b/charts/templates/heartbeat.yaml index 7740a695288eef6e221fc8d803b5dfeb3f19ea8e..696b9e9a264d18d9c9fee3da3b15a702485f2543 100644 --- a/charts/templates/heartbeat.yaml +++ b/charts/templates/heartbeat.yaml @@ -14,6 +14,7 @@ spec: {{- end }} setup.ilm.enabled: true setup.ilm.policy_name: heartbeat-ilm + setup.ilm.overwrite: true setup.ilm.policy_file: "/usr/share/heartbeat/ilm/heartbeat-ilm.json" deployment: replicas: 1 diff --git a/charts/templates/logstash_beats.yaml b/charts/templates/logstash_beats.yaml index 855c66fcabfee6557f86b73673ae022a158fb0cc..48f0db35cb1e722410c17370589ba52858210d41 100644 --- a/charts/templates/logstash_beats.yaml +++ b/charts/templates/logstash_beats.yaml @@ -75,7 +75,7 @@ spec: subPath: logstash-business-ilm.json - name: logstash-technical-ilm-vol mountPath: /usr/share/logstash/ilm/logstash-technical-ilm.json - subPath: 
logstash-technical-ilm.json + subPath: logstash-technical-ilm.json env: - name: RELEASE_NAME value: {{ .Release.Name }} diff --git a/charts/templates/metricbeat.yaml b/charts/templates/metricbeat.yaml index 5ad574640dd494187702bd2b55c36b06e97b9548..1e6e6c3bea1487e9daecf1d54ca3e23eb6a3ce2d 100644 --- a/charts/templates/metricbeat.yaml +++ b/charts/templates/metricbeat.yaml @@ -70,6 +70,7 @@ spec: verification_mode: none setup.ilm.enabled: true setup.ilm.policy_name: metricbeat-ilm + setup.ilm.overwrite: true setup.ilm.policy_file: "/usr/share/metricbeat/ilm/metricbeat-ilm.json" processors: - add_cloud_metadata: {} diff --git a/charts/values/dev/observability/values.yaml b/charts/values/dev/observability/values.yaml index df345e9b4591bc6c2f6ff1b9aa31b1594d9cea88..ec195b5898effa004460af259402c203057d3099 100644 --- a/charts/values/dev/observability/values.yaml +++ b/charts/values/dev/observability/values.yaml @@ -51,8 +51,10 @@ kibana: resources: requests: memory: 1Gi + cpu: 300m limits: memory: 1Gi + cpu: 300m #Environment variables to set in kibana pod #Usage from cli: # --set "kibana.env[0].name=VARIABLE_NAME" --set "kibana.env[0].value=VARIABLE_VALUE" @@ -65,13 +67,13 @@ logstash: max_age: 30d max_primary_shard_size: 1gb delete: - min_age: 30d + min_age: 365d technical: hot: max_age: 30d max_primary_shard_size: 1gb delete: - min_age: 30d + min_age: 365d count_beats: 1 count_syslog: 0 image: docker.elastic.co/logstash/logstash @@ -84,8 +86,10 @@ logstash: resources: requests: memory: 4Gi + cpu: 300m limits: memory: 4Gi + cpu: 300m cert: duration: 2160h0m0s # 90d renewBefore: 360h0m0s # 15d @@ -116,21 +120,38 @@ logstash: drop { } } - if [kubernetes][container][name] == "sd-creation-wizard-api" or [kubernetes][container][name] == "signer" or [kubernetes][container][name] == "sd-creation-wizard-api-validation" or [kubernetes][container][name] == "simpl-cloud-gateway" { + if [kubernetes][container][name] == "sd-creation-wizard-api" or [kubernetes][container][name] == 
"signer" or [kubernetes][container][name] == "sd-creation-wizard-api-validation" { json { source => "message" skip_on_invalid_json => true + add_field => { "log_type" => "NA" } + } } - - if [kubernetes][container][name] == "users-roles" { + # Business logs + if [kubernetes][container][name] == "simpl-cloud-gateway" or [kubernetes][container][name] == "tls-gateway" { + json { + source => "message" + skip_on_invalid_json => true + add_field => { "log_type" => "business" } + } + } + # Onboaring technical logs + if [kubernetes][container][name] == "identity-provider" or [kubernetes][container][name] == "onboarding" or [kubernetes][container][name] == "security-attributes-provider" { json { source => "message" skip_on_invalid_json => true + add_field => { "log_type" => "technical" } + } + } + # Onboaring technical logs - more complex parsing because of the specific log structure + if [kubernetes][container][name] == "users-roles" { + json { + source => "message" + skip_on_invalid_json => true + add_field => { "log_type" => "technical" } } - - ruby { code => ' if event.get("[message]").is_a?(Hash) @@ -140,7 +161,6 @@ logstash: end ' } - if [is_json_message] { if [message][httpStatus] { mutate { add_field => { "httpStatus" => "%{[message][httpStatus]}" } } } if [message][msg] { mutate { add_field => { "msg" => "%{[message][msg]}" } } } @@ -153,38 +173,17 @@ logstash: } } - if [kubernetes][container][name] == "keycloak" { - grok { - match => { - "message" => [ - '%{TIMESTAMP_ISO8601:timestamp}%{SPACE}%{WORD:loglevel}%{SPACE}\[%{JAVACLASS:logger}\]%{SPACE}\(%{DATA:thread}\)%{SPACE}%{GREEDYDATA:message}' - ] - } - overwrite => [ "message" ] - } - } - - if [kubernetes][container][name] == "postgresql" { - grok { - match => { - "message" => [ - '%{TIMESTAMP_ISO8601:timestamp}%{SPACE}%{WORD:timezone}%{SPACE}\[%{NUMBER:pid}\]%{SPACE}%{WORD:log_level}:%{SPACE}%{GREEDYDATA:message}' - ] - } - overwrite => [ "message" ] - } - } date { - match => [ "timestamp", "yyyy-MM-dd 
HH:mm:ss.SSS", "ISO8601", "yyyy-MM-dd HH:mm:ss", "dd MMM yyyy HH:mm:ss.SSS"] + match => [ "timestamp", "yyyy-MM-dd HH:mm:ss.SSS", "ISO8601", "yyyy-MM-dd HH:mm:ss", "dd MMM yyyy HH:mm:ss.SSS", "yyyy-MM-dd HH:mm:ss,SSS"] } date { - match => [ "ts", "yyyy-MM-dd HH:mm:ss.SSS", "ISO8601", "yyyy-MM-dd HH:mm:ss", "dd MMM yyyy HH:mm:ss.SSS"] + match => [ "ts", "yyyy-MM-dd HH:mm:ss.SSS", "ISO8601", "yyyy-MM-dd HH:mm:ss", "dd MMM yyyy HH:mm:ss.SSS", "yyyy-MM-dd HH:mm:ss,SSS"] } } output: |- output { - if [kubernetes][container][name] == "simpl-cloud-gateway" { + if [log_type] == "business" { elasticsearch { hosts => [ "${ELASTIC_ELASTICSEARCH_ES_HOSTS}" ] user => "${LOGSTASH_USER}" @@ -197,7 +196,7 @@ logstash: action => "create" } } - else if [fields][logtype] == "logs-sample-wrapper" { + else if [log_type] == "technical" { elasticsearch { hosts => [ "${ELASTIC_ELASTICSEARCH_ES_HOSTS}" ] user => "${LOGSTASH_USER}" @@ -205,30 +204,11 @@ logstash: ssl_enabled => "true" ssl_verification_mode => "full" ssl_certificate_authorities => "/usr/share/logstash/config/certs/ca.crt" - #data_stream => "true" - #data_stream_type => "logs" - #data_stream_dataset => "business" - index => "business-logs" - template_name => "business-template" + index => "technical-logs" + template_name => "technical-template" action => "create" } } - else { - elasticsearch { - hosts => [ "${ELASTIC_ELASTICSEARCH_ES_HOSTS}" ] - user => "${LOGSTASH_USER}" - password => "${LOGSTASH_PASSWORD}" - ssl_enabled => "true" - ssl_verification_mode => "full" - ssl_certificate_authorities => "/usr/share/logstash/config/certs/ca.crt" - #data_stream => "true" - #data_stream_type => "logs" - #data_stream_dataset => "technical" - index => "technical-logs" - template_name => "technical-template" - action => "create" - } - } #stdout { # codec => rubydebug #} @@ -348,6 +328,13 @@ filebeat: filebeat4agents: image: docker.elastic.co/beats/filebeat imageTag: "" + resources: + requests: + memory: 1Gi + cpu: 100m + limits: + memory: 
1Gi + cpu: 100m cert: duration: 2160h0m0s # 90d renewBefore: 360h0m0s # 15d @@ -374,36 +361,31 @@ filebeat4agents: # Condition for json structured logs - condition: or: + # Business logs + - equals: + kubernetes.container.name: "simpl-cloud-gateway" + - equals: + kubernetes.container.name: "tls-gateway" + # Onboarding technical logs containers + - equals: + kubernetes.container.name: "identity-provider" + - equals: + kubernetes.container.name: "onboarding" + - equals: + kubernetes.container.name: "security-attributes-provider" - equals: kubernetes.container.name: "users-roles" + # Logs not specified yet - equals: kubernetes.container.name: "signer" - equals: kubernetes.container.name: "sd-creation-wizard-api" - equals: kubernetes.container.name: "sd-creation-wizard-api-validation" - - equals: - kubernetes.container.name: "simpl-cloud-gateway" - config: - - type: container - paths: - - /var/log/containers/*-${data.kubernetes.container.id}.log - # Condition for plain text logs - - condition: - or: - - equals: - kubernetes.container.name: "keycloak" - - equals: - kubernetes.container.name: "postgresql" config: - type: container paths: - /var/log/containers/*-${data.kubernetes.container.id}.log - multiline: - type: pattern - pattern: '^[0-9]{4}-[0-9]{2}-[0-9]{2}' - negate: true - match: after processors: # Add cloud and host metadata - add_cloud_metadata: {} @@ -422,13 +404,12 @@ metricbeat: ilm: hot: max_age: 30d - max_primary_shard_size: 1gb + max_primary_shard_size: 50gb delete: - min_age: 30d + min_age: 365d resources: requests: memory: 500Mi - limits: memory: 500Mi cpu: 300m @@ -439,9 +420,9 @@ heartbeat: ilm: hot: max_age: 30d - max_primary_shard_size: 100mb + max_primary_shard_size: 50gb delete: - min_age: 30d + min_age: 365d services: heartbeat.monitors: - type: tcp diff --git a/pipeline.variables.sh b/pipeline.variables.sh index 322e00ad6412089f90b1f74dddb8d6479adc8def..e8adbe92d55c15fe38f6e8e05a47c22ac7c0b9fa 100644 --- a/pipeline.variables.sh +++ 
b/pipeline.variables.sh @@ -1 +1 @@ -PROJECT_VERSION_NUMBER="0.1.3" \ No newline at end of file +PROJECT_VERSION_NUMBER="0.1.4"