Blob Blame Raw
From a6a52365df26edd4f6b0028056395d943344d787 Mon Sep 17 00:00:00 2001
From: Mark Reynolds <mreynolds@redhat.com>
Date: Thu, 11 Jun 2020 15:30:28 -0400
Subject: [PATCH] Issue 50791 - Healthcheck should look for notes=A/F in access
 log

Description:  Add checks for notes=A (fully unindexed search) and
              notes=F (Unknown attribute in search filter) in the
              current access log.

relates: https://pagure.io/389-ds-base/issue/50791

Reviewed by: firstyear(Thanks!)
---
 src/lib389/lib389/cli_ctl/health.py |  4 +-
 src/lib389/lib389/dirsrv_log.py     | 72 +++++++++++++++++++++++++++--
 src/lib389/lib389/lint.py           | 26 ++++++++++-
 3 files changed, 96 insertions(+), 6 deletions(-)

diff --git a/src/lib389/lib389/cli_ctl/health.py b/src/lib389/lib389/cli_ctl/health.py
index 6333a753a..89484a11b 100644
--- a/src/lib389/lib389/cli_ctl/health.py
+++ b/src/lib389/lib389/cli_ctl/health.py
@@ -1,5 +1,5 @@
 # --- BEGIN COPYRIGHT BLOCK ---
-# Copyright (C) 2019 Red Hat, Inc.
+# Copyright (C) 2020 Red Hat, Inc.
 # All rights reserved.
 #
 # License: GPL (version 3 or any later version).
@@ -18,6 +18,7 @@ from lib389.monitor import MonitorDiskSpace
 from lib389.replica import Replica, Changelog5
 from lib389.nss_ssl import NssSsl
 from lib389.dseldif import FSChecks, DSEldif
+from lib389.dirsrv_log import DirsrvAccessLog
 from lib389 import lint
 from lib389 import plugins
 from lib389._constants import DSRC_HOME
@@ -37,6 +38,7 @@ CHECK_OBJECTS = [
     Changelog5,
     DSEldif,
     NssSsl,
+    DirsrvAccessLog,
 ]
 
 
diff --git a/src/lib389/lib389/dirsrv_log.py b/src/lib389/lib389/dirsrv_log.py
index baac2a3c9..7bed4bb17 100644
--- a/src/lib389/lib389/dirsrv_log.py
+++ b/src/lib389/lib389/dirsrv_log.py
@@ -1,5 +1,5 @@
 # --- BEGIN COPYRIGHT BLOCK ---
-# Copyright (C) 2016 Red Hat, Inc.
+# Copyright (C) 2020 Red Hat, Inc.
 # All rights reserved.
 #
 # License: GPL (version 3 or any later version).
@@ -9,12 +9,17 @@
 """Helpers for managing the directory server internal logs.
 """
 
+import copy
 import re
 import gzip
 from dateutil.parser import parse as dt_parse
 from glob import glob
 from lib389.utils import ensure_bytes
-
+from lib389._mapped_object_lint import DSLint
+from lib389.lint import (
+    DSLOGNOTES0001,  # Unindexed search
+    DSLOGNOTES0002,  # Unknown attr in search filter
+)
 
 # Because many of these settings can change live, we need to check for certain
 # attributes all the time.
@@ -35,7 +40,7 @@ MONTH_LOOKUP = {
 }
 
 
-class DirsrvLog(object):
+class DirsrvLog(DSLint):
     """Class of functions to working with the various DIrectory Server logs
     """
     def __init__(self, dirsrv):
@@ -189,6 +194,67 @@ class DirsrvAccessLog(DirsrvLog):
         self.full_regexs = [self.prog_m1, self.prog_con, self.prog_discon]
         self.result_regexs = [self.prog_notes, self.prog_repl,
                               self.prog_result]
+    @classmethod
+    def lint_uid(cls):
+        return 'logs'
+
+    def _log_get_search_stats(self, conn, op):
+        lines = self.match(f".* conn={conn} op={op} SRCH base=.*")
+        if len(lines) != 1:
+            return None
+
+        quoted_vals = re.findall('"([^"]*)"', lines[0])
+        return {
+            'base': quoted_vals[0],
+            'filter': quoted_vals[1],
+            'timestamp': re.findall('\[(.*)\]', lines[0])[0],
+            'scope': lines[0].split(' scope=', 1)[1].split(' ',1)[0]
+        }
+
+    def _lint_notes(self):
+        """
+        Check for notes=A (fully unindexed searches), and
+        notes=F (unknown attribute in filter)
+        """
+        for pattern, lint_report in [(".* notes=A", DSLOGNOTES0001), (".* notes=F", DSLOGNOTES0002)]:
+            lines = self.match(pattern)
+            if len(lines) > 0:
+                count = 0
+                searches = []
+                for line in lines:
+                    if ' RESULT err=' in line:
+                        # Looks like a valid notes=A/F
+                        conn = line.split(' conn=', 1)[1].split(' ',1)[0]
+                        op = line.split(' op=', 1)[1].split(' ',1)[0]
+                        etime = line.split(' etime=', 1)[1].split(' ',1)[0]
+                        stats = self._log_get_search_stats(conn, op)
+                        if stats is not None:
+                            timestamp = stats['timestamp']
+                            base = stats['base']
+                            scope = stats['scope']
+                            srch_filter = stats['filter']
+                            count += 1
+                            if lint_report == DSLOGNOTES0001:
+                                searches.append(f'\n  [{count}] Unindexed Search\n'
+                                                f'      - date:    {timestamp}\n'
+                                                f'      - conn/op: {conn}/{op}\n'
+                                                f'      - base:    {base}\n'
+                                                f'      - scope:   {scope}\n'
+                                                f'      - filter:  {srch_filter}\n'
+                                                f'      - etime:   {etime}\n')
+                            else:
+                                searches.append(f'\n  [{count}] Invalid Attribute in Filter\n'
+                                                f'      - date:    {timestamp}\n'
+                                                f'      - conn/op: {conn}/{op}\n'
+                                                f'      - filter:  {srch_filter}\n')
+                if len(searches) > 0:
+                    report = copy.deepcopy(lint_report)
+                    report['items'].append(self._get_log_path())
+                    report['detail'] = report['detail'].replace('NUMBER', str(count))
+                    for srch in searches:
+                        report['detail'] += srch
+                    yield report
+
 
     def _get_log_path(self):
         """Return the current log file location"""
diff --git a/src/lib389/lib389/lint.py b/src/lib389/lib389/lint.py
index a103feec7..4b1700b92 100644
--- a/src/lib389/lib389/lint.py
+++ b/src/lib389/lib389/lint.py
@@ -1,5 +1,5 @@
 # --- BEGIN COPYRIGHT BLOCK ---
-# Copyright (C) 2019 Red Hat, Inc.
+# Copyright (C) 2020 Red Hat, Inc.
 # All rights reserved.
 #
 # License: GPL (version 3 or any later version).
@@ -253,7 +253,7 @@ can use the CLI tool "dsconf" to resolve the conflict.  Here is an example:
 
     Remove conflict entry and keep only the original/counterpart entry:
 
-        # dsconf slapd-YOUR_INSTANCE  repl-conflict remove <DN of conflict entry>
+        # dsconf slapd-YOUR_INSTANCE  repl-conflict delete <DN of conflict entry>
 
     Replace the original/counterpart entry with the conflict entry:
 
@@ -418,3 +418,25 @@ until the time issues have been resolved:
 Also look at https://access.redhat.com/documentation/en-us/red_hat_directory_server/11/html/administration_guide/managing_replication-troubleshooting_replication_related_problems
 and find the paragraph "Too much time skew"."""
 }
+
+DSLOGNOTES0001 = {
+    'dsle': 'DSLOGNOTES0001',
+    'severity': 'Medium',
+    'description': 'Unindexed Search',
+    'items': ['Performance'],
+    'detail': """Found NUMBER fully unindexed searches in the current access log.
+Unindexed searches can cause high CPU and slow down the entire server's performance.\n""",
+    'fix': """Examine the searches that are unindexed, and either properly index the attributes
+in the filter, increase the nsslapd-idlistscanlimit, or stop using that filter."""
+}
+
+DSLOGNOTES0002 = {
+    'dsle': 'DSLOGNOTES0002',
+    'severity': 'Medium',
+    'description': 'Unknown Attribute In Filter',
+    'items': ['Possible Performance Impact'],
+    'detail': """Found NUMBER searches in the current access log that are using an
+unknown attribute in the search filter.\n""",
+    'fix': """Stop using this these unknown attributes in the filter, or add the schema
+to the server and make sure it's properly indexed."""
+}
-- 
2.26.2