Blame SOURCES/0034-Use-satyr-for-calculation-of-the-duplicates-hashes.patch

f9a98e
From 9801d36ca38a03c69a92689fdcb37afde01dc066 Mon Sep 17 00:00:00 2001
f9a98e
From: Jakub Filak <jfilak@redhat.com>
f9a98e
Date: Mon, 20 Jan 2014 09:46:33 +0100
f9a98e
Subject: [PATCH 34/39] Use satyr for calculation of the duplicates hashes
f9a98e
f9a98e
The current algorithm simply hashes the entire backtrace file. This
f9a98e
approach results in non-working deduplication because the duplicate hash
f9a98e
depends on file system paths and line numbers.
f9a98e
f9a98e
Satyr provides a function for generating reliable hashes where only
f9a98e
function names (and, for Java, the exception type) are used
f9a98e
as input to the hash function.
f9a98e
f9a98e
Related to #29
f9a98e
Related to rhbz#1054737
f9a98e
---
f9a98e
 etc/java_event.conf                                |   8 -
f9a98e
 test/outputs/not_reportable_1remote_class.log.in   |   5 +
f9a98e
 test/outputs/not_reportable_3remote_classes.log.in |   5 +
f9a98e
 utils/abrt-action-analyze-java.c                   | 195 +++++++++++++--------
f9a98e
 4 files changed, 136 insertions(+), 77 deletions(-)
f9a98e
f9a98e
diff --git a/etc/java_event.conf b/etc/java_event.conf
f9a98e
index 302cac0..6014e93 100644
f9a98e
--- a/etc/java_event.conf
f9a98e
+++ b/etc/java_event.conf
f9a98e
@@ -8,14 +8,6 @@ EVENT=post-create type=Java
f9a98e
             # abrtd will delete the problem directory when we exit nonzero:
f9a98e
             exit 1
f9a98e
         fi
f9a98e
-        # TODO: Replace lines below by something more sane once abrt switches to satyr
f9a98e
-        if [ -f backtrace ]; then
f9a98e
-            printf '%s' "`sha1sum < "backtrace" | cut -d" " -f1`" > "uuid"
f9a98e
-            cp uuid duphash
f9a98e
-        else
f9a98e
-            echo "Cannot create neither 'duphas' nor 'uuid' because of missing 'backtrace' file"
f9a98e
-            exit 1
f9a98e
-        fi
f9a98e
         abrt-action-analyze-java -d $DUMP_DIR || exit 1
f9a98e
 
f9a98e
 # Create a bug in Bugzilla
f9a98e
diff --git a/test/outputs/not_reportable_1remote_class.log.in b/test/outputs/not_reportable_1remote_class.log.in
f9a98e
index 282933a..291072a 100644
f9a98e
--- a/test/outputs/not_reportable_1remote_class.log.in
f9a98e
+++ b/test/outputs/not_reportable_1remote_class.log.in
f9a98e
@@ -1 +1,6 @@
f9a98e
+duphash
f9a98e
+4bd13090ba6559c9c9023926671295559a25bc9b
f9a98e
+uuid
f9a98e
+4bd13090ba6559c9c9023926671295559a25bc9b
f9a98e
+not-reportable
f9a98e
 This problem can be caused by a 3rd party code from the jar/class at http://localhost:54321/JarTest.jar. In order to provide valuable problem reports, ABRT will not allow you to submit this problem. If you still want to participate in solving this problem, please contact the developers directly.
f9a98e
diff --git a/test/outputs/not_reportable_3remote_classes.log.in b/test/outputs/not_reportable_3remote_classes.log.in
f9a98e
index 7489f74..ddbd29c 100644
f9a98e
--- a/test/outputs/not_reportable_3remote_classes.log.in
f9a98e
+++ b/test/outputs/not_reportable_3remote_classes.log.in
f9a98e
@@ -1 +1,6 @@
f9a98e
+duphash
f9a98e
+4bd13090ba6559c9c9023926671295559a25bc9b
f9a98e
+uuid
f9a98e
+4bd13090ba6559c9c9023926671295559a25bc9b
f9a98e
+not-reportable
f9a98e
 This problem can be caused by a 3rd party code from the jar/class at http://localhost:54321/JarTest.jar, http://localhost:321/JarTest.jar, http://localhost:4321/JarTest.jar. In order to provide valuable problem reports, ABRT will not allow you to submit this problem. If you still want to participate in solving this problem, please contact the developers directly.
f9a98e
diff --git a/utils/abrt-action-analyze-java.c b/utils/abrt-action-analyze-java.c
f9a98e
index a4728b6..03542ec 100644
f9a98e
--- a/utils/abrt-action-analyze-java.c
f9a98e
+++ b/utils/abrt-action-analyze-java.c
f9a98e
@@ -17,6 +17,7 @@
f9a98e
 */
f9a98e
 
f9a98e
 #include <satyr/location.h>
f9a98e
+#include <satyr/thread.h>
f9a98e
 #include <satyr/java/stacktrace.h>
f9a98e
 #include <satyr/java/thread.h>
f9a98e
 #include <satyr/java/frame.h>
f9a98e
@@ -24,6 +25,16 @@
f9a98e
 #include <abrt/libabrt.h>
f9a98e
 #include <stdlib.h>
f9a98e
 
f9a98e
+/* 4 = 1 exception + 3 methods */
f9a98e
+#define FRAMES_FOR_DUPHASH 4
f9a98e
+
f9a98e
+typedef struct
f9a98e
+{
f9a98e
+    const char *name;
f9a98e
+    char *data;
f9a98e
+    int nofree;
f9a98e
+} analysis_result_t;
f9a98e
+
f9a98e
 static char *
f9a98e
 backtrace_from_dump_dir(const char *dir_name)
f9a98e
 {
f9a98e
@@ -44,37 +55,62 @@ backtrace_from_dump_dir(const char *dir_name)
f9a98e
 }
f9a98e
 
f9a98e
 static void
f9a98e
-write_not_reportable_message_to_dump_dir(const char *dir_name, const char *message)
f9a98e
+write_results_to_dump_dir(const char *dir_name,
f9a98e
+        const analysis_result_t *res_begin, const analysis_result_t *res_end)
f9a98e
 {
f9a98e
     struct dump_dir *dd = dd_opendir(dir_name, /*Open for writing*/0);
f9a98e
     if (NULL != dd)
f9a98e
     {
f9a98e
-        dd_save_text(dd, FILENAME_NOT_REPORTABLE, message);
f9a98e
+        const analysis_result_t *res = res_begin;
f9a98e
+
f9a98e
+        for ( ; res != res_end; ++res)
f9a98e
+            dd_save_text(dd, res->name, res->data);
f9a98e
+
f9a98e
         dd_close(dd);
f9a98e
     }
f9a98e
 }
f9a98e
 
f9a98e
 static void
f9a98e
-write_not_reportable_message_to_fd(int fdout, const char *message)
f9a98e
+write_to_fd(int fdout, const char *message)
f9a98e
 {
f9a98e
     full_write(fdout, message, strlen(message));
f9a98e
     full_write(fdout, "\n", 1);
f9a98e
 }
f9a98e
 
f9a98e
+
f9a98e
 static void
f9a98e
-write_not_reportable_message_to_file(const char *file_name, const char *message)
f9a98e
+write_results_to_fd(int fdout,
f9a98e
+        const analysis_result_t *res_begin, const analysis_result_t *res_end)
f9a98e
 {
f9a98e
-    int fdout = open(file_name,
f9a98e
-            O_WRONLY | O_TRUNC | O_CREAT | O_NOFOLLOW,
f9a98e
-            S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP );
f9a98e
+    const analysis_result_t *res = res_begin;
f9a98e
 
f9a98e
-    if (0 > fdout)
f9a98e
+    for ( ; res != res_end; ++res)
f9a98e
     {
f9a98e
-        perror_msg("Can't open file '%s' for writing", file_name);
f9a98e
-        return;
f9a98e
+        write_to_fd(fdout, res->name);
f9a98e
+        write_to_fd(fdout, res->data);
f9a98e
+    }
f9a98e
+}
f9a98e
+
f9a98e
+static void
f9a98e
+write_results_to_file(const analysis_result_t *res_begin, const analysis_result_t *res_end)
f9a98e
+{
f9a98e
+    const analysis_result_t *res = res_begin;
f9a98e
+
f9a98e
+    for ( ; res != res_end; ++res)
f9a98e
+    {
f9a98e
+        int fdout = open(res->name,
f9a98e
+                O_WRONLY | O_TRUNC | O_CREAT | O_NOFOLLOW,
f9a98e
+                S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP );
f9a98e
+
f9a98e
+        if (0 > fdout)
f9a98e
+        {
f9a98e
+            perror_msg("Can't open file '%s' for writing", res->name);
f9a98e
+            continue;
f9a98e
+        }
f9a98e
+
f9a98e
+        write_to_fd(fdout, res->data);
f9a98e
+        close(fdout);
f9a98e
     }
f9a98e
-    write_not_reportable_message_to_fd(fdout, message);
f9a98e
-    close(fdout);
f9a98e
 }
f9a98e
 
f9a98e
 static char *
f9a98e
@@ -89,50 +125,46 @@ backtrace_from_file(const char *file_name)
f9a98e
     return xmalloc_xopen_read_close(file_name, /*no size limit*/NULL);
f9a98e
 }
f9a98e
 
f9a98e
-typedef void (*frame_cb)(struct sr_java_frame *frame, void *args);
f9a98e
-
f9a98e
-typedef struct {
f9a98e
-    frame_cb callback;
f9a98e
-    void *args;
f9a98e
-} frame_proc_t;
f9a98e
-
f9a98e
-static void
f9a98e
-iterate_trough_stacktrace(struct sr_java_stacktrace *stacktrace, frame_proc_t **fproc)
f9a98e
+static char *
f9a98e
+work_out_list_of_remote_urls(struct sr_java_stacktrace *stacktrace)
f9a98e
 {
f9a98e
+    struct strbuf *remote_files_csv = strbuf_new();
f9a98e
     struct sr_java_thread *thread = stacktrace->threads;
f9a98e
     while (NULL != thread)
f9a98e
     {
f9a98e
         struct sr_java_frame *frame = thread->frames;
f9a98e
         while (NULL != frame)
f9a98e
         {
f9a98e
-            frame_proc_t **it = fproc;
f9a98e
-            while (NULL != *it)
f9a98e
+            if (NULL != frame->class_path && prefixcmp(frame->class_path, "file://") != 0)
f9a98e
             {
f9a98e
-                (*it)->callback(frame, (*it)->args);
f9a98e
-                ++it;
f9a98e
+                struct stat buf;
f9a98e
+                if (stat(frame->class_path, &buf) && errno == ENOENT)
f9a98e
+                {
f9a98e
+                    if (strstr(remote_files_csv->buf, frame->class_path) == NULL)
f9a98e
+                    {
f9a98e
+                        log_debug("Adding a new path to the list of remote paths: '%s'", frame->class_path);
f9a98e
+                        strbuf_append_strf(remote_files_csv, "%s%s",
f9a98e
+                                remote_files_csv->buf[0] != '\0' ? ", " : "",
f9a98e
+                                frame->class_path);
f9a98e
+                    }
f9a98e
+                    else
f9a98e
+                        log_debug("The list of remote paths already contains path: '%s'", frame->class_path);
f9a98e
+                }
f9a98e
+                else
f9a98e
+                    log_debug("Class path exists or is malformed: '%s'", frame->class_path);
f9a98e
             }
f9a98e
             frame = frame->next;
f9a98e
         }
f9a98e
         thread = thread->next;
f9a98e
     }
f9a98e
-}
f9a98e
 
f9a98e
-static void
f9a98e
-work_out_list_of_remote_urls(struct sr_java_frame *frame, struct strbuf *remote_files_csv)
f9a98e
-{
f9a98e
-    if (NULL != frame->class_path && prefixcmp(frame->class_path, "file://") != 0)
f9a98e
+    if (remote_files_csv->buf[0] != '\0')
f9a98e
     {
f9a98e
-        struct stat buf;
f9a98e
-        if (stat(frame->class_path, &buf) && errno == ENOENT)
f9a98e
-        {
f9a98e
-            if (strstr(remote_files_csv->buf, frame->class_path) == NULL)
f9a98e
-            {
f9a98e
-                strbuf_append_strf(remote_files_csv, "%s%s",
f9a98e
-                        remote_files_csv->buf[0] != '\0' ? ", " : "",
f9a98e
-                        frame->class_path);
f9a98e
-            }
f9a98e
-        }
f9a98e
+        return strbuf_free_nobuf(remote_files_csv);
f9a98e
     }
f9a98e
+
f9a98e
+    strbuf_free(remote_files_csv);
f9a98e
+    return NULL;
f9a98e
 }
f9a98e
 
f9a98e
 int main(int argc, char *argv[])
f9a98e
@@ -153,7 +185,9 @@ int main(int argc, char *argv[])
f9a98e
     const char *program_usage_string = _(
f9a98e
         "& [[-d DIR] | [-f FILE]] [-o]\n"
f9a98e
         "\n"
f9a98e
-        "Analyzes Java backtrace\n"
f9a98e
+        "Analyzes Java backtrace, generates duplication hash and creates\n"
f9a98e
+        "not-reportable file for bracktraces whose frames have remote files in their\n"
f9a98e
+        "class path\n"
f9a98e
     );
f9a98e
     enum {
f9a98e
         OPT_v = 1 << 0,
f9a98e
@@ -208,49 +242,72 @@ int main(int argc, char *argv[])
f9a98e
         goto finish;
f9a98e
     }
f9a98e
 
f9a98e
-    struct strbuf *remote_files_csv = strbuf_new();
f9a98e
-    frame_proc_t remote_files_proc = {
f9a98e
-        .callback = (frame_cb)&work_out_list_of_remote_urls,
f9a98e
-        .args = (void *)remote_files_csv
f9a98e
-    };
f9a98e
+    analysis_result_t results[3] = { { 0 } };
f9a98e
+    analysis_result_t *results_iter = results;
f9a98e
 
f9a98e
-    frame_proc_t *fproc[] = {
f9a98e
-        &remote_files_proc,
f9a98e
-        //duphash_proc,
f9a98e
-        //backtrace_usability,
f9a98e
-        NULL,
f9a98e
-    };
f9a98e
+    char *remote_files_csv = work_out_list_of_remote_urls(stacktrace);
f9a98e
 
f9a98e
-    iterate_trough_stacktrace(stacktrace, fproc);
f9a98e
+    char *hash_str = NULL;
f9a98e
+    struct sr_thread *crash_thread = (struct sr_thread *)stacktrace->threads;
f9a98e
+    if (g_verbose >= 3)
f9a98e
+    {
f9a98e
+        hash_str = sr_thread_get_duphash(crash_thread, FRAMES_FOR_DUPHASH,
f9a98e
+                /*noprefix*/NULL, SR_DUPHASH_NOHASH);
f9a98e
+        log("Generating duphash from string: '%s'", hash_str);
f9a98e
+        free(hash_str);
f9a98e
+    }
f9a98e
+
f9a98e
+    hash_str = sr_thread_get_duphash(crash_thread, FRAMES_FOR_DUPHASH,
f9a98e
+            /*noprefix*/NULL, SR_DUPHASH_NORMAL);
f9a98e
+
f9a98e
+    /* DUPHASH is used for searching for duplicates in Bugzilla */
f9a98e
+    results_iter->name = FILENAME_DUPHASH;
f9a98e
+    results_iter->data = hash_str;
f9a98e
+    ++results_iter;
f9a98e
+
f9a98e
+    /* UUID is used for local deduplication */
f9a98e
+    results_iter->name = FILENAME_UUID;
f9a98e
+    results_iter->data = hash_str;
f9a98e
+    results_iter->nofree = 1;
f9a98e
+    ++results_iter;
f9a98e
 
f9a98e
     sr_java_stacktrace_free(stacktrace);
f9a98e
 
f9a98e
-    if ('\0' != remote_files_csv->buf[0])
f9a98e
+    if (NULL != remote_files_csv)
f9a98e
     {
f9a98e
-        char *not_reportable_message = xasprintf(
f9a98e
+        results_iter->name = FILENAME_NOT_REPORTABLE;
f9a98e
+        results_iter->data = xasprintf(
f9a98e
         _("This problem can be caused by a 3rd party code from the "\
f9a98e
         "jar/class at %s. In order to provide valuable problem " \
f9a98e
         "reports, ABRT will not allow you to submit this problem. If you " \
f9a98e
         "still want to participate in solving this problem, please contact " \
f9a98e
-        "the developers directly."), remote_files_csv->buf);
f9a98e
+        "the developers directly."), remote_files_csv);
f9a98e
+        ++results_iter;
f9a98e
+        free(remote_files_csv);
f9a98e
+    }
f9a98e
 
f9a98e
-        if (opts & OPT_o)
f9a98e
-        {
f9a98e
-            write_not_reportable_message_to_fd(STDOUT_FILENO,  not_reportable_message);
f9a98e
-        }
f9a98e
-        else if (NULL != dump_dir_name)
f9a98e
+    if (opts & OPT_o)
f9a98e
+    {
f9a98e
+        write_results_to_fd(STDOUT_FILENO, results, results_iter);
f9a98e
+    }
f9a98e
+    else if (NULL != dump_dir_name)
f9a98e
+    {
f9a98e
+        write_results_to_dump_dir(dump_dir_name, results, results_iter);
f9a98e
+    }
f9a98e
+    else
f9a98e
+    {   /* Just write it to the current working directory */
f9a98e
+        write_results_to_file(results, results_iter);
f9a98e
+    }
f9a98e
+
f9a98e
+    const analysis_result_t *res = results;
f9a98e
+    for (; res != results_iter; ++res)
f9a98e
+    {
f9a98e
+        if (!res->nofree)
f9a98e
         {
f9a98e
-            write_not_reportable_message_to_dump_dir(dump_dir_name,  not_reportable_message);
f9a98e
+            free(res->data);
f9a98e
         }
f9a98e
-        else
f9a98e
-        {   /* Just write it to the current working directory */
f9a98e
-            write_not_reportable_message_to_file(FILENAME_NOT_REPORTABLE,  not_reportable_message);
f9a98e
-        }
f9a98e
-
f9a98e
-        free(not_reportable_message);
f9a98e
     }
f9a98e
 
f9a98e
-    strbuf_free(remote_files_csv);
f9a98e
     retval = 0;
f9a98e
 finish:
f9a98e
 
f9a98e
-- 
f9a98e
1.8.3.1
f9a98e