14f8ab
From b924c8ca8a133fc9413c8ed1407e63f1658c7e79 Mon Sep 17 00:00:00 2001
14f8ab
From: Xavi Hernandez <xhernandez@redhat.com>
14f8ab
Date: Tue, 12 May 2020 23:54:54 +0200
14f8ab
Subject: [PATCH 523/526] open-behind: rewrite of internal logic
14f8ab
14f8ab
There was a critical flaw in the previous implementation of open-behind.
14f8ab
14f8ab
When an open is done in the background, it's necessary to take a
14f8ab
reference on the fd_t object because once we "fake" the open answer,
14f8ab
the fd could be destroyed. However as long as there's a reference,
14f8ab
the release function won't be called. So, if the application closes
14f8ab
the file descriptor without having actually opened it, there will
14f8ab
always remain at least 1 reference, causing a leak.
14f8ab
14f8ab
To avoid this problem, the previous implementation didn't take a
14f8ab
reference on the fd_t, so there were races where the fd could be
14f8ab
destroyed while it was still in use.
14f8ab
14f8ab
To fix this, I've implemented a new xlator cbk that gets called from
14f8ab
fuse when the application closes a file descriptor.
14f8ab
14f8ab
The whole logic of handling background opens has been simplified and
14f8ab
it's more efficient now. Only if the fop needs to be delayed until an
14f8ab
open completes, a stub is created. Otherwise no memory allocations are
14f8ab
needed.
14f8ab
14f8ab
Correctly handling the close request while the open is still pending
14f8ab
has added a bit of complexity, but overall normal operation is simpler.
14f8ab
14f8ab
Upstream patch:
14f8ab
> Upstream-patch-link: https://review.gluster.org/#/c/glusterfs/+/24451
14f8ab
> Change-Id: I6376a5491368e0e1c283cc452849032636261592
14f8ab
> Fixes: #1225
14f8ab
> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
14f8ab
14f8ab
BUG: 1830713
14f8ab
Change-Id: I6376a5491368e0e1c283cc452849032636261592
14f8ab
Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
14f8ab
Reviewed-on: https://code.engineering.redhat.com/gerrit/224487
14f8ab
Tested-by: RHGS Build Bot <nigelb@redhat.com>
14f8ab
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
14f8ab
---
14f8ab
 libglusterfs/src/fd.c                              |   26 +
14f8ab
 libglusterfs/src/glusterfs/fd.h                    |    3 +
14f8ab
 libglusterfs/src/glusterfs/xlator.h                |    4 +
14f8ab
 libglusterfs/src/libglusterfs.sym                  |    1 +
14f8ab
 tests/basic/open-behind/open-behind.t              |  183 +++
14f8ab
 tests/basic/open-behind/tester-fd.c                |   99 ++
14f8ab
 tests/basic/open-behind/tester.c                   |  444 +++++++
14f8ab
 tests/basic/open-behind/tester.h                   |  145 +++
14f8ab
 tests/bugs/glusterfs/bug-873962-spb.t              |    1 +
14f8ab
 xlators/mount/fuse/src/fuse-bridge.c               |    2 +
14f8ab
 .../open-behind/src/open-behind-messages.h         |    6 +-
14f8ab
 xlators/performance/open-behind/src/open-behind.c  | 1302 ++++++++------------
14f8ab
 12 files changed, 1393 insertions(+), 823 deletions(-)
14f8ab
 create mode 100644 tests/basic/open-behind/open-behind.t
14f8ab
 create mode 100644 tests/basic/open-behind/tester-fd.c
14f8ab
 create mode 100644 tests/basic/open-behind/tester.c
14f8ab
 create mode 100644 tests/basic/open-behind/tester.h
14f8ab
14f8ab
diff --git a/libglusterfs/src/fd.c b/libglusterfs/src/fd.c
14f8ab
index 314546a..e4ec401 100644
14f8ab
--- a/libglusterfs/src/fd.c
14f8ab
+++ b/libglusterfs/src/fd.c
14f8ab
@@ -501,6 +501,32 @@ out:
14f8ab
 }
14f8ab
 
14f8ab
 void
14f8ab
+fd_close(fd_t *fd)
14f8ab
+{
14f8ab
+    xlator_t *xl, *old_THIS;
14f8ab
+
14f8ab
+    old_THIS = THIS;
14f8ab
+
14f8ab
+    for (xl = fd->inode->table->xl->graph->first; xl != NULL; xl = xl->next) {
14f8ab
+        if (!xl->call_cleanup) {
14f8ab
+            THIS = xl;
14f8ab
+
14f8ab
+            if (IA_ISDIR(fd->inode->ia_type)) {
14f8ab
+                if (xl->cbks->fdclosedir != NULL) {
14f8ab
+                    xl->cbks->fdclosedir(xl, fd);
14f8ab
+                }
14f8ab
+            } else {
14f8ab
+                if (xl->cbks->fdclose != NULL) {
14f8ab
+                    xl->cbks->fdclose(xl, fd);
14f8ab
+                }
14f8ab
+            }
14f8ab
+        }
14f8ab
+    }
14f8ab
+
14f8ab
+    THIS = old_THIS;
14f8ab
+}
14f8ab
+
14f8ab
+void
14f8ab
 fd_unref(fd_t *fd)
14f8ab
 {
14f8ab
     int32_t refcount = 0;
14f8ab
diff --git a/libglusterfs/src/glusterfs/fd.h b/libglusterfs/src/glusterfs/fd.h
14f8ab
index cdbe289..4d157c4 100644
14f8ab
--- a/libglusterfs/src/glusterfs/fd.h
14f8ab
+++ b/libglusterfs/src/glusterfs/fd.h
14f8ab
@@ -107,6 +107,9 @@ fd_ref(fd_t *fd);
14f8ab
 void
14f8ab
 fd_unref(fd_t *fd);
14f8ab
 
14f8ab
+void
14f8ab
+fd_close(fd_t *fd);
14f8ab
+
14f8ab
 fd_t *
14f8ab
 fd_create(struct _inode *inode, pid_t pid);
14f8ab
 
14f8ab
diff --git a/libglusterfs/src/glusterfs/xlator.h b/libglusterfs/src/glusterfs/xlator.h
14f8ab
index 8650ccc..273039a 100644
14f8ab
--- a/libglusterfs/src/glusterfs/xlator.h
14f8ab
+++ b/libglusterfs/src/glusterfs/xlator.h
14f8ab
@@ -705,6 +705,8 @@ typedef size_t (*cbk_inodectx_size_t)(xlator_t *this, inode_t *inode);
14f8ab
 
14f8ab
 typedef size_t (*cbk_fdctx_size_t)(xlator_t *this, fd_t *fd);
14f8ab
 
14f8ab
+typedef void (*cbk_fdclose_t)(xlator_t *this, fd_t *fd);
14f8ab
+
14f8ab
 struct xlator_cbks {
14f8ab
     cbk_forget_t forget;
14f8ab
     cbk_release_t release;
14f8ab
@@ -715,6 +717,8 @@ struct xlator_cbks {
14f8ab
     cbk_ictxmerge_t ictxmerge;
14f8ab
     cbk_inodectx_size_t ictxsize;
14f8ab
     cbk_fdctx_size_t fdctxsize;
14f8ab
+    cbk_fdclose_t fdclose;
14f8ab
+    cbk_fdclose_t fdclosedir;
14f8ab
 };
14f8ab
 
14f8ab
 typedef int32_t (*dumpop_priv_t)(xlator_t *this);
14f8ab
diff --git a/libglusterfs/src/libglusterfs.sym b/libglusterfs/src/libglusterfs.sym
14f8ab
index bc770e2..0a0862e 100644
14f8ab
--- a/libglusterfs/src/libglusterfs.sym
14f8ab
+++ b/libglusterfs/src/libglusterfs.sym
14f8ab
@@ -456,6 +456,7 @@ event_unregister_close
14f8ab
 fd_anonymous
14f8ab
 fd_anonymous_with_flags
14f8ab
 fd_bind
14f8ab
+fd_close
14f8ab
 fd_create
14f8ab
 fd_create_uint64
14f8ab
 __fd_ctx_del
14f8ab
diff --git a/tests/basic/open-behind/open-behind.t b/tests/basic/open-behind/open-behind.t
14f8ab
new file mode 100644
14f8ab
index 0000000..5e865d6
14f8ab
--- /dev/null
14f8ab
+++ b/tests/basic/open-behind/open-behind.t
14f8ab
@@ -0,0 +1,183 @@
14f8ab
+#!/bin/bash
14f8ab
+
14f8ab
+WD="$(dirname "${0}")"
14f8ab
+
14f8ab
+. ${WD}/../../include.rc
14f8ab
+. ${WD}/../../volume.rc
14f8ab
+
14f8ab
+function assign() {
14f8ab
+    local _assign_var="${1}"
14f8ab
+    local _assign_value="${2}"
14f8ab
+
14f8ab
+    printf -v "${_assign_var}" "%s" "${_assign_value}"
14f8ab
+}
14f8ab
+
14f8ab
+function pipe_create() {
14f8ab
+    local _pipe_create_var="${1}"
14f8ab
+    local _pipe_create_name
14f8ab
+    local _pipe_create_fd
14f8ab
+
14f8ab
+    _pipe_create_name="$(mktemp -u)"
14f8ab
+    mkfifo "${_pipe_create_name}"
14f8ab
+    exec {_pipe_create_fd}<>"${_pipe_create_name}"
14f8ab
+    rm "${_pipe_create_name}"
14f8ab
+
14f8ab
+    assign "${_pipe_create_var}" "${_pipe_create_fd}"
14f8ab
+}
14f8ab
+
14f8ab
+function pipe_close() {
14f8ab
+    local _pipe_close_fd="${!1}"
14f8ab
+
14f8ab
+    exec {_pipe_close_fd}>&-
14f8ab
+}
14f8ab
+
14f8ab
+function tester_start() {
14f8ab
+    declare -ag tester
14f8ab
+    local tester_in
14f8ab
+    local tester_out
14f8ab
+
14f8ab
+    pipe_create tester_in
14f8ab
+    pipe_create tester_out
14f8ab
+
14f8ab
+    ${WD}/tester <&${tester_in} >&${tester_out} &
14f8ab
+
14f8ab
+    tester=("$!" "${tester_in}" "${tester_out}")
14f8ab
+}
14f8ab
+
14f8ab
+function tester_send() {
14f8ab
+    declare -ag tester
14f8ab
+    local tester_res
14f8ab
+    local tester_extra
14f8ab
+
14f8ab
+    echo "${*}" >&${tester[1]}
14f8ab
+
14f8ab
+    read -t 3 -u ${tester[2]} tester_res tester_extra
14f8ab
+    echo "${tester_res} ${tester_extra}"
14f8ab
+    if [[ "${tester_res}" == "OK" ]]; then
14f8ab
+        return 0
14f8ab
+    fi
14f8ab
+
14f8ab
+    return 1
14f8ab
+}
14f8ab
+
14f8ab
+function tester_stop() {
14f8ab
+    declare -ag tester
14f8ab
+    local tester_res
14f8ab
+
14f8ab
+    tester_send "quit"
14f8ab
+
14f8ab
+    # Read wait's status directly: inside "if ! cmd; then", $? holds the
14f8ab
+    # negated (always 0) status, which would hide a tester failure.
14f8ab
+    wait ${tester[0]}
14f8ab
+    tester_res=$?
14f8ab
+
14f8ab
+    unset tester
14f8ab
+
14f8ab
+    return ${tester_res}
14f8ab
+}
14f8ab
+
14f8ab
+function count_open() {
14f8ab
+    local file="$(realpath "${B0}/${V0}/${1}")"
14f8ab
+    local count="0"
14f8ab
+    local inode
14f8ab
+    local ref
14f8ab
+
14f8ab
+    inode="$(stat -c %i "${file}")"
14f8ab
+
14f8ab
+    for fd in /proc/${BRICK_PID}/fd/*; do
14f8ab
+        ref="$(readlink "${fd}")"
14f8ab
+        if [[ "${ref}" == "${B0}/${V0}/"* ]]; then
14f8ab
+            if [[ "$(stat -c %i "${ref}")" == "${inode}" ]]; then
14f8ab
+                count="$((${count} + 1))"
14f8ab
+            fi
14f8ab
+        fi
14f8ab
+    done
14f8ab
+
14f8ab
+    echo "${count}"
14f8ab
+}
14f8ab
+
14f8ab
+cleanup
14f8ab
+
14f8ab
+TEST build_tester ${WD}/tester.c ${WD}/tester-fd.c
14f8ab
+
14f8ab
+TEST glusterd
14f8ab
+TEST pidof glusterd
14f8ab
+TEST ${CLI} volume create ${V0} ${H0}:${B0}/${V0}
14f8ab
+TEST ${CLI} volume set ${V0} flush-behind off
14f8ab
+TEST ${CLI} volume set ${V0} write-behind off
14f8ab
+TEST ${CLI} volume set ${V0} quick-read off
14f8ab
+TEST ${CLI} volume set ${V0} stat-prefetch on
14f8ab
+TEST ${CLI} volume set ${V0} io-cache off
14f8ab
+TEST ${CLI} volume set ${V0} open-behind on
14f8ab
+TEST ${CLI} volume set ${V0} lazy-open off
14f8ab
+TEST ${CLI} volume set ${V0} read-after-open off
14f8ab
+TEST ${CLI} volume start ${V0}
14f8ab
+
14f8ab
+TEST ${GFS} --volfile-id=/${V0} --volfile-server=${H0} ${M0};
14f8ab
+
14f8ab
+BRICK_PID="$(get_brick_pid ${V0} ${H0} ${B0}/${V0})"
14f8ab
+
14f8ab
+TEST touch "${M0}/test"
14f8ab
+
14f8ab
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
14f8ab
+TEST ${GFS} --volfile-id=/${V0} --volfile-server=${H0} ${M0};
14f8ab
+
14f8ab
+TEST tester_start
14f8ab
+
14f8ab
+TEST tester_send fd open 0 "${M0}/test"
14f8ab
+EXPECT_WITHIN 5 "1" count_open "/test"
14f8ab
+TEST tester_send fd close 0
14f8ab
+EXPECT_WITHIN 5 "0" count_open "/test"
14f8ab
+
14f8ab
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
14f8ab
+TEST ${CLI} volume set ${V0} lazy-open on
14f8ab
+TEST ${GFS} --volfile-id=/${V0} --volfile-server=${H0} ${M0};
14f8ab
+
14f8ab
+TEST tester_send fd open 0 "${M0}/test"
14f8ab
+sleep 2
14f8ab
+EXPECT "0" count_open "/test"
14f8ab
+TEST tester_send fd write 0 "test"
14f8ab
+EXPECT "1" count_open "/test"
14f8ab
+TEST tester_send fd close 0
14f8ab
+EXPECT_WITHIN 5 "0" count_open "/test"
14f8ab
+
14f8ab
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
14f8ab
+TEST ${GFS} --volfile-id=/${V0} --volfile-server=${H0} ${M0};
14f8ab
+
14f8ab
+TEST tester_send fd open 0 "${M0}/test"
14f8ab
+EXPECT "0" count_open "/test"
14f8ab
+EXPECT "test" tester_send fd read 0 64
14f8ab
+# Even though read-after-open is disabled, use-anonymous-fd is also disabled,
14f8ab
+# so reads need to open the file first.
14f8ab
+EXPECT "1" count_open "/test"
14f8ab
+TEST tester_send fd close 0
14f8ab
+EXPECT "0" count_open "/test"
14f8ab
+
14f8ab
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
14f8ab
+TEST ${GFS} --volfile-id=/${V0} --volfile-server=${H0} ${M0};
14f8ab
+
14f8ab
+TEST tester_send fd open 0 "${M0}/test"
14f8ab
+EXPECT "0" count_open "/test"
14f8ab
+TEST tester_send fd open 1 "${M0}/test"
14f8ab
+EXPECT "2" count_open "/test"
14f8ab
+TEST tester_send fd close 0
14f8ab
+EXPECT_WITHIN 5 "1" count_open "/test"
14f8ab
+TEST tester_send fd close 1
14f8ab
+EXPECT_WITHIN 5 "0" count_open "/test"
14f8ab
+
14f8ab
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
14f8ab
+TEST ${CLI} volume set ${V0} read-after-open on
14f8ab
+TEST ${GFS} --volfile-id=/${V0} --volfile-server=${H0} ${M0};
14f8ab
+
14f8ab
+TEST tester_send fd open 0 "${M0}/test"
14f8ab
+EXPECT "0" count_open "/test"
14f8ab
+EXPECT "test" tester_send fd read 0 64
14f8ab
+EXPECT "1" count_open "/test"
14f8ab
+TEST tester_send fd close 0
14f8ab
+EXPECT_WITHIN 5 "0" count_open "/test"
14f8ab
+
14f8ab
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
14f8ab
+
14f8ab
+TEST tester_stop
14f8ab
+
14f8ab
+cleanup
14f8ab
diff --git a/tests/basic/open-behind/tester-fd.c b/tests/basic/open-behind/tester-fd.c
14f8ab
new file mode 100644
14f8ab
index 0000000..00f02bc
14f8ab
--- /dev/null
14f8ab
+++ b/tests/basic/open-behind/tester-fd.c
14f8ab
@@ -0,0 +1,99 @@
14f8ab
+/*
14f8ab
+  Copyright (c) 2020 Red Hat, Inc. <http://www.redhat.com>
14f8ab
+  This file is part of GlusterFS.
14f8ab
+
14f8ab
+  This file is licensed to you under your choice of the GNU Lesser
14f8ab
+  General Public License, version 3 or any later version (LGPLv3 or
14f8ab
+  later), or the GNU General Public License, version 2 (GPLv2), in all
14f8ab
+  cases as published by the Free Software Foundation.
14f8ab
+*/
14f8ab
+
14f8ab
+#include "tester.h"
14f8ab
+
14f8ab
+#include <stdlib.h>
14f8ab
+#include <unistd.h>
14f8ab
+#include <sys/types.h>
14f8ab
+#include <sys/stat.h>
14f8ab
+#include <fcntl.h>
14f8ab
+#include <string.h>
14f8ab
+#include <ctype.h>
14f8ab
+#include <errno.h>
14f8ab
+
14f8ab
+static int32_t
14f8ab
+fd_open(context_t *ctx, command_t *cmd)
14f8ab
+{
14f8ab
+    obj_t *obj;
14f8ab
+    int32_t fd;
14f8ab
+
14f8ab
+    obj = cmd->args[0].obj.ref;
14f8ab
+
14f8ab
+    fd = open(cmd->args[1].str.data, O_RDWR);
14f8ab
+    if (fd < 0) {
14f8ab
+        return error(errno, "open() failed");
14f8ab
+    }
14f8ab
+
14f8ab
+    obj->type = OBJ_TYPE_FD;
14f8ab
+    obj->fd = fd;
14f8ab
+
14f8ab
+    out_ok("%d", fd);
14f8ab
+
14f8ab
+    return 0;
14f8ab
+}
14f8ab
+
14f8ab
+static int32_t
14f8ab
+fd_close(context_t *ctx, command_t *cmd)
14f8ab
+{
14f8ab
+    obj_t *obj;
14f8ab
+
14f8ab
+    obj = cmd->args[0].obj.ref;
14f8ab
+    obj->type = OBJ_TYPE_NONE;
14f8ab
+
14f8ab
+    if (close(obj->fd) != 0) {
14f8ab
+        return error(errno, "close() failed");
14f8ab
+    }
14f8ab
+
14f8ab
+    out_ok();
14f8ab
+
14f8ab
+    return 0;
14f8ab
+}
14f8ab
+
14f8ab
+static int32_t
14f8ab
+fd_write(context_t *ctx, command_t *cmd)
14f8ab
+{
14f8ab
+    ssize_t len, ret;
14f8ab
+
14f8ab
+    len = strlen(cmd->args[1].str.data);
14f8ab
+    ret = write(cmd->args[0].obj.ref->fd, cmd->args[1].str.data, len);
14f8ab
+    if (ret < 0) {
14f8ab
+        return error(errno, "write() failed");
14f8ab
+    }
14f8ab
+
14f8ab
+    out_ok("%zd", ret);
14f8ab
+
14f8ab
+    return 0;
14f8ab
+}
14f8ab
+
14f8ab
+static int32_t
14f8ab
+fd_read(context_t *ctx, command_t *cmd)
14f8ab
+{
14f8ab
+    char data[cmd->args[1].num.value + 1];
14f8ab
+    ssize_t ret;
14f8ab
+
14f8ab
+    ret = read(cmd->args[0].obj.ref->fd, data, cmd->args[1].num.value);
14f8ab
+    if (ret < 0) {
14f8ab
+        return error(errno, "read() failed");
14f8ab
+    }
14f8ab
+
14f8ab
+    data[ret] = 0;
14f8ab
+
14f8ab
+    out_ok("%zd %s", ret, data);
14f8ab
+
14f8ab
+    return 0;
14f8ab
+}
14f8ab
+
14f8ab
+command_t fd_commands[] = {
14f8ab
+    {"open", fd_open, CMD_ARGS(ARG_VAL(OBJ_TYPE_NONE), ARG_STR(1024))},
14f8ab
+    {"close", fd_close, CMD_ARGS(ARG_VAL(OBJ_TYPE_FD))},
14f8ab
+    {"write", fd_write, CMD_ARGS(ARG_VAL(OBJ_TYPE_FD), ARG_STR(1024))},
14f8ab
+    {"read", fd_read, CMD_ARGS(ARG_VAL(OBJ_TYPE_FD), ARG_NUM(0, 1024))},
14f8ab
+    CMD_END};
14f8ab
diff --git a/tests/basic/open-behind/tester.c b/tests/basic/open-behind/tester.c
14f8ab
new file mode 100644
14f8ab
index 0000000..b2da71c
14f8ab
--- /dev/null
14f8ab
+++ b/tests/basic/open-behind/tester.c
14f8ab
@@ -0,0 +1,444 @@
14f8ab
+/*
14f8ab
+  Copyright (c) 2020 Red Hat, Inc. <http://www.redhat.com>
14f8ab
+  This file is part of GlusterFS.
14f8ab
+
14f8ab
+  This file is licensed to you under your choice of the GNU Lesser
14f8ab
+  General Public License, version 3 or any later version (LGPLv3 or
14f8ab
+  later), or the GNU General Public License, version 2 (GPLv2), in all
14f8ab
+  cases as published by the Free Software Foundation.
14f8ab
+*/
14f8ab
+
14f8ab
+#include "tester.h"
14f8ab
+
14f8ab
+#include <stdlib.h>
14f8ab
+#include <unistd.h>
14f8ab
+#include <string.h>
14f8ab
+#include <ctype.h>
14f8ab
+#include <errno.h>
14f8ab
+
14f8ab
+static void *
14f8ab
+mem_alloc(size_t size)
14f8ab
+{
14f8ab
+    void *ptr;
14f8ab
+
14f8ab
+    ptr = malloc(size);
14f8ab
+    if (ptr == NULL) {
14f8ab
+        error(ENOMEM, "Failed to allocate memory (%zu bytes)", size);
14f8ab
+    }
14f8ab
+
14f8ab
+    return ptr;
14f8ab
+}
14f8ab
+
14f8ab
+static void
14f8ab
+mem_free(void *ptr)
14f8ab
+{
14f8ab
+    free(ptr);
14f8ab
+}
14f8ab
+
14f8ab
+static bool
14f8ab
+buffer_create(context_t *ctx, size_t size)
14f8ab
+{
14f8ab
+    ctx->buffer.base = mem_alloc(size);
14f8ab
+    if (ctx->buffer.base == NULL) {
14f8ab
+        return false;
14f8ab
+    }
14f8ab
+
14f8ab
+    ctx->buffer.size = size;
14f8ab
+    ctx->buffer.len = 0;
14f8ab
+    ctx->buffer.pos = 0;
14f8ab
+
14f8ab
+    return true;
14f8ab
+}
14f8ab
+
14f8ab
+static void
14f8ab
+buffer_destroy(context_t *ctx)
14f8ab
+{
14f8ab
+    mem_free(ctx->buffer.base);
14f8ab
+    ctx->buffer.size = 0;
14f8ab
+    ctx->buffer.len = 0;
14f8ab
+}
14f8ab
+
14f8ab
+static int32_t
14f8ab
+buffer_get(context_t *ctx)
14f8ab
+{
14f8ab
+    ssize_t len;
14f8ab
+
14f8ab
+    if (ctx->buffer.pos >= ctx->buffer.len) {
14f8ab
+        len = read(0, ctx->buffer.base, ctx->buffer.size);
14f8ab
+        if (len < 0) {
14f8ab
+            return error(errno, "read() failed");
14f8ab
+        }
14f8ab
+        if (len == 0) {
14f8ab
+            return 0;
14f8ab
+        }
14f8ab
+
14f8ab
+        ctx->buffer.len = len;
14f8ab
+        ctx->buffer.pos = 0;
14f8ab
+    }
14f8ab
+    /* unsigned char: keep bytes >= 0x80 positive even if char is signed. */
14f8ab
+    return (unsigned char)ctx->buffer.base[ctx->buffer.pos++];
14f8ab
+}
14f8ab
+
14f8ab
+static int32_t
14f8ab
+str_skip_spaces(context_t *ctx, int32_t current)
14f8ab
+{
14f8ab
+    while ((current > 0) && (current != '\n') && isspace(current)) {
14f8ab
+        current = buffer_get(ctx);
14f8ab
+    }
14f8ab
+
14f8ab
+    return current;
14f8ab
+}
14f8ab
+
14f8ab
+static int32_t
14f8ab
+str_token(context_t *ctx, char *buffer, uint32_t size, int32_t current)
14f8ab
+{
14f8ab
+    uint32_t len;
14f8ab
+
14f8ab
+    current = str_skip_spaces(ctx, current);
14f8ab
+
14f8ab
+    len = 0;
14f8ab
+    while ((size > 0) && (current > 0) && (current != '\n') &&
14f8ab
+           !isspace(current)) {
14f8ab
+        len++;
14f8ab
+        *buffer++ = current;
14f8ab
+        size--;
14f8ab
+        current = buffer_get(ctx);
14f8ab
+    }
14f8ab
+
14f8ab
+    if (len == 0) {
14f8ab
+        return error(ENODATA, "Expecting a token");
14f8ab
+    }
14f8ab
+
14f8ab
+    if (size == 0) {
14f8ab
+        return error(ENOBUFS, "Token too long");
14f8ab
+    }
14f8ab
+
14f8ab
+    *buffer = 0;
14f8ab
+
14f8ab
+    return current;
14f8ab
+}
14f8ab
+
14f8ab
+static int32_t
14f8ab
+str_number(context_t *ctx, uint64_t min, uint64_t max, uint64_t *value,
14f8ab
+           int32_t current)
14f8ab
+{
14f8ab
+    char text[32], *ptr;
14f8ab
+    uint64_t num;
14f8ab
+
14f8ab
+    current = str_token(ctx, text, sizeof(text), current);
14f8ab
+    if (current > 0) {
14f8ab
+        num = strtoul(text, &ptr, 0);
14f8ab
+        if ((*ptr != 0) || (num < min) || (num > max)) {
14f8ab
+            return error(ERANGE, "Invalid number");
14f8ab
+        }
14f8ab
+        *value = num;
14f8ab
+    }
14f8ab
+
14f8ab
+    return current;
14f8ab
+}
14f8ab
+
14f8ab
+static int32_t
14f8ab
+str_eol(context_t *ctx, int32_t current)
14f8ab
+{
14f8ab
+    current = str_skip_spaces(ctx, current);
14f8ab
+    if (current != '\n') {
14f8ab
+        return error(EINVAL, "Expecting end of command");
14f8ab
+    }
14f8ab
+
14f8ab
+    return current;
14f8ab
+}
14f8ab
+
14f8ab
+static void
14f8ab
+str_skip(context_t *ctx, int32_t current)
14f8ab
+{
14f8ab
+    while ((current > 0) && (current != '\n')) {
14f8ab
+        current = buffer_get(ctx);
14f8ab
+    }
14f8ab
+}
14f8ab
+
14f8ab
+static int32_t
14f8ab
+cmd_parse_obj(context_t *ctx, arg_t *arg, int32_t current)
14f8ab
+{
14f8ab
+    obj_t *obj;
14f8ab
+    uint64_t id;
14f8ab
+    /* str_number()'s bounds are inclusive but valid ids are 0..count-1. */
14f8ab
+    current = str_number(ctx, 0, ctx->obj_count, &id, current);
14f8ab
+    if ((current <= 0) || (id >= ctx->obj_count)) {
14f8ab
+        return (current <= 0) ? current : error(ERANGE, "Invalid number");
14f8ab
+    }
14f8ab
+
14f8ab
+    obj = &ctx->objs[id];
14f8ab
+    if (obj->type != arg->obj.type) {
14f8ab
+        if (obj->type != OBJ_TYPE_NONE) {
14f8ab
+            return error(EBUSY, "Object is in use");
14f8ab
+        }
14f8ab
+        return error(ENOENT, "Object is not defined");
14f8ab
+    }
14f8ab
+
14f8ab
+    arg->obj.ref = obj;
14f8ab
+
14f8ab
+    return current;
14f8ab
+}
14f8ab
+
14f8ab
+static int32_t
14f8ab
+cmd_parse_num(context_t *ctx, arg_t *arg, int32_t current)
14f8ab
+{
14f8ab
+    return str_number(ctx, arg->num.min, arg->num.max, &arg->num.value,
14f8ab
+                      current);
14f8ab
+}
14f8ab
+
14f8ab
+static int32_t
14f8ab
+cmd_parse_str(context_t *ctx, arg_t *arg, int32_t current)
14f8ab
+{
14f8ab
+    return str_token(ctx, arg->str.data, arg->str.size, current);
14f8ab
+}
14f8ab
+
14f8ab
+static int32_t
14f8ab
+cmd_parse_args(context_t *ctx, command_t *cmd, int32_t current)
14f8ab
+{
14f8ab
+    arg_t *arg;
14f8ab
+
14f8ab
+    for (arg = cmd->args; arg->type != ARG_TYPE_NONE; arg++) {
14f8ab
+        switch (arg->type) {
14f8ab
+            case ARG_TYPE_OBJ:
14f8ab
+                current = cmd_parse_obj(ctx, arg, current);
14f8ab
+                break;
14f8ab
+            case ARG_TYPE_NUM:
14f8ab
+                current = cmd_parse_num(ctx, arg, current);
14f8ab
+                break;
14f8ab
+            case ARG_TYPE_STR:
14f8ab
+                current = cmd_parse_str(ctx, arg, current);
14f8ab
+                break;
14f8ab
+            default:
14f8ab
+                return error(EINVAL, "Unknown argument type");
14f8ab
+        }
14f8ab
+    }
14f8ab
+
14f8ab
+    if (current < 0) {
14f8ab
+        return current;
14f8ab
+    }
14f8ab
+
14f8ab
+    current = str_eol(ctx, current);
14f8ab
+    if (current <= 0) {
14f8ab
+        return error(EINVAL, "Syntax error");
14f8ab
+    }
14f8ab
+
14f8ab
+    return cmd->handler(ctx, cmd);
14f8ab
+}
14f8ab
+
14f8ab
+static int32_t
14f8ab
+cmd_parse(context_t *ctx, command_t *cmds)
14f8ab
+{
14f8ab
+    char text[32];
14f8ab
+    command_t *cmd;
14f8ab
+    int32_t current;
14f8ab
+
14f8ab
+    cmd = cmds;
14f8ab
+    do {
14f8ab
+        current = str_token(ctx, text, sizeof(text), buffer_get(ctx));
14f8ab
+        if (current <= 0) {
14f8ab
+            return current;
14f8ab
+        }
14f8ab
+
14f8ab
+        while (cmd->name != NULL) {
14f8ab
+            if (strcmp(cmd->name, text) == 0) {
14f8ab
+                if (cmd->handler != NULL) {
14f8ab
+                    return cmd_parse_args(ctx, cmd, current);
14f8ab
+                }
14f8ab
+                cmd = cmd->cmds;
14f8ab
+                break;
14f8ab
+            }
14f8ab
+            cmd++;
14f8ab
+        }
14f8ab
+    } while (cmd->name != NULL);
14f8ab
+
14f8ab
+    str_skip(ctx, current);
14f8ab
+
14f8ab
+    return error(ENOTSUP, "Unknown command");
14f8ab
+}
14f8ab
+
14f8ab
+static void
14f8ab
+cmd_fini(context_t *ctx, command_t *cmds)
14f8ab
+{
14f8ab
+    command_t *cmd;
14f8ab
+    arg_t *arg;
14f8ab
+
14f8ab
+    for (cmd = cmds; cmd->name != NULL; cmd++) {
14f8ab
+        if (cmd->handler == NULL) {
14f8ab
+            cmd_fini(ctx, cmd->cmds);
14f8ab
+        } else {
14f8ab
+            for (arg = cmd->args; arg->type != ARG_TYPE_NONE; arg++) {
14f8ab
+                switch (arg->type) {
14f8ab
+                    case ARG_TYPE_STR:
14f8ab
+                        mem_free(arg->str.data);
14f8ab
+                        arg->str.data = NULL;
14f8ab
+                        break;
14f8ab
+                    default:
14f8ab
+                        break;
14f8ab
+                }
14f8ab
+            }
14f8ab
+        }
14f8ab
+    }
14f8ab
+}
14f8ab
+
14f8ab
+static bool
14f8ab
+cmd_init(context_t *ctx, command_t *cmds)
14f8ab
+{
14f8ab
+    command_t *cmd;
14f8ab
+    arg_t *arg;
14f8ab
+
14f8ab
+    for (cmd = cmds; cmd->name != NULL; cmd++) {
14f8ab
+        if (cmd->handler == NULL) {
14f8ab
+            if (!cmd_init(ctx, cmd->cmds)) {
14f8ab
+                return false;
14f8ab
+            }
14f8ab
+        } else {
14f8ab
+            for (arg = cmd->args; arg->type != ARG_TYPE_NONE; arg++) {
14f8ab
+                switch (arg->type) {
14f8ab
+                    case ARG_TYPE_STR:
14f8ab
+                        arg->str.data = mem_alloc(arg->str.size);
14f8ab
+                        if (arg->str.data == NULL) {
14f8ab
+                            return false;
14f8ab
+                        }
14f8ab
+                        break;
14f8ab
+                    default:
14f8ab
+                        break;
14f8ab
+                }
14f8ab
+            }
14f8ab
+        }
14f8ab
+    }
14f8ab
+
14f8ab
+    return true;
14f8ab
+}
14f8ab
+
14f8ab
+static bool
14f8ab
+objs_create(context_t *ctx, uint32_t count)
14f8ab
+{
14f8ab
+    uint32_t i;
14f8ab
+
14f8ab
+    ctx->objs = mem_alloc(sizeof(obj_t) * count);
14f8ab
+    if (ctx->objs == NULL) {
14f8ab
+        return false;
14f8ab
+    }
14f8ab
+    ctx->obj_count = count;
14f8ab
+
14f8ab
+    for (i = 0; i < count; i++) {
14f8ab
+        ctx->objs[i].type = OBJ_TYPE_NONE;
14f8ab
+    }
14f8ab
+
14f8ab
+    return true;
14f8ab
+}
14f8ab
+
14f8ab
+static int32_t
14f8ab
+objs_destroy(context_t *ctx)
14f8ab
+{
14f8ab
+    uint32_t i;
14f8ab
+    int32_t err;
14f8ab
+
14f8ab
+    err = 0;
14f8ab
+    for (i = 0; i < ctx->obj_count; i++) {
14f8ab
+        if (ctx->objs[i].type != OBJ_TYPE_NONE) {
14f8ab
+            err = error(ENOTEMPTY, "Objects not destroyed");
14f8ab
+            break;
14f8ab
+        }
14f8ab
+    }
14f8ab
+
14f8ab
+    mem_free(ctx->objs);
14f8ab
+    ctx->objs = NULL;
14f8ab
+    ctx->obj_count = 0;
14f8ab
+
14f8ab
+    return err;
14f8ab
+}
14f8ab
+
14f8ab
+static context_t *
14f8ab
+init(size_t size, uint32_t objs, command_t *cmds)
14f8ab
+{
14f8ab
+    context_t *ctx;
14f8ab
+
14f8ab
+    ctx = mem_alloc(sizeof(context_t));
14f8ab
+    if (ctx == NULL) {
14f8ab
+        goto failed;
14f8ab
+    }
14f8ab
+
14f8ab
+    if (!buffer_create(ctx, size)) {
14f8ab
+        goto failed_ctx;
14f8ab
+    }
14f8ab
+
14f8ab
+    if (!objs_create(ctx, objs)) {
14f8ab
+        goto failed_buffer;
14f8ab
+    }
14f8ab
+
14f8ab
+    if (!cmd_init(ctx, cmds)) {
14f8ab
+        goto failed_objs;
14f8ab
+    }
14f8ab
+
14f8ab
+    ctx->active = true;
14f8ab
+
14f8ab
+    return ctx;
14f8ab
+
14f8ab
+failed_objs:
14f8ab
+    cmd_fini(ctx, cmds);
14f8ab
+    objs_destroy(ctx);
14f8ab
+failed_buffer:
14f8ab
+    buffer_destroy(ctx);
14f8ab
+failed_ctx:
14f8ab
+    mem_free(ctx);
14f8ab
+failed:
14f8ab
+    return NULL;
14f8ab
+}
14f8ab
+
14f8ab
+static int32_t
14f8ab
+fini(context_t *ctx, command_t *cmds)
14f8ab
+{
14f8ab
+    int32_t ret;
14f8ab
+
14f8ab
+    cmd_fini(ctx, cmds);
14f8ab
+    buffer_destroy(ctx);
14f8ab
+
14f8ab
+    ret = objs_destroy(ctx);
14f8ab
+
14f8ab
+    ctx->active = false;
14f8ab
+
14f8ab
+    return ret;
14f8ab
+}
14f8ab
+
14f8ab
+static int32_t
14f8ab
+exec_quit(context_t *ctx, command_t *cmd)
14f8ab
+{
14f8ab
+    ctx->active = false;
14f8ab
+
14f8ab
+    return 0;
14f8ab
+}
14f8ab
+
14f8ab
+static command_t commands[] = {{"fd", NULL, CMD_SUB(fd_commands)},
14f8ab
+                               {"quit", exec_quit, CMD_ARGS()},
14f8ab
+                               CMD_END};
14f8ab
+
14f8ab
+int32_t
14f8ab
+main(int32_t argc, char *argv[])
14f8ab
+{
14f8ab
+    context_t *ctx;
14f8ab
+    int32_t res;
14f8ab
+
14f8ab
+    ctx = init(1024, 16, commands);
14f8ab
+    if (ctx == NULL) {
14f8ab
+        return 1;
14f8ab
+    }
14f8ab
+
14f8ab
+    do {
14f8ab
+        res = cmd_parse(ctx, commands);
14f8ab
+        if (res < 0) {
14f8ab
+            out_err(-res);
14f8ab
+        }
14f8ab
+    } while (ctx->active);
14f8ab
+
14f8ab
+    res = fini(ctx, commands);
14f8ab
+    if (res >= 0) {
14f8ab
+        out_ok();
14f8ab
+        return 0;
14f8ab
+    }
14f8ab
+
14f8ab
+    out_err(-res);
14f8ab
+
14f8ab
+    return 1;
14f8ab
+}
14f8ab
diff --git a/tests/basic/open-behind/tester.h b/tests/basic/open-behind/tester.h
14f8ab
new file mode 100644
14f8ab
index 0000000..64e940c
14f8ab
--- /dev/null
14f8ab
+++ b/tests/basic/open-behind/tester.h
14f8ab
@@ -0,0 +1,145 @@
14f8ab
+/*
14f8ab
+  Copyright (c) 2020 Red Hat, Inc. <http://www.redhat.com>
14f8ab
+  This file is part of GlusterFS.
14f8ab
+
14f8ab
+  This file is licensed to you under your choice of the GNU Lesser
14f8ab
+  General Public License, version 3 or any later version (LGPLv3 or
14f8ab
+  later), or the GNU General Public License, version 2 (GPLv2), in all
14f8ab
+  cases as published by the Free Software Foundation.
14f8ab
+*/
14f8ab
+
14f8ab
+#ifndef __TESTER_H__
14f8ab
+#define __TESTER_H__
14f8ab
+
14f8ab
+#include <stdio.h>
14f8ab
+#include <inttypes.h>
14f8ab
+#include <stdbool.h>
14f8ab
+
14f8ab
+enum _obj_type;
14f8ab
+typedef enum _obj_type obj_type_t;
14f8ab
+
14f8ab
+enum _arg_type;
14f8ab
+typedef enum _arg_type arg_type_t;
14f8ab
+
14f8ab
+struct _buffer;
14f8ab
+typedef struct _buffer buffer_t;
14f8ab
+
14f8ab
+struct _obj;
14f8ab
+typedef struct _obj obj_t;
14f8ab
+
14f8ab
+struct _context;
14f8ab
+typedef struct _context context_t;
14f8ab
+
14f8ab
+struct _arg;
14f8ab
+typedef struct _arg arg_t;
14f8ab
+
14f8ab
+struct _command;
14f8ab
+typedef struct _command command_t;
14f8ab
+
14f8ab
+enum _obj_type { OBJ_TYPE_NONE, OBJ_TYPE_FD };
14f8ab
+
14f8ab
+enum _arg_type { ARG_TYPE_NONE, ARG_TYPE_OBJ, ARG_TYPE_NUM, ARG_TYPE_STR };
14f8ab
+
14f8ab
+struct _buffer {
14f8ab
+    char *base;
14f8ab
+    uint32_t size;
14f8ab
+    uint32_t len;
14f8ab
+    uint32_t pos;
14f8ab
+};
14f8ab
+
14f8ab
+struct _obj {
14f8ab
+    obj_type_t type;
14f8ab
+    union {
14f8ab
+        int32_t fd;
14f8ab
+    };
14f8ab
+};
14f8ab
+
14f8ab
+struct _context {
14f8ab
+    obj_t *objs;
14f8ab
+    buffer_t buffer;
14f8ab
+    uint32_t obj_count;
14f8ab
+    bool active;
14f8ab
+};
14f8ab
+
14f8ab
+struct _arg {
14f8ab
+    arg_type_t type;
14f8ab
+    union {
14f8ab
+        struct {
14f8ab
+            obj_type_t type;
14f8ab
+            obj_t *ref;
14f8ab
+        } obj;
14f8ab
+        struct {
14f8ab
+            uint64_t value;
14f8ab
+            uint64_t min;
14f8ab
+            uint64_t max;
14f8ab
+        } num;
14f8ab
+        struct {
14f8ab
+            uint32_t size;
14f8ab
+            char *data;
14f8ab
+        } str;
14f8ab
+    };
14f8ab
+};
14f8ab
+
14f8ab
+struct _command {
14f8ab
+    const char *name;
14f8ab
+    int32_t (*handler)(context_t *ctx, command_t *cmd);
14f8ab
+    union {
14f8ab
+        arg_t *args;
14f8ab
+        command_t *cmds;
14f8ab
+    };
14f8ab
+};
14f8ab
+
14f8ab
+#define msg(_stream, _fmt, _args...)                                           \
14f8ab
+    do {                                                                       \
14f8ab
+        fprintf(_stream, _fmt "\n", ##_args);                                  \
14f8ab
+        fflush(_stream);                                                       \
14f8ab
+    } while (0)
14f8ab
+
14f8ab
+#define msg_out(_fmt, _args...) msg(stdout, _fmt, ##_args)
14f8ab
+#define msg_err(_err, _fmt, _args...)                                          \
14f8ab
+    ({                                                                         \
14f8ab
+        int32_t __msg_err = (_err);                                            \
14f8ab
+        msg(stderr, "[%4u:%-15s] " _fmt, __LINE__, __FUNCTION__, __msg_err,    \
14f8ab
+            ##_args);                                                          \
14f8ab
+        -__msg_err;                                                            \
14f8ab
+    })
14f8ab
+
14f8ab
+#define error(_err, _fmt, _args...) msg_err(_err, "E(%4d) " _fmt, ##_args)
14f8ab
+#define warn(_err, _fmt, _args...) msg_err(_err, "W(%4d) " _fmt, ##_args)
14f8ab
+#define info(_err, _fmt, _args...) msg_err(_err, "I(%4d) " _fmt, ##_args)
14f8ab
+
14f8ab
+#define out_ok(_args...) msg_out("OK " _args)
14f8ab
+#define out_err(_err) msg_out("ERR %d", _err)
14f8ab
+
14f8ab
+#define ARG_END                                                                \
14f8ab
+    {                                                                          \
14f8ab
+        ARG_TYPE_NONE                                                          \
14f8ab
+    }
14f8ab
+
14f8ab
+#define CMD_ARGS1(_x, _args...)                                                \
14f8ab
+    .args = (arg_t[]) { _args }
14f8ab
+#define CMD_ARGS(_args...) CMD_ARGS1(, ##_args, ARG_END)
14f8ab
+
14f8ab
+#define CMD_SUB(_cmds) .cmds = _cmds
14f8ab
+
14f8ab
+#define CMD_END                                                                \
14f8ab
+    {                                                                          \
14f8ab
+        NULL, NULL, CMD_SUB(NULL)                                              \
14f8ab
+    }
14f8ab
+
14f8ab
+#define ARG_VAL(_type)                                                         \
14f8ab
+    {                                                                          \
14f8ab
+        ARG_TYPE_OBJ, .obj = {.type = _type }                                  \
14f8ab
+    }
14f8ab
+#define ARG_NUM(_min, _max)                                                    \
14f8ab
+    {                                                                          \
14f8ab
+        ARG_TYPE_NUM, .num = {.min = _min, .max = _max }                       \
14f8ab
+    }
14f8ab
+#define ARG_STR(_size)                                                         \
14f8ab
+    {                                                                          \
14f8ab
+        ARG_TYPE_STR, .str = {.size = _size }                                  \
14f8ab
+    }
14f8ab
+
14f8ab
+extern command_t fd_commands[];
14f8ab
+
14f8ab
+#endif /* __TESTER_H__ */
14f8ab
\ No newline at end of file
14f8ab
diff --git a/tests/bugs/glusterfs/bug-873962-spb.t b/tests/bugs/glusterfs/bug-873962-spb.t
14f8ab
index db84a22..db71cc0 100644
14f8ab
--- a/tests/bugs/glusterfs/bug-873962-spb.t
14f8ab
+++ b/tests/bugs/glusterfs/bug-873962-spb.t
14f8ab
@@ -14,6 +14,7 @@ TEST $CLI volume set $V0 performance.io-cache off
14f8ab
 TEST $CLI volume set $V0 performance.write-behind off
14f8ab
 TEST $CLI volume set $V0 performance.stat-prefetch off
14f8ab
 TEST $CLI volume set $V0 performance.read-ahead off
14f8ab
+TEST $CLI volume set $V0 performance.open-behind off
14f8ab
 TEST $CLI volume set $V0 cluster.background-self-heal-count 0
14f8ab
 TEST $CLI volume start $V0
14f8ab
 TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id=$V0 $M0 --direct-io-mode=enable
14f8ab
diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c
14f8ab
index 919eea3..76b5809 100644
14f8ab
--- a/xlators/mount/fuse/src/fuse-bridge.c
14f8ab
+++ b/xlators/mount/fuse/src/fuse-bridge.c
14f8ab
@@ -3398,6 +3398,8 @@ fuse_release(xlator_t *this, fuse_in_header_t *finh, void *msg,
14f8ab
     gf_log("glusterfs-fuse", GF_LOG_TRACE,
14f8ab
            "finh->unique: %" PRIu64 ": RELEASE %p", finh->unique, state->fd);
14f8ab
 
14f8ab
+    fd_close(state->fd);
14f8ab
+
14f8ab
     fuse_fd_ctx_destroy(this, state->fd);
14f8ab
     fd_unref(fd);
14f8ab
 
14f8ab
diff --git a/xlators/performance/open-behind/src/open-behind-messages.h b/xlators/performance/open-behind/src/open-behind-messages.h
14f8ab
index f250824..0e78917 100644
14f8ab
--- a/xlators/performance/open-behind/src/open-behind-messages.h
14f8ab
+++ b/xlators/performance/open-behind/src/open-behind-messages.h
14f8ab
@@ -23,6 +23,10 @@
14f8ab
  */
14f8ab
 
14f8ab
 GLFS_MSGID(OPEN_BEHIND, OPEN_BEHIND_MSG_XLATOR_CHILD_MISCONFIGURED,
14f8ab
-           OPEN_BEHIND_MSG_VOL_MISCONFIGURED, OPEN_BEHIND_MSG_NO_MEMORY);
14f8ab
+           OPEN_BEHIND_MSG_VOL_MISCONFIGURED, OPEN_BEHIND_MSG_NO_MEMORY,
14f8ab
+           OPEN_BEHIND_MSG_FAILED, OPEN_BEHIND_MSG_BAD_STATE);
14f8ab
+
14f8ab
+#define OPEN_BEHIND_MSG_FAILED_STR "Failed to submit fop"
14f8ab
+#define OPEN_BEHIND_MSG_BAD_STATE_STR "Unexpected state"
14f8ab
 
14f8ab
 #endif /* _OPEN_BEHIND_MESSAGES_H_ */
14f8ab
diff --git a/xlators/performance/open-behind/src/open-behind.c b/xlators/performance/open-behind/src/open-behind.c
14f8ab
index cbe89ec..e43fe73 100644
14f8ab
--- a/xlators/performance/open-behind/src/open-behind.c
14f8ab
+++ b/xlators/performance/open-behind/src/open-behind.c
14f8ab
@@ -16,6 +16,18 @@
14f8ab
 #include "open-behind-messages.h"
14f8ab
 #include <glusterfs/glusterfs-acl.h>
14f8ab
 
14f8ab
+/* Note: The initial design of open-behind was made to cover the simple case
14f8ab
+ *       of open, read, close for small files. This pattern combined with
14f8ab
+ *       quick-read can do the whole operation without a single request to the
14f8ab
+ *       bricks (except the initial lookup).
14f8ab
+ *
14f8ab
+ *       The way to do this has been improved, but the logic remains the same.
14f8ab
+ *       Basically, this means that any operation sent to the fd or the inode
14f8ab
+ *       that is not a read, causes the open request to be sent to the
14f8ab
+ *       bricks, and all future operations will be executed synchronously,
14f8ab
+ *       including opens (it's reset once all fd's are closed).
14f8ab
+ */
14f8ab
+
14f8ab
 typedef struct ob_conf {
14f8ab
     gf_boolean_t use_anonymous_fd; /* use anonymous FDs wherever safe
14f8ab
                                       e.g - fstat() readv()
14f8ab
@@ -32,1096 +44,754 @@ typedef struct ob_conf {
14f8ab
                                         */
14f8ab
 } ob_conf_t;
14f8ab
 
14f8ab
-typedef struct ob_inode {
14f8ab
-    inode_t *inode;
14f8ab
-    struct list_head resume_fops;
14f8ab
-    struct list_head ob_fds;
14f8ab
-    int count;
14f8ab
-    int op_ret;
14f8ab
-    int op_errno;
14f8ab
-    gf_boolean_t open_in_progress;
14f8ab
-    int unlinked;
14f8ab
-} ob_inode_t;
14f8ab
+/* A negative state represents an errno value negated. In this case the
14f8ab
+ * current operation cannot be processed. */
14f8ab
+typedef enum _ob_state {
14f8ab
+    /* There are no opens on the inode or the first open is already
14f8ab
+     * completed. The current operation can be sent directly. */
14f8ab
+    OB_STATE_READY = 0,
14f8ab
 
14f8ab
-typedef struct ob_fd {
14f8ab
-    call_frame_t *open_frame;
14f8ab
-    loc_t loc;
14f8ab
-    dict_t *xdata;
14f8ab
-    int flags;
14f8ab
-    int op_errno;
14f8ab
-    ob_inode_t *ob_inode;
14f8ab
-    fd_t *fd;
14f8ab
-    gf_boolean_t opened;
14f8ab
-    gf_boolean_t ob_inode_fops_waiting;
14f8ab
-    struct list_head list;
14f8ab
-    struct list_head ob_fds_on_inode;
14f8ab
-} ob_fd_t;
14f8ab
+    /* There's an open pending and it has been triggered. The current
14f8ab
+     * operation should be "stubbified" and processed with
14f8ab
+     * ob_stub_dispatch(). */
14f8ab
+    OB_STATE_OPEN_TRIGGERED,
14f8ab
 
14f8ab
-ob_inode_t *
14f8ab
-ob_inode_alloc(inode_t *inode)
14f8ab
-{
14f8ab
-    ob_inode_t *ob_inode = NULL;
14f8ab
+    /* There's an open pending but it has not been triggered. The current
14f8ab
+     * operation can be processed directly but using an anonymous fd. */
14f8ab
+    OB_STATE_OPEN_PENDING,
14f8ab
 
14f8ab
-    ob_inode = GF_CALLOC(1, sizeof(*ob_inode), gf_ob_mt_inode_t);
14f8ab
-    if (ob_inode == NULL)
14f8ab
-        goto out;
14f8ab
+    /* The current operation is the first open on the inode. */
14f8ab
+    OB_STATE_FIRST_OPEN
14f8ab
+} ob_state_t;
14f8ab
 
14f8ab
-    ob_inode->inode = inode;
14f8ab
-    INIT_LIST_HEAD(&ob_inode->resume_fops);
14f8ab
-    INIT_LIST_HEAD(&ob_inode->ob_fds);
14f8ab
-out:
14f8ab
-    return ob_inode;
14f8ab
-}
14f8ab
-
14f8ab
-void
14f8ab
-ob_inode_free(ob_inode_t *ob_inode)
14f8ab
-{
14f8ab
-    if (ob_inode == NULL)
14f8ab
-        goto out;
14f8ab
+typedef struct ob_inode {
14f8ab
+    /* List of stubs pending on the first open. Once the first open is
14f8ab
+     * complete, all these stubs will be resubmitted, and dependencies
14f8ab
+     * will be checked again. */
14f8ab
+    struct list_head resume_fops;
14f8ab
 
14f8ab
-    list_del_init(&ob_inode->resume_fops);
14f8ab
-    list_del_init(&ob_inode->ob_fds);
14f8ab
+    /* The inode this object references. */
14f8ab
+    inode_t *inode;
14f8ab
 
14f8ab
-    GF_FREE(ob_inode);
14f8ab
-out:
14f8ab
-    return;
14f8ab
-}
14f8ab
+    /* The fd from the first open sent to this inode. It will be set
14f8ab
+     * from the moment the open is processed until the open is fully
14f8ab
+     * executed or closed before actually opened. It's NULL in all
14f8ab
+     * other cases. */
14f8ab
+    fd_t *first_fd;
14f8ab
+
14f8ab
+    /* The stub from the first open operation. When open fop starts
14f8ab
+     * being processed, it's assigned the OB_OPEN_PREPARING value
14f8ab
+     * until the actual stub is created. This is necessary to avoid
14f8ab
+     * creating the stub inside a locked region. Once the stub is
14f8ab
+     * successfully created, it's assigned here. This value is set
14f8ab
+     * to NULL once the stub is resumed. */
14f8ab
+    call_stub_t *first_open;
14f8ab
+
14f8ab
+    /* The total number of currently open fd's on this inode. */
14f8ab
+    int32_t open_count;
14f8ab
+
14f8ab
+    /* This flag is set as soon as we know that the open will be
14f8ab
+     * sent to the bricks, even before the stub is ready. */
14f8ab
+    bool triggered;
14f8ab
+} ob_inode_t;
14f8ab
 
14f8ab
-ob_inode_t *
14f8ab
-ob_inode_get(xlator_t *this, inode_t *inode)
14f8ab
+/* Dummy pointer used temporarily while the actual open stub is being created */
14f8ab
+#define OB_OPEN_PREPARING ((call_stub_t *)-1)
14f8ab
+
14f8ab
+#define OB_POST_COMMON(_fop, _xl, _frame, _fd, _args...)                       \
14f8ab
+    case OB_STATE_FIRST_OPEN:                                                  \
14f8ab
+        gf_smsg((_xl)->name, GF_LOG_ERROR, EINVAL, OPEN_BEHIND_MSG_BAD_STATE,  \
14f8ab
+                "fop=%s", #_fop, "state=%d", __ob_state, NULL);                \
14f8ab
+        default_##_fop##_failure_cbk(_frame, EINVAL);                          \
14f8ab
+        break;                                                                 \
14f8ab
+    case OB_STATE_READY:                                                       \
14f8ab
+        default_##_fop(_frame, _xl, ##_args);                                  \
14f8ab
+        break;                                                                 \
14f8ab
+    case OB_STATE_OPEN_TRIGGERED: {                                            \
14f8ab
+        call_stub_t *__ob_stub = fop_##_fop##_stub(_frame, ob_##_fop,          \
14f8ab
+                                                   ##_args);                   \
14f8ab
+        if (__ob_stub != NULL) {                                               \
14f8ab
+            ob_stub_dispatch(_xl, __ob_inode, _fd, __ob_stub);                 \
14f8ab
+            break;                                                             \
14f8ab
+        }                                                                      \
14f8ab
+        __ob_state = -ENOMEM;                                                  \
14f8ab
+    }                                                                          \
14f8ab
+    default:                                                                   \
14f8ab
+        gf_smsg((_xl)->name, GF_LOG_ERROR, -__ob_state,                        \
14f8ab
+                OPEN_BEHIND_MSG_FAILED, "fop=%s", #_fop, NULL);                \
14f8ab
+        default_##_fop##_failure_cbk(_frame, -__ob_state)
14f8ab
+
14f8ab
+#define OB_POST_FD(_fop, _xl, _frame, _fd, _trigger, _args...)                 \
14f8ab
+    do {                                                                       \
14f8ab
+        ob_inode_t *__ob_inode;                                                \
14f8ab
+        fd_t *__first_fd;                                                      \
14f8ab
+        ob_state_t __ob_state = ob_open_and_resume_fd(                         \
14f8ab
+            _xl, _fd, 0, true, _trigger, &__ob_inode, &__first_fd);            \
14f8ab
+        switch (__ob_state) {                                                  \
14f8ab
+            case OB_STATE_OPEN_PENDING:                                        \
14f8ab
+                if (!(_trigger)) {                                             \
14f8ab
+                    fd_t *__ob_fd = fd_anonymous_with_flags((_fd)->inode,      \
14f8ab
+                                                            (_fd)->flags);     \
14f8ab
+                    if (__ob_fd != NULL) {                                     \
14f8ab
+                        default_##_fop(_frame, _xl, ##_args);                  \
14f8ab
+                        fd_unref(__ob_fd);                                     \
14f8ab
+                        break;                                                 \
14f8ab
+                    }                                                          \
14f8ab
+                    __ob_state = -ENOMEM;                                      \
14f8ab
+                }                                                              \
14f8ab
+                OB_POST_COMMON(_fop, _xl, _frame, __first_fd, ##_args);        \
14f8ab
+        }                                                                      \
14f8ab
+    } while (0)
14f8ab
+
14f8ab
+#define OB_POST_FLUSH(_xl, _frame, _fd, _args...)                              \
14f8ab
+    do {                                                                       \
14f8ab
+        ob_inode_t *__ob_inode;                                                \
14f8ab
+        fd_t *__first_fd;                                                      \
14f8ab
+        ob_state_t __ob_state = ob_open_and_resume_fd(                         \
14f8ab
+            _xl, _fd, 0, true, false, &__ob_inode, &__first_fd);               \
14f8ab
+        switch (__ob_state) {                                                  \
14f8ab
+            case OB_STATE_OPEN_PENDING:                                        \
14f8ab
+                default_flush_cbk(_frame, NULL, _xl, 0, 0, NULL);              \
14f8ab
+                break;                                                         \
14f8ab
+                OB_POST_COMMON(flush, _xl, _frame, __first_fd, ##_args);       \
14f8ab
+        }                                                                      \
14f8ab
+    } while (0)
14f8ab
+
14f8ab
+#define OB_POST_INODE(_fop, _xl, _frame, _inode, _trigger, _args...)           \
14f8ab
+    do {                                                                       \
14f8ab
+        ob_inode_t *__ob_inode;                                                \
14f8ab
+        fd_t *__first_fd;                                                      \
14f8ab
+        ob_state_t __ob_state = ob_open_and_resume_inode(                      \
14f8ab
+            _xl, _inode, NULL, 0, true, _trigger, &__ob_inode, &__first_fd);   \
14f8ab
+        switch (__ob_state) {                                                  \
14f8ab
+            case OB_STATE_OPEN_PENDING:                                        \
14f8ab
+                OB_POST_COMMON(_fop, _xl, _frame, __first_fd, ##_args);        \
14f8ab
+        }                                                                      \
14f8ab
+    } while (0)
14f8ab
+
14f8ab
+static ob_inode_t *
14f8ab
+ob_inode_get_locked(xlator_t *this, inode_t *inode)
14f8ab
 {
14f8ab
     ob_inode_t *ob_inode = NULL;
14f8ab
     uint64_t value = 0;
14f8ab
-    int ret = 0;
14f8ab
 
14f8ab
-    if (!inode)
14f8ab
-        goto out;
14f8ab
+    if ((__inode_ctx_get(inode, this, &value) == 0) && (value != 0)) {
14f8ab
+        return (ob_inode_t *)(uintptr_t)value;
14f8ab
+    }
14f8ab
 
14f8ab
-    LOCK(&inode->lock);
14f8ab
-    {
14f8ab
-        __inode_ctx_get(inode, this, &value);
14f8ab
-        if (value == 0) {
14f8ab
-            ob_inode = ob_inode_alloc(inode);
14f8ab
-            if (ob_inode == NULL)
14f8ab
-                goto unlock;
14f8ab
-
14f8ab
-            value = (uint64_t)(uintptr_t)ob_inode;
14f8ab
-            ret = __inode_ctx_set(inode, this, &value);
14f8ab
-            if (ret < 0) {
14f8ab
-                ob_inode_free(ob_inode);
14f8ab
-                ob_inode = NULL;
14f8ab
-            }
14f8ab
-        } else {
14f8ab
-            ob_inode = (ob_inode_t *)(uintptr_t)value;
14f8ab
+    ob_inode = GF_CALLOC(1, sizeof(*ob_inode), gf_ob_mt_inode_t);
14f8ab
+    if (ob_inode != NULL) {
14f8ab
+        ob_inode->inode = inode;
14f8ab
+        INIT_LIST_HEAD(&ob_inode->resume_fops);
14f8ab
+
14f8ab
+        value = (uint64_t)(uintptr_t)ob_inode;
14f8ab
+        if (__inode_ctx_set(inode, this, &value) < 0) {
14f8ab
+            GF_FREE(ob_inode);
14f8ab
+            ob_inode = NULL;
14f8ab
         }
14f8ab
     }
14f8ab
-unlock:
14f8ab
-    UNLOCK(&inode->lock);
14f8ab
 
14f8ab
-out:
14f8ab
     return ob_inode;
14f8ab
 }
14f8ab
 
14f8ab
-ob_fd_t *
14f8ab
-__ob_fd_ctx_get(xlator_t *this, fd_t *fd)
14f8ab
+static ob_state_t
14f8ab
+ob_open_and_resume_inode(xlator_t *xl, inode_t *inode, fd_t *fd,
14f8ab
+                         int32_t open_count, bool synchronous, bool trigger,
14f8ab
+                         ob_inode_t **pob_inode, fd_t **pfd)
14f8ab
 {
14f8ab
-    uint64_t value = 0;
14f8ab
-    int ret = -1;
14f8ab
-    ob_fd_t *ob_fd = NULL;
14f8ab
+    ob_conf_t *conf;
14f8ab
+    ob_inode_t *ob_inode;
14f8ab
+    call_stub_t *open_stub;
14f8ab
 
14f8ab
-    ret = __fd_ctx_get(fd, this, &value);
14f8ab
-    if (ret)
14f8ab
-        return NULL;
14f8ab
+    if (inode == NULL) {
14f8ab
+        return OB_STATE_READY;
14f8ab
+    }
14f8ab
 
14f8ab
-    ob_fd = (void *)((long)value);
14f8ab
+    conf = xl->private;
14f8ab
 
14f8ab
-    return ob_fd;
14f8ab
-}
14f8ab
+    *pfd = NULL;
14f8ab
 
14f8ab
-ob_fd_t *
14f8ab
-ob_fd_ctx_get(xlator_t *this, fd_t *fd)
14f8ab
-{
14f8ab
-    ob_fd_t *ob_fd = NULL;
14f8ab
-
14f8ab
-    LOCK(&fd->lock);
14f8ab
+    LOCK(&inode->lock);
14f8ab
     {
14f8ab
-        ob_fd = __ob_fd_ctx_get(this, fd);
14f8ab
-    }
14f8ab
-    UNLOCK(&fd->lock);
14f8ab
-
14f8ab
-    return ob_fd;
14f8ab
-}
14f8ab
+        ob_inode = ob_inode_get_locked(xl, inode);
14f8ab
+        if (ob_inode == NULL) {
14f8ab
+            UNLOCK(&inode->lock);
14f8ab
 
14f8ab
-int
14f8ab
-__ob_fd_ctx_set(xlator_t *this, fd_t *fd, ob_fd_t *ob_fd)
14f8ab
-{
14f8ab
-    uint64_t value = 0;
14f8ab
-    int ret = -1;
14f8ab
+            return -ENOMEM;
14f8ab
+        }
14f8ab
+        *pob_inode = ob_inode;
14f8ab
+
14f8ab
+        ob_inode->open_count += open_count;
14f8ab
+
14f8ab
+        /* If first_fd is not NULL, it means that there's a previous open not
14f8ab
+         * yet completed. */
14f8ab
+        if (ob_inode->first_fd != NULL) {
14f8ab
+            *pfd = ob_inode->first_fd;
14f8ab
+            /* If the current request doesn't trigger the open and it hasn't
14f8ab
+             * been triggered yet, we can continue without issuing the open
14f8ab
+             * only if the current request belongs to the same fd as the
14f8ab
+             * first one. */
14f8ab
+            if (!trigger && !ob_inode->triggered &&
14f8ab
+                (ob_inode->first_fd == fd)) {
14f8ab
+                UNLOCK(&inode->lock);
14f8ab
+
14f8ab
+                return OB_STATE_OPEN_PENDING;
14f8ab
+            }
14f8ab
 
14f8ab
-    value = (long)((void *)ob_fd);
14f8ab
+            /* We need to issue the open. It could have already been triggered
14f8ab
+             * before. In this case open_stub will be NULL. Or the initial open
14f8ab
+             * may not be completely ready yet. In this case open_stub will be
14f8ab
+             * OB_OPEN_PREPARING. */
14f8ab
+            open_stub = ob_inode->first_open;
14f8ab
+            ob_inode->first_open = NULL;
14f8ab
+            ob_inode->triggered = true;
14f8ab
 
14f8ab
-    ret = __fd_ctx_set(fd, this, value);
14f8ab
+            UNLOCK(&inode->lock);
14f8ab
 
14f8ab
-    return ret;
14f8ab
-}
14f8ab
+            if ((open_stub != NULL) && (open_stub != OB_OPEN_PREPARING)) {
14f8ab
+                call_resume(open_stub);
14f8ab
+            }
14f8ab
 
14f8ab
-int
14f8ab
-ob_fd_ctx_set(xlator_t *this, fd_t *fd, ob_fd_t *ob_fd)
14f8ab
-{
14f8ab
-    int ret = -1;
14f8ab
+            return OB_STATE_OPEN_TRIGGERED;
14f8ab
+        }
14f8ab
 
14f8ab
-    LOCK(&fd->lock);
14f8ab
-    {
14f8ab
-        ret = __ob_fd_ctx_set(this, fd, ob_fd);
14f8ab
-    }
14f8ab
-    UNLOCK(&fd->lock);
14f8ab
+        /* There's no pending open. Only opens can be non synchronous, so all
14f8ab
+         * regular fops will be processed directly. For non synchronous opens,
14f8ab
+         * we'll still process them normally (i.e. synchronously) if there are
14f8ab
+         * more file descriptors open. */
14f8ab
+        if (synchronous || (ob_inode->open_count > open_count)) {
14f8ab
+            UNLOCK(&inode->lock);
14f8ab
 
14f8ab
-    return ret;
14f8ab
-}
14f8ab
+            return OB_STATE_READY;
14f8ab
+        }
14f8ab
 
14f8ab
-ob_fd_t *
14f8ab
-ob_fd_new(void)
14f8ab
-{
14f8ab
-    ob_fd_t *ob_fd = NULL;
14f8ab
+        *pfd = fd;
14f8ab
 
14f8ab
-    ob_fd = GF_CALLOC(1, sizeof(*ob_fd), gf_ob_mt_fd_t);
14f8ab
+        /* This is the first open. We keep a reference on the fd and set
14f8ab
+         * first_open stub to OB_OPEN_PREPARING until the actual stub can
14f8ab
+         * be assigned (we don't create the stub here to avoid doing memory
14f8ab
+         * allocations inside the mutex). */
14f8ab
+        ob_inode->first_fd = __fd_ref(fd);
14f8ab
+        ob_inode->first_open = OB_OPEN_PREPARING;
14f8ab
 
14f8ab
-    INIT_LIST_HEAD(&ob_fd->list);
14f8ab
-    INIT_LIST_HEAD(&ob_fd->ob_fds_on_inode);
14f8ab
+        /* If lazy_open is not set, we'll need to immediately send the open,
14f8ab
+         * so we set triggered right now. */
14f8ab
+        ob_inode->triggered = !conf->lazy_open;
14f8ab
+    }
14f8ab
+    UNLOCK(&inode->lock);
14f8ab
 
14f8ab
-    return ob_fd;
14f8ab
+    return OB_STATE_FIRST_OPEN;
14f8ab
 }
14f8ab
 
14f8ab
-void
14f8ab
-ob_fd_free(ob_fd_t *ob_fd)
14f8ab
+static ob_state_t
14f8ab
+ob_open_and_resume_fd(xlator_t *xl, fd_t *fd, int32_t open_count,
14f8ab
+                      bool synchronous, bool trigger, ob_inode_t **pob_inode,
14f8ab
+                      fd_t **pfd)
14f8ab
 {
14f8ab
-    LOCK(&ob_fd->fd->inode->lock);
14f8ab
-    {
14f8ab
-        list_del_init(&ob_fd->ob_fds_on_inode);
14f8ab
-    }
14f8ab
-    UNLOCK(&ob_fd->fd->inode->lock);
14f8ab
-
14f8ab
-    loc_wipe(&ob_fd->loc);
14f8ab
-
14f8ab
-    if (ob_fd->xdata)
14f8ab
-        dict_unref(ob_fd->xdata);
14f8ab
+    uint64_t err;
14f8ab
 
14f8ab
-    if (ob_fd->open_frame) {
14f8ab
-        /* If we sill have a frame it means that background open has never
14f8ab
-         * been triggered. We need to release the pending reference. */
14f8ab
-        fd_unref(ob_fd->fd);
14f8ab
-
14f8ab
-        STACK_DESTROY(ob_fd->open_frame->root);
14f8ab
+    if ((fd_ctx_get(fd, xl, &err) == 0) && (err != 0)) {
14f8ab
+        return (ob_state_t)-err;
14f8ab
     }
14f8ab
 
14f8ab
-    GF_FREE(ob_fd);
14f8ab
+    return ob_open_and_resume_inode(xl, fd->inode, fd, open_count, synchronous,
14f8ab
+                                    trigger, pob_inode, pfd);
14f8ab
 }
14f8ab
 
14f8ab
-int
14f8ab
-ob_wake_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
14f8ab
-            int op_errno, fd_t *fd_ret, dict_t *xdata)
14f8ab
+static ob_state_t
14f8ab
+ob_open_behind(xlator_t *xl, fd_t *fd, int32_t flags, ob_inode_t **pob_inode,
14f8ab
+               fd_t **pfd)
14f8ab
 {
14f8ab
-    fd_t *fd = NULL;
14f8ab
-    int count = 0;
14f8ab
-    int ob_inode_op_ret = 0;
14f8ab
-    int ob_inode_op_errno = 0;
14f8ab
-    ob_fd_t *ob_fd = NULL;
14f8ab
-    call_stub_t *stub = NULL, *tmp = NULL;
14f8ab
-    ob_inode_t *ob_inode = NULL;
14f8ab
-    gf_boolean_t ob_inode_fops_waiting = _gf_false;
14f8ab
-    struct list_head fops_waiting_on_fd, fops_waiting_on_inode;
14f8ab
+    bool synchronous;
14f8ab
 
14f8ab
-    fd = frame->local;
14f8ab
-    frame->local = NULL;
14f8ab
-
14f8ab
-    INIT_LIST_HEAD(&fops_waiting_on_fd);
14f8ab
-    INIT_LIST_HEAD(&fops_waiting_on_inode);
14f8ab
+    /* TODO: If O_CREAT, O_APPEND, O_WRONLY or O_DIRECT are specified, shouldn't
14f8ab
+     *       we also execute this open synchronously ? */
14f8ab
+    synchronous = (flags & O_TRUNC) != 0;
14f8ab
 
14f8ab
-    ob_inode = ob_inode_get(this, fd->inode);
14f8ab
+    return ob_open_and_resume_fd(xl, fd, 1, synchronous, true, pob_inode, pfd);
14f8ab
+}
14f8ab
 
14f8ab
-    LOCK(&fd->lock);
14f8ab
+static int32_t
14f8ab
+ob_stub_dispatch(xlator_t *xl, ob_inode_t *ob_inode, fd_t *fd,
14f8ab
+                 call_stub_t *stub)
14f8ab
+{
14f8ab
+    LOCK(&ob_inode->inode->lock);
14f8ab
     {
14f8ab
-        ob_fd = __ob_fd_ctx_get(this, fd);
14f8ab
-        ob_fd->opened = _gf_true;
14f8ab
-
14f8ab
-        ob_inode_fops_waiting = ob_fd->ob_inode_fops_waiting;
14f8ab
-
14f8ab
-        list_splice_init(&ob_fd->list, &fops_waiting_on_fd);
14f8ab
-
14f8ab
-        if (op_ret < 0) {
14f8ab
-            /* mark fd BAD for ever */
14f8ab
-            ob_fd->op_errno = op_errno;
14f8ab
-            ob_fd = NULL; /*shouldn't be freed*/
14f8ab
-        } else {
14f8ab
-            __fd_ctx_del(fd, this, NULL);
14f8ab
-        }
14f8ab
-    }
14f8ab
-    UNLOCK(&fd->lock);
14f8ab
-
14f8ab
-    if (ob_inode_fops_waiting) {
14f8ab
-        LOCK(&fd->inode->lock);
14f8ab
-        {
14f8ab
-            count = --ob_inode->count;
14f8ab
-            if (op_ret < 0) {
14f8ab
-                /* TODO: when to reset the error? */
14f8ab
-                ob_inode->op_ret = -1;
14f8ab
-                ob_inode->op_errno = op_errno;
14f8ab
-            }
14f8ab
-
14f8ab
-            if (count == 0) {
14f8ab
-                ob_inode->open_in_progress = _gf_false;
14f8ab
-                ob_inode_op_ret = ob_inode->op_ret;
14f8ab
-                ob_inode_op_errno = ob_inode->op_errno;
14f8ab
-                list_splice_init(&ob_inode->resume_fops,
14f8ab
-                                 &fops_waiting_on_inode);
14f8ab
-            }
14f8ab
+        /* We only queue a stub if the open has not been completed or
14f8ab
+         * cancelled. */
14f8ab
+        if (ob_inode->first_fd == fd) {
14f8ab
+            list_add_tail(&stub->list, &ob_inode->resume_fops);
14f8ab
+            stub = NULL;
14f8ab
         }
14f8ab
-        UNLOCK(&fd->inode->lock);
14f8ab
-    }
14f8ab
-
14f8ab
-    if (ob_fd)
14f8ab
-        ob_fd_free(ob_fd);
14f8ab
-
14f8ab
-    list_for_each_entry_safe(stub, tmp, &fops_waiting_on_fd, list)
14f8ab
-    {
14f8ab
-        list_del_init(&stub->list);
14f8ab
-
14f8ab
-        if (op_ret < 0)
14f8ab
-            call_unwind_error(stub, -1, op_errno);
14f8ab
-        else
14f8ab
-            call_resume(stub);
14f8ab
     }
14f8ab
+    UNLOCK(&ob_inode->inode->lock);
14f8ab
 
14f8ab
-    list_for_each_entry_safe(stub, tmp, &fops_waiting_on_inode, list)
14f8ab
-    {
14f8ab
-        list_del_init(&stub->list);
14f8ab
-
14f8ab
-        if (ob_inode_op_ret < 0)
14f8ab
-            call_unwind_error(stub, -1, ob_inode_op_errno);
14f8ab
-        else
14f8ab
-            call_resume(stub);
14f8ab
+    if (stub != NULL) {
14f8ab
+        call_resume(stub);
14f8ab
     }
14f8ab
 
14f8ab
-    /* The background open is completed. We can release the 'fd' reference. */
14f8ab
-    fd_unref(fd);
14f8ab
-
14f8ab
-    STACK_DESTROY(frame->root);
14f8ab
-
14f8ab
     return 0;
14f8ab
 }
14f8ab
 
14f8ab
-int
14f8ab
-ob_fd_wake(xlator_t *this, fd_t *fd, ob_fd_t *ob_fd)
14f8ab
+static int32_t
14f8ab
+ob_open_dispatch(xlator_t *xl, ob_inode_t *ob_inode, fd_t *fd,
14f8ab
+                 call_stub_t *stub)
14f8ab
 {
14f8ab
-    call_frame_t *frame = NULL;
14f8ab
-
14f8ab
-    if (ob_fd == NULL) {
14f8ab
-        LOCK(&fd->lock);
14f8ab
-        {
14f8ab
-            ob_fd = __ob_fd_ctx_get(this, fd);
14f8ab
-            if (!ob_fd)
14f8ab
-                goto unlock;
14f8ab
+    bool closed;
14f8ab
 
14f8ab
-            frame = ob_fd->open_frame;
14f8ab
-            ob_fd->open_frame = NULL;
14f8ab
-        }
14f8ab
-    unlock:
14f8ab
-        UNLOCK(&fd->lock);
14f8ab
-    } else {
14f8ab
-        LOCK(&fd->lock);
14f8ab
-        {
14f8ab
-            frame = ob_fd->open_frame;
14f8ab
-            ob_fd->open_frame = NULL;
14f8ab
+    LOCK(&ob_inode->inode->lock);
14f8ab
+    {
14f8ab
+        closed = ob_inode->first_fd != fd;
14f8ab
+        if (!closed) {
14f8ab
+            if (ob_inode->triggered) {
14f8ab
+                ob_inode->first_open = NULL;
14f8ab
+            } else {
14f8ab
+                ob_inode->first_open = stub;
14f8ab
+                stub = NULL;
14f8ab
+            }
14f8ab
         }
14f8ab
-        UNLOCK(&fd->lock);
14f8ab
     }
14f8ab
+    UNLOCK(&ob_inode->inode->lock);
14f8ab
 
14f8ab
-    if (frame) {
14f8ab
-        /* We don't need to take a reference here. We already have a reference
14f8ab
-         * while the open is pending. */
14f8ab
-        frame->local = fd;
14f8ab
-
14f8ab
-        STACK_WIND(frame, ob_wake_cbk, FIRST_CHILD(this),
14f8ab
-                   FIRST_CHILD(this)->fops->open, &ob_fd->loc, ob_fd->flags, fd,
14f8ab
-                   ob_fd->xdata);
14f8ab
+    if (stub != NULL) {
14f8ab
+        if (closed) {
14f8ab
+            call_stub_destroy(stub);
14f8ab
+            fd_unref(fd);
14f8ab
+        } else {
14f8ab
+            call_resume(stub);
14f8ab
+        }
14f8ab
     }
14f8ab
 
14f8ab
     return 0;
14f8ab
 }
14f8ab
 
14f8ab
-void
14f8ab
-ob_inode_wake(xlator_t *this, struct list_head *ob_fds)
14f8ab
+static void
14f8ab
+ob_resume_pending(struct list_head *list)
14f8ab
 {
14f8ab
-    ob_fd_t *ob_fd = NULL, *tmp = NULL;
14f8ab
+    call_stub_t *stub;
14f8ab
 
14f8ab
-    if (!list_empty(ob_fds)) {
14f8ab
-        list_for_each_entry_safe(ob_fd, tmp, ob_fds, ob_fds_on_inode)
14f8ab
-        {
14f8ab
-            ob_fd_wake(this, ob_fd->fd, ob_fd);
14f8ab
-            ob_fd_free(ob_fd);
14f8ab
-        }
14f8ab
-    }
14f8ab
-}
14f8ab
+    while (!list_empty(list)) {
14f8ab
+        stub = list_first_entry(list, call_stub_t, list);
14f8ab
+        list_del_init(&stub->list);
14f8ab
 
14f8ab
-/* called holding inode->lock and fd->lock */
14f8ab
-void
14f8ab
-ob_fd_copy(ob_fd_t *src, ob_fd_t *dst)
14f8ab
-{
14f8ab
-    if (!src || !dst)
14f8ab
-        goto out;
14f8ab
-
14f8ab
-    dst->fd = src->fd;
14f8ab
-    dst->loc.inode = inode_ref(src->loc.inode);
14f8ab
-    gf_uuid_copy(dst->loc.gfid, src->loc.gfid);
14f8ab
-    dst->flags = src->flags;
14f8ab
-    dst->xdata = dict_ref(src->xdata);
14f8ab
-    dst->ob_inode = src->ob_inode;
14f8ab
-out:
14f8ab
-    return;
14f8ab
+        call_resume(stub);
14f8ab
+    }
14f8ab
 }
14f8ab
 
14f8ab
-int
14f8ab
-open_all_pending_fds_and_resume(xlator_t *this, inode_t *inode,
14f8ab
-                                call_stub_t *stub)
14f8ab
+static void
14f8ab
+ob_open_completed(xlator_t *xl, ob_inode_t *ob_inode, fd_t *fd, int32_t op_ret,
14f8ab
+                  int32_t op_errno)
14f8ab
 {
14f8ab
-    ob_inode_t *ob_inode = NULL;
14f8ab
-    ob_fd_t *ob_fd = NULL, *tmp = NULL;
14f8ab
-    gf_boolean_t was_open_in_progress = _gf_false;
14f8ab
-    gf_boolean_t wait_for_open = _gf_false;
14f8ab
-    struct list_head ob_fds;
14f8ab
+    struct list_head list;
14f8ab
 
14f8ab
-    ob_inode = ob_inode_get(this, inode);
14f8ab
-    if (ob_inode == NULL)
14f8ab
-        goto out;
14f8ab
+    INIT_LIST_HEAD(&list);
14f8ab
 
14f8ab
-    INIT_LIST_HEAD(&ob_fds);
14f8ab
+    if (op_ret < 0) {
14f8ab
+        fd_ctx_set(fd, xl, op_errno <= 0 ? EIO : op_errno);
14f8ab
+    }
14f8ab
 
14f8ab
-    LOCK(&inode->lock);
14f8ab
+    LOCK(&ob_inode->inode->lock);
14f8ab
     {
14f8ab
-        was_open_in_progress = ob_inode->open_in_progress;
14f8ab
-        ob_inode->unlinked = 1;
14f8ab
-
14f8ab
-        if (was_open_in_progress) {
14f8ab
-            list_add_tail(&stub->list, &ob_inode->resume_fops);
14f8ab
-            goto inode_unlock;
14f8ab
-        }
14f8ab
-
14f8ab
-        list_for_each_entry(ob_fd, &ob_inode->ob_fds, ob_fds_on_inode)
14f8ab
-        {
14f8ab
-            LOCK(&ob_fd->fd->lock);
14f8ab
-            {
14f8ab
-                if (ob_fd->opened)
14f8ab
-                    goto fd_unlock;
14f8ab
-
14f8ab
-                ob_inode->count++;
14f8ab
-                ob_fd->ob_inode_fops_waiting = _gf_true;
14f8ab
-
14f8ab
-                if (ob_fd->open_frame == NULL) {
14f8ab
-                    /* open in progress no need of wake */
14f8ab
-                } else {
14f8ab
-                    tmp = ob_fd_new();
14f8ab
-                    tmp->open_frame = ob_fd->open_frame;
14f8ab
-                    ob_fd->open_frame = NULL;
14f8ab
-
14f8ab
-                    ob_fd_copy(ob_fd, tmp);
14f8ab
-                    list_add_tail(&tmp->ob_fds_on_inode, &ob_fds);
14f8ab
-                }
14f8ab
-            }
14f8ab
-        fd_unlock:
14f8ab
-            UNLOCK(&ob_fd->fd->lock);
14f8ab
-        }
14f8ab
-
14f8ab
-        if (ob_inode->count) {
14f8ab
-            wait_for_open = ob_inode->open_in_progress = _gf_true;
14f8ab
-            list_add_tail(&stub->list, &ob_inode->resume_fops);
14f8ab
+        /* Only update the fields if the file has not been closed before
14f8ab
+         * getting here. */
14f8ab
+        if (ob_inode->first_fd == fd) {
14f8ab
+            list_splice_init(&ob_inode->resume_fops, &list);
14f8ab
+            ob_inode->first_fd = NULL;
14f8ab
+            ob_inode->first_open = NULL;
14f8ab
+            ob_inode->triggered = false;
14f8ab
         }
14f8ab
     }
14f8ab
-inode_unlock:
14f8ab
-    UNLOCK(&inode->lock);
14f8ab
+    UNLOCK(&ob_inode->inode->lock);
14f8ab
 
14f8ab
-out:
14f8ab
-    if (!was_open_in_progress) {
14f8ab
-        if (!wait_for_open) {
14f8ab
-            call_resume(stub);
14f8ab
-        } else {
14f8ab
-            ob_inode_wake(this, &ob_fds);
14f8ab
-        }
14f8ab
-    }
14f8ab
+    ob_resume_pending(&list);
14f8ab
 
14f8ab
-    return 0;
14f8ab
+    fd_unref(fd);
14f8ab
 }
14f8ab
 
14f8ab
-int
14f8ab
-open_and_resume(xlator_t *this, fd_t *fd, call_stub_t *stub)
14f8ab
+static int32_t
14f8ab
+ob_open_cbk(call_frame_t *frame, void *cookie, xlator_t *xl, int32_t op_ret,
14f8ab
+            int32_t op_errno, fd_t *fd, dict_t *xdata)
14f8ab
 {
14f8ab
-    ob_fd_t *ob_fd = NULL;
14f8ab
-    int op_errno = 0;
14f8ab
-
14f8ab
-    if (!fd)
14f8ab
-        goto nofd;
14f8ab
-
14f8ab
-    LOCK(&fd->lock);
14f8ab
-    {
14f8ab
-        ob_fd = __ob_fd_ctx_get(this, fd);
14f8ab
-        if (!ob_fd)
14f8ab
-            goto unlock;
14f8ab
+    ob_inode_t *ob_inode;
14f8ab
 
14f8ab
-        if (ob_fd->op_errno) {
14f8ab
-            op_errno = ob_fd->op_errno;
14f8ab
-            goto unlock;
14f8ab
-        }
14f8ab
+    ob_inode = frame->local;
14f8ab
+    frame->local = NULL;
14f8ab
 
14f8ab
-        list_add_tail(&stub->list, &ob_fd->list);
14f8ab
-    }
14f8ab
-unlock:
14f8ab
-    UNLOCK(&fd->lock);
14f8ab
+    ob_open_completed(xl, ob_inode, cookie, op_ret, op_errno);
14f8ab
 
14f8ab
-nofd:
14f8ab
-    if (op_errno)
14f8ab
-        call_unwind_error(stub, -1, op_errno);
14f8ab
-    else if (ob_fd)
14f8ab
-        ob_fd_wake(this, fd, NULL);
14f8ab
-    else
14f8ab
-        call_resume(stub);
14f8ab
+    STACK_DESTROY(frame->root);
14f8ab
 
14f8ab
     return 0;
14f8ab
 }
14f8ab
 
14f8ab
-int
14f8ab
-ob_open_behind(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
14f8ab
+static int32_t
14f8ab
+ob_open_resume(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
14f8ab
                fd_t *fd, dict_t *xdata)
14f8ab
 {
14f8ab
-    ob_fd_t *ob_fd = NULL;
14f8ab
-    int ret = -1;
14f8ab
-    ob_conf_t *conf = NULL;
14f8ab
-    ob_inode_t *ob_inode = NULL;
14f8ab
-    gf_boolean_t open_in_progress = _gf_false;
14f8ab
-    int unlinked = 0;
14f8ab
-
14f8ab
-    conf = this->private;
14f8ab
-
14f8ab
-    if (flags & O_TRUNC) {
14f8ab
-        STACK_WIND(frame, default_open_cbk, FIRST_CHILD(this),
14f8ab
-                   FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata);
14f8ab
-        return 0;
14f8ab
-    }
14f8ab
-
14f8ab
-    ob_inode = ob_inode_get(this, fd->inode);
14f8ab
-
14f8ab
-    ob_fd = ob_fd_new();
14f8ab
-    if (!ob_fd)
14f8ab
-        goto enomem;
14f8ab
-
14f8ab
-    ob_fd->ob_inode = ob_inode;
14f8ab
-
14f8ab
-    ob_fd->fd = fd;
14f8ab
-
14f8ab
-    ob_fd->open_frame = copy_frame(frame);
14f8ab
-    if (!ob_fd->open_frame)
14f8ab
-        goto enomem;
14f8ab
-    ret = loc_copy(&ob_fd->loc, loc);
14f8ab
-    if (ret)
14f8ab
-        goto enomem;
14f8ab
-
14f8ab
-    ob_fd->flags = flags;
14f8ab
-    if (xdata)
14f8ab
-        ob_fd->xdata = dict_ref(xdata);
14f8ab
-
14f8ab
-    LOCK(&fd->inode->lock);
14f8ab
-    {
14f8ab
-        open_in_progress = ob_inode->open_in_progress;
14f8ab
-        unlinked = ob_inode->unlinked;
14f8ab
-        if (!open_in_progress && !unlinked) {
14f8ab
-            ret = ob_fd_ctx_set(this, fd, ob_fd);
14f8ab
-            if (ret) {
14f8ab
-                UNLOCK(&fd->inode->lock);
14f8ab
-                goto enomem;
14f8ab
-            }
14f8ab
-
14f8ab
-            list_add(&ob_fd->ob_fds_on_inode, &ob_inode->ob_fds);
14f8ab
-        }
14f8ab
-    }
14f8ab
-    UNLOCK(&fd->inode->lock);
14f8ab
-
14f8ab
-    /* We take a reference while the background open is pending or being
14f8ab
-     * processed. If we finally wind the request in the foreground, then
14f8ab
-     * ob_fd_free() will take care of this additional reference. */
14f8ab
-    fd_ref(fd);
14f8ab
-
14f8ab
-    if (!open_in_progress && !unlinked) {
14f8ab
-        STACK_UNWIND_STRICT(open, frame, 0, 0, fd, xdata);
14f8ab
-
14f8ab
-        if (!conf->lazy_open)
14f8ab
-            ob_fd_wake(this, fd, NULL);
14f8ab
-    } else {
14f8ab
-        ob_fd_free(ob_fd);
14f8ab
-        STACK_WIND(frame, default_open_cbk, FIRST_CHILD(this),
14f8ab
-                   FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata);
14f8ab
-    }
14f8ab
+    STACK_WIND_COOKIE(frame, ob_open_cbk, fd, FIRST_CHILD(this),
14f8ab
+                      FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata);
14f8ab
 
14f8ab
     return 0;
14f8ab
-enomem:
14f8ab
-    if (ob_fd) {
14f8ab
-        if (ob_fd->open_frame)
14f8ab
-            STACK_DESTROY(ob_fd->open_frame->root);
14f8ab
-
14f8ab
-        loc_wipe(&ob_fd->loc);
14f8ab
-        if (ob_fd->xdata)
14f8ab
-            dict_unref(ob_fd->xdata);
14f8ab
-
14f8ab
-        GF_FREE(ob_fd);
14f8ab
-    }
14f8ab
-
14f8ab
-    return -1;
14f8ab
 }
14f8ab
 
14f8ab
-int
14f8ab
+static int32_t
14f8ab
 ob_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, fd_t *fd,
14f8ab
         dict_t *xdata)
14f8ab
 {
14f8ab
-    fd_t *old_fd = NULL;
14f8ab
-    int ret = -1;
14f8ab
-    int op_errno = ENOMEM;
14f8ab
-    call_stub_t *stub = NULL;
14f8ab
-
14f8ab
-    old_fd = fd_lookup(fd->inode, 0);
14f8ab
-    if (old_fd) {
14f8ab
-        /* open-behind only when this is the first FD */
14f8ab
-        stub = fop_open_stub(frame, default_open_resume, loc, flags, fd, xdata);
14f8ab
-        if (!stub) {
14f8ab
-            fd_unref(old_fd);
14f8ab
-            goto err;
14f8ab
-        }
14f8ab
-
14f8ab
-        open_and_resume(this, old_fd, stub);
14f8ab
+    ob_inode_t *ob_inode;
14f8ab
+    call_frame_t *open_frame;
14f8ab
+    call_stub_t *stub;
14f8ab
+    fd_t *first_fd;
14f8ab
+    ob_state_t state;
14f8ab
+
14f8ab
+    state = ob_open_behind(this, fd, flags, &ob_inode, &first_fd);
14f8ab
+    if (state == OB_STATE_READY) {
14f8ab
+        /* There's no pending open, but there are other file descriptors opened
14f8ab
+         * or the current flags require a synchronous open. */
14f8ab
+        return default_open(frame, this, loc, flags, fd, xdata);
14f8ab
+    }
14f8ab
 
14f8ab
-        fd_unref(old_fd);
14f8ab
+    if (state == OB_STATE_OPEN_TRIGGERED) {
14f8ab
+        /* The first open is in progress (either because it was already issued
14f8ab
+         * or because this request triggered it). We try to create a new stub
14f8ab
+         * to retry the operation once the initial open completes. */
14f8ab
+        stub = fop_open_stub(frame, ob_open, loc, flags, fd, xdata);
14f8ab
+        if (stub != NULL) {
14f8ab
+            return ob_stub_dispatch(this, ob_inode, first_fd, stub);
14f8ab
+        }
14f8ab
 
14f8ab
-        return 0;
14f8ab
+        state = -ENOMEM;
14f8ab
     }
14f8ab
 
14f8ab
-    ret = ob_open_behind(frame, this, loc, flags, fd, xdata);
14f8ab
-    if (ret) {
14f8ab
-        goto err;
14f8ab
-    }
14f8ab
+    if (state == OB_STATE_FIRST_OPEN) {
14f8ab
+        /* We try to create a stub for the new open. A new frame needs to be
14f8ab
+         * used because the current one may be destroyed soon after sending
14f8ab
+         * the open's reply. */
14f8ab
+        open_frame = copy_frame(frame);
14f8ab
+        if (open_frame != NULL) {
14f8ab
+            stub = fop_open_stub(open_frame, ob_open_resume, loc, flags, fd,
14f8ab
+                                 xdata);
14f8ab
+            if (stub != NULL) {
14f8ab
+                open_frame->local = ob_inode;
14f8ab
 
14f8ab
-    return 0;
14f8ab
-err:
14f8ab
-    gf_msg(this->name, GF_LOG_ERROR, op_errno, OPEN_BEHIND_MSG_NO_MEMORY, "%s",
14f8ab
-           loc->path);
14f8ab
+                /* TODO: Previous version passed xdata back to the caller, but
14f8ab
+                 *       probably this doesn't make sense since it won't contain
14f8ab
+                 *       any requested data. I think it would be better to pass
14f8ab
+                 *       NULL for xdata. */
14f8ab
+                default_open_cbk(frame, NULL, this, 0, 0, fd, xdata);
14f8ab
 
14f8ab
-    STACK_UNWIND_STRICT(open, frame, -1, op_errno, 0, 0);
14f8ab
+                return ob_open_dispatch(this, ob_inode, first_fd, stub);
14f8ab
+            }
14f8ab
 
14f8ab
-    return 0;
14f8ab
-}
14f8ab
+            STACK_DESTROY(open_frame->root);
14f8ab
+        }
14f8ab
 
14f8ab
-fd_t *
14f8ab
-ob_get_wind_fd(xlator_t *this, fd_t *fd, uint32_t *flag)
14f8ab
-{
14f8ab
-    fd_t *wind_fd = NULL;
14f8ab
-    ob_fd_t *ob_fd = NULL;
14f8ab
-    ob_conf_t *conf = NULL;
14f8ab
+        /* In case of error, simulate a regular completion but with an error
14f8ab
+         * code. */
14f8ab
+        ob_open_completed(this, ob_inode, first_fd, -1, ENOMEM);
14f8ab
 
14f8ab
-    conf = this->private;
14f8ab
+        state = -ENOMEM;
14f8ab
+    }
14f8ab
 
14f8ab
-    ob_fd = ob_fd_ctx_get(this, fd);
14f8ab
+    /* In case of failure we need to decrement the number of open files because
14f8ab
+     * ob_fdclose() won't be called. */
14f8ab
 
14f8ab
-    if (ob_fd && ob_fd->open_frame && conf->use_anonymous_fd) {
14f8ab
-        wind_fd = fd_anonymous(fd->inode);
14f8ab
-        if ((ob_fd->flags & O_DIRECT) && (flag))
14f8ab
-            *flag = *flag | O_DIRECT;
14f8ab
-    } else {
14f8ab
-        wind_fd = fd_ref(fd);
14f8ab
+    LOCK(&fd->inode->lock);
14f8ab
+    {
14f8ab
+        ob_inode->open_count--;
14f8ab
     }
14f8ab
+    UNLOCK(&fd->inode->lock);
14f8ab
 
14f8ab
-    return wind_fd;
14f8ab
+    gf_smsg(this->name, GF_LOG_ERROR, -state, OPEN_BEHIND_MSG_FAILED, "fop=%s",
14f8ab
+            "open", "path=%s", loc->path, NULL);
14f8ab
+
14f8ab
+    return default_open_failure_cbk(frame, -state);
14f8ab
 }
14f8ab
 
14f8ab
-int
14f8ab
+static int32_t
14f8ab
 ob_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
14f8ab
          off_t offset, uint32_t flags, dict_t *xdata)
14f8ab
 {
14f8ab
-    call_stub_t *stub = NULL;
14f8ab
-    fd_t *wind_fd = NULL;
14f8ab
-    ob_conf_t *conf = NULL;
14f8ab
+    ob_conf_t *conf = this->private;
14f8ab
+    bool trigger = conf->read_after_open || !conf->use_anonymous_fd;
14f8ab
 
14f8ab
-    conf = this->private;
14f8ab
-
14f8ab
-    if (!conf->read_after_open)
14f8ab
-        wind_fd = ob_get_wind_fd(this, fd, &flags);
14f8ab
-    else
14f8ab
-        wind_fd = fd_ref(fd);
14f8ab
-
14f8ab
-    stub = fop_readv_stub(frame, default_readv_resume, wind_fd, size, offset,
14f8ab
-                          flags, xdata);
14f8ab
-    fd_unref(wind_fd);
14f8ab
-
14f8ab
-    if (!stub)
14f8ab
-        goto err;
14f8ab
-
14f8ab
-    open_and_resume(this, wind_fd, stub);
14f8ab
-
14f8ab
-    return 0;
14f8ab
-err:
14f8ab
-    STACK_UNWIND_STRICT(readv, frame, -1, ENOMEM, 0, 0, 0, 0, 0);
14f8ab
+    OB_POST_FD(readv, this, frame, fd, trigger, fd, size, offset, flags, xdata);
14f8ab
 
14f8ab
     return 0;
14f8ab
 }
14f8ab
 
14f8ab
-int
14f8ab
+static int32_t
14f8ab
 ob_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *iov,
14f8ab
           int count, off_t offset, uint32_t flags, struct iobref *iobref,
14f8ab
           dict_t *xdata)
14f8ab
 {
14f8ab
-    call_stub_t *stub = NULL;
14f8ab
-
14f8ab
-    stub = fop_writev_stub(frame, default_writev_resume, fd, iov, count, offset,
14f8ab
-                           flags, iobref, xdata);
14f8ab
-    if (!stub)
14f8ab
-        goto err;
14f8ab
-
14f8ab
-    open_and_resume(this, fd, stub);
14f8ab
-
14f8ab
-    return 0;
14f8ab
-err:
14f8ab
-    STACK_UNWIND_STRICT(writev, frame, -1, ENOMEM, 0, 0, 0);
14f8ab
+    OB_POST_FD(writev, this, frame, fd, true, fd, iov, count, offset, flags,
14f8ab
+               iobref, xdata);
14f8ab
 
14f8ab
     return 0;
14f8ab
 }
14f8ab
 
14f8ab
-int
14f8ab
+static int32_t
14f8ab
 ob_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
14f8ab
 {
14f8ab
-    call_stub_t *stub = NULL;
14f8ab
-    fd_t *wind_fd = NULL;
14f8ab
-
14f8ab
-    wind_fd = ob_get_wind_fd(this, fd, NULL);
14f8ab
-
14f8ab
-    stub = fop_fstat_stub(frame, default_fstat_resume, wind_fd, xdata);
14f8ab
+    ob_conf_t *conf = this->private;
14f8ab
+    bool trigger = !conf->use_anonymous_fd;
14f8ab
 
14f8ab
-    fd_unref(wind_fd);
14f8ab
-
14f8ab
-    if (!stub)
14f8ab
-        goto err;
14f8ab
-
14f8ab
-    open_and_resume(this, wind_fd, stub);
14f8ab
-
14f8ab
-    return 0;
14f8ab
-err:
14f8ab
-    STACK_UNWIND_STRICT(fstat, frame, -1, ENOMEM, 0, 0);
14f8ab
+    OB_POST_FD(fstat, this, frame, fd, trigger, fd, xdata);
14f8ab
 
14f8ab
     return 0;
14f8ab
 }
14f8ab
 
14f8ab
-int
14f8ab
+static int32_t
14f8ab
 ob_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
14f8ab
         gf_seek_what_t what, dict_t *xdata)
14f8ab
 {
14f8ab
-    call_stub_t *stub = NULL;
14f8ab
-    fd_t *wind_fd = NULL;
14f8ab
-
14f8ab
-    wind_fd = ob_get_wind_fd(this, fd, NULL);
14f8ab
+    ob_conf_t *conf = this->private;
14f8ab
+    bool trigger = !conf->use_anonymous_fd;
14f8ab
 
14f8ab
-    stub = fop_seek_stub(frame, default_seek_resume, wind_fd, offset, what,
14f8ab
-                         xdata);
14f8ab
-
14f8ab
-    fd_unref(wind_fd);
14f8ab
-
14f8ab
-    if (!stub)
14f8ab
-        goto err;
14f8ab
-
14f8ab
-    open_and_resume(this, wind_fd, stub);
14f8ab
-
14f8ab
-    return 0;
14f8ab
-err:
14f8ab
-    STACK_UNWIND_STRICT(fstat, frame, -1, ENOMEM, 0, 0);
14f8ab
+    OB_POST_FD(seek, this, frame, fd, trigger, fd, offset, what, xdata);
14f8ab
 
14f8ab
     return 0;
14f8ab
 }
14f8ab
 
14f8ab
-int
14f8ab
+static int32_t
14f8ab
 ob_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
14f8ab
 {
14f8ab
-    call_stub_t *stub = NULL;
14f8ab
-    ob_fd_t *ob_fd = NULL;
14f8ab
-    gf_boolean_t unwind = _gf_false;
14f8ab
-
14f8ab
-    LOCK(&fd->lock);
14f8ab
-    {
14f8ab
-        ob_fd = __ob_fd_ctx_get(this, fd);
14f8ab
-        if (ob_fd && ob_fd->open_frame)
14f8ab
-            /* if open() was never wound to backend,
14f8ab
-               no need to wind flush() either.
14f8ab
-            */
14f8ab
-            unwind = _gf_true;
14f8ab
-    }
14f8ab
-    UNLOCK(&fd->lock);
14f8ab
-
14f8ab
-    if (unwind)
14f8ab
-        goto unwind;
14f8ab
-
14f8ab
-    stub = fop_flush_stub(frame, default_flush_resume, fd, xdata);
14f8ab
-    if (!stub)
14f8ab
-        goto err;
14f8ab
-
14f8ab
-    open_and_resume(this, fd, stub);
14f8ab
-
14f8ab
-    return 0;
14f8ab
-err:
14f8ab
-    STACK_UNWIND_STRICT(flush, frame, -1, ENOMEM, 0);
14f8ab
-
14f8ab
-    return 0;
14f8ab
-
14f8ab
-unwind:
14f8ab
-    STACK_UNWIND_STRICT(flush, frame, 0, 0, 0);
14f8ab
+    OB_POST_FLUSH(this, frame, fd, fd, xdata);
14f8ab
 
14f8ab
     return 0;
14f8ab
 }
14f8ab
 
14f8ab
-int
14f8ab
+static int32_t
14f8ab
 ob_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int flag, dict_t *xdata)
14f8ab
 {
14f8ab
-    call_stub_t *stub = NULL;
14f8ab
-
14f8ab
-    stub = fop_fsync_stub(frame, default_fsync_resume, fd, flag, xdata);
14f8ab
-    if (!stub)
14f8ab
-        goto err;
14f8ab
-
14f8ab
-    open_and_resume(this, fd, stub);
14f8ab
-
14f8ab
-    return 0;
14f8ab
-err:
14f8ab
-    STACK_UNWIND_STRICT(fsync, frame, -1, ENOMEM, 0, 0, 0);
14f8ab
+    OB_POST_FD(fsync, this, frame, fd, true, fd, flag, xdata);
14f8ab
 
14f8ab
     return 0;
14f8ab
 }
14f8ab
 
14f8ab
-int
14f8ab
+static int32_t
14f8ab
 ob_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int cmd,
14f8ab
       struct gf_flock *flock, dict_t *xdata)
14f8ab
 {
14f8ab
-    call_stub_t *stub = NULL;
14f8ab
-
14f8ab
-    stub = fop_lk_stub(frame, default_lk_resume, fd, cmd, flock, xdata);
14f8ab
-    if (!stub)
14f8ab
-        goto err;
14f8ab
-
14f8ab
-    open_and_resume(this, fd, stub);
14f8ab
-
14f8ab
-    return 0;
14f8ab
-err:
14f8ab
-    STACK_UNWIND_STRICT(lk, frame, -1, ENOMEM, 0, 0);
14f8ab
+    OB_POST_FD(lk, this, frame, fd, true, fd, cmd, flock, xdata);
14f8ab
 
14f8ab
     return 0;
14f8ab
 }
14f8ab
 
14f8ab
-int
14f8ab
+static int32_t
14f8ab
 ob_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
14f8ab
              dict_t *xdata)
14f8ab
 {
14f8ab
-    call_stub_t *stub = NULL;
14f8ab
-
14f8ab
-    stub = fop_ftruncate_stub(frame, default_ftruncate_resume, fd, offset,
14f8ab
-                              xdata);
14f8ab
-    if (!stub)
14f8ab
-        goto err;
14f8ab
-
14f8ab
-    open_and_resume(this, fd, stub);
14f8ab
-
14f8ab
-    return 0;
14f8ab
-err:
14f8ab
-    STACK_UNWIND_STRICT(ftruncate, frame, -1, ENOMEM, 0, 0, 0);
14f8ab
+    OB_POST_FD(ftruncate, this, frame, fd, true, fd, offset, xdata);
14f8ab
 
14f8ab
     return 0;
14f8ab
 }
14f8ab
 
14f8ab
-int
14f8ab
+static int32_t
14f8ab
 ob_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xattr,
14f8ab
              int flags, dict_t *xdata)
14f8ab
 {
14f8ab
-    call_stub_t *stub = NULL;
14f8ab
-
14f8ab
-    stub = fop_fsetxattr_stub(frame, default_fsetxattr_resume, fd, xattr, flags,
14f8ab
-                              xdata);
14f8ab
-    if (!stub)
14f8ab
-        goto err;
14f8ab
-
14f8ab
-    open_and_resume(this, fd, stub);
14f8ab
-
14f8ab
-    return 0;
14f8ab
-err:
14f8ab
-    STACK_UNWIND_STRICT(fsetxattr, frame, -1, ENOMEM, 0);
14f8ab
+    OB_POST_FD(fsetxattr, this, frame, fd, true, fd, xattr, flags, xdata);
14f8ab
 
14f8ab
     return 0;
14f8ab
 }
14f8ab
 
14f8ab
-int
14f8ab
+static int32_t
14f8ab
 ob_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name,
14f8ab
              dict_t *xdata)
14f8ab
 {
14f8ab
-    call_stub_t *stub = NULL;
14f8ab
-
14f8ab
-    stub = fop_fgetxattr_stub(frame, default_fgetxattr_resume, fd, name, xdata);
14f8ab
-    if (!stub)
14f8ab
-        goto err;
14f8ab
-
14f8ab
-    open_and_resume(this, fd, stub);
14f8ab
-
14f8ab
-    return 0;
14f8ab
-err:
14f8ab
-    STACK_UNWIND_STRICT(fgetxattr, frame, -1, ENOMEM, 0, 0);
14f8ab
+    OB_POST_FD(fgetxattr, this, frame, fd, true, fd, name, xdata);
14f8ab
 
14f8ab
     return 0;
14f8ab
 }
14f8ab
 
14f8ab
-int
14f8ab
+static int32_t
14f8ab
 ob_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name,
14f8ab
                 dict_t *xdata)
14f8ab
 {
14f8ab
-    call_stub_t *stub = NULL;
14f8ab
-
14f8ab
-    stub = fop_fremovexattr_stub(frame, default_fremovexattr_resume, fd, name,
14f8ab
-                                 xdata);
14f8ab
-    if (!stub)
14f8ab
-        goto err;
14f8ab
-
14f8ab
-    open_and_resume(this, fd, stub);
14f8ab
-
14f8ab
-    return 0;
14f8ab
-err:
14f8ab
-    STACK_UNWIND_STRICT(fremovexattr, frame, -1, ENOMEM, 0);
14f8ab
+    OB_POST_FD(fremovexattr, this, frame, fd, true, fd, name, xdata);
14f8ab
 
14f8ab
     return 0;
14f8ab
 }
14f8ab
 
14f8ab
-int
14f8ab
+static int32_t
14f8ab
 ob_finodelk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
14f8ab
             int cmd, struct gf_flock *flock, dict_t *xdata)
14f8ab
 {
14f8ab
-    call_stub_t *stub = fop_finodelk_stub(frame, default_finodelk_resume,
14f8ab
-                                          volume, fd, cmd, flock, xdata);
14f8ab
-    if (stub)
14f8ab
-        open_and_resume(this, fd, stub);
14f8ab
-    else
14f8ab
-        STACK_UNWIND_STRICT(finodelk, frame, -1, ENOMEM, 0);
14f8ab
+    OB_POST_FD(finodelk, this, frame, fd, true, volume, fd, cmd, flock, xdata);
14f8ab
 
14f8ab
     return 0;
14f8ab
 }
14f8ab
 
14f8ab
-int
14f8ab
+static int32_t
14f8ab
 ob_fentrylk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
14f8ab
             const char *basename, entrylk_cmd cmd, entrylk_type type,
14f8ab
             dict_t *xdata)
14f8ab
 {
14f8ab
-    call_stub_t *stub = fop_fentrylk_stub(
14f8ab
-        frame, default_fentrylk_resume, volume, fd, basename, cmd, type, xdata);
14f8ab
-    if (stub)
14f8ab
-        open_and_resume(this, fd, stub);
14f8ab
-    else
14f8ab
-        STACK_UNWIND_STRICT(fentrylk, frame, -1, ENOMEM, 0);
14f8ab
+    OB_POST_FD(fentrylk, this, frame, fd, true, volume, fd, basename, cmd, type,
14f8ab
+               xdata);
14f8ab
 
14f8ab
     return 0;
14f8ab
 }
14f8ab
 
14f8ab
-int
14f8ab
+static int32_t
14f8ab
 ob_fxattrop(call_frame_t *frame, xlator_t *this, fd_t *fd,
14f8ab
             gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
14f8ab
 {
14f8ab
-    call_stub_t *stub = fop_fxattrop_stub(frame, default_fxattrop_resume, fd,
14f8ab
-                                          optype, xattr, xdata);
14f8ab
-    if (stub)
14f8ab
-        open_and_resume(this, fd, stub);
14f8ab
-    else
14f8ab
-        STACK_UNWIND_STRICT(fxattrop, frame, -1, ENOMEM, 0, 0);
14f8ab
+    OB_POST_FD(fxattrop, this, frame, fd, true, fd, optype, xattr, xdata);
14f8ab
 
14f8ab
     return 0;
14f8ab
 }
14f8ab
 
14f8ab
-int
14f8ab
+static int32_t
14f8ab
 ob_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *iatt,
14f8ab
             int valid, dict_t *xdata)
14f8ab
 {
14f8ab
-    call_stub_t *stub = NULL;
14f8ab
-
14f8ab
-    stub = fop_fsetattr_stub(frame, default_fsetattr_resume, fd, iatt, valid,
14f8ab
-                             xdata);
14f8ab
-    if (!stub)
14f8ab
-        goto err;
14f8ab
-
14f8ab
-    open_and_resume(this, fd, stub);
14f8ab
-
14f8ab
-    return 0;
14f8ab
-err:
14f8ab
-    STACK_UNWIND_STRICT(fsetattr, frame, -1, ENOMEM, 0, 0, 0);
14f8ab
+    OB_POST_FD(fsetattr, this, frame, fd, true, fd, iatt, valid, xdata);
14f8ab
 
14f8ab
     return 0;
14f8ab
 }
14f8ab
 
14f8ab
-int
14f8ab
+static int32_t
14f8ab
 ob_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
14f8ab
              off_t offset, size_t len, dict_t *xdata)
14f8ab
 {
14f8ab
-    call_stub_t *stub;
14f8ab
-
14f8ab
-    stub = fop_fallocate_stub(frame, default_fallocate_resume, fd, mode, offset,
14f8ab
-                              len, xdata);
14f8ab
-    if (!stub)
14f8ab
-        goto err;
14f8ab
-
14f8ab
-    open_and_resume(this, fd, stub);
14f8ab
+    OB_POST_FD(fallocate, this, frame, fd, true, fd, mode, offset, len, xdata);
14f8ab
 
14f8ab
     return 0;
14f8ab
-err:
14f8ab
-    STACK_UNWIND_STRICT(fallocate, frame, -1, ENOMEM, NULL, NULL, NULL);
14f8ab
-    return 0;
14f8ab
 }
14f8ab
 
14f8ab
-int
14f8ab
+static int32_t
14f8ab
 ob_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
14f8ab
            size_t len, dict_t *xdata)
14f8ab
 {
14f8ab
-    call_stub_t *stub;
14f8ab
-
14f8ab
-    stub = fop_discard_stub(frame, default_discard_resume, fd, offset, len,
14f8ab
-                            xdata);
14f8ab
-    if (!stub)
14f8ab
-        goto err;
14f8ab
-
14f8ab
-    open_and_resume(this, fd, stub);
14f8ab
+    OB_POST_FD(discard, this, frame, fd, true, fd, offset, len, xdata);
14f8ab
 
14f8ab
     return 0;
14f8ab
-err:
14f8ab
-    STACK_UNWIND_STRICT(discard, frame, -1, ENOMEM, NULL, NULL, NULL);
14f8ab
-    return 0;
14f8ab
 }
14f8ab
 
14f8ab
-int
14f8ab
+static int32_t
14f8ab
 ob_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
14f8ab
             off_t len, dict_t *xdata)
14f8ab
 {
14f8ab
-    call_stub_t *stub;
14f8ab
-
14f8ab
-    stub = fop_zerofill_stub(frame, default_zerofill_resume, fd, offset, len,
14f8ab
-                             xdata);
14f8ab
-    if (!stub)
14f8ab
-        goto err;
14f8ab
+    OB_POST_FD(zerofill, this, frame, fd, true, fd, offset, len, xdata);
14f8ab
 
14f8ab
-    open_and_resume(this, fd, stub);
14f8ab
-
14f8ab
-    return 0;
14f8ab
-err:
14f8ab
-    STACK_UNWIND_STRICT(zerofill, frame, -1, ENOMEM, NULL, NULL, NULL);
14f8ab
     return 0;
14f8ab
 }
14f8ab
 
14f8ab
-int
14f8ab
+static int32_t
14f8ab
 ob_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflags,
14f8ab
           dict_t *xdata)
14f8ab
 {
14f8ab
-    call_stub_t *stub = NULL;
14f8ab
-
14f8ab
-    stub = fop_unlink_stub(frame, default_unlink_resume, loc, xflags, xdata);
14f8ab
-    if (!stub)
14f8ab
-        goto err;
14f8ab
-
14f8ab
-    open_all_pending_fds_and_resume(this, loc->inode, stub);
14f8ab
-
14f8ab
-    return 0;
14f8ab
-err:
14f8ab
-    STACK_UNWIND_STRICT(unlink, frame, -1, ENOMEM, 0, 0, 0);
14f8ab
+    OB_POST_INODE(unlink, this, frame, loc->inode, true, loc, xflags, xdata);
14f8ab
 
14f8ab
     return 0;
14f8ab
 }
14f8ab
 
14f8ab
-int
14f8ab
+static int32_t
14f8ab
 ob_rename(call_frame_t *frame, xlator_t *this, loc_t *src, loc_t *dst,
14f8ab
           dict_t *xdata)
14f8ab
 {
14f8ab
-    call_stub_t *stub = NULL;
14f8ab
-
14f8ab
-    stub = fop_rename_stub(frame, default_rename_resume, src, dst, xdata);
14f8ab
-    if (!stub)
14f8ab
-        goto err;
14f8ab
-
14f8ab
-    open_all_pending_fds_and_resume(this, dst->inode, stub);
14f8ab
-
14f8ab
-    return 0;
14f8ab
-err:
14f8ab
-    STACK_UNWIND_STRICT(rename, frame, -1, ENOMEM, 0, 0, 0, 0, 0, 0);
14f8ab
+    OB_POST_INODE(rename, this, frame, dst->inode, true, src, dst, xdata);
14f8ab
 
14f8ab
     return 0;
14f8ab
 }
14f8ab
 
14f8ab
-int32_t
14f8ab
+static int32_t
14f8ab
 ob_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *stbuf,
14f8ab
            int32_t valid, dict_t *xdata)
14f8ab
 {
14f8ab
-    call_stub_t *stub = NULL;
14f8ab
-
14f8ab
-    stub = fop_setattr_stub(frame, default_setattr_resume, loc, stbuf, valid,
14f8ab
-                            xdata);
14f8ab
-    if (!stub)
14f8ab
-        goto err;
14f8ab
+    OB_POST_INODE(setattr, this, frame, loc->inode, true, loc, stbuf, valid,
14f8ab
+                  xdata);
14f8ab
 
14f8ab
-    open_all_pending_fds_and_resume(this, loc->inode, stub);
14f8ab
-
14f8ab
-    return 0;
14f8ab
-err:
14f8ab
-    STACK_UNWIND_STRICT(setattr, frame, -1, ENOMEM, NULL, NULL, NULL);
14f8ab
     return 0;
14f8ab
 }
14f8ab
 
14f8ab
-int32_t
14f8ab
+static int32_t
14f8ab
 ob_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
14f8ab
             int32_t flags, dict_t *xdata)
14f8ab
 {
14f8ab
-    call_stub_t *stub = NULL;
14f8ab
-    gf_boolean_t access_xattr = _gf_false;
14f8ab
-
14f8ab
     if (dict_get(dict, POSIX_ACL_DEFAULT_XATTR) ||
14f8ab
         dict_get(dict, POSIX_ACL_ACCESS_XATTR) ||
14f8ab
-        dict_get(dict, GF_SELINUX_XATTR_KEY))
14f8ab
-        access_xattr = _gf_true;
14f8ab
-
14f8ab
-    if (!access_xattr)
14f8ab
+        dict_get(dict, GF_SELINUX_XATTR_KEY)) {
14f8ab
         return default_setxattr(frame, this, loc, dict, flags, xdata);
14f8ab
+    }
14f8ab
 
14f8ab
-    stub = fop_setxattr_stub(frame, default_setxattr_resume, loc, dict, flags,
14f8ab
-                             xdata);
14f8ab
-    if (!stub)
14f8ab
-        goto err;
14f8ab
-
14f8ab
-    open_all_pending_fds_and_resume(this, loc->inode, stub);
14f8ab
+    OB_POST_INODE(setxattr, this, frame, loc->inode, true, loc, dict, flags,
14f8ab
+                  xdata);
14f8ab
 
14f8ab
     return 0;
14f8ab
-err:
14f8ab
-    STACK_UNWIND_STRICT(setxattr, frame, -1, ENOMEM, NULL);
14f8ab
-    return 0;
14f8ab
 }
14f8ab
 
14f8ab
-int
14f8ab
-ob_release(xlator_t *this, fd_t *fd)
14f8ab
+static void
14f8ab
+ob_fdclose(xlator_t *this, fd_t *fd)
14f8ab
 {
14f8ab
-    ob_fd_t *ob_fd = NULL;
14f8ab
+    struct list_head list;
14f8ab
+    ob_inode_t *ob_inode;
14f8ab
+    call_stub_t *stub;
14f8ab
+
14f8ab
+    INIT_LIST_HEAD(&list);
14f8ab
+    stub = NULL;
14f8ab
 
14f8ab
-    ob_fd = ob_fd_ctx_get(this, fd);
14f8ab
+    LOCK(&fd->inode->lock);
14f8ab
+    {
14f8ab
+        ob_inode = ob_inode_get_locked(this, fd->inode);
14f8ab
+        if (ob_inode != NULL) {
14f8ab
+            ob_inode->open_count--;
14f8ab
+
14f8ab
+            /* If this fd is the same as ob_inode->first_fd, it means that
14f8ab
+             * the initial open has not fully completed. We'll try to cancel
14f8ab
+             * it. */
14f8ab
+            if (ob_inode->first_fd == fd) {
14f8ab
+                if (ob_inode->first_open == OB_OPEN_PREPARING) {
14f8ab
+                    /* In this case ob_open_dispatch() has not been called yet.
14f8ab
+                     * We clear first_fd and first_open to allow that function
14f8ab
+                     * to know that the open is not really needed. This also
14f8ab
+                     * allows other requests to work as expected if they
14f8ab
+                     * arrive before the dispatch function is called. If there
14f8ab
+                     * are pending fops, we can directly process them here.
14f8ab
+                     * (note that there shouldn't be any fd related fops, but
14f8ab
+                     * if there are, it's fine if they fail). */
14f8ab
+                    ob_inode->first_fd = NULL;
14f8ab
+                    ob_inode->first_open = NULL;
14f8ab
+                    ob_inode->triggered = false;
14f8ab
+                    list_splice_init(&ob_inode->resume_fops, &list);
14f8ab
+                } else if (!ob_inode->triggered) {
14f8ab
+                    /* If the open has already been dispatched, we can only
14f8ab
+                     * cancel it if it has not been triggered. Otherwise we
14f8ab
+                     * simply wait until it completes. While it's not triggered,
14f8ab
+                     * first_open must be a valid stub and there can't be any
14f8ab
+                     * pending fops. */
14f8ab
+                    GF_ASSERT((ob_inode->first_open != NULL) &&
14f8ab
+                              list_empty(&ob_inode->resume_fops));
14f8ab
+
14f8ab
+                    ob_inode->first_fd = NULL;
14f8ab
+                    stub = ob_inode->first_open;
14f8ab
+                    ob_inode->first_open = NULL;
14f8ab
+                }
14f8ab
+            }
14f8ab
+        }
14f8ab
+    }
14f8ab
+    UNLOCK(&fd->inode->lock);
14f8ab
 
14f8ab
-    ob_fd_free(ob_fd);
14f8ab
+    if (stub != NULL) {
14f8ab
+        call_stub_destroy(stub);
14f8ab
+        fd_unref(fd);
14f8ab
+    }
14f8ab
 
14f8ab
-    return 0;
14f8ab
+    ob_resume_pending(&list);
14f8ab
 }
14f8ab
 
14f8ab
 int
14f8ab
 ob_forget(xlator_t *this, inode_t *inode)
14f8ab
 {
14f8ab
-    ob_inode_t *ob_inode = NULL;
14f8ab
+    ob_inode_t *ob_inode;
14f8ab
     uint64_t value = 0;
14f8ab
 
14f8ab
-    inode_ctx_del(inode, this, &value);
14f8ab
-
14f8ab
-    if (value) {
14f8ab
+    if ((inode_ctx_del(inode, this, &value) == 0) && (value != 0)) {
14f8ab
         ob_inode = (ob_inode_t *)(uintptr_t)value;
14f8ab
-        ob_inode_free(ob_inode);
14f8ab
+        GF_FREE(ob_inode);
14f8ab
     }
14f8ab
 
14f8ab
     return 0;
14f8ab
@@ -1153,20 +823,18 @@ ob_priv_dump(xlator_t *this)
14f8ab
 int
14f8ab
 ob_fdctx_dump(xlator_t *this, fd_t *fd)
14f8ab
 {
14f8ab
-    ob_fd_t *ob_fd = NULL;
14f8ab
     char key_prefix[GF_DUMP_MAX_BUF_LEN] = {
14f8ab
         0,
14f8ab
     };
14f8ab
-    int ret = 0;
14f8ab
+    uint64_t value = 0;
14f8ab
+    int ret = 0, error = 0;
14f8ab
 
14f8ab
     ret = TRY_LOCK(&fd->lock);
14f8ab
     if (ret)
14f8ab
         return 0;
14f8ab
 
14f8ab
-    ob_fd = __ob_fd_ctx_get(this, fd);
14f8ab
-    if (!ob_fd) {
14f8ab
-        UNLOCK(&fd->lock);
14f8ab
-        return 0;
14f8ab
+    if ((__fd_ctx_get(fd, this, &value) == 0) && (value != 0)) {
14f8ab
+        error = (int32_t)value;
14f8ab
     }
14f8ab
 
14f8ab
     gf_proc_dump_build_key(key_prefix, "xlator.performance.open-behind",
14f8ab
@@ -1175,17 +843,7 @@ ob_fdctx_dump(xlator_t *this, fd_t *fd)
14f8ab
 
14f8ab
     gf_proc_dump_write("fd", "%p", fd);
14f8ab
 
14f8ab
-    gf_proc_dump_write("open_frame", "%p", ob_fd->open_frame);
14f8ab
-
14f8ab
-    if (ob_fd->open_frame)
14f8ab
-        gf_proc_dump_write("open_frame.root.unique", "%" PRIu64,
14f8ab
-                           ob_fd->open_frame->root->unique);
14f8ab
-
14f8ab
-    gf_proc_dump_write("loc.path", "%s", ob_fd->loc.path);
14f8ab
-
14f8ab
-    gf_proc_dump_write("loc.ino", "%s", uuid_utoa(ob_fd->loc.gfid));
14f8ab
-
14f8ab
-    gf_proc_dump_write("flags", "%d", ob_fd->flags);
14f8ab
+    gf_proc_dump_write("error", "%d", error);
14f8ab
 
14f8ab
     UNLOCK(&fd->lock);
14f8ab
 
14f8ab
@@ -1307,7 +965,7 @@ struct xlator_fops fops = {
14f8ab
 };
14f8ab
 
14f8ab
 struct xlator_cbks cbks = {
14f8ab
-    .release = ob_release,
14f8ab
+    .fdclose = ob_fdclose,
14f8ab
     .forget = ob_forget,
14f8ab
 };
14f8ab
 
14f8ab
-- 
14f8ab
1.8.3.1
14f8ab