74096c
From b924c8ca8a133fc9413c8ed1407e63f1658c7e79 Mon Sep 17 00:00:00 2001
74096c
From: Xavi Hernandez <xhernandez@redhat.com>
74096c
Date: Tue, 12 May 2020 23:54:54 +0200
74096c
Subject: [PATCH 523/526] open-behind: rewrite of internal logic
74096c
74096c
There was a critical flaw in the previous implementation of open-behind.
74096c
74096c
When an open is done in the background, it's necessary to take a
74096c
reference on the fd_t object because once we "fake" the open answer,
74096c
the fd could be destroyed. However as long as there's a reference,
74096c
the release function won't be called. So, if the application closes
74096c
the file descriptor without having actually opened it, there will
74096c
always remain at least 1 reference, causing a leak.
74096c
74096c
To avoid this problem, the previous implementation didn't take a
74096c
reference on the fd_t, so there were races where the fd could be
74096c
destroyed while it was still in use.
74096c
74096c
To fix this, I've implemented a new xlator cbk that gets called from
74096c
fuse when the application closes a file descriptor.
74096c
74096c
The whole logic of handling background opens has been simplified and
74096c
it's more efficient now. Only if the fop needs to be delayed until an
74096c
open completes, a stub is created. Otherwise no memory allocations are
74096c
needed.
74096c
74096c
Correctly handling the close request while the open is still pending
74096c
has added a bit of complexity, but overall normal operation is simpler.
74096c
74096c
Upstream patch:
74096c
> Upstream-patch-link: https://review.gluster.org/#/c/glusterfs/+/24451
74096c
> Change-Id: I6376a5491368e0e1c283cc452849032636261592
74096c
> Fixes: #1225
74096c
> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
74096c
74096c
BUG: 1830713
74096c
Change-Id: I6376a5491368e0e1c283cc452849032636261592
74096c
Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
74096c
Reviewed-on: https://code.engineering.redhat.com/gerrit/224487
74096c
Tested-by: RHGS Build Bot <nigelb@redhat.com>
74096c
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
74096c
---
74096c
 libglusterfs/src/fd.c                              |   26 +
74096c
 libglusterfs/src/glusterfs/fd.h                    |    3 +
74096c
 libglusterfs/src/glusterfs/xlator.h                |    4 +
74096c
 libglusterfs/src/libglusterfs.sym                  |    1 +
74096c
 tests/basic/open-behind/open-behind.t              |  183 +++
74096c
 tests/basic/open-behind/tester-fd.c                |   99 ++
74096c
 tests/basic/open-behind/tester.c                   |  444 +++++++
74096c
 tests/basic/open-behind/tester.h                   |  145 +++
74096c
 tests/bugs/glusterfs/bug-873962-spb.t              |    1 +
74096c
 xlators/mount/fuse/src/fuse-bridge.c               |    2 +
74096c
 .../open-behind/src/open-behind-messages.h         |    6 +-
74096c
 xlators/performance/open-behind/src/open-behind.c  | 1302 ++++++++------------
74096c
 12 files changed, 1393 insertions(+), 823 deletions(-)
74096c
 create mode 100644 tests/basic/open-behind/open-behind.t
74096c
 create mode 100644 tests/basic/open-behind/tester-fd.c
74096c
 create mode 100644 tests/basic/open-behind/tester.c
74096c
 create mode 100644 tests/basic/open-behind/tester.h
74096c
74096c
diff --git a/libglusterfs/src/fd.c b/libglusterfs/src/fd.c
74096c
index 314546a..e4ec401 100644
74096c
--- a/libglusterfs/src/fd.c
74096c
+++ b/libglusterfs/src/fd.c
74096c
@@ -501,6 +501,32 @@ out:
74096c
 }
74096c
 
74096c
 void
74096c
+fd_close(fd_t *fd)
74096c
+{
74096c
+    xlator_t *xl, *old_THIS;
74096c
+
74096c
+    old_THIS = THIS;
74096c
+
74096c
+    for (xl = fd->inode->table->xl->graph->first; xl != NULL; xl = xl->next) {
74096c
+        if (!xl->call_cleanup) {
74096c
+            THIS = xl;
74096c
+
74096c
+            if (IA_ISDIR(fd->inode->ia_type)) {
74096c
+                if (xl->cbks->fdclosedir != NULL) {
74096c
+                    xl->cbks->fdclosedir(xl, fd);
74096c
+                }
74096c
+            } else {
74096c
+                if (xl->cbks->fdclose != NULL) {
74096c
+                    xl->cbks->fdclose(xl, fd);
74096c
+                }
74096c
+            }
74096c
+        }
74096c
+    }
74096c
+
74096c
+    THIS = old_THIS;
74096c
+}
74096c
+
74096c
+void
74096c
 fd_unref(fd_t *fd)
74096c
 {
74096c
     int32_t refcount = 0;
74096c
diff --git a/libglusterfs/src/glusterfs/fd.h b/libglusterfs/src/glusterfs/fd.h
74096c
index cdbe289..4d157c4 100644
74096c
--- a/libglusterfs/src/glusterfs/fd.h
74096c
+++ b/libglusterfs/src/glusterfs/fd.h
74096c
@@ -107,6 +107,9 @@ fd_ref(fd_t *fd);
74096c
 void
74096c
 fd_unref(fd_t *fd);
74096c
 
74096c
+void
74096c
+fd_close(fd_t *fd);
74096c
+
74096c
 fd_t *
74096c
 fd_create(struct _inode *inode, pid_t pid);
74096c
 
74096c
diff --git a/libglusterfs/src/glusterfs/xlator.h b/libglusterfs/src/glusterfs/xlator.h
74096c
index 8650ccc..273039a 100644
74096c
--- a/libglusterfs/src/glusterfs/xlator.h
74096c
+++ b/libglusterfs/src/glusterfs/xlator.h
74096c
@@ -705,6 +705,8 @@ typedef size_t (*cbk_inodectx_size_t)(xlator_t *this, inode_t *inode);
74096c
 
74096c
 typedef size_t (*cbk_fdctx_size_t)(xlator_t *this, fd_t *fd);
74096c
 
74096c
+typedef void (*cbk_fdclose_t)(xlator_t *this, fd_t *fd);
74096c
+
74096c
 struct xlator_cbks {
74096c
     cbk_forget_t forget;
74096c
     cbk_release_t release;
74096c
@@ -715,6 +717,8 @@ struct xlator_cbks {
74096c
     cbk_ictxmerge_t ictxmerge;
74096c
     cbk_inodectx_size_t ictxsize;
74096c
     cbk_fdctx_size_t fdctxsize;
74096c
+    cbk_fdclose_t fdclose;
74096c
+    cbk_fdclose_t fdclosedir;
74096c
 };
74096c
 
74096c
 typedef int32_t (*dumpop_priv_t)(xlator_t *this);
74096c
diff --git a/libglusterfs/src/libglusterfs.sym b/libglusterfs/src/libglusterfs.sym
74096c
index bc770e2..0a0862e 100644
74096c
--- a/libglusterfs/src/libglusterfs.sym
74096c
+++ b/libglusterfs/src/libglusterfs.sym
74096c
@@ -456,6 +456,7 @@ event_unregister_close
74096c
 fd_anonymous
74096c
 fd_anonymous_with_flags
74096c
 fd_bind
74096c
+fd_close
74096c
 fd_create
74096c
 fd_create_uint64
74096c
 __fd_ctx_del
74096c
diff --git a/tests/basic/open-behind/open-behind.t b/tests/basic/open-behind/open-behind.t
74096c
new file mode 100644
74096c
index 0000000..5e865d6
74096c
--- /dev/null
74096c
+++ b/tests/basic/open-behind/open-behind.t
74096c
@@ -0,0 +1,183 @@
74096c
+#!/bin/bash
74096c
+
74096c
+WD="$(dirname "${0}")"
74096c
+
74096c
+. ${WD}/../../include.rc
74096c
+. ${WD}/../../volume.rc
74096c
+
74096c
+function assign() {
74096c
+    local _assign_var="${1}"
74096c
+    local _assign_value="${2}"
74096c
+
74096c
+    printf -v "${_assign_var}" "%s" "${_assign_value}"
74096c
+}
74096c
+
74096c
+function pipe_create() {
74096c
+    local _pipe_create_var="${1}"
74096c
+    local _pipe_create_name
74096c
+    local _pipe_create_fd
74096c
+
74096c
+    _pipe_create_name="$(mktemp -u)"
74096c
+    mkfifo "${_pipe_create_name}"
74096c
+    exec {_pipe_create_fd}<>"${_pipe_create_name}"
74096c
+    rm "${_pipe_create_name}"
74096c
+
74096c
+    assign "${_pipe_create_var}" "${_pipe_create_fd}"
74096c
+}
74096c
+
74096c
+function pipe_close() {
74096c
+    local _pipe_close_fd="${!1}"
74096c
+
74096c
+    exec {_pipe_close_fd}>&-
74096c
+}
74096c
+
74096c
+function tester_start() {
74096c
+    declare -ag tester
74096c
+    local tester_in
74096c
+    local tester_out
74096c
+
74096c
+    pipe_create tester_in
74096c
+    pipe_create tester_out
74096c
+
74096c
+    ${WD}/tester <&${tester_in} >&${tester_out} &
74096c
+
74096c
+    tester=("$!" "${tester_in}" "${tester_out}")
74096c
+}
74096c
+
74096c
+function tester_send() {
74096c
+    declare -ag tester
74096c
+    local tester_res
74096c
+    local tester_extra
74096c
+
74096c
+    echo "${*}" >&${tester[1]}
74096c
+
74096c
+    read -t 3 -u ${tester[2]} tester_res tester_extra
74096c
+    echo "${tester_res} ${tester_extra}"
74096c
+    if [[ "${tester_res}" == "OK" ]]; then
74096c
+        return 0
74096c
+    fi
74096c
+
74096c
+    return 1
74096c
+}
74096c
+
74096c
+function tester_stop() {
74096c
+    declare -ag tester
74096c
+    local tester_res
74096c
+    # Ask the tester to quit, reap it and return its exit status.
74096c
+    tester_send "quit"
74096c
+
74096c
+    tester_res=0
74096c
+    # Do not use "if ! wait": inside that branch $? is the negated status,
74096c
+    # which is always 0, so the tester's real exit code would be lost.
74096c
+    wait ${tester[0]} || tester_res=$?
74096c
+
74096c
+    unset tester
74096c
+
74096c
+    return ${tester_res}
74096c
+}
74096c
+
74096c
+function count_open() {
74096c
+    local file="$(realpath "${B0}/${V0}/${1}")"
74096c
+    local count="0"
74096c
+    local inode
74096c
+    local ref
74096c
+
74096c
+    inode="$(stat -c %i "${file}")"
74096c
+
74096c
+    for fd in /proc/${BRICK_PID}/fd/*; do
74096c
+        ref="$(readlink "${fd}")"
74096c
+        if [[ "${ref}" == "${B0}/${V0}/"* ]]; then
74096c
+            if [[ "$(stat -c %i "${ref}")" == "${inode}" ]]; then
74096c
+                count="$((${count} + 1))"
74096c
+            fi
74096c
+        fi
74096c
+    done
74096c
+
74096c
+    echo "${count}"
74096c
+}
74096c
+
74096c
+cleanup
74096c
+
74096c
+TEST build_tester ${WD}/tester.c ${WD}/tester-fd.c
74096c
+
74096c
+TEST glusterd
74096c
+TEST pidof glusterd
74096c
+TEST ${CLI} volume create ${V0} ${H0}:${B0}/${V0}
74096c
+TEST ${CLI} volume set ${V0} flush-behind off
74096c
+TEST ${CLI} volume set ${V0} write-behind off
74096c
+TEST ${CLI} volume set ${V0} quick-read off
74096c
+TEST ${CLI} volume set ${V0} stat-prefetch on
74096c
+TEST ${CLI} volume set ${V0} io-cache off
74096c
+TEST ${CLI} volume set ${V0} open-behind on
74096c
+TEST ${CLI} volume set ${V0} lazy-open off
74096c
+TEST ${CLI} volume set ${V0} read-after-open off
74096c
+TEST ${CLI} volume start ${V0}
74096c
+
74096c
+TEST ${GFS} --volfile-id=/${V0} --volfile-server=${H0} ${M0};
74096c
+
74096c
+BRICK_PID="$(get_brick_pid ${V0} ${H0} ${B0}/${V0})"
74096c
+
74096c
+TEST touch "${M0}/test"
74096c
+
74096c
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
74096c
+TEST ${GFS} --volfile-id=/${V0} --volfile-server=${H0} ${M0};
74096c
+
74096c
+TEST tester_start
74096c
+
74096c
+TEST tester_send fd open 0 "${M0}/test"
74096c
+EXPECT_WITHIN 5 "1" count_open "/test"
74096c
+TEST tester_send fd close 0
74096c
+EXPECT_WITHIN 5 "0" count_open "/test"
74096c
+
74096c
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
74096c
+TEST ${CLI} volume set ${V0} lazy-open on
74096c
+TEST ${GFS} --volfile-id=/${V0} --volfile-server=${H0} ${M0};
74096c
+
74096c
+TEST tester_send fd open 0 "${M0}/test"
74096c
+sleep 2
74096c
+EXPECT "0" count_open "/test"
74096c
+TEST tester_send fd write 0 "test"
74096c
+EXPECT "1" count_open "/test"
74096c
+TEST tester_send fd close 0
74096c
+EXPECT_WITHIN 5 "0" count_open "/test"
74096c
+
74096c
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
74096c
+TEST ${GFS} --volfile-id=/${V0} --volfile-server=${H0} ${M0};
74096c
+
74096c
+TEST tester_send fd open 0 "${M0}/test"
74096c
+EXPECT "0" count_open "/test"
74096c
+EXPECT "test" tester_send fd read 0 64
74096c
+# Even though read-after-open is disabled, use-anonymous-fd is also disabled,
74096c
+# so reads need to open the file first.
74096c
+EXPECT "1" count_open "/test"
74096c
+TEST tester_send fd close 0
74096c
+EXPECT "0" count_open "/test"
74096c
+
74096c
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
74096c
+TEST ${GFS} --volfile-id=/${V0} --volfile-server=${H0} ${M0};
74096c
+
74096c
+TEST tester_send fd open 0 "${M0}/test"
74096c
+EXPECT "0" count_open "/test"
74096c
+TEST tester_send fd open 1 "${M0}/test"
74096c
+EXPECT "2" count_open "/test"
74096c
+TEST tester_send fd close 0
74096c
+EXPECT_WITHIN 5 "1" count_open "/test"
74096c
+TEST tester_send fd close 1
74096c
+EXPECT_WITHIN 5 "0" count_open "/test"
74096c
+
74096c
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
74096c
+TEST ${CLI} volume set ${V0} read-after-open on
74096c
+TEST ${GFS} --volfile-id=/${V0} --volfile-server=${H0} ${M0};
74096c
+
74096c
+TEST tester_send fd open 0 "${M0}/test"
74096c
+EXPECT "0" count_open "/test"
74096c
+EXPECT "test" tester_send fd read 0 64
74096c
+EXPECT "1" count_open "/test"
74096c
+TEST tester_send fd close 0
74096c
+EXPECT_WITHIN 5 "0" count_open "/test"
74096c
+
74096c
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
74096c
+
74096c
+TEST tester_stop
74096c
+
74096c
+cleanup
74096c
diff --git a/tests/basic/open-behind/tester-fd.c b/tests/basic/open-behind/tester-fd.c
74096c
new file mode 100644
74096c
index 0000000..00f02bc
74096c
--- /dev/null
74096c
+++ b/tests/basic/open-behind/tester-fd.c
74096c
@@ -0,0 +1,99 @@
74096c
+/*
74096c
+  Copyright (c) 2020 Red Hat, Inc. <http://www.redhat.com>
74096c
+  This file is part of GlusterFS.
74096c
+
74096c
+  This file is licensed to you under your choice of the GNU Lesser
74096c
+  General Public License, version 3 or any later version (LGPLv3 or
74096c
+  later), or the GNU General Public License, version 2 (GPLv2), in all
74096c
+  cases as published by the Free Software Foundation.
74096c
+*/
74096c
+
74096c
+#include "tester.h"
74096c
+
74096c
+#include <stdlib.h>
74096c
+#include <unistd.h>
74096c
+#include <sys/types.h>
74096c
+#include <sys/stat.h>
74096c
+#include <fcntl.h>
74096c
+#include <string.h>
74096c
+#include <ctype.h>
74096c
+#include <errno.h>
74096c
+
74096c
+static int32_t
74096c
+fd_open(context_t *ctx, command_t *cmd)
74096c
+{
74096c
+    obj_t *obj;
74096c
+    int32_t fd;
74096c
+
74096c
+    obj = cmd->args[0].obj.ref;
74096c
+
74096c
+    fd = open(cmd->args[1].str.data, O_RDWR);
74096c
+    if (fd < 0) {
74096c
+        return error(errno, "open() failed");
74096c
+    }
74096c
+
74096c
+    obj->type = OBJ_TYPE_FD;
74096c
+    obj->fd = fd;
74096c
+
74096c
+    out_ok("%d", fd);
74096c
+
74096c
+    return 0;
74096c
+}
74096c
+
74096c
+static int32_t
74096c
+fd_close(context_t *ctx, command_t *cmd)
74096c
+{
74096c
+    obj_t *obj;
74096c
+
74096c
+    obj = cmd->args[0].obj.ref;
74096c
+    obj->type = OBJ_TYPE_NONE;
74096c
+
74096c
+    if (close(obj->fd) != 0) {
74096c
+        return error(errno, "close() failed");
74096c
+    }
74096c
+
74096c
+    out_ok();
74096c
+
74096c
+    return 0;
74096c
+}
74096c
+
74096c
+static int32_t
74096c
+fd_write(context_t *ctx, command_t *cmd)
74096c
+{
74096c
+    ssize_t len, ret;
74096c
+
74096c
+    len = strlen(cmd->args[1].str.data);
74096c
+    ret = write(cmd->args[0].obj.ref->fd, cmd->args[1].str.data, len);
74096c
+    if (ret < 0) {
74096c
+        return error(errno, "write() failed");
74096c
+    }
74096c
+
74096c
+    out_ok("%zd", ret);
74096c
+
74096c
+    return 0;
74096c
+}
74096c
+
74096c
+static int32_t
74096c
+fd_read(context_t *ctx, command_t *cmd)
74096c
+{
74096c
+    char data[cmd->args[1].num.value + 1];
74096c
+    ssize_t ret;
74096c
+
74096c
+    ret = read(cmd->args[0].obj.ref->fd, data, cmd->args[1].num.value);
74096c
+    if (ret < 0) {
74096c
+        return error(errno, "read() failed");
74096c
+    }
74096c
+
74096c
+    data[ret] = 0;
74096c
+
74096c
+    out_ok("%zd %s", ret, data);
74096c
+
74096c
+    return 0;
74096c
+}
74096c
+
74096c
+command_t fd_commands[] = {
74096c
+    {"open", fd_open, CMD_ARGS(ARG_VAL(OBJ_TYPE_NONE), ARG_STR(1024))},
74096c
+    {"close", fd_close, CMD_ARGS(ARG_VAL(OBJ_TYPE_FD))},
74096c
+    {"write", fd_write, CMD_ARGS(ARG_VAL(OBJ_TYPE_FD), ARG_STR(1024))},
74096c
+    {"read", fd_read, CMD_ARGS(ARG_VAL(OBJ_TYPE_FD), ARG_NUM(0, 1024))},
74096c
+    CMD_END};
74096c
diff --git a/tests/basic/open-behind/tester.c b/tests/basic/open-behind/tester.c
74096c
new file mode 100644
74096c
index 0000000..b2da71c
74096c
--- /dev/null
74096c
+++ b/tests/basic/open-behind/tester.c
74096c
@@ -0,0 +1,444 @@
74096c
+/*
74096c
+  Copyright (c) 2020 Red Hat, Inc. <http://www.redhat.com>
74096c
+  This file is part of GlusterFS.
74096c
+
74096c
+  This file is licensed to you under your choice of the GNU Lesser
74096c
+  General Public License, version 3 or any later version (LGPLv3 or
74096c
+  later), or the GNU General Public License, version 2 (GPLv2), in all
74096c
+  cases as published by the Free Software Foundation.
74096c
+*/
74096c
+
74096c
+#include "tester.h"
74096c
+
74096c
+#include <stdlib.h>
74096c
+#include <unistd.h>
74096c
+#include <string.h>
74096c
+#include <ctype.h>
74096c
+#include <errno.h>
74096c
+
74096c
+static void *
74096c
+mem_alloc(size_t size)
74096c
+{
74096c
+    void *ptr;
74096c
+
74096c
+    ptr = malloc(size);
74096c
+    if (ptr == NULL) {
74096c
+        error(ENOMEM, "Failed to allocate memory (%zu bytes)", size);
74096c
+    }
74096c
+
74096c
+    return ptr;
74096c
+}
74096c
+
74096c
+static void
74096c
+mem_free(void *ptr)
74096c
+{
74096c
+    free(ptr);
74096c
+}
74096c
+
74096c
+static bool
74096c
+buffer_create(context_t *ctx, size_t size)
74096c
+{
74096c
+    ctx->buffer.base = mem_alloc(size);
74096c
+    if (ctx->buffer.base == NULL) {
74096c
+        return false;
74096c
+    }
74096c
+
74096c
+    ctx->buffer.size = size;
74096c
+    ctx->buffer.len = 0;
74096c
+    ctx->buffer.pos = 0;
74096c
+
74096c
+    return true;
74096c
+}
74096c
+
74096c
+static void
74096c
+buffer_destroy(context_t *ctx)
74096c
+{
74096c
+    mem_free(ctx->buffer.base);
74096c
+    ctx->buffer.size = 0;
74096c
+    ctx->buffer.len = 0;
74096c
+}
74096c
+
74096c
+static int32_t
74096c
+buffer_get(context_t *ctx)
74096c
+{
74096c
+    ssize_t len;
74096c
+    /* Next input byte (>= 0), 0 at end of input, or -errno if read() fails. */
74096c
+    if (ctx->buffer.pos >= ctx->buffer.len) {
74096c
+        len = read(0, ctx->buffer.base, ctx->buffer.size);
74096c
+        if (len < 0) {
74096c
+            return error(errno, "read() failed");
74096c
+        }
74096c
+        if (len == 0) {
74096c
+            return 0;
74096c
+        }
74096c
+
74096c
+        ctx->buffer.len = len;
74096c
+        ctx->buffer.pos = 0;
74096c
+    }
74096c
+    /* Cast so bytes >= 0x80 are not negative (error-like, UB in isspace). */
74096c
+    return (unsigned char)ctx->buffer.base[ctx->buffer.pos++];
74096c
+}
74096c
+
74096c
+static int32_t
74096c
+str_skip_spaces(context_t *ctx, int32_t current)
74096c
+{
74096c
+    while ((current > 0) && (current != '\n') && isspace(current)) {
74096c
+        current = buffer_get(ctx);
74096c
+    }
74096c
+
74096c
+    return current;
74096c
+}
74096c
+
74096c
+static int32_t
74096c
+str_token(context_t *ctx, char *buffer, uint32_t size, int32_t current)
74096c
+{
74096c
+    uint32_t len;
74096c
+
74096c
+    current = str_skip_spaces(ctx, current);
74096c
+
74096c
+    len = 0;
74096c
+    while ((size > 0) && (current > 0) && (current != '\n') &&
74096c
+           !isspace(current)) {
74096c
+        len++;
74096c
+        *buffer++ = current;
74096c
+        size--;
74096c
+        current = buffer_get(ctx);
74096c
+    }
74096c
+
74096c
+    if (len == 0) {
74096c
+        return error(ENODATA, "Expecting a token");
74096c
+    }
74096c
+
74096c
+    if (size == 0) {
74096c
+        return error(ENOBUFS, "Token too long");
74096c
+    }
74096c
+
74096c
+    *buffer = 0;
74096c
+
74096c
+    return current;
74096c
+}
74096c
+
74096c
+static int32_t
74096c
+str_number(context_t *ctx, uint64_t min, uint64_t max, uint64_t *value,
74096c
+           int32_t current)
74096c
+{
74096c
+    char text[32], *ptr;
74096c
+    uint64_t num;
74096c
+
74096c
+    current = str_token(ctx, text, sizeof(text), current);
74096c
+    if (current > 0) {
74096c
+        num = strtoul(text, &ptr, 0);
74096c
+        if ((*ptr != 0) || (num < min) || (num > max)) {
74096c
+            return error(ERANGE, "Invalid number");
74096c
+        }
74096c
+        *value = num;
74096c
+    }
74096c
+
74096c
+    return current;
74096c
+}
74096c
+
74096c
+static int32_t
74096c
+str_eol(context_t *ctx, int32_t current)
74096c
+{
74096c
+    current = str_skip_spaces(ctx, current);
74096c
+    if (current != '\n') {
74096c
+        return error(EINVAL, "Expecting end of command");
74096c
+    }
74096c
+
74096c
+    return current;
74096c
+}
74096c
+
74096c
+static void
74096c
+str_skip(context_t *ctx, int32_t current)
74096c
+{
74096c
+    while ((current > 0) && (current != '\n')) {
74096c
+        current = buffer_get(ctx);
74096c
+    }
74096c
+}
74096c
+
74096c
+static int32_t
74096c
+cmd_parse_obj(context_t *ctx, arg_t *arg, int32_t current)
74096c
+{
74096c
+    obj_t *obj;
74096c
+    uint64_t id;
74096c
+    /* Valid ids are 0..obj_count-1; obj_count itself is past the array. */
74096c
+    current = str_number(ctx, 0, ctx->obj_count - 1, &id, current);
74096c
+    if (current <= 0) {
74096c
+        return current;
74096c
+    }
74096c
+    /* The slot must currently hold the object type this argument expects. */
74096c
+    obj = &ctx->objs[id];
74096c
+    if (obj->type != arg->obj.type) {
74096c
+        if (obj->type != OBJ_TYPE_NONE) {
74096c
+            return error(EBUSY, "Object is in use");
74096c
+        }
74096c
+        return error(ENOENT, "Object is not defined");
74096c
+    }
74096c
+
74096c
+    arg->obj.ref = obj;
74096c
+
74096c
+    return current;
74096c
+}
74096c
+
74096c
+static int32_t
74096c
+cmd_parse_num(context_t *ctx, arg_t *arg, int32_t current)
74096c
+{
74096c
+    return str_number(ctx, arg->num.min, arg->num.max, &arg->num.value,
74096c
+                      current);
74096c
+}
74096c
+
74096c
+static int32_t
74096c
+cmd_parse_str(context_t *ctx, arg_t *arg, int32_t current)
74096c
+{
74096c
+    return str_token(ctx, arg->str.data, arg->str.size, current);
74096c
+}
74096c
+
74096c
+static int32_t
74096c
+cmd_parse_args(context_t *ctx, command_t *cmd, int32_t current)
74096c
+{
74096c
+    arg_t *arg;
74096c
+
74096c
+    for (arg = cmd->args; arg->type != ARG_TYPE_NONE; arg++) {
74096c
+        switch (arg->type) {
74096c
+            case ARG_TYPE_OBJ:
74096c
+                current = cmd_parse_obj(ctx, arg, current);
74096c
+                break;
74096c
+            case ARG_TYPE_NUM:
74096c
+                current = cmd_parse_num(ctx, arg, current);
74096c
+                break;
74096c
+            case ARG_TYPE_STR:
74096c
+                current = cmd_parse_str(ctx, arg, current);
74096c
+                break;
74096c
+            default:
74096c
+                return error(EINVAL, "Unknown argument type");
74096c
+        }
74096c
+    }
74096c
+
74096c
+    if (current < 0) {
74096c
+        return current;
74096c
+    }
74096c
+
74096c
+    current = str_eol(ctx, current);
74096c
+    if (current <= 0) {
74096c
+        return error(EINVAL, "Syntax error");
74096c
+    }
74096c
+
74096c
+    return cmd->handler(ctx, cmd);
74096c
+}
74096c
+
74096c
+static int32_t
74096c
+cmd_parse(context_t *ctx, command_t *cmds)
74096c
+{
74096c
+    char text[32];
74096c
+    command_t *cmd;
74096c
+    int32_t current;
74096c
+
74096c
+    cmd = cmds;
74096c
+    do {
74096c
+        current = str_token(ctx, text, sizeof(text), buffer_get(ctx));
74096c
+        if (current <= 0) {
74096c
+            return current;
74096c
+        }
74096c
+
74096c
+        while (cmd->name != NULL) {
74096c
+            if (strcmp(cmd->name, text) == 0) {
74096c
+                if (cmd->handler != NULL) {
74096c
+                    return cmd_parse_args(ctx, cmd, current);
74096c
+                }
74096c
+                cmd = cmd->cmds;
74096c
+                break;
74096c
+            }
74096c
+            cmd++;
74096c
+        }
74096c
+    } while (cmd->name != NULL);
74096c
+
74096c
+    str_skip(ctx, current);
74096c
+
74096c
+    return error(ENOTSUP, "Unknown command");
74096c
+}
74096c
+
74096c
+static void
74096c
+cmd_fini(context_t *ctx, command_t *cmds)
74096c
+{
74096c
+    command_t *cmd;
74096c
+    arg_t *arg;
74096c
+
74096c
+    for (cmd = cmds; cmd->name != NULL; cmd++) {
74096c
+        if (cmd->handler == NULL) {
74096c
+            cmd_fini(ctx, cmd->cmds);
74096c
+        } else {
74096c
+            for (arg = cmd->args; arg->type != ARG_TYPE_NONE; arg++) {
74096c
+                switch (arg->type) {
74096c
+                    case ARG_TYPE_STR:
74096c
+                        mem_free(arg->str.data);
74096c
+                        arg->str.data = NULL;
74096c
+                        break;
74096c
+                    default:
74096c
+                        break;
74096c
+                }
74096c
+            }
74096c
+        }
74096c
+    }
74096c
+}
74096c
+
74096c
+static bool
74096c
+cmd_init(context_t *ctx, command_t *cmds)
74096c
+{
74096c
+    command_t *cmd;
74096c
+    arg_t *arg;
74096c
+
74096c
+    for (cmd = cmds; cmd->name != NULL; cmd++) {
74096c
+        if (cmd->handler == NULL) {
74096c
+            if (!cmd_init(ctx, cmd->cmds)) {
74096c
+                return false;
74096c
+            }
74096c
+        } else {
74096c
+            for (arg = cmd->args; arg->type != ARG_TYPE_NONE; arg++) {
74096c
+                switch (arg->type) {
74096c
+                    case ARG_TYPE_STR:
74096c
+                        arg->str.data = mem_alloc(arg->str.size);
74096c
+                        if (arg->str.data == NULL) {
74096c
+                            return false;
74096c
+                        }
74096c
+                        break;
74096c
+                    default:
74096c
+                        break;
74096c
+                }
74096c
+            }
74096c
+        }
74096c
+    }
74096c
+
74096c
+    return true;
74096c
+}
74096c
+
74096c
+static bool
74096c
+objs_create(context_t *ctx, uint32_t count)
74096c
+{
74096c
+    uint32_t i;
74096c
+
74096c
+    ctx->objs = mem_alloc(sizeof(obj_t) * count);
74096c
+    if (ctx->objs == NULL) {
74096c
+        return false;
74096c
+    }
74096c
+    ctx->obj_count = count;
74096c
+
74096c
+    for (i = 0; i < count; i++) {
74096c
+        ctx->objs[i].type = OBJ_TYPE_NONE;
74096c
+    }
74096c
+
74096c
+    return true;
74096c
+}
74096c
+
74096c
+static int32_t
74096c
+objs_destroy(context_t *ctx)
74096c
+{
74096c
+    uint32_t i;
74096c
+    int32_t err;
74096c
+
74096c
+    err = 0;
74096c
+    for (i = 0; i < ctx->obj_count; i++) {
74096c
+        if (ctx->objs[i].type != OBJ_TYPE_NONE) {
74096c
+            err = error(ENOTEMPTY, "Objects not destroyed");
74096c
+            break;
74096c
+        }
74096c
+    }
74096c
+
74096c
+    mem_free(ctx->objs);
74096c
+    ctx->objs = NULL;
74096c
+    ctx->obj_count = 0;
74096c
+
74096c
+    return err;
74096c
+}
74096c
+
74096c
+static context_t *
74096c
+init(size_t size, uint32_t objs, command_t *cmds)
74096c
+{
74096c
+    context_t *ctx;
74096c
+
74096c
+    ctx = mem_alloc(sizeof(context_t));
74096c
+    if (ctx == NULL) {
74096c
+        goto failed;
74096c
+    }
74096c
+
74096c
+    if (!buffer_create(ctx, size)) {
74096c
+        goto failed_ctx;
74096c
+    }
74096c
+
74096c
+    if (!objs_create(ctx, objs)) {
74096c
+        goto failed_buffer;
74096c
+    }
74096c
+
74096c
+    if (!cmd_init(ctx, cmds)) {
74096c
+        goto failed_objs;
74096c
+    }
74096c
+
74096c
+    ctx->active = true;
74096c
+
74096c
+    return ctx;
74096c
+
74096c
+failed_objs:
74096c
+    cmd_fini(ctx, cmds);
74096c
+    objs_destroy(ctx);
74096c
+failed_buffer:
74096c
+    buffer_destroy(ctx);
74096c
+failed_ctx:
74096c
+    mem_free(ctx);
74096c
+failed:
74096c
+    return NULL;
74096c
+}
74096c
+
74096c
+static int32_t
74096c
+fini(context_t *ctx, command_t *cmds)
74096c
+{
74096c
+    int32_t ret;
74096c
+
74096c
+    cmd_fini(ctx, cmds);
74096c
+    buffer_destroy(ctx);
74096c
+
74096c
+    ret = objs_destroy(ctx);
74096c
+
74096c
+    ctx->active = false;
74096c
+
74096c
+    return ret;
74096c
+}
74096c
+
74096c
+static int32_t
74096c
+exec_quit(context_t *ctx, command_t *cmd)
74096c
+{
74096c
+    ctx->active = false;
74096c
+
74096c
+    return 0;
74096c
+}
74096c
+
74096c
+static command_t commands[] = {{"fd", NULL, CMD_SUB(fd_commands)},
74096c
+                               {"quit", exec_quit, CMD_ARGS()},
74096c
+                               CMD_END};
74096c
+
74096c
+int32_t
74096c
+main(int32_t argc, char *argv[])
74096c
+{
74096c
+    context_t *ctx;
74096c
+    int32_t res;
74096c
+
74096c
+    ctx = init(1024, 16, commands);
74096c
+    if (ctx == NULL) {
74096c
+        return 1;
74096c
+    }
74096c
+
74096c
+    do {
74096c
+        res = cmd_parse(ctx, commands);
74096c
+        if (res < 0) {
74096c
+            out_err(-res);
74096c
+        }
74096c
+    } while (ctx->active);
74096c
+
74096c
+    res = fini(ctx, commands);
74096c
+    if (res >= 0) {
74096c
+        out_ok();
74096c
+        return 0;
74096c
+    }
74096c
+
74096c
+    out_err(-res);
74096c
+
74096c
+    return 1;
74096c
+}
74096c
diff --git a/tests/basic/open-behind/tester.h b/tests/basic/open-behind/tester.h
74096c
new file mode 100644
74096c
index 0000000..64e940c
74096c
--- /dev/null
74096c
+++ b/tests/basic/open-behind/tester.h
74096c
@@ -0,0 +1,145 @@
74096c
+/*
74096c
+  Copyright (c) 2020 Red Hat, Inc. <http://www.redhat.com>
74096c
+  This file is part of GlusterFS.
74096c
+
74096c
+  This file is licensed to you under your choice of the GNU Lesser
74096c
+  General Public License, version 3 or any later version (LGPLv3 or
74096c
+  later), or the GNU General Public License, version 2 (GPLv2), in all
74096c
+  cases as published by the Free Software Foundation.
74096c
+*/
74096c
+
74096c
+#ifndef __TESTER_H__
74096c
+#define __TESTER_H__
74096c
+
74096c
+#include <stdio.h>
74096c
+#include <inttypes.h>
74096c
+#include <stdbool.h>
74096c
+
74096c
+enum _obj_type;
74096c
+typedef enum _obj_type obj_type_t;
74096c
+
74096c
+enum _arg_type;
74096c
+typedef enum _arg_type arg_type_t;
74096c
+
74096c
+struct _buffer;
74096c
+typedef struct _buffer buffer_t;
74096c
+
74096c
+struct _obj;
74096c
+typedef struct _obj obj_t;
74096c
+
74096c
+struct _context;
74096c
+typedef struct _context context_t;
74096c
+
74096c
+struct _arg;
74096c
+typedef struct _arg arg_t;
74096c
+
74096c
+struct _command;
74096c
+typedef struct _command command_t;
74096c
+
74096c
+enum _obj_type { OBJ_TYPE_NONE, OBJ_TYPE_FD };
74096c
+
74096c
+enum _arg_type { ARG_TYPE_NONE, ARG_TYPE_OBJ, ARG_TYPE_NUM, ARG_TYPE_STR };
74096c
+
74096c
+struct _buffer {
74096c
+    char *base;
74096c
+    uint32_t size;
74096c
+    uint32_t len;
74096c
+    uint32_t pos;
74096c
+};
74096c
+
74096c
+struct _obj {
74096c
+    obj_type_t type;
74096c
+    union {
74096c
+        int32_t fd;
74096c
+    };
74096c
+};
74096c
+
74096c
+struct _context {
74096c
+    obj_t *objs;
74096c
+    buffer_t buffer;
74096c
+    uint32_t obj_count;
74096c
+    bool active;
74096c
+};
74096c
+
74096c
+struct _arg {
74096c
+    arg_type_t type;
74096c
+    union {
74096c
+        struct {
74096c
+            obj_type_t type;
74096c
+            obj_t *ref;
74096c
+        } obj;
74096c
+        struct {
74096c
+            uint64_t value;
74096c
+            uint64_t min;
74096c
+            uint64_t max;
74096c
+        } num;
74096c
+        struct {
74096c
+            uint32_t size;
74096c
+            char *data;
74096c
+        } str;
74096c
+    };
74096c
+};
74096c
+
74096c
+struct _command {
74096c
+    const char *name;
74096c
+    int32_t (*handler)(context_t *ctx, command_t *cmd);
74096c
+    union {
74096c
+        arg_t *args;
74096c
+        command_t *cmds;
74096c
+    };
74096c
+};
74096c
+
74096c
+#define msg(_stream, _fmt, _args...)                                           \
74096c
+    do {                                                                       \
74096c
+        fprintf(_stream, _fmt "\n", ##_args);                                  \
74096c
+        fflush(_stream);                                                       \
74096c
+    } while (0)
74096c
+
74096c
+#define msg_out(_fmt, _args...) msg(stdout, _fmt, ##_args)
74096c
+#define msg_err(_err, _fmt, _args...)                                          \
74096c
+    ({                                                                         \
74096c
+        int32_t __msg_err = (_err);                                            \
74096c
+        msg(stderr, "[%4u:%-15s] " _fmt, __LINE__, __FUNCTION__, __msg_err,    \
74096c
+            ##_args);                                                          \
74096c
+        -__msg_err;                                                            \
74096c
+    })
74096c
+
74096c
+#define error(_err, _fmt, _args...) msg_err(_err, "E(%4d) " _fmt, ##_args)
74096c
+#define warn(_err, _fmt, _args...) msg_err(_err, "W(%4d) " _fmt, ##_args)
74096c
+#define info(_err, _fmt, _args...) msg_err(_err, "I(%4d) " _fmt, ##_args)
74096c
+
74096c
+#define out_ok(_args...) msg_out("OK " _args)
74096c
+#define out_err(_err) msg_out("ERR %d", _err)
74096c
+
74096c
+#define ARG_END                                                                \
74096c
+    {                                                                          \
74096c
+        ARG_TYPE_NONE                                                          \
74096c
+    }
74096c
+
74096c
+#define CMD_ARGS1(_x, _args...)                                                \
74096c
+    .args = (arg_t[]) { _args }
74096c
+#define CMD_ARGS(_args...) CMD_ARGS1(, ##_args, ARG_END)
74096c
+
74096c
+#define CMD_SUB(_cmds) .cmds = _cmds
74096c
+
74096c
+#define CMD_END                                                                \
74096c
+    {                                                                          \
74096c
+        NULL, NULL, CMD_SUB(NULL)                                              \
74096c
+    }
74096c
+
74096c
+#define ARG_VAL(_type)                                                         \
74096c
+    {                                                                          \
74096c
+        ARG_TYPE_OBJ, .obj = {.type = _type }                                  \
74096c
+    }
74096c
+#define ARG_NUM(_min, _max)                                                    \
74096c
+    {                                                                          \
74096c
+        ARG_TYPE_NUM, .num = {.min = _min, .max = _max }                       \
74096c
+    }
74096c
+#define ARG_STR(_size)                                                         \
74096c
+    {                                                                          \
74096c
+        ARG_TYPE_STR, .str = {.size = _size }                                  \
74096c
+    }
74096c
+
74096c
+extern command_t fd_commands[];
74096c
+
74096c
+#endif /* __TESTER_H__ */
74096c
\ No newline at end of file
74096c
diff --git a/tests/bugs/glusterfs/bug-873962-spb.t b/tests/bugs/glusterfs/bug-873962-spb.t
74096c
index db84a22..db71cc0 100644
74096c
--- a/tests/bugs/glusterfs/bug-873962-spb.t
74096c
+++ b/tests/bugs/glusterfs/bug-873962-spb.t
74096c
@@ -14,6 +14,7 @@ TEST $CLI volume set $V0 performance.io-cache off
74096c
 TEST $CLI volume set $V0 performance.write-behind off
74096c
 TEST $CLI volume set $V0 performance.stat-prefetch off
74096c
 TEST $CLI volume set $V0 performance.read-ahead off
74096c
+TEST $CLI volume set $V0 performance.open-behind off
74096c
 TEST $CLI volume set $V0 cluster.background-self-heal-count 0
74096c
 TEST $CLI volume start $V0
74096c
 TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id=$V0 $M0 --direct-io-mode=enable
74096c
diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c
74096c
index 919eea3..76b5809 100644
74096c
--- a/xlators/mount/fuse/src/fuse-bridge.c
74096c
+++ b/xlators/mount/fuse/src/fuse-bridge.c
74096c
@@ -3398,6 +3398,8 @@ fuse_release(xlator_t *this, fuse_in_header_t *finh, void *msg,
74096c
     gf_log("glusterfs-fuse", GF_LOG_TRACE,
74096c
            "finh->unique: %" PRIu64 ": RELEASE %p", finh->unique, state->fd);
74096c
 
74096c
+    fd_close(state->fd);
74096c
+
74096c
     fuse_fd_ctx_destroy(this, state->fd);
74096c
     fd_unref(fd);
74096c
 
74096c
diff --git a/xlators/performance/open-behind/src/open-behind-messages.h b/xlators/performance/open-behind/src/open-behind-messages.h
74096c
index f250824..0e78917 100644
74096c
--- a/xlators/performance/open-behind/src/open-behind-messages.h
74096c
+++ b/xlators/performance/open-behind/src/open-behind-messages.h
74096c
@@ -23,6 +23,10 @@
74096c
  */
74096c
 
74096c
 GLFS_MSGID(OPEN_BEHIND, OPEN_BEHIND_MSG_XLATOR_CHILD_MISCONFIGURED,
74096c
-           OPEN_BEHIND_MSG_VOL_MISCONFIGURED, OPEN_BEHIND_MSG_NO_MEMORY);
74096c
+           OPEN_BEHIND_MSG_VOL_MISCONFIGURED, OPEN_BEHIND_MSG_NO_MEMORY,
74096c
+           OPEN_BEHIND_MSG_FAILED, OPEN_BEHIND_MSG_BAD_STATE);
74096c
+
74096c
+#define OPEN_BEHIND_MSG_FAILED_STR "Failed to submit fop"
74096c
+#define OPEN_BEHIND_MSG_BAD_STATE_STR "Unexpected state"
74096c
 
74096c
 #endif /* _OPEN_BEHIND_MESSAGES_H_ */
74096c
diff --git a/xlators/performance/open-behind/src/open-behind.c b/xlators/performance/open-behind/src/open-behind.c
74096c
index cbe89ec..e43fe73 100644
74096c
--- a/xlators/performance/open-behind/src/open-behind.c
74096c
+++ b/xlators/performance/open-behind/src/open-behind.c
74096c
@@ -16,6 +16,18 @@
74096c
 #include "open-behind-messages.h"
74096c
 #include <glusterfs/glusterfs-acl.h>
74096c
 
74096c
+/* Note: The initial design of open-behind was made to cover the simple case
74096c
+ *       of open, read, close for small files. This pattern combined with
74096c
+ *       quick-read can do the whole operation without a single request to the
74096c
+ *       bricks (except the initial lookup).
74096c
+ *
74096c
+ *       The way to do this has been improved, but the logic remains the same.
74096c
+ *       Basically, this means that any operation sent to the fd or the inode
74096c
+ *       that is not a read, causes the open request to be sent to the
74096c
+ *       bricks, and all future operations will be executed synchronously,
74096c
+ *       including opens (it's reset once all fd's are closed).
74096c
+ */
74096c
+
74096c
 typedef struct ob_conf {
74096c
     gf_boolean_t use_anonymous_fd; /* use anonymous FDs wherever safe
74096c
                                       e.g - fstat() readv()
74096c
@@ -32,1096 +44,754 @@ typedef struct ob_conf {
74096c
                                         */
74096c
 } ob_conf_t;
74096c
 
74096c
-typedef struct ob_inode {
74096c
-    inode_t *inode;
74096c
-    struct list_head resume_fops;
74096c
-    struct list_head ob_fds;
74096c
-    int count;
74096c
-    int op_ret;
74096c
-    int op_errno;
74096c
-    gf_boolean_t open_in_progress;
74096c
-    int unlinked;
74096c
-} ob_inode_t;
74096c
+/* A negative state represents an errno value negated. In this case the
74096c
+ * current operation cannot be processed. */
74096c
+typedef enum _ob_state {
74096c
+    /* There are no opens on the inode or the first open is already
74096c
+     * completed. The current operation can be sent directly. */
74096c
+    OB_STATE_READY = 0,
74096c
 
74096c
-typedef struct ob_fd {
74096c
-    call_frame_t *open_frame;
74096c
-    loc_t loc;
74096c
-    dict_t *xdata;
74096c
-    int flags;
74096c
-    int op_errno;
74096c
-    ob_inode_t *ob_inode;
74096c
-    fd_t *fd;
74096c
-    gf_boolean_t opened;
74096c
-    gf_boolean_t ob_inode_fops_waiting;
74096c
-    struct list_head list;
74096c
-    struct list_head ob_fds_on_inode;
74096c
-} ob_fd_t;
74096c
+    /* There's an open pending and it has been triggered. The current
74096c
+     * operation should be "stubbified" and processed with
74096c
+     * ob_stub_dispatch(). */
74096c
+    OB_STATE_OPEN_TRIGGERED,
74096c
 
74096c
-ob_inode_t *
74096c
-ob_inode_alloc(inode_t *inode)
74096c
-{
74096c
-    ob_inode_t *ob_inode = NULL;
74096c
+    /* There's an open pending but it has not been triggered. The current
74096c
+     * operation can be processed directly but using an anonymous fd. */
74096c
+    OB_STATE_OPEN_PENDING,
74096c
 
74096c
-    ob_inode = GF_CALLOC(1, sizeof(*ob_inode), gf_ob_mt_inode_t);
74096c
-    if (ob_inode == NULL)
74096c
-        goto out;
74096c
+    /* The current operation is the first open on the inode. */
74096c
+    OB_STATE_FIRST_OPEN
74096c
+} ob_state_t;
74096c
 
74096c
-    ob_inode->inode = inode;
74096c
-    INIT_LIST_HEAD(&ob_inode->resume_fops);
74096c
-    INIT_LIST_HEAD(&ob_inode->ob_fds);
74096c
-out:
74096c
-    return ob_inode;
74096c
-}
74096c
-
74096c
-void
74096c
-ob_inode_free(ob_inode_t *ob_inode)
74096c
-{
74096c
-    if (ob_inode == NULL)
74096c
-        goto out;
74096c
+typedef struct ob_inode {
74096c
+    /* List of stubs pending on the first open. Once the first open is
74096c
+     * complete, all these stubs will be resubmitted, and dependencies
74096c
+     * will be checked again. */
74096c
+    struct list_head resume_fops;
74096c
 
74096c
-    list_del_init(&ob_inode->resume_fops);
74096c
-    list_del_init(&ob_inode->ob_fds);
74096c
+    /* The inode this object references. */
74096c
+    inode_t *inode;
74096c
 
74096c
-    GF_FREE(ob_inode);
74096c
-out:
74096c
-    return;
74096c
-}
74096c
+    /* The fd from the first open sent to this inode. It will be set
74096c
+     * from the moment the open is processed until the open is fully
74096c
+     * executed or closed before actually opened. It's NULL in all
74096c
+     * other cases. */
74096c
+    fd_t *first_fd;
74096c
+
74096c
+    /* The stub from the first open operation. When open fop starts
74096c
+     * being processed, it's assigned the OB_OPEN_PREPARING value
74096c
+     * until the actual stub is created. This is necessary to avoid
74096c
+     * creating the stub inside a locked region. Once the stub is
74096c
+     * successfully created, it's assigned here. This value is set
74096c
+     * to NULL once the stub is resumed. */
74096c
+    call_stub_t *first_open;
74096c
+
74096c
+    /* The total number of currently open fd's on this inode. */
74096c
+    int32_t open_count;
74096c
+
74096c
+    /* This flag is set as soon as we know that the open will be
74096c
+     * sent to the bricks, even before the stub is ready. */
74096c
+    bool triggered;
74096c
+} ob_inode_t;
74096c
 
74096c
-ob_inode_t *
74096c
-ob_inode_get(xlator_t *this, inode_t *inode)
74096c
+/* Dummy pointer used temporarily while the actual open stub is being created */
74096c
+#define OB_OPEN_PREPARING ((call_stub_t *)-1)
74096c
+
74096c
+#define OB_POST_COMMON(_fop, _xl, _frame, _fd, _args...)                       \
74096c
+    case OB_STATE_FIRST_OPEN:                                                  \
74096c
+        gf_smsg((_xl)->name, GF_LOG_ERROR, EINVAL, OPEN_BEHIND_MSG_BAD_STATE,  \
74096c
+                "fop=%s", #_fop, "state=%d", __ob_state, NULL);                \
74096c
+        default_##_fop##_failure_cbk(_frame, EINVAL);                          \
74096c
+        break;                                                                 \
74096c
+    case OB_STATE_READY:                                                       \
74096c
+        default_##_fop(_frame, _xl, ##_args);                                  \
74096c
+        break;                                                                 \
74096c
+    case OB_STATE_OPEN_TRIGGERED: {                                            \
74096c
+        call_stub_t *__ob_stub = fop_##_fop##_stub(_frame, ob_##_fop,          \
74096c
+                                                   ##_args);                   \
74096c
+        if (__ob_stub != NULL) {                                               \
74096c
+            ob_stub_dispatch(_xl, __ob_inode, _fd, __ob_stub);                 \
74096c
+            break;                                                             \
74096c
+        }                                                                      \
74096c
+        __ob_state = -ENOMEM;                                                  \
74096c
+    }                                                                          \
74096c
+    default:                                                                   \
74096c
+        gf_smsg((_xl)->name, GF_LOG_ERROR, -__ob_state,                        \
74096c
+                OPEN_BEHIND_MSG_FAILED, "fop=%s", #_fop, NULL);                \
74096c
+        default_##_fop##_failure_cbk(_frame, -__ob_state)
74096c
+
74096c
+#define OB_POST_FD(_fop, _xl, _frame, _fd, _trigger, _args...)                 \
74096c
+    do {                                                                       \
74096c
+        ob_inode_t *__ob_inode;                                                \
74096c
+        fd_t *__first_fd;                                                      \
74096c
+        ob_state_t __ob_state = ob_open_and_resume_fd(                         \
74096c
+            _xl, _fd, 0, true, _trigger, &__ob_inode, &__first_fd);            \
74096c
+        switch (__ob_state) {                                                  \
74096c
+            case OB_STATE_OPEN_PENDING:                                        \
74096c
+                if (!(_trigger)) {                                             \
74096c
+                    fd_t *__ob_fd = fd_anonymous_with_flags((_fd)->inode,      \
74096c
+                                                            (_fd)->flags);     \
74096c
+                    if (__ob_fd != NULL) {                                     \
74096c
+                        default_##_fop(_frame, _xl, ##_args);                  \
74096c
+                        fd_unref(__ob_fd);                                     \
74096c
+                        break;                                                 \
74096c
+                    }                                                          \
74096c
+                    __ob_state = -ENOMEM;                                      \
74096c
+                }                                                              \
74096c
+                OB_POST_COMMON(_fop, _xl, _frame, __first_fd, ##_args);        \
74096c
+        }                                                                      \
74096c
+    } while (0)
74096c
+
74096c
+#define OB_POST_FLUSH(_xl, _frame, _fd, _args...)                              \
74096c
+    do {                                                                       \
74096c
+        ob_inode_t *__ob_inode;                                                \
74096c
+        fd_t *__first_fd;                                                      \
74096c
+        ob_state_t __ob_state = ob_open_and_resume_fd(                         \
74096c
+            _xl, _fd, 0, true, false, &__ob_inode, &__first_fd);               \
74096c
+        switch (__ob_state) {                                                  \
74096c
+            case OB_STATE_OPEN_PENDING:                                        \
74096c
+                default_flush_cbk(_frame, NULL, _xl, 0, 0, NULL);              \
74096c
+                break;                                                         \
74096c
+                OB_POST_COMMON(flush, _xl, _frame, __first_fd, ##_args);       \
74096c
+        }                                                                      \
74096c
+    } while (0)
74096c
+
74096c
+#define OB_POST_INODE(_fop, _xl, _frame, _inode, _trigger, _args...)           \
74096c
+    do {                                                                       \
74096c
+        ob_inode_t *__ob_inode;                                                \
74096c
+        fd_t *__first_fd;                                                      \
74096c
+        ob_state_t __ob_state = ob_open_and_resume_inode(                      \
74096c
+            _xl, _inode, NULL, 0, true, _trigger, &__ob_inode, &__first_fd);   \
74096c
+        switch (__ob_state) {                                                  \
74096c
+            case OB_STATE_OPEN_PENDING:                                        \
74096c
+                OB_POST_COMMON(_fop, _xl, _frame, __first_fd, ##_args);        \
74096c
+        }                                                                      \
74096c
+    } while (0)
74096c
+
74096c
+static ob_inode_t *
74096c
+ob_inode_get_locked(xlator_t *this, inode_t *inode)
74096c
 {
74096c
     ob_inode_t *ob_inode = NULL;
74096c
     uint64_t value = 0;
74096c
-    int ret = 0;
74096c
 
74096c
-    if (!inode)
74096c
-        goto out;
74096c
+    if ((__inode_ctx_get(inode, this, &value) == 0) && (value != 0)) {
74096c
+        return (ob_inode_t *)(uintptr_t)value;
74096c
+    }
74096c
 
74096c
-    LOCK(&inode->lock);
74096c
-    {
74096c
-        __inode_ctx_get(inode, this, &value);
74096c
-        if (value == 0) {
74096c
-            ob_inode = ob_inode_alloc(inode);
74096c
-            if (ob_inode == NULL)
74096c
-                goto unlock;
74096c
-
74096c
-            value = (uint64_t)(uintptr_t)ob_inode;
74096c
-            ret = __inode_ctx_set(inode, this, &value);
74096c
-            if (ret < 0) {
74096c
-                ob_inode_free(ob_inode);
74096c
-                ob_inode = NULL;
74096c
-            }
74096c
-        } else {
74096c
-            ob_inode = (ob_inode_t *)(uintptr_t)value;
74096c
+    ob_inode = GF_CALLOC(1, sizeof(*ob_inode), gf_ob_mt_inode_t);
74096c
+    if (ob_inode != NULL) {
74096c
+        ob_inode->inode = inode;
74096c
+        INIT_LIST_HEAD(&ob_inode->resume_fops);
74096c
+
74096c
+        value = (uint64_t)(uintptr_t)ob_inode;
74096c
+        if (__inode_ctx_set(inode, this, &value) < 0) {
74096c
+            GF_FREE(ob_inode);
74096c
+            ob_inode = NULL;
74096c
         }
74096c
     }
74096c
-unlock:
74096c
-    UNLOCK(&inode->lock);
74096c
 
74096c
-out:
74096c
     return ob_inode;
74096c
 }
74096c
 
74096c
-ob_fd_t *
74096c
-__ob_fd_ctx_get(xlator_t *this, fd_t *fd)
74096c
+static ob_state_t
74096c
+ob_open_and_resume_inode(xlator_t *xl, inode_t *inode, fd_t *fd,
74096c
+                         int32_t open_count, bool synchronous, bool trigger,
74096c
+                         ob_inode_t **pob_inode, fd_t **pfd)
74096c
 {
74096c
-    uint64_t value = 0;
74096c
-    int ret = -1;
74096c
-    ob_fd_t *ob_fd = NULL;
74096c
+    ob_conf_t *conf;
74096c
+    ob_inode_t *ob_inode;
74096c
+    call_stub_t *open_stub;
74096c
 
74096c
-    ret = __fd_ctx_get(fd, this, &value);
74096c
-    if (ret)
74096c
-        return NULL;
74096c
+    if (inode == NULL) {
74096c
+        return OB_STATE_READY;
74096c
+    }
74096c
 
74096c
-    ob_fd = (void *)((long)value);
74096c
+    conf = xl->private;
74096c
 
74096c
-    return ob_fd;
74096c
-}
74096c
+    *pfd = NULL;
74096c
 
74096c
-ob_fd_t *
74096c
-ob_fd_ctx_get(xlator_t *this, fd_t *fd)
74096c
-{
74096c
-    ob_fd_t *ob_fd = NULL;
74096c
-
74096c
-    LOCK(&fd->lock);
74096c
+    LOCK(&inode->lock);
74096c
     {
74096c
-        ob_fd = __ob_fd_ctx_get(this, fd);
74096c
-    }
74096c
-    UNLOCK(&fd->lock);
74096c
-
74096c
-    return ob_fd;
74096c
-}
74096c
+        ob_inode = ob_inode_get_locked(xl, inode);
74096c
+        if (ob_inode == NULL) {
74096c
+            UNLOCK(&inode->lock);
74096c
 
74096c
-int
74096c
-__ob_fd_ctx_set(xlator_t *this, fd_t *fd, ob_fd_t *ob_fd)
74096c
-{
74096c
-    uint64_t value = 0;
74096c
-    int ret = -1;
74096c
+            return -ENOMEM;
74096c
+        }
74096c
+        *pob_inode = ob_inode;
74096c
+
74096c
+        ob_inode->open_count += open_count;
74096c
+
74096c
+        /* If first_fd is not NULL, it means that there's a previous open not
74096c
+         * yet completed. */
74096c
+        if (ob_inode->first_fd != NULL) {
74096c
+            *pfd = ob_inode->first_fd;
74096c
+            /* If the current request doesn't trigger the open and it hasn't
74096c
+             * been triggered yet, we can continue without issuing the open
74096c
+             * only if the current request belongs to the same fd as the
74096c
+             * first one. */
74096c
+            if (!trigger && !ob_inode->triggered &&
74096c
+                (ob_inode->first_fd == fd)) {
74096c
+                UNLOCK(&inode->lock);
74096c
+
74096c
+                return OB_STATE_OPEN_PENDING;
74096c
+            }
74096c
 
74096c
-    value = (long)((void *)ob_fd);
74096c
+            /* We need to issue the open. It could have already been triggered
74096c
+             * before. In this case open_stub will be NULL. Or the initial open
74096c
+             * may not be completely ready yet. In this case open_stub will be
74096c
+             * OB_OPEN_PREPARING. */
74096c
+            open_stub = ob_inode->first_open;
74096c
+            ob_inode->first_open = NULL;
74096c
+            ob_inode->triggered = true;
74096c
 
74096c
-    ret = __fd_ctx_set(fd, this, value);
74096c
+            UNLOCK(&inode->lock);
74096c
 
74096c
-    return ret;
74096c
-}
74096c
+            if ((open_stub != NULL) && (open_stub != OB_OPEN_PREPARING)) {
74096c
+                call_resume(open_stub);
74096c
+            }
74096c
 
74096c
-int
74096c
-ob_fd_ctx_set(xlator_t *this, fd_t *fd, ob_fd_t *ob_fd)
74096c
-{
74096c
-    int ret = -1;
74096c
+            return OB_STATE_OPEN_TRIGGERED;
74096c
+        }
74096c
 
74096c
-    LOCK(&fd->lock);
74096c
-    {
74096c
-        ret = __ob_fd_ctx_set(this, fd, ob_fd);
74096c
-    }
74096c
-    UNLOCK(&fd->lock);
74096c
+        /* There's no pending open. Only opens can be non synchronous, so all
74096c
+         * regular fops will be processed directly. For non synchronous opens,
74096c
+         * we'll still process them normally (i.e. synchronous) if there are
74096c
+         * more file descriptors open. */
74096c
+        if (synchronous || (ob_inode->open_count > open_count)) {
74096c
+            UNLOCK(&inode->lock);
74096c
 
74096c
-    return ret;
74096c
-}
74096c
+            return OB_STATE_READY;
74096c
+        }
74096c
 
74096c
-ob_fd_t *
74096c
-ob_fd_new(void)
74096c
-{
74096c
-    ob_fd_t *ob_fd = NULL;
74096c
+        *pfd = fd;
74096c
 
74096c
-    ob_fd = GF_CALLOC(1, sizeof(*ob_fd), gf_ob_mt_fd_t);
74096c
+        /* This is the first open. We keep a reference on the fd and set
74096c
+         * first_open stub to OB_OPEN_PREPARING until the actual stub can
74096c
+         * be assigned (we don't create the stub here to avoid doing memory
74096c
+         * allocations inside the mutex). */
74096c
+        ob_inode->first_fd = __fd_ref(fd);
74096c
+        ob_inode->first_open = OB_OPEN_PREPARING;
74096c
 
74096c
-    INIT_LIST_HEAD(&ob_fd->list);
74096c
-    INIT_LIST_HEAD(&ob_fd->ob_fds_on_inode);
74096c
+        /* If lazy_open is not set, we'll need to immediately send the open,
74096c
+         * so we set triggered right now. */
74096c
+        ob_inode->triggered = !conf->lazy_open;
74096c
+    }
74096c
+    UNLOCK(&inode->lock);
74096c
 
74096c
-    return ob_fd;
74096c
+    return OB_STATE_FIRST_OPEN;
74096c
 }
74096c
 
74096c
-void
74096c
-ob_fd_free(ob_fd_t *ob_fd)
74096c
+static ob_state_t
74096c
+ob_open_and_resume_fd(xlator_t *xl, fd_t *fd, int32_t open_count,
74096c
+                      bool synchronous, bool trigger, ob_inode_t **pob_inode,
74096c
+                      fd_t **pfd)
74096c
 {
74096c
-    LOCK(&ob_fd->fd->inode->lock);
74096c
-    {
74096c
-        list_del_init(&ob_fd->ob_fds_on_inode);
74096c
-    }
74096c
-    UNLOCK(&ob_fd->fd->inode->lock);
74096c
-
74096c
-    loc_wipe(&ob_fd->loc);
74096c
-
74096c
-    if (ob_fd->xdata)
74096c
-        dict_unref(ob_fd->xdata);
74096c
+    uint64_t err;
74096c
 
74096c
-    if (ob_fd->open_frame) {
74096c
-        /* If we sill have a frame it means that background open has never
74096c
-         * been triggered. We need to release the pending reference. */
74096c
-        fd_unref(ob_fd->fd);
74096c
-
74096c
-        STACK_DESTROY(ob_fd->open_frame->root);
74096c
+    if ((fd_ctx_get(fd, xl, &err) == 0) && (err != 0)) {
74096c
+        return (ob_state_t)-err;
74096c
     }
74096c
 
74096c
-    GF_FREE(ob_fd);
74096c
+    return ob_open_and_resume_inode(xl, fd->inode, fd, open_count, synchronous,
74096c
+                                    trigger, pob_inode, pfd);
74096c
 }
74096c
 
74096c
-int
74096c
-ob_wake_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
74096c
-            int op_errno, fd_t *fd_ret, dict_t *xdata)
74096c
+static ob_state_t
74096c
+ob_open_behind(xlator_t *xl, fd_t *fd, int32_t flags, ob_inode_t **pob_inode,
74096c
+               fd_t **pfd)
74096c
 {
74096c
-    fd_t *fd = NULL;
74096c
-    int count = 0;
74096c
-    int ob_inode_op_ret = 0;
74096c
-    int ob_inode_op_errno = 0;
74096c
-    ob_fd_t *ob_fd = NULL;
74096c
-    call_stub_t *stub = NULL, *tmp = NULL;
74096c
-    ob_inode_t *ob_inode = NULL;
74096c
-    gf_boolean_t ob_inode_fops_waiting = _gf_false;
74096c
-    struct list_head fops_waiting_on_fd, fops_waiting_on_inode;
74096c
+    bool synchronous;
74096c
 
74096c
-    fd = frame->local;
74096c
-    frame->local = NULL;
74096c
-
74096c
-    INIT_LIST_HEAD(&fops_waiting_on_fd);
74096c
-    INIT_LIST_HEAD(&fops_waiting_on_inode);
74096c
+    /* TODO: If O_CREAT, O_APPEND, O_WRONLY or O_DIRECT are specified, shouldn't
74096c
+     *       we also execute this open synchronously ? */
74096c
+    synchronous = (flags & O_TRUNC) != 0;
74096c
 
74096c
-    ob_inode = ob_inode_get(this, fd->inode);
74096c
+    return ob_open_and_resume_fd(xl, fd, 1, synchronous, true, pob_inode, pfd);
74096c
+}
74096c
 
74096c
-    LOCK(&fd->lock);
74096c
+static int32_t
74096c
+ob_stub_dispatch(xlator_t *xl, ob_inode_t *ob_inode, fd_t *fd,
74096c
+                 call_stub_t *stub)
74096c
+{
74096c
+    LOCK(&ob_inode->inode->lock);
74096c
     {
74096c
-        ob_fd = __ob_fd_ctx_get(this, fd);
74096c
-        ob_fd->opened = _gf_true;
74096c
-
74096c
-        ob_inode_fops_waiting = ob_fd->ob_inode_fops_waiting;
74096c
-
74096c
-        list_splice_init(&ob_fd->list, &fops_waiting_on_fd);
74096c
-
74096c
-        if (op_ret < 0) {
74096c
-            /* mark fd BAD for ever */
74096c
-            ob_fd->op_errno = op_errno;
74096c
-            ob_fd = NULL; /*shouldn't be freed*/
74096c
-        } else {
74096c
-            __fd_ctx_del(fd, this, NULL);
74096c
-        }
74096c
-    }
74096c
-    UNLOCK(&fd->lock);
74096c
-
74096c
-    if (ob_inode_fops_waiting) {
74096c
-        LOCK(&fd->inode->lock);
74096c
-        {
74096c
-            count = --ob_inode->count;
74096c
-            if (op_ret < 0) {
74096c
-                /* TODO: when to reset the error? */
74096c
-                ob_inode->op_ret = -1;
74096c
-                ob_inode->op_errno = op_errno;
74096c
-            }
74096c
-
74096c
-            if (count == 0) {
74096c
-                ob_inode->open_in_progress = _gf_false;
74096c
-                ob_inode_op_ret = ob_inode->op_ret;
74096c
-                ob_inode_op_errno = ob_inode->op_errno;
74096c
-                list_splice_init(&ob_inode->resume_fops,
74096c
-                                 &fops_waiting_on_inode);
74096c
-            }
74096c
+        /* We only queue a stub if the open has not been completed or
74096c
+         * cancelled. */
74096c
+        if (ob_inode->first_fd == fd) {
74096c
+            list_add_tail(&stub->list, &ob_inode->resume_fops);
74096c
+            stub = NULL;
74096c
         }
74096c
-        UNLOCK(&fd->inode->lock);
74096c
-    }
74096c
-
74096c
-    if (ob_fd)
74096c
-        ob_fd_free(ob_fd);
74096c
-
74096c
-    list_for_each_entry_safe(stub, tmp, &fops_waiting_on_fd, list)
74096c
-    {
74096c
-        list_del_init(&stub->list);
74096c
-
74096c
-        if (op_ret < 0)
74096c
-            call_unwind_error(stub, -1, op_errno);
74096c
-        else
74096c
-            call_resume(stub);
74096c
     }
74096c
+    UNLOCK(&ob_inode->inode->lock);
74096c
 
74096c
-    list_for_each_entry_safe(stub, tmp, &fops_waiting_on_inode, list)
74096c
-    {
74096c
-        list_del_init(&stub->list);
74096c
-
74096c
-        if (ob_inode_op_ret < 0)
74096c
-            call_unwind_error(stub, -1, ob_inode_op_errno);
74096c
-        else
74096c
-            call_resume(stub);
74096c
+    if (stub != NULL) {
74096c
+        call_resume(stub);
74096c
     }
74096c
 
74096c
-    /* The background open is completed. We can release the 'fd' reference. */
74096c
-    fd_unref(fd);
74096c
-
74096c
-    STACK_DESTROY(frame->root);
74096c
-
74096c
     return 0;
74096c
 }
74096c
 
74096c
-int
74096c
-ob_fd_wake(xlator_t *this, fd_t *fd, ob_fd_t *ob_fd)
74096c
+static int32_t
74096c
+ob_open_dispatch(xlator_t *xl, ob_inode_t *ob_inode, fd_t *fd,
74096c
+                 call_stub_t *stub)
74096c
 {
74096c
-    call_frame_t *frame = NULL;
74096c
-
74096c
-    if (ob_fd == NULL) {
74096c
-        LOCK(&fd->lock);
74096c
-        {
74096c
-            ob_fd = __ob_fd_ctx_get(this, fd);
74096c
-            if (!ob_fd)
74096c
-                goto unlock;
74096c
+    bool closed;
74096c
 
74096c
-            frame = ob_fd->open_frame;
74096c
-            ob_fd->open_frame = NULL;
74096c
-        }
74096c
-    unlock:
74096c
-        UNLOCK(&fd->lock);
74096c
-    } else {
74096c
-        LOCK(&fd->lock);
74096c
-        {
74096c
-            frame = ob_fd->open_frame;
74096c
-            ob_fd->open_frame = NULL;
74096c
+    LOCK(&ob_inode->inode->lock);
74096c
+    {
74096c
+        closed = ob_inode->first_fd != fd;
74096c
+        if (!closed) {
74096c
+            if (ob_inode->triggered) {
74096c
+                ob_inode->first_open = NULL;
74096c
+            } else {
74096c
+                ob_inode->first_open = stub;
74096c
+                stub = NULL;
74096c
+            }
74096c
         }
74096c
-        UNLOCK(&fd->lock);
74096c
     }
74096c
+    UNLOCK(&ob_inode->inode->lock);
74096c
 
74096c
-    if (frame) {
74096c
-        /* We don't need to take a reference here. We already have a reference
74096c
-         * while the open is pending. */
74096c
-        frame->local = fd;
74096c
-
74096c
-        STACK_WIND(frame, ob_wake_cbk, FIRST_CHILD(this),
74096c
-                   FIRST_CHILD(this)->fops->open, &ob_fd->loc, ob_fd->flags, fd,
74096c
-                   ob_fd->xdata);
74096c
+    if (stub != NULL) {
74096c
+        if (closed) {
74096c
+            call_stub_destroy(stub);
74096c
+            fd_unref(fd);
74096c
+        } else {
74096c
+            call_resume(stub);
74096c
+        }
74096c
     }
74096c
 
74096c
     return 0;
74096c
 }
74096c
 
74096c
-void
74096c
-ob_inode_wake(xlator_t *this, struct list_head *ob_fds)
74096c
+static void
74096c
+ob_resume_pending(struct list_head *list)
74096c
 {
74096c
-    ob_fd_t *ob_fd = NULL, *tmp = NULL;
74096c
+    call_stub_t *stub;
74096c
 
74096c
-    if (!list_empty(ob_fds)) {
74096c
-        list_for_each_entry_safe(ob_fd, tmp, ob_fds, ob_fds_on_inode)
74096c
-        {
74096c
-            ob_fd_wake(this, ob_fd->fd, ob_fd);
74096c
-            ob_fd_free(ob_fd);
74096c
-        }
74096c
-    }
74096c
-}
74096c
+    while (!list_empty(list)) {
74096c
+        stub = list_first_entry(list, call_stub_t, list);
74096c
+        list_del_init(&stub->list);
74096c
 
74096c
-/* called holding inode->lock and fd->lock */
74096c
-void
74096c
-ob_fd_copy(ob_fd_t *src, ob_fd_t *dst)
74096c
-{
74096c
-    if (!src || !dst)
74096c
-        goto out;
74096c
-
74096c
-    dst->fd = src->fd;
74096c
-    dst->loc.inode = inode_ref(src->loc.inode);
74096c
-    gf_uuid_copy(dst->loc.gfid, src->loc.gfid);
74096c
-    dst->flags = src->flags;
74096c
-    dst->xdata = dict_ref(src->xdata);
74096c
-    dst->ob_inode = src->ob_inode;
74096c
-out:
74096c
-    return;
74096c
+        call_resume(stub);
74096c
+    }
74096c
 }
74096c
 
74096c
-int
74096c
-open_all_pending_fds_and_resume(xlator_t *this, inode_t *inode,
74096c
-                                call_stub_t *stub)
74096c
+static void
74096c
+ob_open_completed(xlator_t *xl, ob_inode_t *ob_inode, fd_t *fd, int32_t op_ret,
74096c
+                  int32_t op_errno)
74096c
 {
74096c
-    ob_inode_t *ob_inode = NULL;
74096c
-    ob_fd_t *ob_fd = NULL, *tmp = NULL;
74096c
-    gf_boolean_t was_open_in_progress = _gf_false;
74096c
-    gf_boolean_t wait_for_open = _gf_false;
74096c
-    struct list_head ob_fds;
74096c
+    struct list_head list;
74096c
 
74096c
-    ob_inode = ob_inode_get(this, inode);
74096c
-    if (ob_inode == NULL)
74096c
-        goto out;
74096c
+    INIT_LIST_HEAD(&list);
74096c
 
74096c
-    INIT_LIST_HEAD(&ob_fds);
74096c
+    if (op_ret < 0) {
74096c
+        fd_ctx_set(fd, xl, op_errno <= 0 ? EIO : op_errno);
74096c
+    }
74096c
 
74096c
-    LOCK(&inode->lock);
74096c
+    LOCK(&ob_inode->inode->lock);
74096c
     {
74096c
-        was_open_in_progress = ob_inode->open_in_progress;
74096c
-        ob_inode->unlinked = 1;
74096c
-
74096c
-        if (was_open_in_progress) {
74096c
-            list_add_tail(&stub->list, &ob_inode->resume_fops);
74096c
-            goto inode_unlock;
74096c
-        }
74096c
-
74096c
-        list_for_each_entry(ob_fd, &ob_inode->ob_fds, ob_fds_on_inode)
74096c
-        {
74096c
-            LOCK(&ob_fd->fd->lock);
74096c
-            {
74096c
-                if (ob_fd->opened)
74096c
-                    goto fd_unlock;
74096c
-
74096c
-                ob_inode->count++;
74096c
-                ob_fd->ob_inode_fops_waiting = _gf_true;
74096c
-
74096c
-                if (ob_fd->open_frame == NULL) {
74096c
-                    /* open in progress no need of wake */
74096c
-                } else {
74096c
-                    tmp = ob_fd_new();
74096c
-                    tmp->open_frame = ob_fd->open_frame;
74096c
-                    ob_fd->open_frame = NULL;
74096c
-
74096c
-                    ob_fd_copy(ob_fd, tmp);
74096c
-                    list_add_tail(&tmp->ob_fds_on_inode, &ob_fds);
74096c
-                }
74096c
-            }
74096c
-        fd_unlock:
74096c
-            UNLOCK(&ob_fd->fd->lock);
74096c
-        }
74096c
-
74096c
-        if (ob_inode->count) {
74096c
-            wait_for_open = ob_inode->open_in_progress = _gf_true;
74096c
-            list_add_tail(&stub->list, &ob_inode->resume_fops);
74096c
+        /* Only update the fields if the file has not been closed before
74096c
+         * getting here. */
74096c
+        if (ob_inode->first_fd == fd) {
74096c
+            list_splice_init(&ob_inode->resume_fops, &list);
74096c
+            ob_inode->first_fd = NULL;
74096c
+            ob_inode->first_open = NULL;
74096c
+            ob_inode->triggered = false;
74096c
         }
74096c
     }
74096c
-inode_unlock:
74096c
-    UNLOCK(&inode->lock);
74096c
+    UNLOCK(&ob_inode->inode->lock);
74096c
 
74096c
-out:
74096c
-    if (!was_open_in_progress) {
74096c
-        if (!wait_for_open) {
74096c
-            call_resume(stub);
74096c
-        } else {
74096c
-            ob_inode_wake(this, &ob_fds);
74096c
-        }
74096c
-    }
74096c
+    ob_resume_pending(&list);
74096c
 
74096c
-    return 0;
74096c
+    fd_unref(fd);
74096c
 }
74096c
 
74096c
-int
74096c
-open_and_resume(xlator_t *this, fd_t *fd, call_stub_t *stub)
74096c
+static int32_t
74096c
+ob_open_cbk(call_frame_t *frame, void *cookie, xlator_t *xl, int32_t op_ret,
74096c
+            int32_t op_errno, fd_t *fd, dict_t *xdata)
74096c
 {
74096c
-    ob_fd_t *ob_fd = NULL;
74096c
-    int op_errno = 0;
74096c
-
74096c
-    if (!fd)
74096c
-        goto nofd;
74096c
-
74096c
-    LOCK(&fd->lock);
74096c
-    {
74096c
-        ob_fd = __ob_fd_ctx_get(this, fd);
74096c
-        if (!ob_fd)
74096c
-            goto unlock;
74096c
+    ob_inode_t *ob_inode;
74096c
 
74096c
-        if (ob_fd->op_errno) {
74096c
-            op_errno = ob_fd->op_errno;
74096c
-            goto unlock;
74096c
-        }
74096c
+    ob_inode = frame->local;
74096c
+    frame->local = NULL;
74096c
 
74096c
-        list_add_tail(&stub->list, &ob_fd->list);
74096c
-    }
74096c
-unlock:
74096c
-    UNLOCK(&fd->lock);
74096c
+    ob_open_completed(xl, ob_inode, cookie, op_ret, op_errno);
74096c
 
74096c
-nofd:
74096c
-    if (op_errno)
74096c
-        call_unwind_error(stub, -1, op_errno);
74096c
-    else if (ob_fd)
74096c
-        ob_fd_wake(this, fd, NULL);
74096c
-    else
74096c
-        call_resume(stub);
74096c
+    STACK_DESTROY(frame->root);
74096c
 
74096c
     return 0;
74096c
 }
74096c
 
74096c
-int
74096c
-ob_open_behind(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
74096c
+static int32_t
74096c
+ob_open_resume(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
74096c
                fd_t *fd, dict_t *xdata)
74096c
 {
74096c
-    ob_fd_t *ob_fd = NULL;
74096c
-    int ret = -1;
74096c
-    ob_conf_t *conf = NULL;
74096c
-    ob_inode_t *ob_inode = NULL;
74096c
-    gf_boolean_t open_in_progress = _gf_false;
74096c
-    int unlinked = 0;
74096c
-
74096c
-    conf = this->private;
74096c
-
74096c
-    if (flags & O_TRUNC) {
74096c
-        STACK_WIND(frame, default_open_cbk, FIRST_CHILD(this),
74096c
-                   FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata);
74096c
-        return 0;
74096c
-    }
74096c
-
74096c
-    ob_inode = ob_inode_get(this, fd->inode);
74096c
-
74096c
-    ob_fd = ob_fd_new();
74096c
-    if (!ob_fd)
74096c
-        goto enomem;
74096c
-
74096c
-    ob_fd->ob_inode = ob_inode;
74096c
-
74096c
-    ob_fd->fd = fd;
74096c
-
74096c
-    ob_fd->open_frame = copy_frame(frame);
74096c
-    if (!ob_fd->open_frame)
74096c
-        goto enomem;
74096c
-    ret = loc_copy(&ob_fd->loc, loc);
74096c
-    if (ret)
74096c
-        goto enomem;
74096c
-
74096c
-    ob_fd->flags = flags;
74096c
-    if (xdata)
74096c
-        ob_fd->xdata = dict_ref(xdata);
74096c
-
74096c
-    LOCK(&fd->inode->lock);
74096c
-    {
74096c
-        open_in_progress = ob_inode->open_in_progress;
74096c
-        unlinked = ob_inode->unlinked;
74096c
-        if (!open_in_progress && !unlinked) {
74096c
-            ret = ob_fd_ctx_set(this, fd, ob_fd);
74096c
-            if (ret) {
74096c
-                UNLOCK(&fd->inode->lock);
74096c
-                goto enomem;
74096c
-            }
74096c
-
74096c
-            list_add(&ob_fd->ob_fds_on_inode, &ob_inode->ob_fds);
74096c
-        }
74096c
-    }
74096c
-    UNLOCK(&fd->inode->lock);
74096c
-
74096c
-    /* We take a reference while the background open is pending or being
74096c
-     * processed. If we finally wind the request in the foreground, then
74096c
-     * ob_fd_free() will take care of this additional reference. */
74096c
-    fd_ref(fd);
74096c
-
74096c
-    if (!open_in_progress && !unlinked) {
74096c
-        STACK_UNWIND_STRICT(open, frame, 0, 0, fd, xdata);
74096c
-
74096c
-        if (!conf->lazy_open)
74096c
-            ob_fd_wake(this, fd, NULL);
74096c
-    } else {
74096c
-        ob_fd_free(ob_fd);
74096c
-        STACK_WIND(frame, default_open_cbk, FIRST_CHILD(this),
74096c
-                   FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata);
74096c
-    }
74096c
+    STACK_WIND_COOKIE(frame, ob_open_cbk, fd, FIRST_CHILD(this),
74096c
+                      FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata);
74096c
 
74096c
     return 0;
74096c
-enomem:
74096c
-    if (ob_fd) {
74096c
-        if (ob_fd->open_frame)
74096c
-            STACK_DESTROY(ob_fd->open_frame->root);
74096c
-
74096c
-        loc_wipe(&ob_fd->loc);
74096c
-        if (ob_fd->xdata)
74096c
-            dict_unref(ob_fd->xdata);
74096c
-
74096c
-        GF_FREE(ob_fd);
74096c
-    }
74096c
-
74096c
-    return -1;
74096c
 }
74096c
 
74096c
-int
74096c
+static int32_t
74096c
 ob_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, fd_t *fd,
74096c
         dict_t *xdata)
74096c
 {
74096c
-    fd_t *old_fd = NULL;
74096c
-    int ret = -1;
74096c
-    int op_errno = ENOMEM;
74096c
-    call_stub_t *stub = NULL;
74096c
-
74096c
-    old_fd = fd_lookup(fd->inode, 0);
74096c
-    if (old_fd) {
74096c
-        /* open-behind only when this is the first FD */
74096c
-        stub = fop_open_stub(frame, default_open_resume, loc, flags, fd, xdata);
74096c
-        if (!stub) {
74096c
-            fd_unref(old_fd);
74096c
-            goto err;
74096c
-        }
74096c
-
74096c
-        open_and_resume(this, old_fd, stub);
74096c
+    ob_inode_t *ob_inode;
74096c
+    call_frame_t *open_frame;
74096c
+    call_stub_t *stub;
74096c
+    fd_t *first_fd;
74096c
+    ob_state_t state;
74096c
+
74096c
+    state = ob_open_behind(this, fd, flags, &ob_inode, &first_fd);
74096c
+    if (state == OB_STATE_READY) {
74096c
+        /* There's no pending open, but there are other file descriptors opened
74096c
+         * or the current flags require a synchronous open. */
74096c
+        return default_open(frame, this, loc, flags, fd, xdata);
74096c
+    }
74096c
 
74096c
-        fd_unref(old_fd);
74096c
+    if (state == OB_STATE_OPEN_TRIGGERED) {
74096c
+        /* The first open is in progress (either because it was already issued
74096c
+         * or because this request triggered it). We try to create a new stub
74096c
+         * to retry the operation once the initial open completes. */
74096c
+        stub = fop_open_stub(frame, ob_open, loc, flags, fd, xdata);
74096c
+        if (stub != NULL) {
74096c
+            return ob_stub_dispatch(this, ob_inode, first_fd, stub);
74096c
+        }
74096c
 
74096c
-        return 0;
74096c
+        state = -ENOMEM;
74096c
     }
74096c
 
74096c
-    ret = ob_open_behind(frame, this, loc, flags, fd, xdata);
74096c
-    if (ret) {
74096c
-        goto err;
74096c
-    }
74096c
+    if (state == OB_STATE_FIRST_OPEN) {
74096c
+        /* We try to create a stub for the new open. A new frame needs to be
74096c
+         * used because the current one may be destroyed soon after sending
74096c
+         * the open's reply. */
74096c
+        open_frame = copy_frame(frame);
74096c
+        if (open_frame != NULL) {
74096c
+            stub = fop_open_stub(open_frame, ob_open_resume, loc, flags, fd,
74096c
+                                 xdata);
74096c
+            if (stub != NULL) {
74096c
+                open_frame->local = ob_inode;
74096c
 
74096c
-    return 0;
74096c
-err:
74096c
-    gf_msg(this->name, GF_LOG_ERROR, op_errno, OPEN_BEHIND_MSG_NO_MEMORY, "%s",
74096c
-           loc->path);
74096c
+                /* TODO: Previous version passed xdata back to the caller, but
74096c
+                 *       probably this doesn't make sense since it won't contain
74096c
+                 *       any requested data. I think it would be better to pass
74096c
+                 *       NULL for xdata. */
74096c
+                default_open_cbk(frame, NULL, this, 0, 0, fd, xdata);
74096c
 
74096c
-    STACK_UNWIND_STRICT(open, frame, -1, op_errno, 0, 0);
74096c
+                return ob_open_dispatch(this, ob_inode, first_fd, stub);
74096c
+            }
74096c
 
74096c
-    return 0;
74096c
-}
74096c
+            STACK_DESTROY(open_frame->root);
74096c
+        }
74096c
 
74096c
-fd_t *
74096c
-ob_get_wind_fd(xlator_t *this, fd_t *fd, uint32_t *flag)
74096c
-{
74096c
-    fd_t *wind_fd = NULL;
74096c
-    ob_fd_t *ob_fd = NULL;
74096c
-    ob_conf_t *conf = NULL;
74096c
+        /* In case of error, simulate a regular completion but with an error
74096c
+         * code. */
74096c
+        ob_open_completed(this, ob_inode, first_fd, -1, ENOMEM);
74096c
 
74096c
-    conf = this->private;
74096c
+        state = -ENOMEM;
74096c
+    }
74096c
 
74096c
-    ob_fd = ob_fd_ctx_get(this, fd);
74096c
+    /* In case of failure we need to decrement the number of open files because
74096c
+     * ob_fdclose() won't be called. */
74096c
 
74096c
-    if (ob_fd && ob_fd->open_frame && conf->use_anonymous_fd) {
74096c
-        wind_fd = fd_anonymous(fd->inode);
74096c
-        if ((ob_fd->flags & O_DIRECT) && (flag))
74096c
-            *flag = *flag | O_DIRECT;
74096c
-    } else {
74096c
-        wind_fd = fd_ref(fd);
74096c
+    LOCK(&fd->inode->lock);
74096c
+    {
74096c
+        ob_inode->open_count--;
74096c
     }
74096c
+    UNLOCK(&fd->inode->lock);
74096c
 
74096c
-    return wind_fd;
74096c
+    gf_smsg(this->name, GF_LOG_ERROR, -state, OPEN_BEHIND_MSG_FAILED, "fop=%s",
74096c
+            "open", "path=%s", loc->path, NULL);
74096c
+
74096c
+    return default_open_failure_cbk(frame, -state);
74096c
 }
74096c
 
74096c
-int
74096c
+static int32_t
74096c
 ob_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
74096c
          off_t offset, uint32_t flags, dict_t *xdata)
74096c
 {
74096c
-    call_stub_t *stub = NULL;
74096c
-    fd_t *wind_fd = NULL;
74096c
-    ob_conf_t *conf = NULL;
74096c
+    ob_conf_t *conf = this->private;
74096c
+    bool trigger = conf->read_after_open || !conf->use_anonymous_fd;
74096c
 
74096c
-    conf = this->private;
74096c
-
74096c
-    if (!conf->read_after_open)
74096c
-        wind_fd = ob_get_wind_fd(this, fd, &flags);
74096c
-    else
74096c
-        wind_fd = fd_ref(fd);
74096c
-
74096c
-    stub = fop_readv_stub(frame, default_readv_resume, wind_fd, size, offset,
74096c
-                          flags, xdata);
74096c
-    fd_unref(wind_fd);
74096c
-
74096c
-    if (!stub)
74096c
-        goto err;
74096c
-
74096c
-    open_and_resume(this, wind_fd, stub);
74096c
-
74096c
-    return 0;
74096c
-err:
74096c
-    STACK_UNWIND_STRICT(readv, frame, -1, ENOMEM, 0, 0, 0, 0, 0);
74096c
+    OB_POST_FD(readv, this, frame, fd, trigger, fd, size, offset, flags, xdata);
74096c
 
74096c
     return 0;
74096c
 }
74096c
 
74096c
-int
74096c
+static int32_t
74096c
 ob_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *iov,
74096c
           int count, off_t offset, uint32_t flags, struct iobref *iobref,
74096c
           dict_t *xdata)
74096c
 {
74096c
-    call_stub_t *stub = NULL;
74096c
-
74096c
-    stub = fop_writev_stub(frame, default_writev_resume, fd, iov, count, offset,
74096c
-                           flags, iobref, xdata);
74096c
-    if (!stub)
74096c
-        goto err;
74096c
-
74096c
-    open_and_resume(this, fd, stub);
74096c
-
74096c
-    return 0;
74096c
-err:
74096c
-    STACK_UNWIND_STRICT(writev, frame, -1, ENOMEM, 0, 0, 0);
74096c
+    OB_POST_FD(writev, this, frame, fd, true, fd, iov, count, offset, flags,
74096c
+               iobref, xdata);
74096c
 
74096c
     return 0;
74096c
 }
74096c
 
74096c
-int
74096c
+static int32_t
74096c
 ob_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
74096c
 {
74096c
-    call_stub_t *stub = NULL;
74096c
-    fd_t *wind_fd = NULL;
74096c
-
74096c
-    wind_fd = ob_get_wind_fd(this, fd, NULL);
74096c
-
74096c
-    stub = fop_fstat_stub(frame, default_fstat_resume, wind_fd, xdata);
74096c
+    ob_conf_t *conf = this->private;
74096c
+    bool trigger = !conf->use_anonymous_fd;
74096c
 
74096c
-    fd_unref(wind_fd);
74096c
-
74096c
-    if (!stub)
74096c
-        goto err;
74096c
-
74096c
-    open_and_resume(this, wind_fd, stub);
74096c
-
74096c
-    return 0;
74096c
-err:
74096c
-    STACK_UNWIND_STRICT(fstat, frame, -1, ENOMEM, 0, 0);
74096c
+    OB_POST_FD(fstat, this, frame, fd, trigger, fd, xdata);
74096c
 
74096c
     return 0;
74096c
 }
74096c
 
74096c
-int
74096c
+static int32_t
74096c
 ob_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
74096c
         gf_seek_what_t what, dict_t *xdata)
74096c
 {
74096c
-    call_stub_t *stub = NULL;
74096c
-    fd_t *wind_fd = NULL;
74096c
-
74096c
-    wind_fd = ob_get_wind_fd(this, fd, NULL);
74096c
+    ob_conf_t *conf = this->private;
74096c
+    bool trigger = !conf->use_anonymous_fd;
74096c
 
74096c
-    stub = fop_seek_stub(frame, default_seek_resume, wind_fd, offset, what,
74096c
-                         xdata);
74096c
-
74096c
-    fd_unref(wind_fd);
74096c
-
74096c
-    if (!stub)
74096c
-        goto err;
74096c
-
74096c
-    open_and_resume(this, wind_fd, stub);
74096c
-
74096c
-    return 0;
74096c
-err:
74096c
-    STACK_UNWIND_STRICT(fstat, frame, -1, ENOMEM, 0, 0);
74096c
+    OB_POST_FD(seek, this, frame, fd, trigger, fd, offset, what, xdata);
74096c
 
74096c
     return 0;
74096c
 }
74096c
 
74096c
-int
74096c
+static int32_t
74096c
 ob_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
74096c
 {
74096c
-    call_stub_t *stub = NULL;
74096c
-    ob_fd_t *ob_fd = NULL;
74096c
-    gf_boolean_t unwind = _gf_false;
74096c
-
74096c
-    LOCK(&fd->lock);
74096c
-    {
74096c
-        ob_fd = __ob_fd_ctx_get(this, fd);
74096c
-        if (ob_fd && ob_fd->open_frame)
74096c
-            /* if open() was never wound to backend,
74096c
-               no need to wind flush() either.
74096c
-            */
74096c
-            unwind = _gf_true;
74096c
-    }
74096c
-    UNLOCK(&fd->lock);
74096c
-
74096c
-    if (unwind)
74096c
-        goto unwind;
74096c
-
74096c
-    stub = fop_flush_stub(frame, default_flush_resume, fd, xdata);
74096c
-    if (!stub)
74096c
-        goto err;
74096c
-
74096c
-    open_and_resume(this, fd, stub);
74096c
-
74096c
-    return 0;
74096c
-err:
74096c
-    STACK_UNWIND_STRICT(flush, frame, -1, ENOMEM, 0);
74096c
-
74096c
-    return 0;
74096c
-
74096c
-unwind:
74096c
-    STACK_UNWIND_STRICT(flush, frame, 0, 0, 0);
74096c
+    OB_POST_FLUSH(this, frame, fd, fd, xdata);
74096c
 
74096c
     return 0;
74096c
 }
74096c
 
74096c
-int
74096c
+static int32_t
74096c
 ob_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int flag, dict_t *xdata)
74096c
 {
74096c
-    call_stub_t *stub = NULL;
74096c
-
74096c
-    stub = fop_fsync_stub(frame, default_fsync_resume, fd, flag, xdata);
74096c
-    if (!stub)
74096c
-        goto err;
74096c
-
74096c
-    open_and_resume(this, fd, stub);
74096c
-
74096c
-    return 0;
74096c
-err:
74096c
-    STACK_UNWIND_STRICT(fsync, frame, -1, ENOMEM, 0, 0, 0);
74096c
+    OB_POST_FD(fsync, this, frame, fd, true, fd, flag, xdata);
74096c
 
74096c
     return 0;
74096c
 }
74096c
 
74096c
-int
74096c
+static int32_t
74096c
 ob_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int cmd,
74096c
       struct gf_flock *flock, dict_t *xdata)
74096c
 {
74096c
-    call_stub_t *stub = NULL;
74096c
-
74096c
-    stub = fop_lk_stub(frame, default_lk_resume, fd, cmd, flock, xdata);
74096c
-    if (!stub)
74096c
-        goto err;
74096c
-
74096c
-    open_and_resume(this, fd, stub);
74096c
-
74096c
-    return 0;
74096c
-err:
74096c
-    STACK_UNWIND_STRICT(lk, frame, -1, ENOMEM, 0, 0);
74096c
+    OB_POST_FD(lk, this, frame, fd, true, fd, cmd, flock, xdata);
74096c
 
74096c
     return 0;
74096c
 }
74096c
 
74096c
-int
74096c
+static int32_t
74096c
 ob_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
74096c
              dict_t *xdata)
74096c
 {
74096c
-    call_stub_t *stub = NULL;
74096c
-
74096c
-    stub = fop_ftruncate_stub(frame, default_ftruncate_resume, fd, offset,
74096c
-                              xdata);
74096c
-    if (!stub)
74096c
-        goto err;
74096c
-
74096c
-    open_and_resume(this, fd, stub);
74096c
-
74096c
-    return 0;
74096c
-err:
74096c
-    STACK_UNWIND_STRICT(ftruncate, frame, -1, ENOMEM, 0, 0, 0);
74096c
+    OB_POST_FD(ftruncate, this, frame, fd, true, fd, offset, xdata);
74096c
 
74096c
     return 0;
74096c
 }
74096c
 
74096c
-int
74096c
+static int32_t
74096c
 ob_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xattr,
74096c
              int flags, dict_t *xdata)
74096c
 {
74096c
-    call_stub_t *stub = NULL;
74096c
-
74096c
-    stub = fop_fsetxattr_stub(frame, default_fsetxattr_resume, fd, xattr, flags,
74096c
-                              xdata);
74096c
-    if (!stub)
74096c
-        goto err;
74096c
-
74096c
-    open_and_resume(this, fd, stub);
74096c
-
74096c
-    return 0;
74096c
-err:
74096c
-    STACK_UNWIND_STRICT(fsetxattr, frame, -1, ENOMEM, 0);
74096c
+    OB_POST_FD(fsetxattr, this, frame, fd, true, fd, xattr, flags, xdata);
74096c
 
74096c
     return 0;
74096c
 }
74096c
 
74096c
-int
74096c
+static int32_t
74096c
 ob_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name,
74096c
              dict_t *xdata)
74096c
 {
74096c
-    call_stub_t *stub = NULL;
74096c
-
74096c
-    stub = fop_fgetxattr_stub(frame, default_fgetxattr_resume, fd, name, xdata);
74096c
-    if (!stub)
74096c
-        goto err;
74096c
-
74096c
-    open_and_resume(this, fd, stub);
74096c
-
74096c
-    return 0;
74096c
-err:
74096c
-    STACK_UNWIND_STRICT(fgetxattr, frame, -1, ENOMEM, 0, 0);
74096c
+    OB_POST_FD(fgetxattr, this, frame, fd, true, fd, name, xdata);
74096c
 
74096c
     return 0;
74096c
 }
74096c
 
74096c
-int
74096c
+static int32_t
74096c
 ob_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name,
74096c
                 dict_t *xdata)
74096c
 {
74096c
-    call_stub_t *stub = NULL;
74096c
-
74096c
-    stub = fop_fremovexattr_stub(frame, default_fremovexattr_resume, fd, name,
74096c
-                                 xdata);
74096c
-    if (!stub)
74096c
-        goto err;
74096c
-
74096c
-    open_and_resume(this, fd, stub);
74096c
-
74096c
-    return 0;
74096c
-err:
74096c
-    STACK_UNWIND_STRICT(fremovexattr, frame, -1, ENOMEM, 0);
74096c
+    OB_POST_FD(fremovexattr, this, frame, fd, true, fd, name, xdata);
74096c
 
74096c
     return 0;
74096c
 }
74096c
 
74096c
-int
74096c
+static int32_t
74096c
 ob_finodelk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
74096c
             int cmd, struct gf_flock *flock, dict_t *xdata)
74096c
 {
74096c
-    call_stub_t *stub = fop_finodelk_stub(frame, default_finodelk_resume,
74096c
-                                          volume, fd, cmd, flock, xdata);
74096c
-    if (stub)
74096c
-        open_and_resume(this, fd, stub);
74096c
-    else
74096c
-        STACK_UNWIND_STRICT(finodelk, frame, -1, ENOMEM, 0);
74096c
+    OB_POST_FD(finodelk, this, frame, fd, true, volume, fd, cmd, flock, xdata);
74096c
 
74096c
     return 0;
74096c
 }
74096c
 
74096c
-int
74096c
+static int32_t
74096c
 ob_fentrylk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
74096c
             const char *basename, entrylk_cmd cmd, entrylk_type type,
74096c
             dict_t *xdata)
74096c
 {
74096c
-    call_stub_t *stub = fop_fentrylk_stub(
74096c
-        frame, default_fentrylk_resume, volume, fd, basename, cmd, type, xdata);
74096c
-    if (stub)
74096c
-        open_and_resume(this, fd, stub);
74096c
-    else
74096c
-        STACK_UNWIND_STRICT(fentrylk, frame, -1, ENOMEM, 0);
74096c
+    OB_POST_FD(fentrylk, this, frame, fd, true, volume, fd, basename, cmd, type,
74096c
+               xdata);
74096c
 
74096c
     return 0;
74096c
 }
74096c
 
74096c
-int
74096c
+static int32_t
74096c
 ob_fxattrop(call_frame_t *frame, xlator_t *this, fd_t *fd,
74096c
             gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
74096c
 {
74096c
-    call_stub_t *stub = fop_fxattrop_stub(frame, default_fxattrop_resume, fd,
74096c
-                                          optype, xattr, xdata);
74096c
-    if (stub)
74096c
-        open_and_resume(this, fd, stub);
74096c
-    else
74096c
-        STACK_UNWIND_STRICT(fxattrop, frame, -1, ENOMEM, 0, 0);
74096c
+    OB_POST_FD(fxattrop, this, frame, fd, true, fd, optype, xattr, xdata);
74096c
 
74096c
     return 0;
74096c
 }
74096c
 
74096c
-int
74096c
+static int32_t
74096c
 ob_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *iatt,
74096c
             int valid, dict_t *xdata)
74096c
 {
74096c
-    call_stub_t *stub = NULL;
74096c
-
74096c
-    stub = fop_fsetattr_stub(frame, default_fsetattr_resume, fd, iatt, valid,
74096c
-                             xdata);
74096c
-    if (!stub)
74096c
-        goto err;
74096c
-
74096c
-    open_and_resume(this, fd, stub);
74096c
-
74096c
-    return 0;
74096c
-err:
74096c
-    STACK_UNWIND_STRICT(fsetattr, frame, -1, ENOMEM, 0, 0, 0);
74096c
+    OB_POST_FD(fsetattr, this, frame, fd, true, fd, iatt, valid, xdata);
74096c
 
74096c
     return 0;
74096c
 }
74096c
 
74096c
-int
74096c
+static int32_t
74096c
 ob_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
74096c
              off_t offset, size_t len, dict_t *xdata)
74096c
 {
74096c
-    call_stub_t *stub;
74096c
-
74096c
-    stub = fop_fallocate_stub(frame, default_fallocate_resume, fd, mode, offset,
74096c
-                              len, xdata);
74096c
-    if (!stub)
74096c
-        goto err;
74096c
-
74096c
-    open_and_resume(this, fd, stub);
74096c
+    OB_POST_FD(fallocate, this, frame, fd, true, fd, mode, offset, len, xdata);
74096c
 
74096c
     return 0;
74096c
-err:
74096c
-    STACK_UNWIND_STRICT(fallocate, frame, -1, ENOMEM, NULL, NULL, NULL);
74096c
-    return 0;
74096c
 }
74096c
 
74096c
-int
74096c
+static int32_t
74096c
 ob_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
74096c
            size_t len, dict_t *xdata)
74096c
 {
74096c
-    call_stub_t *stub;
74096c
-
74096c
-    stub = fop_discard_stub(frame, default_discard_resume, fd, offset, len,
74096c
-                            xdata);
74096c
-    if (!stub)
74096c
-        goto err;
74096c
-
74096c
-    open_and_resume(this, fd, stub);
74096c
+    OB_POST_FD(discard, this, frame, fd, true, fd, offset, len, xdata);
74096c
 
74096c
     return 0;
74096c
-err:
74096c
-    STACK_UNWIND_STRICT(discard, frame, -1, ENOMEM, NULL, NULL, NULL);
74096c
-    return 0;
74096c
 }
74096c
 
74096c
-int
74096c
+static int32_t
74096c
 ob_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
74096c
             off_t len, dict_t *xdata)
74096c
 {
74096c
-    call_stub_t *stub;
74096c
-
74096c
-    stub = fop_zerofill_stub(frame, default_zerofill_resume, fd, offset, len,
74096c
-                             xdata);
74096c
-    if (!stub)
74096c
-        goto err;
74096c
+    OB_POST_FD(zerofill, this, frame, fd, true, fd, offset, len, xdata);
74096c
 
74096c
-    open_and_resume(this, fd, stub);
74096c
-
74096c
-    return 0;
74096c
-err:
74096c
-    STACK_UNWIND_STRICT(zerofill, frame, -1, ENOMEM, NULL, NULL, NULL);
74096c
     return 0;
74096c
 }
74096c
 
74096c
-int
74096c
+static int32_t
74096c
 ob_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflags,
74096c
           dict_t *xdata)
74096c
 {
74096c
-    call_stub_t *stub = NULL;
74096c
-
74096c
-    stub = fop_unlink_stub(frame, default_unlink_resume, loc, xflags, xdata);
74096c
-    if (!stub)
74096c
-        goto err;
74096c
-
74096c
-    open_all_pending_fds_and_resume(this, loc->inode, stub);
74096c
-
74096c
-    return 0;
74096c
-err:
74096c
-    STACK_UNWIND_STRICT(unlink, frame, -1, ENOMEM, 0, 0, 0);
74096c
+    OB_POST_INODE(unlink, this, frame, loc->inode, true, loc, xflags, xdata);
74096c
 
74096c
     return 0;
74096c
 }
74096c
 
74096c
-int
74096c
+static int32_t
74096c
 ob_rename(call_frame_t *frame, xlator_t *this, loc_t *src, loc_t *dst,
74096c
           dict_t *xdata)
74096c
 {
74096c
-    call_stub_t *stub = NULL;
74096c
-
74096c
-    stub = fop_rename_stub(frame, default_rename_resume, src, dst, xdata);
74096c
-    if (!stub)
74096c
-        goto err;
74096c
-
74096c
-    open_all_pending_fds_and_resume(this, dst->inode, stub);
74096c
-
74096c
-    return 0;
74096c
-err:
74096c
-    STACK_UNWIND_STRICT(rename, frame, -1, ENOMEM, 0, 0, 0, 0, 0, 0);
74096c
+    OB_POST_INODE(rename, this, frame, dst->inode, true, src, dst, xdata);
74096c
 
74096c
     return 0;
74096c
 }
74096c
 
74096c
-int32_t
74096c
+static int32_t
74096c
 ob_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *stbuf,
74096c
            int32_t valid, dict_t *xdata)
74096c
 {
74096c
-    call_stub_t *stub = NULL;
74096c
-
74096c
-    stub = fop_setattr_stub(frame, default_setattr_resume, loc, stbuf, valid,
74096c
-                            xdata);
74096c
-    if (!stub)
74096c
-        goto err;
74096c
+    OB_POST_INODE(setattr, this, frame, loc->inode, true, loc, stbuf, valid,
74096c
+                  xdata);
74096c
 
74096c
-    open_all_pending_fds_and_resume(this, loc->inode, stub);
74096c
-
74096c
-    return 0;
74096c
-err:
74096c
-    STACK_UNWIND_STRICT(setattr, frame, -1, ENOMEM, NULL, NULL, NULL);
74096c
     return 0;
74096c
 }
74096c
 
74096c
-int32_t
74096c
+static int32_t
74096c
 ob_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
74096c
             int32_t flags, dict_t *xdata)
74096c
 {
74096c
-    call_stub_t *stub = NULL;
74096c
-    gf_boolean_t access_xattr = _gf_false;
74096c
-
74096c
     if (dict_get(dict, POSIX_ACL_DEFAULT_XATTR) ||
74096c
         dict_get(dict, POSIX_ACL_ACCESS_XATTR) ||
74096c
-        dict_get(dict, GF_SELINUX_XATTR_KEY))
74096c
-        access_xattr = _gf_true;
74096c
-
74096c
-    if (!access_xattr)
74096c
+        dict_get(dict, GF_SELINUX_XATTR_KEY)) {
74096c
         return default_setxattr(frame, this, loc, dict, flags, xdata);
74096c
+    }
74096c
 
74096c
-    stub = fop_setxattr_stub(frame, default_setxattr_resume, loc, dict, flags,
74096c
-                             xdata);
74096c
-    if (!stub)
74096c
-        goto err;
74096c
-
74096c
-    open_all_pending_fds_and_resume(this, loc->inode, stub);
74096c
+    OB_POST_INODE(setxattr, this, frame, loc->inode, true, loc, dict, flags,
74096c
+                  xdata);
74096c
 
74096c
     return 0;
74096c
-err:
74096c
-    STACK_UNWIND_STRICT(setxattr, frame, -1, ENOMEM, NULL);
74096c
-    return 0;
74096c
 }
74096c
 
74096c
-int
74096c
-ob_release(xlator_t *this, fd_t *fd)
74096c
+static void
74096c
+ob_fdclose(xlator_t *this, fd_t *fd)
74096c
 {
74096c
-    ob_fd_t *ob_fd = NULL;
74096c
+    struct list_head list;
74096c
+    ob_inode_t *ob_inode;
74096c
+    call_stub_t *stub;
74096c
+
74096c
+    INIT_LIST_HEAD(&list);
74096c
+    stub = NULL;
74096c
 
74096c
-    ob_fd = ob_fd_ctx_get(this, fd);
74096c
+    LOCK(&fd->inode->lock);
74096c
+    {
74096c
+        ob_inode = ob_inode_get_locked(this, fd->inode);
74096c
+        if (ob_inode != NULL) {
74096c
+            ob_inode->open_count--;
74096c
+
74096c
+            /* If this fd is the same as ob_inode->first_fd, it means that
74096c
+             * the initial open has not fully completed. We'll try to cancel
74096c
+             * it. */
74096c
+            if (ob_inode->first_fd == fd) {
74096c
+                if (ob_inode->first_open == OB_OPEN_PREPARING) {
74096c
+                    /* In this case ob_open_dispatch() has not been called yet.
74096c
+                     * We clear first_fd and first_open to allow that function
74096c
+                     * to know that the open is not really needed. This also
74096c
+                     * allows other requests to work as expected if they
74096c
+                     * arrive before the dispatch function is called. If there
74096c
+                     * are pending fops, we can directly process them here.
74096c
+                     * (note that there shouldn't be any fd related fops, but
74096c
+                     * if there are, it's fine if they fail). */
74096c
+                    ob_inode->first_fd = NULL;
74096c
+                    ob_inode->first_open = NULL;
74096c
+                    ob_inode->triggered = false;
74096c
+                    list_splice_init(&ob_inode->resume_fops, &list);
74096c
+                } else if (!ob_inode->triggered) {
74096c
+                    /* If the open has already been dispatched, we can only
74096c
+                     * cancel it if it has not been triggered. Otherwise we
74096c
+                     * simply wait until it completes. While it's not triggered,
74096c
+                     * first_open must be a valid stub and there can't be any
74096c
+                     * pending fops. */
74096c
+                    GF_ASSERT((ob_inode->first_open != NULL) &&
74096c
+                              list_empty(&ob_inode->resume_fops));
74096c
+
74096c
+                    ob_inode->first_fd = NULL;
74096c
+                    stub = ob_inode->first_open;
74096c
+                    ob_inode->first_open = NULL;
74096c
+                }
74096c
+            }
74096c
+        }
74096c
+    }
74096c
+    UNLOCK(&fd->inode->lock);
74096c
 
74096c
-    ob_fd_free(ob_fd);
74096c
+    if (stub != NULL) {
74096c
+        call_stub_destroy(stub);
74096c
+        fd_unref(fd);
74096c
+    }
74096c
 
74096c
-    return 0;
74096c
+    ob_resume_pending(&list);
74096c
 }
74096c
 
74096c
 int
74096c
 ob_forget(xlator_t *this, inode_t *inode)
74096c
 {
74096c
-    ob_inode_t *ob_inode = NULL;
74096c
+    ob_inode_t *ob_inode;
74096c
     uint64_t value = 0;
74096c
 
74096c
-    inode_ctx_del(inode, this, &value);
74096c
-
74096c
-    if (value) {
74096c
+    if ((inode_ctx_del(inode, this, &value) == 0) && (value != 0)) {
74096c
         ob_inode = (ob_inode_t *)(uintptr_t)value;
74096c
-        ob_inode_free(ob_inode);
74096c
+        GF_FREE(ob_inode);
74096c
     }
74096c
 
74096c
     return 0;
74096c
@@ -1153,20 +823,18 @@ ob_priv_dump(xlator_t *this)
74096c
 int
74096c
 ob_fdctx_dump(xlator_t *this, fd_t *fd)
74096c
 {
74096c
-    ob_fd_t *ob_fd = NULL;
74096c
     char key_prefix[GF_DUMP_MAX_BUF_LEN] = {
74096c
         0,
74096c
     };
74096c
-    int ret = 0;
74096c
+    uint64_t value = 0;
74096c
+    int ret = 0, error = 0;
74096c
 
74096c
     ret = TRY_LOCK(&fd->lock);
74096c
     if (ret)
74096c
         return 0;
74096c
 
74096c
-    ob_fd = __ob_fd_ctx_get(this, fd);
74096c
-    if (!ob_fd) {
74096c
-        UNLOCK(&fd->lock);
74096c
-        return 0;
74096c
+    if ((__fd_ctx_get(fd, this, &value) == 0) && (value != 0)) {
74096c
+        error = (int32_t)value;
74096c
     }
74096c
 
74096c
     gf_proc_dump_build_key(key_prefix, "xlator.performance.open-behind",
74096c
@@ -1175,17 +843,7 @@ ob_fdctx_dump(xlator_t *this, fd_t *fd)
74096c
 
74096c
     gf_proc_dump_write("fd", "%p", fd);
74096c
 
74096c
-    gf_proc_dump_write("open_frame", "%p", ob_fd->open_frame);
74096c
-
74096c
-    if (ob_fd->open_frame)
74096c
-        gf_proc_dump_write("open_frame.root.unique", "%" PRIu64,
74096c
-                           ob_fd->open_frame->root->unique);
74096c
-
74096c
-    gf_proc_dump_write("loc.path", "%s", ob_fd->loc.path);
74096c
-
74096c
-    gf_proc_dump_write("loc.ino", "%s", uuid_utoa(ob_fd->loc.gfid));
74096c
-
74096c
-    gf_proc_dump_write("flags", "%d", ob_fd->flags);
74096c
+    gf_proc_dump_write("error", "%d", error);
74096c
 
74096c
     UNLOCK(&fd->lock);
74096c
 
74096c
@@ -1307,7 +965,7 @@ struct xlator_fops fops = {
74096c
 };
74096c
 
74096c
 struct xlator_cbks cbks = {
74096c
-    .release = ob_release,
74096c
+    .fdclose = ob_fdclose,
74096c
     .forget = ob_forget,
74096c
 };
74096c
 
74096c
-- 
74096c
1.8.3.1
74096c