Blame SOURCES/kvm-scsi-build-qemu-pr-helper.patch

9bac43
From c3ce9144a1f76102f51bef7909eac5b2ba4bd777 Mon Sep 17 00:00:00 2001
9bac43
From: Paolo Bonzini <pbonzini@redhat.com>
9bac43
Date: Sat, 2 Dec 2017 12:19:49 +0100
9bac43
Subject: [PATCH 23/36] scsi: build qemu-pr-helper
9bac43
9bac43
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
9bac43
Message-id: <20171202121953.13317-14-pbonzini@redhat.com>
9bac43
Patchwork-id: 78089
9bac43
O-Subject: [RHEL7.4 qemu-kvm-rhev PATCH 13/17] scsi: build qemu-pr-helper
9bac43
Bugzilla: 1464908
9bac43
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
9bac43
RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
9bac43
RH-Acked-by: John Snow <jsnow@redhat.com>
9bac43
9bac43
Introduce a privileged helper to run persistent reservation commands.
9bac43
This lets virtual machines send persistent reservations without using
9bac43
CAP_SYS_RAWIO or out-of-tree patches.  The helper uses Unix permissions
9bac43
and SCM_RIGHTS to restrict access to processes that can access its socket
9bac43
and prove that they have an open file descriptor for a raw SCSI device.
9bac43
9bac43
The next patch will also correct the usage of persistent reservations
9bac43
with multipath devices.
9bac43
9bac43
It would also be possible to add support for Linux's IOC_PR_* ioctls in
9bac43
the future, to support NVMe devices.  For now, however, only SCSI is
9bac43
supported.
9bac43
9bac43
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
9bac43
(cherry picked from commit b855f8d175a0a26c9798cbc5962bb8c0d9538231)
9bac43
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
9bac43
---
9bac43
 Makefile                   |   4 +-
9bac43
 configure                  |  14 +-
9bac43
 docs/interop/pr-helper.rst |  83 +++++
9bac43
 docs/pr-manager.rst        |  33 ++
9bac43
 scsi/pr-helper.h           |  41 +++
9bac43
 scsi/qemu-pr-helper.c      | 735 +++++++++++++++++++++++++++++++++++++++++++++
9bac43
 6 files changed, 905 insertions(+), 5 deletions(-)
9bac43
 create mode 100644 docs/interop/pr-helper.rst
9bac43
 create mode 100644 scsi/pr-helper.h
9bac43
 create mode 100644 scsi/qemu-pr-helper.c
9bac43
9bac43
diff --git a/Makefile b/Makefile
9bac43
index 1a773a8..3e76953 100644
9bac43
--- a/Makefile
9bac43
+++ b/Makefile
9bac43
@@ -386,6 +386,8 @@ qemu-bridge-helper$(EXESUF): qemu-bridge-helper.o $(COMMON_LDADDS)
9bac43
 fsdev/virtfs-proxy-helper$(EXESUF): fsdev/virtfs-proxy-helper.o fsdev/9p-marshal.o fsdev/9p-iov-marshal.o $(COMMON_LDADDS)
9bac43
 fsdev/virtfs-proxy-helper$(EXESUF): LIBS += -lcap
9bac43
 
9bac43
+scsi/qemu-pr-helper$(EXESUF): scsi/qemu-pr-helper.o scsi/utils.o $(crypto-obj-y) $(io-obj-y) $(qom-obj-y) $(COMMON_LDADDS)
9bac43
+
9bac43
 qemu-img-cmds.h: $(SRC_PATH)/qemu-img-cmds.hx $(SRC_PATH)/scripts/hxtool
9bac43
 	$(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -h < $< > $@,"GEN","$@")
9bac43
 
9bac43
@@ -493,7 +495,7 @@ clean:
9bac43
 	rm -f *.msi
9bac43
 	find . \( -name '*.so' -o -name '*.dll' -o -name '*.mo' -o -name '*.[oda]' \) -type f -exec rm {} +
9bac43
 	rm -f $(filter-out %.tlb,$(TOOLS)) $(HELPERS-y) qemu-ga TAGS cscope.* *.pod *~ */*~
9bac43
-	rm -f fsdev/*.pod
9bac43
+	rm -f fsdev/*.pod scsi/*.pod
9bac43
 	rm -f qemu-img-cmds.h
9bac43
 	rm -f ui/shader/*-vert.h ui/shader/*-frag.h
9bac43
 	@# May not be present in GENERATED_FILES
9bac43
diff --git a/configure b/configure
9bac43
index 644e52d..2df1b42 100755
9bac43
--- a/configure
9bac43
+++ b/configure
9bac43
@@ -5065,16 +5065,22 @@ if test "$want_tools" = "yes" ; then
9bac43
   fi
9bac43
 fi
9bac43
 if test "$softmmu" = yes ; then
9bac43
-  if test "$virtfs" != no ; then
9bac43
-    if test "$cap" = yes && test "$linux" = yes && test "$attr" = yes ; then
9bac43
+  if test "$linux" = yes; then
9bac43
+    if test "$virtfs" != no && test "$cap" = yes && test "$attr" = yes ; then
9bac43
       virtfs=yes
9bac43
       tools="$tools fsdev/virtfs-proxy-helper\$(EXESUF)"
9bac43
     else
9bac43
       if test "$virtfs" = yes; then
9bac43
-        error_exit "VirtFS is supported only on Linux and requires libcap devel and libattr devel"
9bac43
+        error_exit "VirtFS requires libcap devel and libattr devel"
9bac43
       fi
9bac43
       virtfs=no
9bac43
     fi
9bac43
+    tools="$tools scsi/qemu-pr-helper\$(EXESUF)"
9bac43
+  else
9bac43
+    if test "$virtfs" = yes; then
9bac43
+      error_exit "VirtFS is supported only on Linux"
9bac43
+    fi
9bac43
+    virtfs=no
9bac43
   fi
9bac43
 fi
9bac43
 
9bac43
@@ -6562,7 +6568,7 @@ fi
9bac43
 
9bac43
 # build tree in object directory in case the source is not in the current directory
9bac43
 DIRS="tests tests/tcg tests/tcg/cris tests/tcg/lm32 tests/libqos tests/qapi-schema tests/tcg/xtensa tests/qemu-iotests"
9bac43
-DIRS="$DIRS docs docs/interop fsdev"
9bac43
+DIRS="$DIRS docs docs/interop fsdev scsi"
9bac43
 DIRS="$DIRS pc-bios/optionrom pc-bios/spapr-rtas pc-bios/s390-ccw"
9bac43
 DIRS="$DIRS roms/seabios roms/vgabios"
9bac43
 DIRS="$DIRS qapi-generated"
9bac43
diff --git a/docs/interop/pr-helper.rst b/docs/interop/pr-helper.rst
9bac43
new file mode 100644
9bac43
index 0000000..9f76d5b
9bac43
--- /dev/null
9bac43
+++ b/docs/interop/pr-helper.rst
9bac43
@@ -0,0 +1,83 @@
9bac43
+..
9bac43
+
9bac43
+======================================
9bac43
+Persistent reservation helper protocol
9bac43
+======================================
9bac43
+
9bac43
+QEMU's SCSI passthrough devices, ``scsi-block`` and ``scsi-generic``,
9bac43
+can delegate implementation of persistent reservations to an external
9bac43
+(and typically privileged) program.  Persistent Reservations allow
9bac43
+restricting access to block devices to specific initiators in a shared
9bac43
+storage setup.
9bac43
+
9bac43
+For a more detailed reference please refer to the SCSI Primary
9bac43
+Commands standard, specifically the section on Reservations and the
9bac43
+"PERSISTENT RESERVE IN" and "PERSISTENT RESERVE OUT" commands.
9bac43
+
9bac43
+This document describes the socket protocol used between QEMU's
9bac43
+``pr-manager-helper`` object and the external program.
9bac43
+
9bac43
+.. contents::
9bac43
+
9bac43
+Connection and initialization
9bac43
+-----------------------------
9bac43
+
9bac43
+All data transmitted on the socket is big-endian.
9bac43
+
9bac43
+After connecting to the helper program's socket, the helper starts a simple
9bac43
+feature negotiation process by writing four bytes corresponding to
9bac43
+the features it exposes (``supported_features``).  QEMU reads it,
9bac43
+then writes four bytes corresponding to the desired features of the
9bac43
+helper program (``requested_features``).
9bac43
+
9bac43
+If a bit is 1 in ``requested_features`` and 0 in ``supported_features``,
9bac43
+the corresponding feature is not supported by the helper and the connection
9bac43
+is closed.  On the other hand, it is acceptable for a bit to be 0 in
9bac43
+``requested_features`` and 1 in ``supported_features``; in this case,
9bac43
+the helper will not enable the feature.
9bac43
+
9bac43
+Right now no feature is defined, so the two parties always write four
9bac43
+zero bytes.
9bac43
+
9bac43
+Command format
9bac43
+--------------
9bac43
+
9bac43
+It is invalid to send multiple commands concurrently on the same
9bac43
+socket.  It is however possible to connect multiple sockets to the
9bac43
+helper and send multiple commands to the helper for one or more
9bac43
+file descriptors.
9bac43
+
9bac43
+A command consists of a request and a response.  A request consists
9bac43
+of a 16-byte SCSI CDB.  A file descriptor must be passed to the helper
9bac43
+together with the SCSI CDB using ancillary data.
9bac43
+
9bac43
+The CDB has the following limitations:
9bac43
+
9bac43
+- the command (stored in the first byte) must be one of 0x5E
9bac43
+  (PERSISTENT RESERVE IN) or 0x5F (PERSISTENT RESERVE OUT).
9bac43
+
9bac43
+- the allocation length (stored in bytes 7-8 of the CDB for PERSISTENT
9bac43
+  RESERVE IN) or parameter list length (stored in bytes 5-8 of the CDB
9bac43
+  for PERSISTENT RESERVE OUT) is limited to 8 KiB.
9bac43
+
9bac43
+For PERSISTENT RESERVE OUT, the parameter list is sent right after the
9bac43
+CDB.  The length of the parameter list is taken from the CDB itself.
9bac43
+
9bac43
+The helper's reply has the following structure:
9bac43
+
9bac43
+- 4 bytes for the SCSI status
9bac43
+
9bac43
+- 4 bytes for the payload size (nonzero only for PERSISTENT RESERVE IN
9bac43
+  and only if the SCSI status is 0x00, i.e. GOOD)
9bac43
+
9bac43
+- 96 bytes for the SCSI sense data
9bac43
+
9bac43
+- if the size is nonzero, the payload follows
9bac43
+
9bac43
+The sense data is always sent to keep the protocol simple, even though
9bac43
+it is only valid if the SCSI status is CHECK CONDITION (0x02).
9bac43
+
9bac43
+The payload size is always less than or equal to the allocation length
9bac43
+specified in the CDB for the PERSISTENT RESERVE IN command.
9bac43
+
9bac43
+If the protocol is violated, the helper closes the socket.
9bac43
diff --git a/docs/pr-manager.rst b/docs/pr-manager.rst
9bac43
index b6089fb..7107e59 100644
9bac43
--- a/docs/pr-manager.rst
9bac43
+++ b/docs/pr-manager.rst
9bac43
@@ -49,3 +49,36 @@ Alternatively, using ``-blockdev``::
9bac43
           -object pr-manager-helper,id=helper0,path=/var/run/qemu-pr-helper.sock
9bac43
           -blockdev node-name=hd,driver=raw,file.driver=host_device,file.filename=/dev/sdb,file.pr-manager=helper0
9bac43
           -device scsi-block,drive=hd
9bac43
+
9bac43
+----------------------------------
9bac43
+Invoking :program:`qemu-pr-helper`
9bac43
+----------------------------------
9bac43
+
9bac43
+QEMU provides an implementation of the persistent reservation helper,
9bac43
+called :program:`qemu-pr-helper`.  The helper should be started as a
9bac43
+system service and supports the following options:
9bac43
+
9bac43
+-d, --daemon              run in the background
9bac43
+-q, --quiet               decrease verbosity
9bac43
+-f, --pidfile=path        PID file when running as a daemon
9bac43
+-k, --socket=path         path to the socket
9bac43
+-T, --trace=trace-opts    tracing options
9bac43
+
9bac43
+By default, the socket and PID file are placed in the runtime state
9bac43
+directory, for example :file:`/var/run/qemu-pr-helper.sock` and
9bac43
+:file:`/var/run/qemu-pr-helper.pid`.  The PID file is not created
9bac43
+unless :option:`-d` is passed too.
9bac43
+
9bac43
+:program:`qemu-pr-helper` can also use the systemd socket activation
9bac43
+protocol.  In this case, the systemd socket unit should specify a
9bac43
+Unix stream socket, like this::
9bac43
+
9bac43
+    [Socket]
9bac43
+    ListenStream=/var/run/qemu-pr-helper.sock
9bac43
+
9bac43
+After connecting to the socket, :program:`qemu-pr-helper` can optionally drop
9bac43
+root privileges, except for those capabilities that are needed for
9bac43
+its operation.  To do this, add the following options:
9bac43
+
9bac43
+-u, --user=user           user to drop privileges to
9bac43
+-g, --group=group         group to drop privileges to
9bac43
diff --git a/scsi/pr-helper.h b/scsi/pr-helper.h
9bac43
new file mode 100644
9bac43
index 0000000..96c50a9
9bac43
--- /dev/null
9bac43
+++ b/scsi/pr-helper.h
9bac43
@@ -0,0 +1,41 @@
9bac43
+/* Definitions for QEMU's persistent reservation helper daemon
9bac43
+ *
9bac43
+ * Copyright (C) 2017 Red Hat, Inc.
9bac43
+ *
9bac43
+ * Author:
9bac43
+ *   Paolo Bonzini <pbonzini@redhat.com>
9bac43
+ *
9bac43
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
9bac43
+ * of this software and associated documentation files (the "Software"), to
9bac43
+ * deal in the Software without restriction, including without limitation the
9bac43
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
9bac43
+ * sell copies of the Software, and to permit persons to whom the Software is
9bac43
+ * furnished to do so, subject to the following conditions:
9bac43
+ *
9bac43
+ * The above copyright notice and this permission notice shall be included in
9bac43
+ * all copies or substantial portions of the Software.
9bac43
+ *
9bac43
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
9bac43
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
9bac43
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
9bac43
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
9bac43
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
9bac43
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
9bac43
+ * IN THE SOFTWARE.
9bac43
+ */
9bac43
+#ifndef QEMU_PR_HELPER_H
9bac43
+#define QEMU_PR_HELPER_H 1
9bac43
+
9bac43
+#include <stdint.h>
9bac43
+
9bac43
+#define PR_HELPER_CDB_SIZE     16
9bac43
+#define PR_HELPER_SENSE_SIZE   96
9bac43
+#define PR_HELPER_DATA_SIZE    8192
9bac43
+
9bac43
+typedef struct PRHelperResponse { /* fixed-size header of every helper reply */
9bac43
+    int32_t result;                      /* SCSI status of the command */
9bac43
+    int32_t sz;                          /* number of payload bytes following the header */
9bac43
+    uint8_t sense[PR_HELPER_SENSE_SIZE]; /* SCSI sense data, always transmitted */
9bac43
+} PRHelperResponse;
9bac43
+
9bac43
+#endif
9bac43
diff --git a/scsi/qemu-pr-helper.c b/scsi/qemu-pr-helper.c
9bac43
new file mode 100644
9bac43
index 0000000..f46266f
9bac43
--- /dev/null
9bac43
+++ b/scsi/qemu-pr-helper.c
9bac43
@@ -0,0 +1,735 @@
9bac43
+/*
9bac43
+ * Privileged helper to handle persistent reservation commands for QEMU
9bac43
+ *
9bac43
+ * Copyright (C) 2017 Red Hat, Inc. <pbonzini@redhat.com>
9bac43
+ *
9bac43
+ * Author: Paolo Bonzini <pbonzini@redhat.com>
9bac43
+ *
9bac43
+ * This program is free software; you can redistribute it and/or modify
9bac43
+ * it under the terms of the GNU General Public License as published by
9bac43
+ * the Free Software Foundation; under version 2 of the License.
9bac43
+ *
9bac43
+ * This program is distributed in the hope that it will be useful,
9bac43
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
9bac43
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
9bac43
+ * GNU General Public License for more details.
9bac43
+ *
9bac43
+ * You should have received a copy of the GNU General Public License
9bac43
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
9bac43
+ */
9bac43
+
9bac43
+#include "qemu/osdep.h"
9bac43
+#include <getopt.h>
9bac43
+#include <sys/ioctl.h>
9bac43
+#include <linux/dm-ioctl.h>
9bac43
+#include <scsi/sg.h>
9bac43
+
9bac43
+#ifdef CONFIG_LIBCAP
9bac43
+#include <cap-ng.h>
9bac43
+#endif
9bac43
+#include <pwd.h>
9bac43
+#include <grp.h>
9bac43
+
9bac43
+#include "qapi/error.h"
9bac43
+#include "qemu-common.h"
9bac43
+#include "qemu/cutils.h"
9bac43
+#include "qemu/main-loop.h"
9bac43
+#include "qemu/error-report.h"
9bac43
+#include "qemu/config-file.h"
9bac43
+#include "qemu/bswap.h"
9bac43
+#include "qemu/log.h"
9bac43
+#include "qemu/systemd.h"
9bac43
+#include "qapi/util.h"
9bac43
+#include "qapi/qmp/qstring.h"
9bac43
+#include "io/channel-socket.h"
9bac43
+#include "trace/control.h"
9bac43
+#include "qemu-version.h"
9bac43
+
9bac43
+#include "block/aio.h"
9bac43
+#include "block/thread-pool.h"
9bac43
+
9bac43
+#include "scsi/constants.h"
9bac43
+#include "scsi/utils.h"
9bac43
+#include "pr-helper.h"
9bac43
+
9bac43
+#define PR_OUT_FIXED_PARAM_SIZE 24
9bac43
+
9bac43
+static char *socket_path;          /* Unix socket path: -k/--socket or the computed default */
9bac43
+static char *pidfile;              /* PID file path: -f/--pidfile or the default set in main() */
9bac43
+static enum { RUNNING, TERMINATE, TERMINATING } state; /* termsig_handler() moves RUNNING to TERMINATE */
9bac43
+static QIOChannelSocket *server_ioc; /* released by close_server_socket(); presumably the listening socket */
9bac43
+static int server_watch;           /* GSource id removed in close_server_socket() */
9bac43
+static int num_active_sockets = 1; /* starts at 1; raised while a command runs, dropped when the server socket closes */
9bac43
+static int verbose;                /* nonzero: per-client errors are reported instead of discarded */
9bac43
+
9bac43
+#ifdef CONFIG_LIBCAP
9bac43
+static int uid = -1;
9bac43
+static int gid = -1;
9bac43
+#endif
9bac43
+
9bac43
+/*
9bac43
+ * Print the command-line help to stdout.
9bac43
+ *
9bac43
+ * @name is the program name shown on the "Usage:" line.  The current values
9bac43
+ * of the file-scope pidfile and socket_path variables are printed as the
9bac43
+ * defaults, so they should be set before this is called.
9bac43
+ */
9bac43
+static void usage(const char *name)
9bac43
+{
9bac43
+    (printf) (
9bac43
+"Usage: %s [OPTIONS] FILE\n"
9bac43
+"Persistent Reservation helper program for QEMU\n"
9bac43
+"\n"
9bac43
+"  -h, --help                display this help and exit\n"
9bac43
+"  -V, --version             output version information and exit\n"
9bac43
+"\n"
9bac43
+"  -d, --daemon              run in the background\n"
9bac43
+"  -q, --quiet               decrease verbosity\n"
9bac43
+"  -f, --pidfile=PATH        PID file when running as a daemon\n"
9bac43
+"                            (default '%s')\n"
9bac43
+"  -k, --socket=PATH         path to the unix socket\n"
9bac43
+"                            (default '%s')\n"
9bac43
+"  -T, --trace [[enable=]<pattern>][,events=<file>][,file=<file>]\n"
9bac43
+"                            specify tracing options\n"
9bac43
+#ifdef CONFIG_LIBCAP
9bac43
+"  -u, --user=USER           user to drop privileges to\n"
9bac43
+"  -g, --group=GROUP         group to drop privileges to\n"
9bac43
+#endif
9bac43
+"\n"
9bac43
+QEMU_HELP_BOTTOM "\n"
9bac43
+    , name, pidfile, socket_path);
9bac43
+}
9bac43
+
9bac43
+/* Print the program name, QEMU version, author and licensing notice. */
9bac43
+static void version(const char *name)
9bac43
+{
9bac43
+    /* First line: "<name> <version><package version>". */
9bac43
+    printf("%s " QEMU_VERSION QEMU_PKGVERSION "\n", name);
9bac43
+
9bac43
+    /* The remaining text takes no arguments. */
9bac43
+    printf(
9bac43
+"Written by Paolo Bonzini.\n"
9bac43
+"\n"
9bac43
+QEMU_COPYRIGHT "\n"
9bac43
+"This is free software; see the source for copying conditions.  There is NO\n"
9bac43
+"warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n"
9bac43
+    );
9bac43
+}
9bac43
+
9bac43
+/*
9bac43
+ * Create (or reuse) the PID file named by the file-scope pidfile variable,
9bac43
+ * lock it with lockf() and store this process's PID in it.
9bac43
+ *
9bac43
+ * Any failure terminates the program with EXIT_FAILURE.  On success the
9bac43
+ * descriptor is left open, so the lock stays held.
9bac43
+ */
9bac43
+static void write_pidfile(void)
9bac43
+{
9bac43
+    int pidfd;
9bac43
+    char pidstr[32];
9bac43
+
9bac43
+    pidfd = qemu_open(pidfile, O_CREAT|O_WRONLY, S_IRUSR|S_IWUSR);
9bac43
+    if (pidfd == -1) {
9bac43
+        error_report("Cannot open pid file, %s", strerror(errno));
9bac43
+        exit(EXIT_FAILURE);
9bac43
+    }
9bac43
+
9bac43
+    if (lockf(pidfd, F_TLOCK, 0)) {
9bac43
+        /* Without the lock the file belongs to its current holder (for
9bac43
+         * example another running helper), so it must not be unlinked.
9bac43
+         */
9bac43
+        error_report("Cannot lock pid file, %s", strerror(errno));
9bac43
+        close(pidfd);
9bac43
+        exit(EXIT_FAILURE);
9bac43
+    }
9bac43
+    if (ftruncate(pidfd, 0)) {
9bac43
+        error_report("Failed to truncate pid file");
9bac43
+        goto fail;
9bac43
+    }
9bac43
+
9bac43
+    snprintf(pidstr, sizeof(pidstr), "%d\n", getpid());
9bac43
+    if (write(pidfd, pidstr, strlen(pidstr)) != strlen(pidstr)) {
9bac43
+        error_report("Failed to write pid file");
9bac43
+        goto fail;
9bac43
+    }
9bac43
+    return;
9bac43
+
9bac43
+fail:
9bac43
+    /* Reached only while holding the lock, so the file is ours to remove. */
9bac43
+    unlink(pidfile);
9bac43
+    close(pidfd);
9bac43
+    exit(EXIT_FAILURE);
9bac43
+}
9bac43
+
9bac43
+/* SG_IO support */
9bac43
+
9bac43
+typedef struct PRHelperSGIOData { /* arguments passed to do_sgio_worker() */
9bac43
+    int fd;              /* descriptor the SG_IO ioctl is issued on */
9bac43
+    const uint8_t *cdb;  /* command block; PR_HELPER_CDB_SIZE bytes are sent */
9bac43
+    uint8_t *sense;      /* sense buffer; PR_HELPER_SENSE_SIZE bytes are cleared and used */
9bac43
+    uint8_t *buf;        /* data transfer buffer */
9bac43
+    int sz;              /* in: transfer length; out: length minus residual, or 0 on failure */
9bac43
+    int dir;             /* value stored in sg_io_hdr.dxfer_direction */
9bac43
+} PRHelperSGIOData;
9bac43
+
9bac43
+/*
9bac43
+ * Thread-pool worker: issue the SG_IO ioctl described by @opaque (a
9bac43
+ * PRHelperSGIOData) and return the resulting SCSI status.
9bac43
+ *
9bac43
+ * On GOOD status the job's sz is reduced by the residual count; on any
9bac43
+ * other status it is set to 0.
9bac43
+ */
9bac43
+static int do_sgio_worker(void *opaque)
9bac43
+{
9bac43
+    PRHelperSGIOData *job = opaque;
9bac43
+    struct sg_io_hdr hdr;
9bac43
+    SCSISense sense_code;
9bac43
+    int scsi_status;
9bac43
+    int rc;
9bac43
+
9bac43
+    memset(job->sense, 0, PR_HELPER_SENSE_SIZE);
9bac43
+
9bac43
+    memset(&hdr, 0, sizeof(hdr));
9bac43
+    hdr.interface_id = 'S';
9bac43
+    hdr.dxfer_direction = job->dir;
9bac43
+    hdr.cmdp = (uint8_t *)job->cdb;
9bac43
+    hdr.cmd_len = PR_HELPER_CDB_SIZE;
9bac43
+    hdr.dxferp = (char *)job->buf;
9bac43
+    hdr.dxfer_len = job->sz;
9bac43
+    hdr.sbp = job->sense;
9bac43
+    hdr.mx_sb_len = PR_HELPER_SENSE_SIZE;
9bac43
+    hdr.timeout = 1;
9bac43
+
9bac43
+    rc = ioctl(job->fd, SG_IO, &hdr);
9bac43
+    scsi_status = sg_io_sense_from_errno(rc < 0 ? errno : 0, &hdr,
9bac43
+                                         &sense_code);
9bac43
+
9bac43
+    job->sz = (scsi_status == GOOD) ? job->sz - hdr.resid : 0;
9bac43
+
9bac43
+    /* The driver supplied no sense data: build it from sense_code. */
9bac43
+    if (scsi_status == CHECK_CONDITION &&
9bac43
+        !(hdr.driver_status & SG_ERR_DRIVER_SENSE)) {
9bac43
+        scsi_build_sense(job->sense, sense_code);
9bac43
+    }
9bac43
+
9bac43
+    return scsi_status;
9bac43
+}
9bac43
+
9bac43
+/*
9bac43
+ * Run one SG_IO command through the thread pool of the main AIO context.
9bac43
+ *
9bac43
+ * @sz holds the transfer length on entry and is overwritten with the
9bac43
+ * length reported back by do_sgio_worker().  Returns the value produced
9bac43
+ * by thread_pool_submit_co().
9bac43
+ */
9bac43
+static int do_sgio(int fd, const uint8_t *cdb, uint8_t *sense,
9bac43
+                    uint8_t *buf, int *sz, int dir)
9bac43
+{
9bac43
+    PRHelperSGIOData job = {
9bac43
+        .fd = fd,
9bac43
+        .cdb = cdb,
9bac43
+        .sense = sense,
9bac43
+        .buf = buf,
9bac43
+        .sz = *sz,
9bac43
+        .dir = dir,
9bac43
+    };
9bac43
+    ThreadPool *workers = aio_get_thread_pool(qemu_get_aio_context());
9bac43
+    int status = thread_pool_submit_co(workers, do_sgio_worker, &job);
9bac43
+
9bac43
+    *sz = job.sz;
9bac43
+    return status;
9bac43
+}
9bac43
+
9bac43
+/*
9bac43
+ * Execute a PERSISTENT RESERVE IN command: a device-to-host transfer into
9bac43
+ * @data.  @resp_sz is the buffer size on entry and the received length on
9bac43
+ * return.  Returns the status from do_sgio().
9bac43
+ */
9bac43
+static int do_pr_in(int fd, const uint8_t *cdb, uint8_t *sense,
9bac43
+                    uint8_t *data, int *resp_sz)
9bac43
+{
9bac43
+    int status = do_sgio(fd, cdb, sense, data, resp_sz, SG_DXFER_FROM_DEV);
9bac43
+
9bac43
+    return status;
9bac43
+}
9bac43
+
9bac43
+/*
9bac43
+ * Execute a PERSISTENT RESERVE OUT command: a host-to-device transfer of
9bac43
+ * the @sz-byte parameter list @param.  Returns the status from do_sgio().
9bac43
+ */
9bac43
+static int do_pr_out(int fd, const uint8_t *cdb, uint8_t *sense,
9bac43
+                     const uint8_t *param, int sz)
9bac43
+{
9bac43
+    /* do_sgio() writes the transferred length back; the caller ignores it. */
9bac43
+    int xfer_len = sz;
9bac43
+    int status;
9bac43
+
9bac43
+    status = do_sgio(fd, cdb, sense, (uint8_t *)param, &xfer_len,
9bac43
+                     SG_DXFER_TO_DEV);
9bac43
+    return status;
9bac43
+}
9bac43
+
9bac43
+/* Client */
9bac43
+
9bac43
+typedef struct PRHelperClient { /* per-connection state, allocated in accept_client() */
9bac43
+    QIOChannelSocket *ioc;             /* accepted client connection */
9bac43
+    Coroutine *co;                     /* coroutine running prh_co_entry() for this client */
9bac43
+    int fd;                            /* descriptor received with the current request, -1 if none */
9bac43
+    uint8_t data[PR_HELPER_DATA_SIZE]; /* PR OUT parameter list or PR IN response payload */
9bac43
+} PRHelperClient;
9bac43
+
9bac43
+typedef struct PRHelperRequest { /* one request, filled in by prh_read_request() */
9bac43
+    int fd;                          /* descriptor taken over from the client, -1 if rejected */
9bac43
+    size_t sz;                       /* transfer length derived from the CDB */
9bac43
+    uint8_t cdb[PR_HELPER_CDB_SIZE]; /* SCSI command block as read from the socket */
9bac43
+} PRHelperRequest;
9bac43
+
9bac43
+/*
9bac43
+ * Read exactly @sz bytes from the client's channel into @buf, yielding the
9bac43
+ * coroutine whenever the channel would block.
9bac43
+ *
9bac43
+ * At most one file descriptor received as ancillary data is stashed in
9bac43
+ * client->fd; any further descriptor is closed and makes the read fail.
9bac43
+ *
9bac43
+ * Returns 0 on success and a negative value on error or end of stream; in
9bac43
+ * that case a stashed descriptor is closed and client->fd is reset to -1.
9bac43
+ */
9bac43
+static int coroutine_fn prh_read(PRHelperClient *client, void *buf, int sz,
9bac43
+                                 Error **errp)
9bac43
+{
9bac43
+    QIOChannel *chan = QIO_CHANNEL(client->ioc);
9bac43
+    int ret = 0;
9bac43
+
9bac43
+    while (sz > 0) {
9bac43
+        struct iovec iov = { .iov_base = buf, .iov_len = sz };
9bac43
+        bool surplus_fd = false;
9bac43
+        int *fds = NULL;
9bac43
+        size_t nfds = 0;
9bac43
+        size_t idx;
9bac43
+        ssize_t got;
9bac43
+
9bac43
+        got = qio_channel_readv_full(chan, &iov, 1, &fds, &nfds, errp);
9bac43
+        if (got == QIO_CHANNEL_ERR_BLOCK) {
9bac43
+            qio_channel_yield(chan, G_IO_IN);
9bac43
+            continue;
9bac43
+        }
9bac43
+        if (got <= 0) {
9bac43
+            /* End of stream (0) is reported as -1, errors are passed on. */
9bac43
+            ret = (got == 0) ? -1 : got;
9bac43
+            goto err;
9bac43
+        }
9bac43
+
9bac43
+        /* Keep the first descriptor of a request, reject any extras. */
9bac43
+        for (idx = 0; idx < nfds; idx++) {
9bac43
+            if (client->fd != -1) {
9bac43
+                close(fds[idx]);
9bac43
+                surplus_fd = true;
9bac43
+                continue;
9bac43
+            }
9bac43
+            client->fd = fds[idx];
9bac43
+        }
9bac43
+        if (nfds) {
9bac43
+            g_free(fds);
9bac43
+        }
9bac43
+        if (surplus_fd) {
9bac43
+            ret = -1;
9bac43
+            goto err;
9bac43
+        }
9bac43
+
9bac43
+        buf += got;
9bac43
+        sz -= got;
9bac43
+    }
9bac43
+
9bac43
+    return 0;
9bac43
+
9bac43
+err:
9bac43
+    if (client->fd != -1) {
9bac43
+        close(client->fd);
9bac43
+        client->fd = -1;
9bac43
+    }
9bac43
+    return ret;
9bac43
+}
9bac43
+
9bac43
+/*
9bac43
+ * Read one request from the client: the CDB, the file descriptor sent with
9bac43
+ * it and, for PERSISTENT RESERVE OUT, the parameter list (stored in
9bac43
+ * client->data).
9bac43
+ *
9bac43
+ * Returns the transfer length taken from the CDB, or -1 if the connection
9bac43
+ * has to be closed.  A PERSISTENT RESERVE OUT request that is rejected
9bac43
+ * returns 0: @resp then holds a complete CHECK CONDITION reply (status,
9bac43
+ * zero payload size and sense data) and req->fd is -1.  Otherwise the
9bac43
+ * descriptor moves from client->fd to req->fd.
9bac43
+ */
9bac43
+static int coroutine_fn prh_read_request(PRHelperClient *client,
9bac43
+                                         PRHelperRequest *req,
9bac43
+                                         PRHelperResponse *resp, Error **errp)
9bac43
+{
9bac43
+    uint32_t sz;
9bac43
+
9bac43
+    if (prh_read(client, req->cdb, sizeof(req->cdb), NULL) < 0) {
9bac43
+        return -1;
9bac43
+    }
9bac43
+
9bac43
+    if (client->fd == -1) {
9bac43
+        error_setg(errp, "No file descriptor in request.");
9bac43
+        return -1;
9bac43
+    }
9bac43
+
9bac43
+    if (req->cdb[0] != PERSISTENT_RESERVE_OUT &&
9bac43
+        req->cdb[0] != PERSISTENT_RESERVE_IN) {
9bac43
+        error_setg(errp, "Invalid CDB, closing socket.");
9bac43
+        goto out_close;
9bac43
+    }
9bac43
+
9bac43
+    sz = scsi_cdb_xfer(req->cdb);
9bac43
+    if (sz > sizeof(client->data)) {
9bac43
+        /* Report the reason like the other protocol violations do. */
9bac43
+        error_setg(errp, "Transfer length too large, closing socket.");
9bac43
+        goto out_close;
9bac43
+    }
9bac43
+
9bac43
+    if (req->cdb[0] == PERSISTENT_RESERVE_OUT) {
9bac43
+        if (qio_channel_read_all(QIO_CHANNEL(client->ioc),
9bac43
+                                 (char *)client->data, sz,
9bac43
+                                 errp) < 0) {
9bac43
+            goto out_close;
9bac43
+        }
9bac43
+        if ((fcntl(client->fd, F_GETFL) & O_ACCMODE) == O_RDONLY) {
9bac43
+            /* The descriptor was opened read-only: refuse to modify
9bac43
+             * reservations through it.
9bac43
+             */
9bac43
+            scsi_build_sense(resp->sense, SENSE_CODE(INVALID_OPCODE));
9bac43
+            sz = 0;
9bac43
+        } else if (sz < PR_OUT_FIXED_PARAM_SIZE) {
9bac43
+            /* Illegal request, Parameter list length error.  This isn't fatal;
9bac43
+             * we have read the data, send an error without closing the socket.
9bac43
+             */
9bac43
+            scsi_build_sense(resp->sense, SENSE_CODE(INVALID_PARAM_LEN));
9bac43
+            sz = 0;
9bac43
+        }
9bac43
+        if (sz == 0) {
9bac43
+            /* Fill in every field of the reply: the caller sends it as-is
9bac43
+             * and prh_write_response() asserts that the size is zero.
9bac43
+             */
9bac43
+            resp->result = CHECK_CONDITION;
9bac43
+            resp->sz = 0;
9bac43
+            close(client->fd);
9bac43
+            client->fd = -1;
9bac43
+        }
9bac43
+    }
9bac43
+
9bac43
+    req->fd = client->fd;
9bac43
+    req->sz = sz;
9bac43
+    client->fd = -1;
9bac43
+    return sz;
9bac43
+
9bac43
+out_close:
9bac43
+    close(client->fd);
9bac43
+    client->fd = -1;
9bac43
+    return -1;
9bac43
+}
9bac43
+
9bac43
+/*
9bac43
+ * Send the reply for @req: the PRHelperResponse header (status and size
9bac43
+ * converted to big-endian in place) followed by resp->sz bytes of
9bac43
+ * client->data.
9bac43
+ *
9bac43
+ * Only a PERSISTENT RESERVE IN command that ended with GOOD status may
9bac43
+ * carry a payload; this is asserted.  Returns 0 on success or the negative
9bac43
+ * value of the failing channel write.
9bac43
+ */
9bac43
+static int coroutine_fn prh_write_response(PRHelperClient *client,
9bac43
+                                           PRHelperRequest *req,
9bac43
+                                           PRHelperResponse *resp, Error **errp)
9bac43
+{
9bac43
+    QIOChannel *chan = QIO_CHANNEL(client->ioc);
9bac43
+    bool may_carry_payload;
9bac43
+    size_t payload_len;
9bac43
+    ssize_t rc;
9bac43
+
9bac43
+    may_carry_payload = req->cdb[0] == PERSISTENT_RESERVE_IN &&
9bac43
+                        resp->result == GOOD;
9bac43
+    if (!may_carry_payload) {
9bac43
+        assert(resp->sz == 0);
9bac43
+    } else {
9bac43
+        assert(resp->sz <= req->sz && resp->sz <= sizeof(client->data));
9bac43
+    }
9bac43
+
9bac43
+    /* Remember the host-endian length before the header is byte-swapped. */
9bac43
+    payload_len = resp->sz;
9bac43
+
9bac43
+    resp->result = cpu_to_be32(resp->result);
9bac43
+    resp->sz = cpu_to_be32(resp->sz);
9bac43
+
9bac43
+    rc = qio_channel_write_all(chan, (char *) resp, sizeof(*resp), errp);
9bac43
+    if (rc >= 0) {
9bac43
+        rc = qio_channel_write_all(chan, (char *) client->data,
9bac43
+                                   payload_len, errp);
9bac43
+    }
9bac43
+
9bac43
+    return rc < 0 ? rc : 0;
9bac43
+}
9bac43
+
9bac43
+/*
9bac43
+ * Coroutine serving one client connection.  @opaque is the PRHelperClient
9bac43
+ * created by accept_client().
9bac43
+ *
9bac43
+ * After the feature negotiation, requests are read, executed and answered
9bac43
+ * one at a time until the client goes away, a request is invalid, a write
9bac43
+ * fails or the helper leaves the RUNNING state.  Before returning, the
9bac43
+ * channel reference is dropped and the client structure is freed.
9bac43
+ */
9bac43
+static void coroutine_fn prh_co_entry(void *opaque)
9bac43
+{
9bac43
+    PRHelperClient *client = opaque;
9bac43
+    Error *local_err = NULL;
9bac43
+    uint32_t flags;
9bac43
+    int r;
9bac43
+
9bac43
+    qio_channel_set_blocking(QIO_CHANNEL(client->ioc),
9bac43
+                             false, NULL);
9bac43
+    qio_channel_attach_aio_context(QIO_CHANNEL(client->ioc),
9bac43
+                                   qemu_get_aio_context());
9bac43
+
9bac43
+    /* A very simple negotiation for future extensibility.  No features
9bac43
+     * are defined so write 0.
9bac43
+     */
9bac43
+    flags = cpu_to_be32(0);
9bac43
+    r = qio_channel_write_all(QIO_CHANNEL(client->ioc),
9bac43
+                             (char *) &flags, sizeof(flags), NULL);
9bac43
+    if (r < 0) {
9bac43
+        goto out;
9bac43
+    }
9bac43
+
9bac43
+    r = qio_channel_read_all(QIO_CHANNEL(client->ioc),
9bac43
+                             (char *) &flags, sizeof(flags), NULL);
9bac43
+    /* Check for a failed read before interpreting the buffer contents. */
9bac43
+    if (r < 0 || be32_to_cpu(flags) != 0) {
9bac43
+        goto out;
9bac43
+    }
9bac43
+
9bac43
+    while (atomic_read(&state) == RUNNING) {
9bac43
+        PRHelperRequest req;
9bac43
+        /* Start from an all-zero reply (status 0, no payload, empty sense)
9bac43
+         * so that a request which never reaches the device still sends
9bac43
+         * defined data.
9bac43
+         */
9bac43
+        PRHelperResponse resp = { 0 };
9bac43
+        int sz;
9bac43
+
9bac43
+        sz = prh_read_request(client, &req, &resp, &local_err);
9bac43
+        if (sz < 0) {
9bac43
+            break;
9bac43
+        }
9bac43
+
9bac43
+        if (sz > 0) {
9bac43
+            num_active_sockets++;
9bac43
+            if (req.cdb[0] == PERSISTENT_RESERVE_OUT) {
9bac43
+                r = do_pr_out(req.fd, req.cdb, resp.sense,
9bac43
+                              client->data, sz);
9bac43
+                resp.sz = 0;
9bac43
+            } else {
9bac43
+                resp.sz = sizeof(client->data);
9bac43
+                r = do_pr_in(req.fd, req.cdb, resp.sense,
9bac43
+                             client->data, &resp.sz);
9bac43
+                /* Never return more than the client asked for. */
9bac43
+                resp.sz = MIN(resp.sz, sz);
9bac43
+            }
9bac43
+            num_active_sockets--;
9bac43
+            close(req.fd);
9bac43
+            if (r == -1) {
9bac43
+                break;
9bac43
+            }
9bac43
+            resp.result = r;
9bac43
+        } else if (req.fd != -1) {
9bac43
+            /* Zero-length transfer with a descriptor still attached: the
9bac43
+             * command is not issued, so just release the descriptor.
9bac43
+             */
9bac43
+            close(req.fd);
9bac43
+        }
9bac43
+
9bac43
+        if (prh_write_response(client, &req, &resp, &local_err) < 0) {
9bac43
+            break;
9bac43
+        }
9bac43
+    }
9bac43
+
9bac43
+    if (local_err) {
9bac43
+        if (verbose == 0) {
9bac43
+            error_free(local_err);
9bac43
+        } else {
9bac43
+            error_report_err(local_err);
9bac43
+        }
9bac43
+    }
9bac43
+
9bac43
+out:
9bac43
+    qio_channel_detach_aio_context(QIO_CHANNEL(client->ioc));
9bac43
+    object_unref(OBJECT(client->ioc));
9bac43
+    g_free(client);
9bac43
+}
9bac43
+
9bac43
+/*
9bac43
+ * Channel watch callback (presumably installed on the listening socket):
9bac43
+ * accept one connection and start a coroutine running prh_co_entry() for
9bac43
+ * it.  Always returns TRUE, also when the accept fails.
9bac43
+ */
9bac43
+static gboolean accept_client(QIOChannel *ioc, GIOCondition cond, gpointer opaque)
9bac43
+{
9bac43
+    PRHelperClient *client;
9bac43
+    QIOChannelSocket *conn;
9bac43
+
9bac43
+    conn = qio_channel_socket_accept(QIO_CHANNEL_SOCKET(ioc), NULL);
9bac43
+    if (conn) {
9bac43
+        client = g_new(PRHelperClient, 1);
9bac43
+        client->fd = -1;
9bac43
+        client->ioc = conn;
9bac43
+        client->co = qemu_coroutine_create(prh_co_entry, client);
9bac43
+        qemu_coroutine_enter(client->co);
9bac43
+    }
9bac43
+
9bac43
+    return TRUE;
9bac43
+}
9bac43
+
9bac43
+
9bac43
+/*
9bac43
+ * Validate the command-line options against socket activation.
9bac43
+ *
9bac43
+ * Returns an error message if an explicit socket path was given, or NULL
9bac43
+ * if the options are compatible with socket activation.
9bac43
+ */
9bac43
+static const char *socket_activation_validate_opts(void)
9bac43
+{
9bac43
+    return socket_path != NULL
9bac43
+        ? "Unix socket can't be set when using socket activation"
9bac43
+        : NULL;
9bac43
+}
9bac43
+
9bac43
+/* Give socket_path its default value unless a path was already chosen. */
9bac43
+static void compute_default_paths(void)
9bac43
+{
9bac43
+    if (socket_path) {
9bac43
+        return;
9bac43
+    }
9bac43
+
9bac43
+    socket_path = qemu_get_local_state_pathname("run/qemu-pr-helper.sock");
9bac43
+}
9bac43
+
9bac43
+/*
9bac43
+ * Handler installed for SIGTERM, SIGINT and SIGHUP: request termination.
9bac43
+ * @signum is not used.
9bac43
+ */
9bac43
+static void termsig_handler(int signum)
9bac43
+{
9bac43
+    /* Only a helper that is still RUNNING moves to TERMINATE. */
9bac43
+    atomic_cmpxchg(&state, RUNNING, TERMINATE);
9bac43
+
9bac43
+    /* Presumably wakes the main loop so it notices the new state. */
9bac43
+    qemu_notify_event();
9bac43
+}
9bac43
+
9bac43
+/*
9bac43
+ * Stop accepting connections: remove the watch, drop the reference to the
9bac43
+ * server channel and take it out of the active socket count.
9bac43
+ */
9bac43
+static void close_server_socket(void)
9bac43
+{
9bac43
+    assert(server_ioc != NULL);
9bac43
+
9bac43
+    /* Remove the watch first, then mark it as gone. */
9bac43
+    g_source_remove(server_watch);
9bac43
+    server_watch = -1;
9bac43
+
9bac43
+    object_unref(OBJECT(server_ioc));
9bac43
+    num_active_sockets -= 1;
9bac43
+}
9bac43
+
9bac43
+#ifdef CONFIG_LIBCAP
9bac43
+/*
9bac43
+ * Reduce the process to the CAP_SYS_RAWIO capability and switch to the
9bac43
+ * user and group chosen on the command line, keeping the current IDs for
9bac43
+ * whichever of the two was not given.
9bac43
+ *
9bac43
+ * Returns 0 on success, -1 if a libcap-ng call fails.
9bac43
+ */
9bac43
+static int drop_privileges(void)
9bac43
+{
9bac43
+    int target_uid;
9bac43
+    int target_gid;
9bac43
+
9bac43
+    /* Start from an empty capability set, then add the one that is kept. */
9bac43
+    capng_clear(CAPNG_SELECT_BOTH);
9bac43
+    if (capng_update(CAPNG_ADD, CAPNG_EFFECTIVE | CAPNG_PERMITTED,
9bac43
+                     CAP_SYS_RAWIO) < 0) {
9bac43
+        return -1;
9bac43
+    }
9bac43
+
9bac43
+    target_uid = uid != -1 ? uid : getuid();
9bac43
+    target_gid = gid != -1 ? gid : getgid();
9bac43
+
9bac43
+    /* Change user/group id, retaining the capabilities.  Because file
9bac43
+     * descriptors are passed via SCM_RIGHTS, we don't need supplementary
9bac43
+     * groups (and in fact the helper can run as "nobody").
9bac43
+     */
9bac43
+    if (capng_change_id(target_uid, target_gid,
9bac43
+                        CAPNG_DROP_SUPP_GRP | CAPNG_CLEAR_BOUNDING)) {
9bac43
+        return -1;
9bac43
+    }
9bac43
+
9bac43
+    return 0;
9bac43
+}
9bac43
+#endif
9bac43
+
9bac43
+/* Parse options, set up the server socket, drop privileges, optionally
9bac43
+ * daemonize, then serve clients from the main loop until termination.
9bac43
+ */
9bac43
+int main(int argc, char **argv)
9bac43
+{
9bac43
+    /* 'f' takes an argument: its case below stores optarg in pidfile. */
9bac43
+    const char *sopt = "hVk:f:dT:u:g:q";
9bac43
+    struct option lopt[] = {
9bac43
+        { "help", no_argument, NULL, 'h' },
9bac43
+        { "version", no_argument, NULL, 'V' },
9bac43
+        { "socket", required_argument, NULL, 'k' },
9bac43
+        { "pidfile", required_argument, NULL, 'f' },
9bac43
+        { "daemon", no_argument, NULL, 'd' },
9bac43
+        { "trace", required_argument, NULL, 'T' },
9bac43
+        { "user", required_argument, NULL, 'u' },
9bac43
+        { "group", required_argument, NULL, 'g' },
9bac43
+        { "quiet", no_argument, NULL, 'q' },
9bac43
+        { NULL, 0, NULL, 0 }
9bac43
+    };
9bac43
+    int opt_ind = 0;
9bac43
+    int quiet = 0;
9bac43
+    int ch;
9bac43
+    Error *local_err = NULL;
9bac43
+    char *trace_file = NULL;
9bac43
+    bool daemonize = false;
9bac43
+    unsigned socket_activation;
9bac43
+    struct sigaction sa_sigterm = { .sa_handler = termsig_handler };
9bac43
+
9bac43
+    sigaction(SIGTERM, &sa_sigterm, NULL);
9bac43
+    sigaction(SIGINT, &sa_sigterm, NULL);
9bac43
+    sigaction(SIGHUP, &sa_sigterm, NULL);
9bac43
+
9bac43
+    signal(SIGPIPE, SIG_IGN);
9bac43
+
9bac43
+    module_call_init(MODULE_INIT_TRACE);
9bac43
+    module_call_init(MODULE_INIT_QOM);
9bac43
+    qemu_add_opts(&qemu_trace_opts);
9bac43
+    qemu_init_exec_dir(argv[0]);
9bac43
+
9bac43
+    pidfile = qemu_get_local_state_pathname("run/qemu-pr-helper.pid");
9bac43
+
9bac43
+    while ((ch = getopt_long(argc, argv, sopt, lopt, &opt_ind)) != -1) {
9bac43
+        switch (ch) {
9bac43
+        case 'k':
9bac43
+            socket_path = optarg;
9bac43
+            if (socket_path[0] != '/') {
9bac43
+                error_report("socket path must be absolute");
9bac43
+                exit(EXIT_FAILURE);
9bac43
+            }
9bac43
+            break;
9bac43
+        case 'f':
9bac43
+            pidfile = optarg;
9bac43
+            break;
9bac43
+#ifdef CONFIG_LIBCAP
9bac43
+        case 'u': { /* user name, or else a numeric uid */
9bac43
+            unsigned long res;
9bac43
+            struct passwd *userinfo = getpwnam(optarg);
9bac43
+            if (userinfo) {
9bac43
+                uid = userinfo->pw_uid;
9bac43
+            } else if (qemu_strtoul(optarg, NULL, 10, &res) == 0 &&
9bac43
+                       (uid_t)res == res) {
9bac43
+                uid = res;
9bac43
+            } else {
9bac43
+                error_report("invalid user '%s'", optarg);
9bac43
+                exit(EXIT_FAILURE);
9bac43
+            }
9bac43
+            break;
9bac43
+        }
9bac43
+        case 'g': { /* group name, or else a numeric gid */
9bac43
+            unsigned long res;
9bac43
+            struct group *groupinfo = getgrnam(optarg);
9bac43
+            if (groupinfo) {
9bac43
+                gid = groupinfo->gr_gid;
9bac43
+            } else if (qemu_strtoul(optarg, NULL, 10, &res) == 0 &&
9bac43
+                       (gid_t)res == res) {
9bac43
+                gid = res;
9bac43
+            } else {
9bac43
+                error_report("invalid group '%s'", optarg);
9bac43
+                exit(EXIT_FAILURE);
9bac43
+            }
9bac43
+            break;
9bac43
+        }
9bac43
+#else
9bac43
+        case 'u':
9bac43
+        case 'g':
9bac43
+            error_report("-%c not supported by this %s", ch, argv[0]);
9bac43
+            exit(1);
9bac43
+#endif
9bac43
+        case 'd':
9bac43
+            daemonize = true;
9bac43
+            break;
9bac43
+        case 'q':
9bac43
+            quiet = 1;
9bac43
+            break;
9bac43
+        case 'T':
9bac43
+            g_free(trace_file);
9bac43
+            trace_file = trace_opt_parse(optarg);
9bac43
+            break;
9bac43
+        case 'V':
9bac43
+            version(argv[0]);
9bac43
+            exit(EXIT_SUCCESS);
9bac43
+        case 'h':
9bac43
+            usage(argv[0]);
9bac43
+            exit(EXIT_SUCCESS);
9bac43
+        case '?':
9bac43
+            error_report("Try `%s --help' for more information.", argv[0]);
9bac43
+            exit(EXIT_FAILURE);
9bac43
+        }
9bac43
+    }
9bac43
+
9bac43
+    /* set verbosity */
9bac43
+    verbose = !quiet;
9bac43
+
9bac43
+    if (!trace_init_backends()) {
9bac43
+        exit(EXIT_FAILURE);
9bac43
+    }
9bac43
+    trace_init_file(trace_file);
9bac43
+    qemu_set_log(LOG_TRACE);
9bac43
+
9bac43
+    socket_activation = check_socket_activation();
9bac43
+    if (socket_activation == 0) {
9bac43
+        SocketAddress saddr;
9bac43
+        compute_default_paths();
9bac43
+        saddr = (SocketAddress){
9bac43
+            .type = SOCKET_ADDRESS_TYPE_UNIX,
9bac43
+            .u.q_unix.path = g_strdup(socket_path)
9bac43
+        };
9bac43
+        server_ioc = qio_channel_socket_new();
9bac43
+        if (qio_channel_socket_listen_sync(server_ioc, &saddr, &local_err) < 0) {
9bac43
+            object_unref(OBJECT(server_ioc));
9bac43
+            error_report_err(local_err);
9bac43
+            return 1;
9bac43
+        }
9bac43
+        g_free(saddr.u.q_unix.path);
9bac43
+    } else {
9bac43
+        /* Using socket activation - check user didn't use -k etc. */
9bac43
+        const char *err_msg = socket_activation_validate_opts();
9bac43
+        if (err_msg != NULL) {
9bac43
+            error_report("%s", err_msg);
9bac43
+            exit(EXIT_FAILURE);
9bac43
+        }
9bac43
+
9bac43
+        /* Can only listen on a single socket.  */
9bac43
+        if (socket_activation > 1) {
9bac43
+            error_report("%s does not support socket activation with LISTEN_FDS > 1",
9bac43
+                         argv[0]);
9bac43
+            exit(EXIT_FAILURE);
9bac43
+        }
9bac43
+        server_ioc = qio_channel_socket_new_fd(FIRST_SOCKET_ACTIVATION_FD,
9bac43
+                                               &local_err);
9bac43
+        if (server_ioc == NULL) {
9bac43
+            error_report("Failed to use socket activation: %s",
9bac43
+                         error_get_pretty(local_err));
9bac43
+            exit(EXIT_FAILURE);
9bac43
+        }
9bac43
+        socket_path = NULL;
9bac43
+    }
9bac43
+
9bac43
+    if (qemu_init_main_loop(&local_err)) {
9bac43
+        error_report_err(local_err);
9bac43
+        exit(EXIT_FAILURE);
9bac43
+    }
9bac43
+
9bac43
+    server_watch = qio_channel_add_watch(QIO_CHANNEL(server_ioc),
9bac43
+                                         G_IO_IN,
9bac43
+                                         accept_client,
9bac43
+                                         NULL, NULL);
9bac43
+
9bac43
+#ifdef CONFIG_LIBCAP
9bac43
+    if (drop_privileges() < 0) {
9bac43
+        error_report("Failed to drop privileges: %s", strerror(errno));
9bac43
+        exit(EXIT_FAILURE);
9bac43
+    }
9bac43
+#endif
9bac43
+
9bac43
+    if (daemonize) {
9bac43
+        if (daemon(0, 0) < 0) {
9bac43
+            error_report("Failed to daemonize: %s", strerror(errno));
9bac43
+            exit(EXIT_FAILURE);
9bac43
+        }
9bac43
+        write_pidfile();
9bac43
+    }
9bac43
+
9bac43
+    state = RUNNING;
9bac43
+    do {
9bac43
+        main_loop_wait(false);
9bac43
+        if (state == TERMINATE) {
9bac43
+            state = TERMINATING;
9bac43
+            close_server_socket();
9bac43
+        }
9bac43
+    } while (num_active_sockets > 0);
9bac43
+
9bac43
+    exit(EXIT_SUCCESS);
9bac43
+}
9bac43
-- 
9bac43
1.8.3.1
9bac43