Blame SOURCES/kexec-tools-2.0.7-Provide-an-option-to-use-new-kexec-system-call.patch

de80c6
From 046d1755d2bd723a11a180c265e61a884990712e Mon Sep 17 00:00:00 2001
de80c6
From: Vivek Goyal <vgoyal@redhat.com>
de80c6
Date: Mon, 18 Aug 2014 11:22:32 -0400
de80c6
Subject: [PATCH] kexec: Provide an option to use new kexec system call
de80c6
de80c6
Hi,
de80c6
de80c6
This is v2 of the patch. Since v1, I moved the syscall-implemented check a little
de80c6
earlier in the function as per the feedback.
de80c6
de80c6
Now a new kexec syscall (kexec_file_load()) has been merged into the upstream
de80c6
kernel. This system call takes file descriptors of the kernel and initramfs
de80c6
as input (as opposed to a list of segments to be loaded). This new system
de80c6
call allows for signature verification of the kernel being loaded.
de80c6
de80c6
One use of kernel signature verification is on secure boot systems, where
de80c6
we want to allow kexec into a kernel only if it is validly signed by
de80c6
a key the system trusts.
de80c6
de80c6
This patch provides an option, --kexec-file-syscall (-s), to force use of the
de80c6
new system call for kexec. The default is to continue to use the old syscall.
de80c6
de80c6
Currently only bzImage64 on x86_64 can be loaded using this system call.
de80c6
As the kernel adds support for more arches and for more image types, kexec-tools
de80c6
can be modified accordingly.
de80c6
de80c6
Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
de80c6
Acked-by: Baoquan He <bhe@redhat.com>
de80c6
Signed-off-by: Simon Horman <horms@verge.net.au>
de80c6
---
de80c6
 kexec/arch/x86_64/kexec-bzImage64.c |  86 +++++++++++++++++++++++
de80c6
 kexec/kexec-syscall.h               |  32 +++++++++
de80c6
 kexec/kexec.c                       | 132 +++++++++++++++++++++++++++++++++++-
de80c6
 kexec/kexec.h                       |  11 ++-
de80c6
 4 files changed, 257 insertions(+), 4 deletions(-)
de80c6
de80c6
diff --git a/kexec/arch/x86_64/kexec-bzImage64.c b/kexec/arch/x86_64/kexec-bzImage64.c
de80c6
index 1983bcf..8edb3e4 100644
de80c6
--- a/kexec/arch/x86_64/kexec-bzImage64.c
de80c6
+++ b/kexec/arch/x86_64/kexec-bzImage64.c
de80c6
@@ -235,6 +235,89 @@ static int do_bzImage64_load(struct kexec_info *info,
de80c6
 	return 0;
de80c6
 }
de80c6
 
de80c6
+/* This assumes file is being loaded using file based kexec syscall */
de80c6
+int bzImage64_load_file(int argc, char **argv, struct kexec_info *info)
de80c6
+{
de80c6
+	int ret = 0;
de80c6
+	char *command_line = NULL, *tmp_cmdline = NULL;
de80c6
+	const char *ramdisk = NULL, *append = NULL;
de80c6
+	int entry_16bit = 0, entry_32bit = 0;
de80c6
+	int opt;
de80c6
+	int command_line_len;
de80c6
+
de80c6
+	/* See options.h -- add any more there, too. */
de80c6
+	static const struct option options[] = {
de80c6
+		KEXEC_ARCH_OPTIONS
de80c6
+		{ "command-line",	1, 0, OPT_APPEND },
de80c6
+		{ "append",		1, 0, OPT_APPEND },
de80c6
+		{ "reuse-cmdline",	0, 0, OPT_REUSE_CMDLINE },
de80c6
+		{ "initrd",		1, 0, OPT_RAMDISK },
de80c6
+		{ "ramdisk",		1, 0, OPT_RAMDISK },
de80c6
+		{ "real-mode",		0, 0, OPT_REAL_MODE },
de80c6
+		{ "entry-32bit",	0, 0, OPT_ENTRY_32BIT },
de80c6
+		{ 0,			0, 0, 0 },
de80c6
+	};
de80c6
+	static const char short_options[] = KEXEC_ARCH_OPT_STR "d";
de80c6
+
de80c6
+	while ((opt = getopt_long(argc, argv, short_options, options, 0)) != -1) {
de80c6
+		switch (opt) {
de80c6
+		default:
de80c6
+			/* Ignore core options */
de80c6
+			if (opt < OPT_ARCH_MAX)
de80c6
+				break;
de80c6
+		case OPT_APPEND:
de80c6
+			append = optarg;
de80c6
+			break;
de80c6
+		case OPT_REUSE_CMDLINE:
de80c6
+			tmp_cmdline = get_command_line();
de80c6
+			break;
de80c6
+		case OPT_RAMDISK:
de80c6
+			ramdisk = optarg;
de80c6
+			break;
de80c6
+		case OPT_REAL_MODE:
de80c6
+			entry_16bit = 1;
de80c6
+			break;
de80c6
+		case OPT_ENTRY_32BIT:
de80c6
+			entry_32bit = 1;
de80c6
+			break;
de80c6
+		}
de80c6
+	}
de80c6
+	command_line = concat_cmdline(tmp_cmdline, append);
de80c6
+	if (tmp_cmdline)
de80c6
+		free(tmp_cmdline);
de80c6
+	command_line_len = 0;
de80c6
+	if (command_line) {
de80c6
+		command_line_len = strlen(command_line) + 1;
de80c6
+	} else {
de80c6
+		command_line = strdup("\0");
de80c6
+		command_line_len = 1;
de80c6
+	}
de80c6
+
de80c6
+	if (entry_16bit || entry_32bit) {
de80c6
+		fprintf(stderr, "Kexec2 syscall does not support 16bit"
de80c6
+			" or 32bit entry yet\n");
de80c6
+		ret = -1;
de80c6
+		goto out;
de80c6
+	}
de80c6
+
de80c6
+	if (ramdisk) {
de80c6
+		info->initrd_fd = open(ramdisk, O_RDONLY);
de80c6
+		if (info->initrd_fd == -1) {
de80c6
+			fprintf(stderr, "Could not open initrd file %s:%s\n",
de80c6
+					ramdisk, strerror(errno));
de80c6
+			ret = -1;
de80c6
+			goto out;
de80c6
+		}
de80c6
+	}
de80c6
+
de80c6
+	info->command_line = command_line;
de80c6
+	info->command_line_len = command_line_len;
de80c6
+	return ret;
de80c6
+out:
de80c6
+	free(command_line);
de80c6
+	return ret;
de80c6
+}
de80c6
+
de80c6
 int bzImage64_load(int argc, char **argv, const char *buf, off_t len,
de80c6
 	struct kexec_info *info)
de80c6
 {
de80c6
@@ -247,6 +330,9 @@ int bzImage64_load(int argc, char **argv, const char *buf, off_t len,
de80c6
 	int opt;
de80c6
 	int result;
de80c6
 
de80c6
+	if (info->file_mode)
de80c6
+		return bzImage64_load_file(argc, argv, info);
de80c6
+
de80c6
 	/* See options.h -- add any more there, too. */
de80c6
 	static const struct option options[] = {
de80c6
 		KEXEC_ARCH_OPTIONS
de80c6
diff --git a/kexec/kexec-syscall.h b/kexec/kexec-syscall.h
de80c6
index 6238044..ce2e20b 100644
de80c6
--- a/kexec/kexec-syscall.h
de80c6
+++ b/kexec/kexec-syscall.h
de80c6
@@ -53,6 +53,19 @@
de80c6
 #endif
de80c6
 #endif /*ifndef __NR_kexec_load*/
de80c6
 
de80c6
+#ifndef __NR_kexec_file_load
de80c6
+
de80c6
+#ifdef __x86_64__
de80c6
+#define __NR_kexec_file_load	320
de80c6
+#endif
de80c6
+
de80c6
+#ifndef __NR_kexec_file_load
de80c6
+/* system call not available for the arch */
de80c6
+#define __NR_kexec_file_load	0xffffffff	/* system call not available */
de80c6
+#endif
de80c6
+
de80c6
+#endif /*ifndef __NR_kexec_file_load*/
de80c6
+
de80c6
 struct kexec_segment;
de80c6
 
de80c6
 static inline long kexec_load(void *entry, unsigned long nr_segments,
de80c6
@@ -61,10 +74,29 @@ static inline long kexec_load(void *entry, unsigned long nr_segments,
de80c6
 	return (long) syscall(__NR_kexec_load, entry, nr_segments, segments, flags);
de80c6
 }
de80c6
 
de80c6
+static inline int is_kexec_file_load_implemented(void) {
de80c6
+	if (__NR_kexec_file_load != 0xffffffff)
de80c6
+		return 1;
de80c6
+	return 0;
de80c6
+}
de80c6
+
de80c6
+static inline long kexec_file_load(int kernel_fd, int initrd_fd,
de80c6
+			unsigned long cmdline_len, const char *cmdline_ptr,
de80c6
+			unsigned long flags)
de80c6
+{
de80c6
+	return (long) syscall(__NR_kexec_file_load, kernel_fd, initrd_fd,
de80c6
+				cmdline_len, cmdline_ptr, flags);
de80c6
+}
de80c6
+
de80c6
 #define KEXEC_ON_CRASH		0x00000001
de80c6
 #define KEXEC_PRESERVE_CONTEXT	0x00000002
de80c6
 #define KEXEC_ARCH_MASK		0xffff0000
de80c6
 
de80c6
+/* Flags for kexec file based system call */
de80c6
+#define KEXEC_FILE_UNLOAD	0x00000001
de80c6
+#define KEXEC_FILE_ON_CRASH	0x00000002
de80c6
+#define KEXEC_FILE_NO_INITRAMFS	0x00000004
de80c6
+
de80c6
 /* These values match the ELF architecture values. 
de80c6
  * Unless there is a good reason that should continue to be the case.
de80c6
  */
de80c6
diff --git a/kexec/kexec.c b/kexec/kexec.c
de80c6
index 133e622..7e7b604 100644
de80c6
--- a/kexec/kexec.c
de80c6
+++ b/kexec/kexec.c
de80c6
@@ -51,6 +51,8 @@
de80c6
 unsigned long long mem_min = 0;
de80c6
 unsigned long long mem_max = ULONG_MAX;
de80c6
 static unsigned long kexec_flags = 0;
de80c6
+/* Flags for kexec file (fd) based syscall */
de80c6
+static unsigned long kexec_file_flags = 0;
de80c6
 int kexec_debug = 0;
de80c6
 
de80c6
 void dbgprint_mem_range(const char *prefix, struct memory_range *mr, int nr_mr)
de80c6
@@ -787,6 +789,19 @@ static int my_load(const char *type, int fileind, int argc, char **argv,
de80c6
 	return result;
de80c6
 }
de80c6
 
de80c6
+static int kexec_file_unload(unsigned long kexec_file_flags)
de80c6
+{
de80c6
+	int ret = 0;
de80c6
+
de80c6
+	ret = kexec_file_load(-1, -1, 0, NULL, kexec_file_flags);
de80c6
+	if (ret != 0) {
de80c6
+		/* The unload failed, print some debugging information */
de80c6
+		fprintf(stderr, "kexec_file_load(unload) failed\n: %s\n",
de80c6
+			strerror(errno));
de80c6
+	}
de80c6
+	return ret;
de80c6
+}
de80c6
+
de80c6
 static int k_unload (unsigned long kexec_flags)
de80c6
 {
de80c6
 	int result;
de80c6
@@ -925,6 +940,7 @@ void usage(void)
de80c6
 	       "                      (0 means it's not jump back or\n"
de80c6
 	       "                      preserve context)\n"
de80c6
 	       "                      to original kernel.\n"
de80c6
+	       " -s, --kexec-file-syscall Use file based syscall for kexec operation\n"
de80c6
 	       " -d, --debug           Enable debugging to help spot a failure.\n"
de80c6
 	       "\n"
de80c6
 	       "Supported kernel file types and options: \n");
de80c6
@@ -1072,6 +1088,82 @@ char *concat_cmdline(const char *base, const char *append)
de80c6
 	return cmdline;
de80c6
 }
de80c6
 
de80c6
+/* New file based kexec system call related code */
de80c6
+static int do_kexec_file_load(int fileind, int argc, char **argv,
de80c6
+			unsigned long flags) {
de80c6
+
de80c6
+	char *kernel;
de80c6
+	int kernel_fd, i;
de80c6
+	struct kexec_info info;
de80c6
+	int ret = 0;
de80c6
+	char *kernel_buf;
de80c6
+	off_t kernel_size;
de80c6
+
de80c6
+	memset(&info, 0, sizeof(info));
de80c6
+	info.segment = NULL;
de80c6
+	info.nr_segments = 0;
de80c6
+	info.entry = NULL;
de80c6
+	info.backup_start = 0;
de80c6
+	info.kexec_flags = flags;
de80c6
+
de80c6
+	info.file_mode = 1;
de80c6
+	info.initrd_fd = -1;
de80c6
+
de80c6
+	if (!is_kexec_file_load_implemented()) {
de80c6
+		fprintf(stderr, "syscall kexec_file_load not available.\n");
de80c6
+		return -1;
de80c6
+	}
de80c6
+
de80c6
+	if (argc - fileind <= 0) {
de80c6
+		fprintf(stderr, "No kernel specified\n");
de80c6
+		usage();
de80c6
+		return -1;
de80c6
+	}
de80c6
+
de80c6
+	kernel = argv[fileind];
de80c6
+
de80c6
+	kernel_fd = open(kernel, O_RDONLY);
de80c6
+	if (kernel_fd == -1) {
de80c6
+		fprintf(stderr, "Failed to open file %s:%s\n", kernel,
de80c6
+				strerror(errno));
de80c6
+		return -1;
de80c6
+	}
de80c6
+
de80c6
+	/* slurp in the input kernel */
de80c6
+	kernel_buf = slurp_decompress_file(kernel, &kernel_size);
de80c6
+
de80c6
+	for (i = 0; i < file_types; i++) {
de80c6
+		if (file_type[i].probe(kernel_buf, kernel_size) >= 0)
de80c6
+			break;
de80c6
+	}
de80c6
+
de80c6
+	if (i == file_types) {
de80c6
+		fprintf(stderr, "Cannot determine the file type " "of %s\n",
de80c6
+				kernel);
de80c6
+		return -1;
de80c6
+	}
de80c6
+
de80c6
+	ret = file_type[i].load(argc, argv, kernel_buf, kernel_size, &info);
de80c6
+	if (ret < 0) {
de80c6
+		fprintf(stderr, "Cannot load %s\n", kernel);
de80c6
+		return ret;
de80c6
+	}
de80c6
+
de80c6
+	/*
de80c6
+	 * If there is no initramfs, set KEXEC_FILE_NO_INITRAMFS flag so that
de80c6
+	 * kernel does not return error with negative initrd_fd.
de80c6
+	 */
de80c6
+	if (info.initrd_fd == -1)
de80c6
+		info.kexec_flags |= KEXEC_FILE_NO_INITRAMFS;
de80c6
+
de80c6
+	ret = kexec_file_load(kernel_fd, info.initrd_fd, info.command_line_len,
de80c6
+			info.command_line, info.kexec_flags);
de80c6
+	if (ret != 0)
de80c6
+		fprintf(stderr, "kexec_file_load failed: %s\n",
de80c6
+					strerror(errno));
de80c6
+	return ret;
de80c6
+}
de80c6
+
de80c6
 
de80c6
 int main(int argc, char *argv[])
de80c6
 {
de80c6
@@ -1083,6 +1175,7 @@ int main(int argc, char *argv[])
de80c6
 	int do_ifdown = 0;
de80c6
 	int do_unload = 0;
de80c6
 	int do_reuse_initrd = 0;
de80c6
+	int do_kexec_file_syscall = 0;
de80c6
 	void *entry = 0;
de80c6
 	char *type = 0;
de80c6
 	char *endptr;
de80c6
@@ -1095,6 +1188,23 @@ int main(int argc, char *argv[])
de80c6
 	};
de80c6
 	static const char short_options[] = KEXEC_ALL_OPT_STR;
de80c6
 
de80c6
+	/*
de80c6
+	 * First check if --kexec-file-syscall is set. That changes a lot of
de80c6
+	 * things.
de80c6
+	 */
de80c6
+	while ((opt = getopt_long(argc, argv, short_options,
de80c6
+				  options, 0)) != -1) {
de80c6
+		switch(opt) {
de80c6
+		case OPT_KEXEC_FILE_SYSCALL:
de80c6
+			do_kexec_file_syscall = 1;
de80c6
+			break;
de80c6
+		}
de80c6
+	}
de80c6
+
de80c6
+	/* Reset getopt for the next pass. */
de80c6
+	opterr = 1;
de80c6
+	optind = 1;
de80c6
+
de80c6
 	while ((opt = getopt_long(argc, argv, short_options,
de80c6
 				  options, 0)) != -1) {
de80c6
 		switch(opt) {
de80c6
@@ -1127,6 +1237,8 @@ int main(int argc, char *argv[])
de80c6
 			do_shutdown = 0;
de80c6
 			do_sync = 0;
de80c6
 			do_unload = 1;
de80c6
+			if (do_kexec_file_syscall)
de80c6
+				kexec_file_flags |= KEXEC_FILE_UNLOAD;
de80c6
 			break;
de80c6
 		case OPT_EXEC:
de80c6
 			do_load = 0;
de80c6
@@ -1169,7 +1281,10 @@ int main(int argc, char *argv[])
de80c6
 			do_exec = 0;
de80c6
 			do_shutdown = 0;
de80c6
 			do_sync = 0;
de80c6
-			kexec_flags = KEXEC_ON_CRASH;
de80c6
+			if (do_kexec_file_syscall)
de80c6
+				kexec_file_flags |= KEXEC_FILE_ON_CRASH;
de80c6
+			else
de80c6
+				kexec_flags = KEXEC_ON_CRASH;
de80c6
 			break;
de80c6
 		case OPT_MEM_MIN:
de80c6
 			mem_min = strtoul(optarg, &endptr, 0);
de80c6
@@ -1194,6 +1309,9 @@ int main(int argc, char *argv[])
de80c6
 		case OPT_REUSE_INITRD:
de80c6
 			do_reuse_initrd = 1;
de80c6
 			break;
de80c6
+		case OPT_KEXEC_FILE_SYSCALL:
de80c6
+			/* We already parsed it. Nothing to do. */
de80c6
+			break;
de80c6
 		default:
de80c6
 			break;
de80c6
 		}
de80c6
@@ -1238,10 +1356,18 @@ int main(int argc, char *argv[])
de80c6
 	}
de80c6
 
de80c6
 	if (do_unload) {
de80c6
-		result = k_unload(kexec_flags);
de80c6
+		if (do_kexec_file_syscall)
de80c6
+			result = kexec_file_unload(kexec_file_flags);
de80c6
+		else
de80c6
+			result = k_unload(kexec_flags);
de80c6
 	}
de80c6
 	if (do_load && (result == 0)) {
de80c6
-		result = my_load(type, fileind, argc, argv, kexec_flags, entry);
de80c6
+		if (do_kexec_file_syscall)
de80c6
+			result = do_kexec_file_load(fileind, argc, argv,
de80c6
+						 kexec_file_flags);
de80c6
+		else
de80c6
+			result = my_load(type, fileind, argc, argv,
de80c6
+						kexec_flags, entry);
de80c6
 	}
de80c6
 	/* Don't shutdown unless there is something to reboot to! */
de80c6
 	if ((result == 0) && (do_shutdown || do_exec) && !kexec_loaded()) {
de80c6
diff --git a/kexec/kexec.h b/kexec/kexec.h
de80c6
index 2fad7dc..4be2b2f 100644
de80c6
--- a/kexec/kexec.h
de80c6
+++ b/kexec/kexec.h
de80c6
@@ -156,6 +156,13 @@ struct kexec_info {
de80c6
 	unsigned long kexec_flags;
de80c6
 	unsigned long backup_src_start;
de80c6
 	unsigned long backup_src_size;
de80c6
+	/* Set to 1 if we are using kexec file syscall */
de80c6
+	unsigned long file_mode :1;
de80c6
+
de80c6
+	/* Filled by kernel image processing code */
de80c6
+	int initrd_fd;
de80c6
+	char *command_line;
de80c6
+	int command_line_len;
de80c6
 };
de80c6
 
de80c6
 struct arch_map_entry {
de80c6
@@ -207,6 +214,7 @@ extern int file_types;
de80c6
 #define OPT_UNLOAD		'u'
de80c6
 #define OPT_TYPE		't'
de80c6
 #define OPT_PANIC		'p'
de80c6
+#define OPT_KEXEC_FILE_SYSCALL	's'
de80c6
 #define OPT_MEM_MIN             256
de80c6
 #define OPT_MEM_MAX             257
de80c6
 #define OPT_REUSE_INITRD	258
de80c6
@@ -230,9 +238,10 @@ extern int file_types;
de80c6
 	{ "mem-min",		1, 0, OPT_MEM_MIN }, \
de80c6
 	{ "mem-max",		1, 0, OPT_MEM_MAX }, \
de80c6
 	{ "reuseinitrd",	0, 0, OPT_REUSE_INITRD }, \
de80c6
+	{ "kexec-file-syscall",	0, 0, OPT_KEXEC_FILE_SYSCALL }, \
de80c6
 	{ "debug",		0, 0, OPT_DEBUG }, \
de80c6
 
de80c6
-#define KEXEC_OPT_STR "h?vdfxluet:p"
de80c6
+#define KEXEC_OPT_STR "h?vdfxluet:ps"
de80c6
 
de80c6
 extern void dbgprint_mem_range(const char *prefix, struct memory_range *mr, int nr_mr);
de80c6
 extern void die(const char *fmt, ...)
de80c6
-- 
de80c6
1.9.0
de80c6