Blame SOURCES/kexec-tools-2.0.7-Provide-an-option-to-use-new-kexec-system-call.patch

1b417c
From 046d1755d2bd723a11a180c265e61a884990712e Mon Sep 17 00:00:00 2001
1b417c
From: Vivek Goyal <vgoyal@redhat.com>
1b417c
Date: Mon, 18 Aug 2014 11:22:32 -0400
1b417c
Subject: [PATCH] kexec: Provide an option to use new kexec system call
1b417c
1b417c
Hi,
1b417c
1b417c
This is v2 of the patch. Since v1, I moved the syscall-implemented check a little
1b417c
earlier in the function as per the feedback.
1b417c
1b417c
Now a new kexec syscall (kexec_file_load()) has been merged in upstream
1b417c
kernel. This system call takes file descriptors of kernel and initramfs
1b417c
as input (as opposed to list of segments to be loaded). This new system
1b417c
call allows for signature verification of the kernel being loaded.
1b417c
1b417c
One use of signature verification of kernel is secureboot systems where
1b417c
we want to allow kexec into a kernel only if it is validly signed by
1b417c
a key the system trusts.
1b417c
1b417c
This patch provides an option --kexec-file-syscall (-s), to force use of
1b417c
new system call for kexec. Default is to continue to use old syscall.
1b417c
1b417c
Currently only bzImage64 on x86_64 can be loaded using this system call.
1b417c
As kernel adds support for more arches and for more image types, kexec-tools
1b417c
can be modified accordingly.
1b417c
1b417c
Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
1b417c
Acked-by: Baoquan He <bhe@redhat.com>
1b417c
Signed-off-by: Simon Horman <horms@verge.net.au>
1b417c
---
1b417c
 kexec/arch/x86_64/kexec-bzImage64.c |  86 +++++++++++++++++++++++
1b417c
 kexec/kexec-syscall.h               |  32 +++++++++
1b417c
 kexec/kexec.c                       | 132 +++++++++++++++++++++++++++++++++++-
1b417c
 kexec/kexec.h                       |  11 ++-
1b417c
 4 files changed, 257 insertions(+), 4 deletions(-)
1b417c
1b417c
diff --git a/kexec/arch/x86_64/kexec-bzImage64.c b/kexec/arch/x86_64/kexec-bzImage64.c
1b417c
index 1983bcf..8edb3e4 100644
1b417c
--- a/kexec/arch/x86_64/kexec-bzImage64.c
1b417c
+++ b/kexec/arch/x86_64/kexec-bzImage64.c
1b417c
@@ -235,6 +235,89 @@ static int do_bzImage64_load(struct kexec_info *info,
1b417c
 	return 0;
1b417c
 }
1b417c
 
1b417c
+/* This assumes file is being loaded using file based kexec syscall */
1b417c
+int bzImage64_load_file(int argc, char **argv, struct kexec_info *info)
1b417c
+{
1b417c
+	int ret = 0;
1b417c
+	char *command_line = NULL, *tmp_cmdline = NULL;
1b417c
+	const char *ramdisk = NULL, *append = NULL;
1b417c
+	int entry_16bit = 0, entry_32bit = 0;
1b417c
+	int opt;
1b417c
+	int command_line_len;
1b417c
+
1b417c
+	/* See options.h -- add any more there, too. */
1b417c
+	static const struct option options[] = {
1b417c
+		KEXEC_ARCH_OPTIONS
1b417c
+		{ "command-line",	1, 0, OPT_APPEND },
1b417c
+		{ "append",		1, 0, OPT_APPEND },
1b417c
+		{ "reuse-cmdline",	0, 0, OPT_REUSE_CMDLINE },
1b417c
+		{ "initrd",		1, 0, OPT_RAMDISK },
1b417c
+		{ "ramdisk",		1, 0, OPT_RAMDISK },
1b417c
+		{ "real-mode",		0, 0, OPT_REAL_MODE },
1b417c
+		{ "entry-32bit",	0, 0, OPT_ENTRY_32BIT },
1b417c
+		{ 0,			0, 0, 0 },
1b417c
+	};
1b417c
+	static const char short_options[] = KEXEC_ARCH_OPT_STR "d";
1b417c
+
1b417c
+	while ((opt = getopt_long(argc, argv, short_options, options, 0)) != -1) {
1b417c
+		switch (opt) {
1b417c
+		default:
1b417c
+			/* Ignore core options */
1b417c
+			if (opt < OPT_ARCH_MAX)
1b417c
+				break;
1b417c
+		case OPT_APPEND:
1b417c
+			append = optarg;
1b417c
+			break;
1b417c
+		case OPT_REUSE_CMDLINE:
1b417c
+			tmp_cmdline = get_command_line();
1b417c
+			break;
1b417c
+		case OPT_RAMDISK:
1b417c
+			ramdisk = optarg;
1b417c
+			break;
1b417c
+		case OPT_REAL_MODE:
1b417c
+			entry_16bit = 1;
1b417c
+			break;
1b417c
+		case OPT_ENTRY_32BIT:
1b417c
+			entry_32bit = 1;
1b417c
+			break;
1b417c
+		}
1b417c
+	}
1b417c
+	command_line = concat_cmdline(tmp_cmdline, append);
1b417c
+	if (tmp_cmdline)
1b417c
+		free(tmp_cmdline);
1b417c
+	command_line_len = 0;
1b417c
+	if (command_line) {
1b417c
+		command_line_len = strlen(command_line) + 1;
1b417c
+	} else {
1b417c
+		command_line = strdup("\0");
1b417c
+		command_line_len = 1;
1b417c
+	}
1b417c
+
1b417c
+	if (entry_16bit || entry_32bit) {
1b417c
+		fprintf(stderr, "Kexec2 syscall does not support 16bit"
1b417c
+			" or 32bit entry yet\n");
1b417c
+		ret = -1;
1b417c
+		goto out;
1b417c
+	}
1b417c
+
1b417c
+	if (ramdisk) {
1b417c
+		info->initrd_fd = open(ramdisk, O_RDONLY);
1b417c
+		if (info->initrd_fd == -1) {
1b417c
+			fprintf(stderr, "Could not open initrd file %s:%s\n",
1b417c
+					ramdisk, strerror(errno));
1b417c
+			ret = -1;
1b417c
+			goto out;
1b417c
+		}
1b417c
+	}
1b417c
+
1b417c
+	info->command_line = command_line;
1b417c
+	info->command_line_len = command_line_len;
1b417c
+	return ret;
1b417c
+out:
1b417c
+	free(command_line);
1b417c
+	return ret;
1b417c
+}
1b417c
+
1b417c
 int bzImage64_load(int argc, char **argv, const char *buf, off_t len,
1b417c
 	struct kexec_info *info)
1b417c
 {
1b417c
@@ -247,6 +330,9 @@ int bzImage64_load(int argc, char **argv, const char *buf, off_t len,
1b417c
 	int opt;
1b417c
 	int result;
1b417c
 
1b417c
+	if (info->file_mode)
1b417c
+		return bzImage64_load_file(argc, argv, info);
1b417c
+
1b417c
 	/* See options.h -- add any more there, too. */
1b417c
 	static const struct option options[] = {
1b417c
 		KEXEC_ARCH_OPTIONS
1b417c
diff --git a/kexec/kexec-syscall.h b/kexec/kexec-syscall.h
1b417c
index 6238044..ce2e20b 100644
1b417c
--- a/kexec/kexec-syscall.h
1b417c
+++ b/kexec/kexec-syscall.h
1b417c
@@ -53,6 +53,19 @@
1b417c
 #endif
1b417c
 #endif /*ifndef __NR_kexec_load*/
1b417c
 
1b417c
+#ifndef __NR_kexec_file_load
1b417c
+
1b417c
+#ifdef __x86_64__
1b417c
+#define __NR_kexec_file_load	320
1b417c
+#endif
1b417c
+
1b417c
+#ifndef __NR_kexec_file_load
1b417c
+/* system call not available for the arch */
1b417c
+#define __NR_kexec_file_load	0xffffffff	/* system call not available */
1b417c
+#endif
1b417c
+
1b417c
+#endif /*ifndef __NR_kexec_file_load*/
1b417c
+
1b417c
 struct kexec_segment;
1b417c
 
1b417c
 static inline long kexec_load(void *entry, unsigned long nr_segments,
1b417c
@@ -61,10 +74,29 @@ static inline long kexec_load(void *entry, unsigned long nr_segments,
1b417c
 	return (long) syscall(__NR_kexec_load, entry, nr_segments, segments, flags);
1b417c
 }
1b417c
 
1b417c
+static inline int is_kexec_file_load_implemented(void) {
1b417c
+	if (__NR_kexec_file_load != 0xffffffff)
1b417c
+		return 1;
1b417c
+	return 0;
1b417c
+}
1b417c
+
1b417c
+static inline long kexec_file_load(int kernel_fd, int initrd_fd,
1b417c
+			unsigned long cmdline_len, const char *cmdline_ptr,
1b417c
+			unsigned long flags)
1b417c
+{
1b417c
+	return (long) syscall(__NR_kexec_file_load, kernel_fd, initrd_fd,
1b417c
+				cmdline_len, cmdline_ptr, flags);
1b417c
+}
1b417c
+
1b417c
 #define KEXEC_ON_CRASH		0x00000001
1b417c
 #define KEXEC_PRESERVE_CONTEXT	0x00000002
1b417c
 #define KEXEC_ARCH_MASK		0xffff0000
1b417c
 
1b417c
+/* Flags for kexec file based system call */
1b417c
+#define KEXEC_FILE_UNLOAD	0x00000001
1b417c
+#define KEXEC_FILE_ON_CRASH	0x00000002
1b417c
+#define KEXEC_FILE_NO_INITRAMFS	0x00000004
1b417c
+
1b417c
 /* These values match the ELF architecture values. 
1b417c
  * Unless there is a good reason that should continue to be the case.
1b417c
  */
1b417c
diff --git a/kexec/kexec.c b/kexec/kexec.c
1b417c
index 133e622..7e7b604 100644
1b417c
--- a/kexec/kexec.c
1b417c
+++ b/kexec/kexec.c
1b417c
@@ -51,6 +51,8 @@
1b417c
 unsigned long long mem_min = 0;
1b417c
 unsigned long long mem_max = ULONG_MAX;
1b417c
 static unsigned long kexec_flags = 0;
1b417c
+/* Flags for kexec file (fd) based syscall */
1b417c
+static unsigned long kexec_file_flags = 0;
1b417c
 int kexec_debug = 0;
1b417c
 
1b417c
 void dbgprint_mem_range(const char *prefix, struct memory_range *mr, int nr_mr)
1b417c
@@ -787,6 +789,19 @@ static int my_load(const char *type, int fileind, int argc, char **argv,
1b417c
 	return result;
1b417c
 }
1b417c
 
1b417c
+static int kexec_file_unload(unsigned long kexec_file_flags)
1b417c
+{
1b417c
+	int ret = 0;
1b417c
+
1b417c
+	ret = kexec_file_load(-1, -1, 0, NULL, kexec_file_flags);
1b417c
+	if (ret != 0) {
1b417c
+		/* The unload failed, print some debugging information */
1b417c
+		fprintf(stderr, "kexec_file_load(unload) failed\n: %s\n",
1b417c
+			strerror(errno));
1b417c
+	}
1b417c
+	return ret;
1b417c
+}
1b417c
+
1b417c
 static int k_unload (unsigned long kexec_flags)
1b417c
 {
1b417c
 	int result;
1b417c
@@ -925,6 +940,7 @@ void usage(void)
1b417c
 	       "                      (0 means it's not jump back or\n"
1b417c
 	       "                      preserve context)\n"
1b417c
 	       "                      to original kernel.\n"
1b417c
+	       " -s, --kexec-file-syscall Use file based syscall for kexec operation\n"
1b417c
 	       " -d, --debug           Enable debugging to help spot a failure.\n"
1b417c
 	       "\n"
1b417c
 	       "Supported kernel file types and options: \n");
1b417c
@@ -1072,6 +1088,82 @@ char *concat_cmdline(const char *base, const char *append)
1b417c
 	return cmdline;
1b417c
 }
1b417c
 
1b417c
+/* New file based kexec system call related code */
1b417c
+static int do_kexec_file_load(int fileind, int argc, char **argv,
1b417c
+			unsigned long flags) {
1b417c
+
1b417c
+	char *kernel;
1b417c
+	int kernel_fd, i;
1b417c
+	struct kexec_info info;
1b417c
+	int ret = 0;
1b417c
+	char *kernel_buf;
1b417c
+	off_t kernel_size;
1b417c
+
1b417c
+	memset(&info, 0, sizeof(info));
1b417c
+	info.segment = NULL;
1b417c
+	info.nr_segments = 0;
1b417c
+	info.entry = NULL;
1b417c
+	info.backup_start = 0;
1b417c
+	info.kexec_flags = flags;
1b417c
+
1b417c
+	info.file_mode = 1;
1b417c
+	info.initrd_fd = -1;
1b417c
+
1b417c
+	if (!is_kexec_file_load_implemented()) {
1b417c
+		fprintf(stderr, "syscall kexec_file_load not available.\n");
1b417c
+		return -1;
1b417c
+	}
1b417c
+
1b417c
+	if (argc - fileind <= 0) {
1b417c
+		fprintf(stderr, "No kernel specified\n");
1b417c
+		usage();
1b417c
+		return -1;
1b417c
+	}
1b417c
+
1b417c
+	kernel = argv[fileind];
1b417c
+
1b417c
+	kernel_fd = open(kernel, O_RDONLY);
1b417c
+	if (kernel_fd == -1) {
1b417c
+		fprintf(stderr, "Failed to open file %s:%s\n", kernel,
1b417c
+				strerror(errno));
1b417c
+		return -1;
1b417c
+	}
1b417c
+
1b417c
+	/* slurp in the input kernel */
1b417c
+	kernel_buf = slurp_decompress_file(kernel, &kernel_size);
1b417c
+
1b417c
+	for (i = 0; i < file_types; i++) {
1b417c
+		if (file_type[i].probe(kernel_buf, kernel_size) >= 0)
1b417c
+			break;
1b417c
+	}
1b417c
+
1b417c
+	if (i == file_types) {
1b417c
+		fprintf(stderr, "Cannot determine the file type " "of %s\n",
1b417c
+				kernel);
1b417c
+		return -1;
1b417c
+	}
1b417c
+
1b417c
+	ret = file_type[i].load(argc, argv, kernel_buf, kernel_size, &info);
1b417c
+	if (ret < 0) {
1b417c
+		fprintf(stderr, "Cannot load %s\n", kernel);
1b417c
+		return ret;
1b417c
+	}
1b417c
+
1b417c
+	/*
1b417c
+	 * If there is no initramfs, set KEXEC_FILE_NO_INITRAMFS flag so that
1b417c
+	 * kernel does not return error with negative initrd_fd.
1b417c
+	 */
1b417c
+	if (info.initrd_fd == -1)
1b417c
+		info.kexec_flags |= KEXEC_FILE_NO_INITRAMFS;
1b417c
+
1b417c
+	ret = kexec_file_load(kernel_fd, info.initrd_fd, info.command_line_len,
1b417c
+			info.command_line, info.kexec_flags);
1b417c
+	if (ret != 0)
1b417c
+		fprintf(stderr, "kexec_file_load failed: %s\n",
1b417c
+					strerror(errno));
1b417c
+	return ret;
1b417c
+}
1b417c
+
1b417c
 
1b417c
 int main(int argc, char *argv[])
1b417c
 {
1b417c
@@ -1083,6 +1175,7 @@ int main(int argc, char *argv[])
1b417c
 	int do_ifdown = 0;
1b417c
 	int do_unload = 0;
1b417c
 	int do_reuse_initrd = 0;
1b417c
+	int do_kexec_file_syscall = 0;
1b417c
 	void *entry = 0;
1b417c
 	char *type = 0;
1b417c
 	char *endptr;
1b417c
@@ -1095,6 +1188,23 @@ int main(int argc, char *argv[])
1b417c
 	};
1b417c
 	static const char short_options[] = KEXEC_ALL_OPT_STR;
1b417c
 
1b417c
+	/*
1b417c
+	 * First check if --kexec-file-syscall is set. That changes a lot of
1b417c
+	 * things
1b417c
+	 */
1b417c
+	while ((opt = getopt_long(argc, argv, short_options,
1b417c
+				  options, 0)) != -1) {
1b417c
+		switch(opt) {
1b417c
+		case OPT_KEXEC_FILE_SYSCALL:
1b417c
+			do_kexec_file_syscall = 1;
1b417c
+			break;
1b417c
+		}
1b417c
+	}
1b417c
+
1b417c
+	/* Reset getopt for the next pass. */
1b417c
+	opterr = 1;
1b417c
+	optind = 1;
1b417c
+
1b417c
 	while ((opt = getopt_long(argc, argv, short_options,
1b417c
 				  options, 0)) != -1) {
1b417c
 		switch(opt) {
1b417c
@@ -1127,6 +1237,8 @@ int main(int argc, char *argv[])
1b417c
 			do_shutdown = 0;
1b417c
 			do_sync = 0;
1b417c
 			do_unload = 1;
1b417c
+			if (do_kexec_file_syscall)
1b417c
+				kexec_file_flags |= KEXEC_FILE_UNLOAD;
1b417c
 			break;
1b417c
 		case OPT_EXEC:
1b417c
 			do_load = 0;
1b417c
@@ -1169,7 +1281,10 @@ int main(int argc, char *argv[])
1b417c
 			do_exec = 0;
1b417c
 			do_shutdown = 0;
1b417c
 			do_sync = 0;
1b417c
-			kexec_flags = KEXEC_ON_CRASH;
1b417c
+			if (do_kexec_file_syscall)
1b417c
+				kexec_file_flags |= KEXEC_FILE_ON_CRASH;
1b417c
+			else
1b417c
+				kexec_flags = KEXEC_ON_CRASH;
1b417c
 			break;
1b417c
 		case OPT_MEM_MIN:
1b417c
 			mem_min = strtoul(optarg, &endptr, 0);
1b417c
@@ -1194,6 +1309,9 @@ int main(int argc, char *argv[])
1b417c
 		case OPT_REUSE_INITRD:
1b417c
 			do_reuse_initrd = 1;
1b417c
 			break;
1b417c
+		case OPT_KEXEC_FILE_SYSCALL:
1b417c
+			/* We already parsed it. Nothing to do. */
1b417c
+			break;
1b417c
 		default:
1b417c
 			break;
1b417c
 		}
1b417c
@@ -1238,10 +1356,18 @@ int main(int argc, char *argv[])
1b417c
 	}
1b417c
 
1b417c
 	if (do_unload) {
1b417c
-		result = k_unload(kexec_flags);
1b417c
+		if (do_kexec_file_syscall)
1b417c
+			result = kexec_file_unload(kexec_file_flags);
1b417c
+		else
1b417c
+			result = k_unload(kexec_flags);
1b417c
 	}
1b417c
 	if (do_load && (result == 0)) {
1b417c
-		result = my_load(type, fileind, argc, argv, kexec_flags, entry);
1b417c
+		if (do_kexec_file_syscall)
1b417c
+			result = do_kexec_file_load(fileind, argc, argv,
1b417c
+						 kexec_file_flags);
1b417c
+		else
1b417c
+			result = my_load(type, fileind, argc, argv,
1b417c
+						kexec_flags, entry);
1b417c
 	}
1b417c
 	/* Don't shutdown unless there is something to reboot to! */
1b417c
 	if ((result == 0) && (do_shutdown || do_exec) && !kexec_loaded()) {
1b417c
diff --git a/kexec/kexec.h b/kexec/kexec.h
1b417c
index 2fad7dc..4be2b2f 100644
1b417c
--- a/kexec/kexec.h
1b417c
+++ b/kexec/kexec.h
1b417c
@@ -156,6 +156,13 @@ struct kexec_info {
1b417c
 	unsigned long kexec_flags;
1b417c
 	unsigned long backup_src_start;
1b417c
 	unsigned long backup_src_size;
1b417c
+	/* Set to 1 if we are using kexec file syscall */
1b417c
+	unsigned long file_mode :1;
1b417c
+
1b417c
+	/* Filled by kernel image processing code */
1b417c
+	int initrd_fd;
1b417c
+	char *command_line;
1b417c
+	int command_line_len;
1b417c
 };
1b417c
 
1b417c
 struct arch_map_entry {
1b417c
@@ -207,6 +214,7 @@ extern int file_types;
1b417c
 #define OPT_UNLOAD		'u'
1b417c
 #define OPT_TYPE		't'
1b417c
 #define OPT_PANIC		'p'
1b417c
+#define OPT_KEXEC_FILE_SYSCALL	's'
1b417c
 #define OPT_MEM_MIN             256
1b417c
 #define OPT_MEM_MAX             257
1b417c
 #define OPT_REUSE_INITRD	258
1b417c
@@ -230,9 +238,10 @@ extern int file_types;
1b417c
 	{ "mem-min",		1, 0, OPT_MEM_MIN }, \
1b417c
 	{ "mem-max",		1, 0, OPT_MEM_MAX }, \
1b417c
 	{ "reuseinitrd",	0, 0, OPT_REUSE_INITRD }, \
1b417c
+	{ "kexec-file-syscall",	0, 0, OPT_KEXEC_FILE_SYSCALL }, \
1b417c
 	{ "debug",		0, 0, OPT_DEBUG }, \
1b417c
 
1b417c
-#define KEXEC_OPT_STR "h?vdfxluet:p"
1b417c
+#define KEXEC_OPT_STR "h?vdfxluet:ps"
1b417c
 
1b417c
 extern void dbgprint_mem_range(const char *prefix, struct memory_range *mr, int nr_mr);
1b417c
 extern void die(const char *fmt, ...)
1b417c
-- 
1b417c
1.9.0
1b417c