Blob Blame History Raw
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..0e560d5
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,15 @@
+*.o
+.*.o.d
+version.h
+man/*.gz
+btrfs
+btrfs-debug-tree
+btrfs-map-logical
+btrfs-show
+btrfs-vol
+btrfsck
+btrfsctl
+find-root
+mkfs.btrfs
+repair
+restore
diff --git a/INSTALL b/INSTALL
index 16b45a5..6afbd90 100644
--- a/INSTALL
+++ b/INSTALL
@@ -22,27 +22,38 @@ in the e2fsprogs sources, and is usually available as libuuid or
 e2fsprogs-devel from various distros.
 
 Building the utilities is just make ; make install.  The programs go
-into /usr/local/bin.  The commands available are:
+into /usr/local/bin.  The main commands available are:
 
 mkfs.btrfs: create a filesystem
 
-btrfsctl: control program to create snapshots and subvolumes:
-
+btrfs: control program to create snapshots and subvolumes:
+	# mount a btrfs filesystem
 	mount /dev/sda2 /mnt
-	btrfsctl -s new_subvol_name /mnt
-	btrfsctl -s snapshot_of_default /mnt/default
-	btrfsctl -s snapshot_of_new_subvol /mnt/new_subvol_name
-	btrfsctl -s snapshot_of_a_snapshot /mnt/snapshot_of_new_subvol
+
+	# create a subvolume
+	btrfs subvolume create /mnt/new_subvol_name
+
+	# snapshot of a subvolume
+	btrfs subvolume snapshot /mnt/default /mnt/snapshot_of_default 
+	btrfs subvolume snapshot /mnt/snapshot_of_default \
+		/mnt/snapshot_of_a_snapshot
+
+	# list of the subvolumes
 	ls /mnt
 	default snapshot_of_a_snapshot snapshot_of_new_subvol
 	new_subvol_name snapshot_of_default
 
-	Snapshots and subvolumes cannot be deleted right now, but you can
-	rm -rf all the files and directories inside them.
+	# removal of a subvolume or a snapshot
+	btrfs subvolume delete /mnt/snapshot_of_a_snapshot
+
+	# look at the btrfs man page for further information
+	man btrfs
 
 btrfsck: do a limited check of the FS extent trees.</li>
 
-debug-tree: print all of the FS metadata in text form.  Example:
+btrfs-debug-tree: print all of the FS metadata in text form.  Example:
+
+	btrfs-debug-tree /dev/sda2 >& big_output_file
+
 
-	debug-tree /dev/sda2 >& big_output_file
 
diff --git a/Makefile b/Makefile
index 8097b5a..79818e6 100644
--- a/Makefile
+++ b/Makefile
@@ -1,28 +1,32 @@
-CC=gcc
+CC = gcc
 AM_CFLAGS = -Wall -D_FILE_OFFSET_BITS=64 -D_FORTIFY_SOURCE=2
-CFLAGS = -g -Werror -Os
+CFLAGS = -g -O0
 objects = ctree.o disk-io.o radix-tree.o extent-tree.o print-tree.o \
 	  root-tree.o dir-item.o file-item.o inode-item.o \
 	  inode-map.o crc32c.o rbtree.o extent-cache.o extent_io.o \
-	  volumes.o utils.o
+	  volumes.o utils.o btrfs-list.o btrfslabel.o repair.o
+cmds_objects = cmds-subvolume.o cmds-filesystem.o cmds-device.o cmds-scrub.o \
+	       cmds-inspect.o cmds-balance.o
 
-#
-CHECKFLAGS=-D__linux__ -Dlinux -D__STDC__ -Dunix -D__unix__ -Wbitwise \
-		-Wuninitialized -Wshadow -Wundef
+CHECKFLAGS= -D__linux__ -Dlinux -D__STDC__ -Dunix -D__unix__ -Wbitwise \
+	    -Wuninitialized -Wshadow -Wundef
 DEPFLAGS = -Wp,-MMD,$(@D)/.$(@F).d,-MT,$@
 
-INSTALL= install
+INSTALL = install
 prefix ?= /usr/local
 bindir = $(prefix)/bin
 LIBS=-luuid
+RESTORE_LIBS=-lz
 
-progs = btrfsctl mkfs.btrfs btrfs-debug-tree btrfs-show btrfs-vol btrfsck
+progs = btrfsctl mkfs.btrfs btrfs-debug-tree btrfs-show btrfs-vol btrfsck \
+	btrfs btrfs-map-logical btrfs-image btrfs-zero-log btrfs-convert \
+	btrfs-find-root btrfs-restore btrfstune
 
 # make C=1 to enable sparse
 ifdef C
-	check=sparse $(CHECKFLAGS)
+	check = sparse $(CHECKFLAGS)
 else
-	check=ls
+	check = ls
 endif
 
 .c.o:
@@ -35,38 +39,66 @@ all: version $(progs) manpages
 version:
 	bash version.sh
 
+btrfs: $(objects) btrfs.o help.o common.o $(cmds_objects)
+	$(CC) $(CFLAGS) -o btrfs btrfs.o help.o common.o $(cmds_objects) \
+		$(objects) $(LDFLAGS) $(LIBS) -lpthread
+
+calc-size: $(objects) calc-size.o
+	$(CC) $(CFLAGS) -o calc-size calc-size.o $(objects) $(LDFLAGS) $(LIBS)
+
+btrfs-find-root: $(objects) find-root.o
+	$(CC) $(CFLAGS) -o btrfs-find-root find-root.o $(objects) $(LDFLAGS) $(LIBS)
+
+btrfs-restore: $(objects) restore.o
+	$(CC) $(CFLAGS) -o btrfs-restore restore.o $(objects) $(LDFLAGS) $(LIBS) $(RESTORE_LIBS)
+
 btrfsctl: $(objects) btrfsctl.o
-	gcc $(CFLAGS) -o btrfsctl btrfsctl.o $(objects) $(LDFLAGS) $(LIBS)
+	$(CC) $(CFLAGS) -o btrfsctl btrfsctl.o $(objects) $(LDFLAGS) $(LIBS)
 
 btrfs-vol: $(objects) btrfs-vol.o
-	gcc $(CFLAGS) -o btrfs-vol btrfs-vol.o $(objects) $(LDFLAGS) $(LIBS)
+	$(CC) $(CFLAGS) -o btrfs-vol btrfs-vol.o $(objects) $(LDFLAGS) $(LIBS)
 
 btrfs-show: $(objects) btrfs-show.o
-	gcc $(CFLAGS) -o btrfs-show btrfs-show.o $(objects) $(LDFLAGS) $(LIBS)
+	$(CC) $(CFLAGS) -o btrfs-show btrfs-show.o $(objects) $(LDFLAGS) $(LIBS)
 
 btrfsck: $(objects) btrfsck.o
-	gcc $(CFLAGS) -o btrfsck btrfsck.o $(objects) $(LDFLAGS) $(LIBS)
+	$(CC) $(CFLAGS) -o btrfsck btrfsck.o $(objects) $(LDFLAGS) $(LIBS)
 
 mkfs.btrfs: $(objects) mkfs.o
-	gcc $(CFLAGS) -o mkfs.btrfs $(objects) mkfs.o $(LDFLAGS) $(LIBS)
+	$(CC) $(CFLAGS) -o mkfs.btrfs $(objects) mkfs.o $(LDFLAGS) $(LIBS)
 
 btrfs-debug-tree: $(objects) debug-tree.o
-	gcc $(CFLAGS) -o btrfs-debug-tree $(objects) debug-tree.o $(LDFLAGS) $(LIBS)
+	$(CC) $(CFLAGS) -o btrfs-debug-tree $(objects) debug-tree.o $(LDFLAGS) $(LIBS)
+
+btrfs-zero-log: $(objects) btrfs-zero-log.o
+	$(CC) $(CFLAGS) -o btrfs-zero-log $(objects) btrfs-zero-log.o $(LDFLAGS) $(LIBS)
+
+btrfs-select-super: $(objects) btrfs-select-super.o
+	$(CC) $(CFLAGS) -o btrfs-select-super $(objects) btrfs-select-super.o $(LDFLAGS) $(LIBS)
 
 btrfstune: $(objects) btrfstune.o
-	gcc $(CFLAGS) -o btrfstune $(objects) btrfstune.o $(LDFLAGS) $(LIBS)
+	$(CC) $(CFLAGS) -o btrfstune $(objects) btrfstune.o $(LDFLAGS) $(LIBS)
+
+btrfs-map-logical: $(objects) btrfs-map-logical.o
+	$(CC) $(CFLAGS) -o btrfs-map-logical $(objects) btrfs-map-logical.o $(LDFLAGS) $(LIBS)
+
+btrfs-corrupt-block: $(objects) btrfs-corrupt-block.o
+	$(CC) $(CFLAGS) -o btrfs-corrupt-block $(objects) btrfs-corrupt-block.o $(LDFLAGS) $(LIBS)
 
 btrfs-image: $(objects) btrfs-image.o
-	gcc $(CFLAGS) -o btrfs-image $(objects) btrfs-image.o -lpthread -lz $(LDFLAGS) $(LIBS)
+	$(CC) $(CFLAGS) -o btrfs-image $(objects) btrfs-image.o -lpthread -lz $(LDFLAGS) $(LIBS)
 
 dir-test: $(objects) dir-test.o
-	gcc $(CFLAGS) -o dir-test $(objects) dir-test.o $(LDFLAGS) $(LIBS)
+	$(CC) $(CFLAGS) -o dir-test $(objects) dir-test.o $(LDFLAGS) $(LIBS)
 
 quick-test: $(objects) quick-test.o
-	gcc $(CFLAGS) -o quick-test $(objects) quick-test.o $(LDFLAGS) $(LIBS)
+	$(CC) $(CFLAGS) -o quick-test $(objects) quick-test.o $(LDFLAGS) $(LIBS)
+
+btrfs-convert: $(objects) convert.o
+	$(CC) $(CFLAGS) -o btrfs-convert $(objects) convert.o -lext2fs -lcom_err $(LDFLAGS) $(LIBS)
 
-convert: $(objects) convert.o
-	gcc $(CFLAGS) -o btrfs-convert $(objects) convert.o -lext2fs $(LDFLAGS) $(LIBS)
+ioctl-test: $(objects) ioctl-test.o
+	$(CC) $(CFLAGS) -o ioctl-test $(objects) ioctl-test.o $(LDFLAGS) $(LIBS)
 
 manpages:
 	cd man; make
@@ -75,12 +107,12 @@ install-man:
 	cd man; make install
 
 clean :
-	rm -f $(progs) cscope.out *.o .*.d btrfs-convert
+	rm -f $(progs) cscope.out *.o .*.d btrfs-convert btrfs-image btrfs-select-super \
+	      btrfs-zero-log btrfstune btrfs-corrupt-block calc-size dir-test ioctl-test quick-test version.h
 	cd man; make clean
 
 install: $(progs) install-man
 	$(INSTALL) -m755 -d $(DESTDIR)$(bindir)
 	$(INSTALL) $(progs) $(DESTDIR)$(bindir)
-	if [ -e btrfs-convert ]; then $(INSTALL) btrfs-convert $(DESTDIR)$(bindir); fi
 
 -include .*.d
diff --git a/bcp b/bcp
index 5729e91..e7ca641 100755
--- a/bcp
+++ b/bcp
@@ -136,8 +136,7 @@ for srci in xrange(0, src_args):
             srcname = os.path.join(dirpath, x)
             statinfo = os.lstat(srcname)
 
-            if srcname.startswith(src):
-                part = srcname[len(src) + 1:]
+            part = os.path.relpath(srcname, src)
 
             if stat.S_ISLNK(statinfo.st_mode):
                 copylink(srcname, dst, part, statinfo, None)
@@ -152,8 +151,7 @@ for srci in xrange(0, src_args):
 
         for f in filenames:
             srcname = os.path.join(dirpath, f)
-            if srcname.startswith(src):
-                part = srcname[len(src) + 1:]
+            part = os.path.relpath(srcname, src)
 
             statinfo = os.lstat(srcname)
             copyfile(srcname, dst, part, statinfo, None)
diff --git a/btrfs-corrupt-block.c b/btrfs-corrupt-block.c
new file mode 100644
index 0000000..7051e99
--- /dev/null
+++ b/btrfs-corrupt-block.c
@@ -0,0 +1,396 @@
+/*
+ * Copyright (C) 2009 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#define _XOPEN_SOURCE 500
+#define _GNU_SOURCE 1
+#include <stdio.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <getopt.h>
+#include "kerncompat.h"
+#include "ctree.h"
+#include "volumes.h"
+#include "disk-io.h"
+#include "print-tree.h"
+#include "transaction.h"
+#include "list.h"
+#include "version.h"
+
+struct extent_buffer *debug_corrupt_block(struct btrfs_root *root, u64 bytenr,
+				     u32 blocksize, int copy)
+{
+	int ret;
+	struct extent_buffer *eb;
+	u64 length;
+	struct btrfs_multi_bio *multi = NULL;
+	struct btrfs_device *device;
+	int num_copies;
+	int mirror_num = 1;
+
+	eb = btrfs_find_create_tree_block(root, bytenr, blocksize);
+	if (!eb)
+		return NULL;
+
+	length = blocksize;
+	while (1) {
+		ret = btrfs_map_block(&root->fs_info->mapping_tree, READ,
+				      eb->start, &length, &multi, mirror_num);
+		BUG_ON(ret);
+		device = multi->stripes[0].dev;
+		eb->fd = device->fd;
+		device->total_ios++;
+		eb->dev_bytenr = multi->stripes[0].physical;
+
+		fprintf(stdout, "mirror %d logical %Lu physical %Lu "
+			"device %s\n", mirror_num, (unsigned long long)bytenr,
+			(unsigned long long)eb->dev_bytenr, device->name);
+		kfree(multi);
+
+		if (!copy || mirror_num == copy) {
+			ret = read_extent_from_disk(eb);
+			printf("corrupting %llu copy %d\n", eb->start,
+			       mirror_num);
+			memset(eb->data, 0, eb->len);
+			write_extent_to_disk(eb);
+			fsync(eb->fd);
+		}
+
+		num_copies = btrfs_num_copies(&root->fs_info->mapping_tree,
+					      eb->start, eb->len);
+		if (num_copies == 1)
+			break;
+
+		mirror_num++;
+		if (mirror_num > num_copies)
+			break;
+	}
+	return eb;
+}
+
+static void print_usage(void)
+{
+	fprintf(stderr, "usage: btrfs-corrupt-block [options] device\n");
+	fprintf(stderr, "\t-l Logical extent to corrupt\n");
+	fprintf(stderr, "\t-c Copy of the extent to corrupt (usually 1 or 2)\n");
+	fprintf(stderr, "\t-e Corrupt extent record\n\t-E Corrupt extent tree\n");
+	fprintf(stderr, "\t-k Corrupt block keys\n\t-b Number of bytes to corrupt\n");
+	exit(1);
+}
+
+static void corrupt_keys(struct btrfs_trans_handle *trans,
+			 struct btrfs_root *root,
+			 struct extent_buffer *eb)
+{
+	int slot;
+	int bad_slot;
+	int nr;
+	struct btrfs_disk_key bad_key;;
+
+	nr = btrfs_header_nritems(eb);
+	if (nr == 0)
+		return;
+
+	slot = rand() % nr;
+	bad_slot = rand() % nr;
+
+	if (bad_slot == slot)
+		return;
+
+	fprintf(stderr, "corrupting keys in block %llu slot %d swapping with %d\n",
+		(unsigned long long)eb->start, slot, bad_slot);
+
+	if (btrfs_header_level(eb) == 0) {
+		btrfs_item_key(eb, &bad_key, bad_slot);
+		btrfs_set_item_key(eb, &bad_key, slot);
+	} else {
+		btrfs_node_key(eb, &bad_key, bad_slot);
+		btrfs_set_node_key(eb, &bad_key, slot);
+	}
+	btrfs_mark_buffer_dirty(eb);
+	if (!trans) {
+		csum_tree_block(root, eb, 0);
+		write_extent_to_disk(eb);
+	}
+}
+
+
+static int corrupt_keys_in_block(struct btrfs_root *root, u64 bytenr)
+{
+	struct extent_buffer *eb;
+
+	eb = read_tree_block(root, bytenr, root->leafsize, 0);
+	if (!eb)
+		return -EIO;;
+
+	corrupt_keys(NULL, root, eb);
+	free_extent_buffer(eb);
+	return 0;
+}
+
+static int corrupt_extent(struct btrfs_trans_handle *trans,
+			  struct btrfs_root *root, u64 bytenr, int copy)
+{
+	struct btrfs_key key;
+	struct extent_buffer *leaf;
+	u32 item_size;
+	unsigned long ptr;
+	struct btrfs_path *path;
+	int ret;
+	int slot;
+	int should_del = rand() % 3;
+
+	path = btrfs_alloc_path();
+
+	key.objectid = bytenr;
+	key.type = (u8)-1;
+	key.offset = (u64)-1;
+
+	while(1) {
+		ret = btrfs_search_slot(trans, root->fs_info->extent_root,
+					&key, path, -1, 1);
+		if (ret < 0)
+			break;
+
+		if (ret > 0) {
+			if (path->slots[0] == 0)
+				break;
+			path->slots[0]--;
+			ret = 0;
+		}
+		leaf = path->nodes[0];
+		slot = path->slots[0];
+		btrfs_item_key_to_cpu(leaf, &key, slot);
+		if (key.objectid != bytenr)
+			break;
+
+		if (key.type != BTRFS_EXTENT_ITEM_KEY &&
+		    key.type != BTRFS_TREE_BLOCK_REF_KEY &&
+		    key.type != BTRFS_EXTENT_DATA_REF_KEY &&
+		    key.type != BTRFS_EXTENT_REF_V0_KEY &&
+		    key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
+		    key.type != BTRFS_SHARED_DATA_REF_KEY)
+			goto next;
+
+		if (should_del) {
+			fprintf(stderr, "deleting extent record: key %Lu %u %Lu\n",
+				key.objectid, key.type, key.offset);
+
+			if (key.type == BTRFS_EXTENT_ITEM_KEY) {
+				/* make sure this extent doesn't get
+				 * reused for other purposes */
+				btrfs_pin_extent(root->fs_info,
+						 key.objectid, key.offset);
+			}
+
+			btrfs_del_item(trans, root, path);
+		} else {
+			fprintf(stderr, "corrupting extent record: key %Lu %u %Lu\n",
+				key.objectid, key.type, key.offset);
+			ptr = btrfs_item_ptr_offset(leaf, slot);
+			item_size = btrfs_item_size_nr(leaf, slot);
+			memset_extent_buffer(leaf, 0, ptr, item_size);
+			btrfs_mark_buffer_dirty(leaf);
+		}
+next:
+		btrfs_release_path(NULL, path);
+
+		if (key.offset > 0)
+			key.offset--;
+		if (key.offset == 0)
+			break;
+	}
+
+	btrfs_free_path(path);
+	return 0;
+}
+
+static void btrfs_corrupt_extent_leaf(struct btrfs_trans_handle *trans,
+				      struct btrfs_root *root, struct extent_buffer *eb)
+{
+	u32 nr = btrfs_header_nritems(eb);
+	struct btrfs_key key;
+
+	/* an empty leaf has no victim; rand() % 0 would divide by zero */
+	if (nr == 0)
+		return;
+	btrfs_item_key_to_cpu(eb, &key, rand() % nr);
+	corrupt_extent(trans, root, key.objectid, 1);
+}
+
+static void btrfs_corrupt_extent_tree(struct btrfs_trans_handle *trans,
+				      struct btrfs_root *root, struct extent_buffer *eb)
+{
+	int i;
+	u32 nr;
+
+	if (!eb)
+		return;
+
+	nr = btrfs_header_nritems(eb);
+	if (btrfs_is_leaf(eb)) {
+		btrfs_corrupt_extent_leaf(trans, root, eb);
+		return;
+	}
+
+	if (btrfs_header_level(eb) == 1 && eb != root->node) {
+		if (rand() % 5)
+			return;
+	}
+
+	for (i = 0; i < nr; i++) {
+		struct extent_buffer *next;
+
+		next = read_tree_block(root, btrfs_node_blockptr(eb, i),
+				       root->leafsize, btrfs_node_ptr_generation(eb, i));
+		if (!next)
+			continue;
+		btrfs_corrupt_extent_tree(trans, root, next);
+		free_extent_buffer(next);
+	}
+}
+
+static struct option long_options[] = {
+	/* { "byte-count", 1, NULL, 'b' }, */
+	{ "logical", 1, NULL, 'l' },
+	{ "copy", 1, NULL, 'c' },
+	{ "bytes", 1, NULL, 'b' },
+	{ "extent-record", 0, NULL, 'e' },
+	{ "extent-tree", 0, NULL, 'E' },
+	{ "keys", 0, NULL, 'k' },
+	{ 0, 0, 0, 0}
+};
+
+
+int main(int ac, char **av)
+{
+	struct cache_tree root_cache;
+	struct btrfs_root *root;
+	struct extent_buffer *eb;
+	char *dev;
+	u64 logical = 0;
+	int ret = 0;
+	int option_index = 0;
+	int copy = 0;
+	u64 bytes = 4096;
+	int extent_rec = 0;
+	int extent_tree = 0;
+	int corrupt_block_keys = 0;
+
+	srand(128);
+
+	while(1) {
+		int c;
+		c = getopt_long(ac, av, "l:c:b:eEk", long_options,
+				&option_index);
+		if (c < 0)
+			break;
+		switch(c) {
+			case 'l':
+				logical = atoll(optarg);
+				if (logical == 0) {
+					fprintf(stderr,
+						"invalid extent number\n");
+					print_usage();
+				}
+				break;
+			case 'c':
+				copy = atoi(optarg);
+				if (copy == 0) {
+					fprintf(stderr,
+						"invalid copy number\n");
+					print_usage();
+				}
+				break;
+			case 'b':
+				bytes = atoll(optarg);
+				if (bytes == 0) {
+					fprintf(stderr,
+						"invalid byte count\n");
+					print_usage();
+				}
+				break;
+			case 'e':
+				extent_rec = 1;
+				break;
+			case 'E':
+				extent_tree = 1;
+				break;
+			case 'k':
+				corrupt_block_keys = 1;
+				break;
+			default:
+				print_usage();
+		}
+	}
+	ac = ac - optind;
+	if (ac == 0)
+		print_usage();
+	if (logical == 0 && !extent_tree)
+		print_usage();
+	if (copy < 0)
+		print_usage();
+
+	dev = av[optind];
+
+	radix_tree_init();
+	cache_tree_init(&root_cache);
+
+	root = open_ctree(dev, 0, 1);
+	if (!root) {
+		fprintf(stderr, "Open ctree failed\n");
+		exit(1);
+	}
+	if (extent_rec) {
+		struct btrfs_trans_handle *trans;
+		trans = btrfs_start_transaction(root, 1);
+		ret = corrupt_extent (trans, root, logical, 0);
+		btrfs_commit_transaction(trans, root);
+		goto out_close;
+	}
+	if (extent_tree) {
+		struct btrfs_trans_handle *trans;
+		trans = btrfs_start_transaction(root, 1);
+		btrfs_corrupt_extent_tree(trans, root->fs_info->extent_root,
+					  root->fs_info->extent_root->node);
+		btrfs_commit_transaction(trans, root);
+		goto out_close;
+	}
+
+	if (bytes == 0)
+		bytes = root->sectorsize;
+
+	bytes = (bytes + root->sectorsize - 1) / root->sectorsize;
+	bytes *= root->sectorsize;
+
+	while (bytes > 0) {
+		if (corrupt_block_keys) {
+			corrupt_keys_in_block(root, logical);
+		} else {
+			eb = debug_corrupt_block(root, logical,
+						 root->sectorsize, copy);
+			free_extent_buffer(eb);
+		}
+		logical += root->sectorsize;
+		bytes -= root->sectorsize;
+	}
+	return ret;
+out_close:
+	close_ctree(root);
+	return ret;
+}
diff --git a/btrfs-defrag.c b/btrfs-defrag.c
new file mode 100644
index 0000000..8f1525a
--- /dev/null
+++ b/btrfs-defrag.c
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2010 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __CHECKER__
+#include <sys/ioctl.h>
+#include <sys/mount.h>
+#include "ioctl.h"
+#endif
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <ctype.h>
+#include <unistd.h>
+#include <dirent.h>
+#include <libgen.h>
+#include <getopt.h>
+#include "kerncompat.h"
+#include "ctree.h"
+#include "transaction.h"
+#include "utils.h"
+#include "version.h"
+
diff --git a/btrfs-list.c b/btrfs-list.c
new file mode 100644
index 0000000..5f4a9be
--- /dev/null
+++ b/btrfs-list.c
@@ -0,0 +1,936 @@
+/*
+ * Copyright (C) 2010 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#define _GNU_SOURCE
+#ifndef __CHECKER__
+#include <sys/ioctl.h>
+#include <sys/mount.h>
+#include "ioctl.h"
+#endif
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <dirent.h>
+#include <libgen.h>
+#include "kerncompat.h"
+#include "ctree.h"
+#include "transaction.h"
+#include "utils.h"
+
+/* we store all the roots we find in an rbtree so that we can
+ * search for them later.
+ */
+struct root_lookup {
+	struct rb_root root;
+};
+
+/*
+ * one of these for each root we find.
+ */
+struct root_info {
+	struct rb_node rb_node;
+
+	/* this root's id */
+	u64 root_id;
+
+	/* the id of the root that references this one */
+	u64 ref_tree;
+
+	/* the dir id we're in from ref_tree */
+	u64 dir_id;
+
+	/* path from the subvol we live in to this root, including the
+	 * root's name.  This is null until we do the extra lookup ioctl.
+	 */
+	char *path;
+
+	/* the name of this root in the directory it lives in */
+	char name[];
+};
+
+static void root_lookup_init(struct root_lookup *tree)
+{
+	tree->root.rb_node = NULL;
+}
+
+static int comp_entry(struct root_info *entry, u64 root_id, u64 ref_tree)
+{
+	if (entry->root_id > root_id)
+		return 1;
+	if (entry->root_id < root_id)
+		return -1;
+	if (entry->ref_tree > ref_tree)
+		return 1;
+	if (entry->ref_tree < ref_tree)
+		return -1;
+	return 0;
+}
+
+/*
+ * insert a new root into the tree.  returns the existing root entry
+ * if one is already there.  Both root_id and ref_tree are used
+ * as the key
+ */
+static struct rb_node *tree_insert(struct rb_root *root, u64 root_id,
+				   u64 ref_tree, struct rb_node *node)
+{
+	struct rb_node ** p = &root->rb_node;
+	struct rb_node * parent = NULL;
+	struct root_info *entry;
+	int comp;
+
+	while(*p) {
+		parent = *p;
+		entry = rb_entry(parent, struct root_info, rb_node);
+
+		comp = comp_entry(entry, root_id, ref_tree);
+
+		if (comp < 0)
+			p = &(*p)->rb_left;
+		else if (comp > 0)
+			p = &(*p)->rb_right;
+		else
+			return parent;
+	}
+
+	/* no existing entry matched: link the new node where the walk ended */
+	rb_link_node(node, parent, p);
+	rb_insert_color(node, root);
+	return NULL;
+}
+
+/*
+ * find a given root id in the tree.  We return the smallest one,
+ * rb_next can be used to move forward looking for more if required
+ */
+static struct root_info *tree_search(struct rb_root *root, u64 root_id)
+{
+	struct rb_node * n = root->rb_node;
+	struct root_info *entry;
+
+	while(n) {
+		entry = rb_entry(n, struct root_info, rb_node);
+
+		if (entry->root_id < root_id)
+			n = n->rb_left;
+		else if (entry->root_id > root_id)
+			n = n->rb_right;
+		else {
+			struct root_info *prev;
+			struct rb_node *prev_n;
+			while (1) {
+				prev_n = rb_prev(n);
+				if (!prev_n)
+					break;
+				prev = rb_entry(prev_n, struct root_info,
+						      rb_node);
+				if (prev->root_id != root_id)
+					break;
+				entry = prev;
+				n = prev_n;
+			}
+			return entry;
+		}
+	}
+	return NULL;
+}
+
+/*
+ * this allocates a new root in the lookup tree.
+ *
+ * root_id should be the object id of the root
+ *
+ * ref_tree is the objectid of the referring root.
+ *
+ * dir_id is the directory in ref_tree where this root_id can be found.
+ *
+ * name is the name of root_id in that directory
+ *
+ * name_len is the length of name
+ */
+static int add_root(struct root_lookup *root_lookup,
+		    u64 root_id, u64 ref_tree, u64 dir_id, char *name,
+		    int name_len)
+{
+	struct root_info *ri;
+	struct rb_node *ret;
+	ri = malloc(sizeof(*ri) + name_len + 1);
+	if (!ri) {
+		printf("memory allocation failed\n");
+		exit(1);
+	}
+	memset(ri, 0, sizeof(*ri) + name_len + 1);
+	ri->path = NULL;
+	ri->dir_id = dir_id;
+	ri->root_id = root_id;
+	ri->ref_tree = ref_tree;
+	strncpy(ri->name, name, name_len);
+
+	ret = tree_insert(&root_lookup->root, root_id, ref_tree, &ri->rb_node);
+	if (ret) {
+		printf("failed to insert tree %llu\n", (unsigned long long)root_id);
+		exit(1);
+	}
+	return 0;
+}
+
+/*
+ * for a given root_info, search through the root_lookup tree to construct
+ * the full path name to it.
+ *
+ * This can't be called until all the root_info->path fields are filled
+ * in by lookup_ino_path
+ */
+static int resolve_root(struct root_lookup *rl, struct root_info *ri,
+			u64 *root_id, u64 *parent_id, u64 *top_id, char **path)
+{
+	char *full_path = NULL;
+	int len = 0;
+	struct root_info *found;
+
+	/*
+	 * we go backwards from the root_info object and add pathnames
+	 * from parent directories as we go.
+	 */
+	*parent_id = 0;
+	found = ri;
+	while (1) {
+		char *tmp;
+		u64 next;
+		int add_len = strlen(found->path);
+
+		if (full_path) {
+			/* room for / and for null; only needed when prepending */
+			tmp = malloc(add_len + 2 + len);
+			memcpy(tmp + add_len + 1, full_path, len);
+			tmp[add_len] = '/';
+			memcpy(tmp, found->path, add_len);
+			tmp[add_len + len + 1] = '\0';
+			free(full_path);
+			full_path = tmp;
+			len += add_len + 1;
+		} else {
+			full_path = strdup(found->path);
+			len = add_len;
+		}
+
+		next = found->ref_tree;
+		/* record the first parent */
+		if (*parent_id == 0)
+			*parent_id = next;
+
+		/* if the ref_tree refers to ourselves, we're at the top */
+		if (next == found->root_id) {
+			*top_id = next;
+			break;
+		}
+
+		/*
+		 * if the ref_tree wasn't in our tree of roots, we're
+		 * at the top
+		 */
+		found = tree_search(&rl->root, next);
+		if (!found) {
+			*top_id = next;
+			break;
+		}
+	}
+
+	*root_id = ri->root_id;
+	*path = full_path;
+
+	return 0;
+}
+
+/*
+ * for a single root_info, ask the kernel to give us a path name
+ * inside its ref_root for the dir_id where it lives.
+ *
+ * This fills in root_info->path with the path to the directory and
+ * appends this root's name.
+ */
+static int lookup_ino_path(int fd, struct root_info *ri)
+{
+	struct btrfs_ioctl_ino_lookup_args args;
+	int ret, e;
+
+	if (ri->path)
+		return 0;
+
+	memset(&args, 0, sizeof(args));
+	args.treeid = ri->ref_tree;
+	args.objectid = ri->dir_id;
+
+	ret = ioctl(fd, BTRFS_IOC_INO_LOOKUP, &args);
+	e = errno;
+	if (ret) {
+		fprintf(stderr, "ERROR: Failed to lookup path for root %llu - %s\n",
+			(unsigned long long)ri->ref_tree,
+			strerror(e));
+		return ret;
+	}
+
+	if (args.name[0]) {
+		/*
+		 * we're in a subdirectory of ref_tree, the kernel ioctl
+		 * puts a / in there for us
+		 */
+		ri->path = malloc(strlen(ri->name) + strlen(args.name) + 1);
+		if (!ri->path) {
+			perror("malloc failed");
+			exit(1);
+		}
+		strcpy(ri->path, args.name);
+		strcat(ri->path, ri->name);
+	} else {
+		/* we're at the root of ref_tree */
+		ri->path = strdup(ri->name);
+		if (!ri->path) {
+			perror("strdup failed");
+			exit(1);
+		}
+	}
+	return 0;
+}
+
+/* finding the generation for a given path is a two step process.
+ * First we use the inode lookup routine to find out the root id
+ *
+ * Then we use the tree search ioctl to scan all the root items for a
+ * given root id and spit out the latest generation we can find
+ */
+static u64 find_root_gen(int fd)
+{
+	struct btrfs_ioctl_ino_lookup_args ino_args;
+	int ret;
+	struct btrfs_ioctl_search_args args;
+	struct btrfs_ioctl_search_key *sk = &args.key;
+	struct btrfs_ioctl_search_header *sh;
+	unsigned long off = 0;
+	u64 max_found = 0;
+	int i;
+	int e;
+
+	memset(&ino_args, 0, sizeof(ino_args));
+	ino_args.objectid = BTRFS_FIRST_FREE_OBJECTID;
+
+	/* this ioctl fills in ino_args->treeid */
+	ret = ioctl(fd, BTRFS_IOC_INO_LOOKUP, &ino_args);
+	e = errno;
+	if (ret) {
+		fprintf(stderr, "ERROR: Failed to lookup path for dirid %llu - %s\n",
+			(unsigned long long)BTRFS_FIRST_FREE_OBJECTID,
+			strerror(e));
+		return 0;
+	}
+
+	memset(&args, 0, sizeof(args));
+
+	sk->tree_id = 1;
+
+	/*
+	 * there may be more than one ROOT_ITEM key if there are
+	 * snapshots pending deletion, we have to loop through
+	 * them.
+	 */
+	sk->min_objectid = ino_args.treeid;
+	sk->max_objectid = ino_args.treeid;
+	sk->max_type = BTRFS_ROOT_ITEM_KEY;
+	sk->min_type = BTRFS_ROOT_ITEM_KEY;
+	sk->max_offset = (u64)-1;
+	sk->max_transid = (u64)-1;
+	sk->nr_items = 4096;
+
+	while (1) {
+		ret = ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args);
+		e = errno;
+		if (ret < 0) {
+			fprintf(stderr, "ERROR: can't perform the search - %s\n",
+				strerror(e));
+			return 0;
+		}
+		/* the ioctl returns the number of item it found in nr_items */
+		if (sk->nr_items == 0)
+			break;
+
+		off = 0;
+		for (i = 0; i < sk->nr_items; i++) {
+			struct btrfs_root_item *item;
+			sh = (struct btrfs_ioctl_search_header *)(args.buf +
+								  off);
+
+			off += sizeof(*sh);
+			item = (struct btrfs_root_item *)(args.buf + off);
+			off += sh->len;
+
+			sk->min_objectid = sh->objectid;
+			sk->min_type = sh->type;
+			sk->min_offset = sh->offset;
+
+			if (sh->objectid > ino_args.treeid)
+				break;
+
+			if (sh->objectid == ino_args.treeid &&
+			    sh->type == BTRFS_ROOT_ITEM_KEY) {
+				max_found = max(max_found,
+						btrfs_root_generation(item));
+			}
+		}
+		if (sk->min_offset < (u64)-1)
+			sk->min_offset++;
+		else
+			break;
+		/* stop once the search has left this root's ROOT_ITEM keys */
+		if (sk->min_type != BTRFS_ROOT_ITEM_KEY)
+			break;
+		if (sk->min_objectid != ino_args.treeid)
+			break;
+	}
+	return max_found;
+}
+
+/* pass in a directory id and this will return
+ * the full path of the parent directory inside its
+ * subvolume root.
+ *
+ * It may return NULL if it is in the root, or an ERR_PTR if things
+ * go badly.
+ */
+static char *__ino_resolve(int fd, u64 dirid)
+{
+	struct btrfs_ioctl_ino_lookup_args args;
+	int ret;
+	char *full;
+	int e;
+
+	memset(&args, 0, sizeof(args));
+	args.objectid = dirid;
+
+	ret = ioctl(fd, BTRFS_IOC_INO_LOOKUP, &args);
+	e = errno;
+	if (ret) {
+		fprintf(stderr, "ERROR: Failed to lookup path for dirid %llu - %s\n",
+			(unsigned long long)dirid, strerror(e) );
+		return ERR_PTR(ret);
+	}
+
+	if (args.name[0]) {
+		/*
+		 * we're in a subdirectory of ref_tree, the kernel ioctl
+		 * puts a / in there for us
+		 */
+		full = strdup(args.name);
+		if (!full) {
+			perror("malloc failed");
+			return ERR_PTR(-ENOMEM);
+		}
+	} else {
+		/* we're at the root of ref_tree */
+		full = NULL;
+	}
+	return full;
+}
+
+/*
+ * simple string builder, returning a new string with both
+ * dirid and name
+ */
+char *build_name(char *dirid, char *name)
+{
+	char *full;
+	if (!dirid)
+		return strdup(name);
+
+	full = malloc(strlen(dirid) + strlen(name) + 1);
+	if (!full)
+		return NULL;
+	strcpy(full, dirid);
+	strcat(full, name);
+	return full;
+}
+
+/*
+ * given an inode number, this returns the full path name inside the subvolume
+ * to that file/directory.  cache_dirid and cache_name are used to
+ * cache the results so we can avoid tree searches if a later call goes
+ * to the same directory or file name.
+ *
+ * Returns a malloc'ed string, or NULL if the search fails, finds no back
+ * ref, or memory runs out.  *cache_name stays owned by the cache; the old
+ * value is freed whenever a different parent directory gets resolved.
+ */
+static char *ino_resolve(int fd, u64 ino, u64 *cache_dirid, char **cache_name)
+{
+	u64 dirid;
+	char *dirname;
+	char *name;
+	char *full;
+	int ret;
+	struct btrfs_ioctl_search_args args;
+	struct btrfs_ioctl_search_key *sk = &args.key;
+	struct btrfs_ioctl_search_header *sh;
+	struct btrfs_inode_ref *ref;
+	int e;
+
+	memset(&args, 0, sizeof(args));
+	sk->tree_id = 0;
+
+	/* step one, search for the inode back ref; only the first is used */
+	sk->min_objectid = ino;
+	sk->max_objectid = ino;
+	sk->max_type = BTRFS_INODE_REF_KEY;
+	sk->max_offset = (u64)-1;
+	sk->min_type = BTRFS_INODE_REF_KEY;
+	sk->max_transid = (u64)-1;
+	sk->nr_items = 1;
+
+	ret = ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args);
+	e = errno;
+	if (ret < 0) {
+		fprintf(stderr, "ERROR: can't perform the search - %s\n",
+			strerror(e));
+		return NULL;
+	}
+	/* the ioctl returns the number of items it found in nr_items */
+	if (sk->nr_items == 0)
+		return NULL;
+
+	/* only the first returned item is examined */
+	sh = (struct btrfs_ioctl_search_header *)args.buf;
+	if (sh->type != BTRFS_INODE_REF_KEY)
+		return NULL;
+
+	/* the key offset of the back ref is used as the parent directory id */
+	dirid = sh->offset;
+	ref = (struct btrfs_inode_ref *)(sh + 1);
+	name = strndup((char *)(ref + 1),
+		       btrfs_stack_inode_ref_name_len(ref));
+	if (!name)
+		return NULL;
+
+	if (dirid == *cache_dirid && *cache_name) {
+		/* use our cached value */
+		dirname = *cache_name;
+	} else {
+		/*
+		 * the inode backref gives us the file name and the parent
+		 * directory id.  From here we use __ino_resolve to get the
+		 * path to the parent
+		 */
+		dirname = __ino_resolve(fd, dirid);
+		if (IS_ERR(dirname)) {
+			/* don't pass an error pointer on */
+			free(name);
+			return NULL;
+		}
+	}
+
+	full = build_name(dirname, name);
+	if (*cache_name && dirname != *cache_name)
+		free(*cache_name);
+	*cache_name = dirname;
+	*cache_dirid = dirid;
+	free(name);
+
+	return full;
+}
+
+/*
+ * Fill root_lookup with one entry per ROOT_BACKREF item found in the tree
+ * of tree roots.  Returns 0 on success or the negative ioctl return value.
+ */
+static int __list_subvol_search(int fd, struct root_lookup *root_lookup)
+{
+	int ret;
+	struct btrfs_ioctl_search_args args;
+	struct btrfs_ioctl_search_key *sk = &args.key;
+	struct btrfs_ioctl_search_header *sh;
+	struct btrfs_root_ref *ref;
+	unsigned long off = 0;
+	int name_len;
+	char *name;
+	u64 dir_id;
+	int i;
+
+	root_lookup_init(root_lookup);
+	memset(&args, 0, sizeof(args));
+
+	/* search in the tree of tree roots */
+	sk->tree_id = 1;
+
+	/*
+	 * set the min and max to backref keys.  The search will
+	 * only send back this type of key now.
+	 */
+	sk->max_type = BTRFS_ROOT_BACKREF_KEY;
+	sk->min_type = BTRFS_ROOT_BACKREF_KEY;
+
+	/*
+	 * set all the other params to the max, we'll take any objectid
+	 * and any trans
+	 */
+	sk->max_objectid = (u64)-1;
+	sk->max_offset = (u64)-1;
+	sk->max_transid = (u64)-1;
+
+	/* just a big number, doesn't matter much */
+	sk->nr_items = 4096;
+
+	while(1) {
+		ret = ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args);
+		if (ret < 0)
+			return ret;
+		/* the ioctl returns the number of items it found in nr_items */
+		if (sk->nr_items == 0)
+			break;
+
+		off = 0;
+
+		/*
+		 * for each item, pull the key out of the header and then
+		 * read the root_ref item it contains
+		 */
+		for (i = 0; i < sk->nr_items; i++) {
+			sh = (struct btrfs_ioctl_search_header *)(args.buf +
+								  off);
+			off += sizeof(*sh);
+			if (sh->type == BTRFS_ROOT_BACKREF_KEY) {
+				ref = (struct btrfs_root_ref *)(args.buf + off);
+				name_len = btrfs_stack_root_ref_name_len(ref);
+				name = (char *)(ref + 1);
+				dir_id = btrfs_stack_root_ref_dirid(ref);
+
+				add_root(root_lookup, sh->objectid, sh->offset,
+					 dir_id, name, name_len);
+			}
+
+			off += sh->len;
+
+			/*
+			 * record the mins in sk so we can make sure the
+			 * next search doesn't repeat this root
+			 */
+			sk->min_objectid = sh->objectid;
+			sk->min_type = sh->type;
+			sk->min_offset = sh->offset;
+		}
+		sk->nr_items = 4096;
+		/* this iteration is done, step forward one root for the next
+		 * ioctl
+		 */
+		if (sk->min_type < BTRFS_ROOT_BACKREF_KEY) {
+			sk->min_type = BTRFS_ROOT_BACKREF_KEY;
+			sk->min_offset = 0;
+		} else if (sk->min_objectid < (u64)-1) {
+			sk->min_objectid++;
+			sk->min_type = BTRFS_ROOT_BACKREF_KEY;
+			sk->min_offset = 0;
+		} else
+			break;
+	}
+
+	return 0;
+}
+
+static int __list_subvol_fill_paths(int fd, struct root_lookup *root_lookup)
+{
+	struct rb_node *node;
+	int err;
+
+	/*
+	 * walk every root_info in tree order and let lookup_ino_path()
+	 * process it; the first negative return aborts the walk
+	 */
+	for (node = rb_first(&root_lookup->root); node; node = rb_next(node)) {
+		err = lookup_ino_path(fd,
+				rb_entry(node, struct root_info, rb_node));
+		if (err < 0)
+			return err;
+	}
+	return 0;
+}
+
+int list_subvols(int fd, int print_parent)
+{
+	struct root_lookup root_lookup;
+	struct root_info *entry;
+	struct rb_node *n;
+	u64 root_id;
+	u64 parent_id;
+	u64 level;
+	char *path;
+	int ret;
+
+	/* first collect every root backref into the lookup tree */
+	ret = __list_subvol_search(fd, &root_lookup);
+	if (ret) {
+		fprintf(stderr, "ERROR: can't perform the search - %s\n",
+				strerror(errno));
+		return ret;
+	}
+
+	/*
+	 * now we have an rbtree full of root_info objects, but we need to fill
+	 * in their path names within the subvol that is referencing each one.
+	 */
+	ret = __list_subvol_fill_paths(fd, &root_lookup);
+	if (ret < 0)
+		return ret;
+
+	/*
+	 * now that we have all the subvol-relative paths filled in,
+	 * we have to string the subvols together so that we can get
+	 * a path all the way back to the FS root.  The tree is walked
+	 * from its last entry to its first, one output line per root.
+	 */
+	for (n = rb_last(&root_lookup.root); n; n = rb_prev(n)) {
+		entry = rb_entry(n, struct root_info, rb_node);
+		resolve_root(&root_lookup, entry, &root_id, &parent_id,
+				&level, &path);
+		if (!print_parent) {
+			printf("ID %llu top level %llu path %s\n",
+				(unsigned long long)root_id,
+				(unsigned long long)level, path);
+		} else {
+			printf("ID %llu parent %llu top level %llu path %s\n",
+				(unsigned long long)root_id,
+				(unsigned long long)parent_id,
+				(unsigned long long)level, path);
+		}
+		free(path);
+	}
+	return ret;
+}
+
+/*
+ * Print one line describing the file extent item for key sh, ending with
+ * the path of the owning inode.  The cache_* arguments carry resolved names
+ * from one call to the next.  Returns 0, or -EIO on failure.
+ */
+static int print_one_extent(int fd, struct btrfs_ioctl_search_header *sh,
+			    struct btrfs_file_extent_item *item,
+			    u64 found_gen, u64 *cache_dirid,
+			    char **cache_dir_name, u64 *cache_ino,
+			    char **cache_full_name)
+{
+	char *path = NULL;
+	u64 disk_start = 0;
+	u64 disk_offset = 0;
+	u64 len = 0;
+	int compressed;
+	u8 type;
+
+	/* reuse the cached path for the same inode, otherwise drop it */
+	if (sh->objectid == *cache_ino) {
+		path = *cache_full_name;
+	} else if (*cache_full_name) {
+		free(*cache_full_name);
+		*cache_full_name = NULL;
+	}
+	if (!path) {
+		path = ino_resolve(fd, sh->objectid, cache_dirid,
+				   cache_dir_name);
+		*cache_full_name = path;
+		*cache_ino = sh->objectid;
+		if (!path)
+			return -EIO;
+	}
+
+	type = btrfs_stack_file_extent_type(item);
+	compressed = btrfs_stack_file_extent_compression(item);
+	switch (type) {
+	case BTRFS_FILE_EXTENT_REG:
+	case BTRFS_FILE_EXTENT_PREALLOC:
+		disk_start = btrfs_stack_file_extent_disk_bytenr(item);
+		disk_offset = btrfs_stack_file_extent_offset(item);
+		len = btrfs_stack_file_extent_num_bytes(item);
+		break;
+	case BTRFS_FILE_EXTENT_INLINE:
+		/* inline extents are reported with disk start/offset 0 */
+		len = btrfs_stack_file_extent_ram_bytes(item);
+		break;
+	default:
+		printf("unhandled extent type %d for inode %llu "
+		       "file offset %llu gen %llu\n",
+			type,
+			(unsigned long long)sh->objectid,
+			(unsigned long long)sh->offset,
+			(unsigned long long)found_gen);
+		return -EIO;
+	}
+
+	printf("inode %llu file offset %llu len %llu disk start %llu "
+	       "offset %llu gen %llu flags ",
+	       (unsigned long long)sh->objectid,
+	       (unsigned long long)sh->offset,
+	       (unsigned long long)len,
+	       (unsigned long long)disk_start,
+	       (unsigned long long)disk_offset,
+	       (unsigned long long)found_gen);
+
+	/* flag names are joined with '|'; NONE is printed when none apply */
+	if (compressed)
+		printf("COMPRESS");
+	if (type == BTRFS_FILE_EXTENT_PREALLOC)
+		printf("%sPREALLOC", compressed ? "|" : "");
+	else if (type == BTRFS_FILE_EXTENT_INLINE)
+		printf("%sINLINE", compressed ? "|" : "");
+	else if (!compressed)
+		printf("NONE");
+	printf(" %s\n", path);
+	return 0;
+}
+
+/*
+ * Print every file extent of tree root_id with generation >= oldest_gen,
+ * then the transid marker.  Returns the ioctl result of the last search.
+ */
+int find_updated_files(int fd, u64 root_id, u64 oldest_gen)
+{
+	int ret;
+	struct btrfs_ioctl_search_args args;
+	struct btrfs_ioctl_search_key *sk = &args.key;
+	struct btrfs_ioctl_search_header *sh;
+	struct btrfs_file_extent_item *item;
+	unsigned long off = 0;
+	u64 found_gen;
+	u64 max_found = 0;
+	int i;
+	int e;
+	u64 cache_dirid = 0;
+	u64 cache_ino = 0;
+	char *cache_dir_name = NULL;
+	char *cache_full_name = NULL;
+	struct btrfs_file_extent_item backup;
+
+	memset(&backup, 0, sizeof(backup));
+	memset(&args, 0, sizeof(args));
+
+	sk->tree_id = root_id;
+	/* take any objectid and offset, but only transids >= oldest_gen */
+	sk->max_objectid = (u64)-1;
+	sk->max_offset = (u64)-1;
+	sk->max_transid = (u64)-1;
+	sk->max_type = BTRFS_EXTENT_DATA_KEY;
+	sk->min_transid = oldest_gen;
+	/* just a big number, doesn't matter much */
+	sk->nr_items = 4096;
+
+	max_found = find_root_gen(fd);
+	while(1) {
+		ret = ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args);
+		e = errno;
+		if (ret < 0) {
+			fprintf(stderr, "ERROR: can't perform the search- %s\n",
+				strerror(e));
+			/* drop names cached by earlier rounds */
+			free(cache_dir_name);
+			free(cache_full_name);
+			return ret;
+		}
+		/* the ioctl returns the number of items it found in nr_items */
+		if (sk->nr_items == 0)
+			break;
+
+		off = 0;
+
+		/* walk the returned items: a search header, then the item */
+		for (i = 0; i < sk->nr_items; i++) {
+			sh = (struct btrfs_ioctl_search_header *)(args.buf +
+								  off);
+			off += sizeof(*sh);
+
+			/*
+			 * just in case the item was too big, pass something other
+			 * than garbage
+			 */
+			if (sh->len == 0)
+				item = &backup;
+			else
+				item = (struct btrfs_file_extent_item *)(args.buf +
+								 off);
+			found_gen = btrfs_stack_file_extent_generation(item);
+			if (sh->type == BTRFS_EXTENT_DATA_KEY &&
+			    found_gen >= oldest_gen) {
+				print_one_extent(fd, sh, item, found_gen,
+						 &cache_dirid, &cache_dir_name,
+						 &cache_ino, &cache_full_name);
+			}
+			off += sh->len;
+
+			/*
+			 * record the mins in sk so we can make sure the
+			 * next search doesn't repeat this item
+			 */
+			sk->min_objectid = sh->objectid;
+			sk->min_offset = sh->offset;
+			sk->min_type = sh->type;
+		}
+		sk->nr_items = 4096;
+		if (sk->min_offset < (u64)-1)
+			sk->min_offset++;
+		else if (sk->min_objectid < (u64)-1) {
+			sk->min_objectid++;
+			sk->min_offset = 0;
+			sk->min_type = 0;
+		} else
+			break;
+	}
+	free(cache_dir_name);
+	free(cache_full_name);
+	printf("transid marker was %llu\n", (unsigned long long)max_found);
+	return ret;
+}
+
+/*
+ * Return the path resolved for the subvolume whose root id equals root, or
+ * NULL when no root matches.  Search failures come back as ERR_PTR().
+ */
+char *path_for_root(int fd, u64 root)
+{
+	struct root_lookup root_lookup;
+	struct root_info *entry;
+	struct rb_node *n;
+	char *ret_path = NULL;
+	char *path;
+	u64 root_id;
+	u64 parent_id;
+	u64 level;
+	int ret;
+
+	ret = __list_subvol_search(fd, &root_lookup);
+	if (ret >= 0)
+		ret = __list_subvol_fill_paths(fd, &root_lookup);
+	if (ret < 0)
+		return ERR_PTR(ret);
+
+	for (n = rb_last(&root_lookup.root); n; n = rb_prev(n)) {
+		entry = rb_entry(n, struct root_info, rb_node);
+		resolve_root(&root_lookup, entry, &root_id, &parent_id, &level,
+				&path);
+		if (root_id != root)
+			free(path);
+		else
+			ret_path = path;
+	}
+
+	return ret_path;
+}
diff --git a/btrfs-map-logical.c b/btrfs-map-logical.c
new file mode 100644
index 0000000..d79a73a
--- /dev/null
+++ b/btrfs-map-logical.c
@@ -0,0 +1,220 @@
+/*
+ * Copyright (C) 2009 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#define _XOPEN_SOURCE 500
+#define _GNU_SOURCE 1
+#include <stdio.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <getopt.h>
+#include "kerncompat.h"
+#include "ctree.h"
+#include "volumes.h"
+#include "disk-io.h"
+#include "print-tree.h"
+#include "transaction.h"
+#include "list.h"
+#include "version.h"
+
+/* mirror info goes to stdout, unless the extent data itself is being
+ * dumped to stdout (-o -), in which case it goes to stderr
+ */
+static FILE *info_file;
+
+/*
+ * Report on info_file where each mirror of logical block bytenr lives.  The
+ * block is read from every mirror if copy is 0, else only from mirror copy.
+ */
+struct extent_buffer *debug_read_block(struct btrfs_root *root, u64 bytenr,
+				     u32 blocksize, int copy)
+{
+	int ret;
+	struct extent_buffer *eb;
+	u64 length;
+	struct btrfs_multi_bio *multi = NULL;
+	struct btrfs_device *device;
+	int num_copies;
+	int mirror_num = 1;
+
+	eb = btrfs_find_create_tree_block(root, bytenr, blocksize);
+	if (!eb)
+		return NULL;
+
+	length = blocksize;
+	while (1) {
+		ret = btrfs_map_block(&root->fs_info->mapping_tree, READ,
+				      eb->start, &length, &multi, mirror_num);
+		BUG_ON(ret);
+		device = multi->stripes[0].dev;
+		eb->fd = device->fd;
+		device->total_ios++;
+		eb->dev_bytenr = multi->stripes[0].physical;
+
+		/* %llu is the standard conversion for the casts below */
+		fprintf(info_file, "mirror %d logical %llu physical %llu "
+			"device %s\n", mirror_num, (unsigned long long)bytenr,
+			(unsigned long long)eb->dev_bytenr, device->name);
+		kfree(multi);
+		if (!copy || mirror_num == copy)
+			ret = read_extent_from_disk(eb);
+
+		num_copies = btrfs_num_copies(&root->fs_info->mapping_tree,
+					      eb->start, eb->len);
+		if (num_copies == 1 || ++mirror_num > num_copies)
+			break;
+	}
+	return eb;
+}
+
+static void print_usage(void)
+{
+	fputs("usage: btrfs-map-logical [options] mount_point\n"
+	      "\t-l Logical extent to map\n"
+	      "\t-c Copy of the extent to read (usually 1 or 2)\n"
+	      "\t-o Output file to hold the extent\n"
+	      "\t-b Number of bytes to read\n", stderr);
+	exit(1);	/* usage errors always end the program */
+}
+
+static struct option long_options[] = {
+	/* each long option maps to the short option handled in main() */
+	{ "logical", 1, NULL, 'l' },
+	{ "copy", 1, NULL, 'c' },
+	{ "output", 1, NULL, 'o' },
+	{ "bytes", 1, NULL, 'b' },
+	{ 0, 0, 0, 0}
+};
+
+/*
+ * Print where each mirror of a logical extent lives and optionally dump the
+ * extent data; exits non-zero if the output cannot be set up or written.
+ */
+int main(int ac, char **av)
+{
+	struct cache_tree root_cache;
+	struct btrfs_root *root;
+	struct extent_buffer *eb;
+	char *dev;
+	char *output_file = NULL;
+	u64 logical = 0;
+	int ret = 0;
+	int option_index = 0;
+	int copy = 0;
+	u64 bytes = 0;
+	int out_fd = 0;
+	int err;
+
+	while(1) {
+		int c = getopt_long(ac, av, "l:c:o:b:", long_options,
+				&option_index);
+		if (c < 0)
+			break;
+		switch(c) {
+			case 'l':
+				logical = atoll(optarg);
+				if (logical == 0) {
+					fprintf(stderr,
+						"invalid extent number\n");
+					print_usage();
+				}
+				break;
+			case 'c':
+				copy = atoi(optarg);
+				if (copy == 0) {
+					fprintf(stderr,
+						"invalid copy number\n");
+					print_usage();
+				}
+				break;
+			case 'b':
+				bytes = atoll(optarg);
+				if (bytes == 0) {
+					fprintf(stderr,
+						"invalid byte count\n");
+					print_usage();
+				}
+				break;
+			case 'o':
+				output_file = strdup(optarg);
+				break;
+			default:
+				print_usage();
+		}
+	}
+	/* a device argument and a non-zero logical address are mandatory */
+	if (ac - optind == 0 || logical == 0 || copy < 0)
+		print_usage();
+
+	dev = av[optind];
+	radix_tree_init();
+	cache_tree_init(&root_cache);
+
+	root = open_ctree(dev, 0, 0);
+	if (!root) {
+		fprintf(stderr, "Open ctree failed\n");
+		exit(1);
+	}
+
+	/* mirror info moves to stderr when the data itself goes to stdout */
+	info_file = stdout;
+	if (output_file) {
+		if (strcmp(output_file, "-") == 0) {
+			out_fd = 1;
+			info_file = stderr;
+		} else {
+			out_fd = open(output_file, O_RDWR | O_CREAT, 0600);
+			if (out_fd < 0 || ftruncate(out_fd, 0)) {
+				fprintf(stderr, "unable to prepare %s\n",
+					output_file);
+				if (out_fd >= 0)
+					close(out_fd);
+				ret = 1;
+				goto close;
+			}
+		}
+	}
+
+	if (bytes == 0)
+		bytes = root->sectorsize;
+	/* round the byte count up to a whole number of sectors */
+	bytes = (bytes + root->sectorsize - 1) / root->sectorsize;
+	bytes *= root->sectorsize;
+
+	while (bytes > 0) {
+		eb = debug_read_block(root, logical, root->sectorsize, copy);
+		if (eb && output_file) {
+			err = write(out_fd, eb->data, eb->len);
+			if (err < 0 || err != eb->len) {
+				fprintf(stderr, "output file write failed\n");
+				ret = 1;
+				goto out_close_fd;
+			}
+		}
+		free_extent_buffer(eb);
+		logical += root->sectorsize;
+		bytes -= root->sectorsize;
+	}
+
+out_close_fd:
+	if (output_file && out_fd != 1)
+		close(out_fd);
+close:
+	close_ctree(root);
+	return ret;
+}
diff --git a/btrfs-select-super.c b/btrfs-select-super.c
new file mode 100644
index 0000000..51eb9c9
--- /dev/null
+++ b/btrfs-select-super.c
@@ -0,0 +1,99 @@
+/*
+ * Copyright (C) 2007 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#define _XOPEN_SOURCE 500
+#define _GNU_SOURCE 1
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include "kerncompat.h"
+#include "ctree.h"
+#include "disk-io.h"
+#include "print-tree.h"
+#include "transaction.h"
+#include "list.h"
+#include "version.h"
+#include "utils.h"
+
+static void print_usage(void)
+{
+	fputs("usage: btrfs-select-super -s number dev\n", stderr);
+	fprintf(stderr, "%s\n", BTRFS_BUILD_VERSION);
+	exit(1);	/* never returns */
+}
+
+/*
+ * Read the super block copy selected with -s from the given device and
+ * write it over all super block copies.  The device must not be mounted.
+ */
+int main(int ac, char **av)
+{
+	struct btrfs_root *root;
+	u64 bytenr = 0;
+	int num;
+	int ret;
+	int c;
+
+	while ((c = getopt(ac, av, "s:")) >= 0) {
+		if (c == 's') {
+			num = atol(optarg);
+			bytenr = btrfs_sb_offset(num);
+			printf("using SB copy %d, bytenr %llu\n", num,
+			       (unsigned long long)bytenr);
+		} else {
+			print_usage();
+		}
+	}
+
+	/* exactly one device argument has to remain */
+	if (ac - optind != 1)
+		print_usage();
+
+	if (bytenr == 0) {
+		fprintf(stderr, "Please select the super copy with -s\n");
+		print_usage();
+	}
+
+	radix_tree_init();
+
+	ret = check_mounted(av[optind]);
+	if (ret < 0) {
+		fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
+		return ret;
+	}
+	if (ret) {
+		fprintf(stderr, "%s is currently mounted. Aborting.\n", av[optind]);
+		return -EBUSY;
+	}
+
+	root = open_ctree(av[optind], bytenr, 1);
+	if (root == NULL)
+		return 1;
+
+	/* make the super writing code think we've read the first super */
+	root->fs_info->super_bytenr = BTRFS_SUPER_INFO_OFFSET;
+
+	/*
+	 * we don't close the ctree or anything, because we don't want a real
+	 * transaction commit.  We just want the super copy we pulled off the
+	 * disk to overwrite all the other copies
+	 */
+	return write_all_supers(root);
+}
diff --git a/btrfs-show.c b/btrfs-show.c
index c49626c..8210fd2 100644
--- a/btrfs-show.c
+++ b/btrfs-show.c
@@ -117,6 +117,11 @@ int main(int ac, char **av)
 	int ret;
 	int option_index = 0;
 
+	printf( "**\n"
+		"** WARNING: this program is considered deprecated\n"
+		"** Please consider to switch to the btrfs utility\n"
+		"**\n");
+
 	while(1) {
 		int c;
 		c = getopt_long(ac, av, "", long_options,
diff --git a/btrfs-vol.c b/btrfs-vol.c
index 8069778..0efdbc1 100644
--- a/btrfs-vol.c
+++ b/btrfs-vol.c
@@ -78,6 +78,11 @@ int main(int ac, char **av)
 	struct btrfs_ioctl_vol_args args;
 	u64 dev_block_count = 0;
 
+	printf( "**\n"
+		"** WARNING: this program is considered deprecated\n"
+		"** Please consider to switch to the btrfs utility\n"
+		"**\n");
+
 	while(1) {
 		int c;
 		c = getopt_long(ac, av, "a:br:", long_options,
@@ -108,10 +113,24 @@ int main(int ac, char **av)
 	if (device && strcmp(device, "missing") == 0 &&
 	    cmd == BTRFS_IOC_RM_DEV) {
 		fprintf(stderr, "removing missing devices from %s\n", mnt);
-	} else if (device) {
+	} else if (cmd != BTRFS_IOC_BALANCE) {
+		if (cmd == BTRFS_IOC_ADD_DEV) {
+			ret = check_mounted(device);
+			if (ret < 0) {
+				fprintf(stderr,
+					"error checking %s mount status\n",
+					device);
+				exit(1);
+			}
+			if (ret == 1) {
+				fprintf(stderr, "%s is mounted\n", device);
+				exit(1);
+			}
+		}
 		devfd = open(device, O_RDWR);
-		if (!devfd) {
+		if (devfd < 0) {
 			fprintf(stderr, "Unable to open device %s\n", device);
+			exit(1);
 		}
 		ret = fstat(devfd, &st);
 		if (ret) {
@@ -129,7 +148,9 @@ int main(int ac, char **av)
 		exit(1);
 	}
 	if (cmd == BTRFS_IOC_ADD_DEV) {
-		ret = btrfs_prepare_device(devfd, device, 1, &dev_block_count);
+		int mixed = 0;
+
+		ret = btrfs_prepare_device(devfd, device, 1, &dev_block_count, &mixed);
 		if (ret) {
 			fprintf(stderr, "Unable to init %s\n", device);
 			exit(1);
diff --git a/btrfs-zero-log.c b/btrfs-zero-log.c
new file mode 100644
index 0000000..1ea867b
--- /dev/null
+++ b/btrfs-zero-log.c
@@ -0,0 +1,72 @@
+/*
+ * Copyright (C) 2007 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#define _XOPEN_SOURCE 500
+#define _GNU_SOURCE 1
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include "kerncompat.h"
+#include "ctree.h"
+#include "disk-io.h"
+#include "print-tree.h"
+#include "transaction.h"
+#include "list.h"
+#include "version.h"
+#include "utils.h"
+
+static void print_usage(void)
+{
+	fputs("usage: btrfs-zero-log dev\n", stderr);
+	fprintf(stderr, "%s\n", BTRFS_BUILD_VERSION);
+	exit(1);	/* never returns */
+}
+
+/* clear the log root recorded in the super block of an unmounted device */
+int main(int ac, char **av)
+{
+	struct btrfs_trans_handle *trans;
+	struct btrfs_root *root;
+	int ret;
+
+	if (ac != 2)
+		print_usage();
+
+	radix_tree_init();
+	ret = check_mounted(av[1]);
+	if (ret < 0) {
+		fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
+		return ret;
+	} else if (ret) {
+		fprintf(stderr, "%s is currently mounted. Aborting.\n", av[1]);
+		return -EBUSY;
+	}
+
+	root = open_ctree(av[1], 0, 1);
+	if (root == NULL)
+		return 1;
+
+	trans = btrfs_start_transaction(root, 1);
+	btrfs_set_super_log_root(&root->fs_info->super_copy, 0);
+	btrfs_set_super_log_root_level(&root->fs_info->super_copy, 0);
+	btrfs_commit_transaction(trans, root);
+	close_ctree(root);
+	return ret;
+}
diff --git a/btrfs.c b/btrfs.c
new file mode 100644
index 0000000..88238d6
--- /dev/null
+++ b/btrfs.c
@@ -0,0 +1,276 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "commands.h"
+#include "version.h"
+
+static const char * const btrfs_cmd_group_usage[] = {
+	"btrfs [--help] [--version] <group> [<group>...] <command> [<args>]",
+	NULL
+};
+/* hint about --help, stored as the top-level group's info string */
+static const char btrfs_cmd_group_info[] =
+	"Use --help as an argument for information on a specific group or command.";
+/* command name so far; fixup_argv0() appends each parsed token to it */
+char argv0_buf[ARGV0_BUF_SIZE] = "btrfs";
+
+static inline const char *skip_prefix(const char *str, const char *prefix)
+{
+	const size_t n = strlen(prefix);	/* bytes that have to match */
+	return strncmp(str, prefix, n) == 0 ? str + n : NULL;
+}
+
+int prefixcmp(const char *str, const char *prefix)
+{
+	/* step over the leading bytes the two strings share */
+	while (*prefix && *str == *prefix)
+		str++, prefix++;
+	/* prefix used up: match (0); otherwise the first byte difference */
+	return *prefix ? (unsigned char)*prefix - (unsigned char)*str : 0;
+}
+
+/*
+ * Look arg up among the commands of grp.  An exact token match wins
+ * immediately; otherwise arg may be an abbreviation (a leading part) of
+ * exactly one token.
+ * Returns 0 and sets *cmd_ret on success, -1 if nothing matches and -2 if
+ * the abbreviation fits more than one token.
+ */
+static int parse_one_token(const char *arg, const struct cmd_group *grp,
+			   const struct cmd_struct **cmd_ret)
+{
+	const struct cmd_struct *candidate = NULL;
+	const struct cmd_struct *cmd;
+	int ambiguous = 0;
+
+	for (cmd = grp->commands; cmd->token; cmd++) {
+		if (strcmp(arg, cmd->token) == 0) {
+			*cmd_ret = cmd;
+			return 0;
+		}
+
+		/* not exact: does arg abbreviate this token? */
+		if (prefixcmp(cmd->token, arg) != 0)
+			continue;
+
+		/*
+		 * a second abbreviation candidate makes arg ambiguous,
+		 * so unless an exact match still turns up we have to
+		 * error out
+		 */
+		if (candidate)
+			ambiguous = 1;
+		candidate = cmd;
+	}
+
+	if (ambiguous)
+		return -2;
+	if (!candidate)
+		return -1;
+
+	*cmd_ret = candidate;
+	return 0;
+}
+
+static const struct cmd_struct *
+parse_command_token(const char *arg, const struct cmd_group *grp)
+{
+	const struct cmd_struct *cmd;
+	/* NOTE(review): both help_*_token() calls presumably exit -- confirm */
+	switch(parse_one_token(arg, grp, &cmd)) {
+	case -1:
+		help_unknown_token(arg, grp);
+	case -2:
+		help_ambiguous_token(arg, grp);
+	}
+	/* cmd is only set when parse_one_token() returned 0 */
+	return cmd;
+}
+
+void handle_help_options_next_level(const struct cmd_struct *cmd,
+				    int argc, char **argv)
+{
+	/* only "--help" directly after the command token is handled here */
+	if (argc < 2 || strcmp(argv[1], "--help") != 0)
+		return;
+
+	/*
+	 * a command with a nested group prints that group's help, with
+	 * the argument list shifted by one; a leaf prints its own usage
+	 */
+	if (cmd->next)
+		help_command_group(cmd->next, argc - 1, argv + 1);
+	else
+		usage_command(cmd, 1, 0);
+
+	exit(0);
+}
+
+static void fixup_argv0(char **argv, const char *token)
+{
+	size_t used = strlen(argv0_buf);
+	/* append " <token>"; snprintf truncates when the buffer is full */
+	snprintf(&argv0_buf[used], sizeof(argv0_buf) - used, " %s", token);
+	argv[0] = argv0_buf;
+}
+
+int handle_command_group(const struct cmd_group *grp, int argc,
+			 char **argv)
+{
+	const struct cmd_struct *cmd;
+
+	/* argv[0] names the group itself; a command token must follow it */
+	if (argc < 2) {
+		usage_command_group(grp, 0, 0);
+		exit(1);
+	}
+	argc--;
+	argv++;
+
+	/* resolve the token, serve "--help", then run the command */
+	cmd = parse_command_token(argv[0], grp);
+	handle_help_options_next_level(cmd, argc, argv);
+	fixup_argv0(argv, cmd->token);
+
+	return cmd->fn(argc, argv);
+}
+
+int check_argc_exact(int nargs, int expected)
+{
+	/* returns non-zero, after complaining on stderr, on any mismatch */
+	if (nargs == expected)
+		return 0;
+	fprintf(stderr, nargs < expected ? "%s: too few arguments\n" :
+		"%s: too many arguments\n", argv0_buf);
+	return 1;
+}
+
+int check_argc_min(int nargs, int expected)
+{
+	const int too_few = nargs < expected;
+
+	/* complain on stderr, and return 1, only when arguments are missing */
+	if (too_few)
+		fprintf(stderr, "%s: too few arguments\n", argv0_buf);
+	return too_few;
+}
+
+int check_argc_max(int nargs, int expected)
+{
+	const int too_many = nargs > expected;
+
+	/* complain on stderr, and return 1, only for surplus arguments */
+	if (too_many)
+		fprintf(stderr, "%s: too many arguments\n", argv0_buf);
+	return too_many;
+}
+
+const struct cmd_group btrfs_cmd_group;
+
+static const char * const cmd_help_usage[] = {	/* NULL-terminated */
+	"btrfs help [--full]",
+	"Display help information",
+	"",
+	"--full     display detailed help on every command",
+	NULL
+};
+
+static int cmd_help(int argc, char **argv)
+{
+	help_command_group(&btrfs_cmd_group, argc, argv);
+	return 0;	/* always reports success */
+}
+
+static const char * const cmd_version_usage[] = {
+	"btrfs version",
+	"Display btrfs-progs version",
+	NULL
+};
+/* print the build version string, followed by a newline, on stdout */
+static int cmd_version(int argc, char **argv)
+{
+	puts(BTRFS_BUILD_VERSION);
+	return 0;
+}
+
+/*
+ * Check the word that follows the program name.  "--help", "--version" and
+ * anything not starting with '-' are left for main() to handle; any other
+ * option is reported on stderr and ends the program with status 129.
+ * Nothing is ever consumed, so the return value is always 0.
+ */
+static int handle_options(int *argc, char ***argv)
+{
+	const char *arg;
+
+	/* nothing to inspect, or the first word is not an option */
+	if (*argc <= 0 || (*argv)[0][0] != '-')
+		return 0;
+
+	/*
+	 * --help and --version stay in place: main() strips the "--" and
+	 * runs them as the "help" and "version" commands
+	 */
+	arg = (*argv)[0];
+	if (!strcmp(arg, "--help") || !strcmp(arg, "--version"))
+		return 0;
+
+	fprintf(stderr, "Unknown option: %s\n", arg);
+	fprintf(stderr, "usage: %s\n", btrfs_cmd_group.usagestr[0]);
+	exit(129);
+}
+
+const struct cmd_group btrfs_cmd_group = {
+	btrfs_cmd_group_usage, btrfs_cmd_group_info, {
+		{ "subvolume", cmd_subvolume, NULL, &subvolume_cmd_group, 0 },
+		{ "filesystem", cmd_filesystem, NULL, &filesystem_cmd_group, 0 },
+		{ "balance", cmd_balance, NULL, &balance_cmd_group, 0 },
+		{ "device", cmd_device, NULL, &device_cmd_group, 0 },
+		{ "scrub", cmd_scrub, NULL, &scrub_cmd_group, 0 },
+		{ "inspect-internal", cmd_inspect, NULL, &inspect_cmd_group, 0 },
+		{ "help", cmd_help, cmd_help_usage, NULL, 0 },
+		{ "version", cmd_version, cmd_version_usage, NULL, 0 },
+		{ 0, 0, 0, 0, 0 }	/* NULL token ends the list */
+	},
+};
+
+int main(int argc, char **argv)
+{
+	const struct cmd_struct *cmd;
+
+	/* drop the program name before looking at the arguments */
+	argc--;
+	argv++;
+	handle_options(&argc, &argv);
+	if (argc <= 0) {
+		usage_command_group(&btrfs_cmd_group, 0, 0);
+		exit(1);
+	}
+
+	/* "--help" / "--version" are run as the help / version commands */
+	if (prefixcmp(argv[0], "--") == 0)
+		argv[0] += 2;
+
+	cmd = parse_command_token(argv[0], &btrfs_cmd_group);
+	handle_help_options_next_level(cmd, argc, argv);
+	fixup_argv0(argv, cmd->token);
+	exit(cmd->fn(argc, argv));
+}
diff --git a/btrfsck.c b/btrfsck.c
index 40c90f8..7aac736 100644
--- a/btrfsck.c
+++ b/btrfsck.c
@@ -20,14 +20,20 @@
 #define _GNU_SOURCE 1
 #include <stdio.h>
 #include <stdlib.h>
+#include <unistd.h>
 #include <fcntl.h>
+#include <sys/stat.h>
+#include <getopt.h>
 #include "kerncompat.h"
 #include "ctree.h"
+#include "volumes.h"
+#include "repair.h"
 #include "disk-io.h"
 #include "print-tree.h"
 #include "transaction.h"
 #include "list.h"
 #include "version.h"
+#include "utils.h"
 
 static u64 bytes_used = 0;
 static u64 total_csum_bytes = 0;
@@ -36,7 +42,7 @@ static u64 total_fs_tree_bytes = 0;
 static u64 btree_space_waste = 0;
 static u64 data_bytes_allocated = 0;
 static u64 data_bytes_referenced = 0;
-int found_old_backref = 0;
+static int found_old_backref = 0;
 
 struct extent_backref {
 	struct list_head list;
@@ -71,9 +77,13 @@ struct extent_record {
 	struct cache_extent cache;
 	struct btrfs_disk_key parent_key;
 	u64 start;
+	u64 max_size;
 	u64 nr;
 	u64 refs;
 	u64 extent_item_refs;
+	u64 generation;
+	u64 info_objectid;
+	u8 info_level;
 	unsigned int content_checked:1;
 	unsigned int owner_ref_checked:1;
 	unsigned int is_root:1;
@@ -100,7 +110,11 @@ struct inode_backref {
 #define REF_ERR_DUP_INODE_REF		(1 << 5)
 #define REF_ERR_INDEX_UNMATCH		(1 << 6)
 #define REF_ERR_FILETYPE_UNMATCH	(1 << 7)
-#define REF_ERR_NAME_TOO_LONG		(1 << 8)
+#define REF_ERR_NAME_TOO_LONG		(1 << 8) // 100
+#define REF_ERR_NO_ROOT_REF		(1 << 9)
+#define REF_ERR_NO_ROOT_BACKREF		(1 << 10)
+#define REF_ERR_DUP_ROOT_REF		(1 << 11)
+#define REF_ERR_DUP_ROOT_BACKREF	(1 << 12)
 
 struct inode_record {
 	struct list_head backrefs;
@@ -144,6 +158,29 @@ struct inode_record {
 #define I_ERR_SOME_CSUM_MISSING		(1 << 12)
 #define I_ERR_LINK_COUNT_WRONG		(1 << 13)
 
+struct root_backref {
+	struct list_head list;
+	unsigned int found_dir_item:1;
+	unsigned int found_dir_index:1;
+	unsigned int found_back_ref:1;
+	unsigned int found_forward_ref:1;
+	unsigned int reachable:1;
+	int errors;
+	u64 ref_root;
+	u64 dir;
+	u64 index;
+	u16 namelen;
+	char name[0];
+};
+
+struct root_record {
+	struct list_head backrefs;
+	struct cache_extent cache;
+	unsigned int found_root_item:1;
+	u64 objectid;
+	u32 found_ref;
+};
+
 struct ptr_node {
 	struct cache_extent cache;
 	void *data;
@@ -151,6 +188,7 @@ struct ptr_node {
 
 struct shared_node {
 	struct cache_extent cache;
+	struct cache_tree root_cache;
 	struct cache_tree inode_cache;
 	struct inode_record *current;
 	u32 refs;
@@ -258,6 +296,14 @@ static void free_inode_rec(struct inode_record *rec)
 	free(rec);
 }
 
+static int can_free_inode_rec(struct inode_record *rec)
+{
+	if (!rec->errors && rec->checked && rec->found_inode_item &&
+	    rec->nlink == rec->found_link && list_empty(&rec->backrefs))
+		return 1;
+	return 0;
+}
+
 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
 				 struct inode_record *rec)
 {
@@ -309,8 +355,7 @@ static void maybe_free_inode_rec(struct cache_tree *inode_cache,
 	}
 
 	BUG_ON(rec->refs != 1);
-	if (!rec->errors && rec->nlink == rec->found_link &&
-	    list_empty(&rec->backrefs)) {
+	if (can_free_inode_rec(rec)) {
 		cache = find_cache_extent(inode_cache, rec->ino, 1);
 		node = container_of(cache, struct ptr_node, cache);
 		BUG_ON(node->data != rec);
@@ -338,14 +383,12 @@ static int check_orphan_item(struct btrfs_root *root, u64 ino)
 	return ret;
 }
 
-static int process_inode_item(struct btrfs_root *root,
-			      struct extent_buffer *eb,
+static int process_inode_item(struct extent_buffer *eb,
 			      int slot, struct btrfs_key *key,
 			      struct shared_node *active_node)
 {
 	struct inode_record *rec;
 	struct btrfs_inode_item *item;
-	int ret;
 
 	rec = active_node->current;
 	BUG_ON(rec->ino != key->objectid || rec->refs > 1);
@@ -361,11 +404,8 @@ static int process_inode_item(struct btrfs_root *root,
 	if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
 		rec->nodatasum = 1;
 	rec->found_inode_item = 1;
-	if (rec->nlink == 0) {
-		ret = check_orphan_item(root, rec->ino);
-		if (ret == -ENOENT)
-			rec->errors |= I_ERR_NO_ORPHAN_ITEM;
-	}
+	if (rec->nlink == 0)
+		rec->errors |= I_ERR_NO_ORPHAN_ITEM;
 	maybe_free_inode_rec(&active_node->inode_cache, rec);
 	return 0;
 }
@@ -391,7 +431,6 @@ static struct inode_backref *get_inode_backref(struct inode_record *rec,
 	memcpy(backref->name, name, namelen);
 	backref->name[namelen] = '\0';
 	list_add_tail(&backref->list, &rec->backrefs);
-	rec->found_link++;
 	return backref;
 }
 
@@ -419,6 +458,7 @@ static int add_inode_backref(struct cache_tree *inode_cache,
 		backref->filetype = filetype;
 		backref->found_dir_index = 1;
 	} else if (itemtype == BTRFS_DIR_ITEM_KEY) {
+		rec->found_link++;
 		if (backref->found_dir_item)
 			backref->errors |= REF_ERR_DUP_DIR_ITEM;
 		if (backref->found_dir_index && backref->filetype != filetype)
@@ -443,10 +483,10 @@ static int add_inode_backref(struct cache_tree *inode_cache,
 }
 
 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
-			    struct shared_node *dst_node)
+			    struct cache_tree *dst_cache)
 {
 	struct inode_backref *backref;
-	struct cache_tree *dst_cache = &dst_node->inode_cache;
+	u32 dir_count = 0;
 
 	dst->merging = 1;
 	list_for_each_entry(backref, &src->backrefs, list) {
@@ -457,6 +497,7 @@ static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
 					BTRFS_DIR_INDEX_KEY, backref->errors);
 		}
 		if (backref->found_dir_item) {
+			dir_count++;
 			add_inode_backref(dst_cache, dst->ino,
 					backref->dir, 0, backref->name,
 					backref->namelen, backref->filetype,
@@ -481,6 +522,8 @@ static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
 	if (dst->first_extent_gap > src->first_extent_gap)
 		dst->first_extent_gap = src->first_extent_gap;
 
+	BUG_ON(src->found_link < dir_count);
+	dst->found_link += src->found_link - dir_count;
 	dst->found_size += src->found_size;
 	if (src->extent_start != (u64)-1) {
 		if (dst->extent_start == (u64)-1) {
@@ -510,14 +553,8 @@ static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
 			dst->errors |= I_ERR_DUP_INODE_ITEM;
 		}
 	}
-
-	if (src->checked) {
-		dst->checked = 1;
-		if (dst_node->current == dst)
-			dst_node->current = NULL;
-	}
 	dst->merging = 0;
-	maybe_free_inode_rec(dst_cache, dst);
+
 	return 0;
 }
 
@@ -537,8 +574,9 @@ static int splice_shared_node(struct shared_node *src_node,
 	if (src_node->current)
 		current_ino = src_node->current->ino;
 
-	src = &src_node->inode_cache;
-	dst = &dst_node->inode_cache;
+	src = &src_node->root_cache;
+	dst = &dst_node->root_cache;
+again:
 	cache = find_first_cache_extent(src, 0);
 	while (cache) {
 		node = container_of(cache, struct ptr_node, cache);
@@ -558,13 +596,26 @@ static int splice_shared_node(struct shared_node *src_node,
 		ret = insert_existing_cache_extent(dst, &ins->cache);
 		if (ret == -EEXIST) {
 			conflict = get_inode_rec(dst, rec->ino, 1);
-			merge_inode_recs(rec, conflict, dst_node);
+			merge_inode_recs(rec, conflict, dst);
+			if (rec->checked) {
+				conflict->checked = 1;
+				if (dst_node->current == conflict)
+					dst_node->current = NULL;
+			}
+			maybe_free_inode_rec(dst, conflict);
 			free_inode_rec(rec);
 			free(ins);
 		} else {
 			BUG_ON(ret);
 		}
 	}
+
+	if (src == &src_node->root_cache) {
+		src = &src_node->inode_cache;
+		dst = &dst_node->inode_cache;
+		goto again;
+	}
+
 	if (current_ino > 0 && (!dst_node->current ||
 	    current_ino > dst_node->current->ino)) {
 		if (dst_node->current) {
@@ -616,6 +667,7 @@ static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
 	node = calloc(1, sizeof(*node));
 	node->cache.start = bytenr;
 	node->cache.size = 1;
+	cache_tree_init(&node->root_cache);
 	cache_tree_init(&node->inode_cache);
 	node->refs = refs;
 
@@ -646,6 +698,7 @@ static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
 	if (wc->root_level == wc->active_node &&
 	    btrfs_root_refs(&root->root_item) == 0) {
 		if (--node->refs == 0) {
+			free_inode_recs(&node->root_cache);
 			free_inode_recs(&node->inode_cache);
 			remove_cache_extent(&wc->shared, &node->cache);
 			free(node);
@@ -708,10 +761,12 @@ static int process_dir_item(struct extent_buffer *eb,
 	int filetype;
 	struct btrfs_dir_item *di;
 	struct inode_record *rec;
+	struct cache_tree *root_cache;
 	struct cache_tree *inode_cache;
 	struct btrfs_key location;
 	char namebuf[BTRFS_NAME_LEN];
 
+	root_cache = &active_node->root_cache;
 	inode_cache = &active_node->inode_cache;
 	rec = active_node->current;
 	rec->found_dir_item = 1;
@@ -740,7 +795,9 @@ static int process_dir_item(struct extent_buffer *eb,
 					  key->objectid, key->offset, namebuf,
 					  len, filetype, key->type, error);
 		} else if (location.type == BTRFS_ROOT_ITEM_KEY) {
-			/* fixme: check root back & forward references */
+			add_inode_backref(root_cache, location.objectid,
+					  key->objectid, key->offset, namebuf,
+					  len, filetype, key->type, error);
 		} else {
 			fprintf(stderr, "warning line %d\n", __LINE__);
 		}
@@ -945,7 +1002,7 @@ static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
 	struct btrfs_key key;
 	u32 nritems;
 	int i;
-	int ret;
+	int ret = 0;
 	struct cache_tree *inode_cache;
 	struct shared_node *active_node;
 
@@ -977,8 +1034,7 @@ static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
 			ret = process_inode_ref(eb, i, &key, active_node);
 			break;
 		case BTRFS_INODE_ITEM_KEY:
-			ret = process_inode_item(root, eb, i, &key,
-						 active_node);
+			ret = process_inode_item(eb, i, &key, active_node);
 			break;
 		case BTRFS_EXTENT_DATA_KEY:
 			ret = process_file_extent(root, eb, i, &key,
@@ -988,7 +1044,7 @@ static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
 			break;
 		};
 	}
-	return 0;
+	return ret;
 }
 
 static void reada_walk_down(struct btrfs_root *root,
@@ -1033,7 +1089,9 @@ static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
 	ret = btrfs_lookup_extent_info(NULL, root,
 				       path->nodes[*level]->start,
 				       path->nodes[*level]->len, &refs, NULL);
-	BUG_ON(ret);
+	if (ret < 0)
+		goto out;
+
 	if (refs > 1) {
 		ret = enter_shared_node(root, path->nodes[*level]->start,
 					refs, wc, *level);
@@ -1060,7 +1118,8 @@ static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
 		blocksize = btrfs_level_size(root, *level - 1);
 		ret = btrfs_lookup_extent_info(NULL, root, bytenr, blocksize,
 					       &refs, NULL);
-		BUG_ON(ret);
+		if (ret < 0)
+			refs = 0;
 
 		if (refs > 1) {
 			ret = enter_shared_node(root, bytenr, refs,
@@ -1120,7 +1179,7 @@ static int check_root_dir(struct inode_record *rec)
 
 	if (!rec->found_inode_item || rec->errors)
 		goto out;
-	if (rec->nlink != 1 || rec->found_link != 1)
+	if (rec->nlink != 1 || rec->found_link != 0)
 		goto out;
 	if (list_empty(&rec->backrefs))
 		goto out;
@@ -1176,13 +1235,23 @@ static int check_inode_recs(struct btrfs_root *root,
 		node = container_of(cache, struct ptr_node, cache);
 		rec = node->data;
 		remove_cache_extent(inode_cache, &node->cache);
+		free(node);
 		if (rec->ino == root_dirid ||
 		    rec->ino == BTRFS_ORPHAN_OBJECTID) {
-			free(node);
 			free_inode_rec(rec);
 			continue;
 		}
 
+		if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
+			ret = check_orphan_item(root, rec->ino);
+			if (ret == 0)
+				rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
+			if (can_free_inode_rec(rec)) {
+				free_inode_rec(rec);
+				continue;
+			}
+		}
+
 		error++;
 		if (!rec->found_inode_item)
 			rec->errors |= I_ERR_NO_INODE_ITEM;
@@ -1205,13 +1274,314 @@ static int check_inode_recs(struct btrfs_root *root,
 				backref->namelen, backref->name,
 				backref->filetype, backref->errors);
 		}
-		free(node);
 		free_inode_rec(rec);
 	}
 	return (error > 0) ? -1 : 0;
 }
 
+/* Find the root_record for objectid in root_cache; create it if missing. */
+static struct root_record *get_root_rec(struct cache_tree *root_cache,
+					u64 objectid)
+{
+	struct cache_extent *cache;
+	struct root_record *rec;
+	int ret;
+
+	cache = find_cache_extent(root_cache, objectid, 1);
+	if (cache)
+		return container_of(cache, struct root_record, cache);
+
+	rec = calloc(1, sizeof(*rec));
+	BUG_ON(!rec);	/* allocation failed; don't dereference NULL below */
+	rec->objectid = objectid;
+	INIT_LIST_HEAD(&rec->backrefs);
+	rec->cache.start = objectid;
+	rec->cache.size = 1;
+	ret = insert_existing_cache_extent(root_cache, &rec->cache);
+	BUG_ON(ret);
+	return rec;
+}
+/* Find rec's backref matching (ref_root, dir, name), or append a new one. */
+static struct root_backref *get_root_backref(struct root_record *rec,
+					     u64 ref_root, u64 dir, u64 index,
+					     const char *name, int namelen)
+{
+	struct root_backref *backref;
+
+	list_for_each_entry(backref, &rec->backrefs, list) {
+		if (backref->ref_root != ref_root || backref->dir != dir ||
+		    backref->namelen != namelen)
+			continue;
+		if (memcmp(name, backref->name, namelen))
+			continue;
+		return backref;
+	}
+
+	backref = calloc(1, sizeof(*backref) + namelen + 1);
+	BUG_ON(!backref);	/* was: unchecked malloc() followed by memset() */
+	backref->ref_root = ref_root;
+	backref->dir = dir;
+	backref->index = index;
+	backref->namelen = namelen;
+	memcpy(backref->name, name, namelen);
+	backref->name[namelen] = '\0';
+	list_add_tail(&backref->list, &rec->backrefs);
+	return backref;
+}
+/* Empty root_cache, freeing every root_record and its root_backrefs. */
+static void free_root_recs(struct cache_tree *root_cache)
+{
+	struct cache_extent *cache;
+	struct root_record *rec;
+	struct root_backref *backref;
+
+	while (1) {
+		cache = find_first_cache_extent(root_cache, 0);
+		if (!cache)
+			break;
+		rec = container_of(cache, struct root_record, cache);
+		remove_cache_extent(root_cache, &rec->cache);
+
+		while (!list_empty(&rec->backrefs)) {
+			backref = list_entry(rec->backrefs.next,
+					     struct root_backref, list);
+			list_del(&backref->list);
+			free(backref);
+		}
+		free(rec);	/* allocated with calloc() in get_root_rec() */
+	}
+}
+/* Record one reference to root_id found via an item of type item_type. */
+static int add_root_backref(struct cache_tree *root_cache,
+			    u64 root_id, u64 ref_root, u64 dir, u64 index,
+			    const char *name, int namelen,
+			    int item_type, int errors)
+{
+	struct root_record *rec;
+	struct root_backref *backref;
+
+	rec = get_root_rec(root_cache, root_id);
+	backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
+
+	backref->errors |= errors;
+
+	if (item_type != BTRFS_DIR_ITEM_KEY) {	/* index is not compared for dir items */
+		if (backref->found_dir_index || backref->found_back_ref ||
+		    backref->found_forward_ref) {
+			if (backref->index != index)
+				backref->errors |= REF_ERR_INDEX_UNMATCH;
+		} else {
+			backref->index = index;	/* first item carrying an index */
+		}
+	}
+
+	if (item_type == BTRFS_DIR_ITEM_KEY) {
+		backref->found_dir_item = 1;
+		backref->reachable = 1;	/* check_root_refs() may clear this */
+		rec->found_ref++;
+	} else if (item_type == BTRFS_DIR_INDEX_KEY) {
+		backref->found_dir_index = 1;
+	} else if (item_type == BTRFS_ROOT_REF_KEY) {
+		if (backref->found_forward_ref)
+			backref->errors |= REF_ERR_DUP_ROOT_REF;
+		backref->found_forward_ref = 1;
+	} else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
+		if (backref->found_back_ref)
+			backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
+		backref->found_back_ref = 1;
+	} else {
+		BUG_ON(1);	/* unexpected item type */
+	}
+
+	return 0;	/* always 0 */
+}
+/* Drain src_cache, turning its entries into root backrefs in dst_cache. */
+static int merge_root_recs(struct btrfs_root *root,
+			   struct cache_tree *src_cache,
+			   struct cache_tree *dst_cache)
+{
+	struct cache_extent *cache;
+	struct ptr_node *node;
+	struct inode_record *rec;
+	struct inode_backref *backref;
+
+	if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
+		free_inode_recs(src_cache);	/* reloc tree refs are dropped, not merged */
+		return 0;
+	}
+
+	while (1) {
+		cache = find_first_cache_extent(src_cache, 0);
+		if (!cache)
+			break;
+		node = container_of(cache, struct ptr_node, cache);
+		rec = node->data;
+		remove_cache_extent(src_cache, &node->cache);
+		free(node);
+
+		list_for_each_entry(backref, &rec->backrefs, list) {
+			BUG_ON(backref->found_inode_ref);	/* presumably never set for root refs */
+			if (backref->found_dir_item)
+				add_root_backref(dst_cache, rec->ino,	/* rec->ino passed as root_id */
+					root->root_key.objectid, backref->dir,
+					backref->index, backref->name,
+					backref->namelen, BTRFS_DIR_ITEM_KEY,
+					backref->errors);
+			if (backref->found_dir_index)
+				add_root_backref(dst_cache, rec->ino,
+					root->root_key.objectid, backref->dir,
+					backref->index, backref->name,
+					backref->namelen, BTRFS_DIR_INDEX_KEY,
+					backref->errors);
+		}
+		free_inode_rec(rec);
+	}
+	return 0;	/* always 0 */
+}
+/* Verify root reachability and ref/backref consistency; returns 1 on error. */
+static int check_root_refs(struct btrfs_root *root,
+			   struct cache_tree *root_cache)
+{
+	struct root_record *rec;
+	struct root_record *ref_root;
+	struct root_backref *backref;
+	struct cache_extent *cache;
+	int loop = 1;
+	int ret;
+	int error;
+	int errors = 0;
+
+	rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
+	rec->found_ref = 1;	/* the top-level fs tree counts as referenced */
+
+	/* fixme: this can not detect circular references */
+	while (loop) {	/* rescan until no root loses its last reference */
+		loop = 0;
+		cache = find_first_cache_extent(root_cache, 0);
+		while (1) {
+			if (!cache)
+				break;
+			rec = container_of(cache, struct root_record, cache);
+			cache = next_cache_extent(cache);
+
+			if (rec->found_ref == 0)
+				continue;
+
+			list_for_each_entry(backref, &rec->backrefs, list) {
+				if (!backref->reachable)
+					continue;
+
+				ref_root = get_root_rec(root_cache,
+							backref->ref_root);
+				if (ref_root->found_ref > 0)	/* referencing root still referenced */
+					continue;
+
+				backref->reachable = 0;	/* referencing root has no refs left */
+				rec->found_ref--;
+				if (rec->found_ref == 0)
+					loop = 1;
+			}
+		}
+	}
+
+	cache = find_first_cache_extent(root_cache, 0);
+	while (1) {	/* second pass: report what is wrong */
+		if (!cache)
+			break;
+		rec = container_of(cache, struct root_record, cache);
+		cache = next_cache_extent(cache);
+
+		if (rec->found_ref == 0 &&
+		    rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
+		    rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
+			ret = check_orphan_item(root->fs_info->tree_root,
+						rec->objectid);
+			if (ret == 0)	/* check_orphan_item() succeeded: not an error */
+				continue;
+			errors++;
+			fprintf(stderr, "fs tree %llu not referenced\n",
+				(unsigned long long)rec->objectid);
+		}
+
+		error = 0;
+		if (rec->found_ref > 0 && !rec->found_root_item)
+			error = 1;
+		list_for_each_entry(backref, &rec->backrefs, list) {
+			if (!backref->found_dir_item)
+				backref->errors |= REF_ERR_NO_DIR_ITEM;
+			if (!backref->found_dir_index)
+				backref->errors |= REF_ERR_NO_DIR_INDEX;
+			if (!backref->found_back_ref)
+				backref->errors |= REF_ERR_NO_ROOT_BACKREF;
+			if (!backref->found_forward_ref)
+				backref->errors |= REF_ERR_NO_ROOT_REF;
+			if (backref->reachable && backref->errors)	/* unreachable refs are not counted */
+				error = 1;
+		}
+		if (!error)
+			continue;
+
+		errors++;
+		fprintf(stderr, "fs tree %llu refs %u %s\n",
+			(unsigned long long)rec->objectid, rec->found_ref,
+			 rec->found_root_item ? "" : "not found");
+
+		list_for_each_entry(backref, &rec->backrefs, list) {
+			if (!backref->reachable)
+				continue;
+			if (!backref->errors && rec->found_root_item)
+				continue;
+			fprintf(stderr, "\tunresolved ref root %llu dir %llu"
+				" index %llu namelen %u name %s error %x\n",
+				(unsigned long long)backref->ref_root,
+				(unsigned long long)backref->dir,
+				(unsigned long long)backref->index,
+				backref->namelen, backref->name,
+				backref->errors);
+		}
+	}
+	return errors > 0 ? 1 : 0;
+}
+/* Parse a ROOT_REF or ROOT_BACKREF item and record it in root_cache. */
+static int process_root_ref(struct extent_buffer *eb, int slot,
+			    struct btrfs_key *key,
+			    struct cache_tree *root_cache)
+{
+	u64 dirid;
+	u64 index;
+	u32 len;
+	u32 name_len;
+	struct btrfs_root_ref *ref;
+	char namebuf[BTRFS_NAME_LEN];
+	int error;
+
+	ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
+
+	dirid = btrfs_root_ref_dirid(eb, ref);
+	index = btrfs_root_ref_sequence(eb, ref);
+	name_len = btrfs_root_ref_name_len(eb, ref);
+
+	if (name_len <= BTRFS_NAME_LEN) {
+		len = name_len;
+		error = 0;
+	} else {
+		len = BTRFS_NAME_LEN;	/* clamp so namebuf is not overrun */
+		error = REF_ERR_NAME_TOO_LONG;
+	}
+	read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);	/* name follows the ref */
+
+	if (key->type == BTRFS_ROOT_REF_KEY) {	/* offset = referenced root, objectid = referrer */
+		add_root_backref(root_cache, key->offset, key->objectid, dirid,
+				 index, namebuf, len, key->type, error);
+	} else {	/* other key type: the two roots are swapped */
+		add_root_backref(root_cache, key->objectid, key->offset, dirid,
+				 index, namebuf, len, key->type, error);
+	}
+	return 0;	/* always 0 */
+}
+
 static int check_fs_root(struct btrfs_root *root,
+			 struct cache_tree *root_cache,
 			 struct walk_control *wc)
 {
 	int ret = 0;
@@ -1219,10 +1589,18 @@ static int check_fs_root(struct btrfs_root *root,
 	int level;
 	struct btrfs_path path;
 	struct shared_node root_node;
+	struct root_record *rec;
 	struct btrfs_root_item *root_item = &root->root_item;
 
+	if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
+		rec = get_root_rec(root_cache, root->root_key.objectid);
+		if (btrfs_root_refs(root_item) > 0)
+			rec->found_root_item = 1;
+	}
+
 	btrfs_init_path(&path);
 	memset(&root_node, 0, sizeof(root_node));
+	cache_tree_init(&root_node.root_cache);
 	cache_tree_init(&root_node.inode_cache);
 
 	level = btrfs_header_level(root->node);
@@ -1266,6 +1644,8 @@ static int check_fs_root(struct btrfs_root *root,
 	}
 	btrfs_release_path(root, &path);
 
+	merge_root_recs(root, &root_node.root_cache, root_cache);
+
 	if (root_node.current) {
 		root_node.current->checked = 1;
 		maybe_free_inode_rec(&root_node.inode_cache,
@@ -1280,13 +1660,15 @@ static int fs_root_objectid(u64 objectid)
 {
 	if (objectid == BTRFS_FS_TREE_OBJECTID ||
 	    objectid == BTRFS_TREE_RELOC_OBJECTID ||
+	    objectid == BTRFS_DATA_RELOC_TREE_OBJECTID ||
 	    (objectid >= BTRFS_FIRST_FREE_OBJECTID &&
-	     objectid < BTRFS_LAST_FREE_OBJECTID))
+	     objectid <= BTRFS_LAST_FREE_OBJECTID))
 		return 1;
 	return 0;
 }
 
-static int check_fs_roots(struct btrfs_root *root)
+static int check_fs_roots(struct btrfs_root *root,
+			  struct cache_tree *root_cache)
 {
 	struct btrfs_path path;
 	struct btrfs_key key;
@@ -1319,10 +1701,14 @@ static int check_fs_roots(struct btrfs_root *root)
 		    fs_root_objectid(key.objectid)) {
 			tmp_root = btrfs_read_fs_root_no_cache(root->fs_info,
 							       &key);
-			ret = check_fs_root(tmp_root, &wc);
+			ret = check_fs_root(tmp_root, root_cache, &wc);
 			if (ret)
 				err = 1;
 			btrfs_free_fs_root(root->fs_info, tmp_root);
+		} else if (key.type == BTRFS_ROOT_REF_KEY ||
+			   key.type == BTRFS_ROOT_BACKREF_KEY) {
+			process_root_ref(leaf, path.slots[0], &key,
+					 root_cache);
 		}
 		path.slots[0]++;
 	}
@@ -1334,86 +1720,6 @@ static int check_fs_roots(struct btrfs_root *root)
 	return err;
 }
 
-static int check_node(struct btrfs_root *root,
-		      struct btrfs_disk_key *parent_key,
-		      struct extent_buffer *buf)
-{
-	int i;
-	struct btrfs_key cpukey;
-	struct btrfs_disk_key key;
-	u32 nritems = btrfs_header_nritems(buf);
-
-	if (nritems == 0 || nritems > BTRFS_NODEPTRS_PER_BLOCK(root))
-		return 1;
-	if (parent_key->type) {
-		btrfs_node_key(buf, &key, 0);
-		if (memcmp(parent_key, &key, sizeof(key)))
-			return 1;
-	}
-	for (i = 0; nritems > 1 && i < nritems - 2; i++) {
-		btrfs_node_key(buf, &key, i);
-		btrfs_node_key_to_cpu(buf, &cpukey, i + 1);
-		if (btrfs_comp_keys(&key, &cpukey) >= 0)
-			return 1;
-	}
-	return 0;
-}
-
-static int check_leaf(struct btrfs_root *root,
-		      struct btrfs_disk_key *parent_key,
-		      struct extent_buffer *buf)
-{
-	int i;
-	struct btrfs_key cpukey;
-	struct btrfs_disk_key key;
-	u32 nritems = btrfs_header_nritems(buf);
-
-	if (btrfs_header_level(buf) != 0) {
-		fprintf(stderr, "leaf is not a leaf %llu\n",
-		       (unsigned long long)btrfs_header_bytenr(buf));
-		return 1;
-	}
-	if (btrfs_leaf_free_space(root, buf) < 0) {
-		fprintf(stderr, "leaf free space incorrect %llu %d\n",
-			(unsigned long long)btrfs_header_bytenr(buf),
-			btrfs_leaf_free_space(root, buf));
-		return 1;
-	}
-
-	if (nritems == 0)
-		return 0;
-
-	btrfs_item_key(buf, &key, 0);
-	if (parent_key->type && memcmp(parent_key, &key, sizeof(key))) {
-		fprintf(stderr, "leaf parent key incorrect %llu\n",
-		       (unsigned long long)btrfs_header_bytenr(buf));
-		return 1;
-	}
-	for (i = 0; nritems > 1 && i < nritems - 2; i++) {
-		btrfs_item_key(buf, &key, i);
-		btrfs_item_key_to_cpu(buf, &cpukey, i + 1);
-		if (btrfs_comp_keys(&key, &cpukey) >= 0) {
-			fprintf(stderr, "bad key ordering %d %d\n", i, i+1);
-			return 1;
-		}
-		if (btrfs_item_offset_nr(buf, i) !=
-			btrfs_item_end_nr(buf, i + 1)) {
-			fprintf(stderr, "incorrect offsets %u %u\n",
-				btrfs_item_offset_nr(buf, i),
-				btrfs_item_end_nr(buf, i + 1));
-			return 1;
-		}
-		if (i == 0 && btrfs_item_end_nr(buf, i) !=
-		    BTRFS_LEAF_DATA_SIZE(root)) {
-			fprintf(stderr, "bad item end %u wanted %u\n",
-				btrfs_item_end_nr(buf, i),
-				(unsigned)BTRFS_LEAF_DATA_SIZE(root));
-			return 1;
-		}
-	}
-	return 0;
-}
-
 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
 {
 	struct list_head *cur = rec->backrefs.next;
@@ -1458,12 +1764,12 @@ static int all_backpointers_checked(struct extent_record *rec, int print_errs)
 			if (!print_errs)
 				goto out;
 			tback = (struct tree_backref *)back;
-			fprintf(stderr, "Backref %llu %s %llu not referenced\n",
+			fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
 				(unsigned long long)rec->start,
 				back->full_backref ? "parent" : "root",
 				back->full_backref ?
 				(unsigned long long)tback->parent :
-				(unsigned long long)tback->root);
+				(unsigned long long)tback->root, back);
 		}
 		if (back->is_data) {
 			dback = (struct data_backref *)back;
@@ -1473,7 +1779,7 @@ static int all_backpointers_checked(struct extent_record *rec, int print_errs)
 					goto out;
 				fprintf(stderr, "Incorrect local backref count"
 					" on %llu %s %llu owner %llu"
-					" offset %llu found %u wanted %u\n",
+					" offset %llu found %u wanted %u back %p\n",
 					(unsigned long long)rec->start,
 					back->full_backref ?
 					"parent" : "root",
@@ -1482,7 +1788,7 @@ static int all_backpointers_checked(struct extent_record *rec, int print_errs)
 					(unsigned long long)dback->root,
 					(unsigned long long)dback->owner,
 					(unsigned long long)dback->offset,
-					dback->found_ref, dback->num_refs);
+					dback->found_ref, dback->num_refs, back);
 			}
 		}
 		if (!back->is_data) {
@@ -1541,7 +1847,6 @@ static int check_owner_ref(struct btrfs_root *root,
 	struct btrfs_root *ref_root;
 	struct btrfs_key key;
 	struct btrfs_path path;
-	int ret;
 	int level;
 	int found = 0;
 
@@ -1571,10 +1876,10 @@ static int check_owner_ref(struct btrfs_root *root,
 		btrfs_item_key_to_cpu(buf, &key, 0);
 	else
 		btrfs_node_key_to_cpu(buf, &key, 0);
-	
+
 	btrfs_init_path(&path);
 	path.lowest_level = level + 1;
-	ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
+	btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
 
 	if (buf->start == btrfs_node_blockptr(path.nodes[level + 1],
 					      path.slots[level + 1]))
@@ -1584,23 +1889,81 @@ static int check_owner_ref(struct btrfs_root *root,
 	return found ? 0 : 1;
 }
 
+/*
+ * Return 1 when every backref on rec is a root-keyed tree backref and at
+ * least one of them names the extent tree; return 0 otherwise.
+ */
+static int is_extent_tree_record(struct extent_record *rec)
+{
+	struct extent_backref *node;
+	struct tree_backref *tback;
+	int found = 0;
+
+	list_for_each_entry(node, &rec->backrefs, list) {
+		/* a data backref or a full (parent-keyed) backref rules it out */
+		if (node->is_data || node->full_backref)
+			return 0;
+		tback = (struct tree_backref *)node;
+		if (tback->root == BTRFS_EXTENT_TREE_OBJECTID)
+			found = 1;
+	}
+	return found;
+}
+
+/* Report an unreadable extent-tree block as a corrupt extent record. */
+static int record_bad_block_io(struct btrfs_fs_info *info,
+			       struct cache_tree *extent_cache,
+			       u64 start, u64 len)
+{
+	struct cache_extent *hit = find_cache_extent(extent_cache, start, len);
+	struct extent_record *erec;
+	struct btrfs_key parent;
+
+	/* no record covers this range: nothing to report */
+	if (!hit)
+		return 0;
+	erec = container_of(hit, struct extent_record, cache);
+	if (is_extent_tree_record(erec)) {
+		btrfs_disk_key_to_cpu(&parent, &erec->parent_key);
+		return btrfs_add_corrupt_extent_record(info, &parent,
+						       start, len, 0);
+	}
+	return 0;
+}
+
 static int check_block(struct btrfs_root *root,
 		       struct cache_tree *extent_cache,
 		       struct extent_buffer *buf, u64 flags)
 {
 	struct extent_record *rec;
 	struct cache_extent *cache;
+	struct btrfs_key key;
 	int ret = 1;
+	int level;
 
 	cache = find_cache_extent(extent_cache, buf->start, buf->len);
 	if (!cache)
 		return 1;
 	rec = container_of(cache, struct extent_record, cache);
-	if (btrfs_is_leaf(buf)) {
-		ret = check_leaf(root, &rec->parent_key, buf);
-	} else {
-		ret = check_node(root, &rec->parent_key, buf);
+	rec->generation = btrfs_header_generation(buf);
+
+	level = btrfs_header_level(buf);
+	if (btrfs_header_nritems(buf) > 0) {
+
+		if (level == 0)
+			btrfs_item_key_to_cpu(buf, &key, 0);
+		else
+			btrfs_node_key_to_cpu(buf, &key, 0);
+
+		rec->info_objectid = key.objectid;
 	}
+	rec->info_level = level;
+
+	if (btrfs_is_leaf(buf))
+		ret = btrfs_check_leaf(root, &rec->parent_key, buf);
+	else
+		ret = btrfs_check_node(root, &rec->parent_key, buf);
+
 	if (ret) {
 		fprintf(stderr, "bad block %llu\n",
 			(unsigned long long)buf->start);
@@ -1660,6 +2023,7 @@ static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
 		ref->node.full_backref = 0;
 	}
 	list_add_tail(&ref->node.list, &rec->backrefs);
+
 	return ref;
 }
 
@@ -1677,7 +2041,7 @@ static struct data_backref *find_data_backref(struct extent_record *rec,
 		if (!node->is_data)
 			continue;
 		back = (struct data_backref *)node;
-		if (parent > 0) { 
+		if (parent > 0) {
 			if (!node->full_backref)
 				continue;
 			if (parent == back->parent)
@@ -1695,11 +2059,13 @@ static struct data_backref *find_data_backref(struct extent_record *rec,
 
 static struct data_backref *alloc_data_backref(struct extent_record *rec,
 						u64 parent, u64 root,
-						u64 owner, u64 offset)
+						u64 owner, u64 offset,
+						u64 max_size)
 {
 	struct data_backref *ref = malloc(sizeof(*ref));
 	memset(&ref->node, 0, sizeof(ref->node));
 	ref->node.is_data = 1;
+
 	if (parent > 0) {
 		ref->parent = parent;
 		ref->owner = 0;
@@ -1714,13 +2080,16 @@ static struct data_backref *alloc_data_backref(struct extent_record *rec,
 	ref->found_ref = 0;
 	ref->num_refs = 0;
 	list_add_tail(&ref->node.list, &rec->backrefs);
+	if (max_size > rec->max_size)
+		rec->max_size = max_size;
 	return ref;
 }
 
 static int add_extent_rec(struct cache_tree *extent_cache,
 			  struct btrfs_key *parent_key,
 			  u64 start, u64 nr, u64 extent_item_refs,
-			  int is_root, int inc_ref, int set_checked)
+			  int is_root, int inc_ref, int set_checked,
+			  u64 max_size)
 {
 	struct extent_record *rec;
 	struct cache_extent *cache;
@@ -1732,7 +2101,7 @@ static int add_extent_rec(struct cache_tree *extent_cache,
 		if (inc_ref)
 			rec->refs++;
 		if (rec->nr == 1)
-			rec->nr = nr;
+			rec->nr = max(nr, max_size);
 
 		if (start != rec->start) {
 			fprintf(stderr, "warning, start mismatch %llu %llu\n",
@@ -1761,12 +2130,16 @@ static int add_extent_rec(struct cache_tree *extent_cache,
 		if (parent_key)
 			btrfs_cpu_key_to_disk(&rec->parent_key, parent_key);
 
+		if (rec->max_size < max_size)
+			rec->max_size = max_size;
+
 		maybe_free_extent_rec(extent_cache, rec);
 		return ret;
 	}
 	rec = malloc(sizeof(*rec));
 	rec->start = start;
-	rec->nr = nr;
+	rec->max_size = max_size;
+	rec->nr = max(nr, max_size);
 	rec->content_checked = 0;
 	rec->owner_ref_checked = 0;
 	INIT_LIST_HEAD(&rec->backrefs);
@@ -1812,7 +2185,7 @@ static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
 
 	cache = find_cache_extent(extent_cache, bytenr, 1);
 	if (!cache) {
-		add_extent_rec(extent_cache, NULL, bytenr, 1, 0, 0, 0, 0);
+		add_extent_rec(extent_cache, NULL, bytenr, 1, 0, 0, 0, 0, 0);
 		cache = find_cache_extent(extent_cache, bytenr, 1);
 		if (!cache)
 			abort();
@@ -1851,7 +2224,7 @@ static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
 
 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
 			    u64 parent, u64 root, u64 owner, u64 offset,
-			    u32 num_refs, int found_ref)
+			    u32 num_refs, int found_ref, u64 max_size)
 {
 	struct extent_record *rec;
 	struct data_backref *back;
@@ -1859,7 +2232,8 @@ static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
 
 	cache = find_cache_extent(extent_cache, bytenr, 1);
 	if (!cache) {
-		add_extent_rec(extent_cache, NULL, bytenr, 1, 0, 0, 0, 0);
+		add_extent_rec(extent_cache, NULL, bytenr, 1, 0, 0, 0, 0,
+			       max_size);
 		cache = find_cache_extent(extent_cache, bytenr, 1);
 		if (!cache)
 			abort();
@@ -1869,9 +2243,13 @@ static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
 	if (rec->start != bytenr) {
 		abort();
 	}
+	if (rec->max_size < max_size)
+		rec->max_size = max_size;
+
 	back = find_data_backref(rec, parent, root, owner, offset);
 	if (!back)
-		back = alloc_data_backref(rec, parent, root, owner, offset);
+		back = alloc_data_backref(rec, parent, root, owner, offset,
+					  max_size);
 
 	if (found_ref) {
 		BUG_ON(num_refs != 1);
@@ -1895,7 +2273,6 @@ static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
 	return 0;
 }
 
-
 static int add_pending(struct cache_tree *pending,
 		       struct cache_tree *seen, u64 bytenr, u32 size)
 {
@@ -1985,11 +2362,10 @@ static int process_extent_ref_v0(struct cache_tree *extent_cache,
 	btrfs_item_key_to_cpu(leaf, &key, slot);
 	ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
 	if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
-		add_tree_backref(extent_cache, key.objectid, key.offset,
-				 0, 0);
+		add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
 	} else {
 		add_data_backref(extent_cache, key.objectid, key.offset, 0,
-				 0, 0, btrfs_ref_count_v0(leaf, ref0), 0);
+				 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
 	}
 	return 0;
 }
@@ -2022,14 +2398,14 @@ static int process_extent_item(struct cache_tree *extent_cache,
 		BUG();
 #endif
 		return add_extent_rec(extent_cache, NULL, key.objectid,
-				      key.offset, refs, 0, 0, 0);
+				      key.offset, refs, 0, 0, 0, key.offset);
 	}
 
 	ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
 	refs = btrfs_extent_refs(eb, ei);
 
 	add_extent_rec(extent_cache, NULL, key.objectid, key.offset,
-		       refs, 0, 0, 0);
+		       refs, 0, 0, 0, key.offset);
 
 	ptr = (unsigned long)(ei + 1);
 	if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
@@ -2057,21 +2433,24 @@ static int process_extent_item(struct cache_tree *extent_cache,
 								       dref),
 					btrfs_extent_data_ref_offset(eb, dref),
 					btrfs_extent_data_ref_count(eb, dref),
-					0);
+					0, key.offset);
 			break;
 		case BTRFS_SHARED_DATA_REF_KEY:
 			sref = (struct btrfs_shared_data_ref *)(iref + 1);
 			add_data_backref(extent_cache, key.objectid, offset,
 					0, 0, 0,
 					btrfs_shared_data_ref_count(eb, sref),
-					0);
+					0, key.offset);
 			break;
 		default:
-			BUG();
+			fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
+				key.objectid, key.type, key.offset);
+			goto out;
 		}
 		ptr += btrfs_extent_inline_ref_size(type);
 	}
 	WARN_ON(ptr > end);
+out:
 	return 0;
 }
 
@@ -2135,9 +2514,18 @@ static int run_next_block(struct btrfs_root *root,
 
 	/* fixme, get the real parent transid */
 	buf = read_tree_block(root, bytenr, size, 0);
+	if (!extent_buffer_uptodate(buf)) {
+		record_bad_block_io(root->fs_info,
+				    extent_cache, bytenr, size);
+		free_extent_buffer(buf);
+		goto out;
+	}
+
 	nritems = btrfs_header_nritems(buf);
 
 	ret = btrfs_lookup_extent_info(NULL, root, bytenr, size, NULL, &flags);
+	if (ret < 0)
+		flags = BTRFS_BLOCK_FLAG_FULL_BACKREF;
 
 	if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
 		parent = bytenr;
@@ -2148,6 +2536,8 @@ static int run_next_block(struct btrfs_root *root,
 	}
 
 	ret = check_block(root, extent_cache, buf, flags);
+	if (ret)
+		goto out;
 
 	if (btrfs_is_leaf(buf)) {
 		btree_space_waste += btrfs_leaf_free_space(root, buf);
@@ -2164,16 +2554,6 @@ static int run_next_block(struct btrfs_root *root,
 				continue;
 			}
 			if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
-				struct btrfs_block_group_item *bi;
-				bi = btrfs_item_ptr(buf, i,
-					    struct btrfs_block_group_item);
-#if 0
-				fprintf(stderr,"block group %Lu %Lu used %Lu ",
-					btrfs_disk_key_objectid(disk_key),
-					btrfs_disk_key_offset(disk_key),
-					btrfs_block_group_used(bi));
-				fprintf(stderr, "flags %x\n", bi->flags);
-#endif
 				continue;
 			}
 			if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
@@ -2206,7 +2586,7 @@ static int run_next_block(struct btrfs_root *root,
 								       ref),
 					btrfs_extent_data_ref_offset(buf, ref),
 					btrfs_extent_data_ref_count(buf, ref),
-					0);
+					0, root->sectorsize);
 				continue;
 			}
 			if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
@@ -2216,7 +2596,7 @@ static int run_next_block(struct btrfs_root *root,
 				add_data_backref(extent_cache,
 					key.objectid, key.offset, 0, 0, 0, 
 					btrfs_shared_data_ref_count(buf, ref),
-					0);
+					0, root->sectorsize);
 				continue;
 			}
 			if (key.type != BTRFS_EXTENT_DATA_KEY)
@@ -2239,26 +2619,33 @@ static int run_next_block(struct btrfs_root *root,
 			ret = add_extent_rec(extent_cache, NULL,
 				   btrfs_file_extent_disk_bytenr(buf, fi),
 				   btrfs_file_extent_disk_num_bytes(buf, fi),
-				   0, 0, 1, 1);
+				   0, 0, 1, 1,
+				   btrfs_file_extent_disk_num_bytes(buf, fi));
 			add_data_backref(extent_cache,
 				btrfs_file_extent_disk_bytenr(buf, fi),
 				parent, owner, key.objectid, key.offset -
-				btrfs_file_extent_offset(buf, fi), 1, 1);
+				btrfs_file_extent_offset(buf, fi), 1, 1,
+				btrfs_file_extent_disk_num_bytes(buf, fi));
 			BUG_ON(ret);
 		}
 	} else {
 		int level;
+		struct btrfs_key first_key;
+
+		first_key.objectid = 0;
+
+		if (nritems > 0)
+			btrfs_item_key_to_cpu(buf, &first_key, 0);
 		level = btrfs_header_level(buf);
 		for (i = 0; i < nritems; i++) {
 			u64 ptr = btrfs_node_blockptr(buf, i);
 			u32 size = btrfs_level_size(root, level - 1);
 			btrfs_node_key_to_cpu(buf, &key, i);
 			ret = add_extent_rec(extent_cache, &key,
-					     ptr, size, 0, 0, 1, 0);
+					     ptr, size, 0, 0, 1, 0, size);
 			BUG_ON(ret);
 
-			add_tree_backref(extent_cache, ptr, parent, 
-					 owner, 1);
+			add_tree_backref(extent_cache, ptr, parent, owner, 1);
 
 			if (level > 1) {
 				add_pending(nodes, seen, ptr, size);
@@ -2277,6 +2664,7 @@ static int run_next_block(struct btrfs_root *root,
 	    btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
 	    !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
 		found_old_backref = 1;
+out:
 	free_extent_buffer(buf);
 	return 0;
 }
@@ -2296,25 +2684,553 @@ static int add_root_to_pending(struct extent_buffer *buf,
 	else
 		add_pending(pending, seen, buf->start, buf->len);
 	add_extent_rec(extent_cache, NULL, buf->start, buf->len,
-		       0, 1, 1, 0);
+		       0, 1, 1, 0, buf->len);
 
 	if (root_key->objectid == BTRFS_TREE_RELOC_OBJECTID ||
 	    btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
-		add_tree_backref(extent_cache, buf->start, buf->start, 0, 1);
+		add_tree_backref(extent_cache, buf->start, buf->start,
+				 0, 1);
 	else
 		add_tree_backref(extent_cache, buf->start, 0,
 				 root_key->objectid, 1);
 	return 0;
 }
 
-static int check_extent_refs(struct btrfs_root *root,
-		      struct cache_tree *extent_cache)
+/* as we fix the tree, we might be deleting blocks that we're tracking
+ * for repair.  This hook drops the matching backref counts and frees a
+ * backref once neither found_ref nor found_extent_tree is set on it.
+ */
+static int free_extent_hook(struct btrfs_trans_handle *trans,
+			    struct btrfs_root *root,
+			    u64 bytenr, u64 num_bytes, u64 parent,
+			    u64 root_objectid, u64 owner, u64 offset,
+			    int refs_to_drop)
+{
+	struct extent_record *rec;
+	struct cache_extent *cache;
+	int is_data;
+	struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
+
+	is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
+	cache = find_cache_extent(extent_cache, bytenr, num_bytes);
+	if (!cache)
+		return 0;
+
+	rec = container_of(cache, struct extent_record, cache);
+	if (is_data) {
+		struct data_backref *back;
+		back = find_data_backref(rec, parent, root_objectid, owner,
+					 offset);
+		if (!back)
+			goto out;
+		if (back->node.found_ref) {
+			back->found_ref -= refs_to_drop;
+			if (rec->refs)
+				rec->refs -= refs_to_drop;
+		}
+		if (back->node.found_extent_tree) {
+			back->num_refs -= refs_to_drop;
+			if (rec->extent_item_refs)
+				rec->extent_item_refs -= refs_to_drop;
+		}
+		if (back->found_ref == 0)
+			back->node.found_ref = 0;
+		if (back->num_refs == 0)
+			back->node.found_extent_tree = 0;
+		/* nothing references this backref any more, so drop it */
+		if (!back->node.found_extent_tree && !back->node.found_ref) {
+			list_del(&back->node.list);
+			free(back);
+		}
+	} else {
+		struct tree_backref *back;
+		back = find_tree_backref(rec, parent, root_objectid);
+		if (!back)
+			goto out;
+		if (back->node.found_ref) {
+			if (rec->refs)
+				rec->refs--;
+			back->node.found_ref = 0;
+		}
+		if (back->node.found_extent_tree) {
+			if (rec->extent_item_refs)
+				rec->extent_item_refs--;
+			back->node.found_extent_tree = 0;
+		}
+		if (!back->node.found_extent_tree && !back->node.found_ref) {
+			list_del(&back->node.list);
+			free(back);
+		}
+	}
+	maybe_free_extent_rec(extent_cache, rec);
+out:
+	return 0;
+}
+
+/* remove all extent-tree items (extent item + backrefs) keyed on bytenr */
+static int delete_extent_records(struct btrfs_trans_handle *trans,
+				 struct btrfs_root *root,
+				 struct btrfs_path *path,
+				 u64 bytenr, u64 new_len)
+{
+	struct btrfs_key key;
+	struct btrfs_key found_key;
+	struct extent_buffer *leaf;
+	int ret;
+	int slot;
+
+	key.objectid = bytenr;
+	key.type = (u8)-1;
+	key.offset = (u64)-1;
+
+	while(1) {
+		ret = btrfs_search_slot(trans, root->fs_info->extent_root,
+					&key, path, 0, 1);
+		if (ret < 0)
+			break;
+
+		if (ret > 0) {
+			ret = 0;
+			if (path->slots[0] == 0)
+				break;
+			path->slots[0]--;
+		}
+
+		leaf = path->nodes[0];
+		slot = path->slots[0];
+
+		btrfs_item_key_to_cpu(leaf, &found_key, slot);
+		if (found_key.objectid != bytenr)
+			break;
+
+		if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
+		    found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
+		    found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
+		    found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
+		    found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
+		    found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
+			btrfs_release_path(NULL, path);
+			if (found_key.type == 0) {	/* can't step below type 0 */
+				if (found_key.offset == 0)
+					break;
+				key.offset = found_key.offset - 1;
+				key.type = 0;
+			} else {
+				key.type = found_key.type - 1;
+				key.offset = (u64)-1;
+			}
+			continue;
+		}
+
+		fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
+			found_key.objectid, found_key.type, found_key.offset);
+
+		ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
+		if (ret)
+			break;
+		btrfs_release_path(NULL, path);
+
+		if (found_key.type == BTRFS_EXTENT_ITEM_KEY) {
+			ret = btrfs_update_block_group(trans, root, bytenr,
+						       found_key.offset, 0, 0);
+			if (ret)
+				break;
+		}
+	}
+
+	btrfs_release_path(NULL, path);
+	return ret;
+}
+
+/*
+ * for a single backref, this will insert a new extent item for rec
+ * (unless one was already allocated) and add the backref to it.
+ */
+static int record_extent(struct btrfs_trans_handle *trans,
+			 struct btrfs_fs_info *info,
+			 struct btrfs_path *path,
+			 struct extent_record *rec,
+			 struct extent_backref *back,
+			 int allocated, u64 flags)
+{
+	int ret;
+	struct btrfs_root *extent_root = info->extent_root;
+	struct extent_buffer *leaf;
+	struct btrfs_key ins_key;
+	struct btrfs_extent_item *ei;
+	struct tree_backref *tback;
+	struct data_backref *dback;
+	struct btrfs_tree_block_info *bi;
+
+	if (!back->is_data)
+		rec->max_size = max_t(u64, rec->max_size,
+				    info->extent_root->leafsize);
+
+	if (!allocated) {
+		u32 item_size = sizeof(*ei);
+
+		if (!back->is_data)
+			item_size += sizeof(*bi);
+
+		ins_key.objectid = rec->start;
+		ins_key.offset = rec->max_size;
+		ins_key.type = BTRFS_EXTENT_ITEM_KEY;
+
+		ret = btrfs_insert_empty_item(trans, extent_root, path,
+					&ins_key, item_size);
+		if (ret)
+			goto fail;
+
+		leaf = path->nodes[0];
+		ei = btrfs_item_ptr(leaf, path->slots[0],
+				    struct btrfs_extent_item);
+
+		btrfs_set_extent_refs(leaf, ei, 0);
+		btrfs_set_extent_generation(leaf, ei, rec->generation);
+
+		if (back->is_data) {
+			btrfs_set_extent_flags(leaf, ei,
+					       BTRFS_EXTENT_FLAG_DATA);
+		} else {
+			struct btrfs_disk_key copy_key;
+
+			tback = (struct tree_backref *)back;
+			bi = (struct btrfs_tree_block_info *)(ei + 1);
+			memset_extent_buffer(leaf, 0, (unsigned long)bi,
+					     sizeof(*bi));
+			memset(&copy_key, 0, sizeof(copy_key));
+
+			copy_key.objectid = le64_to_cpu(rec->info_objectid);
+			btrfs_set_tree_block_level(leaf, bi, rec->info_level);
+			btrfs_set_tree_block_key(leaf, bi, &copy_key);
+
+			btrfs_set_extent_flags(leaf, ei,
+					       BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
+		}
+
+		btrfs_mark_buffer_dirty(leaf);
+		ret = btrfs_update_block_group(trans, extent_root, rec->start,
+					       rec->max_size, 1, 0);
+		if (ret)
+			goto fail;
+		btrfs_release_path(NULL, path);
+	}
+
+	if (back->is_data) {
+		u64 parent;
+		int i;
+
+		dback = (struct data_backref *)back;
+		if (back->full_backref)
+			parent = dback->parent;
+		else
+			parent = 0;
+
+		for (i = 0; i < dback->found_ref; i++) {
+			/* if parent != 0, we're doing a full backref
+			 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
+			 * just makes the backref allocator create a data
+			 * backref
+			 */
+			ret = btrfs_inc_extent_ref(trans, info->extent_root,
+						   rec->start, rec->max_size,
+						   parent,
+						   dback->root,
+						   parent ?
+						   BTRFS_FIRST_FREE_OBJECTID :
+						   dback->owner,
+						   dback->offset);
+			if (ret)
+				break;
+		}
+		fprintf(stderr, "adding new data backref"
+				" on %llu %s %llu owner %llu"
+				" offset %llu found %d\n",
+				(unsigned long long)rec->start,
+				back->full_backref ? "parent" : "root",
+				back->full_backref ?
+				(unsigned long long)parent :
+				(unsigned long long)dback->root,
+				(unsigned long long)dback->owner,
+				(unsigned long long)dback->offset,
+				dback->found_ref);
+	} else {
+		u64 parent;
+
+		tback = (struct tree_backref *)back;
+		if (back->full_backref)
+			parent = tback->parent;
+		else
+			parent = 0;
+
+		ret = btrfs_inc_extent_ref(trans, info->extent_root,
+					   rec->start, rec->max_size,
+					   parent, tback->root, 0, 0);
+		fprintf(stderr, "adding new tree backref on "
+			"start %llu len %llu parent %llu root %llu\n",
+			(unsigned long long)rec->start,
+			(unsigned long long)rec->max_size,
+			(unsigned long long)tback->parent,
+			(unsigned long long)tback->root);
+	}
+fail:
+	btrfs_release_path(NULL, path);
+	return ret;
+}
+
+/*
+ * when an incorrect extent item is found, this will delete
+ * all of the existing entries for it and recreate them
+ * based on what the tree scan found.
+ */
+static int fixup_extent_refs(struct btrfs_trans_handle *trans,
+			     struct btrfs_fs_info *info,
+			     struct extent_record *rec)
+{
+	int ret;
+	struct btrfs_path *path;
+	struct list_head *cur = rec->backrefs.next;
+	struct cache_extent *cache;
+	struct extent_backref *back;
+	int allocated = 0;
+	u64 flags = 0;
+
+	/* remember our flags for recreating the extent */
+	ret = btrfs_lookup_extent_info(NULL, info->extent_root, rec->start,
+				       rec->max_size, NULL, &flags);
+	if (ret < 0)
+		flags = BTRFS_BLOCK_FLAG_FULL_BACKREF;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	/* step one, delete all the existing records */
+	ret = delete_extent_records(trans, info->extent_root, path,
+				    rec->start, rec->max_size);
+	if (ret < 0)
+		goto out;
+
+	/* was this block corrupt?  If so, don't add references to it */
+	cache = find_cache_extent(info->corrupt_blocks, rec->start, rec->max_size);
+	if (cache) {
+		ret = 0;
+		goto out;
+	}
+
+	/* step two, recreate all the refs we did find */
+	while(cur != &rec->backrefs) {
+		back = list_entry(cur, struct extent_backref, list);
+		cur = cur->next;
+
+		/* if we didn't find any references, don't create a
+		 * new extent record
+		 */
+		if (!back->found_ref)
+			continue;
+
+		ret = record_extent(trans, info, path, rec, back, allocated, flags);
+		allocated = 1;
+
+		if (ret)
+			goto out;
+	}
+out:
+	btrfs_free_path(path);
+	return ret;
+}
+
+/* drop the parent's pointer to a corrupt block; extent allocation tree only */
+static int prune_one_block(struct btrfs_trans_handle *trans,
+			   struct btrfs_fs_info *info,
+			   struct btrfs_corrupt_block *corrupt)
+{
+	int ret;
+	struct btrfs_path path;
+	struct extent_buffer *eb;
+	u64 found;
+	int slot;
+	int nritems;
+	int level = corrupt->level + 1;
+
+	btrfs_init_path(&path);
+again:
+	/* we want to stop at the parent of our busted block */
+	path.lowest_level = level;
+
+	ret = btrfs_search_slot(trans, info->extent_root,
+				&corrupt->key, &path, -1, 1);
+
+	if (ret < 0)
+		goto out;
+
+	eb = path.nodes[level];
+	if (!eb) {
+		ret = -ENOENT;
+		goto out;
+	}
+
+	/*
+	 * hopefully the search gave us the slot that points at the block
+	 * we want to prune, let's try that first
+	 */
+	slot = path.slots[level];
+	found =  btrfs_node_blockptr(eb, slot);
+	if (found == corrupt->cache.start)
+		goto del_ptr;
+
+	nritems = btrfs_header_nritems(eb);
+
+	/* the search slot didn't match, let's scan this node and hope we find it */
+	for (slot = 0; slot < nritems; slot++) {
+		found =  btrfs_node_blockptr(eb, slot);
+		if (found == corrupt->cache.start)
+			goto del_ptr;
+	}
+	/*
+	 * not in this node: retry one level up unless this already is the
+	 * extent root node.  TODO, search all the nodes for pointers to it
+	 */
+	if (eb == info->extent_root->node) {
+		ret = -ENOENT;
+		goto out;
+	} else {
+		level++;
+		btrfs_release_path(NULL, &path);
+		goto again;
+	}
+
+del_ptr:
+	printk("deleting pointer to block %Lu\n", corrupt->cache.start);
+	ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
+
+out:
+	btrfs_release_path(NULL, &path);
+	return ret;
+}
+
+/* best-effort: try to prune every block recorded in info->corrupt_blocks */
+static int prune_corrupt_blocks(struct btrfs_trans_handle *trans,
+				struct btrfs_fs_info *info)
+{
+	struct cache_extent *ce = find_first_cache_extent(info->corrupt_blocks, 0);
+
+	/* per-block failures are ignored; this always returns 0 */
+	while (ce) {
+		struct btrfs_corrupt_block *bad;
+
+		bad = container_of(ce, struct btrfs_corrupt_block, cache);
+		prune_one_block(trans, info, bad);
+		ce = next_cache_extent(ce);
+	}
+	return 0;
+}
+
+/* empty info->corrupt_blocks, freeing each btrfs_corrupt_block in it */
+static void free_corrupt_blocks(struct btrfs_fs_info *info)
+{
+	struct cache_extent *ce;
+
+	/* always re-fetch the first entry since we remove as we go */
+	while ((ce = find_first_cache_extent(info->corrupt_blocks, 0))) {
+		struct btrfs_corrupt_block *bad;
+
+		bad = container_of(ce, struct btrfs_corrupt_block, cache);
+		remove_cache_extent(info->corrupt_blocks, ce);
+		free(bad);
+	}
+}
+
+/*
+ * make sure the mapping described by @map has a block group item in
+ * the extent tree; when the lookup misses, create one and set *reinit.
+ */
+static int check_block_group(struct btrfs_trans_handle *trans,
+			      struct btrfs_fs_info *info,
+			      struct map_lookup *map,
+			      int *reinit)
+{
+	struct btrfs_path path;
+	struct btrfs_key key;
+	int ret;
+
+	key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
+	key.objectid = map->ce.start;
+	key.offset = map->ce.size;
+
+	btrfs_init_path(&path);
+	ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
+	btrfs_release_path(NULL, &path);
+	if (ret <= 0)	/* found (0) or search error (< 0): nothing to add */
+		return ret;
+	*reinit = 1;
+	return btrfs_make_block_group(trans, info->extent_root, 0, map->type,
+				      BTRFS_FIRST_CHUNK_TREE_OBJECTID,
+				      key.objectid, key.offset);
+}
+
+static int check_block_groups(struct btrfs_trans_handle *trans,
+			      struct btrfs_fs_info *info, int *reinit)
+{
+	struct cache_extent *ce;
+	struct map_lookup *map;
+	struct btrfs_mapping_tree *map_tree = &info->mapping_tree;
+
+	/* disabled: this isn't quite working, so the walk below never runs */
+	return 0;
+
+	ce = find_first_cache_extent(&map_tree->cache_tree, 0);
+	while (1) {
+		if (!ce)
+			break;
+		map = container_of(ce, struct map_lookup, ce);
+		check_block_group(trans, info, map, reinit);
+		ce = next_cache_extent(ce);
+	}
+	return 0;
+}
+
+static int check_extent_refs(struct btrfs_trans_handle *trans,
+			     struct btrfs_root *root,
+			     struct cache_tree *extent_cache, int repair)
 {
 	struct extent_record *rec;
 	struct cache_extent *cache;
 	int err = 0;
+	int ret = 0;
+	int fixed = 0;
+	int reinit = 0;
+
+	if (repair) {
+		/*
+		 * if we're doing a repair, we have to make sure
+		 * we don't allocate from the problem extents.
+		 * In the worst case, this will be all the
+		 * extents in the FS
+		 */
+		cache = find_first_cache_extent(extent_cache, 0);
+		while(cache) {
+			rec = container_of(cache, struct extent_record, cache);
+			btrfs_pin_extent(root->fs_info,
+					 rec->start, rec->max_size);
+			cache = next_cache_extent(cache);
+		}
 
+		/* pin down all the corrupted blocks too */
+		cache = find_first_cache_extent(root->fs_info->corrupt_blocks, 0);
+		while(cache) {
+			/* entries here are btrfs_corrupt_block, not extent_record */
+			btrfs_pin_extent(root->fs_info,
+					 cache->start, cache->size);
+			cache = next_cache_extent(cache);
+		}
+		prune_corrupt_blocks(trans, root->fs_info);
+		check_block_groups(trans, root->fs_info, &reinit);
+		if (reinit)
+			btrfs_read_block_groups(root->fs_info->extent_root);
+	}
 	while(1) {
+		fixed = 0;
 		cache = find_first_cache_extent(extent_cache, 0);
 		if (!cache)
 			break;
@@ -2326,19 +3242,39 @@ static int check_extent_refs(struct btrfs_root *root,
 			fprintf(stderr, "extent item %llu, found %llu\n",
 				(unsigned long long)rec->extent_item_refs,
 				(unsigned long long)rec->refs);
+			if (!fixed && repair) {
+				ret = fixup_extent_refs(trans, root->fs_info, rec);
+				if (ret)
+					goto repair_abort;
+				fixed = 1;
+			}
 			err = 1;
+
 		}
 		if (all_backpointers_checked(rec, 1)) {
 			fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
 				(unsigned long long)rec->start,
 				(unsigned long long)rec->nr);
 
+			if (!fixed && repair) {
+				ret = fixup_extent_refs(trans, root->fs_info, rec);
+				if (ret)
+					goto repair_abort;
+				fixed = 1;
+			}
+
 			err = 1;
 		}
 		if (!rec->owner_ref_checked) {
 			fprintf(stderr, "owner ref check failed [%llu %llu]\n",
 				(unsigned long long)rec->start,
 				(unsigned long long)rec->nr);
+			if (!fixed && repair) {
+				ret = fixup_extent_refs(trans, root->fs_info, rec);
+				if (ret)
+					goto repair_abort;
+				fixed = 1;
+			}
 			err = 1;
 		}
 
@@ -2346,16 +3282,30 @@ static int check_extent_refs(struct btrfs_root *root,
 		free_all_extent_backrefs(rec);
 		free(rec);
 	}
+repair_abort:
+	if (repair) {
+		if (ret) {
+			fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
+			exit(1);
+		} else {
+			btrfs_fix_block_accounting(trans, root);
+		}
+		if (err)
+			fprintf(stderr, "repaired damaged extent references\n");
+		return ret;
+	}
 	return err;
 }
 
-static int check_extents(struct btrfs_root *root)
+static int check_extents(struct btrfs_trans_handle *trans,
+			 struct btrfs_root *root, int repair)
 {
 	struct cache_tree extent_cache;
 	struct cache_tree seen;
 	struct cache_tree pending;
 	struct cache_tree reada;
 	struct cache_tree nodes;
+	struct cache_tree corrupt_blocks;
 	struct btrfs_path path;
 	struct btrfs_key key;
 	struct btrfs_key found_key;
@@ -2372,6 +3322,13 @@ static int check_extents(struct btrfs_root *root)
 	cache_tree_init(&pending);
 	cache_tree_init(&nodes);
 	cache_tree_init(&reada);
+	cache_tree_init(&corrupt_blocks);
+
+	if (repair) {
+		root->fs_info->fsck_extent_cache = &extent_cache;
+		root->fs_info->free_extent_hook = free_extent_hook;
+		root->fs_info->corrupt_blocks = &corrupt_blocks;
+	}
 
 	bits_nr = 1024;
 	bits = malloc(bits_nr * sizeof(struct block_info));
@@ -2430,7 +3387,15 @@ static int check_extents(struct btrfs_root *root)
 		if (ret != 0)
 			break;
 	}
-	ret = check_extent_refs(root, &extent_cache);
+	ret = check_extent_refs(trans, root, &extent_cache, repair);
+
+	if (repair) {
+		free_corrupt_blocks(root->fs_info);
+		root->fs_info->fsck_extent_cache = NULL;
+		root->fs_info->free_extent_hook = NULL;
+		root->fs_info->corrupt_blocks = NULL;
+	}
+
 	return ret;
 }
 
@@ -2441,29 +3406,120 @@ static void print_usage(void)
 	exit(1);
 }
 
+static struct option long_options[] = {
+	{ "super", 1, NULL, 's' },	/* takes an argument; returns 's' */
+	{ "repair", 0, NULL, 0 },	/* main() detects via option_index == 1 */
+	{ "init-csum-tree", 0, NULL, 0 },	/* main() detects via option_index == 2 */
+	{ "init-extent-tree", 0, NULL, 0 },	/* NOTE(review): no handler in the main() lines visible here */
+	{ 0, 0, 0, 0}
+};
+
 int main(int ac, char **av)
 {
+	struct cache_tree root_cache;
 	struct btrfs_root *root;
+	struct btrfs_fs_info *info;
+	struct btrfs_trans_handle *trans = NULL;
+	u64 bytenr = 0;
 	int ret;
+	int num;
+	int repair = 0;
+	int option_index = 0;
+	int init_csum_tree = 0;
+	int rw = 0;
+
+	while(1) {
+		int c;
+		c = getopt_long(ac, av, "", long_options,
+				&option_index);
+		if (c < 0)
+			break;
+		switch(c) {
+			case 's':
+				num = atol(optarg);
+				bytenr = btrfs_sb_offset(num);
+				printf("using SB copy %d, bytenr %llu\n", num,
+				       (unsigned long long)bytenr);
+				break;
+			case '?':
+				print_usage();
+		}
+		if (option_index == 1) {
+			printf("enabling repair mode\n");
+			repair = 1;
+			rw = 1;
+		} else if (option_index == 2) {
+			printf("Creating a new CRC tree\n");
+			init_csum_tree = 1;
+			rw = 1;
+		}
+
+	}
+	ac = ac - optind;
 
-	if (ac < 2)
+	if (ac != 1)
 		print_usage();
 
 	radix_tree_init();
-	root = open_ctree(av[1], 0, 0);
+	cache_tree_init(&root_cache);
 
-	if (root == NULL)
+	if((ret = check_mounted(av[optind])) < 0) {
+		fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
+		return ret;
+	} else if(ret) {
+		fprintf(stderr, "%s is currently mounted. Aborting.\n", av[optind]);
+		return -EBUSY;
+	}
+
+	info = open_ctree_fs_info(av[optind], bytenr, rw, 1);
+
+	if (info == NULL)
 		return 1;
 
-	ret = check_extents(root);
+	if (!extent_buffer_uptodate(info->tree_root->node) ||
+	    !extent_buffer_uptodate(info->dev_root->node) ||
+	    !extent_buffer_uptodate(info->extent_root->node) ||
+	    !extent_buffer_uptodate(info->chunk_root->node)) {
+		fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
+		return -EIO;
+	}
+
+	root = info->fs_root;
+
+	fprintf(stderr, "checking extents\n");
+	if (rw)
+		trans = btrfs_start_transaction(root, 1);
+
+	if (init_csum_tree) {
+		fprintf(stderr, "Reinit crc root\n");
+		ret = btrfs_fsck_reinit_root(trans, info->csum_root);
+		if (ret) {
+			fprintf(stderr, "crc root initialization failed\n");
+			return -EIO;
+		}
+		goto out;
+	}
+	ret = check_extents(trans, root, repair);
+	if (ret)
+		fprintf(stderr, "Errors found in extent allocation tree\n");
+
+	fprintf(stderr, "checking fs roots\n");
+	ret = check_fs_roots(root, &root_cache);
 	if (ret)
 		goto out;
-	ret = check_fs_roots(root);
 
+	fprintf(stderr, "checking root refs\n");
+	ret = check_root_refs(root, &root_cache);
 out:
+	free_root_recs(&root_cache);
+	if (rw) {
+		ret = btrfs_commit_transaction(trans, root);
+		if (ret)
+			exit(1);
+	}
 	close_ctree(root);
-	if (found_old_backref) {
-		/*
+
+	if (found_old_backref) { /*
 		 * there was a disk format change when mixed
 		 * backref was in testing tree. The old format
 		 * existed about one week.
diff --git a/btrfsctl.c b/btrfsctl.c
index b323818..d45e2a7 100644
--- a/btrfsctl.c
+++ b/btrfsctl.c
@@ -29,6 +29,7 @@
 #include <unistd.h>
 #include <dirent.h>
 #include <libgen.h>
+#include <stdlib.h>
 #include "kerncompat.h"
 #include "ctree.h"
 #include "transaction.h"
@@ -46,7 +47,7 @@ static inline int ioctl(int fd, int define, void *arg) { return 0; }
 static void print_usage(void)
 {
 	printf("usage: btrfsctl [ -d file|dir] [ -s snap_name subvol|tree ]\n");
-	printf("                [-r size] [-A device] [-a] [-c]\n");
+	printf("                [-r size] [-A device] [-a] [-c] [-D dir .]\n");
 	printf("\t-d filename: defragments one file\n");
 	printf("\t-d directory: defragments the entire Btree\n");
 	printf("\t-s snap_name dir: creates a new snapshot of dir\n");
@@ -55,6 +56,9 @@ static void print_usage(void)
 	printf("\t-A device: scans the device file for a Btrfs filesystem\n");
 	printf("\t-a: scans all devices for Btrfs filesystems\n");
 	printf("\t-c: forces a single FS sync\n");
+	printf("\t-D: delete snapshot\n");
+	printf("\t-m [tree id] directory: set the default mounted subvolume"
+	       " to the [tree id] or the directory\n");
 	printf("%s\n", BTRFS_BUILD_VERSION);
 	exit(1);
 }
@@ -99,8 +103,15 @@ int main(int ac, char **av)
 	int i;
 	unsigned long command = 0;
 	int len;
+	char *pos;
 	char *fullpath;
+	u64 objectid = 0;
 
+	printf( "**\n"
+		"** WARNING: this program is considered deprecated\n"
+		"** Please consider to switch to the btrfs utility\n"
+		"**\n");
+	
 	if (ac == 2 && strcmp(av[1], "-a") == 0) {
 		fprintf(stderr, "Scanning for Btrfs filesystems\n");
 		btrfs_scan_one_dir("/dev", 1);
@@ -158,6 +169,28 @@ int main(int ac, char **av)
 				print_usage();
 			}
 			command = BTRFS_IOC_DEFRAG;
+		} else if (strcmp(av[i], "-D") == 0) {
+			if (i >= ac - 1) {
+				fprintf(stderr, "-D requires an arg\n");
+				print_usage();
+			}
+			command = BTRFS_IOC_SNAP_DESTROY;
+			name = av[i + 1];
+			len = strlen(name);
+			pos = strchr(name, '/');
+			if (pos) {
+				if (*(pos + 1) == '\0')
+					*(pos) = '\0';
+				else {
+					fprintf(stderr,
+						"error: / not allowed in names\n");
+					exit(1);
+				}
+			}
+			if (len == 0 || len >= BTRFS_VOL_NAME_MAX) {
+				fprintf(stderr, "-D size too long\n");
+				exit(1);
+			}
 		} else if (strcmp(av[i], "-A") == 0) {
 			if (i >= ac - 1) {
 				fprintf(stderr, "-A requires an arg\n");
@@ -178,6 +211,16 @@ int main(int ac, char **av)
 			command = BTRFS_IOC_RESIZE;
 		} else if (strcmp(av[i], "-c") == 0) {
 			command = BTRFS_IOC_SYNC;
+		} else if (strcmp(av[i], "-m") == 0) {
+			command = BTRFS_IOC_DEFAULT_SUBVOL;
+			if (i == ac - 3) {
+				objectid = (unsigned long long)
+					    strtoll(av[i + 1], NULL, 0);
+				if (errno == ERANGE) {
+					fprintf(stderr, "invalid tree id\n");
+					exit(1);
+				}
+			}
 		}
 	}
 	if (command == 0) {
@@ -199,13 +242,16 @@ int main(int ac, char **av)
 	 }
 
 	if (name)
-		strcpy(args.name, name);
+                strncpy(args.name, name, BTRFS_PATH_NAME_MAX + 1);
 	else
 		args.name[0] = '\0';
 
 	if (command == BTRFS_IOC_SNAP_CREATE) {
 		args.fd = fd;
 		ret = ioctl(snap_fd, command, &args);
+	} else if (command == BTRFS_IOC_DEFAULT_SUBVOL) {
+		printf("objectid is %llu\n", (unsigned long long)objectid);
+		ret = ioctl(fd, command, &objectid);
 	} else
 		ret = ioctl(fd, command, &args);
 	if (ret < 0) {
@@ -219,8 +265,8 @@ int main(int ac, char **av)
 	}
 	printf("%s\n", BTRFS_BUILD_VERSION);
 	if (ret)
-		exit(0);
-	else
 		exit(1);
+
+	return 0;
 }
 
diff --git a/btrfslabel.c b/btrfslabel.c
new file mode 100644
index 0000000..c9f4684
--- /dev/null
+++ b/btrfslabel.c
@@ -0,0 +1,121 @@
+/*
+ * Copyright (C) 2008 Morey Roof.   All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#define _GNU_SOURCE
+
+#ifndef __CHECKER__
+#include <sys/ioctl.h>
+#include <sys/mount.h>
+#include "ioctl.h"
+#endif /* __CHECKER__ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <dirent.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <linux/fs.h>
+#include <linux/limits.h>
+#include <ctype.h>
+#include "kerncompat.h"
+#include "ctree.h"
+#include "utils.h"
+#include "version.h"
+#include "disk-io.h"
+#include "transaction.h"
+
+#define MOUNTED                        1
+#define UNMOUNTED                      2
+#define GET_LABEL                      3
+#define SET_LABEL                      4
+
+/* Write nLabel into the superblock of the unmounted filesystem on dev. */
+static void change_label_unmounted(char *dev, char *nLabel)
+{
+       struct btrfs_root *root = open_ctree(dev, 0, 1); /* default sb, rw */
+       struct btrfs_trans_handle *trans;
+
+       if (!root) {
+               fprintf(stderr, "FATAL: unable to open %s\n", dev);
+               return;
+       }
+       trans = btrfs_start_transaction(root, 1);
+       /* strncpy() alone leaves a full-length label unterminated */
+       strncpy(root->fs_info->super_copy.label, nLabel, BTRFS_LABEL_SIZE - 1);
+       root->fs_info->super_copy.label[BTRFS_LABEL_SIZE - 1] = '\0';
+       btrfs_commit_transaction(trans, root);
+       close_ctree(root);
+}
+
+/* Print the label stored in the superblock of the unmounted fs on dev. */
+static void get_label_unmounted(char *dev)
+{
+       /* Open the super_block at the default location, read-only. */
+       struct btrfs_root *root = open_ctree(dev, 0, 0);
+
+       if (!root) {
+               fprintf(stderr, "FATAL: unable to open %s\n", dev);
+               return;
+       }
+       fprintf(stdout, "%s\n", root->fs_info->super_copy.label);
+       /* Now we close it since we are done. */
+       close_ctree(root);
+}
+
+/* Print the label of an unmounted btrfs device. */
+int get_label(char *btrfs_dev)
+{
+	int mounted = check_mounted(btrfs_dev);
+
+	/* negative: the mount status could not be determined */
+	if (mounted < 0) {
+		fprintf(stderr, "FATAL: error checking %s mount status\n", btrfs_dev);
+		return -1;
+	}
+	/* non-zero: the label is only read from an unmounted filesystem */
+	if (mounted != 0) {
+		fprintf(stderr, "FATAL: the filesystem has to be unmounted\n");
+		return -2;
+	}
+
+	get_label_unmounted(btrfs_dev);
+	return 0;
+}
+
+
+/* Set the label of an unmounted btrfs device to nLabel. */
+int set_label(char *btrfs_dev, char *nLabel)
+{
+	int mounted = check_mounted(btrfs_dev);
+
+	/* negative: the mount status could not be determined */
+	if (mounted < 0) {
+		fprintf(stderr, "FATAL: error checking %s mount status\n", btrfs_dev);
+		return -1;
+	}
+	/* non-zero: the label is only changed on an unmounted filesystem */
+	if (mounted != 0) {
+		fprintf(stderr, "FATAL: the filesystem has to be unmounted\n");
+		return -2;
+	}
+
+	change_label_unmounted(btrfs_dev, nLabel);
+	return 0;
+}
diff --git a/btrfslabel.h b/btrfslabel.h
new file mode 100644
index 0000000..abf43ad
--- /dev/null
+++ b/btrfslabel.h
@@ -0,0 +1,5 @@
+/* btrfslabel.h */
+
+
+int get_label(char *btrfs_dev);
+int set_label(char *btrfs_dev, char *nLabel);
\ No newline at end of file
diff --git a/calc-size.c b/calc-size.c
new file mode 100644
index 0000000..c4adfb0
--- /dev/null
+++ b/calc-size.c
@@ -0,0 +1,268 @@
+/*
+ * Copyright (C) 2011 Red Hat.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#define _XOPEN_SOURCE 500
+#define _GNU_SOURCE 1
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <zlib.h>
+#include "kerncompat.h"
+#include "ctree.h"
+#include "disk-io.h"
+#include "print-tree.h"
+#include "transaction.h"
+#include "list.h"
+#include "version.h"
+#include "volumes.h"
+#include "utils.h"
+
+static int verbose = 0;
+static int no_pretty = 0;
+
+/* Counters accumulated while walking a single tree. */
+struct root_stats {
+	u64 total_nodes;	/* interior nodes visited, bumped by walk_nodes() */
+	u64 total_leaves;	/* leaves visited, bumped by walk_leaf() */
+	u64 total_bytes;	/* sum of nodesize/leafsize over visited blocks */
+	u64 total_inline;	/* bytes of inline file extent data found */
+	int total_levels;	/* NOTE(review): never assigned in this file */
+};
+
+/* A filesystem tree root, identified by the key handed to calc_root_size(). */
+struct fs_root {
+	struct btrfs_key key;
+	struct btrfs_key *snaps;	/* NOTE(review): not used anywhere in this file */
+};
+
+/*
+ * Account for one leaf in @stat: add the leaf size to the byte total and
+ * bump the leaf count.  When @find_inline is non-zero, additionally sum the
+ * length of every inline file extent stored in the leaf at path->nodes[0].
+ *
+ * Always returns 0.
+ */
+static int walk_leaf(struct btrfs_root *root, struct btrfs_path *path,
+		     struct root_stats *stat, int find_inline)
+{
+	struct extent_buffer *leaf = path->nodes[0];
+	struct btrfs_file_extent_item *extent;
+	struct btrfs_key key;
+	int slot;
+
+	stat->total_bytes += root->leafsize;
+	stat->total_leaves++;
+
+	/* Without inline accounting the leaf contents are never inspected. */
+	if (!find_inline)
+		return 0;
+
+	for (slot = 0; slot < btrfs_header_nritems(leaf); slot++) {
+		btrfs_item_key_to_cpu(leaf, &key, slot);
+		if (key.type == BTRFS_EXTENT_DATA_KEY) {
+			extent = btrfs_item_ptr(leaf, slot,
+						struct btrfs_file_extent_item);
+			if (btrfs_file_extent_type(leaf, extent) !=
+			    BTRFS_FILE_EXTENT_INLINE)
+				continue;
+
+			stat->total_inline +=
+				btrfs_file_extent_inline_item_len(leaf,
+						btrfs_item_nr(leaf, slot));
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Recursively account for the interior node at path->nodes[level] and for
+ * every block below it in @stat.
+ *
+ * Child pointers are visited in slot order.  A child block is read only when
+ * it is itself an interior node or when @find_inline asks for leaf contents;
+ * otherwise the leaf is counted without being read.  A child that cannot be
+ * read is reported on stderr and skipped.
+ *
+ * Returns 0 on success, or the first non-zero value returned by a child walk.
+ */
+static int walk_nodes(struct btrfs_root *root, struct btrfs_path *path,
+		      struct root_stats *stat, int level, int find_inline)
+{
+	struct extent_buffer *b = path->nodes[level];
+	int i;
+	int ret = 0;
+
+	stat->total_bytes += root->nodesize;
+	stat->total_nodes++;
+
+	for (i = 0; i < btrfs_header_nritems(b); i++) {
+		/* stays NULL when the child leaf does not have to be read */
+		struct extent_buffer *tmp = NULL;
+
+		path->slots[level] = i;
+		if ((level - 1) > 0 || find_inline) {
+			tmp = read_tree_block(root, btrfs_node_blockptr(b, i),
+					      btrfs_level_size(root, level - 1),
+					      btrfs_node_ptr_generation(b, i));
+			if (!tmp) {
+				fprintf(stderr, "Failed to read blocknr %Lu\n",
+					btrfs_node_blockptr(b, i));
+				continue;
+			}
+			path->nodes[level - 1] = tmp;
+		}
+		/* a non-zero child level means the child is another node */
+		if (level - 1)
+			ret = walk_nodes(root, path, stat, level - 1,
+					 find_inline);
+		else
+			ret = walk_leaf(root, path, stat, find_inline);
+		/* NOTE(review): relies on free_extent_buffer() accepting NULL - confirm */
+		free_extent_buffer(tmp);
+		if (ret) {
+			fprintf(stderr, "Error walking down path\n");
+			break;
+		}
+	}
+
+	return ret;
+}
+
+/*
+ * Walk the tree identified by @key and print a one-line summary of its size
+ * to stdout.
+ *
+ * @tree_root:   any root of the opened filesystem; only its fs_info is used
+ * @key:         key of the root to read via btrfs_read_fs_root()
+ * @find_inline: when non-zero, leaves are read and inline extent data summed
+ *
+ * Raw byte counts are printed when the global no_pretty flag is set,
+ * otherwise sizes are formatted with pretty_sizes().
+ *
+ * Returns 0 on success, 1 when the root cannot be read or the path cannot
+ * be allocated, or the non-zero result of the tree walk.
+ */
+static int calc_root_size(struct btrfs_root *tree_root, struct btrfs_key *key,
+			  int find_inline)
+{
+	struct btrfs_root *root;
+	struct btrfs_path *path;
+	struct root_stats stat;
+	int level;
+	int ret = 0;
+
+	root = btrfs_read_fs_root(tree_root->fs_info, key);
+	if (!root) {
+		fprintf(stderr, "Failed to read root %llu\n",
+			(unsigned long long)key->objectid);
+		return 1;
+	}
+
+	path = btrfs_alloc_path();
+	if (!path) {
+		fprintf(stderr, "Could not allocate path\n");
+		return 1;
+	}
+
+	memset(&stat, 0, sizeof(stat));
+	level = btrfs_header_level(root->node);
+	path->nodes[level] = root->node;
+
+	/* A level-0 root is a single leaf; anything higher is walked as nodes. */
+	if (level)
+		ret = walk_nodes(root, path, &stat, level, find_inline);
+	else
+		ret = walk_leaf(root, path, &stat, find_inline);
+	if (ret)
+		goto out;
+
+	if (no_pretty) {
+		printf("\t%llu total bytes, %llu inline data bytes, %llu nodes, "
+		       "%llu leaves, %d levels\n",
+		       (unsigned long long)stat.total_bytes,
+		       (unsigned long long)stat.total_inline,
+		       (unsigned long long)stat.total_nodes,
+		       (unsigned long long)stat.total_leaves,
+		       level + 1);
+	} else {
+		char *total_size;
+		char *inline_size;
+
+		total_size = pretty_sizes(stat.total_bytes);
+		inline_size = pretty_sizes(stat.total_inline);
+
+		printf("\t%s total size, %s inline data, %llu nodes, "
+		       "%llu leaves, %d levels\n",
+		       total_size, inline_size,
+		       (unsigned long long)stat.total_nodes,
+		       (unsigned long long)stat.total_leaves, level + 1);
+		free(total_size);
+		free(inline_size);
+	}
+out:
+	btrfs_free_path(path);
+	return ret;
+}
+
+static void usage(void)	/* print the command-line synopsis to stderr */
+{
+	fprintf(stderr, "Usage: calc-size [-v] [-b] <device>\n");
+}
+
+/*
+ * calc-size entry point: open the filesystem on the given device read-only
+ * and print the size of the root, extent, csum and fs trees.
+ *
+ * Options: -v increases the verbose counter, -b prints raw byte counts.
+ *
+ * Returns 0 on success and non-zero when memory cannot be allocated or a
+ * tree cannot be measured; exits with 1 on usage errors or when the
+ * filesystem cannot be opened.
+ */
+int main(int argc, char **argv)
+{
+	struct btrfs_key key;
+	struct fs_root *roots = NULL;
+	struct btrfs_root *root;
+	size_t fs_roots_size = sizeof(struct fs_root);
+	int opt;
+	int ret = 0;
+
+	while ((opt = getopt(argc, argv, "vb")) != -1) {
+		switch (opt) {
+			case 'v':
+				verbose++;
+				break;
+			case 'b':
+				no_pretty = 1;
+				break;
+			default:
+				usage();
+				exit(1);
+		}
+	}
+
+	if (optind >= argc) {
+		usage();
+		exit(1);
+	}
+
+	/*
+	if ((ret = check_mounted(argv[optind])) < 0) {
+		fprintf(stderr, "Could not check mount status: %d\n", ret);
+		if (ret == -EACCES)
+			fprintf(stderr, "Maybe you need to run as root?\n");
+		return ret;
+	} else if (ret) {
+		fprintf(stderr, "%s is currently mounted.  Aborting.\n",
+			argv[optind]);
+		return -EBUSY;
+	}
+	*/
+
+	root = open_ctree(argv[optind], 0, 0);
+	if (!root) {
+		fprintf(stderr, "Couldn't open ctree\n");
+		exit(1);
+	}
+
+	/*
+	 * Zero both keys: only some of their fields are assigned below, and
+	 * the rest must not be passed on as uninitialized memory.
+	 */
+	roots = calloc(1, fs_roots_size);
+	if (!roots) {
+		fprintf(stderr, "No memory\n");
+		ret = 1;
+		goto out;
+	}
+	memset(&key, 0, sizeof(key));
+
+	printf("Calculating size of root tree\n");
+	key.objectid = BTRFS_ROOT_TREE_OBJECTID;
+	ret = calc_root_size(root, &key, 0);
+	if (ret)
+		goto out;
+
+	printf("Calculating size of extent tree\n");
+	key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
+	ret = calc_root_size(root, &key, 0);
+	if (ret)
+		goto out;
+
+	printf("Calculating size of csum tree\n");
+	key.objectid = BTRFS_CSUM_TREE_OBJECTID;
+	ret = calc_root_size(root, &key, 0);
+	if (ret)
+		goto out;
+
+	roots[0].key.objectid = BTRFS_FS_TREE_OBJECTID;
+	roots[0].key.offset = (u64)-1;
+	printf("Calculatin' size of fs tree\n");
+	ret = calc_root_size(root, &roots[0].key, 1);
+out:
+	free(roots);
+	close_ctree(root);
+	return ret;
+}
diff --git a/cmds-balance.c b/cmds-balance.c
new file mode 100644
index 0000000..38a7426
--- /dev/null
+++ b/cmds-balance.c
@@ -0,0 +1,713 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <getopt.h>
+#include <sys/ioctl.h>
+#include <errno.h>
+
+#include "kerncompat.h"
+#include "ctree.h"
+#include "ioctl.h"
+#include "volumes.h"
+
+#include "commands.h"
+
+/* Synopsis lines for the 'balance' command group; NULL-terminated. */
+static const char * const balance_cmd_group_usage[] = {
+	"btrfs [filesystem] balance <command> [options] <path>",
+	"btrfs [filesystem] balance <path>",
+	NULL
+};
+
+/* Deprecation notice for the old 'btrfs filesystem balance' spelling. */
+static const char balance_cmd_group_info[] =
+	"'btrfs filesystem balance' command is deprecated, please use\n"
+	"'btrfs balance start' command instead.";
+
+/*
+ * Translate one profile name into its flag bit and OR it into *@flags.
+ *
+ * Returns 0 on success; for an unknown name a message is printed to stderr
+ * and 1 is returned with *@flags left untouched.
+ */
+static int parse_one_profile(const char *profile, u64 *flags)
+{
+	static const struct {
+		const char *name;
+		u64 bit;
+	} known[] = {
+		{ "raid0",  BTRFS_BLOCK_GROUP_RAID0 },
+		{ "raid1",  BTRFS_BLOCK_GROUP_RAID1 },
+		{ "raid10", BTRFS_BLOCK_GROUP_RAID10 },
+		{ "dup",    BTRFS_BLOCK_GROUP_DUP },
+		{ "single", BTRFS_AVAIL_ALLOC_BIT_SINGLE },
+	};
+	size_t idx;
+
+	for (idx = 0; idx < sizeof(known) / sizeof(known[0]); idx++) {
+		if (strcmp(profile, known[idx].name) == 0) {
+			*flags |= known[idx].bit;
+			return 0;
+		}
+	}
+
+	fprintf(stderr, "Unknown profile '%s'\n", profile);
+	return 1;
+}
+
+/*
+ * Parse a '|'-separated list of profile names and OR every matching bit
+ * into *@flags.  @profiles is split in place by strtok_r().
+ *
+ * Returns 0 when every name was recognized, 1 at the first unknown one.
+ */
+static int parse_profiles(char *profiles, u64 *flags)
+{
+	char *state;
+	char *name = strtok_r(profiles, "|", &state);
+
+	while (name) {
+		if (parse_one_profile(name, flags))
+			return 1;
+		name = strtok_r(NULL, "|", &state);
+	}
+
+	return 0;
+}
+
+/*
+ * Parse @str as an unsigned decimal number into *@result.
+ *
+ * Returns 0 on success.  Returns 1, leaving *@result untouched, when @str
+ * contains no digits, has trailing characters, carries a minus sign (which
+ * strtoull() would silently wrap to a huge value) or does not fit the range
+ * of strtoull().
+ */
+static int parse_u64(const char *str, u64 *result)
+{
+	char *endptr;
+	u64 val;
+
+	/* a '-' anywhere is either a sign or trailing garbage: reject both */
+	if (strchr(str, '-'))
+		return 1;
+
+	errno = 0;
+	val = strtoull(str, &endptr, 10);
+	/* endptr == str means that no digits were consumed at all */
+	if (endptr == str || *endptr || errno == ERANGE)
+		return 1;
+
+	*result = val;
+	return 0;
+}
+
+/*
+ * Parse a range of the form "start..end" into *@start and *@end.
+ *
+ * Either bound may be omitted, but not both: a missing start becomes 0 and
+ * a missing end becomes (u64)-1.  The ".." separator in @range is
+ * overwritten with a NUL byte while parsing.
+ *
+ * Returns 0 on success and 1 when the separator is missing, a bound is not
+ * a number, both bounds are omitted, or start >= end (the latter is also
+ * reported on stderr).
+ */
+static int parse_range(const char *range, u64 *start, u64 *end)
+{
+	char *sep = strstr(range, "..");
+	const char *upper;
+	int open_ends = 0;
+
+	if (!sep)
+		return 1;
+
+	upper = sep + 2;
+	*sep = 0;
+
+	/* upper bound first, then the lower one */
+	if (*upper) {
+		if (parse_u64(upper, end))
+			return 1;
+	} else {
+		*end = (u64)-1;
+		open_ends++;
+	}
+
+	if (sep != range) {
+		if (parse_u64(range, start))
+			return 1;
+	} else {
+		*start = 0;
+		open_ends++;
+	}
+
+	if (*start >= *end) {
+		fprintf(stderr, "Range %llu..%llu doesn't make "
+			"sense\n", (unsigned long long)*start,
+			(unsigned long long)*end);
+		return 1;
+	}
+
+	/* a bare ".." leaves both ends open and is rejected */
+	return open_ends > 1;
+}
+
+/*
+ * Parse a comma-separated list of balance filters of the form "name[=value]"
+ * into @args, setting the matching BTRFS_BALANCE_ARGS_* bit in args->flags
+ * for every filter seen.
+ *
+ * @filters is modified in place (split by strtok_r(), '=' replaced by NUL).
+ * A NULL @filters is accepted and leaves @args unchanged.
+ *
+ * Returns 0 on success; on the first invalid or unknown filter a message is
+ * printed to stderr and 1 is returned.
+ */
+static int parse_filters(char *filters, struct btrfs_balance_args *args)
+{
+	char *this_char;
+	char *value;
+	char *save_ptr;
+
+	if (!filters)
+		return 0;
+
+	for (this_char = strtok_r(filters, ",", &save_ptr);
+	     this_char != NULL;
+	     this_char = strtok_r(NULL, ",", &save_ptr)) {
+		/* split "name=value"; value stays NULL when there is no '=' */
+		if ((value = strchr(this_char, '=')) != NULL)
+			*value++ = 0;
+		if (!strcmp(this_char, "profiles")) {
+			if (!value || !*value) {
+				fprintf(stderr, "the profiles filter requires "
+				       "an argument\n");
+				return 1;
+			}
+			if (parse_profiles(value, &args->profiles)) {
+				fprintf(stderr, "Invalid profiles argument\n");
+				return 1;
+			}
+			args->flags |= BTRFS_BALANCE_ARGS_PROFILES;
+		} else if (!strcmp(this_char, "usage")) {
+			if (!value || !*value) {
+				fprintf(stderr, "the usage filter requires "
+				       "an argument\n");
+				return 1;
+			}
+			/* only values from 1 to 100 are accepted */
+			if (parse_u64(value, &args->usage) ||
+			    args->usage < 1 || args->usage > 100) {
+				fprintf(stderr, "Invalid usage argument: %s\n",
+				       value);
+				return 1;
+			}
+			args->flags |= BTRFS_BALANCE_ARGS_USAGE;
+		} else if (!strcmp(this_char, "devid")) {
+			if (!value || !*value) {
+				fprintf(stderr, "the devid filter requires "
+				       "an argument\n");
+				return 1;
+			}
+			if (parse_u64(value, &args->devid) ||
+			    args->devid == 0) {
+				fprintf(stderr, "Invalid devid argument: %s\n",
+				       value);
+				return 1;
+			}
+			args->flags |= BTRFS_BALANCE_ARGS_DEVID;
+		} else if (!strcmp(this_char, "drange")) {
+			if (!value || !*value) {
+				fprintf(stderr, "the drange filter requires "
+				       "an argument\n");
+				return 1;
+			}
+			if (parse_range(value, &args->pstart, &args->pend)) {
+				fprintf(stderr, "Invalid drange argument\n");
+				return 1;
+			}
+			args->flags |= BTRFS_BALANCE_ARGS_DRANGE;
+		} else if (!strcmp(this_char, "vrange")) {
+			if (!value || !*value) {
+				fprintf(stderr, "the vrange filter requires "
+				       "an argument\n");
+				return 1;
+			}
+			if (parse_range(value, &args->vstart, &args->vend)) {
+				fprintf(stderr, "Invalid vrange argument\n");
+				return 1;
+			}
+			args->flags |= BTRFS_BALANCE_ARGS_VRANGE;
+		} else if (!strcmp(this_char, "convert")) {
+			if (!value || !*value) {
+				fprintf(stderr, "the convert option requires "
+				       "an argument\n");
+				return 1;
+			}
+			/* exactly one target profile, not a '|' list */
+			if (parse_one_profile(value, &args->target)) {
+				fprintf(stderr, "Invalid convert argument\n");
+				return 1;
+			}
+			args->flags |= BTRFS_BALANCE_ARGS_CONVERT;
+		} else if (!strcmp(this_char, "soft")) {
+			/* takes no value; any "=..." part is ignored */
+			args->flags |= BTRFS_BALANCE_ARGS_SOFT;
+		} else {
+			fprintf(stderr, "Unrecognized balance option '%s'\n",
+				this_char);
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Print a one-line, human readable description of the filters stored in
+ * @args to stdout.  Only filters whose flag bit is set are mentioned.
+ */
+static void dump_balance_args(struct btrfs_balance_args *args)
+{
+	if (!(args->flags & BTRFS_BALANCE_ARGS_CONVERT)) {
+		printf("balancing");
+	} else {
+		const char *soft = "off";
+
+		if (args->flags & BTRFS_BALANCE_ARGS_SOFT)
+			soft = "on";
+		printf("converting, target=%llu, soft is %s",
+		       (unsigned long long)args->target, soft);
+	}
+
+	if (args->flags & BTRFS_BALANCE_ARGS_PROFILES) {
+		printf(", profiles=%llu", (unsigned long long)args->profiles);
+	}
+	if (args->flags & BTRFS_BALANCE_ARGS_USAGE) {
+		printf(", usage=%llu", (unsigned long long)args->usage);
+	}
+	if (args->flags & BTRFS_BALANCE_ARGS_DEVID) {
+		printf(", devid=%llu", (unsigned long long)args->devid);
+	}
+	if (args->flags & BTRFS_BALANCE_ARGS_DRANGE) {
+		unsigned long long from = args->pstart;
+		unsigned long long to = args->pend;
+
+		printf(", drange=%llu..%llu", from, to);
+	}
+	if (args->flags & BTRFS_BALANCE_ARGS_VRANGE) {
+		unsigned long long from = args->vstart;
+		unsigned long long to = args->vend;
+
+		printf(", vrange=%llu..%llu", from, to);
+	}
+
+	printf("\n");
+}
+
+/*
+ * Print the complete balance request in @args to stdout: the global flags
+ * and state, followed by the per-type filters of every chunk type whose
+ * BTRFS_BALANCE_{DATA,METADATA,SYSTEM} bit is set.
+ */
+static void dump_ioctl_balance_args(struct btrfs_ioctl_balance_args *args)
+{
+	const char *force = (args->flags & BTRFS_BALANCE_FORCE) ? "on" : "off";
+
+	printf("Dumping filters: flags 0x%llx, state 0x%llx, force is %s\n",
+	       (unsigned long long)args->flags,
+	       (unsigned long long)args->state, force);
+
+	if (args->flags & BTRFS_BALANCE_DATA) {
+		struct btrfs_balance_args *data = &args->data;
+
+		printf("  DATA (flags 0x%llx): ",
+		       (unsigned long long)data->flags);
+		dump_balance_args(data);
+	}
+	if (args->flags & BTRFS_BALANCE_METADATA) {
+		struct btrfs_balance_args *meta = &args->meta;
+
+		printf("  METADATA (flags 0x%llx): ",
+		       (unsigned long long)meta->flags);
+		dump_balance_args(meta);
+	}
+	if (args->flags & BTRFS_BALANCE_SYSTEM) {
+		struct btrfs_balance_args *sys = &args->sys;
+
+		printf("  SYSTEM (flags 0x%llx): ",
+		       (unsigned long long)sys->flags);
+		dump_balance_args(sys);
+	}
+}
+
+/*
+ * Issue the old BTRFS_IOC_BALANCE ioctl on @fd with an all-zero argument.
+ * Returns whatever ioctl() returns.
+ */
+static int do_balance_v1(int fd)
+{
+	struct btrfs_ioctl_vol_args v1_args;
+
+	memset(&v1_args, 0, sizeof(v1_args));
+	return ioctl(fd, BTRFS_IOC_BALANCE, &v1_args);
+}
+
+/*
+ * Run a balance described by @args on the filesystem at @path.
+ *
+ * BTRFS_IOC_BALANCE_V2 is tried first.  When it fails with ENOTTY and
+ * @nofilters is non-zero, the old filterless ioctl is tried instead.
+ *
+ * Returns 0 on success and when the balance ended with ECANCELED, 12 when
+ * @path cannot be opened, and 19 on any other ioctl failure.
+ */
+static int do_balance(const char *path, struct btrfs_ioctl_balance_args *args,
+		      int nofilters)
+{
+	int fd;
+	int ret;
+	int e;
+
+	fd = open_file_or_dir(path);
+	if (fd < 0) {
+		fprintf(stderr, "ERROR: can't access to '%s'\n", path);
+		return 12;
+	}
+
+	ret = ioctl(fd, BTRFS_IOC_BALANCE_V2, args);
+	/* save errno right away, later library calls may overwrite it */
+	e = errno;
+
+	if (ret < 0) {
+		/*
+		 * older kernels don't have the new balance ioctl, try the
+		 * old one.  But, the old one doesn't know any filters, so
+		 * don't fall back if they tried to use the fancy new things
+		 */
+		if (e == ENOTTY && nofilters) {
+			ret = do_balance_v1(fd);
+			/* note: the fallback succeeds without a "Done" line */
+			if (ret == 0)
+				goto out;
+			e = errno;
+		}
+
+		if (e == ECANCELED) {
+			if (args->state & BTRFS_BALANCE_STATE_PAUSE_REQ)
+				fprintf(stderr, "balance paused by user\n");
+			if (args->state & BTRFS_BALANCE_STATE_CANCEL_REQ)
+				fprintf(stderr, "balance canceled by user\n");
+			ret = 0;
+		} else {
+			fprintf(stderr, "ERROR: error during balancing '%s' "
+				"- %s\n", path, strerror(e));
+			if (e != EINPROGRESS)
+				fprintf(stderr, "There may be more info in "
+					"syslog - try dmesg | tail\n");
+			ret = 19;
+		}
+	} else {
+		printf("Done, had to relocate %llu out of %llu chunks\n",
+		       (unsigned long long)args->stat.completed,
+		       (unsigned long long)args->stat.considered);
+		ret = 0;
+	}
+
+out:
+	close(fd);
+	return ret;
+}
+
+static const char * const cmd_balance_start_usage[] = {	/* help text, NULL-terminated */
+	"btrfs [filesystem] balance start [options] <path>",
+	"Balance chunks across the devices",
+	"Balance and/or convert (change allocation profile of) chunks that",
+	"passed all filters in a comma-separated list of filters for a",
+	"particular chunk type.  If filter list is not given balance all",
+	"chunks of that type.  In case none of the -d, -m or -s options is",
+	"given balance all chunks in a filesystem.",
+	"",
+	"-d[filters]    act on data chunks",
+	"-m[filters]    act on metadata chunks",
+	"-s[filters]    act on system chunks (only under -f)",
+	"-v             be verbose",
+	"-f             force reducing of metadata integrity",
+	NULL
+};
+
+/*
+ * 'btrfs balance start': parse the -d/-m/-s filter options plus -f and -v,
+ * validate the filter combination and start the balance on the single
+ * remaining <path> argument.
+ *
+ * Returns 1 on invalid options or filters, otherwise the result of
+ * do_balance().
+ */
+static int cmd_balance_start(int argc, char **argv)
+{
+	struct btrfs_ioctl_balance_args args;
+	/* NULL-terminated list used by the validation loops below */
+	struct btrfs_balance_args *ptrs[] = { &args.data, &args.sys,
+						&args.meta, NULL };
+	int force = 0;
+	int verbose = 0;
+	/* cleared as soon as any of -d, -m or -s is given */
+	int nofilters = 1;
+	int i;
+
+	memset(&args, 0, sizeof(args));
+
+	optind = 1;
+	while (1) {
+		int longindex;
+		static struct option longopts[] = {
+			{ "data", optional_argument, NULL, 'd'},
+			{ "metadata", optional_argument, NULL, 'm' },
+			{ "system", optional_argument, NULL, 's' },
+			{ "force", no_argument, NULL, 'f' },
+			{ "verbose", no_argument, NULL, 'v' },
+			{ 0, 0, 0, 0 }
+		};
+
+		int opt = getopt_long(argc, argv, "d::s::m::fv", longopts,
+				      &longindex);
+		if (opt < 0)
+			break;
+
+		switch (opt) {
+		case 'd':
+			nofilters = 0;
+			args.flags |= BTRFS_BALANCE_DATA;
+
+			/* optarg is NULL when no filter list was attached */
+			if (parse_filters(optarg, &args.data))
+				return 1;
+			break;
+		case 's':
+			nofilters = 0;
+			args.flags |= BTRFS_BALANCE_SYSTEM;
+
+			if (parse_filters(optarg, &args.sys))
+				return 1;
+			break;
+		case 'm':
+			nofilters = 0;
+			args.flags |= BTRFS_BALANCE_METADATA;
+
+			if (parse_filters(optarg, &args.meta))
+				return 1;
+			break;
+		case 'f':
+			force = 1;
+			break;
+		case 'v':
+			verbose = 1;
+			break;
+		default:
+			usage(cmd_balance_start_usage);
+		}
+	}
+
+	/* exactly one non-option argument, the path, must remain */
+	if (check_argc_exact(argc - optind, 1))
+		usage(cmd_balance_start_usage);
+
+	/*
+	 * allow -s only under --force, otherwise do with system chunks
+	 * the same thing we were ordered to do with meta chunks
+	 */
+	if (args.flags & BTRFS_BALANCE_SYSTEM) {
+		if (!force) {
+			fprintf(stderr,
+"Refusing to explicitly operate on system chunks.\n"
+"Pass --force if you really want to do that.\n");
+			return 1;
+		}
+	} else if (args.flags & BTRFS_BALANCE_METADATA) {
+		args.flags |= BTRFS_BALANCE_SYSTEM;
+		memcpy(&args.sys, &args.meta,
+			sizeof(struct btrfs_balance_args));
+	}
+
+	if (nofilters) {
+		/* relocate everything - no filters */
+		args.flags |= BTRFS_BALANCE_TYPE_MASK;
+	}
+
+	/* drange makes sense only when devid is set */
+	for (i = 0; ptrs[i]; i++) {
+		if ((ptrs[i]->flags & BTRFS_BALANCE_ARGS_DRANGE) &&
+		    !(ptrs[i]->flags & BTRFS_BALANCE_ARGS_DEVID)) {
+			fprintf(stderr, "drange filter can be used only if "
+				"devid filter is used\n");
+			return 1;
+		}
+	}
+
+	/* soft makes sense only when convert for corresponding type is set */
+	for (i = 0; ptrs[i]; i++) {
+		if ((ptrs[i]->flags & BTRFS_BALANCE_ARGS_SOFT) &&
+		    !(ptrs[i]->flags & BTRFS_BALANCE_ARGS_CONVERT)) {
+			fprintf(stderr, "'soft' option can be used only if "
+				"changing profiles\n");
+			return 1;
+		}
+	}
+
+	if (force)
+		args.flags |= BTRFS_BALANCE_FORCE;
+	if (verbose)
+		dump_ioctl_balance_args(&args);
+
+	return do_balance(argv[optind], &args, nofilters);
+}
+
+static const char * const cmd_balance_pause_usage[] = {
+	"btrfs [filesystem] balance pause <path>",
+	"Pause running balance",
+	NULL
+};
+
+/*
+ * 'btrfs balance pause <path>': ask the kernel to pause the balance running
+ * on the filesystem at <path>.
+ *
+ * Returns 0 on success, 12 when <path> cannot be opened and 19 when the
+ * ioctl fails.
+ */
+static int cmd_balance_pause(int argc, char **argv)
+{
+	const char *target;
+	const char *reason;
+	int balance_fd;
+	int rc;
+	int saved_errno;
+
+	if (check_argc_exact(argc, 2))
+		usage(cmd_balance_pause_usage);
+
+	target = argv[1];
+	balance_fd = open_file_or_dir(target);
+	if (balance_fd < 0) {
+		fprintf(stderr, "ERROR: can't access to '%s'\n", target);
+		return 12;
+	}
+
+	rc = ioctl(balance_fd, BTRFS_IOC_BALANCE_CTL, BTRFS_BALANCE_CTL_PAUSE);
+	/* keep errno before close() gets a chance to change it */
+	saved_errno = errno;
+	close(balance_fd);
+
+	if (rc >= 0)
+		return 0;
+
+	reason = (saved_errno == ENOTCONN) ? "Not running" :
+					     strerror(saved_errno);
+	fprintf(stderr, "ERROR: balance pause on '%s' failed - %s\n",
+		target, reason);
+	return 19;
+}
+
+static const char * const cmd_balance_cancel_usage[] = {
+	"btrfs [filesystem] balance cancel <path>",
+	"Cancel running or paused balance",
+	NULL
+};
+
+/*
+ * 'btrfs balance cancel <path>': ask the kernel to cancel the running or
+ * paused balance on the filesystem at <path>.
+ *
+ * Returns 0 on success, 12 when <path> cannot be opened and 19 when the
+ * ioctl fails.
+ */
+static int cmd_balance_cancel(int argc, char **argv)
+{
+	const char *target;
+	const char *reason;
+	int balance_fd;
+	int rc;
+	int saved_errno;
+
+	if (check_argc_exact(argc, 2))
+		usage(cmd_balance_cancel_usage);
+
+	target = argv[1];
+	balance_fd = open_file_or_dir(target);
+	if (balance_fd < 0) {
+		fprintf(stderr, "ERROR: can't access to '%s'\n", target);
+		return 12;
+	}
+
+	rc = ioctl(balance_fd, BTRFS_IOC_BALANCE_CTL, BTRFS_BALANCE_CTL_CANCEL);
+	/* keep errno before close() gets a chance to change it */
+	saved_errno = errno;
+	close(balance_fd);
+
+	if (rc >= 0)
+		return 0;
+
+	reason = (saved_errno == ENOTCONN) ? "Not in progress" :
+					     strerror(saved_errno);
+	fprintf(stderr, "ERROR: balance cancel on '%s' failed - %s\n",
+		target, reason);
+	return 19;
+}
+
+static const char * const cmd_balance_resume_usage[] = {
+	"btrfs [filesystem] balance resume <path>",
+	"Resume interrupted balance",
+	NULL
+};
+
+/*
+ * 'btrfs balance resume <path>': resume an interrupted balance by issuing
+ * BTRFS_IOC_BALANCE_V2 with only the BTRFS_BALANCE_RESUME flag set.
+ *
+ * Returns 0 when the balance finished or ended with ECANCELED, 12 when
+ * <path> cannot be opened and 19 on any other ioctl failure.
+ */
+static int cmd_balance_resume(int argc, char **argv)
+{
+	struct btrfs_ioctl_balance_args args;
+	const char *target;
+	int balance_fd;
+	int rc;
+	int saved_errno;
+
+	if (check_argc_exact(argc, 2))
+		usage(cmd_balance_resume_usage);
+
+	target = argv[1];
+	balance_fd = open_file_or_dir(target);
+	if (balance_fd < 0) {
+		fprintf(stderr, "ERROR: can't access to '%s'\n", target);
+		return 12;
+	}
+
+	memset(&args, 0, sizeof(args));
+	args.flags |= BTRFS_BALANCE_RESUME;
+
+	rc = ioctl(balance_fd, BTRFS_IOC_BALANCE_V2, &args);
+	/* keep errno before close() gets a chance to change it */
+	saved_errno = errno;
+	close(balance_fd);
+
+	if (rc >= 0) {
+		printf("Done, had to relocate %llu out of %llu chunks\n",
+		       (unsigned long long)args.stat.completed,
+		       (unsigned long long)args.stat.considered);
+		return 0;
+	}
+
+	switch (saved_errno) {
+	case ECANCELED:
+		if (args.state & BTRFS_BALANCE_STATE_PAUSE_REQ)
+			fprintf(stderr, "balance paused by user\n");
+		if (args.state & BTRFS_BALANCE_STATE_CANCEL_REQ)
+			fprintf(stderr, "balance canceled by user\n");
+		return 0;
+	case ENOTCONN:
+	case EINPROGRESS:
+		fprintf(stderr, "ERROR: balance resume on '%s' "
+			"failed - %s\n", target,
+			(saved_errno == ENOTCONN) ? "Not in progress" :
+						    "Already running");
+		return 19;
+	default:
+		fprintf(stderr,
+"ERROR: error during balancing '%s' - %s\n"
+"There may be more info in syslog - try dmesg | tail\n", target,
+			strerror(saved_errno));
+		return 19;
+	}
+}
+
+static const char * const cmd_balance_status_usage[] = {
+	"btrfs [filesystem] balance status [-v] <path>",
+	"Show status of running or paused balance",
+	"",
+	"-v     be verbose",
+	NULL
+};
+
+/*
+ * 'btrfs balance status [-v] <path>': query BTRFS_IOC_BALANCE_PROGRESS and
+ * print whether the balance is running or paused together with its
+ * progress counters.  With -v the balance filters are dumped as well.
+ *
+ * Returns 0 on success, 12 when <path> cannot be opened and 19 when the
+ * ioctl fails.
+ */
+static int cmd_balance_status(int argc, char **argv)
+{
+	struct btrfs_ioctl_balance_args args;
+	const char *path;
+	float left = 0;
+	int fd;
+	int verbose = 0;
+	int ret;
+	int e;
+
+	optind = 1;
+	while (1) {
+		int longindex;
+		static struct option longopts[] = {
+			{ "verbose", no_argument, NULL, 'v' },
+			{ 0, 0, 0, 0}
+		};
+
+		int opt = getopt_long(argc, argv, "v", longopts, &longindex);
+		if (opt < 0)
+			break;
+
+		switch (opt) {
+		case 'v':
+			verbose = 1;
+			break;
+		default:
+			usage(cmd_balance_status_usage);
+		}
+	}
+
+	if (check_argc_exact(argc - optind, 1))
+		usage(cmd_balance_status_usage);
+
+	path = argv[optind];
+
+	fd = open_file_or_dir(path);
+	if (fd < 0) {
+		fprintf(stderr, "ERROR: can't access to '%s'\n", path);
+		return 12;
+	}
+
+	/* never hand uninitialized stack memory to the kernel */
+	memset(&args, 0, sizeof(args));
+	ret = ioctl(fd, BTRFS_IOC_BALANCE_PROGRESS, &args);
+	e = errno;
+	close(fd);
+
+	if (ret < 0) {
+		fprintf(stderr, "ERROR: balance status on '%s' failed - %s\n",
+			path, (e == ENOTCONN) ? "Not in progress" : strerror(e));
+		return 19;
+	}
+
+	if (args.state & BTRFS_BALANCE_STATE_RUNNING) {
+		printf("Balance on '%s' is running", path);
+		if (args.state & BTRFS_BALANCE_STATE_CANCEL_REQ)
+			printf(", cancel requested\n");
+		else if (args.state & BTRFS_BALANCE_STATE_PAUSE_REQ)
+			printf(", pause requested\n");
+		else
+			printf("\n");
+	} else {
+		printf("Balance on '%s' is paused\n", path);
+	}
+
+	/*
+	 * Avoid 0/0 (printed as "nan") when no chunks are expected; in that
+	 * case nothing is reported as left.
+	 * NOTE(review): confirm that 0% is the desired figure for expected == 0.
+	 */
+	if (args.stat.expected)
+		left = 100 * (1 - (float)args.stat.completed /
+				  args.stat.expected);
+
+	printf("%llu out of about %llu chunks balanced (%llu considered), "
+	       "%3.f%% left\n", (unsigned long long)args.stat.completed,
+	       (unsigned long long)args.stat.expected,
+	       (unsigned long long)args.stat.considered,
+	       left);
+
+	if (verbose)
+		dump_ioctl_balance_args(&args);
+
+	return 0;
+}
+
+/*
+ * Sub-command table of the 'balance' group: group usage, group info text
+ * and one entry per sub-command, terminated by an all-zero entry.
+ */
+const struct cmd_group balance_cmd_group = {
+	balance_cmd_group_usage, balance_cmd_group_info, {
+		{ "start", cmd_balance_start, cmd_balance_start_usage, NULL, 0 },
+		{ "pause", cmd_balance_pause, cmd_balance_pause_usage, NULL, 0 },
+		{ "cancel", cmd_balance_cancel, cmd_balance_cancel_usage, NULL, 0 },
+		{ "resume", cmd_balance_resume, cmd_balance_resume_usage, NULL, 0 },
+		{ "status", cmd_balance_status, cmd_balance_status_usage, NULL, 0 },
+		{ 0, 0, 0, 0, 0 }
+	}
+};
+
+/*
+ * Entry point of the 'balance' command group.  A call with exactly one
+ * argument is treated as the old 'btrfs filesystem balance <path>' syntax
+ * and balances everything; anything else is dispatched to the sub-commands.
+ */
+int cmd_balance(int argc, char **argv)
+{
+	struct btrfs_ioctl_balance_args args;
+
+	if (argc != 2)
+		return handle_command_group(&balance_cmd_group, argc, argv);
+
+	/* old 'btrfs filesystem balance <path>' syntax */
+	memset(&args, 0, sizeof(args));
+	args.flags |= BTRFS_BALANCE_TYPE_MASK;
+
+	return do_balance(argv[1], &args, 1);
+}
diff --git a/cmds-device.c b/cmds-device.c
new file mode 100644
index 0000000..db625a6
--- /dev/null
+++ b/cmds-device.c
@@ -0,0 +1,261 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <errno.h>
+#include <sys/stat.h>
+
+#include "kerncompat.h"
+#include "ctree.h"
+#include "ioctl.h"
+#include "utils.h"
+
+#include "commands.h"
+
+/* FIXME - imported cruft, fix sparse errors and warnings */
+#ifdef __CHECKER__
+#define BLKGETSIZE64 0
+#define BTRFS_IOC_SNAP_CREATE_V2 0
+#define BTRFS_VOL_NAME_MAX 255
+struct btrfs_ioctl_vol_args { char name[BTRFS_VOL_NAME_MAX]; };
+static inline int ioctl(int fd, int define, void *arg) { return 0; }
+#endif
+
+/* Synopsis line for the 'device' command group; NULL-terminated. */
+static const char * const device_cmd_group_usage[] = {
+	"btrfs device <command> [<args>]",
+	NULL
+};
+
+static const char * const cmd_add_dev_usage[] = {
+	"btrfs device add <device> [<device>...] <path>",
+	"Add a device to a filesystem",
+	NULL
+};
+
+/*
+ * 'btrfs device add': prepare every listed block device and add it to the
+ * filesystem given as the last argument.
+ *
+ * A device that fails a check is reported on stderr and skipped; the
+ * remaining devices are still processed.
+ *
+ * Returns 0 when every device was added, 12 when <path> cannot be opened,
+ * otherwise 20 plus the number of devices that failed.
+ */
+static int cmd_add_dev(int argc, char **argv)
+{
+	char	*mntpnt;
+	int	i, fdmnt, ret=0, e;
+
+	if (check_argc_min(argc, 3))
+		usage(cmd_add_dev_usage);
+
+	mntpnt = argv[argc - 1];
+
+	fdmnt = open_file_or_dir(mntpnt);
+	if (fdmnt < 0) {
+		fprintf(stderr, "ERROR: can't access to '%s'\n", mntpnt);
+		return 12;
+	}
+
+	for (i = 1; i < argc - 1; i++ ){
+		struct btrfs_ioctl_vol_args ioctl_args;
+		int	devfd, res;
+		u64 dev_block_count = 0;
+		struct stat st;
+		int mixed = 0;
+
+		res = check_mounted(argv[i]);
+		if (res < 0) {
+			fprintf(stderr, "error checking %s mount status\n",
+				argv[i]);
+			ret++;
+			continue;
+		}
+		if (res == 1) {
+			fprintf(stderr, "%s is mounted\n", argv[i]);
+			ret++;
+			continue;
+		}
+
+		/* open() signals failure with -1; 0 is a valid descriptor */
+		devfd = open(argv[i], O_RDWR);
+		if (devfd < 0) {
+			fprintf(stderr, "ERROR: Unable to open device '%s'\n", argv[i]);
+			ret++;
+			continue;
+		}
+		res = fstat(devfd, &st);
+		if (res) {
+			fprintf(stderr, "ERROR: Unable to stat '%s'\n", argv[i]);
+			close(devfd);
+			ret++;
+			continue;
+		}
+		if (!S_ISBLK(st.st_mode)) {
+			fprintf(stderr, "ERROR: '%s' is not a block device\n", argv[i]);
+			close(devfd);
+			ret++;
+			continue;
+		}
+
+		res = btrfs_prepare_device(devfd, argv[i], 1, &dev_block_count, &mixed);
+		if (res) {
+			fprintf(stderr, "ERROR: Unable to init '%s'\n", argv[i]);
+			close(devfd);
+			ret++;
+			continue;
+		}
+		close(devfd);
+
+		/*
+		 * Zero the argument first: no uninitialized stack bytes reach
+		 * the kernel and the name stays NUL-terminated even when
+		 * strncpy() truncates.
+		 * NOTE(review): assumes name[] holds BTRFS_PATH_NAME_MAX + 1
+		 * bytes, as the strncpy() bound implies - confirm in ioctl.h.
+		 */
+		memset(&ioctl_args, 0, sizeof(ioctl_args));
+		strncpy(ioctl_args.name, argv[i], BTRFS_PATH_NAME_MAX);
+		res = ioctl(fdmnt, BTRFS_IOC_ADD_DEV, &ioctl_args);
+		e = errno;
+		if(res<0){
+			fprintf(stderr, "ERROR: error adding the device '%s' - %s\n",
+				argv[i], strerror(e));
+			ret++;
+		}
+
+	}
+
+	close(fdmnt);
+	if (ret)
+		return ret+20;
+	else
+		return 0;
+}
+
+static const char * const cmd_rm_dev_usage[] = {
+	"btrfs device delete <device> [<device>...] <path>",
+	"Remove a device from a filesystem",
+	NULL
+};
+
+/*
+ * 'btrfs device delete': remove every listed device from the filesystem
+ * given as the last argument.
+ *
+ * Returns 0 when every device was removed, 12 when <path> cannot be opened,
+ * otherwise 20 plus the number of devices that failed.
+ */
+static int cmd_rm_dev(int argc, char **argv)
+{
+	char	*mntpnt;
+	int	i, fdmnt, ret=0, e;
+
+	if (check_argc_min(argc, 3))
+		usage(cmd_rm_dev_usage);
+
+	mntpnt = argv[argc - 1];
+
+	fdmnt = open_file_or_dir(mntpnt);
+	if (fdmnt < 0) {
+		fprintf(stderr, "ERROR: can't access to '%s'\n", mntpnt);
+		return 12;
+	}
+
+	for(i=1 ; i < argc - 1; i++ ){
+		struct	btrfs_ioctl_vol_args arg;
+		int	res;
+
+		/*
+		 * Zero the argument first: no uninitialized stack bytes reach
+		 * the kernel and the name stays NUL-terminated even when
+		 * strncpy() truncates.
+		 * NOTE(review): assumes name[] holds BTRFS_PATH_NAME_MAX + 1
+		 * bytes, as the strncpy() bound implies - confirm in ioctl.h.
+		 */
+		memset(&arg, 0, sizeof(arg));
+		strncpy(arg.name, argv[i], BTRFS_PATH_NAME_MAX);
+		res = ioctl(fdmnt, BTRFS_IOC_RM_DEV, &arg);
+		e = errno;
+		if(res<0){
+			fprintf(stderr, "ERROR: error removing the device '%s' - %s\n",
+				argv[i], strerror(e));
+			ret++;
+		}
+	}
+
+	close(fdmnt);
+	if( ret)
+		return ret+20;
+	else
+		return 0;
+}
+
+static const char * const cmd_scan_dev_usage[] = {
+	"btrfs device scan [--all-devices | <device>...]",
+	"Scan devices for a btrfs filesystem",
+	NULL
+};
+
+/*
+ * 'btrfs device scan': register btrfs devices with the kernel.
+ *
+ * Without device arguments the block devices are scanned through
+ * btrfs_scan_block_devices(); with --all-devices every entry below /dev is
+ * scanned instead.  With explicit device arguments each one is passed to
+ * BTRFS_IOC_SCAN_DEV on /dev/btrfs-control, stopping at the first failure.
+ *
+ * Returns 0 on success, 18 when the automatic scan fails, 10 when the
+ * control device cannot be opened and 11 when scanning a device fails.
+ */
+static int cmd_scan_dev(int argc, char **argv)
+{
+	int	i, fd, e;
+	int	checklist = 1;
+	int	devstart = 1;
+
+	if( argc > 1 && !strcmp(argv[1],"--all-devices")){
+		/* --all-devices cannot be combined with a device list */
+		if (check_argc_max(argc, 2))
+			usage(cmd_scan_dev_usage);
+
+		checklist = 0;
+		devstart += 1;
+	}
+
+	if(argc<=devstart){
+
+		int ret;
+
+		printf("Scanning for Btrfs filesystems\n");
+		if(checklist)
+			ret = btrfs_scan_block_devices(1);
+		else
+			ret = btrfs_scan_one_dir("/dev", 1);
+		if (ret){
+			fprintf(stderr, "ERROR: error %d while scanning\n", ret);
+			return 18;
+		}
+		return 0;
+	}
+
+	fd = open("/dev/btrfs-control", O_RDWR);
+	if (fd < 0) {
+		perror("failed to open /dev/btrfs-control");
+		return 10;
+	}
+
+	for( i = devstart ; i < argc ; i++ ){
+		struct btrfs_ioctl_vol_args args;
+		int ret;
+
+		printf("Scanning for Btrfs filesystems in '%s'\n", argv[i]);
+
+		/*
+		 * Zero the argument first: no uninitialized stack bytes reach
+		 * the kernel and the name stays NUL-terminated even when
+		 * strncpy() truncates.
+		 * NOTE(review): assumes name[] holds BTRFS_PATH_NAME_MAX + 1
+		 * bytes, as the strncpy() bound implies - confirm in ioctl.h.
+		 */
+		memset(&args, 0, sizeof(args));
+		strncpy(args.name, argv[i], BTRFS_PATH_NAME_MAX);
+		/*
+		 * FIXME: which error codes does this ioctl return?  It seems
+		 * impossible to tell "no btrfs filesystem found" apart from
+		 * an I/O error.
+		 */
+		ret = ioctl(fd, BTRFS_IOC_SCAN_DEV, &args);
+		e = errno;
+
+		if( ret < 0 ){
+			close(fd);
+			fprintf(stderr, "ERROR: unable to scan the device '%s' - %s\n",
+				argv[i], strerror(e));
+			return 11;
+		}
+	}
+
+	close(fd);
+	return 0;
+}
+
+/*
+ * Sub-command table of the 'device' group: group usage, no info text and
+ * one entry per sub-command, terminated by an all-zero entry.
+ */
+const struct cmd_group device_cmd_group = {
+	device_cmd_group_usage, NULL, {
+		{ "add", cmd_add_dev, cmd_add_dev_usage, NULL, 0 },
+		{ "delete", cmd_rm_dev, cmd_rm_dev_usage, NULL, 0 },
+		{ "scan", cmd_scan_dev, cmd_scan_dev_usage, NULL, 0 },
+		{ 0, 0, 0, 0, 0 }
+	}
+};
+
+/* Entry point of the 'device' command group: dispatch to its sub-commands. */
+int cmd_device(int argc, char **argv)
+{
+	int status = handle_command_group(&device_cmd_group, argc, argv);
+
+	return status;
+}
diff --git a/cmds-filesystem.c b/cmds-filesystem.c
new file mode 100644
index 0000000..1f53d1c
--- /dev/null
+++ b/cmds-filesystem.c
@@ -0,0 +1,538 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+#include <errno.h>
+#include <uuid/uuid.h>
+#include <ctype.h>
+
+#include "kerncompat.h"
+#include "ctree.h"
+#include "ioctl.h"
+#include "utils.h"
+#include "volumes.h"
+
+#include "version.h"
+
+#include "commands.h"
+#include "btrfslabel.h"
+
+/* Synopsis of the 'btrfs filesystem' group, referenced by filesystem_cmd_group. */
+static const char * const filesystem_cmd_group_usage[] = {
+	"btrfs filesystem [<group>] <command> [<args>]",
+	NULL
+};
+
+static const char * const cmd_df_usage[] = {
+	"btrfs filesystem df <path>",
+	"Show space usage information for a mount point",
+	NULL
+};
+
+/*
+ * 'btrfs filesystem df <path>': print one line per space-info entry
+ * reported by BTRFS_IOC_SPACE_INFO, in the form
+ * "<type>[, <profile>]: total=<size>, used=<size>".
+ *
+ * The ioctl is issued twice: first with zero slots to learn how many
+ * entries exist, then again with a buffer large enough to hold them.
+ *
+ * Returns 0 on success, 12 if <path> cannot be opened, -ENOMEM if memory
+ * runs out, or the ioctl return value if the ioctl fails. The descriptor
+ * and the argument buffer are released on every path.
+ */
+static int cmd_df(int argc, char **argv)
+{
+	struct btrfs_ioctl_space_args *sargs = NULL;
+	struct btrfs_ioctl_space_args *bigger;
+	u64 count, i;
+	int ret;
+	int fd;
+	int e;
+	char *path;
+
+	if (check_argc_exact(argc, 2))
+		usage(cmd_df_usage);
+
+	path = argv[1];
+
+	fd = open_file_or_dir(path);
+	if (fd < 0) {
+		fprintf(stderr, "ERROR: can't access to '%s'\n", path);
+		return 12;
+	}
+
+	sargs = malloc(sizeof(struct btrfs_ioctl_space_args));
+	if (!sargs) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	/* first pass: no slots, only ask for the number of entries */
+	sargs->space_slots = 0;
+	sargs->total_spaces = 0;
+
+	ret = ioctl(fd, BTRFS_IOC_SPACE_INFO, sargs);
+	e = errno;
+	if (ret) {
+		fprintf(stderr, "ERROR: couldn't get space info on '%s' - %s\n",
+			path, strerror(e));
+		goto out;
+	}
+	if (!sargs->total_spaces)
+		goto out;	/* nothing to report, ret is 0 */
+
+	count = sargs->total_spaces;
+
+	/* keep the old pointer so it can still be freed if realloc() fails */
+	bigger = realloc(sargs, sizeof(struct btrfs_ioctl_space_args) +
+			(count * sizeof(struct btrfs_ioctl_space_info)));
+	if (!bigger) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	sargs = bigger;
+
+	/* second pass: fetch the entries themselves */
+	sargs->space_slots = count;
+	sargs->total_spaces = 0;
+
+	ret = ioctl(fd, BTRFS_IOC_SPACE_INFO, sargs);
+	e = errno;
+	if (ret) {
+		fprintf(stderr, "ERROR: couldn't get space info on '%s' - %s\n",
+			path, strerror(e));
+		goto out;
+	}
+
+	/* defensive: never walk past the slots that were allocated above */
+	if (sargs->total_spaces < count)
+		count = sargs->total_spaces;
+
+	for (i = 0; i < count; i++) {
+		const char *type = "";
+		const char *profile = "";
+		char *total_bytes;
+		char *used_bytes;
+		u64 flags = sargs->spaces[i].flags;
+
+		if (flags & BTRFS_BLOCK_GROUP_DATA) {
+			if (flags & BTRFS_BLOCK_GROUP_METADATA)
+				type = "Data+Metadata";
+			else
+				type = "Data";
+		} else if (flags & BTRFS_BLOCK_GROUP_SYSTEM) {
+			type = "System";
+		} else if (flags & BTRFS_BLOCK_GROUP_METADATA) {
+			type = "Metadata";
+		}
+
+		if (flags & BTRFS_BLOCK_GROUP_RAID0)
+			profile = ", RAID0";
+		else if (flags & BTRFS_BLOCK_GROUP_RAID1)
+			profile = ", RAID1";
+		else if (flags & BTRFS_BLOCK_GROUP_DUP)
+			profile = ", DUP";
+		else if (flags & BTRFS_BLOCK_GROUP_RAID10)
+			profile = ", RAID10";
+
+		total_bytes = pretty_sizes(sargs->spaces[i].total_bytes);
+		used_bytes = pretty_sizes(sargs->spaces[i].used_bytes);
+		printf("%s%s: total=%s, used=%s\n", type, profile, total_bytes,
+		       used_bytes);
+		/* released the same way print_one_uuid() releases them */
+		free(total_bytes);
+		free(used_bytes);
+	}
+
+out:
+	free(sargs);
+	close(fd);
+	return ret;
+}
+
+/*
+ * Check whether a scanned filesystem matches the user's search term.
+ *
+ * The term matches if it parses as a UUID equal to the filesystem's fsid,
+ * or if it equals the label or the name of any of its devices. The UUID
+ * comparison is done on the parsed binary form, so the letter case of the
+ * term does not matter.
+ *
+ * Returns 1 on a match, 0 otherwise.
+ */
+static int uuid_search(struct btrfs_fs_devices *fs_devices, char *search)
+{
+	struct list_head *cur;
+	struct btrfs_device *device;
+	uuid_t wanted;
+
+	/* 'filesystem show' documents <uuid> as a valid search term */
+	if (uuid_parse(search, wanted) == 0 &&
+	    uuid_compare(wanted, fs_devices->fsid) == 0)
+		return 1;
+
+	list_for_each(cur, &fs_devices->devices) {
+		device = list_entry(cur, struct btrfs_device, dev_list);
+		if ((device->label && strcmp(device->label, search) == 0) ||
+		    strcmp(device->name, search) == 0)
+			return 1;
+	}
+	return 0;
+}
+
+/*
+ * Print the summary of one scanned filesystem: label and UUID, then the
+ * device count and bytes used as recorded on its first device, then one
+ * line per device found. A warning is added when fewer devices were found
+ * than the first device's total_devs value.
+ */
+static void print_one_uuid(struct btrfs_fs_devices *fs_devices)
+{
+	struct btrfs_device *first;
+	struct btrfs_device *dev;
+	struct list_head *pos;
+	char fsid_str[37];
+	char *fs_used;
+	u64 expected;
+	u64 seen = 0;
+
+	uuid_unparse(fs_devices->fsid, fsid_str);
+	first = list_entry(fs_devices->devices.next, struct btrfs_device,
+			   dev_list);
+
+	if (!first->label || !first->label[0])
+		printf("Label: none ");
+	else
+		printf("Label: '%s' ", first->label);
+
+	expected = first->total_devs;
+	fs_used = pretty_sizes(first->super_bytes_used);
+	printf(" uuid: %s\n\tTotal devices %llu FS bytes used %s\n", fsid_str,
+	       (unsigned long long)expected, fs_used);
+	free(fs_used);
+
+	list_for_each(pos, &fs_devices->devices) {
+		char *size_str;
+		char *used_str;
+
+		dev = list_entry(pos, struct btrfs_device, dev_list);
+		size_str = pretty_sizes(dev->total_bytes);
+		used_str = pretty_sizes(dev->bytes_used);
+		printf("\tdevid %4llu size %s used %s path %s\n",
+		       (unsigned long long)dev->devid,
+		       size_str, used_str, dev->name);
+		free(size_str);
+		free(used_str);
+		seen++;
+	}
+
+	if (seen < expected)
+		printf("\t*** Some devices missing\n");
+	printf("\n");
+}
+
+static const char * const cmd_show_usage[] = {
+	"btrfs filesystem show [--all-devices] [<uuid>|<label>]",
+	"Show the structure of a filesystem",
+	"If no argument is given, structure of all present filesystems is shown.",
+	NULL
+};
+
+/*
+ * 'btrfs filesystem show': scan for btrfs devices and print every scanned
+ * filesystem, or only those accepted by uuid_search() when a search term
+ * is given. The build version string is printed last.
+ *
+ * Returns 0 on success and 18 if the scan failed.
+ */
+static int cmd_show(int argc, char **argv)
+{
+	struct list_head *uuids;
+	struct list_head *pos;
+	struct btrfs_fs_devices *fs;
+	char *term = NULL;
+	int all_devices;
+	int first_arg;
+	int err;
+
+	all_devices = argc > 1 && strcmp(argv[1], "--all-devices") == 0;
+	first_arg = all_devices ? 2 : 1;
+
+	if (check_argc_max(argc, first_arg + 1))
+		usage(cmd_show_usage);
+
+	if (all_devices)
+		err = btrfs_scan_one_dir("/dev", 0);
+	else
+		err = btrfs_scan_block_devices(0);
+
+	if (err) {
+		fprintf(stderr, "ERROR: error %d while scanning\n", err);
+		return 18;
+	}
+
+	if (first_arg < argc)
+		term = argv[first_arg];
+
+	uuids = btrfs_scanned_uuids();
+	list_for_each(pos, uuids) {
+		fs = list_entry(pos, struct btrfs_fs_devices, list);
+		if (!term || uuid_search(fs, term) != 0)
+			print_one_uuid(fs);
+	}
+	printf("%s\n", BTRFS_BUILD_VERSION);
+	return 0;
+}
+
+static const char * const cmd_sync_usage[] = {
+	"btrfs filesystem sync <path>",
+	"Force a sync on a filesystem",
+	NULL
+};
+
+/*
+ * 'btrfs filesystem sync <path>': issue BTRFS_IOC_SYNC on <path>.
+ *
+ * Returns 0 on success, 12 if <path> cannot be opened and 16 if the ioctl
+ * fails.
+ */
+static int cmd_sync(int argc, char **argv)
+{
+	char	*target;
+	int	fd;
+	int	rc;
+	int	saved_errno;
+
+	if (check_argc_exact(argc, 2))
+		usage(cmd_sync_usage);
+
+	target = argv[1];
+	fd = open_file_or_dir(target);
+	if (fd < 0) {
+		fprintf(stderr, "ERROR: can't access to '%s'\n", target);
+		return 12;
+	}
+
+	printf("FSSync '%s'\n", target);
+	rc = ioctl(fd, BTRFS_IOC_SYNC);
+	/* remember errno before close() has a chance to change it */
+	saved_errno = errno;
+	close(fd);
+
+	if (rc >= 0)
+		return 0;
+
+	fprintf(stderr, "ERROR: unable to fs-syncing '%s' - %s\n", 
+		target, strerror(saved_errno));
+	return 16;
+}
+
+/*
+ * Parse a size such as "4096", "64k", "16M" or "1g".
+ *
+ * An optional trailing 'b', 'k', 'm' or 'g' (either case) selects a
+ * multiplier of 1, 2^10, 2^20 or 2^30. The suffix is cut off @s in place
+ * and the remaining text is converted with atoll().
+ *
+ * An empty string or an unknown suffix is reported on stderr and ends the
+ * program with exit status 1.
+ */
+static u64 parse_size(char *s)
+{
+	size_t len = strlen(s);
+	char c;
+	u64 mult = 1;
+
+	/* s[len - 1] below would read in front of the buffer for "" */
+	if (len == 0) {
+		fprintf(stderr, "Empty size value\n");
+		exit(1);
+	}
+
+	/* the <ctype.h> functions must be given an unsigned char value */
+	if (!isdigit((unsigned char)s[len - 1])) {
+		c = tolower((unsigned char)s[len - 1]);
+		switch (c) {
+		case 'g':
+			mult *= 1024;
+			/* fall through */
+		case 'm':
+			mult *= 1024;
+			/* fall through */
+		case 'k':
+			mult *= 1024;
+			/* fall through */
+		case 'b':
+			break;
+		default:
+			fprintf(stderr, "Unknown size descriptor %c\n", c);
+			exit(1);
+		}
+		s[len - 1] = '\0';
+	}
+	return atoll(s) * mult;
+}
+
+/*
+ * Map a compression name to its BTRFS_COMPRESS_* value. Accepts "zlib" and
+ * "lzo"; anything else is reported on stderr and ends the program with
+ * exit status 1.
+ */
+static int parse_compress_type(char *s)
+{
+	/* compare the argument itself, not the global getopt state (optarg) */
+	if (strcmp(s, "zlib") == 0)
+		return BTRFS_COMPRESS_ZLIB;
+	if (strcmp(s, "lzo") == 0)
+		return BTRFS_COMPRESS_LZO;
+
+	fprintf(stderr, "Unknown compress type %s\n", s);
+	exit(1);
+}
+
+static const char * const cmd_defrag_usage[] = {
+	"btrfs filesystem defragment [options] <file>|<dir> [<file>|<dir>...]",
+	"Defragment a file or a directory",
+	"",
+	"-v             be verbose",
+	"-c[zlib,lzo]   compress the file while defragmenting",
+	"-f             flush data to disk immediately after defragmenting",
+	"-s start       defragment only from byte onward",
+	"-l len         defragment only up to len bytes",
+	"-t size        minimal size of file to be considered for defragmenting",
+	NULL
+};
+
+/*
+ * 'btrfs filesystem defragment': defragment every file or directory named
+ * on the command line.
+ *
+ * Without -c/-f/-s/-l/-t the plain BTRFS_IOC_DEFRAG ioctl is used; any of
+ * those options switches to BTRFS_IOC_DEFRAG_RANGE. If the kernel answers
+ * the range ioctl with ENOTTY the remaining arguments are skipped.
+ *
+ * Returns 0 when every argument was processed successfully. If anything
+ * failed, the number of failures is printed and the program exits with
+ * status 1.
+ */
+static int cmd_defrag(int argc, char **argv)
+{
+	int fd;
+	int flush = 0;
+	u64 start = 0;
+	u64 len = (u64)-1;
+	u64 val;
+	u32 thresh = 0;
+	int i;
+	int errors = 0;
+	int ret = 0;
+	int verbose = 0;
+	int fancy_ioctl = 0;
+	struct btrfs_ioctl_defrag_range_args range;
+	int e = 0;
+	int compress_type = BTRFS_COMPRESS_NONE;
+
+	optind = 1;
+	while (1) {
+		int c = getopt(argc, argv, "vc::fs:l:t:");
+		if (c < 0)
+			break;
+
+		switch (c) {
+		case 'c':
+			/* -c without an argument means zlib */
+			compress_type = BTRFS_COMPRESS_ZLIB;
+			if (optarg)
+				compress_type = parse_compress_type(optarg);
+			fancy_ioctl = 1;
+			break;
+		case 'f':
+			flush = 1;
+			fancy_ioctl = 1;
+			break;
+		case 'v':
+			verbose = 1;
+			break;
+		case 's':
+			start = parse_size(optarg);
+			fancy_ioctl = 1;
+			break;
+		case 'l':
+			len = parse_size(optarg);
+			fancy_ioctl = 1;
+			break;
+		case 't':
+			/*
+			 * thresh is only 32 bits wide; clamp instead of
+			 * letting e.g. "4g" wrap around to 0.
+			 */
+			val = parse_size(optarg);
+			if (val > (u32)-1) {
+				fprintf(stderr, "WARNING: -t value %llu too "
+					"large, using %u\n",
+					(unsigned long long)val,
+					(unsigned int)(u32)-1);
+				val = (u32)-1;
+			}
+			thresh = val;
+			fancy_ioctl = 1;
+			break;
+		default:
+			usage(cmd_defrag_usage);
+		}
+	}
+
+	if (check_argc_min(argc - optind, 1))
+		usage(cmd_defrag_usage);
+
+	memset(&range, 0, sizeof(range));
+	range.start = start;
+	range.len = len;
+	range.extent_thresh = thresh;
+	if (compress_type) {
+		range.flags |= BTRFS_DEFRAG_RANGE_COMPRESS;
+		range.compress_type = compress_type;
+	}
+	if (flush)
+		range.flags |= BTRFS_DEFRAG_RANGE_START_IO;
+
+	for (i = optind; i < argc; i++) {
+		if (verbose)
+			printf("%s\n", argv[i]);
+		fd = open_file_or_dir(argv[i]);
+		if (fd < 0) {
+			/* keep errno intact for perror() across fprintf() */
+			e = errno;
+			fprintf(stderr, "failed to open %s\n", argv[i]);
+			errno = e;
+			perror("open:");
+			errors++;
+			continue;
+		}
+		if (!fancy_ioctl)
+			ret = ioctl(fd, BTRFS_IOC_DEFRAG, NULL);
+		else
+			ret = ioctl(fd, BTRFS_IOC_DEFRAG_RANGE, &range);
+		/* capture errno for both ioctl flavours before it is reused */
+		e = errno;
+
+		if (ret && fancy_ioctl && e == ENOTTY) {
+			fprintf(stderr, "ERROR: defrag range ioctl not "
+				"supported in this kernel, please try "
+				"without any options.\n");
+			errors++;
+			close(fd);
+			break;
+		}
+		if (ret) {
+			fprintf(stderr, "ERROR: defrag failed on %s - %s\n",
+				argv[i], strerror(e));
+			errors++;
+		}
+		close(fd);
+	}
+	if (verbose)
+		printf("%s\n", BTRFS_BUILD_VERSION);
+	if (errors) {
+		fprintf(stderr, "total %d failures\n", errors);
+		exit(1);
+	}
+
+	/* errors is 0 here; the former "errors + 20" reported status 20 */
+	return 0;
+}
+
+static const char * const cmd_resize_usage[] = {
+	"btrfs filesystem resize [+/-]<newsize>[gkm]|max <path>",
+	"Resize a filesystem",
+	"If 'max' is passed, the filesystem will occupy all available space",
+	"on the device.",
+	NULL
+};
+
+/*
+ * 'btrfs filesystem resize <amount> <path>': pass <amount> unchanged to
+ * BTRFS_IOC_RESIZE on <path>.
+ *
+ * Returns 0 on success, 14 if <amount> is empty or too long, 12 if <path>
+ * cannot be opened and 30 if the ioctl fails.
+ */
+static int cmd_resize(int argc, char **argv)
+{
+	struct btrfs_ioctl_vol_args	args;
+	int	fd, res, len, e;
+	char	*amount, *path;
+
+	if (check_argc_exact(argc, 3))
+		usage(cmd_resize_usage);
+
+	amount = argv[1];
+	path = argv[2];
+
+	/* validate before opening so the error path has nothing to close */
+	len = strlen(amount);
+	if (len == 0 || len >= BTRFS_VOL_NAME_MAX) {
+		fprintf(stderr, "ERROR: size value empty or too long ('%s')\n",
+			amount);
+		return 14;
+	}
+
+	fd = open_file_or_dir(path);
+	if (fd < 0) {
+		fprintf(stderr, "ERROR: can't access to '%s'\n", path);
+		return 12;
+	}
+
+	printf("Resize '%s' of '%s'\n", path, amount);
+	/* zero every field so no stack garbage is handed to the kernel */
+	memset(&args, 0, sizeof(args));
+	strncpy(args.name, amount, sizeof(args.name) - 1);
+	res = ioctl(fd, BTRFS_IOC_RESIZE, &args);
+	e = errno;
+	close(fd);
+	if (res < 0) {
+		fprintf(stderr, "ERROR: unable to resize '%s' - %s\n", 
+			path, strerror(e));
+		return 30;
+	}
+	return 0;
+}
+
+static const char * const cmd_label_usage[] = {
+	"btrfs filesystem label <device> [<newlabel>]",
+	"Get or change the label of an unmounted filesystem",
+	"With one argument, get the label of filesystem on <device>.",
+	"If <newlabel> is passed, set the filesystem label to <newlabel>.",
+	NULL
+};
+
+/*
+ * 'btrfs filesystem label': with a second argument call set_label(),
+ * otherwise get_label(), and return that function's result.
+ */
+static int cmd_label(int argc, char **argv)
+{
+	int bad_argc = check_argc_min(argc, 2) || check_argc_max(argc, 3);
+
+	if (bad_argc)
+		usage(cmd_label_usage);
+
+	return argc > 2 ? set_label(argv[1], argv[2]) : get_label(argv[1]);
+}
+
+/*
+ * Command table for 'btrfs filesystem'; cmd_filesystem() hands it to
+ * handle_command_group(). Each entry names a sub-command, its handler and
+ * its usage text.
+ * NOTE(review): "balance" is the only entry without usage text; it carries
+ * &balance_cmd_group and a trailing 1 instead -- presumably marking a
+ * nested command group. Confirm against struct cmd_group in commands.h.
+ */
+const struct cmd_group filesystem_cmd_group = {
+	filesystem_cmd_group_usage, NULL, {
+		{ "df", cmd_df, cmd_df_usage, NULL, 0 },
+		{ "show", cmd_show, cmd_show_usage, NULL, 0 },
+		{ "sync", cmd_sync, cmd_sync_usage, NULL, 0 },
+		{ "defragment", cmd_defrag, cmd_defrag_usage, NULL, 0 },
+		{ "balance", cmd_balance, NULL, &balance_cmd_group, 1 },
+		{ "resize", cmd_resize, cmd_resize_usage, NULL, 0 },
+		{ "label", cmd_label, cmd_label_usage, NULL, 0 },
+		{ 0, 0, 0, 0, 0 },
+	}
+};
+
+/*
+ * Entry point for 'btrfs filesystem': passes the arguments and the
+ * filesystem command table to handle_command_group() and returns its
+ * result.
+ */
+int cmd_filesystem(int argc, char **argv)
+{
+	const struct cmd_group *grp = &filesystem_cmd_group;
+
+	return handle_command_group(grp, argc, argv);
+}
diff --git a/cmds-inspect.c b/cmds-inspect.c
new file mode 100644
index 0000000..2f0228f
--- /dev/null
+++ b/cmds-inspect.c
@@ -0,0 +1,243 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+#include <errno.h>
+
+#include "kerncompat.h"
+#include "ioctl.h"
+
+#include "commands.h"
+
+/* btrfs-list.c */
+char *path_for_root(int fd, u64 root);
+
+/* Synopsis of the 'btrfs inspect-internal' group, referenced by inspect_cmd_group. */
+static const char * const inspect_cmd_group_usage[] = {
+	"btrfs inspect-internal <command> <args>",
+	NULL
+};
+
+/*
+ * Print every path that BTRFS_IOC_INO_PATHS reports for inode @inum on the
+ * filesystem open at @fd, one per line. If @prepend is not NULL each path
+ * is printed as "<prepend>/<path>". With @verbose the counters returned in
+ * the data container are printed first.
+ *
+ * Returns 1 if the result buffer cannot be allocated, otherwise the ioctl
+ * return value (0 on success).
+ */
+static int __ino_to_path_fd(u64 inum, int fd, int verbose, const char *prepend)
+{
+	int ret;
+	int i;
+	struct btrfs_ioctl_ino_path_args ipa;
+	struct btrfs_data_container *fspath;
+
+	fspath = malloc(4096);
+	if (!fspath)
+		return 1;
+
+	/* clear the whole request, including any field not set below */
+	memset(&ipa, 0, sizeof(ipa));
+	ipa.inum = inum;
+	ipa.size = 4096;
+	/* go through unsigned long so the cast is clean on 32-bit builds */
+	ipa.fspath = (u64)(unsigned long)fspath;
+
+	ret = ioctl(fd, BTRFS_IOC_INO_PATHS, &ipa);
+	if (ret) {
+		printf("ioctl ret=%d, error: %s\n", ret, strerror(errno));
+		goto out;
+	}
+
+	if (verbose)
+		printf("ioctl ret=%d, bytes_left=%lu, bytes_missing=%lu, "
+			"cnt=%d, missed=%d\n", ret,
+			(unsigned long)fspath->bytes_left,
+			(unsigned long)fspath->bytes_missing,
+			fspath->elem_cnt, fspath->elem_missed);
+
+	for (i = 0; i < fspath->elem_cnt; ++i) {
+		/*
+		 * val[i] is a byte offset from the start of val[]. Add it to
+		 * that base address as a number; indexing val[] as an array
+		 * of char pointers is only right where a pointer is as wide
+		 * as one val[] element (cmd_logical_resolve reads them as
+		 * u64).
+		 */
+		u64 addr = (u64)(unsigned long)fspath->val + fspath->val[i];
+		char *path = (char *)(unsigned long)addr;
+
+		if (prepend)
+			printf("%s/%s\n", prepend, path);
+		else
+			printf("%s\n", path);
+	}
+
+out:
+	free(fspath);
+	return ret;
+}
+
+static const char * const cmd_inode_resolve_usage[] = {
+	"btrfs inspect-internal inode-resolve [-v] <inode> <path>",
+	"Get file system paths for the given inode",
+	NULL
+};
+
+/*
+ * 'btrfs inspect-internal inode-resolve [-v] <inode> <path>': print the
+ * paths of <inode> through __ino_to_path_fd(), each prefixed with <path>.
+ *
+ * Returns 12 if <path> cannot be opened, otherwise the result of
+ * __ino_to_path_fd().
+ */
+static int cmd_inode_resolve(int argc, char **argv)
+{
+	int fd;
+	int ret;
+	int verbose = 0;
+
+	optind = 1;
+	while (1) {
+		int c = getopt(argc, argv, "v");
+		if (c < 0)
+			break;
+
+		switch (c) {
+		case 'v':
+			verbose = 1;
+			break;
+		default:
+			usage(cmd_inode_resolve_usage);
+		}
+	}
+
+	if (check_argc_exact(argc - optind, 2))
+		usage(cmd_inode_resolve_usage);
+
+	fd = open_file_or_dir(argv[optind+1]);
+	if (fd < 0) {
+		fprintf(stderr, "ERROR: can't access '%s'\n", argv[optind+1]);
+		return 12;
+	}
+
+	ret = __ino_to_path_fd(atoll(argv[optind]), fd, verbose,
+			       argv[optind+1]);
+	/* the descriptor is no longer needed once the paths are printed */
+	close(fd);
+	return ret;
+}
+
+static const char * const cmd_logical_resolve_usage[] = {
+	"btrfs inspect-internal logical-resolve [-Pv] <logical> <path>",
+	"Get file system paths for the given logical address",
+	NULL
+};
+
+/*
+ * 'btrfs inspect-internal logical-resolve [-Pv] <logical> <path>'.
+ *
+ * Asks BTRFS_IOC_LOGICAL_INO which inodes reference <logical>. The result
+ * is read as (inode, offset, root) triples. With -P the triples are printed
+ * as numbers; otherwise each inode is resolved to paths by
+ * __ino_to_path_fd(), below the path that path_for_root() returns for the
+ * root (or directly below <path> when it returns NULL).
+ *
+ * Returns 0 on success, 1 if the result buffer cannot be allocated, 12 if
+ * a path cannot be opened, the ioctl return value if the ioctl fails, or
+ * the error encoded in path_for_root()'s result.
+ */
+static int cmd_logical_resolve(int argc, char **argv)
+{
+	int ret;
+	int fd = -1;
+	int i;
+	int verbose = 0;
+	int getpath = 1;
+	int bytes_left;
+	int len;
+	struct btrfs_ioctl_logical_ino_args loi;
+	struct btrfs_data_container *inodes;
+	char full_path[4096];
+	char *path_ptr;
+
+	optind = 1;
+	while (1) {
+		int c = getopt(argc, argv, "Pv");
+		if (c < 0)
+			break;
+
+		switch (c) {
+		case 'P':
+			getpath = 0;
+			break;
+		case 'v':
+			verbose = 1;
+			break;
+		default:
+			usage(cmd_logical_resolve_usage);
+		}
+	}
+
+	if (check_argc_exact(argc - optind, 2))
+		usage(cmd_logical_resolve_usage);
+
+	inodes = malloc(4096);
+	if (!inodes)
+		return 1;
+
+	/* clear the whole request, including any field not set below */
+	memset(&loi, 0, sizeof(loi));
+	loi.logical = atoll(argv[optind]);
+	loi.size = 4096;
+	loi.inodes = (u64)(unsigned long)inodes;
+
+	fd = open_file_or_dir(argv[optind+1]);
+	if (fd < 0) {
+		fprintf(stderr, "ERROR: can't access '%s'\n", argv[optind+1]);
+		ret = 12;
+		goto out;
+	}
+
+	ret = ioctl(fd, BTRFS_IOC_LOGICAL_INO, &loi);
+	if (ret) {
+		printf("ioctl ret=%d, error: %s\n", ret, strerror(errno));
+		goto out;
+	}
+
+	if (verbose)
+		printf("ioctl ret=%d, bytes_left=%lu, bytes_missing=%lu, "
+			"cnt=%d, missed=%d\n", ret,
+			(unsigned long)inodes->bytes_left,
+			(unsigned long)inodes->bytes_missing,
+			inodes->elem_cnt, inodes->elem_missed);
+
+	/*
+	 * Lengths are kept in 'len' so that 'ret' stays 0 from the ioctl and
+	 * is not overwritten with a string length that would then become the
+	 * exit status.
+	 */
+	bytes_left = sizeof(full_path);
+	len = snprintf(full_path, bytes_left, "%s/", argv[optind+1]);
+	bytes_left -= len + 1;
+	BUG_ON(bytes_left < 0);
+	path_ptr = full_path + len;
+
+	for (i = 0; i < inodes->elem_cnt; i += 3) {
+		u64 inum = inodes->val[i];
+		u64 offset = inodes->val[i+1];
+		u64 root = inodes->val[i+2];
+		int path_fd;
+		char *name;
+
+		if (!getpath) {
+			printf("inode %llu offset %llu root %llu\n",
+				(unsigned long long)inum,
+				(unsigned long long)offset,
+				(unsigned long long)root);
+			continue;
+		}
+
+		name = path_for_root(fd, root);
+		if (IS_ERR(name)) {
+			/* leave through 'out' so the buffer and fd are freed */
+			ret = PTR_ERR(name);
+			goto out;
+		}
+		if (!name) {
+			/* drop the trailing '/' and resolve below <path> */
+			path_ptr[-1] = '\0';
+			path_fd = fd;
+		} else {
+			path_ptr[-1] = '/';
+			len = snprintf(path_ptr, bytes_left, "%s", name);
+			BUG_ON(len >= bytes_left);
+			free(name);
+			path_fd = open_file_or_dir(full_path);
+			if (path_fd < 0) {
+				fprintf(stderr, "ERROR: can't access "
+					"'%s'\n", full_path);
+				ret = 12;
+				goto out;
+			}
+		}
+		__ino_to_path_fd(inum, path_fd, verbose, full_path);
+		/* only close descriptors opened in this iteration */
+		if (path_fd != fd)
+			close(path_fd);
+	}
+
+out:
+	if (fd >= 0)
+		close(fd);
+	free(inodes);
+	return ret;
+}
+
+/*
+ * Command table for 'btrfs inspect-internal'; cmd_inspect() hands it to
+ * handle_command_group(). Each entry names a sub-command, its handler and
+ * its usage text.
+ * NOTE(review): the all-zero final entry presumably ends the table --
+ * confirm against struct cmd_group in commands.h.
+ */
+const struct cmd_group inspect_cmd_group = {
+	inspect_cmd_group_usage, NULL, {
+		{ "inode-resolve", cmd_inode_resolve, cmd_inode_resolve_usage,
+			NULL, 0 },
+		{ "logical-resolve", cmd_logical_resolve,
+			cmd_logical_resolve_usage, NULL, 0 },
+		{ 0, 0, 0, 0, 0 }
+	}
+};
+
+/*
+ * Entry point for 'btrfs inspect-internal': passes the arguments and the
+ * inspect command table to handle_command_group() and returns its result.
+ */
+int cmd_inspect(int argc, char **argv)
+{
+	const struct cmd_group *grp = &inspect_cmd_group;
+
+	return handle_command_group(grp, argc, argv);
+}
diff --git a/cmds-scrub.c b/cmds-scrub.c
new file mode 100644
index 0000000..c4503f4
--- /dev/null
+++ b/cmds-scrub.c
@@ -0,0 +1,1721 @@
+/*
+ * Copyright (C) 2011 STRATO.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include <sys/ioctl.h>
+#include <sys/wait.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <poll.h>
+#include <sys/file.h>
+#include <uuid/uuid.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <ctype.h>
+#include <signal.h>
+#include <stdarg.h>
+
+#include "ctree.h"
+#include "ioctl.h"
+#include "utils.h"
+#include "volumes.h"
+#include "disk-io.h"
+
+#include "commands.h"
+
+/* Synopsis of the 'btrfs scrub' command group. */
+static const char * const scrub_cmd_group_usage[] = {
+	"btrfs scrub <command> [options] <path>|<device>",
+	NULL
+};
+
+/*
+ * File locations and status-file format markers for scrub.
+ * NOTE(review): the two paths are presumably used as the fn_base argument
+ * of scrub_datafile() and as a socket path -- confirm at the call sites.
+ * scrub_read_file() expects a status file to start with
+ * SCRUB_FILE_VERSION_PREFIX followed by ':' and a number equal to
+ * SCRUB_FILE_VERSION, and returns -ENOTSUP for any other number.
+ */
+#define SCRUB_DATA_FILE "/var/lib/btrfs/scrub.status"
+#define SCRUB_PROGRESS_SOCKET_PATH "/var/lib/btrfs/scrub.progress"
+#define SCRUB_FILE_VERSION_PREFIX "scrub status"
+#define SCRUB_FILE_VERSION "1"
+
+/*
+ * Timing and state of one scrub run, as printed by _print_scrub_ss().
+ */
+struct scrub_stats {
+	time_t t_start;		/* start time; 0 is printed as "no stats available" */
+	time_t t_resumed;	/* resume time; printed instead of t_start if non-zero */
+	u64 duration;		/* printed as a number of seconds */
+	u64 finished;		/* non-zero: reported as finished (unless canceled) */
+	u64 canceled;		/* non-zero: reported as aborted */
+};
+
+/*
+ * Per-device scrub bookkeeping.
+ * NOTE(review): the code using this struct lies outside this part of the
+ * file; the field notes below are inferred from names only -- confirm.
+ */
+struct scrub_progress {
+	struct btrfs_ioctl_scrub_args scrub_args;
+	int fd;
+	int ret;
+	int skip;
+	struct scrub_stats stats;
+	struct scrub_file_record *resumed;
+	int ioctl_errno;	/* presumably errno of the scrub ioctl -- TODO confirm */
+	pthread_mutex_t progress_mutex;
+};
+
+/*
+ * One record of the scrub status file. scrub_read_file() allocates these
+ * and fills fsid and devid from each line, then the counters in p from
+ * key:value pairs.
+ */
+struct scrub_file_record {
+	u8 fsid[BTRFS_FSID_SIZE];
+	u64 devid;
+	struct scrub_stats stats;
+	struct btrfs_scrub_progress p;
+};
+
+/*
+ * NOTE(review): users of this struct are outside this part of the file;
+ * it presumably carries the state of a progress-polling thread -- confirm.
+ */
+struct scrub_progress_cycle {
+	int fdmnt;
+	int prg_fd;
+	int do_record;
+	struct btrfs_ioctl_fs_info_args *fi;
+	struct scrub_progress *progress;
+	struct scrub_progress *shared_progress;
+	pthread_mutex_t *write_mutex;
+};
+
+/*
+ * Totals over several devices: add_to_fs_stat() folds per-device counters
+ * into p and per-device timing into s; init_fs_stat() resets it.
+ */
+struct scrub_fs_stat {
+	struct btrfs_scrub_progress p;
+	struct scrub_stats s;
+	int i;			/* not used in the code visible here */
+};
+
+/*
+ * Print every counter of @sp on its own tab-indented "name: value" line.
+ *
+ * The counters are printed as unsigned values, matching
+ * print_scrub_summary() and PRINT_SCRUB_ERROR; with a signed conversion a
+ * very large counter would show up as a negative number.
+ */
+static void print_scrub_full(struct btrfs_scrub_progress *sp)
+{
+	printf("\tdata_extents_scrubbed: %llu\n",
+	       (unsigned long long)sp->data_extents_scrubbed);
+	printf("\ttree_extents_scrubbed: %llu\n",
+	       (unsigned long long)sp->tree_extents_scrubbed);
+	printf("\tdata_bytes_scrubbed: %llu\n",
+	       (unsigned long long)sp->data_bytes_scrubbed);
+	printf("\ttree_bytes_scrubbed: %llu\n",
+	       (unsigned long long)sp->tree_bytes_scrubbed);
+	printf("\tread_errors: %llu\n",
+	       (unsigned long long)sp->read_errors);
+	printf("\tcsum_errors: %llu\n",
+	       (unsigned long long)sp->csum_errors);
+	printf("\tverify_errors: %llu\n",
+	       (unsigned long long)sp->verify_errors);
+	printf("\tno_csum: %llu\n",
+	       (unsigned long long)sp->no_csum);
+	printf("\tcsum_discards: %llu\n",
+	       (unsigned long long)sp->csum_discards);
+	printf("\tsuper_errors: %llu\n",
+	       (unsigned long long)sp->super_errors);
+	printf("\tmalloc_errors: %llu\n",
+	       (unsigned long long)sp->malloc_errors);
+	printf("\tuncorrectable_errors: %llu\n",
+	       (unsigned long long)sp->uncorrectable_errors);
+	printf("\tunverified_errors: %llu\n",
+	       (unsigned long long)sp->unverified_errors);
+	printf("\tcorrected_errors: %llu\n",
+	       (unsigned long long)sp->corrected_errors);
+	printf("\tlast_physical: %llu\n",
+	       (unsigned long long)sp->last_physical);
+}
+
+/* Print a printf-style message to stderr, but only if @test is true. */
+#define ERR(test, ...) do {			\
+	if (test)				\
+		fprintf(stderr, __VA_ARGS__);	\
+} while (0)
+
+/*
+ * Print " <desc>=<value>" to stdout if the counter @test is non-zero.
+ * @test is expanded twice (once as the condition, once as the value), so
+ * it must not have side effects.
+ */
+#define PRINT_SCRUB_ERROR(test, desc) do {	\
+	if (test)				\
+		printf(" %s=%llu", desc, test);	\
+} while (0)
+
+/*
+ * Print a short summary of @p: the number of bytes scrubbed together with
+ * an error count, and, if any error was counted, a breakdown by kind.
+ *
+ * The error count shown is the larger of two sums: read + csum + verify +
+ * super errors, and corrected + uncorrectable errors.
+ */
+static void print_scrub_summary(struct btrfs_scrub_progress *p)
+{
+	u64 detected;
+	u64 handled;
+	char *scrubbed;
+
+	detected = p->read_errors + p->csum_errors +
+		   p->verify_errors + p->super_errors;
+	handled = p->corrected_errors + p->uncorrectable_errors;
+
+	if (p->malloc_errors)
+		printf("*** WARNING: memory allocation failed while scrubbing. "
+		       "results may be inaccurate\n");
+
+	scrubbed = pretty_sizes(p->data_bytes_scrubbed +
+				p->tree_bytes_scrubbed);
+	printf("\ttotal bytes scrubbed: %s with %llu errors\n", scrubbed,
+		max(detected, handled));
+	free(scrubbed);
+
+	/* nothing more to say for a clean run */
+	if (!detected && !handled)
+		return;
+
+	printf("\terror details:");
+	PRINT_SCRUB_ERROR(p->read_errors, "read");
+	PRINT_SCRUB_ERROR(p->super_errors, "super");
+	PRINT_SCRUB_ERROR(p->verify_errors, "verify");
+	PRINT_SCRUB_ERROR(p->csum_errors, "csum");
+	printf("\n");
+	printf("\tcorrected errors: %llu, uncorrectable errors: %llu, "
+		"unverified errors: %llu\n", p->corrected_errors,
+		p->uncorrectable_errors, p->unverified_errors);
+}
+
+/*
+ * Helpers for add_to_fs_stat(). @name is a member name; the counter
+ * variant works on the 'p' member of the scrub_fs_stat, the others on its
+ * 's' member.
+ *
+ * The first macro's source parameter is called 'prog' rather than 'p': a
+ * parameter named 'p' would also be substituted inside "fs_stat->p" and
+ * only work when the argument happens to be spelled "p". All arguments are
+ * parenthesised so that expressions can be passed safely.
+ */
+
+/* add counter @name of @prog to the running total */
+#define _SCRUB_FS_STAT(prog, name, fs_stat) do {	\
+	(fs_stat)->p.name += (prog)->name;		\
+} while (0)
+
+/* keep the smaller of the stored and the new value */
+#define _SCRUB_FS_STAT_MIN(ss, name, fs_stat)		\
+do {							\
+	if ((fs_stat)->s.name > (ss)->name) {		\
+		(fs_stat)->s.name = (ss)->name;		\
+	}						\
+} while (0)
+
+/* like _MIN, but a stored 0 counts as "not set yet" and is overwritten */
+#define _SCRUB_FS_STAT_ZMIN(ss, name, fs_stat)				\
+do {									\
+	if (!(fs_stat)->s.name || (fs_stat)->s.name > (ss)->name) {	\
+		(fs_stat)->s.name = (ss)->name;				\
+	}								\
+} while (0)
+
+/* keep the larger value; a stored 0 is always overwritten */
+#define _SCRUB_FS_STAT_ZMAX(ss, name, fs_stat)				\
+do {									\
+	if (!(fs_stat)->s.name || (fs_stat)->s.name < (ss)->name) {	\
+		(fs_stat)->s.name = (ss)->name;				\
+	}								\
+} while (0)
+
+/*
+ * Fold the progress counters @p and the timing @ss of one device into the
+ * running totals in @fs_stat.
+ *
+ * Counters are summed. For the timing, the earliest non-zero t_start and
+ * t_resumed and the largest duration and canceled value are kept, and
+ * finished is lowered to the smallest value seen (init_fs_stat() starts it
+ * at 1).
+ */
+static void add_to_fs_stat(struct btrfs_scrub_progress *p,
+				struct scrub_stats *ss,
+				struct scrub_fs_stat *fs_stat)
+{
+	_SCRUB_FS_STAT(p, data_extents_scrubbed, fs_stat);
+	_SCRUB_FS_STAT(p, tree_extents_scrubbed, fs_stat);
+	_SCRUB_FS_STAT(p, data_bytes_scrubbed, fs_stat);
+	_SCRUB_FS_STAT(p, tree_bytes_scrubbed, fs_stat);
+	_SCRUB_FS_STAT(p, read_errors, fs_stat);
+	_SCRUB_FS_STAT(p, csum_errors, fs_stat);
+	_SCRUB_FS_STAT(p, verify_errors, fs_stat);
+	_SCRUB_FS_STAT(p, no_csum, fs_stat);
+	_SCRUB_FS_STAT(p, csum_discards, fs_stat);
+	_SCRUB_FS_STAT(p, super_errors, fs_stat);
+	_SCRUB_FS_STAT(p, malloc_errors, fs_stat);
+	_SCRUB_FS_STAT(p, uncorrectable_errors, fs_stat);
+	/*
+	 * Both print_scrub_full() and print_scrub_summary() report this
+	 * counter; without summing it the totals would always show 0.
+	 */
+	_SCRUB_FS_STAT(p, unverified_errors, fs_stat);
+	_SCRUB_FS_STAT(p, corrected_errors, fs_stat);
+	_SCRUB_FS_STAT(p, last_physical, fs_stat);
+	_SCRUB_FS_STAT_ZMIN(ss, t_start, fs_stat);
+	_SCRUB_FS_STAT_ZMIN(ss, t_resumed, fs_stat);
+	_SCRUB_FS_STAT_ZMAX(ss, duration, fs_stat);
+	_SCRUB_FS_STAT_ZMAX(ss, canceled, fs_stat);
+	_SCRUB_FS_STAT_MIN(ss, finished, fs_stat);
+}
+
+/*
+ * Reset @fs_stat before totals are collected: every field becomes zero
+ * except s.finished, which starts at 1 so that add_to_fs_stat() can lower
+ * it when a device reports a smaller value.
+ */
+static void init_fs_stat(struct scrub_fs_stat *fs_stat)
+{
+	struct scrub_stats *timing = &fs_stat->s;
+
+	memset(fs_stat, 0, sizeof(struct scrub_fs_stat));
+	timing->finished = 1;
+}
+
+/*
+ * Print one line describing when a scrub started (or was resumed) and
+ * whether it finished, was aborted or is still running, with the duration
+ * in seconds. Prints "no stats available" when @ss is NULL or has no start
+ * time.
+ */
+static void _print_scrub_ss(struct scrub_stats *ss)
+{
+	char stamp[4096];
+	struct tm when;
+	int resumed;
+
+	if (!ss || !ss->t_start) {
+		printf("\tno stats available\n");
+		return;
+	}
+
+	/* a resume time, when present, replaces the start time */
+	resumed = ss->t_resumed != 0;
+	localtime_r(resumed ? &ss->t_resumed : &ss->t_start, &when);
+	strftime(stamp, sizeof(stamp), "%c", &when);
+	stamp[sizeof(stamp) - 1] = '\0';
+	if (resumed)
+		printf("\tscrub resumed at %s", stamp);
+	else
+		printf("\tscrub started at %s", stamp);
+
+	if (ss->canceled)
+		printf(" and was aborted after %llu seconds\n",
+		       ss->duration);
+	else if (ss->finished)
+		printf(" and finished after %llu seconds\n",
+		       ss->duration);
+	else
+		printf(", running for %llu seconds\n", ss->duration);
+}
+
+/*
+ * Print the scrub report for one device: a heading with the device path,
+ * id and the optional text @append, the timing line for @ss, and, if @p is
+ * not NULL, its counters (all of them when @raw is set, a summary
+ * otherwise).
+ */
+static void print_scrub_dev(struct btrfs_ioctl_dev_info_args *di,
+				struct btrfs_scrub_progress *p, int raw,
+				const char *append, struct scrub_stats *ss)
+{
+	const char *suffix = append ? append : "";
+
+	printf("scrub device %s (id %llu) %s\n", di->path, di->devid,
+	       suffix);
+
+	_print_scrub_ss(ss);
+
+	if (!p)
+		return;
+
+	if (raw)
+		print_scrub_full(p);
+	else
+		print_scrub_summary(p);
+}
+
+/*
+ * Print the totals in @fs_stat: the timing line first, then either every
+ * counter (@raw set) or the summary.
+ */
+static void print_fs_stat(struct scrub_fs_stat *fs_stat, int raw)
+{
+	struct btrfs_scrub_progress *totals = &fs_stat->p;
+
+	_print_scrub_ss(&fs_stat->s);
+
+	if (!raw) {
+		print_scrub_summary(totals);
+		return;
+	}
+	print_scrub_full(totals);
+}
+
+/*
+ * Free a NULL-terminated array of scrub records together with the records
+ * it points to. A NULL @last_scrubs is accepted and ignored.
+ */
+static void free_history(struct scrub_file_record **last_scrubs)
+{
+	int idx;
+
+	if (last_scrubs == NULL)
+		return;
+
+	for (idx = 0; last_scrubs[idx] != NULL; idx++)
+		free(last_scrubs[idx]);
+	free(last_scrubs);
+}
+
+/*
+ * cancels a running scrub and makes the master process record the current
+ * progress status before exiting.
+ *
+ * cancel_fd is the descriptor the cancel ioctl is sent to; it is set by
+ * scrub_handle_sigint_child() before the handler is installed.
+ */
+static int cancel_fd = -1;
+static void scrub_sigint_record_progress(int signal)
+{
+	/*
+	 * A signal handler can run between a failing call and the code that
+	 * inspects errno afterwards, so put back whatever the ioctl changes.
+	 */
+	int saved_errno = errno;
+
+	ioctl(cancel_fd, BTRFS_IOC_SCRUB_CANCEL, NULL);
+	errno = saved_errno;
+}
+
+/*
+ * Make the calling process ignore SIGINT (installed with SA_RESTART).
+ * Returns the result of sigaction().
+ */
+static int scrub_handle_sigint_parent(void)
+{
+	struct sigaction ignore;
+
+	memset(&ignore, 0, sizeof(ignore));
+	ignore.sa_handler = SIG_IGN;
+	ignore.sa_flags = SA_RESTART;
+
+	return sigaction(SIGINT, &ignore, NULL);
+}
+
+/*
+ * Set up SIGINT handling around a scrub: with a valid @fd, SIGINT sends
+ * the scrub-cancel ioctl to that descriptor; with @fd == -1 the default
+ * SIGINT action is restored. Returns the result of sigaction().
+ */
+static int scrub_handle_sigint_child(int fd)
+{
+	struct sigaction sa;
+
+	memset(&sa, 0, sizeof(sa));
+	if (fd == -1)
+		sa.sa_handler = SIG_DFL;
+	else
+		sa.sa_handler = scrub_sigint_record_progress;
+
+	/* the handler reads cancel_fd, so publish it before installing */
+	cancel_fd = fd;
+	return sigaction(SIGINT, &sa, NULL);
+}
+
+/*
+ * Build the name of a scrub data file in @datafile (capacity @size bytes):
+ * "<fn_base>.<fn_local>", or "<fn_base>.<fn_local>_<fn_tmp>" when @fn_tmp
+ * is not NULL.
+ *
+ * Returns 0 on success and -EOVERFLOW if the complete, NUL-terminated name
+ * does not fit. A name is never silently truncated.
+ */
+static int scrub_datafile(const char *fn_base, const char *fn_local,
+				const char *fn_tmp, char *datafile, int size)
+{
+	int len;
+
+	if (size <= 0)
+		return -EOVERFLOW;
+
+	if (fn_tmp)
+		len = snprintf(datafile, size, "%s.%s_%s", fn_base, fn_local,
+			       fn_tmp);
+	else
+		len = snprintf(datafile, size, "%s.%s", fn_base, fn_local);
+
+	/*
+	 * snprintf() returns the length the full string needs; a value of
+	 * size or more means the output was cut short.
+	 */
+	if (len < 0 || len >= size)
+		return -EOVERFLOW;
+
+	return 0;
+}
+
+/*
+ * Open @datafile with open(2) flags @m (mode 0600 if it is created) and
+ * take an exclusive, non-blocking flock() on it.
+ *
+ * Returns the descriptor on success or a negative errno value if either
+ * step fails; the descriptor is closed again when locking fails.
+ */
+static int scrub_open_file(const char *datafile, int m)
+{
+	int fd = open(datafile, m, 0600);
+	int err;
+
+	if (fd < 0)
+		return -errno;
+
+	if (flock(fd, LOCK_EX|LOCK_NB) == 0)
+		return fd;
+
+	/* remember why locking failed before close() can change errno */
+	err = errno;
+	close(fd);
+	return -err;
+}
+
+/*
+ * Open the data file "<fn_base>.<fn_local>" read-only through
+ * scrub_open_file(). Returns its descriptor or a negative error value.
+ */
+static int scrub_open_file_r(const char *fn_base, const char *fn_local)
+{
+	char path[BTRFS_PATH_NAME_MAX + 1];
+	int err = scrub_datafile(fn_base, fn_local, NULL, path, sizeof(path));
+
+	return err < 0 ? err : scrub_open_file(path, O_RDONLY);
+}
+
+/*
+ * Open the data file "<fn_base>.<fn_local>_<tmp>" for writing through
+ * scrub_open_file(), creating it if needed. Returns its descriptor or a
+ * negative error value.
+ */
+static int scrub_open_file_w(const char *fn_base, const char *fn_local,
+				const char *tmp)
+{
+	char path[BTRFS_PATH_NAME_MAX + 1];
+	int err = scrub_datafile(fn_base, fn_local, tmp, path, sizeof(path));
+
+	return err < 0 ? err : scrub_open_file(path, O_WRONLY|O_CREAT);
+}
+
+/*
+ * Rename the data file "<fn_base>.<fn_local>_<tmp>" to
+ * "<fn_base>.<fn_local>".
+ *
+ * Returns 0 on success, the error from scrub_datafile() if a name cannot
+ * be built, or a negative errno value if rename() fails.
+ */
+static int scrub_rename_file(const char *fn_base, const char *fn_local,
+				const char *tmp)
+{
+	char from[BTRFS_PATH_NAME_MAX + 1];
+	char to[BTRFS_PATH_NAME_MAX + 1];
+	int err;
+
+	err = scrub_datafile(fn_base, fn_local, tmp, from, sizeof(from));
+	if (err < 0)
+		return err;
+
+	err = scrub_datafile(fn_base, fn_local, NULL, to, sizeof(to));
+	if (err < 0)
+		return err;
+
+	if (rename(from, to) != 0)
+		return -errno;
+	return 0;
+}
+
+/*
+ * Try to read the key/value pair "<name>:<number>" into dest.name with
+ * scrub_kvread(), but only while @ret is still 0, i.e. no earlier attempt
+ * in the same sequence has matched or failed. @name is turned into the key
+ * string with '#', so sizeof(#name) includes the terminating NUL.
+ * The macro expands to a bare 'if' statement, so it must not be used as
+ * the body of an if/else without braces.
+ */
+#define _SCRUB_KVREAD(ret, i, name, avail, l, dest) if (ret == 0) {	  \
+	ret = scrub_kvread(i, sizeof(#name), avail, l, #name, dest.name); \
+}
+
+/*
+ * Try to read "<key>:<number>" at buf[*i].
+ *
+ * @i:     in/out offset into @buf; moved past the number on success
+ * @len:   sizeof() of the key string, i.e. its length plus the NUL
+ * @avail: number of valid bytes in @buf
+ * @dest:  receives the parsed number on success
+ *
+ * returns 0 if the key did not match (nothing was read)
+ *         1 if the key did match (success)
+ *        -1 if the key did match and an error occurred; *i has then
+ *           already been moved past the key
+ */
+static int scrub_kvread(int *i, int len, int avail, const char *buf,
+			const char *key, u64 *dest)
+{
+	int j;
+
+	if (*i + len + 1 < avail && strncmp(&buf[*i], key, len - 1) == 0) {
+		*i += len - 1;
+		if (buf[*i] != ':')
+			return -1;
+		*i += 1;
+		/*
+		 * Check the bound before reading so buf[] is never touched
+		 * at or beyond @avail, which may be the end of the buffer.
+		 */
+		for (j = 0; *i + j < avail &&
+			    isdigit((unsigned char)buf[*i + j]); ++j)
+			;
+		/* the number must be followed by at least one more byte */
+		if (*i + j >= avail)
+			return -1;
+		*dest = atoll(&buf[*i]);
+		*i += j;
+		return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Used inside scrub_read_file(): optionally warn about invalid data at the
+ * current position and jump to the 'skip' label. Relies on the locals
+ * report_errors, lineno, i, state, avail and l of the enclosing function.
+ *
+ * The quoted excerpt is limited to the bytes that remain after position i
+ * (at most 20), so it cannot run past the valid data in l.
+ */
+#define _SCRUB_INVALID do {						\
+	if (report_errors)						\
+		fprintf(stderr, "WARNING: invalid data in line %d pos "	\
+			"%d state %d (near \"%.*s\") at %s:%d\n",	\
+			lineno, i, state,				\
+			avail - i < 0 ? 0 :				\
+			avail - i < 20 ? avail - i : 20,		\
+			l + i,	__FILE__, __LINE__);			\
+	goto skip;							\
+} while (0)
+
+/*
+ * Parse scrub status data read from @fd into an array of per-device records.
+ *
+ * The expected input is what scrub_write_file() emits: a first line holding
+ * SCRUB_FILE_VERSION_PREFIX ":" <version>, followed by one line per device of
+ * the form  <fsid>:<devid>|<key>:<value>|<key>:<value>...
+ *
+ * The parser is a state machine working on a 16 KiB buffer that is refilled
+ * through the "again" label whenever no complete line is buffered yet.  A
+ * line that fails to parse is skipped (state 99); if @report_errors is
+ * non-zero a warning is printed for it.
+ *
+ * Returns a NULL-terminated array of malloc'ed records, or an ERR_PTR():
+ * -EINVAL for a negative @fd, -ENODATA if the input ended before any record
+ * was started, -ENOTSUP on a version mismatch, -errno if read() or an
+ * allocation fails.
+ */
+static struct scrub_file_record **scrub_read_file(int fd, int report_errors)
+{
+	int avail = 0;
+	int old_avail = 0;
+	char l[16 * 1024];
+	int state = 0;
+	int curr = -1;
+	int i = 0;
+	int j;
+	int ret;
+	int eof = 0;
+	int lineno = 0;
+	u64 version;
+	char empty_uuid[BTRFS_FSID_SIZE] = {0};
+	struct scrub_file_record **p = NULL;
+
+	if (fd < 0)
+		return ERR_PTR(-EINVAL);
+
+again:
+	/* move the unparsed tail to the front of the buffer, then refill */
+	old_avail = avail - i;
+	BUG_ON(old_avail < 0);
+	if (old_avail)
+		memmove(l, l + i, old_avail);
+	avail = read(fd, l + old_avail, sizeof(l) - old_avail);
+	if (avail == 0)
+		eof = 1;
+	if (avail == 0 && old_avail == 0) {
+		if (curr >= 0 &&
+		    memcmp(p[curr]->fsid, empty_uuid, BTRFS_FSID_SIZE) == 0) {
+			/*
+			 * the last record was allocated but never filled in;
+			 * release it instead of only dropping the pointer.
+			 */
+			free(p[curr]);
+			p[curr] = NULL;
+		} else if (curr == -1) {
+			p = ERR_PTR(-ENODATA);
+		}
+		return p;
+	}
+	if (avail == -1)
+		return ERR_PTR(-errno);
+	avail += old_avail;
+
+	i = 0;
+	while (i < avail) {
+		switch (state) {
+		case 0: /* start of file */
+			ret = scrub_kvread(&i,
+				sizeof(SCRUB_FILE_VERSION_PREFIX), avail, l,
+				SCRUB_FILE_VERSION_PREFIX, &version);
+			if (ret != 1)
+				_SCRUB_INVALID;
+			if (version != atoll(SCRUB_FILE_VERSION))
+				return ERR_PTR(-ENOTSUP);
+			/* state 6 consumes the newline after the version */
+			state = 6;
+			continue;
+		case 1: /* start of line, alloc */
+			/*
+			 * this state makes sure we have a complete line in
+			 * further processing, so we don't need wrap-tracking
+			 * everywhere.
+			 */
+			if (!eof && !memchr(l + i, '\n', avail - i))
+				goto again;
+			++lineno;
+			/*
+			 * the current record still has an all-zero fsid, i.e.
+			 * the previous line was skipped: reuse it.
+			 */
+			if (curr > -1 && memcmp(p[curr]->fsid, empty_uuid,
+						BTRFS_FSID_SIZE) == 0) {
+				state = 2;
+				continue;
+			}
+			++curr;
+			/* + 2: the new record plus the terminating NULL */
+			p = realloc(p, (curr + 2) * sizeof(*p));
+			if (p)
+				p[curr] = malloc(sizeof(**p));
+			if (!p || !p[curr])
+				return ERR_PTR(-errno);
+			memset(p[curr], 0, sizeof(**p));
+			p[curr + 1] = NULL;
+			++state;
+			/* fall through */
+		case 2: /* start of line, skip space */
+			/* check the bound first so l[avail] is never read */
+			while (i < avail && isspace((unsigned char)l[i])) {
+				if (l[i] == '\n')
+					++lineno;
+				++i;
+			}
+			if (i >= avail ||
+			    (!eof && !memchr(l + i, '\n', avail - i)))
+				goto again;
+			++state;
+			/* fall through */
+		case 3: /* read fsid */
+			if (i == avail)
+				continue;
+			for (j = 0; i + j < avail && l[i + j] != ':'; ++j)
+				;
+			if (i + j + 1 >= avail)
+				_SCRUB_INVALID;
+			/* a textual uuid is exactly 36 characters long */
+			if (j != 36)
+				_SCRUB_INVALID;
+			l[i + j] = '\0';
+			ret = uuid_parse(l + i, p[curr]->fsid);
+			if (ret)
+				_SCRUB_INVALID;
+			i += j + 1;
+			++state;
+			/* fall through */
+		case 4: /* read dev id */
+			for (j = 0; i + j < avail &&
+				    isdigit((unsigned char)l[i + j]); ++j)
+				;
+			if (j == 0 || i + j + 1 >= avail)
+				_SCRUB_INVALID;
+			p[curr]->devid = atoll(&l[i]);
+			/* skip the digits and the separator following them */
+			i += j + 1;
+			++state;
+			/* fall through */
+		case 5: /* read key/value pair */
+			ret = 0;
+			/*
+			 * NOTE(review): data_extents_scrubbed is listed twice
+			 * below; this looks redundant but is kept as found,
+			 * confirm against the _SCRUB_KVREAD definition.
+			 */
+			_SCRUB_KVREAD(ret, &i, data_extents_scrubbed, avail, l,
+					&p[curr]->p);
+			_SCRUB_KVREAD(ret, &i, data_extents_scrubbed, avail, l,
+					&p[curr]->p);
+			_SCRUB_KVREAD(ret, &i, tree_extents_scrubbed, avail, l,
+					&p[curr]->p);
+			_SCRUB_KVREAD(ret, &i, data_bytes_scrubbed, avail, l,
+					&p[curr]->p);
+			_SCRUB_KVREAD(ret, &i, tree_bytes_scrubbed, avail, l,
+					&p[curr]->p);
+			_SCRUB_KVREAD(ret, &i, read_errors, avail, l,
+					&p[curr]->p);
+			_SCRUB_KVREAD(ret, &i, csum_errors, avail, l,
+					&p[curr]->p);
+			_SCRUB_KVREAD(ret, &i, verify_errors, avail, l,
+					&p[curr]->p);
+			_SCRUB_KVREAD(ret, &i, no_csum, avail, l,
+					&p[curr]->p);
+			_SCRUB_KVREAD(ret, &i, csum_discards, avail, l,
+					&p[curr]->p);
+			_SCRUB_KVREAD(ret, &i, super_errors, avail, l,
+					&p[curr]->p);
+			_SCRUB_KVREAD(ret, &i, malloc_errors, avail, l,
+					&p[curr]->p);
+			_SCRUB_KVREAD(ret, &i, uncorrectable_errors, avail, l,
+					&p[curr]->p);
+			_SCRUB_KVREAD(ret, &i, corrected_errors, avail, l,
+					&p[curr]->p);
+			_SCRUB_KVREAD(ret, &i, last_physical, avail, l,
+					&p[curr]->p);
+			_SCRUB_KVREAD(ret, &i, finished, avail, l,
+					&p[curr]->stats);
+			_SCRUB_KVREAD(ret, &i, t_start, avail, l,
+					(u64 *)&p[curr]->stats);
+			_SCRUB_KVREAD(ret, &i, t_resumed, avail, l,
+					(u64 *)&p[curr]->stats);
+			_SCRUB_KVREAD(ret, &i, duration, avail, l,
+					(u64 *)&p[curr]->stats);
+			_SCRUB_KVREAD(ret, &i, canceled, avail, l,
+					&p[curr]->stats);
+			if (ret != 1)
+				_SCRUB_INVALID;
+			++state;
+			/* fall through */
+		case 6: /* after number */
+			if (l[i] == '|')
+				state = 5;
+			else if (l[i] == '\n')
+				state = 1;
+			else
+				_SCRUB_INVALID;
+			++i;
+			continue;
+		case 99: /* skip rest of line */
+skip:
+			/*
+			 * stay in state 99 until a newline was consumed, so
+			 * skipping continues after a refill if necessary.
+			 */
+			state = 99;
+			do {
+				++i;
+				if (l[i - 1] == '\n') {
+					state = 1;
+					break;
+				}
+			} while (i < avail);
+			continue;
+		}
+		BUG();
+	}
+	goto again;
+}
+
+/*
+ * Write exactly @len bytes from @data to @fd.
+ *
+ * Interrupted (EINTR) and short writes are retried until everything is
+ * written.  Returns 0 on success and a negative value if the data could not
+ * be written completely; callers only test for non-zero.
+ */
+static int scrub_write_buf(int fd, const void *data, int len)
+{
+	const char *pos = data;
+	int done = 0;
+	ssize_t ret;
+
+	while (done < len) {
+		ret = write(fd, pos + done, len - done);
+		if (ret < 0) {
+			if (errno == EINTR)
+				continue;
+			/* same value a failing single write() used to give */
+			return -1 - len;
+		}
+		if (ret == 0)
+			break;
+		done += ret;
+	}
+
+	return done - len;
+}
+
+static int scrub_writev(int fd, char *buf, int max, const char *fmt, ...)
+				__attribute__ ((format (printf, 4, 5)));
+/*
+ * Format a message into @buf (capacity @max bytes) and write it to @fd.
+ *
+ * Returns 0 on success and non-zero on failure: a negative value if
+ * formatting or writing failed, a positive value if the formatted text did
+ * not fit into @buf (nothing is written in that case).
+ */
+static int scrub_writev(int fd, char *buf, int max, const char *fmt, ...)
+{
+	int ret;
+	va_list args;
+
+	va_start(args, fmt);
+	ret = vsnprintf(buf, max, fmt, args);
+	va_end(args);
+	/* an output error must not be passed on as a length */
+	if (ret < 0)
+		return ret;
+	/*
+	 * ret >= max means truncation, including ret == max where the
+	 * terminating NUL did not fit; always report that as non-zero.
+	 */
+	if (ret >= max)
+		return ret - max + 1;
+	return scrub_write_buf(fd, buf, ret);
+}
+
+/*
+ * Helper for scrub_resumed_stats(): set progress counter "name" of dest to
+ * the value recorded for the earlier (resumed) run plus the value of the
+ * current run.
+ */
+#define _SCRUB_SUM(dest, data, name)					\
+	dest->scrub_args.progress.name =				\
+		data->resumed->p.name + data->scrub_args.progress.name
+
+/*
+ * Merge the numbers of a resumed scrub with those of the run it continues.
+ *
+ * If @data has no earlier record attached or is marked as skipped, @data is
+ * returned as is and @dest is not touched.  Otherwise the combined view is
+ * stored in @dest and @dest is returned; only the progress counters, the
+ * stats and the devid listed below are written.
+ */
+static struct scrub_progress *scrub_resumed_stats(struct scrub_progress *data,
+						  struct scrub_progress *dest)
+{
+	if (data->skip || !data->resumed)
+		return data;
+
+	/* identity and state are taken from the current run */
+	dest->scrub_args.devid = data->scrub_args.devid;
+	dest->stats.finished = data->stats.finished;
+	dest->stats.canceled = data->stats.canceled;
+
+	/* the first run started the scrub, the current one resumed it */
+	dest->stats.t_start = data->resumed->stats.t_start;
+	dest->stats.t_resumed = data->stats.t_start;
+	dest->stats.duration = data->resumed->stats.duration +
+							data->stats.duration;
+
+	/* counters are accumulated over both runs */
+	_SCRUB_SUM(dest, data, data_extents_scrubbed);
+	_SCRUB_SUM(dest, data, tree_extents_scrubbed);
+	_SCRUB_SUM(dest, data, data_bytes_scrubbed);
+	_SCRUB_SUM(dest, data, tree_bytes_scrubbed);
+	_SCRUB_SUM(dest, data, read_errors);
+	_SCRUB_SUM(dest, data, csum_errors);
+	_SCRUB_SUM(dest, data, verify_errors);
+	_SCRUB_SUM(dest, data, no_csum);
+	_SCRUB_SUM(dest, data, csum_discards);
+	_SCRUB_SUM(dest, data, super_errors);
+	_SCRUB_SUM(dest, data, malloc_errors);
+	_SCRUB_SUM(dest, data, uncorrectable_errors);
+	_SCRUB_SUM(dest, data, corrected_errors);
+	_SCRUB_SUM(dest, data, last_physical);
+
+	return dest;
+}
+
+/*
+ * Write "|<name>:<value>" for a progress counter of *use; the key is the
+ * stringified field name.  buf must be an array, its sizeof is passed on.
+ */
+#define _SCRUB_KVWRITE(fd, buf, name, use)		\
+	scrub_kvwrite(fd, buf, sizeof(buf), #name,	\
+			use->scrub_args.progress.name)
+
+/* same as _SCRUB_KVWRITE, but for a field of use->stats */
+#define _SCRUB_KVWRITE_STATS(fd, buf, name, use)	\
+	scrub_kvwrite(fd, buf, sizeof(buf), #name,	\
+			use->stats.name)
+
+/*
+ * Write one "|<key>:<val>" pair to @fd, using @buf (@max bytes) as scratch
+ * space.  Returns 0 on success, non-zero otherwise (see scrub_writev()).
+ */
+static int scrub_kvwrite(int fd, char *buf, int max, const char *key, u64 val)
+{
+	/* val is unsigned: print it as such, with a type matching the format */
+	return scrub_writev(fd, buf, max, "|%s:%llu", key,
+			    (unsigned long long)val);
+}
+
+/*
+ * Write the scrub status of @n devices to @fd.
+ *
+ * Output: one line SCRUB_FILE_VERSION_PREFIX ":" SCRUB_FILE_VERSION, then
+ * one line per device consisting of  <fsid>:<devid>  followed by
+ * "|<key>:<value>" pairs.  For a resumed scrub the numbers are merged with
+ * the earlier run first, see scrub_resumed_stats().
+ *
+ * Returns 0 on success, -EINVAL if @n < 1 and -EOVERFLOW if any part could
+ * not be written.
+ */
+static int scrub_write_file(int fd, const char *fsid,
+				struct scrub_progress *data, int n)
+{
+	int ret = 0;
+	int i;
+	char buf[1024];
+	struct scrub_progress local;
+	struct scrub_progress *use;
+
+	if (n < 1)
+		return -EINVAL;
+
+	/* each -1 is to subtract one \0 byte, the + 2 is for ':' and '\n' */
+	ret = scrub_write_buf(fd, SCRUB_FILE_VERSION_PREFIX ":"
+				SCRUB_FILE_VERSION "\n",
+				(sizeof(SCRUB_FILE_VERSION_PREFIX) - 1) +
+				(sizeof(SCRUB_FILE_VERSION) - 1) + 2);
+	if (ret)
+		return -EOVERFLOW;
+
+	for (i = 0; i < n; ++i) {
+		/* use points to data[i] or to the merged copy in local */
+		use = scrub_resumed_stats(&data[i], &local);
+		/*
+		 * the chain stops at the first failing write.  (A stray
+		 * zero-length write of buf that used to follow the devid
+		 * had no effect and was dropped.)
+		 */
+		if (scrub_write_buf(fd, fsid, strlen(fsid)) ||
+		    scrub_write_buf(fd, ":", 1) ||
+		    scrub_writev(fd, buf, sizeof(buf), "%llu",
+				 (unsigned long long)use->scrub_args.devid) ||
+		    _SCRUB_KVWRITE(fd, buf, data_extents_scrubbed, use) ||
+		    _SCRUB_KVWRITE(fd, buf, tree_extents_scrubbed, use) ||
+		    _SCRUB_KVWRITE(fd, buf, data_bytes_scrubbed, use) ||
+		    _SCRUB_KVWRITE(fd, buf, tree_bytes_scrubbed, use) ||
+		    _SCRUB_KVWRITE(fd, buf, read_errors, use) ||
+		    _SCRUB_KVWRITE(fd, buf, csum_errors, use) ||
+		    _SCRUB_KVWRITE(fd, buf, verify_errors, use) ||
+		    _SCRUB_KVWRITE(fd, buf, no_csum, use) ||
+		    _SCRUB_KVWRITE(fd, buf, csum_discards, use) ||
+		    _SCRUB_KVWRITE(fd, buf, super_errors, use) ||
+		    _SCRUB_KVWRITE(fd, buf, malloc_errors, use) ||
+		    _SCRUB_KVWRITE(fd, buf, uncorrectable_errors, use) ||
+		    _SCRUB_KVWRITE(fd, buf, corrected_errors, use) ||
+		    _SCRUB_KVWRITE(fd, buf, last_physical, use) ||
+		    _SCRUB_KVWRITE_STATS(fd, buf, t_start, use) ||
+		    _SCRUB_KVWRITE_STATS(fd, buf, t_resumed, use) ||
+		    _SCRUB_KVWRITE_STATS(fd, buf, duration, use) ||
+		    _SCRUB_KVWRITE_STATS(fd, buf, canceled, use) ||
+		    _SCRUB_KVWRITE_STATS(fd, buf, finished, use) ||
+		    scrub_write_buf(fd, "\n", 1)) {
+			return -EOVERFLOW;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Write the status of @n devices to the status file of filesystem @fsid.
+ *
+ * The data is written to a "tmp" file which is then renamed over the real
+ * one.  @m serializes writers, and thread cancellation is disabled while
+ * the mutex is held so that a cancelled thread cannot leave it locked.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int scrub_write_progress(pthread_mutex_t *m, const char *fsid,
+				struct scrub_progress *data, int n)
+{
+	int ret;
+	int err;
+	int fd = -1;
+	int old;
+	int prev;
+	int cancel_disabled = 0;
+
+	/* pthread functions return the error code, they do not set errno */
+	ret = pthread_mutex_lock(m);
+	if (ret)
+		return -ret;	/* not locked: nothing to undo */
+
+	ret = pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &old);
+	if (ret) {
+		err = -ret;
+		goto out;
+	}
+	cancel_disabled = 1;
+
+	fd = scrub_open_file_w(SCRUB_DATA_FILE, fsid, "tmp");
+	if (fd < 0) {
+		err = fd;
+		goto out;
+	}
+	err = scrub_write_file(fd, fsid, data, n);
+	if (err)
+		goto out;
+	err = scrub_rename_file(SCRUB_DATA_FILE, fsid, "tmp");
+	if (err)
+		goto out;
+
+out:
+	/* 0 is a valid descriptor, only negative values mean "not open" */
+	if (fd >= 0) {
+		ret = close(fd);
+		if (ret)
+			err = -errno;
+	}
+
+	/* unlock before cancellation can hit this thread again */
+	ret = pthread_mutex_unlock(m);
+	if (ret && !err)
+		err = -ret;
+
+	/* restore the caller's cancel state rather than forcing "enabled" */
+	if (cancel_disabled) {
+		ret = pthread_setcancelstate(old, &prev);
+		if (ret && !err)
+			err = -ret;
+	}
+
+	return err;
+}
+
+/*
+ * Thread body: run BTRFS_IOC_SCRUB for one device.
+ *
+ * @ctx is the struct scrub_progress of the device.  The ioctl result, the
+ * errno it left, the duration and the canceled flag are stored there;
+ * finished is set last, under progress_mutex.
+ *
+ * Returns NULL, or an ERR_PTR() if locking or unlocking the mutex failed.
+ */
+static void *scrub_one_dev(void *ctx)
+{
+	struct scrub_progress *sp = ctx;
+	int ret;
+	struct timeval tv;
+
+	sp->stats.canceled = 0;
+	sp->stats.duration = 0;
+	sp->stats.finished = 0;
+
+	ret = ioctl(sp->fd, BTRFS_IOC_SCRUB, &sp->scrub_args);
+	/* save errno right away, before another call can overwrite it */
+	sp->ioctl_errno = errno;
+	gettimeofday(&tv, NULL);
+	sp->ret = ret;
+	sp->stats.duration = tv.tv_sec - sp->stats.t_start;
+	/* every failing ioctl is recorded as a canceled scrub */
+	sp->stats.canceled = !!ret;
+	ret = pthread_mutex_lock(&sp->progress_mutex);
+	if (ret)
+		return ERR_PTR(-ret);
+	sp->stats.finished = 1;
+	ret = pthread_mutex_unlock(&sp->progress_mutex);
+	if (ret)
+		return ERR_PTR(-ret);
+
+	return NULL;
+}
+
+/*
+ * Query the scrub progress of one device with BTRFS_IOC_SCRUB_PROGRESS.
+ * @ctx is the device's struct scrub_progress; the ioctl result is stored in
+ * its ret member and the errno seen afterwards in ioctl_errno.
+ * Always returns NULL.
+ */
+static void *progress_one_dev(void *ctx)
+{
+	struct scrub_progress *dev = ctx;
+	int rc;
+
+	rc = ioctl(dev->fd, BTRFS_IOC_SCRUB_PROGRESS, &dev->scrub_args);
+	dev->ioctl_errno = errno;
+	dev->ret = rc;
+
+	return NULL;
+}
+
+/*
+ * Thread body: periodically collect scrub progress and publish it.
+ *
+ * @ctx is a struct scrub_progress_cycle.  Each round waits up to five
+ * seconds for a connection on spc->prg_fd, refreshes the progress of every
+ * unfinished device, writes the current numbers to a connected peer (if
+ * any) and, if spc->do_record is set, to the status file.
+ *
+ * spc->progress holds 2 * ndev entries used as two halves of ndev entries
+ * each; "this" and "last" select the half filled in the current and in the
+ * previous round and swap every round.
+ *
+ * The loop never ends by itself (scrub_start() cancels the thread); the
+ * function only returns, with an ERR_PTR(), when something fails.
+ */
+static void *scrub_progress_cycle(void *ctx)
+{
+	int ret;
+	int old;
+	int i;
+	char fsid[37];
+	struct scrub_progress *sp;
+	struct scrub_progress *sp_last;
+	struct scrub_progress *sp_shared;
+	struct timeval tv;
+	struct scrub_progress_cycle *spc = ctx;
+	int ndev = spc->fi->num_devices;
+	int this = 1;
+	int last = 0;
+	int peer_fd = -1;
+	struct pollfd accept_poll_fd = {
+		.fd = spc->prg_fd,
+		.events = POLLIN,
+		.revents = 0,
+	};
+	struct pollfd write_poll_fd = {
+		.events = POLLOUT,
+		.revents = 0,
+	};
+	struct sockaddr_un peer;
+	socklen_t peer_size = sizeof(peer);
+
+	ret = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old);
+	if (ret)
+		return ERR_PTR(-ret);
+
+	uuid_unparse(spc->fi->fsid, fsid);
+
+	/* seed both halves of the private array from the shared entries */
+	for (i = 0; i < ndev; ++i) {
+		sp = &spc->progress[i];
+		sp_last = &spc->progress[i + ndev];
+		sp_shared = &spc->shared_progress[i];
+		sp->scrub_args.devid = sp_last->scrub_args.devid =
+						sp_shared->scrub_args.devid;
+		sp->fd = sp_last->fd = spc->fdmnt;
+		sp->stats.t_start = sp_last->stats.t_start =
+						sp_shared->stats.t_start;
+		sp->resumed = sp_last->resumed = sp_shared->resumed;
+		sp->skip = sp_last->skip = sp_shared->skip;
+		sp->stats.finished = sp_last->stats.finished =
+						sp_shared->stats.finished;
+	}
+
+	while (1) {
+		/* wait up to 5 seconds for a client asking for the status */
+		ret = poll(&accept_poll_fd, 1, 5 * 1000);
+		if (ret == -1)
+			return ERR_PTR(-errno);
+		/*
+		 * NOTE(review): peer_size is passed to accept() without being
+		 * reset to sizeof(peer) each round -- confirm this is fine,
+		 * the peer address itself is not used below.
+		 */
+		if (ret)
+			peer_fd = accept(spc->prg_fd, (struct sockaddr *)&peer,
+					 &peer_size);
+		gettimeofday(&tv, NULL);
+		/* swap the roles of the two halves */
+		this = (this + 1)%2;
+		last = (last + 1)%2;
+		for (i = 0; i < ndev; ++i) {
+			sp = &spc->progress[this * ndev + i];
+			sp_last = &spc->progress[last * ndev + i];
+			sp_shared = &spc->shared_progress[i];
+			if (sp->stats.finished)
+				continue;
+			progress_one_dev(sp);
+			sp->stats.duration = tv.tv_sec - sp->stats.t_start;
+			/* the query worked: sp holds fresh numbers */
+			if (!sp->ret)
+				continue;
+			if (sp->ioctl_errno != ENOTCONN &&
+			    sp->ioctl_errno != ENODEV)
+				return ERR_PTR(-sp->ioctl_errno);
+			/*
+			 * scrub finished or device removed, check the
+			 * finished flag. if unset, just use the last
+			 * result we got for the current write and go
+			 * on. flag should be set on next cycle, then.
+			 */
+			ret = pthread_mutex_lock(&sp_shared->progress_mutex);
+			if (ret)
+				return ERR_PTR(-ret);
+			if (!sp_shared->stats.finished) {
+				ret = pthread_mutex_unlock(
+						&sp_shared->progress_mutex);
+				if (ret)
+					return ERR_PTR(-ret);
+				memcpy(sp, sp_last, sizeof(*sp));
+				continue;
+			}
+			ret = pthread_mutex_unlock(&sp_shared->progress_mutex);
+			if (ret)
+				return ERR_PTR(-ret);
+			/* finished: take over the final shared result */
+			memcpy(sp, sp_shared, sizeof(*sp));
+			memcpy(sp_last, sp_shared, sizeof(*sp));
+		}
+		if (peer_fd != -1) {
+			/* timeout 0: skip a peer that is not writable now */
+			write_poll_fd.fd = peer_fd;
+			ret = poll(&write_poll_fd, 1, 0);
+			if (ret == -1)
+				return ERR_PTR(-errno);
+			if (ret) {
+				ret = scrub_write_file(
+					peer_fd, fsid,
+					&spc->progress[this * ndev], ndev);
+				if (ret)
+					return ERR_PTR(ret);
+			}
+			close(peer_fd);
+			peer_fd = -1;
+		}
+		if (!spc->do_record)
+			continue;
+		ret = scrub_write_progress(spc->write_mutex, fsid,
+					   &spc->progress[this * ndev], ndev);
+		if (ret)
+			return ERR_PTR(ret);
+	}
+}
+
+/*
+ * Look up the record for device @devid in the NULL-terminated array
+ * @past_scrubs.  Returns the first matching record, or NULL if there is
+ * none or if @past_scrubs is NULL or an error pointer.
+ */
+static struct scrub_file_record *last_dev_scrub(
+		struct scrub_file_record *const *const past_scrubs, u64 devid)
+{
+	struct scrub_file_record *const *rec;
+
+	if (!past_scrubs || IS_ERR(past_scrubs))
+		return NULL;
+
+	for (rec = past_scrubs; *rec; ++rec) {
+		if ((*rec)->devid == devid)
+			return *rec;
+	}
+
+	return NULL;
+}
+
+/*
+ * Fetch information about device @devid through BTRFS_IOC_DEV_INFO on @fd.
+ * The uuid in @di_args is zeroed and the devid set before the call.
+ * Returns 0 on success, -errno if the ioctl fails.
+ */
+static int scrub_device_info(int fd, u64 devid,
+			     struct btrfs_ioctl_dev_info_args *di_args)
+{
+	memset(&di_args->uuid, '\0', sizeof(di_args->uuid));
+	di_args->devid = devid;
+
+	if (ioctl(fd, BTRFS_IOC_DEV_INFO, di_args))
+		return -errno;
+
+	return 0;
+}
+
+/*
+ * Collect filesystem and device information for the scrub commands.
+ *
+ * @fd is an open descriptor for @path.  If BTRFS_IOC_FS_INFO fails with
+ * EINVAL, @path is taken to be a device: its mount point is looked up and
+ * @fd is redirected to it with dup2(), so the descriptor number the caller
+ * holds stays valid and refers to the mount point afterwards.  In that case
+ * only that single device is reported.
+ *
+ * On success @fi_args is filled in and *@di_ret points to a malloc'ed array
+ * of fi_args->num_devices entries, to be freed by the caller (*@di_ret is
+ * left untouched if there are no devices).
+ *
+ * Returns 0 on success, -EINVAL if @path is neither a mounted btrfs nor one
+ * of its devices, or another negative error code.
+ */
+static int scrub_fs_info(int fd, char *path,
+				struct btrfs_ioctl_fs_info_args *fi_args,
+				struct btrfs_ioctl_dev_info_args **di_ret)
+{
+	int ret = 0;
+	int ndevs = 0;
+	int i = 1;
+	int mnt_fd;
+	struct btrfs_fs_devices *fs_devices_mnt = NULL;
+	struct btrfs_ioctl_dev_info_args *di_args;
+	char mp[BTRFS_PATH_NAME_MAX + 1];
+
+	memset(fi_args, 0, sizeof(*fi_args));
+
+	ret = ioctl(fd, BTRFS_IOC_FS_INFO, fi_args);
+	if (ret && errno == EINVAL) {
+		/* path is no mounted btrfs. try if it's a device */
+		ret = check_mounted_where(fd, path, mp, sizeof(mp),
+						&fs_devices_mnt);
+		if (!ret)
+			return -EINVAL;
+		if (ret < 0)
+			return ret;
+		fi_args->num_devices = 1;
+		fi_args->max_id = fs_devices_mnt->latest_devid;
+		/* only query this one device in the loop below */
+		i = fs_devices_mnt->latest_devid;
+		memcpy(fi_args->fsid, fs_devices_mnt->fsid, BTRFS_FSID_SIZE);
+		/*
+		 * fd is passed by value, so closing it and opening a new
+		 * one would leave the caller with a stale descriptor unless
+		 * the same number happens to be reused.  Redirect the
+		 * caller's descriptor instead.
+		 */
+		mnt_fd = open_file_or_dir(mp);
+		if (mnt_fd < 0)
+			return -errno;
+		if (dup2(mnt_fd, fd) < 0) {
+			ret = -errno;
+			close(mnt_fd);
+			return ret;
+		}
+		close(mnt_fd);
+	} else if (ret) {
+		return -errno;
+	}
+
+	if (!fi_args->num_devices)
+		return 0;
+
+	di_args = *di_ret = malloc(fi_args->num_devices * sizeof(*di_args));
+	if (!di_args)
+		return -errno;
+
+	/* device ids may have holes, -ENODEV just means "no such id" */
+	for (; i <= fi_args->max_id; ++i) {
+		BUG_ON(ndevs >= fi_args->num_devices);
+		ret = scrub_device_info(fd, i, &di_args[ndevs]);
+		if (ret == -ENODEV)
+			continue;
+		if (ret)
+			return ret;
+		++ndevs;
+	}
+
+	BUG_ON(ndevs == 0);
+
+	return 0;
+}
+
+/*
+ * Create every directory leading up to the last path component of @path
+ * (the part behind the last '/' is not created).  Already existing
+ * components are not an error.
+ *
+ * @path is modified temporarily.  Returns 0 on success; on failure 1 is
+ * returned, errno is the one set by mkdir() and @path is left cut off
+ * behind the component that could not be created.
+ */
+int mkdir_p(char *path)
+{
+	char *sep;
+
+	if (path[0] == '\0')
+		return 0;
+
+	/* a '/' in the first position is never used as a cut point */
+	for (sep = path + 1; *sep != '\0'; ++sep) {
+		if (*sep != '/')
+			continue;
+		*sep = '\0';
+		if (mkdir(path, 0777) != 0 && errno != EEXIST)
+			return 1;
+		*sep = '/';
+	}
+
+	return 0;
+}
+
+static const char * const cmd_scrub_start_usage[];
+static const char * const cmd_scrub_resume_usage[];
+
+/*
+ * Common implementation of "scrub start" (@resume == 0) and "scrub resume"
+ * (@resume != 0).
+ *
+ * Options: -B do not background, wait and print the result; -d print
+ * statistics per device; -q be quiet; -r read-only scrub; -R print raw
+ * statistics.  Exactly one path argument is expected.
+ *
+ * Unless -B is given the process forks: the parent prints the child's pid
+ * and returns, the child runs one scrub thread per device plus a progress
+ * thread that serves status queries on a unix socket and updates the status
+ * file.
+ *
+ * Return value: 12 if the path cannot be opened, 1 on any other error,
+ * 7 if a device reported corrected or unverified errors, otherwise 8 if a
+ * device reported uncorrectable errors, otherwise 0.
+ */
+static int scrub_start(int argc, char **argv, int resume)
+{
+	int fdmnt;
+	int prg_fd = -1;
+	int fdres = -1;
+	int ret;
+	pid_t pid;
+	int c;
+	int i;
+	int err = 0;
+	int e_uncorrectable = 0;
+	int e_correctable = 0;
+	int print_raw = 0;
+	char *path;
+	int do_background = 1;
+	int do_wait = 0;
+	int do_print = 0;
+	int do_quiet = 0;
+	int do_record = 1;
+	int readonly = 0;
+	int do_stats_per_dev = 0;
+	int n_start = 0;
+	int n_skip = 0;
+	int n_resume = 0;
+	struct btrfs_ioctl_fs_info_args fi_args;
+	struct btrfs_ioctl_dev_info_args *di_args = NULL;
+	struct scrub_progress *sp = NULL;
+	struct scrub_fs_stat fs_stat;
+	struct timeval tv;
+	struct sockaddr_un addr = {
+		.sun_family = AF_UNIX,
+	};
+	pthread_t *t_devs = NULL;
+	pthread_t t_prog;
+	pthread_attr_t t_attr;
+	struct scrub_file_record **past_scrubs = NULL;
+	struct scrub_file_record *last_scrub = NULL;
+	char *datafile = strdup(SCRUB_DATA_FILE);
+	char fsid[37];
+	char sock_path[BTRFS_PATH_NAME_MAX + 1] = "";
+	struct scrub_progress_cycle spc;
+	pthread_mutex_t spc_write_mutex = PTHREAD_MUTEX_INITIALIZER;
+	/* stays NULL if the progress thread cannot be cancelled or joined */
+	void *terr = NULL;
+	u64 devid;
+
+	optind = 1;
+	while ((c = getopt(argc, argv, "BdqrR")) != -1) {
+		switch (c) {
+		case 'B':
+			do_background = 0;
+			do_wait = 1;
+			do_print = 1;
+			break;
+		case 'd':
+			do_stats_per_dev = 1;
+			break;
+		case 'q':
+			do_quiet = 1;
+			break;
+		case 'r':
+			readonly = 1;
+			break;
+		case 'R':
+			print_raw = 1;
+			break;
+		case '?':
+		default:
+			usage(resume ? cmd_scrub_resume_usage :
+						cmd_scrub_start_usage);
+		}
+	}
+
+	/* try to catch most error cases before forking */
+
+	if (check_argc_exact(argc - optind, 1)) {
+		usage(resume ? cmd_scrub_resume_usage :
+					cmd_scrub_start_usage);
+	}
+
+	spc.progress = NULL;
+	if (do_quiet && do_print)
+		do_print = 0;
+
+	/* on failure datafile is cut off at the failing directory */
+	if (mkdir_p(datafile)) {
+		ERR(!do_quiet, "WARNING: cannot create scrub data "
+			       "file, mkdir %s failed: %s. Status recording "
+			       "disabled\n", datafile, strerror(errno));
+		do_record = 0;
+	}
+	free(datafile);
+
+	path = argv[optind];
+
+	fdmnt = open_file_or_dir(path);
+	if (fdmnt < 0) {
+		ERR(!do_quiet, "ERROR: can't access '%s'\n", path);
+		return 12;
+	}
+
+	ret = scrub_fs_info(fdmnt, path, &fi_args, &di_args);
+	if (ret) {
+		ERR(!do_quiet, "ERROR: getting dev info for scrub failed: "
+		    "%s\n", strerror(-ret));
+		err = 1;
+		goto out;
+	}
+	if (!fi_args.num_devices) {
+		ERR(!do_quiet, "ERROR: no devices found\n");
+		err = 1;
+		goto out;
+	}
+
+	/* load the records of earlier scrubs; a missing file is fine */
+	uuid_unparse(fi_args.fsid, fsid);
+	fdres = scrub_open_file_r(SCRUB_DATA_FILE, fsid);
+	if (fdres < 0 && fdres != -ENOENT) {
+		ERR(!do_quiet, "WARNING: failed to open status file: "
+		    "%s\n", strerror(-fdres));
+	} else if (fdres >= 0) {
+		past_scrubs = scrub_read_file(fdres, !do_quiet);
+		if (IS_ERR(past_scrubs))
+			ERR(!do_quiet, "WARNING: failed to read status file: "
+			    "%s\n", strerror(-PTR_ERR(past_scrubs)));
+		close(fdres);
+	}
+
+	t_devs = malloc(fi_args.num_devices * sizeof(*t_devs));
+	sp = calloc(fi_args.num_devices, sizeof(*sp));
+	/* two entries per device, see scrub_progress_cycle() */
+	spc.progress = calloc(fi_args.num_devices * 2, sizeof(*spc.progress));
+
+	if (!t_devs || !sp || !spc.progress) {
+		ERR(!do_quiet, "ERROR: scrub failed: %s\n", strerror(errno));
+		err = 1;
+		goto out;
+	}
+
+	ret = pthread_attr_init(&t_attr);
+	if (ret) {
+		ERR(!do_quiet, "ERROR: pthread_attr_init failed: %s\n",
+		    strerror(ret));
+		err = 1;
+		goto out;
+	}
+
+	/* decide per device: start from scratch, resume, or skip */
+	for (i = 0; i < fi_args.num_devices; ++i) {
+		devid = di_args[i].devid;
+		ret = pthread_mutex_init(&sp[i].progress_mutex, NULL);
+		if (ret) {
+			ERR(!do_quiet, "ERROR: pthread_mutex_init failed: "
+			    "%s\n", strerror(ret));
+			err = 1;
+			goto out;
+		}
+		last_scrub = last_dev_scrub(past_scrubs, devid);
+		sp[i].scrub_args.devid = devid;
+		sp[i].fd = fdmnt;
+		if (resume && last_scrub && (last_scrub->stats.canceled ||
+					     !last_scrub->stats.finished)) {
+			++n_resume;
+			sp[i].scrub_args.start = last_scrub->p.last_physical;
+			sp[i].resumed = last_scrub;
+		} else if (resume) {
+			++n_skip;
+			sp[i].skip = 1;
+			sp[i].resumed = last_scrub;
+			continue;
+		} else {
+			++n_start;
+			sp[i].scrub_args.start = 0ll;
+			sp[i].resumed = NULL;
+		}
+		sp[i].skip = 0;
+		sp[i].scrub_args.end = (u64)-1ll;
+		sp[i].scrub_args.flags = readonly ? BTRFS_SCRUB_READONLY : 0;
+	}
+
+	if (!n_start && !n_resume) {
+		if (!do_quiet)
+			printf("scrub: nothing to resume for %s, fsid %s\n",
+			       path, fsid);
+		err = 0;
+		goto out;
+	}
+
+	ret = prg_fd = socket(AF_UNIX, SOCK_STREAM, 0);
+	while (ret != -1) {
+		ret = scrub_datafile(SCRUB_PROGRESS_SOCKET_PATH, fsid, NULL,
+					sock_path, sizeof(sock_path));
+		/* ignore EOVERFLOW, try using a shorter path for the socket */
+		addr.sun_path[sizeof(addr.sun_path) - 1] = '\0';
+		strncpy(addr.sun_path, sock_path, sizeof(addr.sun_path) - 1);
+		ret = bind(prg_fd, (struct sockaddr *)&addr, sizeof(addr));
+		if (ret != -1 || errno != EADDRINUSE)
+			break;
+		/*
+		 * bind failed with EADDRINUSE. so let's see if anyone answers
+		 * when we make a call to the socket ...
+		 */
+		ret = connect(prg_fd, (struct sockaddr *)&addr, sizeof(addr));
+		if (!ret || errno != ECONNREFUSED) {
+			/* ... yes, so scrub must be running. error out */
+			fprintf(stderr, "ERROR: scrub already running\n");
+			close(prg_fd);
+			/*
+			 * the socket path belongs to the running scrub:
+			 * make sure the cleanup at "out" neither closes
+			 * prg_fd again nor unlinks that path, and report
+			 * the failure in the exit status.
+			 */
+			prg_fd = -1;
+			err = 1;
+			goto out;
+		}
+		/*
+		 * ... no, this means someone left us alone with an unused
+		 * socket in the file system. remove it and try again.
+		 */
+		ret = unlink(sock_path);
+	}
+	if (ret != -1)
+		ret = listen(prg_fd, 100);
+	if (ret == -1) {
+		ERR(!do_quiet, "WARNING: failed to open the progress status "
+		    "socket at %s: %s. Progress cannot be queried\n",
+		    sock_path[0] ? sock_path : SCRUB_PROGRESS_SOCKET_PATH,
+		    strerror(errno));
+		if (prg_fd != -1) {
+			close(prg_fd);
+			prg_fd = -1;
+			if (sock_path[0])
+				unlink(sock_path);
+		}
+	}
+
+	if (do_record) {
+		/* write all-zero progress file for a start */
+		ret = scrub_write_progress(&spc_write_mutex, fsid, sp,
+					   fi_args.num_devices);
+		if (ret) {
+			ERR(!do_quiet, "WARNING: failed to write the progress "
+			    "status file: %s. Status recording disabled\n",
+			    strerror(-ret));
+			do_record = 0;
+		}
+	}
+
+	if (do_background) {
+		pid = fork();
+		if (pid == -1) {
+			ERR(!do_quiet, "ERROR: cannot scrub, fork failed: "
+					"%s\n", strerror(errno));
+			err = 1;
+			goto out;
+		}
+
+		if (pid) {
+			int stat;
+			/*
+			 * the child keeps listening on the progress socket.
+			 * The parent only closes its copy of the descriptor
+			 * at "out"; it must not unlink the path.
+			 */
+			sock_path[0] = '\0';
+			scrub_handle_sigint_parent();
+			if (!do_quiet)
+				printf("scrub %s on %s, fsid %s (pid=%d)\n",
+				       n_start ? "started" : "resumed",
+				       path, fsid, pid);
+			if (!do_wait) {
+				err = 0;
+				goto out;
+			}
+			ret = wait(&stat);
+			if (ret != pid) {
+				ERR(!do_quiet, "ERROR: wait failed: (ret=%d) "
+				    "%s\n", ret, strerror(errno));
+				err = 1;
+				goto out;
+			}
+			if (!WIFEXITED(stat) || WEXITSTATUS(stat)) {
+				ERR(!do_quiet, "ERROR: scrub process failed\n");
+				err = WIFEXITED(stat) ? WEXITSTATUS(stat) : -1;
+				goto out;
+			}
+			err = 0;
+			goto out;
+		}
+	}
+
+	/* from here on: the child, or the only process with -B */
+	scrub_handle_sigint_child(fdmnt);
+
+	for (i = 0; i < fi_args.num_devices; ++i) {
+		if (sp[i].skip) {
+			/* nothing to do: report the recorded result */
+			sp[i].scrub_args.progress = sp[i].resumed->p;
+			sp[i].stats = sp[i].resumed->stats;
+			sp[i].ret = 0;
+			sp[i].stats.finished = 1;
+			continue;
+		}
+		devid = di_args[i].devid;
+		gettimeofday(&tv, NULL);
+		sp[i].stats.t_start = tv.tv_sec;
+		ret = pthread_create(&t_devs[i], &t_attr,
+					scrub_one_dev, &sp[i]);
+		if (ret) {
+			if (do_print)
+				fprintf(stderr, "ERROR: creating "
+					"scrub_one_dev[%llu] thread failed: "
+					"%s\n", devid, strerror(ret));
+			err = 1;
+			goto out;
+		}
+	}
+
+	spc.fdmnt = fdmnt;
+	spc.prg_fd = prg_fd;
+	spc.do_record = do_record;
+	spc.write_mutex = &spc_write_mutex;
+	spc.shared_progress = sp;
+	spc.fi = &fi_args;
+	ret = pthread_create(&t_prog, &t_attr, scrub_progress_cycle, &spc);
+	if (ret) {
+		if (do_print)
+			fprintf(stderr, "ERROR: creating progress thread "
+				"failed: %s\n", strerror(ret));
+		err = 1;
+		goto out;
+	}
+
+	/* wait for all scrub threads and evaluate their results */
+	err = 0;
+	for (i = 0; i < fi_args.num_devices; ++i) {
+		if (sp[i].skip)
+			continue;
+		devid = di_args[i].devid;
+		ret = pthread_join(t_devs[i], NULL);
+		if (ret) {
+			if (do_print)
+				fprintf(stderr, "ERROR: pthread_join failed "
+					"for scrub_one_dev[%llu]: %s\n", devid,
+					strerror(ret));
+			++err;
+			continue;
+		}
+		if (sp[i].ret && sp[i].ioctl_errno == ENODEV) {
+			if (do_print)
+				fprintf(stderr, "WARNING: device %lld not "
+					"present\n", devid);
+			continue;
+		}
+		if (sp[i].ret && sp[i].ioctl_errno == ECANCELED) {
+			++err;
+		} else if (sp[i].ret) {
+			if (do_print)
+				fprintf(stderr, "ERROR: scrubbing %s failed "
+					"for device id %lld (%s)\n", path,
+					devid, strerror(sp[i].ioctl_errno));
+			++err;
+			continue;
+		}
+		if (sp[i].scrub_args.progress.uncorrectable_errors > 0)
+			e_uncorrectable++;
+		if (sp[i].scrub_args.progress.corrected_errors > 0
+		    || sp[i].scrub_args.progress.unverified_errors > 0)
+			e_correctable++;
+	}
+
+	if (do_print) {
+		const char *append = "done";
+		if (!do_stats_per_dev)
+			init_fs_stat(&fs_stat);
+		for (i = 0; i < fi_args.num_devices; ++i) {
+			if (do_stats_per_dev) {
+				print_scrub_dev(&di_args[i],
+						&sp[i].scrub_args.progress,
+						print_raw,
+						sp[i].ret ? "canceled" : "done",
+						&sp[i].stats);
+			} else {
+				if (sp[i].ret)
+					append = "canceled";
+				add_to_fs_stat(&sp[i].scrub_args.progress,
+						&sp[i].stats, &fs_stat);
+			}
+		}
+		if (!do_stats_per_dev) {
+			printf("scrub %s for %s\n", append, fsid);
+			print_fs_stat(&fs_stat, print_raw);
+		}
+	}
+
+	/* the progress thread loops forever, stop it explicitly */
+	ret = pthread_cancel(t_prog);
+	if (!ret)
+		ret = pthread_join(t_prog, &terr);
+	if (do_print && ret) {
+		fprintf(stderr, "ERROR: progress thead handling failed: %s\n",
+			strerror(ret));
+	}
+
+	if (do_print && terr && terr != PTHREAD_CANCELED) {
+		fprintf(stderr, "ERROR: recording progress "
+			"failed: %s\n", strerror(-PTR_ERR(terr)));
+	}
+
+	if (do_record) {
+		/* record the final result */
+		ret = scrub_write_progress(&spc_write_mutex, fsid, sp,
+					   fi_args.num_devices);
+		if (ret && do_print) {
+			fprintf(stderr, "ERROR: failed to record the result: "
+				"%s\n", strerror(-ret));
+		}
+	}
+
+	scrub_handle_sigint_child(-1);
+
+out:
+	free_history(past_scrubs);
+	free(di_args);
+	free(t_devs);
+	free(sp);
+	free(spc.progress);
+	if (prg_fd > -1) {
+		close(prg_fd);
+		if (sock_path[0])
+			unlink(sock_path);
+	}
+	close(fdmnt);
+
+	if (err)
+		return 1;
+	if (e_correctable)
+		return 7;
+	if (e_uncorrectable)
+		return 8;
+	return 0;
+}
+
+/*
+ * Usage text of "btrfs scrub start".  The option list matches the getopt
+ * string "BdqrR" used by scrub_start().
+ */
+static const char * const cmd_scrub_start_usage[] = {
+	"btrfs scrub start [-BdqrR] <path>|<device>",
+	"Start a new scrub",
+	"",
+	"-B     do not background",
+	"-d     stats per device (-B only)",
+	"-q     be quiet",
+	"-r     read only mode",
+	"-R     print raw stats (-B only)",
+	NULL
+};
+
+/* "btrfs scrub start": start a new scrub, see scrub_start() */
+static int cmd_scrub_start(int argc, char **argv)
+{
+	return scrub_start(argc, argv, 0);
+}
+
+static const char * const cmd_scrub_cancel_usage[] = {
+	"btrfs scrub cancel <path>|<device>",
+	"Cancel a running scrub",
+	NULL
+};
+
+/*
+ * "btrfs scrub cancel": cancel the scrub running on the given filesystem.
+ *
+ * The argument may be a path inside a mounted btrfs or one of its devices;
+ * for a device the mount point is looked up and the ioctl is retried there
+ * once.
+ *
+ * Returns 0 on success, 12 if the path cannot be opened and 1 if the scrub
+ * could not be cancelled.
+ */
+static int cmd_scrub_cancel(int argc, char **argv)
+{
+	char *path;
+	int ret;
+	int mounted;
+	int fdmnt;
+	int err;
+	char mp[BTRFS_PATH_NAME_MAX + 1];
+	struct btrfs_fs_devices *fs_devices_mnt = NULL;
+
+	if (check_argc_exact(argc, 2))
+		usage(cmd_scrub_cancel_usage);
+
+	path = argv[1];
+
+	fdmnt = open_file_or_dir(path);
+	if (fdmnt < 0) {
+		fprintf(stderr, "ERROR: scrub cancel failed\n");
+		return 12;
+	}
+
+again:
+	ret = ioctl(fdmnt, BTRFS_IOC_SCRUB_CANCEL, NULL);
+	/* keep the ioctl's errno, the calls below may change it */
+	err = errno;
+
+	if (ret && err == EINVAL && path != mp) {
+		/* path is no mounted btrfs. try if it's a device */
+		/* the descriptor must still be open for this lookup */
+		mounted = check_mounted_where(fdmnt, path, mp, sizeof(mp),
+					      &fs_devices_mnt);
+		close(fdmnt);
+		/*
+		 * only a positive result means mp is valid.  ret is left
+		 * untouched so that "not mounted" is reported as a failure
+		 * below instead of as a successful cancel.
+		 */
+		if (mounted > 0) {
+			fdmnt = open_file_or_dir(mp);
+			if (fdmnt >= 0) {
+				path = mp;
+				goto again;
+			}
+		}
+	} else {
+		close(fdmnt);
+	}
+
+	if (ret) {
+		fprintf(stderr, "ERROR: scrub cancel failed on %s: %s\n", path,
+			err == ENOTCONN ? "not running" : strerror(err));
+		return 1;
+	}
+
+	printf("scrub cancelled\n");
+
+	return 0;
+}
+
+/*
+ * Usage text of "btrfs scrub resume".  The option list matches the getopt
+ * string "BdqrR" used by scrub_start().
+ */
+static const char * const cmd_scrub_resume_usage[] = {
+	"btrfs scrub resume [-BdqrR] <path>|<device>",
+	"Resume previously canceled or interrupted scrub",
+	"",
+	"-B     do not background",
+	"-d     stats per device (-B only)",
+	"-q     be quiet",
+	"-r     read only mode",
+	"-R     print raw stats (-B only)",
+	NULL
+};
+
+/* "btrfs scrub resume": continue an unfinished scrub, see scrub_start() */
+static int cmd_scrub_resume(int argc, char **argv)
+{
+	return scrub_start(argc, argv, 1);
+}
+
+static const char * const cmd_scrub_status_usage[] = {
+	"btrfs scrub status [-dR] <path>|<device>",
+	"Show status of running or finished scrub",
+	"",
+	"-d     stats per device",
+	"-R     print raw stats",
+	NULL
+};
+
+/*
+ * "btrfs scrub status": print the status of a running or finished scrub.
+ *
+ * The numbers are read from the progress socket of a running scrub if one
+ * can be connected to, otherwise from the status file.  With -d they are
+ * printed per device, otherwise summed up for the filesystem.
+ *
+ * Returns 0 on success, 12 if the path cannot be opened, 1 on other errors.
+ */
+static int cmd_scrub_status(int argc, char **argv)
+{
+	char *path;
+	struct btrfs_ioctl_fs_info_args fi_args;
+	struct btrfs_ioctl_dev_info_args *di_args = NULL;
+	struct scrub_file_record **past_scrubs = NULL;
+	struct scrub_file_record *last_scrub;
+	struct scrub_fs_stat fs_stat;
+	struct sockaddr_un addr = {
+		.sun_family = AF_UNIX,
+	};
+	int ret;
+	int fdmnt;
+	int i;
+	int print_raw = 0;
+	int do_stats_per_dev = 0;
+	/* int, not char: getopt() returns int and -1 must stay comparable */
+	int c;
+	char fsid[37];
+	int fdres = -1;
+	int err = 0;
+
+	optind = 1;
+	while ((c = getopt(argc, argv, "dR")) != -1) {
+		switch (c) {
+		case 'd':
+			do_stats_per_dev = 1;
+			break;
+		case 'R':
+			print_raw = 1;
+			break;
+		case '?':
+		default:
+			usage(cmd_scrub_status_usage);
+		}
+	}
+
+	if (check_argc_exact(argc - optind, 1))
+		usage(cmd_scrub_status_usage);
+
+	path = argv[optind];
+
+	fdmnt = open_file_or_dir(path);
+	if (fdmnt < 0) {
+		fprintf(stderr, "ERROR: can't access to '%s'\n", path);
+		return 12;
+	}
+
+	ret = scrub_fs_info(fdmnt, path, &fi_args, &di_args);
+	if (ret) {
+		fprintf(stderr, "ERROR: getting dev info for scrub failed: "
+				"%s\n", strerror(-ret));
+		err = 1;
+		goto out;
+	}
+	if (!fi_args.num_devices) {
+		fprintf(stderr, "ERROR: no devices found\n");
+		err = 1;
+		goto out;
+	}
+
+	uuid_unparse(fi_args.fsid, fsid);
+
+	fdres = socket(AF_UNIX, SOCK_STREAM, 0);
+	if (fdres == -1) {
+		fprintf(stderr, "ERROR: failed to create socket to "
+			"receive progress information: %s\n",
+			strerror(errno));
+		err = 1;
+		goto out;
+	}
+	scrub_datafile(SCRUB_PROGRESS_SOCKET_PATH, fsid,
+			NULL, addr.sun_path, sizeof(addr.sun_path));
+	/* ignore EOVERFLOW, just use shorter name and hope for the best */
+	addr.sun_path[sizeof(addr.sun_path) - 1] = '\0';
+	ret = connect(fdres, (struct sockaddr *)&addr, sizeof(addr));
+	if (ret == -1) {
+		/*
+		 * nobody is listening: fall back to the status file.  The
+		 * socket is closed first, fdres is reused for the file.
+		 */
+		close(fdres);
+		fdres = scrub_open_file_r(SCRUB_DATA_FILE, fsid);
+		if (fdres < 0 && fdres != -ENOENT) {
+			fprintf(stderr, "WARNING: failed to open status file: "
+				"%s\n", strerror(-fdres));
+			err = 1;
+			goto out;
+		}
+	}
+
+	if (fdres >= 0) {
+		past_scrubs = scrub_read_file(fdres, 1);
+		if (IS_ERR(past_scrubs))
+			fprintf(stderr, "WARNING: failed to read status: %s\n",
+				strerror(-PTR_ERR(past_scrubs)));
+	}
+
+	printf("scrub status for %s\n", fsid);
+
+	if (do_stats_per_dev) {
+		for (i = 0; i < fi_args.num_devices; ++i) {
+			last_scrub = last_dev_scrub(past_scrubs,
+							di_args[i].devid);
+			if (!last_scrub) {
+				print_scrub_dev(&di_args[i], NULL, print_raw,
+						NULL, NULL);
+				continue;
+			}
+			print_scrub_dev(&di_args[i], &last_scrub->p, print_raw,
+					last_scrub->stats.finished ?
+							"history" : "status",
+					&last_scrub->stats);
+		}
+	} else {
+		init_fs_stat(&fs_stat);
+		for (i = 0; i < fi_args.num_devices; ++i) {
+			last_scrub = last_dev_scrub(past_scrubs,
+							di_args[i].devid);
+			if (!last_scrub)
+				continue;
+			add_to_fs_stat(&last_scrub->p, &last_scrub->stats,
+					&fs_stat);
+		}
+		print_fs_stat(&fs_stat, print_raw);
+	}
+
+out:
+	free_history(past_scrubs);
+	free(di_args);
+	close(fdmnt);
+	if (fdres > -1)
+		close(fdres);
+
+	return err;
+}
+
+/*
+ * Command table of "btrfs scrub": each entry gives the sub-command name,
+ * its handler and its usage text; the all-zero entry ends the table.
+ * (The meaning of the remaining fields is defined with struct cmd_group,
+ * outside this file section.)
+ */
+const struct cmd_group scrub_cmd_group = {
+	scrub_cmd_group_usage, NULL, {
+		{ "start", cmd_scrub_start, cmd_scrub_start_usage, NULL, 0 },
+		{ "cancel", cmd_scrub_cancel, cmd_scrub_cancel_usage, NULL, 0 },
+		{ "resume", cmd_scrub_resume, cmd_scrub_resume_usage, NULL, 0 },
+		{ "status", cmd_scrub_status, cmd_scrub_status_usage, NULL, 0 },
+		{ 0, 0, 0, 0, 0 }
+	}
+};
+
+/*
+ * Entry point of "btrfs scrub": hand the arguments to the sub-command
+ * listed in scrub_cmd_group and return what the dispatcher returns.
+ */
+int cmd_scrub(int argc, char **argv)
+{
+	const struct cmd_group *grp = &scrub_cmd_group;
+
+	return handle_command_group(grp, argc, argv);
+}
diff --git a/cmds-subvolume.c b/cmds-subvolume.c
new file mode 100644
index 0000000..950fa8f
--- /dev/null
+++ b/cmds-subvolume.c
@@ -0,0 +1,533 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <libgen.h>
+#include <limits.h>
+
+#include "kerncompat.h"
+#include "ioctl.h"
+
+#include "commands.h"
+
+/* btrfs-list.c */
+int list_subvols(int fd, int print_parent, int get_default);
+int find_updated_files(int fd, u64 root_id, u64 oldest_gen);
+
+/*
+ * Usage text of the "btrfs subvolume" command group: a single synopsis
+ * line followed by the NULL terminator.
+ */
+static const char * const subvolume_cmd_group_usage[] = {
+	"btrfs subvolume <command> <args>",
+	NULL
+};
+
+/*
+ * Check whether <path> names a directory.
+ *
+ * Return value:
+ *   the S_ISDIR() result (non-zero) -> path exists and is a directory
+ *   0                               -> path exists but is not a directory
+ *  -1                               -> stat() on path failed
+ */
+static int test_isdir(char *path)
+{
+	struct stat	sb;
+
+	if (stat(path, &sb) < 0)
+		return -1;
+
+	return S_ISDIR(sb.st_mode);
+}
+
+/*
+ * Usage text for "btrfs subvolume create": synopsis, one-line summary,
+ * then the long description, NULL-terminated.
+ */
+static const char * const cmd_subvol_create_usage[] = {
+	"btrfs subvolume create [<dest>/]<name>",
+	"Create a subvolume",
+	"Create a subvolume <name> in <dest>.  If <dest> is not given",
+	"subvolume <name> will be created in the current directory.",
+	NULL
+};
+
+/*
+ * Handler for "btrfs subvolume create [<dest>/]<name>".
+ *
+ * Splits argv[1] into a parent directory and a subvolume name, validates
+ * the name and issues BTRFS_IOC_SUBVOL_CREATE on the parent directory.
+ *
+ * Returns 0 on success, 12 if the target already exists, the parent
+ * directory cannot be opened or memory runs out, 14 if the name is
+ * invalid, and 11 if the ioctl fails.
+ */
+static int cmd_subvol_create(int argc, char **argv)
+{
+	int	res, fddst, len, e;
+	int	ret = 0;
+	char	*namebuf = NULL;
+	char	*dirbuf = NULL;
+	char	*newname;
+	char	*dstdir;
+	struct btrfs_ioctl_vol_args	args;
+	char	*dst;
+
+	if (check_argc_exact(argc, 2))
+		usage(cmd_subvol_create_usage);
+
+	dst = argv[1];
+
+	res = test_isdir(dst);
+	if (res >= 0) {
+		fprintf(stderr, "ERROR: '%s' exists\n", dst);
+		return 12;
+	}
+
+	/*
+	 * basename()/dirname() may modify their argument and may return a
+	 * pointer into it, so work on private copies and keep the original
+	 * pointers around in order to free them on exit.
+	 */
+	namebuf = strdup(dst);
+	dirbuf = strdup(dst);
+	if (!namebuf || !dirbuf) {
+		fprintf(stderr, "ERROR: not enough memory\n");
+		ret = 12;
+		goto out;
+	}
+	newname = basename(namebuf);
+	dstdir = dirname(dirbuf);
+
+	if (!strcmp(newname, ".") || !strcmp(newname, "..") ||
+	    strchr(newname, '/')) {
+		fprintf(stderr, "ERROR: incorrect subvolume name ('%s')\n",
+			newname);
+		ret = 14;
+		goto out;
+	}
+
+	len = strlen(newname);
+	if (len == 0 || len >= BTRFS_VOL_NAME_MAX) {
+		fprintf(stderr, "ERROR: subvolume name too long ('%s')\n",
+			newname);
+		ret = 14;
+		goto out;
+	}
+
+	fddst = open_file_or_dir(dstdir);
+	if (fddst < 0) {
+		fprintf(stderr, "ERROR: can't access to '%s'\n", dstdir);
+		ret = 12;
+		goto out;
+	}
+
+	printf("Create subvolume '%s/%s'\n", dstdir, newname);
+	/*
+	 * Zero the whole argument block first: this initialises the unused
+	 * fd member and guarantees that the name stays NUL-terminated even
+	 * though strncpy() itself does not promise it.
+	 * NOTE(review): relies on args.name holding more than
+	 * BTRFS_PATH_NAME_MAX bytes -- confirm against ioctl.h.
+	 */
+	memset(&args, 0, sizeof(args));
+	strncpy(args.name, newname, BTRFS_PATH_NAME_MAX);
+	res = ioctl(fddst, BTRFS_IOC_SUBVOL_CREATE, &args);
+	e = errno;	/* save errno before close() can clobber it */
+
+	close(fddst);
+
+	if (res < 0) {
+		fprintf( stderr, "ERROR: cannot create subvolume - %s\n",
+			strerror(e));
+		ret = 11;
+	}
+
+out:
+	free(namebuf);
+	free(dirbuf);
+	return ret;
+}
+
+/*
+ * Check whether <path> is a subvolume.
+ *
+ * Return value:
+ *   1 -> path exists, is a directory and has inode number 256
+ *   0 -> path exists but does not satisfy both conditions
+ *  -1 -> stat() on path failed
+ *
+ * NOTE(review): 256 is presumably the inode number btrfs assigns to the
+ * root directory of every subvolume -- confirm against the on-disk format.
+ */
+static int test_issubvolume(char *path)
+{
+	struct stat	sb;
+
+	if (stat(path, &sb) < 0)
+		return -1;
+
+	if (sb.st_ino != 256)
+		return 0;
+
+	return S_ISDIR(sb.st_mode) ? 1 : 0;
+}
+
+/*
+ * Usage text for "btrfs subvolume delete": synopsis and one-line summary,
+ * NULL-terminated.
+ */
+static const char * const cmd_subvol_delete_usage[] = {
+	"btrfs subvolume delete <name>",
+	"Delete a subvolume",
+	NULL
+};
+
+/*
+ * Handler for "btrfs subvolume delete <name>".
+ *
+ * Verifies that argv[1] is a subvolume, resolves it to a canonical path,
+ * splits that into parent directory and subvolume name and issues
+ * BTRFS_IOC_SNAP_DESTROY on the parent directory.
+ *
+ * Returns 0 on success, 12 on access or allocation errors, 13 if the path
+ * is not a subvolume, 14 on an invalid name and 11 if the ioctl fails.
+ */
+static int cmd_subvol_delete(int argc, char **argv)
+{
+	int	res, fd, len, e;
+	int	ret = 0;
+	struct btrfs_ioctl_vol_args	args;
+	char	*dname, *vname, *cpath;
+	char	*dirbuf = NULL;
+	char	*namebuf = NULL;
+	char	*path;
+
+	if (check_argc_exact(argc, 2))
+		usage(cmd_subvol_delete_usage);
+
+	path = argv[1];
+
+	res = test_issubvolume(path);
+	if (res < 0) {
+		fprintf(stderr, "ERROR: error accessing '%s'\n", path);
+		return 12;
+	}
+	if (!res) {
+		fprintf(stderr, "ERROR: '%s' is not a subvolume\n", path);
+		return 13;
+	}
+
+	/* realpath() returns NULL on failure; never hand that to strdup() */
+	cpath = realpath(path, NULL);
+	if (!cpath) {
+		fprintf(stderr, "ERROR: error accessing '%s'\n", path);
+		return 12;
+	}
+
+	/*
+	 * dirname()/basename() may modify their argument and may return a
+	 * pointer into it, so keep the duplicated buffers for free().
+	 */
+	dirbuf = strdup(cpath);
+	namebuf = strdup(cpath);
+	free(cpath);
+	if (!dirbuf || !namebuf) {
+		fprintf(stderr, "ERROR: not enough memory\n");
+		ret = 12;
+		goto out;
+	}
+	dname = dirname(dirbuf);
+	vname = basename(namebuf);
+
+	if (!strcmp(vname, ".") || !strcmp(vname, "..") ||
+	    strchr(vname, '/')) {
+		fprintf(stderr, "ERROR: incorrect subvolume name ('%s')\n",
+			vname);
+		ret = 14;
+		goto out;
+	}
+
+	len = strlen(vname);
+	if (len == 0 || len >= BTRFS_VOL_NAME_MAX) {
+		fprintf(stderr, "ERROR: snapshot name too long ('%s')\n",
+			vname);
+		ret = 14;
+		goto out;
+	}
+
+	fd = open_file_or_dir(dname);
+	if (fd < 0) {
+		/* nothing to close here: fd is not a valid descriptor */
+		fprintf(stderr, "ERROR: can't access to '%s'\n", dname);
+		ret = 12;
+		goto out;
+	}
+
+	printf("Delete subvolume '%s/%s'\n", dname, vname);
+	/*
+	 * Zero the argument block so the unused fd member is defined and
+	 * the copied name is always NUL-terminated.
+	 * NOTE(review): relies on args.name holding more than
+	 * BTRFS_PATH_NAME_MAX bytes -- confirm against ioctl.h.
+	 */
+	memset(&args, 0, sizeof(args));
+	strncpy(args.name, vname, BTRFS_PATH_NAME_MAX);
+	res = ioctl(fd, BTRFS_IOC_SNAP_DESTROY, &args);
+	e = errno;	/* save errno before close() can clobber it */
+
+	close(fd);
+
+	if (res < 0) {
+		fprintf( stderr, "ERROR: cannot delete '%s/%s' - %s\n",
+			dname, vname, strerror(e));
+		ret = 11;
+	}
+
+out:
+	free(dirbuf);
+	free(namebuf);
+	return ret;
+}
+
+/*
+ * Usage text for "btrfs subvolume list": synopsis, one-line summary, an
+ * empty separator line and the option list, NULL-terminated.
+ */
+static const char * const cmd_subvol_list_usage[] = {
+	"btrfs subvolume list [-p] <path>",
+	"List subvolumes (and snapshots)",
+	"",
+	"-p     print parent ID",
+	NULL
+};
+
+/*
+ * Handler for "btrfs subvolume list [-p] <path>".
+ *
+ * Parses the optional -p flag, checks that <path> is a subvolume, opens
+ * it and passes the descriptor to list_subvols().
+ *
+ * Returns 0 on success, 12 if the path cannot be accessed, 13 if it is
+ * not a subvolume and 19 if list_subvols() reports an error.
+ */
+static int cmd_subvol_list(int argc, char **argv)
+{
+	int fd;
+	int ret;
+	int print_parent = 0;
+	char *subvol;
+
+	/* restart option parsing: getopt() state is global */
+	optind = 1;
+	while(1) {
+		int c = getopt(argc, argv, "p");
+		if (c < 0)
+			break;
+
+		switch(c) {
+		case 'p':
+			print_parent = 1;
+			break;
+		default:
+			usage(cmd_subvol_list_usage);
+		}
+	}
+
+	if (check_argc_exact(argc - optind, 1))
+		usage(cmd_subvol_list_usage);
+
+	subvol = argv[optind];
+
+	ret = test_issubvolume(subvol);
+	if (ret < 0) {
+		fprintf(stderr, "ERROR: error accessing '%s'\n", subvol);
+		return 12;
+	}
+	if (!ret) {
+		fprintf(stderr, "ERROR: '%s' is not a subvolume\n", subvol);
+		return 13;
+	}
+
+	fd = open_file_or_dir(subvol);
+	if (fd < 0) {
+		fprintf(stderr, "ERROR: can't access '%s'\n", subvol);
+		return 12;
+	}
+	ret = list_subvols(fd, print_parent, 0);
+	/* release the descriptor, as the other handlers in this file do */
+	close(fd);
+	if (ret)
+		return 19;
+	return 0;
+}
+
+/*
+ * Usage text for "btrfs subvolume snapshot": synopsis, one-line summary,
+ * long description, an empty separator line and the option list,
+ * NULL-terminated.
+ */
+static const char * const cmd_snapshot_usage[] = {
+	"btrfs subvolume snapshot [-r] <source> [<dest>/]<name>",
+	"Create a snapshot of the subvolume",
+	"Create a writable/readonly snapshot of the subvolume <source> with",
+	"the name <name> in the <dest> directory",
+	"",
+	"-r     create a readonly snapshot",
+	NULL
+};
+
+/*
+ * Handler for "btrfs subvolume snapshot [-r] <source> [<dest>/]<name>".
+ *
+ * If the destination is an existing directory the snapshot is created
+ * inside it under the base name of <source>; otherwise the destination
+ * is split into a parent directory and the snapshot name.  The snapshot
+ * is created with BTRFS_IOC_SNAP_CREATE_V2 on the parent directory.
+ *
+ * Returns 0 on success, 12 on access or allocation errors (including a
+ * destination that exists but is not a directory), 13 if <source> is not
+ * a subvolume, 14 on an invalid name and 11 if the ioctl fails.
+ */
+static int cmd_snapshot(int argc, char **argv)
+{
+	char	*subvol, *dst;
+	int	res, fd, fddst, len, e, readonly = 0;
+	int	ret = 0;
+	int	dst_is_dir;
+	char	*namebuf = NULL;
+	char	*dirbuf = NULL;
+	char	*newname;
+	char	*dstdir;
+	struct btrfs_ioctl_vol_args_v2	args;
+
+	memset(&args, 0, sizeof(args));
+
+	/* restart option parsing: getopt() state is global */
+	optind = 1;
+	while (1) {
+		int c = getopt(argc, argv, "r");
+		if (c < 0)
+			break;
+
+		switch (c) {
+		case 'r':
+			readonly = 1;
+			break;
+		default:
+			usage(cmd_snapshot_usage);
+		}
+	}
+
+	if (check_argc_exact(argc - optind, 2))
+		usage(cmd_snapshot_usage);
+
+	subvol = argv[optind];
+	dst = argv[optind + 1];
+
+	res = test_issubvolume(subvol);
+	if (res < 0) {
+		fprintf(stderr, "ERROR: error accessing '%s'\n", subvol);
+		return 12;
+	}
+	if (!res) {
+		fprintf(stderr, "ERROR: '%s' is not a subvolume\n", subvol);
+		return 13;
+	}
+
+	res = test_isdir(dst);
+	if (res == 0) {
+		fprintf(stderr, "ERROR: '%s' exists and it is not a directory\n", dst);
+		return 12;
+	}
+	dst_is_dir = res > 0;
+
+	/*
+	 * basename()/dirname() may modify their argument and may return a
+	 * pointer into it, so operate on copies that are freed on exit.
+	 */
+	if (dst_is_dir) {
+		/* snapshot goes into <dst>, named after the source */
+		namebuf = strdup(subvol);
+	} else {
+		/* <dst> is "<parent>/<name>" of the snapshot to create */
+		namebuf = strdup(dst);
+		dirbuf = strdup(dst);
+	}
+	if (!namebuf || (!dst_is_dir && !dirbuf)) {
+		fprintf(stderr, "ERROR: not enough memory\n");
+		ret = 12;
+		goto out;
+	}
+	newname = basename(namebuf);
+	dstdir = dst_is_dir ? dst : dirname(dirbuf);
+
+	if (!strcmp(newname, ".") || !strcmp(newname, "..") ||
+	    strchr(newname, '/')) {
+		fprintf(stderr, "ERROR: incorrect snapshot name ('%s')\n",
+			newname);
+		ret = 14;
+		goto out;
+	}
+
+	len = strlen(newname);
+	if (len == 0 || len >= BTRFS_VOL_NAME_MAX) {
+		fprintf(stderr, "ERROR: snapshot name too long ('%s')\n",
+			newname);
+		ret = 14;
+		goto out;
+	}
+
+	fddst = open_file_or_dir(dstdir);
+	if (fddst < 0) {
+		fprintf(stderr, "ERROR: can't access to '%s'\n", dstdir);
+		ret = 12;
+		goto out;
+	}
+
+	fd = open_file_or_dir(subvol);
+	if (fd < 0) {
+		close(fddst);
+		/* it is the source that failed to open, not the destination */
+		fprintf(stderr, "ERROR: can't access to '%s'\n", subvol);
+		ret = 12;
+		goto out;
+	}
+
+	if (readonly) {
+		args.flags |= BTRFS_SUBVOL_RDONLY;
+		printf("Create a readonly snapshot of '%s' in '%s/%s'\n",
+		       subvol, dstdir, newname);
+	} else {
+		printf("Create a snapshot of '%s' in '%s/%s'\n",
+		       subvol, dstdir, newname);
+	}
+
+	args.fd = fd;
+	/* args was zeroed above, so the copied name stays NUL-terminated */
+	strncpy(args.name, newname, BTRFS_SUBVOL_NAME_MAX);
+	res = ioctl(fddst, BTRFS_IOC_SNAP_CREATE_V2, &args);
+	e = errno;	/* save errno before close() can clobber it */
+
+	close(fd);
+	close(fddst);
+
+	if (res < 0) {
+		fprintf( stderr, "ERROR: cannot snapshot '%s' - %s\n",
+			subvol, strerror(e));
+		ret = 11;
+	}
+
+out:
+	free(namebuf);
+	free(dirbuf);
+	return ret;
+}
+
+/*
+ * Usage text for "btrfs subvolume get-default": synopsis and one-line
+ * summary, NULL-terminated.  The synopsis must spell the sub-command the
+ * same way as the token registered in subvolume_cmd_group.
+ */
+static const char * const cmd_subvol_get_default_usage[] = {
+	"btrfs subvolume get-default <path>",
+	"Get the default subvolume of a filesystem",
+	NULL
+};
+
+/*
+ * Handler for "btrfs subvolume get-default <path>".
+ *
+ * Checks that <path> is a subvolume, opens it and calls list_subvols()
+ * with its get_default argument set.
+ *
+ * Returns 0 on success, 12 if the path cannot be accessed, 13 if it is
+ * not a subvolume and 19 if list_subvols() reports an error.
+ */
+static int cmd_subvol_get_default(int argc, char **argv)
+{
+	int fd;
+	int ret;
+	char *subvol;
+
+	if (check_argc_exact(argc, 2))
+		usage(cmd_subvol_get_default_usage);
+
+	subvol = argv[1];
+
+	ret = test_issubvolume(subvol);
+	if (ret < 0) {
+		fprintf(stderr, "ERROR: error accessing '%s'\n", subvol);
+		return 12;
+	}
+	if (!ret) {
+		fprintf(stderr, "ERROR: '%s' is not a subvolume\n", subvol);
+		return 13;
+	}
+
+	fd = open_file_or_dir(subvol);
+	if (fd < 0) {
+		fprintf(stderr, "ERROR: can't access '%s'\n", subvol);
+		return 12;
+	}
+	ret = list_subvols(fd, 0, 1);
+	/* release the descriptor, as the other handlers in this file do */
+	close(fd);
+	if (ret)
+		return 19;
+	return 0;
+}
+
+/*
+ * Usage text for "btrfs subvolume set-default": synopsis and one-line
+ * summary, NULL-terminated.  The synopsis must spell the sub-command the
+ * same way as the token registered in subvolume_cmd_group.
+ */
+static const char * const cmd_subvol_set_default_usage[] = {
+	"btrfs subvolume set-default <subvolid> <path>",
+	"Set the default subvolume of a filesystem",
+	NULL
+};
+
+/*
+ * Handler for "btrfs subvolume set-default <subvolid> <path>".
+ *
+ * Opens <path>, parses <subvolid> as an integer (base auto-detected by
+ * strtoll) and issues BTRFS_IOC_DEFAULT_SUBVOL with it.
+ *
+ * Returns 0 on success, 12 if <path> cannot be opened and 30 if the id
+ * is not a valid number or the ioctl fails.
+ */
+static int cmd_subvol_set_default(int argc, char **argv)
+{
+	int	ret=0, fd, e;
+	u64	objectid;
+	char	*path;
+	char	*subvolid;
+	char	*end;
+
+	if (check_argc_exact(argc, 3))
+		usage(cmd_subvol_set_default_usage);
+
+	subvolid = argv[1];
+	path = argv[2];
+
+	fd = open_file_or_dir(path);
+	if (fd < 0) {
+		fprintf(stderr, "ERROR: can't access to '%s'\n", path);
+		return 12;
+	}
+
+	/*
+	 * strtoll() only sets errno on failure, so it has to be cleared
+	 * first; otherwise a stale ERANGE from an earlier call would be
+	 * mistaken for a parse error.  Also reject empty input and trailing
+	 * garbage instead of silently using whatever prefix parsed.
+	 */
+	errno = 0;
+	objectid = (unsigned long long)strtoll(subvolid, &end, 0);
+	if (errno == ERANGE || end == subvolid || *end != '\0') {
+		fprintf(stderr, "ERROR: invalid tree id (%s)\n",subvolid);
+		close(fd);
+		return 30;
+	}
+	ret = ioctl(fd, BTRFS_IOC_DEFAULT_SUBVOL, &objectid);
+	e = errno;	/* save errno before close() can clobber it */
+	close(fd);
+	if( ret < 0 ){
+		fprintf(stderr, "ERROR: unable to set a new default subvolume - %s\n",
+			strerror(e));
+		return 30;
+	}
+	return 0;
+}
+
+/*
+ * Usage text for "btrfs subvolume find-new": synopsis and one-line
+ * summary, NULL-terminated.
+ */
+static const char * const cmd_find_new_usage[] = {
+	"btrfs subvolume find-new <path> <lastgen>",
+	"List the recently modified files in a filesystem",
+	NULL
+};
+
+/*
+ * Handler for "btrfs subvolume find-new <path> <lastgen>".
+ *
+ * Checks that <path> is a subvolume, opens it and calls
+ * find_updated_files() with root id 0 and <lastgen> as oldest generation.
+ *
+ * Returns 0 on success, 12 if the path cannot be accessed, 13 if it is
+ * not a subvolume and 19 if find_updated_files() reports an error.
+ */
+static int cmd_find_new(int argc, char **argv)
+{
+	int fd;
+	int ret;
+	char *subvol;
+	u64 last_gen;
+
+	if (check_argc_exact(argc, 3))
+		usage(cmd_find_new_usage);
+
+	subvol = argv[1];
+	/* NOTE(review): atoll() does no error checking; garbage yields 0 */
+	last_gen = atoll(argv[2]);
+
+	ret = test_issubvolume(subvol);
+	if (ret < 0) {
+		fprintf(stderr, "ERROR: error accessing '%s'\n", subvol);
+		return 12;
+	}
+	if (!ret) {
+		fprintf(stderr, "ERROR: '%s' is not a subvolume\n", subvol);
+		return 13;
+	}
+
+	fd = open_file_or_dir(subvol);
+	if (fd < 0) {
+		fprintf(stderr, "ERROR: can't access '%s'\n", subvol);
+		return 12;
+	}
+	ret = find_updated_files(fd, 0, last_gen);
+	/* release the descriptor, as the other handlers in this file do */
+	close(fd);
+	if (ret)
+		return 19;
+	return 0;
+}
+
+/*
+ * Dispatch table for the "btrfs subvolume" command group.  Each entry
+ * lists the sub-command token, its handler, its usage text, the nested
+ * group (NULL here) and the hidden flag (0 here).
+ * The trailing all-zero entry presumably marks the end of the table for
+ * handle_command_group() -- confirm against btrfs.c.
+ */
+const struct cmd_group subvolume_cmd_group = {
+	subvolume_cmd_group_usage, NULL, {
+		{ "create", cmd_subvol_create, cmd_subvol_create_usage, NULL, 0 },
+		{ "delete", cmd_subvol_delete, cmd_subvol_delete_usage, NULL, 0 },
+		{ "list", cmd_subvol_list, cmd_subvol_list_usage, NULL, 0 },
+		{ "snapshot", cmd_snapshot, cmd_snapshot_usage, NULL, 0 },
+		{ "get-default", cmd_subvol_get_default,
+			cmd_subvol_get_default_usage, NULL, 0 },
+		{ "set-default", cmd_subvol_set_default,
+			cmd_subvol_set_default_usage, NULL, 0 },
+		{ "find-new", cmd_find_new, cmd_find_new_usage, NULL, 0 },
+		{ 0, 0, 0, 0, 0 }
+	}
+};
+
+/*
+ * Entry point for "btrfs subvolume": hand argc/argv over to the generic
+ * group dispatcher together with the subvolume command table and return
+ * whatever the dispatcher returns.
+ */
+int cmd_subvolume(int argc, char **argv)
+{
+	const struct cmd_group *grp = &subvolume_cmd_group;
+
+	return handle_command_group(grp, argc, argv);
+}
diff --git a/commands.h b/commands.h
new file mode 100644
index 0000000..a303a50
--- /dev/null
+++ b/commands.h
@@ -0,0 +1,97 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#define ARGV0_BUF_SIZE	64
+
+/*
+ * One entry of a command table: binds a command-line token to the
+ * function implementing it, its usage text and, optionally, a nested
+ * command group.
+ */
+struct cmd_struct {
+	/* command word as typed by the user, e.g. "create" */
+	const char *token;
+	/* handler, called with an argc/argv pair; returns an int status */
+	int (*fn)(int, char **);
+
+	/*
+	 * Usage strings
+	 *
+	 * A NULL-terminated array of the following format:
+	 *
+	 *   usagestr[0] - one-line synopsis (required)
+	 *   usagestr[1] - one-line short description (required)
+	 *   usagestr[2..m] - a long (possibly multi-line) description
+	 *                    (optional)
+	 *   usagestr[m + 1] - an empty line separator (required if at least one
+	 *                     option string is given, not needed otherwise)
+	 *   usagestr[m + 2..n] - option strings, one option per line
+	 *                        (optional)
+	 *   usagestr[n + 1] - NULL terminator
+	 *
+	 * Options (if present) should always (even if there is no long
+	 * description) be prepended with an empty line.  Supplied strings are
+	 * indented but otherwise printed as-is, no automatic wrapping is done.
+	 *
+	 * Grep for cmd_*_usage[] for examples.
+	 */
+	const char * const *usagestr;
+
+	/* should be NULL if token is not a subgroup */
+	const struct cmd_group *next;
+
+	/* if true don't list this token in help listings */
+	int hidden;
+};
+
+/*
+ * A group of related commands (e.g. all "btrfs subvolume ..." commands).
+ */
+struct cmd_group {
+	/* NULL-terminated usage text of the group itself */
+	const char * const *usagestr;
+	/* NOTE(review): purpose not visible here; the tables in this patch pass NULL */
+	const char *infostr;
+
+	/*
+	 * Flexible array of command entries; the tables defined in this
+	 * patch end it with an all-zero entry.
+	 */
+	const struct cmd_struct commands[];
+};
+
+/* btrfs.c */
+int prefixcmp(const char *str, const char *prefix);
+
+int check_argc_exact(int nargs, int expected);
+int check_argc_min(int nargs, int expected);
+int check_argc_max(int nargs, int expected);
+
+int handle_command_group(const struct cmd_group *grp, int argc,
+			 char **argv);
+
+/* help.c */
+extern const char * const generic_cmd_help_usage[];
+
+void usage(const char * const *usagestr);
+void usage_command(const struct cmd_struct *cmd, int full, int err);
+void usage_command_group(const struct cmd_group *grp, int all, int err);
+
+void help_unknown_token(const char *arg, const struct cmd_group *grp);
+void help_ambiguous_token(const char *arg, const struct cmd_group *grp);
+
+void help_command_group(const struct cmd_group *grp, int argc, char **argv);
+
+/* common.c */
+int open_file_or_dir(const char *fname);
+
+extern const struct cmd_group subvolume_cmd_group;
+extern const struct cmd_group filesystem_cmd_group;
+extern const struct cmd_group balance_cmd_group;
+extern const struct cmd_group device_cmd_group;
+extern const struct cmd_group scrub_cmd_group;
+extern const struct cmd_group inspect_cmd_group;
+
+int cmd_subvolume(int argc, char **argv);
+int cmd_filesystem(int argc, char **argv);
+int cmd_balance(int argc, char **argv);
+int cmd_device(int argc, char **argv);
+int cmd_scrub(int argc, char **argv);
+int cmd_inspect(int argc, char **argv);
diff --git a/common.c b/common.c
new file mode 100644
index 0000000..03f6570
--- /dev/null
+++ b/common.c
@@ -0,0 +1,46 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <dirent.h>
+#include <fcntl.h>
+
+/*
+ * Open <fname> and return a file descriptor for it.
+ *
+ * Directories are opened read-only, everything else read-write.  The
+ * descriptor is obtained directly from open() in both cases, so callers
+ * own it outright and may simply close() it; no DIR stream is left
+ * behind (the previous opendir()/dirfd() approach leaked the stream and
+ * had callers close the descriptor underneath it).
+ *
+ * Returns the descriptor (>= 0) on success, or a negative value:
+ *   -1  stat() on fname failed
+ *   -2  fname is a directory but could not be opened
+ *   -3  fname is not a directory and could not be opened
+ */
+int open_file_or_dir(const char *fname)
+{
+	int ret;
+	struct stat st;
+	int fd;
+
+	ret = stat(fname, &st);
+	if (ret < 0) {
+		return -1;
+	}
+	if (S_ISDIR(st.st_mode)) {
+		/* a directory cannot be opened for writing */
+		fd = open(fname, O_RDONLY);
+		if (fd < 0) {
+			return -2;
+		}
+	} else {
+		fd = open(fname, O_RDWR);
+		if (fd < 0) {
+			return -3;
+		}
+	}
+	return fd;
+}
diff --git a/convert.c b/convert.c
index d2c9efa..fa7bf8c 100644
--- a/convert.c
+++ b/convert.c
@@ -370,7 +370,6 @@ static int record_file_extent(struct btrfs_trans_handle *trans,
 	struct btrfs_extent_item *ei;
 	u32 blocksize = root->sectorsize;
 	u64 nbytes;
-	u64 bytes_used;
 
 	if (disk_bytenr == 0) {
 		ret = btrfs_insert_file_extent(trans, root, objectid,
@@ -432,9 +431,6 @@ static int record_file_extent(struct btrfs_trans_handle *trans,
 	nbytes = btrfs_stack_inode_nbytes(inode) + num_bytes;
 	btrfs_set_stack_inode_nbytes(inode, nbytes);
 
-	bytes_used = btrfs_root_used(&root->root_item);
-	btrfs_set_root_used(&root->root_item, bytes_used + num_bytes);
-
 	btrfs_release_path(root, &path);
 
 	ins_key.objectid = disk_bytenr;
@@ -454,9 +450,6 @@ static int record_file_extent(struct btrfs_trans_handle *trans,
 
 		btrfs_mark_buffer_dirty(leaf);
 
-		bytes_used = btrfs_super_bytes_used(&info->super_copy);
-		btrfs_set_super_bytes_used(&info->super_copy, bytes_used +
-					   num_bytes);
 		ret = btrfs_update_block_group(trans, root, disk_bytenr,
 					       num_bytes, 1, 0);
 		if (ret)
@@ -864,7 +857,7 @@ static int copy_single_xattr(struct btrfs_trans_handle *trans,
 		data = databuf;
 		datalen = bufsize;
 	}
-	strcpy(namebuf, xattr_prefix_table[name_index]);
+	strncpy(namebuf, xattr_prefix_table[name_index], XATTR_NAME_MAX);
 	strncat(namebuf, EXT2_EXT_ATTR_NAME(entry), entry->e_name_len);
 	if (name_len + datalen > BTRFS_LEAF_DATA_SIZE(root) -
 	    sizeof(struct btrfs_item) - sizeof(struct btrfs_dir_item)) {
@@ -1127,7 +1120,7 @@ fail:
 	return ret;
 }
 /*
- * scan ext2's inode bitmap and copy all used inode.
+ * scan ext2's inode bitmap and copy all used inodes.
  */
 static int copy_inodes(struct btrfs_root *root, ext2_filsys ext2_fs,
 		       int datacsum, int packing, int noxattr)
@@ -1511,66 +1504,6 @@ fail:
 	return new_root;
 }
 
-/*
- * Fixup block accounting. The initial block accounting created by
- * make_block_groups isn't accuracy in this case.
- */
-static int fixup_block_accounting(struct btrfs_trans_handle *trans,
-				  struct btrfs_root *root)
-{
-	int ret;
-	int slot;
-	u64 start = 0;
-	u64 bytes_used = 0;
-	struct btrfs_path path;
-	struct btrfs_key key;
-	struct extent_buffer *leaf;
-	struct btrfs_block_group_cache *cache;
-	struct btrfs_fs_info *fs_info = root->fs_info;
-
-	while(1) {
-		cache = btrfs_lookup_block_group(fs_info, start);
-		if (!cache)
-			break;
-		start = cache->key.objectid + cache->key.offset;
-		btrfs_set_block_group_used(&cache->item, 0);
-		cache->space_info->bytes_used = 0;
-	}
-
-	btrfs_init_path(&path);
-	key.offset = 0;
-	key.objectid = 0;
-	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
-	ret = btrfs_search_slot(trans, root->fs_info->extent_root,
-				&key, &path, 0, 0);
-	if (ret < 0)
-		return ret;
-	while(1) {
-		leaf = path.nodes[0];
-		slot = path.slots[0];
-		if (slot >= btrfs_header_nritems(leaf)) {
-			ret = btrfs_next_leaf(root, &path);
-			if (ret < 0)
-				return ret;
-			if (ret > 0)
-				break;
-			leaf = path.nodes[0];
-			slot = path.slots[0];
-		}
-		btrfs_item_key_to_cpu(leaf, &key, slot);
-		if (key.type == BTRFS_EXTENT_ITEM_KEY) {
-			bytes_used += key.offset;
-			ret = btrfs_update_block_group(trans, root,
-				  key.objectid, key.offset, 1, 0);
-			BUG_ON(ret);
-		}
-		path.slots[0]++;
-	}
-	btrfs_set_super_bytes_used(&root->fs_info->super_copy, bytes_used);
-	btrfs_release_path(root, &path);
-	return 0;
-}
-
 static int create_chunk_mapping(struct btrfs_trans_handle *trans,
 				struct btrfs_root *root)
 {
@@ -1742,7 +1675,7 @@ static int init_btrfs(struct btrfs_root *root)
 	ret = btrfs_make_block_groups(trans, root);
 	if (ret)
 		goto err;
-	ret = fixup_block_accounting(trans, root);
+	ret = btrfs_fix_block_accounting(trans, root);
 	if (ret)
 		goto err;
 	ret = create_chunk_mapping(trans, root);
diff --git a/ctree.c b/ctree.c
index f70e10c..2d86b1e 100644
--- a/ctree.c
+++ b/ctree.c
@@ -19,6 +19,7 @@
 #include "disk-io.h"
 #include "transaction.h"
 #include "print-tree.h"
+#include "repair.h"
 
 static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
 		      *root, struct btrfs_path *path, int level);
@@ -32,8 +33,6 @@ static int balance_node_right(struct btrfs_trans_handle *trans,
 			      struct btrfs_root *root,
 			      struct extent_buffer *dst_buf,
 			      struct extent_buffer *src_buf);
-static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
-		   struct btrfs_path *path, int level, int slot);
 
 inline void btrfs_init_path(struct btrfs_path *p)
 {
@@ -138,6 +137,48 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
 	return 0;
 }
 
+/*
+ * Replace the root node of <root> with an empty level-0 block.
+ *
+ * A new block is requested from btrfs_alloc_free_block(); if that fails
+ * the existing root node is reused instead.  The block's header is
+ * cleared and rewritten (level, bytenr, generation, backref revision,
+ * owner, fsid, chunk tree uuid), the buffer is marked dirty, installed as
+ * root->node and the root is added to the dirty list.
+ *
+ * Always returns 0.
+ */
+int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
+		      struct btrfs_root *root)
+{
+	struct extent_buffer *c;
+	struct extent_buffer *old = root->node;
+	int level;
+	struct btrfs_disk_key disk_key = {0,0,0};
+
+	level = 0;
+
+	c = btrfs_alloc_free_block(trans, root,
+				   btrfs_level_size(root, 0),
+				   root->root_key.objectid,
+				   &disk_key, level, 0, 0);
+	if (IS_ERR(c)) {
+		/*
+		 * Allocation failed: fall back to the old root block.  The
+		 * extra reference taken here balances the
+		 * free_extent_buffer(old) call below.
+		 */
+		c = old;
+		extent_buffer_get(c);
+	}
+
+	/* wipe the header, then fill in the fields of an empty leaf */
+	memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
+	btrfs_set_header_level(c, level);
+	btrfs_set_header_bytenr(c, c->start);
+	btrfs_set_header_generation(c, trans->transid);
+	btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
+	btrfs_set_header_owner(c, root->root_key.objectid);
+
+	write_extent_buffer(c, root->fs_info->fsid,
+			    (unsigned long)btrfs_header_fsid(c),
+			    BTRFS_FSID_SIZE);
+
+	write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
+			    (unsigned long)btrfs_header_chunk_tree_uuid(c),
+			    BTRFS_UUID_SIZE);
+
+	btrfs_mark_buffer_dirty(c);
+
+	/* drop the reference held through the old root->node pointer */
+	free_extent_buffer(old);
+	root->node = c;
+	add_root_to_dirty_list(root);
+	return 0;
+}
+
 /*
  * check if the tree block can be shared by multiple trees
  */
@@ -262,7 +303,6 @@ int __btrfs_cow_block(struct btrfs_trans_handle *trans,
 			     struct extent_buffer **cow_ret,
 			     u64 search_start, u64 empty_size)
 {
-	u64 generation;
 	struct extent_buffer *cow;
 	struct btrfs_disk_key disk_key;
 	int level;
@@ -272,7 +312,6 @@ int __btrfs_cow_block(struct btrfs_trans_handle *trans,
 	WARN_ON(root->ref_cows && trans->transid != root->last_trans);
 
 	level = btrfs_header_level(buf);
-	generation = btrfs_header_generation(buf);
 
 	if (level == 0)
 		btrfs_item_key(buf, &disk_key, 0);
@@ -551,156 +590,125 @@ static inline unsigned int leaf_data_end(struct btrfs_root *root,
 	return btrfs_item_offset_nr(leaf, nr - 1);
 }
 
-static int check_node(struct btrfs_root *root, struct btrfs_path *path,
-		      int level)
+int btrfs_check_node(struct btrfs_root *root,
+		      struct btrfs_disk_key *parent_key,
+		      struct extent_buffer *buf)
 {
-	struct extent_buffer *parent = NULL;
-	struct extent_buffer *node = path->nodes[level];
-	struct btrfs_disk_key parent_key;
-	struct btrfs_disk_key node_key;
-	int parent_slot;
-	int slot;
+	int i;
 	struct btrfs_key cpukey;
-	u32 nritems = btrfs_header_nritems(node);
+	struct btrfs_disk_key key;
+	u32 nritems = btrfs_header_nritems(buf);
 
-	if (path->nodes[level + 1])
-		parent = path->nodes[level + 1];
+	if (nritems == 0 || nritems > BTRFS_NODEPTRS_PER_BLOCK(root))
+		goto fail;
 
-	slot = path->slots[level];
-	BUG_ON(nritems == 0);
-	if (parent) {
-		parent_slot = path->slots[level + 1];
-		btrfs_node_key(parent, &parent_key, parent_slot);
-		btrfs_node_key(node, &node_key, 0);
-		BUG_ON(memcmp(&parent_key, &node_key,
-			      sizeof(struct btrfs_disk_key)));
-		BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
-		       btrfs_header_bytenr(node));
-	}
-	BUG_ON(nritems > BTRFS_NODEPTRS_PER_BLOCK(root));
-	if (slot != 0) {
-		btrfs_node_key_to_cpu(node, &cpukey, slot - 1);
-		btrfs_node_key(node, &node_key, slot);
-		BUG_ON(btrfs_comp_keys(&node_key, &cpukey) <= 0);
-	}
-	if (slot < nritems - 1) {
-		btrfs_node_key_to_cpu(node, &cpukey, slot + 1);
-		btrfs_node_key(node, &node_key, slot);
-		BUG_ON(btrfs_comp_keys(&node_key, &cpukey) >= 0);
+	if (parent_key && parent_key->type) {
+		btrfs_node_key(buf, &key, 0);
+		if (memcmp(parent_key, &key, sizeof(key)))
+			goto fail;
+	}
+	for (i = 0; nritems > 1 && i < nritems - 2; i++) {
+		btrfs_node_key(buf, &key, i);
+		btrfs_node_key_to_cpu(buf, &cpukey, i + 1);
+		if (btrfs_comp_keys(&key, &cpukey) >= 0)
+			goto fail;
 	}
 	return 0;
+fail:
+	if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID) {
+		if (parent_key)
+			btrfs_disk_key_to_cpu(&cpukey, parent_key);
+		else
+			btrfs_node_key_to_cpu(buf, &cpukey, 0);
+		btrfs_add_corrupt_extent_record(root->fs_info, &cpukey,
+						buf->start, buf->len,
+						btrfs_header_level(buf));
+	}
+	return -EIO;
 }
 
-static int check_leaf(struct btrfs_root *root, struct btrfs_path *path,
-		      int level)
+int btrfs_check_leaf(struct btrfs_root *root,
+		      struct btrfs_disk_key *parent_key,
+		      struct extent_buffer *buf)
 {
-	struct extent_buffer *leaf = path->nodes[level];
-	struct extent_buffer *parent = NULL;
-	int parent_slot;
+	int i;
 	struct btrfs_key cpukey;
-	struct btrfs_disk_key parent_key;
-	struct btrfs_disk_key leaf_key;
-	int slot = path->slots[0];
-
-	u32 nritems = btrfs_header_nritems(leaf);
+	struct btrfs_disk_key key;
+	u32 nritems = btrfs_header_nritems(buf);
 
-	if (path->nodes[level + 1])
-		parent = path->nodes[level + 1];
+	if (btrfs_header_level(buf) != 0) {
+		fprintf(stderr, "leaf is not a leaf %llu\n",
+		       (unsigned long long)btrfs_header_bytenr(buf));
+		goto fail;
+	}
+	if (btrfs_leaf_free_space(root, buf) < 0) {
+		fprintf(stderr, "leaf free space incorrect %llu %d\n",
+			(unsigned long long)btrfs_header_bytenr(buf),
+			btrfs_leaf_free_space(root, buf));
+		goto fail;
+	}
 
 	if (nritems == 0)
 		return 0;
 
-	if (parent) {
-		parent_slot = path->slots[level + 1];
-		btrfs_node_key(parent, &parent_key, parent_slot);
-		btrfs_item_key(leaf, &leaf_key, 0);
-
-		BUG_ON(memcmp(&parent_key, &leaf_key,
-		       sizeof(struct btrfs_disk_key)));
-		BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
-		       btrfs_header_bytenr(leaf));
+	btrfs_item_key(buf, &key, 0);
+	if (parent_key && parent_key->type &&
+	    memcmp(parent_key, &key, sizeof(key))) {
+		fprintf(stderr, "leaf parent key incorrect %llu\n",
+		       (unsigned long long)btrfs_header_bytenr(buf));
+		goto fail;
 	}
-#if 0
 	for (i = 0; nritems > 1 && i < nritems - 2; i++) {
-		btrfs_item_key_to_cpu(leaf, &cpukey, i + 1);
-		btrfs_item_key(leaf, &leaf_key, i);
-		if (comp_keys(&leaf_key, &cpukey) >= 0) {
-			btrfs_print_leaf(root, leaf);
-			printk("slot %d offset bad key\n", i);
-			BUG_ON(1);
-		}
-		if (btrfs_item_offset_nr(leaf, i) !=
-			btrfs_item_end_nr(leaf, i + 1)) {
-			btrfs_print_leaf(root, leaf);
-			printk("slot %d offset bad\n", i);
-			BUG_ON(1);
-		}
-		if (i == 0) {
-			if (btrfs_item_offset_nr(leaf, i) +
-			       btrfs_item_size_nr(leaf, i) !=
-			       BTRFS_LEAF_DATA_SIZE(root)) {
-				btrfs_print_leaf(root, leaf);
-				printk("slot %d first offset bad\n", i);
-				BUG_ON(1);
-			}
-		}
-	}
-	if (nritems > 0) {
-		if (btrfs_item_size_nr(leaf, nritems - 1) > 4096) {
-				btrfs_print_leaf(root, leaf);
-				printk("slot %d bad size \n", nritems - 1);
-				BUG_ON(1);
-		}
-	}
-#endif
-	if (slot != 0 && slot < nritems - 1) {
-		btrfs_item_key(leaf, &leaf_key, slot);
-		btrfs_item_key_to_cpu(leaf, &cpukey, slot - 1);
-		if (btrfs_comp_keys(&leaf_key, &cpukey) <= 0) {
-			btrfs_print_leaf(root, leaf);
-			printk("slot %d offset bad key\n", slot);
-			BUG_ON(1);
+		btrfs_item_key(buf, &key, i);
+		btrfs_item_key_to_cpu(buf, &cpukey, i + 1);
+		if (btrfs_comp_keys(&key, &cpukey) >= 0) {
+			fprintf(stderr, "bad key ordering %d %d\n", i, i+1);
+			goto fail;
 		}
-		if (btrfs_item_offset_nr(leaf, slot - 1) !=
-		       btrfs_item_end_nr(leaf, slot)) {
-			btrfs_print_leaf(root, leaf);
-			printk("slot %d offset bad\n", slot);
-			BUG_ON(1);
+		if (btrfs_item_offset_nr(buf, i) !=
+			btrfs_item_end_nr(buf, i + 1)) {
+			fprintf(stderr, "incorrect offsets %u %u\n",
+				btrfs_item_offset_nr(buf, i),
+				btrfs_item_end_nr(buf, i + 1));
+			goto fail;
 		}
-	}
-	if (slot < nritems - 1) {
-		btrfs_item_key(leaf, &leaf_key, slot);
-		btrfs_item_key_to_cpu(leaf, &cpukey, slot + 1);
-		BUG_ON(btrfs_comp_keys(&leaf_key, &cpukey) >= 0);
-		if (btrfs_item_offset_nr(leaf, slot) !=
-			btrfs_item_end_nr(leaf, slot + 1)) {
-			btrfs_print_leaf(root, leaf);
-			printk("slot %d offset bad\n", slot);
-			BUG_ON(1);
+		if (i == 0 && btrfs_item_end_nr(buf, i) !=
+		    BTRFS_LEAF_DATA_SIZE(root)) {
+			fprintf(stderr, "bad item end %u wanted %u\n",
+				btrfs_item_end_nr(buf, i),
+				(unsigned)BTRFS_LEAF_DATA_SIZE(root));
+			goto fail;
 		}
 	}
-	BUG_ON(btrfs_item_offset_nr(leaf, 0) +
-	       btrfs_item_size_nr(leaf, 0) != BTRFS_LEAF_DATA_SIZE(root));
 	return 0;
+fail:
+	if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID) {
+		if (parent_key)
+			btrfs_disk_key_to_cpu(&cpukey, parent_key);
+		else
+			btrfs_item_key_to_cpu(buf, &cpukey, 0);
+
+		btrfs_add_corrupt_extent_record(root->fs_info, &cpukey,
+						buf->start, buf->len, 0);
+	}
+	return -EIO;
 }
 
 static int noinline check_block(struct btrfs_root *root,
 				struct btrfs_path *path, int level)
 {
-	return 0;
-#if 0
-	struct extent_buffer *buf = path->nodes[level];
+	struct btrfs_disk_key key;
+	struct btrfs_disk_key *key_ptr = NULL;
+	struct extent_buffer *parent;
 
-	if (memcmp_extent_buffer(buf, root->fs_info->fsid,
-				 (unsigned long)btrfs_header_fsid(buf),
-				 BTRFS_FSID_SIZE)) {
-		printk("warning bad block %Lu\n", buf->start);
-		return 1;
+	if (path->nodes[level + 1]) {
+		parent = path->nodes[level + 1];
+		btrfs_node_key(parent, &key, path->slots[level + 1]);
+		key_ptr = &key;
 	}
-#endif
 	if (level == 0)
-		return check_leaf(root, path, level);
-	return check_node(root, path, level);
+		return btrfs_check_leaf(root, key_ptr, path->nodes[0]);
+	return btrfs_check_node(root, key_ptr, path->nodes[level]);
 }
 
 /*
@@ -767,7 +775,7 @@ static int bin_search(struct extent_buffer *eb, struct btrfs_key *key,
 	return -1;
 }
 
-static struct extent_buffer *read_node_slot(struct btrfs_root *root,
+struct extent_buffer *read_node_slot(struct btrfs_root *root,
 				   struct extent_buffer *parent, int slot)
 {
 	int level = btrfs_header_level(parent);
@@ -795,7 +803,6 @@ static int balance_level(struct btrfs_trans_handle *trans,
 	int wret;
 	int pslot;
 	int orig_slot = path->slots[level];
-	int err_on_enospc = 0;
 	u64 orig_ptr;
 
 	if (level == 0)
@@ -845,9 +852,6 @@ static int balance_level(struct btrfs_trans_handle *trans,
 	    BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
 		return 0;
 
-	if (btrfs_header_nritems(mid) < 2)
-		err_on_enospc = 1;
-
 	left = read_node_slot(root, parent, pslot - 1);
 	if (left) {
 		wret = btrfs_cow_block(trans, root, left,
@@ -873,8 +877,6 @@ static int balance_level(struct btrfs_trans_handle *trans,
 		wret = push_node_left(trans, root, left, mid, 1);
 		if (wret < 0)
 			ret = wret;
-		if (btrfs_header_nritems(mid) < 2)
-			err_on_enospc = 1;
 	}
 
 	/*
@@ -892,8 +894,8 @@ static int balance_level(struct btrfs_trans_handle *trans,
 			wait_on_tree_block_writeback(root, right);
 			free_extent_buffer(right);
 			right = NULL;
-			wret = del_ptr(trans, root, path, level + 1, pslot +
-				       1);
+			wret = btrfs_del_ptr(trans, root, path,
+					     level + 1, pslot + 1);
 			if (wret)
 				ret = wret;
 			wret = btrfs_free_extent(trans, root, bytenr,
@@ -940,7 +942,7 @@ static int balance_level(struct btrfs_trans_handle *trans,
 		wait_on_tree_block_writeback(root, mid);
 		free_extent_buffer(mid);
 		mid = NULL;
-		wret = del_ptr(trans, root, path, level + 1, pslot);
+		wret = btrfs_del_ptr(trans, root, path, level + 1, pslot);
 		if (wret)
 			ret = wret;
 		wret = btrfs_free_extent(trans, root, bytenr, blocksize,
@@ -996,14 +998,12 @@ static int noinline push_nodes_for_insert(struct btrfs_trans_handle *trans,
 	int wret;
 	int pslot;
 	int orig_slot = path->slots[level];
-	u64 orig_ptr;
 
 	if (level == 0)
 		return 1;
 
 	mid = path->nodes[level];
 	WARN_ON(btrfs_header_generation(mid) != trans->transid);
-	orig_ptr = btrfs_node_blockptr(mid, orig_slot);
 
 	if (level < BTRFS_MAX_LEVEL - 1)
 		parent = path->nodes[level + 1];
@@ -1102,7 +1102,7 @@ static int noinline push_nodes_for_insert(struct btrfs_trans_handle *trans,
 /*
  * readahead one full node of leaves
  */
-static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path,
+void reada_for_search(struct btrfs_root *root, struct btrfs_path *path,
 			     int level, int slot, u64 objectid)
 {
 	struct extent_buffer *node;
@@ -1199,7 +1199,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
 	u8 lowest_level = 0;
 
 	lowest_level = p->lowest_level;
-	WARN_ON(lowest_level && ins_len);
+	WARN_ON(lowest_level && ins_len > 0);
 	WARN_ON(p->nodes[0] != NULL);
 	/*
 	WARN_ON(!mutex_is_locked(&root->fs_info->fs_mutex));
@@ -1264,6 +1264,8 @@ again:
 						 key->objectid);
 
 			b = read_node_slot(root, b, slot);
+			if (!extent_buffer_uptodate(b))
+				return -EIO;
 		} else {
 			p->slots[level] = slot;
 			if (ins_len > 0 &&
@@ -2370,7 +2372,6 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans,
 {
 	int ret = 0;
 	int slot;
-	int slot_orig;
 	struct extent_buffer *leaf;
 	struct btrfs_item *item;
 	u32 nritems;
@@ -2380,7 +2381,6 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans,
 	unsigned int size_diff;
 	int i;
 
-	slot_orig = path->slots[0];
 	leaf = path->nodes[0];
 	slot = path->slots[0];
 
@@ -2468,7 +2468,6 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans,
 {
 	int ret = 0;
 	int slot;
-	int slot_orig;
 	struct extent_buffer *leaf;
 	struct btrfs_item *item;
 	u32 nritems;
@@ -2477,7 +2476,6 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans,
 	unsigned int old_size;
 	int i;
 
-	slot_orig = path->slots[0];
 	leaf = path->nodes[0];
 
 	nritems = btrfs_header_nritems(leaf);
@@ -2541,7 +2539,6 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
 	struct btrfs_item *item;
 	int ret = 0;
 	int slot;
-	int slot_orig;
 	int i;
 	u32 nritems;
 	u32 total_size = 0;
@@ -2565,7 +2562,6 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
 	if (ret < 0)
 		goto out;
 
-	slot_orig = path->slots[0];
 	leaf = path->nodes[0];
 
 	nritems = btrfs_header_nritems(leaf);
@@ -2675,7 +2671,7 @@ int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
  * continuing all the way the root if required.  The root is converted into
  * a leaf if all the nodes are emptied.
  */
-static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+int btrfs_del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 		   struct btrfs_path *path, int level, int slot)
 {
 	struct extent_buffer *parent = path->nodes[level];
@@ -2727,7 +2723,7 @@ static noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans,
 	int ret;
 
 	WARN_ON(btrfs_header_generation(leaf) != trans->transid);
-	ret = del_ptr(trans, root, path, 1, path->slots[1]);
+	ret = btrfs_del_ptr(trans, root, path, 1, path->slots[1]);
 	if (ret)
 		return ret;
 
@@ -2931,6 +2927,8 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
 			reada_for_search(root, path, level, slot, 0);
 
 		next = read_node_slot(root, c, slot);
+		if (!next)
+			return -EIO;
 		break;
 	}
 	path->slots[level] = slot;
@@ -2945,6 +2943,8 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
 		if (path->reada)
 			reada_for_search(root, path, level, 0, 0);
 		next = read_node_slot(root, next, 0);
+		if (!next)
+			return -EIO;
 	}
 	return 0;
 }
diff --git a/ctree.h b/ctree.h
index a9062ea..6545c50 100644
--- a/ctree.h
+++ b/ctree.h
@@ -24,6 +24,7 @@
 #include "radix-tree.h"
 #include "extent-cache.h"
 #include "extent_io.h"
+#include "ioctl.h"
 
 struct btrfs_root;
 struct btrfs_trans_handle;
@@ -60,6 +61,9 @@ struct btrfs_trans_handle;
 #define BTRFS_CSUM_TREE_OBJECTID 7ULL
 
 
+/* for storing balance parameters in the root tree */
+#define BTRFS_BALANCE_OBJECTID -4ULL
+
 /* oprhan objectid for tracking unlinked/truncated files */
 #define BTRFS_ORPHAN_OBJECTID -5ULL
 
@@ -78,6 +82,15 @@ struct btrfs_trans_handle;
  */
 #define BTRFS_EXTENT_CSUM_OBJECTID -10ULL
 
+/* For storing free space cache */
+#define BTRFS_FREE_SPACE_OBJECTID -11ULL
+
+/*
+ * The inode number assigned to the special inode for storing
+ * free ino cache
+ */
+#define BTRFS_FREE_INO_OBJECTID -12ULL
+
 /* dummy objectid represents multiple objectids */
 #define BTRFS_MULTIPLE_OBJECTIDS -255ULL
 
@@ -250,7 +263,6 @@ static inline unsigned long btrfs_chunk_item_size(int num_stripes)
 		sizeof(struct btrfs_stripe) * (num_stripes - 1);
 }
 
-#define BTRFS_FSID_SIZE 16
 #define BTRFS_HEADER_FLAG_WRITTEN		(1ULL << 0)
 #define BTRFS_HEADER_FLAG_RELOC			(1ULL << 1)
 #define BTRFS_SUPER_FLAG_SEEDING		(1ULL << 32)
@@ -290,6 +302,9 @@ struct btrfs_header {
 #define BTRFS_MAX_INLINE_DATA_SIZE(r) (BTRFS_LEAF_DATA_SIZE(r) - \
 					sizeof(struct btrfs_item) - \
 					sizeof(struct btrfs_file_extent_item))
+#define BTRFS_MAX_XATTR_SIZE(r)	(BTRFS_LEAF_DATA_SIZE(r) - \
+				 sizeof(struct btrfs_item) -\
+				 sizeof(struct btrfs_dir_item))
 
 
 /*
@@ -300,6 +315,47 @@ struct btrfs_header {
 #define BTRFS_LABEL_SIZE 256
 
 /*
+ * just in case we somehow lose the roots and are not able to mount,
+ * we store an array of the roots from previous transactions
+ * in the super.
+ */
+#define BTRFS_NUM_BACKUP_ROOTS 4
+struct btrfs_root_backup {
+	__le64 tree_root;
+	__le64 tree_root_gen;
+
+	__le64 chunk_root;
+	__le64 chunk_root_gen;
+
+	__le64 extent_root;
+	__le64 extent_root_gen;
+
+	__le64 fs_root;
+	__le64 fs_root_gen;
+
+	__le64 dev_root;
+	__le64 dev_root_gen;
+
+	__le64 csum_root;
+	__le64 csum_root_gen;
+
+	__le64 total_bytes;
+	__le64 bytes_used;
+	__le64 num_devices;
+	/* future */
+	__le64 unsed_64[4];
+
+	u8 tree_root_level;
+	u8 chunk_root_level;
+	u8 extent_root_level;
+	u8 fs_root_level;
+	u8 dev_root_level;
+	u8 csum_root_level;
+	/* future and to align */
+	u8 unused_8[10];
+} __attribute__ ((__packed__));
+
+/*
  * the super block basically lists the main trees of the FS
  * it currently lacks any block count etc etc
  */
@@ -340,9 +396,12 @@ struct btrfs_super_block {
 
 	char label[BTRFS_LABEL_SIZE];
 
+	__le64 cache_generation;
+
 	/* future expansion */
-	__le64 reserved[32];
+	__le64 reserved[31];
 	u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE];
+	struct btrfs_root_backup super_roots[BTRFS_NUM_BACKUP_ROOTS];
 } __attribute__ ((__packed__));
 
 /*
@@ -350,11 +409,32 @@ struct btrfs_super_block {
  * ones specified below then we will fail to mount
  */
 #define BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF	(1ULL << 0)
+#define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL	(1ULL << 1)
+#define BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS	(1ULL << 2)
+#define BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO	(1ULL << 3)
+/*
+ * some patches floated around with a second compression method
+ * let's save that incompat here for when they do get in
+ * Note we don't actually support it, we're just reserving the
+ * number
+ */
+#define BTRFS_FEATURE_INCOMPAT_COMPRESS_LZOv2   (1ULL << 4)
+
+/*
+ * older kernels tried to do bigger metadata blocks, but the
+ * code was pretty buggy.  Let's not let them try anymore.
+ */
+#define BTRFS_FEATURE_INCOMPAT_BIG_METADATA     (1ULL << 5)
+
 
 #define BTRFS_FEATURE_COMPAT_SUPP		0ULL
 #define BTRFS_FEATURE_COMPAT_RO_SUPP		0ULL
-#define BTRFS_FEATURE_INCOMPAT_SUPP		\
-	BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF
+#define BTRFS_FEATURE_INCOMPAT_SUPP			\
+	(BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF |		\
+	 BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL |	\
+	 BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO |		\
+	 BTRFS_FEATURE_INCOMPAT_BIG_METADATA |		\
+	 BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
 
 /*
  * A leaf is full of items. offset and size tell us where to find
@@ -499,9 +579,11 @@ struct btrfs_timespec {
 } __attribute__ ((__packed__));
 
 typedef enum {
-	BTRFS_COMPRESS_NONE = 0,
-	BTRFS_COMPRESS_ZLIB = 1,
-	BTRFS_COMPRESS_LAST = 2,
+	BTRFS_COMPRESS_NONE  = 0,
+	BTRFS_COMPRESS_ZLIB  = 1,
+	BTRFS_COMPRESS_LZO   = 2,
+	BTRFS_COMPRESS_TYPES = 2,
+	BTRFS_COMPRESS_LAST  = 3,
 } btrfs_compression_type;
 
 /* we don't understand any encryption methods right now */
@@ -633,13 +715,17 @@ struct btrfs_csum_item {
 } __attribute__ ((__packed__));
 
 /* tag for the radix tree of block groups in ram */
-#define BTRFS_BLOCK_GROUP_DATA     (1 << 0)
-#define BTRFS_BLOCK_GROUP_SYSTEM   (1 << 1)
-#define BTRFS_BLOCK_GROUP_METADATA (1 << 2)
-#define BTRFS_BLOCK_GROUP_RAID0    (1 << 3)
-#define BTRFS_BLOCK_GROUP_RAID1    (1 << 4)
-#define BTRFS_BLOCK_GROUP_DUP	   (1 << 5)
-#define BTRFS_BLOCK_GROUP_RAID10   (1 << 6)
+#define BTRFS_BLOCK_GROUP_DATA		(1ULL << 0)
+#define BTRFS_BLOCK_GROUP_SYSTEM	(1ULL << 1)
+#define BTRFS_BLOCK_GROUP_METADATA	(1ULL << 2)
+#define BTRFS_BLOCK_GROUP_RAID0		(1ULL << 3)
+#define BTRFS_BLOCK_GROUP_RAID1		(1ULL << 4)
+#define BTRFS_BLOCK_GROUP_DUP		(1ULL << 5)
+#define BTRFS_BLOCK_GROUP_RAID10	(1ULL << 6)
+#define BTRFS_BLOCK_GROUP_RESERVED	BTRFS_AVAIL_ALLOC_BIT_SINGLE
+
+/* used in struct btrfs_balance_args fields */
+#define BTRFS_AVAIL_ALLOC_BIT_SINGLE	(1ULL << 48)
 
 struct btrfs_block_group_item {
 	__le64 used;
@@ -726,6 +812,13 @@ struct btrfs_fs_info {
 	struct list_head space_info;
 	int system_allocs;
 	int readonly;
+	int (*free_extent_hook)(struct btrfs_trans_handle *trans,
+				struct btrfs_root *root,
+				u64 bytenr, u64 num_bytes, u64 parent,
+				u64 root_objectid, u64 owner, u64 offset,
+				int refs_to_drop);
+	struct cache_tree *fsck_extent_cache;
+	struct cache_tree *corrupt_blocks;
 };
 
 /*
@@ -847,6 +940,8 @@ struct btrfs_root {
 #define BTRFS_DEV_ITEM_KEY	216
 #define BTRFS_CHUNK_ITEM_KEY	228
 
+#define BTRFS_BALANCE_ITEM_KEY	248
+
 /*
  * string items are for debugging.  They just store a short string of
  * data in the FS
@@ -1047,6 +1142,7 @@ BTRFS_SETGET_STACK_FUNCS(block_group_flags,
 
 /* struct btrfs_inode_ref */
 BTRFS_SETGET_FUNCS(inode_ref_name_len, struct btrfs_inode_ref, name_len, 16);
+BTRFS_SETGET_STACK_FUNCS(stack_inode_ref_name_len, struct btrfs_inode_ref, name_len, 16);
 BTRFS_SETGET_FUNCS(inode_ref_index, struct btrfs_inode_ref, index, 64);
 
 /* struct btrfs_inode_item */
@@ -1325,6 +1421,10 @@ BTRFS_SETGET_FUNCS(root_ref_dirid, struct btrfs_root_ref, dirid, 64);
 BTRFS_SETGET_FUNCS(root_ref_sequence, struct btrfs_root_ref, sequence, 64);
 BTRFS_SETGET_FUNCS(root_ref_name_len, struct btrfs_root_ref, name_len, 16);
 
+BTRFS_SETGET_STACK_FUNCS(stack_root_ref_dirid, struct btrfs_root_ref, dirid, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_root_ref_sequence, struct btrfs_root_ref, sequence, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_root_ref_name_len, struct btrfs_root_ref, name_len, 16);
+
 /* struct btrfs_dir_item */
 BTRFS_SETGET_FUNCS(dir_data_len, struct btrfs_dir_item, data_len, 16);
 BTRFS_SETGET_FUNCS(dir_type, struct btrfs_dir_item, type, 8);
@@ -1510,6 +1610,55 @@ BTRFS_SETGET_STACK_FUNCS(root_last_snapshot, struct btrfs_root_item,
 			 last_snapshot, 64);
 
 
+/* struct btrfs_root_backup */
+BTRFS_SETGET_STACK_FUNCS(backup_tree_root, struct btrfs_root_backup,
+		   tree_root, 64);
+BTRFS_SETGET_STACK_FUNCS(backup_tree_root_gen, struct btrfs_root_backup,
+		   tree_root_gen, 64);
+BTRFS_SETGET_STACK_FUNCS(backup_tree_root_level, struct btrfs_root_backup,
+		   tree_root_level, 8);
+
+BTRFS_SETGET_STACK_FUNCS(backup_chunk_root, struct btrfs_root_backup,
+		   chunk_root, 64);
+BTRFS_SETGET_STACK_FUNCS(backup_chunk_root_gen, struct btrfs_root_backup,
+		   chunk_root_gen, 64);
+BTRFS_SETGET_STACK_FUNCS(backup_chunk_root_level, struct btrfs_root_backup,
+		   chunk_root_level, 8);
+
+BTRFS_SETGET_STACK_FUNCS(backup_extent_root, struct btrfs_root_backup,
+		   extent_root, 64);
+BTRFS_SETGET_STACK_FUNCS(backup_extent_root_gen, struct btrfs_root_backup,
+		   extent_root_gen, 64);
+BTRFS_SETGET_STACK_FUNCS(backup_extent_root_level, struct btrfs_root_backup,
+		   extent_root_level, 8);
+
+BTRFS_SETGET_STACK_FUNCS(backup_fs_root, struct btrfs_root_backup,
+		   fs_root, 64);
+BTRFS_SETGET_STACK_FUNCS(backup_fs_root_gen, struct btrfs_root_backup,
+		   fs_root_gen, 64);
+BTRFS_SETGET_STACK_FUNCS(backup_fs_root_level, struct btrfs_root_backup,
+		   fs_root_level, 8);
+
+BTRFS_SETGET_STACK_FUNCS(backup_dev_root, struct btrfs_root_backup,
+		   dev_root, 64);
+BTRFS_SETGET_STACK_FUNCS(backup_dev_root_gen, struct btrfs_root_backup,
+		   dev_root_gen, 64);
+BTRFS_SETGET_STACK_FUNCS(backup_dev_root_level, struct btrfs_root_backup,
+		   dev_root_level, 8);
+
+BTRFS_SETGET_STACK_FUNCS(backup_csum_root, struct btrfs_root_backup,
+		   csum_root, 64);
+BTRFS_SETGET_STACK_FUNCS(backup_csum_root_gen, struct btrfs_root_backup,
+		   csum_root_gen, 64);
+BTRFS_SETGET_STACK_FUNCS(backup_csum_root_level, struct btrfs_root_backup,
+		   csum_root_level, 8);
+BTRFS_SETGET_STACK_FUNCS(backup_total_bytes, struct btrfs_root_backup,
+		   total_bytes, 64);
+BTRFS_SETGET_STACK_FUNCS(backup_bytes_used, struct btrfs_root_backup,
+		   bytes_used, 64);
+BTRFS_SETGET_STACK_FUNCS(backup_num_devices, struct btrfs_root_backup,
+		   num_devices, 64);
+
 /* struct btrfs_super_block */
 
 BTRFS_SETGET_STACK_FUNCS(super_bytenr, struct btrfs_super_block, bytenr, 64);
@@ -1557,6 +1706,8 @@ BTRFS_SETGET_STACK_FUNCS(super_incompat_flags, struct btrfs_super_block,
 			 incompat_flags, 64);
 BTRFS_SETGET_STACK_FUNCS(super_csum_type, struct btrfs_super_block,
 			 csum_type, 16);
+BTRFS_SETGET_STACK_FUNCS(super_cache_generation, struct btrfs_super_block,
+			 cache_generation, 64);
 
 static inline int btrfs_super_csum_size(struct btrfs_super_block *s)
 {
@@ -1572,6 +1723,7 @@ static inline unsigned long btrfs_leaf_data(struct extent_buffer *l)
 
 /* struct btrfs_file_extent_item */
 BTRFS_SETGET_FUNCS(file_extent_type, struct btrfs_file_extent_item, type, 8);
+BTRFS_SETGET_STACK_FUNCS(stack_file_extent_type, struct btrfs_file_extent_item, type, 8);
 
 static inline unsigned long btrfs_file_extent_inline_start(struct
 						   btrfs_file_extent_item *e)
@@ -1588,18 +1740,30 @@ static inline u32 btrfs_file_extent_calc_inline_size(u32 datasize)
 
 BTRFS_SETGET_FUNCS(file_extent_disk_bytenr, struct btrfs_file_extent_item,
 		   disk_bytenr, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_file_extent_disk_bytenr, struct btrfs_file_extent_item,
+		   disk_bytenr, 64);
 BTRFS_SETGET_FUNCS(file_extent_generation, struct btrfs_file_extent_item,
 		   generation, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_file_extent_generation, struct btrfs_file_extent_item,
+		   generation, 64);
 BTRFS_SETGET_FUNCS(file_extent_disk_num_bytes, struct btrfs_file_extent_item,
 		   disk_num_bytes, 64);
 BTRFS_SETGET_FUNCS(file_extent_offset, struct btrfs_file_extent_item,
 		  offset, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_file_extent_offset, struct btrfs_file_extent_item,
+		  offset, 64);
 BTRFS_SETGET_FUNCS(file_extent_num_bytes, struct btrfs_file_extent_item,
 		   num_bytes, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_file_extent_num_bytes, struct btrfs_file_extent_item,
+		   num_bytes, 64);
 BTRFS_SETGET_FUNCS(file_extent_ram_bytes, struct btrfs_file_extent_item,
 		   ram_bytes, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_file_extent_ram_bytes, struct btrfs_file_extent_item,
+		   ram_bytes, 64);
 BTRFS_SETGET_FUNCS(file_extent_compression, struct btrfs_file_extent_item,
 		   compression, 8);
+BTRFS_SETGET_STACK_FUNCS(stack_file_extent_compression, struct btrfs_file_extent_item,
+		   compression, 8);
 BTRFS_SETGET_FUNCS(file_extent_encryption, struct btrfs_file_extent_item,
 		   encryption, 8);
 BTRFS_SETGET_FUNCS(file_extent_other_encoding, struct btrfs_file_extent_item,
@@ -1643,6 +1807,10 @@ static inline u32 btrfs_level_size(struct btrfs_root *root, int level) {
 	btrfs_item_offset_nr(leaf, slot)))
 
 /* extent-tree.c */
+int btrfs_fix_block_accounting(struct btrfs_trans_handle *trans,
+				 struct btrfs_root *root);
+int btrfs_check_block_accounting(struct btrfs_root *root);
+void btrfs_pin_extent(struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes);
 int btrfs_extent_post_op(struct btrfs_trans_handle *trans,
 			 struct btrfs_root *root);
 int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy);
@@ -1705,6 +1873,20 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans,
 			     struct btrfs_root *root, u64 bytenr, u64 num,
 			     int alloc, int mark_free);
 /* ctree.c */
+int btrfs_del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+		   struct btrfs_path *path, int level, int slot);
+int btrfs_check_node(struct btrfs_root *root,
+		      struct btrfs_disk_key *parent_key,
+		      struct extent_buffer *buf);
+int btrfs_check_leaf(struct btrfs_root *root,
+		      struct btrfs_disk_key *parent_key,
+		      struct extent_buffer *buf);
+int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
+		      struct btrfs_root *root);
+void reada_for_search(struct btrfs_root *root, struct btrfs_path *path,
+			     int level, int slot, u64 objectid);
+struct extent_buffer *read_node_slot(struct btrfs_root *root,
+				   struct extent_buffer *parent, int slot);
 int btrfs_previous_item(struct btrfs_root *root,
 			struct btrfs_path *path, u64 min_objectid,
 			int type);
diff --git a/debug-tree.c b/debug-tree.c
index 1d47519..c497892 100644
--- a/debug-tree.c
+++ b/debug-tree.c
@@ -35,44 +35,6 @@ static int print_usage(void)
 	exit(1);
 }
 
-static void print_extent_leaf(struct btrfs_root *root, struct extent_buffer *l)
-{
-	int i;
-	struct btrfs_item *item;
-//	struct btrfs_extent_ref *ref;
-	struct btrfs_key key;
-	static u64 last = 0;
-	static u64 last_len = 0;
-	u32 nr = btrfs_header_nritems(l);
-	u32 type;
-
-	for (i = 0 ; i < nr ; i++) {
-		item = btrfs_item_nr(l, i);
-		btrfs_item_key_to_cpu(l, &key, i);
-		type = btrfs_key_type(&key);
-		switch (type) {
-		case BTRFS_EXTENT_ITEM_KEY:
-			last_len = key.offset;
-			last = key.objectid;
-			break;
-#if 0
-		case BTRFS_EXTENT_REF_KEY:
-			ref = btrfs_item_ptr(l, i, struct btrfs_extent_ref);
-			printf("%llu %llu extent back ref root %llu gen %llu "
-			       "owner %llu num_refs %lu\n",
-			       (unsigned long long)last,
-			       (unsigned long long)last_len,
-			       (unsigned long long)btrfs_ref_root(l, ref),
-			       (unsigned long long)btrfs_ref_generation(l, ref),
-			       (unsigned long long)btrfs_ref_objectid(l, ref),
-			       (unsigned long)btrfs_ref_num_refs(l, ref));
-			break;
-#endif
-		};
-		fflush(stdout);
-	}
-}
-
 static void print_extents(struct btrfs_root *root, struct extent_buffer *eb)
 {
 	int i;
@@ -81,10 +43,7 @@ static void print_extents(struct btrfs_root *root, struct extent_buffer *eb)
 
 	if (!eb)
 		return;
-	if (btrfs_is_leaf(eb)) {
-		print_extent_leaf(root, eb);
-		return;
-	}
+
 	size = btrfs_level_size(root, btrfs_header_level(eb) - 1);
 	nr = btrfs_header_nritems(eb);
 	for (i = 0; i < nr; i++) {
@@ -103,9 +62,49 @@ static void print_extents(struct btrfs_root *root, struct extent_buffer *eb)
 	}
 }
 
+static void print_old_roots(struct btrfs_super_block *super)
+{
+	struct btrfs_root_backup *backup;
+	int i;
+
+	for (i = 0; i < BTRFS_NUM_BACKUP_ROOTS; i++) {
+		backup = super->super_roots + i;
+		printf("btrfs root backup slot %d\n", i);
+		printf("\ttree root gen %llu block %llu\n",
+		       (unsigned long long)btrfs_backup_tree_root_gen(backup),
+		       (unsigned long long)btrfs_backup_tree_root(backup));
+
+		printf("\t\textent root gen %llu block %llu\n",
+		       (unsigned long long)btrfs_backup_extent_root_gen(backup),
+		       (unsigned long long)btrfs_backup_extent_root(backup));
+
+		printf("\t\tchunk root gen %llu block %llu\n",
+		       (unsigned long long)btrfs_backup_chunk_root_gen(backup),
+		       (unsigned long long)btrfs_backup_chunk_root(backup));
+
+		printf("\t\tdevice root gen %llu block %llu\n",
+		       (unsigned long long)btrfs_backup_dev_root_gen(backup),
+		       (unsigned long long)btrfs_backup_dev_root(backup));
+
+		printf("\t\tcsum root gen %llu block %llu\n",
+		       (unsigned long long)btrfs_backup_csum_root_gen(backup),
+		       (unsigned long long)btrfs_backup_csum_root(backup));
+
+		printf("\t\tfs root gen %llu block %llu\n",
+		       (unsigned long long)btrfs_backup_fs_root_gen(backup),
+		       (unsigned long long)btrfs_backup_fs_root(backup));
+
+		printf("\t\t%llu used %llu total %llu devices\n",
+		       (unsigned long long)btrfs_backup_bytes_used(backup),
+		       (unsigned long long)btrfs_backup_total_bytes(backup),
+		       (unsigned long long)btrfs_backup_num_devices(backup));
+	}
+}
+
 int main(int ac, char **av)
 {
 	struct btrfs_root *root;
+	struct btrfs_fs_info *info;
 	struct btrfs_path path;
 	struct btrfs_key key;
 	struct btrfs_root_item ri;
@@ -116,19 +115,36 @@ int main(int ac, char **av)
 	int ret;
 	int slot;
 	int extent_only = 0;
+	int device_only = 0;
+	int roots_only = 0;
+	int root_backups = 0;
+	u64 block_only = 0;
 	struct btrfs_root *tree_root_scan;
 
 	radix_tree_init();
 
 	while(1) {
 		int c;
-		c = getopt(ac, av, "e");
+		c = getopt(ac, av, "deb:rR");
 		if (c < 0)
 			break;
 		switch(c) {
 			case 'e':
 				extent_only = 1;
 				break;
+			case 'd':
+				device_only = 1;
+				break;
+			case 'r':
+				roots_only = 1;
+				break;
+			case 'R':
+				roots_only = 1;
+				root_backups = 1;
+				break;
+			case 'b':
+				block_only = atoll(optarg);
+				break;
 			default:
 				print_usage();
 		}
@@ -137,24 +153,70 @@ int main(int ac, char **av)
 	if (ac != 1)
 		print_usage();
 
-	root = open_ctree(av[optind], 0, 0);
-	if (!root) {
+	info = open_ctree_fs_info(av[optind], 0, 0, 1);
+	if (!info) {
 		fprintf(stderr, "unable to open %s\n", av[optind]);
 		exit(1);
 	}
+	root = info->fs_root;
+
+	if (block_only) {
+		if (!root) {
+			fprintf(stderr, "unable to open %s\n", av[optind]);
+			exit(1);
+		}
+		leaf = read_tree_block(root,
+				      block_only,
+				      root->leafsize, 0);
+
+		if (leaf && btrfs_header_level(leaf) != 0) {
+			free_extent_buffer(leaf);
+			leaf = NULL;
+		}
+
+		if (!leaf) {
+			leaf = read_tree_block(root,
+					      block_only,
+					      root->nodesize, 0);
+		}
+		if (!leaf) {
+			fprintf(stderr, "failed to read %llu\n",
+				(unsigned long long)block_only);
+			return 0;
+		}
+		btrfs_print_tree(root, leaf, 0);
+		return 0;
+	}
+
 	if (!extent_only) {
-		printf("root tree\n");
-		btrfs_print_tree(root->fs_info->tree_root,
-				 root->fs_info->tree_root->node);
+		if (roots_only) {
+			printf("root tree: %llu level %d\n",
+			     (unsigned long long)info->tree_root->node->start,
+			     btrfs_header_level(info->tree_root->node));
+			printf("chunk tree: %llu level %d\n",
+			     (unsigned long long)info->chunk_root->node->start,
+			     btrfs_header_level(info->chunk_root->node));
+		} else {
+			if (info->tree_root->node) {
+				printf("root tree\n");
+				btrfs_print_tree(info->tree_root,
+						 info->tree_root->node, 1);
+			}
 
-		printf("chunk tree\n");
-		btrfs_print_tree(root->fs_info->chunk_root,
-				 root->fs_info->chunk_root->node);
+			if (info->chunk_root->node) {
+				printf("chunk tree\n");
+				btrfs_print_tree(info->chunk_root,
+						 info->chunk_root->node, 1);
+			}
+		}
 	}
-	tree_root_scan = root->fs_info->tree_root;
+	tree_root_scan = info->tree_root;
 
 	btrfs_init_path(&path);
 again:
+	if (!extent_buffer_uptodate(tree_root_scan->node))
+		goto no_node;
+
 	key.offset = 0;
 	key.objectid = 0;
 	btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
@@ -175,21 +237,27 @@ again:
 		if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
 			unsigned long offset;
 			struct extent_buffer *buf;
-			int skip = extent_only;
+			int skip = extent_only | device_only;
 
 			offset = btrfs_item_ptr_offset(leaf, slot);
 			read_extent_buffer(leaf, &ri, offset, sizeof(ri));
 			buf = read_tree_block(tree_root_scan,
 					      btrfs_root_bytenr(&ri),
-					      tree_root_scan->leafsize, 0);
+					      btrfs_level_size(tree_root_scan,
+							btrfs_root_level(&ri)),
+					      0);
+			if (!extent_buffer_uptodate(buf))
+				goto next;
+
 			switch(found_key.objectid) {
 			case BTRFS_ROOT_TREE_OBJECTID:
 				if (!skip)
 					printf("root");
 				break;
 			case BTRFS_EXTENT_TREE_OBJECTID:
-				skip = 0;
-				if (!extent_only)
+				if (!device_only)
+					skip = 0;
+				if (!extent_only && !device_only)
 					printf("extent");
 				break;
 			case BTRFS_CHUNK_TREE_OBJECTID:
@@ -198,9 +266,8 @@ again:
 				}
 				break;
 			case BTRFS_DEV_TREE_OBJECTID:
-				if (!skip) {
-					printf("device");
-				}
+				skip = 0;
+				printf("device");
 				break;
 			case BTRFS_FS_TREE_OBJECTID:
 				if (!skip) {
@@ -208,9 +275,8 @@ again:
 				}
 				break;
 			case BTRFS_ROOT_TREE_DIR_OBJECTID:
-				if (!skip) {
-					printf("directory");
-				}
+				skip = 0;
+				printf("directory");
 				break;
 			case BTRFS_CSUM_TREE_OBJECTID:
 				if (!skip) {
@@ -256,34 +322,45 @@ again:
 					printf("file");
 				}
 			}
-			if (!skip && !extent_only) {
+			if (extent_only && !skip) {
+				print_extents(tree_root_scan, buf);
+			} else if (!skip) {
 				printf(" tree ");
 				btrfs_print_key(&disk_key);
-				printf(" \n");
-				btrfs_print_tree(tree_root_scan, buf);
-			} else if (extent_only && !skip) {
-				print_extents(tree_root_scan, buf);
+				if (roots_only) {
+					printf(" %llu level %d\n",
+					       (unsigned long long)buf->start,
+					       btrfs_header_level(buf));
+				} else {
+					printf(" \n");
+					btrfs_print_tree(tree_root_scan, buf, 1);
+				}
 			}
 		}
+next:
 		path.slots[0]++;
 	}
+no_node:
 	btrfs_release_path(root, &path);
 
-	if (tree_root_scan == root->fs_info->tree_root &&
-	    root->fs_info->log_root_tree) {
-		tree_root_scan = root->fs_info->log_root_tree;
+	if (tree_root_scan == info->tree_root &&
+	    info->log_root_tree) {
+		tree_root_scan = info->log_root_tree;
 		goto again;
 	}
 
-	if (extent_only)
+	if (extent_only || device_only)
 		return 0;
 
+	if (root_backups)
+		print_old_roots(&info->super_copy);
+
 	printf("total bytes %llu\n",
-	       (unsigned long long)btrfs_super_total_bytes(&root->fs_info->super_copy));
+	       (unsigned long long)btrfs_super_total_bytes(&info->super_copy));
 	printf("bytes used %llu\n",
-	       (unsigned long long)btrfs_super_bytes_used(&root->fs_info->super_copy));
+	       (unsigned long long)btrfs_super_bytes_used(&info->super_copy));
 	uuidbuf[36] = '\0';
-	uuid_unparse(root->fs_info->super_copy.fsid, uuidbuf);
+	uuid_unparse(info->super_copy.fsid, uuidbuf);
 	printf("uuid %s\n", uuidbuf);
 	printf("%s\n", BTRFS_BUILD_VERSION);
 	return 0;
diff --git a/dir-item.c b/dir-item.c
index 71373b8..f00485a 100644
--- a/dir-item.c
+++ b/dir-item.c
@@ -332,5 +332,5 @@ int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans,
 		ret = btrfs_truncate_item(trans, root, path,
 					  item_len - sub_item_len, 1);
 	}
-	return 0;
+	return ret;
 }
diff --git a/dir-test.c b/dir-test.c
index 44f2758..3ae9c68 100644
--- a/dir-test.c
+++ b/dir-test.c
@@ -485,7 +485,7 @@ int main(int ac, char **av)
 			if (ret) {
 				fprintf(stderr, "op %d failed %d:%d\n",
 					op, i, iterations);
-				btrfs_print_tree(root, root->node);
+				btrfs_print_tree(root, root->node, 1);
 				fprintf(stderr, "op %d failed %d:%d\n",
 					op, i, iterations);
 				err = ret;
diff --git a/disk-io.c b/disk-io.c
index addebe1..b21a87f 100644
--- a/disk-io.c
+++ b/disk-io.c
@@ -35,14 +35,19 @@
 #include "utils.h"
 #include "print-tree.h"
 
+static int close_all_devices(struct btrfs_fs_info *fs_info);
+
 static int check_tree_block(struct btrfs_root *root, struct extent_buffer *buf)
 {
 
 	struct btrfs_fs_devices *fs_devices;
 	int ret = 1;
 
-	if (buf->start != btrfs_header_bytenr(buf))
+	if (buf->start != btrfs_header_bytenr(buf)) {
+		printk("Check tree block failed, want=%Lu, have=%Lu\n",
+		       buf->start, btrfs_header_bytenr(buf));
 		return ret;
+	}
 
 	fs_devices = root->fs_info->fs_devices;
 	while (fs_devices) {
@@ -86,7 +91,7 @@ int csum_tree_block_size(struct extent_buffer *buf, u16 csum_size,
 		if (memcmp_extent_buffer(buf, result, 0, csum_size)) {
 			printk("checksum verify failed on %llu wanted %X "
 			       "found %X\n", (unsigned long long)buf->start,
-			       *((int *)result), *((int *)buf));
+			       *((int *)result), *((char *)buf->data));
 			free(result);
 			return 1;
 		}
@@ -123,7 +128,6 @@ int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize,
 			 u64 parent_transid)
 {
 	int ret;
-	int dev_nr;
 	struct extent_buffer *eb;
 	u64 length;
 	struct btrfs_multi_bio *multi = NULL;
@@ -135,7 +139,6 @@ int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize,
 		return 0;
 	}
 
-	dev_nr = 0;
 	length = blocksize;
 	ret = btrfs_map_block(&root->fs_info->mapping_tree, READ,
 			      bytenr, &length, &multi, 0);
@@ -149,7 +152,8 @@ int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize,
 }
 
 static int verify_parent_transid(struct extent_io_tree *io_tree,
-				 struct extent_buffer *eb, u64 parent_transid)
+				 struct extent_buffer *eb, u64 parent_transid,
+				 int ignore)
 {
 	int ret;
 
@@ -165,6 +169,11 @@ static int verify_parent_transid(struct extent_io_tree *io_tree,
 	       (unsigned long long)eb->start,
 	       (unsigned long long)parent_transid,
 	       (unsigned long long)btrfs_header_generation(eb));
+	if (ignore) {
+		printk("Ignoring transid failure\n");
+		return 0;
+	}
+
 	ret = 1;
 out:
 	clear_extent_buffer_uptodate(io_tree, eb);
@@ -177,13 +186,15 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
 				     u32 blocksize, u64 parent_transid)
 {
 	int ret;
-	int dev_nr;
 	struct extent_buffer *eb;
 	u64 length;
+	u64 best_transid = 0;
 	struct btrfs_multi_bio *multi = NULL;
 	struct btrfs_device *device;
 	int mirror_num = 0;
+	int good_mirror = 0;
 	int num_copies;
+	int ignore = 0;
 
 	eb = btrfs_find_create_tree_block(root, bytenr, blocksize);
 	if (!eb)
@@ -192,32 +203,50 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
 	if (btrfs_buffer_uptodate(eb, parent_transid))
 		return eb;
 
-	dev_nr = 0;
 	length = blocksize;
 	while (1) {
 		ret = btrfs_map_block(&root->fs_info->mapping_tree, READ,
 				      eb->start, &length, &multi, mirror_num);
-		BUG_ON(ret);
+		if (ret) {
+			printk("Couldn't map the block %Lu\n", bytenr);
+			break;
+		}
 		device = multi->stripes[0].dev;
 		eb->fd = device->fd;
 		device->total_ios++;
 		eb->dev_bytenr = multi->stripes[0].physical;
 		kfree(multi);
 		ret = read_extent_from_disk(eb);
+
 		if (ret == 0 && check_tree_block(root, eb) == 0 &&
 		    csum_tree_block(root, eb, 1) == 0 &&
-		    verify_parent_transid(eb->tree, eb, parent_transid) == 0) {
+		    verify_parent_transid(eb->tree, eb, parent_transid, ignore)
+		    == 0) {
 			btrfs_set_buffer_uptodate(eb);
 			return eb;
 		}
+		if (ignore) {
+			if (check_tree_block(root, eb))
+				printk("read block failed check_tree_block\n");
+			else
+				printk("Csum didn't match\n");
+			break;
+		}
 		num_copies = btrfs_num_copies(&root->fs_info->mapping_tree,
 					      eb->start, eb->len);
 		if (num_copies == 1) {
-			break;
+			ignore = 1;
+			continue;
+		}
+		if (btrfs_header_generation(eb) > best_transid) {
+			best_transid = btrfs_header_generation(eb);
+			good_mirror = mirror_num;
 		}
 		mirror_num++;
 		if (mirror_num > num_copies) {
-			break;
+			mirror_num = good_mirror;
+			ignore = 1;
+			continue;
 		}
 	}
 	free_extent_buffer(eb);
@@ -364,6 +393,7 @@ static int __commit_transaction(struct btrfs_trans_handle *trans,
 int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
 			     struct btrfs_root *root)
 {
+	u64 transid = trans->transid;
 	int ret = 0;
 	struct btrfs_fs_info *fs_info = root->fs_info;
 
@@ -391,6 +421,7 @@ commit_tree:
 	free_extent_buffer(root->commit_root);
 	root->commit_root = NULL;
 	fs_info->running_transaction = NULL;
+	fs_info->last_trans_committed = transid;
 	return 0;
 }
 
@@ -413,7 +444,9 @@ static int find_and_setup_root(struct btrfs_root *tree_root,
 	generation = btrfs_root_generation(&root->root_item);
 	root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item),
 				     blocksize, generation);
-	BUG_ON(!root->node);
+	if (!extent_buffer_uptodate(root->node))
+		return -EIO;
+
 	return 0;
 }
 
@@ -440,7 +473,9 @@ static int find_and_setup_log_root(struct btrfs_root *tree_root,
 				     btrfs_super_generation(disk_super) + 1);
 
 	fs_info->log_root_tree = log_root;
-	BUG_ON(!log_root->node);
+
+	if (!extent_buffer_uptodate(log_root->node))
+		return -EIO;
 	return 0;
 }
 
@@ -549,7 +584,7 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info,
 		return fs_info->dev_root;
 	if (location->objectid == BTRFS_CSUM_TREE_OBJECTID)
 		return fs_info->csum_root;
-	
+
 	BUG_ON(location->objectid == BTRFS_TREE_RELOC_OBJECTID ||
 	       location->offset != (u64)-1);
 
@@ -570,28 +605,10 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info,
 	return root;
 }
 
-struct btrfs_root *open_ctree(const char *filename, u64 sb_bytenr, int writes)
-{
-	int fp;
-	struct btrfs_root *root;
-	int flags = O_CREAT | O_RDWR;
-
-	if (!writes)
-		flags = O_RDONLY;
-
-	fp = open(filename, flags, 0600);
-	if (fp < 0) {
-		fprintf (stderr, "Could not open %s\n", filename);
-		return NULL;
-	}
-	root = open_ctree_fd(fp, filename, sb_bytenr, writes);
-	close(fp);
-
-	return root;
-}
-
-struct btrfs_root *open_ctree_fd(int fp, const char *path, u64 sb_bytenr,
-				 int writes)
+static struct btrfs_fs_info *__open_ctree_fd(int fp, const char *path,
+					     u64 sb_bytenr,
+					     u64 root_tree_bytenr, int writes,
+					     int partial)
 {
 	u32 sectorsize;
 	u32 nodesize;
@@ -620,12 +637,13 @@ struct btrfs_root *open_ctree_fd(int fp, const char *path, u64 sb_bytenr,
 
 	if (ret) {
 		fprintf(stderr, "No valid Btrfs found on %s\n", path);
-		return NULL;
+		goto out;
 	}
 
 	if (total_devs != 1) {
 		ret = btrfs_scan_for_fsid(fs_devices, total_devs, 1);
-		BUG_ON(ret);
+		if (ret)
+			goto out;
 	}
 
 	memset(fs_info, 0, sizeof(*fs_info));
@@ -660,7 +678,8 @@ struct btrfs_root *open_ctree_fd(int fp, const char *path, u64 sb_bytenr,
 		ret = btrfs_open_devices(fs_devices, O_RDWR);
 	else
 		ret = btrfs_open_devices(fs_devices, O_RDONLY);
-	BUG_ON(ret);
+	if (ret)
+		goto out_cleanup;
 
 	fs_info->super_bytenr = sb_bytenr;
 	disk_super = &fs_info->super_copy;
@@ -668,7 +687,7 @@ struct btrfs_root *open_ctree_fd(int fp, const char *path, u64 sb_bytenr,
 				   disk_super, sb_bytenr);
 	if (ret) {
 		printk("No valid btrfs found\n");
-		BUG_ON(1);
+		goto out_devices;
 	}
 
 	memcpy(fs_info->fsid, &disk_super->fsid, BTRFS_FSID_SIZE);
@@ -678,8 +697,9 @@ struct btrfs_root *open_ctree_fd(int fp, const char *path, u64 sb_bytenr,
 		   ~BTRFS_FEATURE_INCOMPAT_SUPP;
 	if (features) {
 		printk("couldn't open because of unsupported "
-		       "option features (%Lx).\n", features);
-		BUG_ON(1);
+		       "option features (%Lx).\n",
+		       (unsigned long long)features);
+		goto out_devices;
 	}
 
 	features = btrfs_super_incompat_flags(disk_super);
@@ -692,8 +712,9 @@ struct btrfs_root *open_ctree_fd(int fp, const char *path, u64 sb_bytenr,
 		~BTRFS_FEATURE_COMPAT_RO_SUPP;
 	if (writes && features) {
 		printk("couldn't open RDWR because of unsupported "
-		       "option features (%Lx).\n", features);
-		BUG_ON(1);
+		       "option features (%Lx).\n",
+		       (unsigned long long)features);
+		goto out_devices;
 	}
 
 	nodesize = btrfs_super_nodesize(disk_super);
@@ -706,7 +727,8 @@ struct btrfs_root *open_ctree_fd(int fp, const char *path, u64 sb_bytenr,
 	tree_root->stripesize = stripesize;
 
 	ret = btrfs_read_sys_array(tree_root);
-	BUG_ON(ret);
+	if (ret)
+		goto out_devices;
 	blocksize = btrfs_level_size(tree_root,
 				     btrfs_super_chunk_root_level(disk_super));
 	generation = btrfs_super_chunk_root_generation(disk_super);
@@ -717,8 +739,10 @@ struct btrfs_root *open_ctree_fd(int fp, const char *path, u64 sb_bytenr,
 	chunk_root->node = read_tree_block(chunk_root,
 					   btrfs_super_chunk_root(disk_super),
 					   blocksize, generation);
-
-	BUG_ON(!chunk_root->node);
+	if (!extent_buffer_uptodate(chunk_root->node)) {
+		printk("Couldn't read chunk root\n");
+		goto out_devices;
+	}
 
 	read_extent_buffer(chunk_root->node, fs_info->chunk_tree_uuid,
 	         (unsigned long)btrfs_header_chunk_tree_uuid(chunk_root->node),
@@ -726,37 +750,52 @@ struct btrfs_root *open_ctree_fd(int fp, const char *path, u64 sb_bytenr,
 
 	if (!(btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_METADUMP)) {
 		ret = btrfs_read_chunk_tree(chunk_root);
-		BUG_ON(ret);
+		if (ret)
+			goto out_failed;
 	}
 
 	blocksize = btrfs_level_size(tree_root,
 				     btrfs_super_root_level(disk_super));
 	generation = btrfs_super_generation(disk_super);
 
+	if (!root_tree_bytenr)
+		root_tree_bytenr = btrfs_super_root(disk_super);
 	tree_root->node = read_tree_block(tree_root,
-					  btrfs_super_root(disk_super),
+					  root_tree_bytenr,
 					  blocksize, generation);
-	BUG_ON(!tree_root->node);
+	if (!extent_buffer_uptodate(tree_root->node)) {
+		printk("Couldn't read tree root\n");
+		goto out_failed;
+	}
 	ret = find_and_setup_root(tree_root, fs_info,
 				  BTRFS_EXTENT_TREE_OBJECTID, extent_root);
-	BUG_ON(ret);
+	if (ret) {
+		printk("Couldn't setup extent tree\n");
+		goto out_failed;
+	}
 	extent_root->track_dirty = 1;
 
 	ret = find_and_setup_root(tree_root, fs_info,
 				  BTRFS_DEV_TREE_OBJECTID, dev_root);
-	BUG_ON(ret);
+	if (ret) {
+		printk("Couldn't setup device tree\n");
+		goto out_failed;
+	}
 	dev_root->track_dirty = 1;
 
 	ret = find_and_setup_root(tree_root, fs_info,
 				  BTRFS_CSUM_TREE_OBJECTID, csum_root);
-	BUG_ON(ret);
+	if (ret) {
+		printk("Couldn't setup csum tree\n");
+		if (!partial)
+			goto out_failed;
+	}
 	csum_root->track_dirty = 1;
 
-	BUG_ON(ret);
-
 	find_and_setup_log_root(tree_root, fs_info, disk_super);
 
-	fs_info->generation = generation + 1;
+	fs_info->generation = generation;
+	fs_info->last_trans_committed = generation;
 	btrfs_read_block_groups(fs_info->tree_root);
 
 	key.objectid = BTRFS_FS_TREE_OBJECTID;
@@ -764,11 +803,108 @@ struct btrfs_root *open_ctree_fd(int fp, const char *path, u64 sb_bytenr,
 	key.offset = (u64)-1;
 	fs_info->fs_root = btrfs_read_fs_root(fs_info, &key);
 
+	if (!fs_info->fs_root)
+		goto out_failed;
+
 	fs_info->data_alloc_profile = (u64)-1;
 	fs_info->metadata_alloc_profile = (u64)-1;
 	fs_info->system_alloc_profile = fs_info->metadata_alloc_profile;
 
-	return fs_info->fs_root;
+	return fs_info;
+
+out_failed:
+	if (partial)
+		return fs_info;
+
+	if (fs_info->csum_root)
+		free_extent_buffer(fs_info->csum_root->node);
+	if (fs_info->dev_root)
+		free_extent_buffer(fs_info->dev_root->node);
+	if (fs_info->extent_root)
+		free_extent_buffer(fs_info->extent_root->node);
+	if (fs_info->tree_root)
+		free_extent_buffer(fs_info->tree_root->node);
+	if (fs_info->chunk_root)
+		free_extent_buffer(fs_info->chunk_root->node);
+out_devices:
+	close_all_devices(fs_info);
+out_cleanup:
+	extent_io_tree_cleanup(&fs_info->extent_cache);
+	extent_io_tree_cleanup(&fs_info->free_space_cache);
+	extent_io_tree_cleanup(&fs_info->block_group_cache);
+	extent_io_tree_cleanup(&fs_info->pinned_extents);
+	extent_io_tree_cleanup(&fs_info->pending_del);
+	extent_io_tree_cleanup(&fs_info->extent_ins);
+out:
+	free(tree_root);
+	free(extent_root);
+	free(chunk_root);
+	free(dev_root);
+	free(csum_root);
+	free(fs_info);
+	return NULL;
+}
+
+/*
+ * Open @filename (O_CREAT|O_RDWR when @writes is set, O_RDONLY otherwise)
+ * and hand the descriptor to __open_ctree_fd(); NULL if open() fails.
+ */
+struct btrfs_fs_info *open_ctree_fs_info(const char *filename,
+					 u64 sb_bytenr, int writes,
+					 int partial)
+{
+	struct btrfs_fs_info *fs_info;
+	int open_flags = writes ? (O_CREAT | O_RDWR) : O_RDONLY;
+	int fd = open(filename, open_flags, 0600);
+
+	if (fd < 0) {
+		fprintf(stderr, "Could not open %s\n", filename);
+		return NULL;
+	}
+	fs_info = __open_ctree_fd(fd, filename, sb_bytenr, 0, writes, partial);
+	close(fd);
+	return fs_info;
+}
+
+/* Thin wrapper around open_ctree_fs_info() with partial == 0: returns the
+ * fs_root of the opened filesystem, or NULL when the open fails. */
+struct btrfs_root *open_ctree(const char *filename, u64 sb_bytenr, int writes)
+{
+	struct btrfs_fs_info *fs_info =
+		open_ctree_fs_info(filename, sb_bytenr, writes, 0);
+
+	return fs_info ? fs_info->fs_root : NULL;
+}
+
+/*
+ * Read-only open of @filename that passes @root_tree_bytenr through to
+ * __open_ctree_fd() as the location of the tree root, with writes and
+ * partial both 0.  Returns the fs_root, or NULL on failure.
+ */
+struct btrfs_root *open_ctree_recovery(const char *filename, u64 sb_bytenr,
+				       u64 root_tree_bytenr)
+{
+	struct btrfs_fs_info *fs_info;
+	int fd = open(filename, O_RDONLY);
+
+	if (fd < 0) {
+		fprintf(stderr, "Could not open %s\n", filename);
+		return NULL;
+	}
+	fs_info = __open_ctree_fd(fd, filename, sb_bytenr,
+				  root_tree_bytenr, 0, 0);
+	close(fd);
+	return fs_info ? fs_info->fs_root : NULL;
+}
+
+/* As open_ctree(), but on an already-open descriptor; @fp is not closed. */
+struct btrfs_root *open_ctree_fd(int fp, const char *path, u64 sb_bytenr,
+				 int writes)
+{
+	struct btrfs_fs_info *fs_info;
+
+	fs_info = __open_ctree_fd(fp, path, sb_bytenr, 0, writes, 0);
+	return fs_info ? fs_info->fs_root : NULL;
 }
 
 int btrfs_read_dev_super(int fd, struct btrfs_super_block *sb, u64 sb_bytenr)
@@ -828,7 +964,6 @@ int write_dev_supers(struct btrfs_root *root, struct btrfs_super_block *sb,
 
 	if (root->fs_info->super_bytenr != BTRFS_SUPER_INFO_OFFSET) {
 		btrfs_set_super_bytenr(sb, root->fs_info->super_bytenr);
-
 		crc = ~(u32)0;
 		crc = btrfs_csum_data(NULL, (char *)sb + BTRFS_CSUM_SIZE, crc,
 				      BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE);
@@ -946,15 +1081,18 @@ int close_ctree(struct btrfs_root *root)
 	struct btrfs_trans_handle *trans;
 	struct btrfs_fs_info *fs_info = root->fs_info;
 
-	trans = btrfs_start_transaction(root, 1);
-	btrfs_commit_transaction(trans, root);
-	trans = btrfs_start_transaction(root, 1);
-	ret = commit_tree_roots(trans, fs_info);
-	BUG_ON(ret);
-	ret = __commit_transaction(trans, root);
-	BUG_ON(ret);
-	write_ctree_super(trans, root);
-	btrfs_free_transaction(root, trans);
+	if (fs_info->last_trans_committed !=
+	    fs_info->generation) {
+		trans = btrfs_start_transaction(root, 1);
+		btrfs_commit_transaction(trans, root);
+		trans = btrfs_start_transaction(root, 1);
+		ret = commit_tree_roots(trans, fs_info);
+		BUG_ON(ret);
+		ret = __commit_transaction(trans, root);
+		BUG_ON(ret);
+		write_ctree_super(trans, root);
+		btrfs_free_transaction(root, trans);
+	}
 	btrfs_free_block_groups(fs_info);
 
 	free_fs_roots(fs_info);
@@ -970,13 +1108,13 @@ int close_ctree(struct btrfs_root *root)
 	if (fs_info->csum_root->node)
 		free_extent_buffer(fs_info->csum_root->node);
 
-	if (root->fs_info->log_root_tree) {
-		if (root->fs_info->log_root_tree->node)
-			free_extent_buffer(root->fs_info->log_root_tree->node);
-		free(root->fs_info->log_root_tree);
+	if (fs_info->log_root_tree) {
+		if (fs_info->log_root_tree->node)
+			free_extent_buffer(fs_info->log_root_tree->node);
+		free(fs_info->log_root_tree);
 	}
 
-	close_all_devices(root->fs_info);
+	close_all_devices(fs_info);
 	extent_io_tree_cleanup(&fs_info->extent_cache);
 	extent_io_tree_cleanup(&fs_info->free_space_cache);
 	extent_io_tree_cleanup(&fs_info->block_group_cache);
@@ -1019,7 +1157,7 @@ int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid)
 	if (!ret)
 		return ret;
 
-	ret = verify_parent_transid(buf->tree, buf, parent_transid);
+	ret = verify_parent_transid(buf->tree, buf, parent_transid, 1);
 	return !ret;
 }
 
diff --git a/disk-io.h b/disk-io.h
index 49e5692..53e9b17 100644
--- a/disk-io.h
+++ b/disk-io.h
@@ -46,7 +46,13 @@ int clean_tree_block(struct btrfs_trans_handle *trans,
 struct btrfs_root *open_ctree(const char *filename, u64 sb_bytenr, int writes);
 struct btrfs_root *open_ctree_fd(int fp, const char *path, u64 sb_bytenr,
 				 int writes);
+struct btrfs_root *open_ctree_recovery(const char *filename, u64 sb_bytenr,
+				       u64 root_tree_bytenr);
+struct btrfs_fs_info *open_ctree_fs_info(const char *filename,
+					 u64 sb_bytenr, int writes,
+					 int partial);
 int close_ctree(struct btrfs_root *root);
+int write_all_supers(struct btrfs_root *root);
 int write_ctree_super(struct btrfs_trans_handle *trans,
 		      struct btrfs_root *root);
 int btrfs_read_dev_super(int fd, struct btrfs_super_block *sb, u64 sb_bytenr);
diff --git a/extent-cache.c b/extent-cache.c
index b871e18..3dd6434 100644
--- a/extent-cache.c
+++ b/extent-cache.c
@@ -96,13 +96,11 @@ int insert_existing_cache_extent(struct cache_tree *tree,
 				 struct cache_extent *pe)
 {
 	struct rb_node *found;
-	struct cache_extent *entry;
 
 	found = tree_insert(&tree->root, pe->start, pe->size, &pe->rb_node);
-	if (found) {
-		entry = rb_entry(found, struct cache_extent, rb_node);
+	if (found)
 		return -EEXIST;
-	}
+
 	return 0;
 }
 
diff --git a/extent-tree.c b/extent-tree.c
index b2f9bb2..20cdffa 100644
--- a/extent-tree.c
+++ b/extent-tree.c
@@ -1039,6 +1039,11 @@ static int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
 		err = ret;
 		goto out;
 	}
+	if (ret) {
+		printf("Failed to find [%llu, %u, %llu]\n", key.objectid, key.type, key.offset);
+		return -ENOENT;
+	}
+
 	BUG_ON(ret);
 
 	leaf = path->nodes[0];
@@ -1059,6 +1064,13 @@ static int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
 		item_size = btrfs_item_size_nr(leaf, path->slots[0]);
 	}
 #endif
+	if (item_size < sizeof(*ei)) {
+		printf("Size is %u, needs to be %u, slot %d\n",
+		       (unsigned)item_size,
+		       (unsigned)sizeof(*ei), path->slots[0]);
+		btrfs_print_leaf(root, leaf);
+		return -EINVAL;
+	}
 	BUG_ON(item_size < sizeof(*ei));
 
 	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
@@ -1071,7 +1083,9 @@ static int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
 		ptr += sizeof(struct btrfs_tree_block_info);
 		BUG_ON(ptr > end);
 	} else {
-		BUG_ON(!(flags & BTRFS_EXTENT_FLAG_DATA));
+		if (!(flags & BTRFS_EXTENT_FLAG_DATA)) {
+			return -EIO;
+		}
 	}
 
 	err = -ENOENT;
@@ -1447,9 +1461,8 @@ int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
 	if (ret < 0)
 		goto out;
 	if (ret != 0) {
-		btrfs_print_leaf(root, path->nodes[0]);
-		printk("failed to find block number %Lu\n", bytenr);
-		BUG();
+		ret = -EIO;
+		goto out;
 	}
 
 	l = path->nodes[0];
@@ -1470,9 +1483,8 @@ int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
 			extent_flags = BTRFS_BLOCK_FLAG_FULL_BACKREF;
 #else
 			BUG();
-#endif		
-		}
-		BUG_ON(num_refs == 0);
+#endif
+	}
 	item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
 	if (refs)
 		*refs = num_refs;
@@ -1549,7 +1561,6 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
 	int i;
 	int level;
 	int ret = 0;
-	int faili = 0;
 	int (*process_func)(struct btrfs_trans_handle *trans,
 			    struct btrfs_root *root,
 			    u64, u64, u64, u64, u64, u64);
@@ -1592,7 +1603,6 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
 					   parent, ref_root, key.objectid,
 					   key.offset);
 			if (ret) {
-				faili = i;
 				WARN_ON(1);
 				goto fail;
 			}
@@ -1602,7 +1612,6 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
 			ret = process_func(trans, root, bytenr, num_bytes,
 					   parent, ref_root, level - 1, 0);
 			if (ret) {
-				faili = i;
 				WARN_ON(1);
 				goto fail;
 			}
@@ -1611,33 +1620,6 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
 	return 0;
 fail:
 	WARN_ON(1);
-#if 0
-	for (i =0; i < faili; i++) {
-		if (level == 0) {
-			u64 disk_bytenr;
-			btrfs_item_key_to_cpu(buf, &key, i);
-			if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
-				continue;
-			fi = btrfs_item_ptr(buf, i,
-					    struct btrfs_file_extent_item);
-			if (btrfs_file_extent_type(buf, fi) ==
-			    BTRFS_FILE_EXTENT_INLINE)
-				continue;
-			disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
-			if (disk_bytenr == 0)
-				continue;
-			err = btrfs_free_extent(trans, root, disk_bytenr,
-				    btrfs_file_extent_disk_num_bytes(buf,
-								      fi), 0);
-			BUG_ON(err);
-		} else {
-			bytenr = btrfs_node_blockptr(buf, i);
-			err = btrfs_free_extent(trans, root, bytenr,
-					btrfs_level_size(root, level - 1), 0);
-			BUG_ON(err);
-		}
-	}
-#endif
 	return ret;
 }
 
@@ -1721,7 +1703,6 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
 
 		cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
 		ret = write_one_cache_group(trans, root, path, cache);
-		BUG_ON(ret);
 	}
 	btrfs_free_path(path);
 	return 0;
@@ -1735,7 +1716,7 @@ static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
 	struct btrfs_space_info *found;
 	list_for_each(cur, head) {
 		found = list_entry(cur, struct btrfs_space_info, list);
-		if (found->flags == flags)
+		if (found->flags & flags)
 			return found;
 	}
 	return NULL;
@@ -1752,7 +1733,12 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
 	if (found) {
 		found->total_bytes += total_bytes;
 		found->bytes_used += bytes_used;
-		WARN_ON(found->total_bytes < found->bytes_used);
+		if (found->total_bytes < found->bytes_used) {
+			fprintf(stderr, "warning, bad space info total_bytes "
+				"%llu used %llu\n",
+			       (unsigned long long)found->total_bytes,
+			       (unsigned long long)found->bytes_used);
+		}
 		*space_info = found;
 		return 0;
 	}
@@ -1775,6 +1761,7 @@ static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
 {
 	u64 extra_flags = flags & (BTRFS_BLOCK_GROUP_RAID0 |
 				   BTRFS_BLOCK_GROUP_RAID1 |
+				   BTRFS_BLOCK_GROUP_RAID10 |
 				   BTRFS_BLOCK_GROUP_DUP);
 	if (extra_flags) {
 		if (flags & BTRFS_BLOCK_GROUP_DATA)
@@ -1812,7 +1799,8 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
 	    thresh)
 		return 0;
 
-	ret = btrfs_alloc_chunk(trans, extent_root, &start, &num_bytes, flags);
+	ret = btrfs_alloc_chunk(trans, extent_root, &start, &num_bytes,
+	                        space_info->flags);
 	if (ret == -ENOSPC) {
 		space_info->full = 1;
 		return 0;
@@ -1820,7 +1808,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
 
 	BUG_ON(ret);
 
-	ret = btrfs_make_block_group(trans, extent_root, 0, flags,
+	ret = btrfs_make_block_group(trans, extent_root, 0, space_info->flags,
 		     BTRFS_FIRST_CHUNK_TREE_OBJECTID, start, num_bytes);
 	BUG_ON(ret);
 	return 0;
@@ -1869,6 +1857,7 @@ static int update_block_group(struct btrfs_trans_handle *trans,
 
 		old_val = btrfs_block_group_used(&cache->item);
 		num_bytes = min(total, cache->key.offset - byte_in_group);
+
 		if (alloc) {
 			old_val += num_bytes;
 			cache->space_info->bytes_used += num_bytes;
@@ -1904,6 +1893,10 @@ static int update_pinned_extents(struct btrfs_root *root,
 	}
 	while (num > 0) {
 		cache = btrfs_lookup_block_group(fs_info, bytenr);
+		if (!cache) {
+			len = min((u64)root->sectorsize, num);
+			goto next;
+		}
 		WARN_ON(!cache);
 		len = min(num, cache->key.offset -
 			  (bytenr - cache->key.objectid));
@@ -1916,6 +1909,7 @@ static int update_pinned_extents(struct btrfs_root *root,
 			cache->space_info->bytes_pinned -= len;
 			fs_info->total_pinned -= len;
 		}
+next:
 		bytenr += len;
 		num -= len;
 	}
@@ -1963,6 +1957,21 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
 	return 0;
 }
 
+/*
+ * Nonzero when extent_ins or pending_del holds an EXTENT_LOCKED range.
+ * NOTE(review): assumes find_first_extent_bit() returns 0 on a hit.
+ */
+static int extent_root_pending_ops(struct btrfs_fs_info *info)
+{
+	u64 start;
+	u64 end;
+
+	if (find_first_extent_bit(&info->extent_ins, 0, &start, &end,
+				  EXTENT_LOCKED) == 0)
+		return 1;
+	return find_first_extent_bit(&info->pending_del, 0, &start, &end,
+				     EXTENT_LOCKED) == 0;
+}
 static int finish_current_insert(struct btrfs_trans_handle *trans,
 				 struct btrfs_root *extent_root)
 {
@@ -2047,6 +2056,12 @@ pinit:
 	return 0;
 }
 
+/* Pins the range on the extent root (update_pinned_extents, last arg 1). */
+void btrfs_pin_extent(struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes)
+{
+	update_pinned_extents(fs_info->extent_root, bytenr, num_bytes, 1);
+}
+
 /*
  * remove an extent from the root, returns 0 on success
  */
@@ -2072,6 +2087,12 @@ static int __free_extent(struct btrfs_trans_handle *trans,
 	u32 item_size;
 	u64 refs;
 
+	if (root->fs_info->free_extent_hook) {
+		root->fs_info->free_extent_hook(trans, root, bytenr, num_bytes,
+						parent, root_objectid, owner_objectid,
+						owner_offset, refs_to_drop);
+
+	}
 	path = btrfs_alloc_path();
 	if (!path)
 		return -ENOMEM;
@@ -2132,8 +2153,6 @@ static int __free_extent(struct btrfs_trans_handle *trans,
 			extent_slot = path->slots[0];
 		}
 	} else {
-		btrfs_print_leaf(extent_root, path->nodes[0]);
-		WARN_ON(1);
 		printk(KERN_ERR "btrfs unable to find ref byte nr %llu "
 		       "parent %llu root %llu  owner %llu offset %llu\n",
 		       (unsigned long long)bytenr,
@@ -2141,6 +2160,8 @@ static int __free_extent(struct btrfs_trans_handle *trans,
 		       (unsigned long long)root_objectid,
 		       (unsigned long long)owner_objectid,
 		       (unsigned long long)owner_offset);
+		ret = -EIO;
+		goto fail;
 	}
 
 	leaf = path->nodes[0];
@@ -2246,10 +2267,9 @@ static int __free_extent(struct btrfs_trans_handle *trans,
 			BUG_ON(ret);
 		}
 
-		ret = update_block_group(trans, root, bytenr, num_bytes, 0,
-					 mark_free);
-		BUG_ON(ret);
+		update_block_group(trans, root, bytenr, num_bytes, 0, mark_free);
 	}
+fail:
 	btrfs_free_path(path);
 	finish_current_insert(trans, extent_root);
 	return ret;
@@ -2578,13 +2598,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
 
 	ret = update_block_group(trans, root, ins->objectid, ins->offset,
 				 1, 0);
-	if (ret) {
-		printk(KERN_ERR "btrfs update block group failed for %llu "
-		       "%llu\n", (unsigned long long)ins->objectid,
-		       (unsigned long long)ins->offset);
-		BUG();
-	}
-	return ret;
+	return 0;
 }
 
 static int alloc_tree_block(struct btrfs_trans_handle *trans,
@@ -3167,7 +3181,6 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
 
 	finish_current_insert(trans, extent_root);
 	ret = del_pending_extents(trans, extent_root);
-	BUG_ON(ret);
 	set_avail_alloc_bits(extent_root->fs_info, type);
 	return 0;
 }
@@ -3283,3 +3296,158 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans,
 	return update_block_group(trans, root, bytenr, num_bytes,
 				  alloc, mark_free);
 }
+
+/*
+ * Store in *total the summed key.offset of EXTENT_ITEMs with objectid in
+ * [start, start + len).  @path is always released; 0 or a negative error.
+ */
+static int btrfs_count_extents_in_block_group(struct btrfs_root *root,
+					      struct btrfs_path *path, u64 start,
+					      u64 len, u64 *total)
+{
+	struct btrfs_key key;
+	u64 bytes_used = 0;
+	int ret;
+
+	key.offset = 0;
+	key.objectid = start;
+	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
+	ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
+				&key, path, 0, 0);
+	if (ret < 0)
+		goto out;
+	while (1) {
+		if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
+			ret = btrfs_next_leaf(root, path);
+			if (ret < 0)
+				goto out;
+			if (ret > 0)
+				break;
+		}
+		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+		/* start + len already belongs to the next block group */
+		if (key.objectid >= start + len)
+			break;
+		if (key.type == BTRFS_EXTENT_ITEM_KEY)
+			bytes_used += key.offset;
+		path->slots[0]++;
+	}
+	*total = bytes_used;
+	ret = 0;
+out:
+	btrfs_release_path(root, path);
+	return ret;
+}
+
+/*
+ * Walk every block group, recount its extent items and report on stderr any
+ * group whose recorded used-bytes differs from the recount.  Always returns
+ * 0; a failed recount just skips the comparison for that group.
+ * NOTE(review): also zeroes space_info->bytes_used for each group visited.
+ */
+int btrfs_check_block_accounting(struct btrfs_root *root)
+{
+	struct btrfs_fs_info *fs_info = root->fs_info;
+	struct btrfs_block_group_cache *bg;
+	struct btrfs_path path;
+	u64 counted = 0;
+	u64 recorded;
+	u64 next = 0;
+
+	btrfs_init_path(&path);
+
+	while ((bg = btrfs_lookup_block_group(fs_info, next)) != NULL) {
+		next = bg->key.objectid + bg->key.offset;
+		bg->space_info->bytes_used = 0;
+
+		if (btrfs_count_extents_in_block_group(root, &path,
+						       bg->key.objectid,
+						       bg->key.offset, &counted))
+			continue;
+
+		recorded = btrfs_block_group_used(&bg->item);
+		if (recorded == counted)
+			continue;
+		fprintf(stderr, "bad block group accounting found %llu "
+			"expected %llu block group %llu\n",
+			(unsigned long long)counted,
+			(unsigned long long)recorded,
+			(unsigned long long)bg->key.objectid);
+	}
+	return 0;
+}
+
+/*
+ * Fixup block accounting for when what make_block_groups set up is not
+ * accurate: flush pending extent ops, zero every block group's used count
+ * (marking it dirty), re-add each EXTENT_ITEM and store the total in the
+ * super copy.  Returns 0 or the first error; the path is always released.
+ */
+int btrfs_fix_block_accounting(struct btrfs_trans_handle *trans,
+			       struct btrfs_root *root)
+{
+	int ret;
+	u64 start = 0;
+	u64 bytes_used = 0;
+	struct btrfs_path path;
+	struct btrfs_key key;
+	struct btrfs_block_group_cache *cache;
+	struct btrfs_fs_info *fs_info = root->fs_info;
+
+	root = root->fs_info->extent_root;
+
+	while(extent_root_pending_ops(fs_info)) {
+		ret = finish_current_insert(trans, root);
+		if (ret)
+			return ret;
+		ret = del_pending_extents(trans, root);
+		if (ret)
+			return ret;
+	}
+
+	/* reset every block group to "nothing used" and mark it dirty */
+	while(1) {
+		cache = btrfs_lookup_block_group(fs_info, start);
+		if (!cache)
+			break;
+		start = cache->key.objectid + cache->key.offset;
+		btrfs_set_block_group_used(&cache->item, 0);
+		cache->space_info->bytes_used = 0;
+		set_extent_bits(&root->fs_info->block_group_cache,
+				cache->key.objectid,
+				cache->key.objectid + cache->key.offset -1,
+				BLOCK_GROUP_DIRTY, GFP_NOFS);
+	}
+
+	/* walk the whole extent tree, re-accounting each extent item */
+	btrfs_init_path(&path);
+	key.offset = 0;
+	key.objectid = 0;
+	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
+	ret = btrfs_search_slot(trans, root->fs_info->extent_root,
+				&key, &path, 0, 0);
+	if (ret < 0)
+		goto out;
+	while(1) {
+		if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
+			ret = btrfs_next_leaf(root, &path);
+			if (ret < 0)
+				goto out;
+			if (ret > 0)
+				break;
+		}
+		btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
+		if (key.type == BTRFS_EXTENT_ITEM_KEY) {
+			bytes_used += key.offset;
+			ret = btrfs_update_block_group(trans, root,
+				  key.objectid, key.offset, 1, 0);
+			BUG_ON(ret);
+		}
+		path.slots[0]++;
+	}
+	btrfs_set_super_bytes_used(&root->fs_info->super_copy, bytes_used);
+	ret = 0;
+out:
+	btrfs_release_path(root, &path);
+	return ret;
+}
diff --git a/extent_io.c b/extent_io.c
index 069c199..ebb35b2 100644
--- a/extent_io.c
+++ b/extent_io.c
@@ -28,7 +28,8 @@
 #include "extent_io.h"
 #include "list.h"
 
-u64 cache_max = 1024 * 1024 * 32;
+u64 cache_soft_max = 1024 * 1024 * 256;
+u64 cache_hard_max = 1 * 1024 * 1024 * 1024;
 
 void extent_io_tree_init(struct extent_io_tree *tree)
 {
@@ -296,7 +297,6 @@ int set_extent_bits(struct extent_io_tree *tree, u64 start,
 	struct extent_state *prealloc = NULL;
 	struct cache_extent *node;
 	int err = 0;
-	int set;
 	u64 last_start;
 	u64 last_end;
 again:
@@ -327,7 +327,6 @@ again:
 	 * Just lock what we found and keep going
 	 */
 	if (state->start == start && state->end <= end) {
-		set = state->state & bits;
 		state->state |= bits;
 		merge_state(tree, state);
 		if (last_end == (u64)-1)
@@ -352,7 +351,6 @@ again:
 	 * desired bit on it.
 	 */
 	if (state->start < start) {
-		set = state->state & bits;
 		err = split_state(tree, state, prealloc, start);
 		BUG_ON(err == -EEXIST);
 		prealloc = NULL;
@@ -398,7 +396,6 @@ again:
 	 * We need to split the extent, and set the bit
 	 * on the first half
 	 */
-	set = state->state & bits;
 	err = split_state(tree, state, prealloc, end + 1);
 	BUG_ON(err == -EEXIST);
 
@@ -544,18 +541,19 @@ static int free_some_buffers(struct extent_io_tree *tree)
 	struct extent_buffer *eb;
 	struct list_head *node, *next;
 
-	if (tree->cache_size < cache_max)
+	if (tree->cache_size < cache_soft_max)
 		return 0;
+
 	list_for_each_safe(node, next, &tree->lru) {
 		eb = list_entry(node, struct extent_buffer, lru);
 		if (eb->refs == 1) {
 			free_extent_buffer(eb);
-			if (tree->cache_size < cache_max)
+			if (tree->cache_size < cache_hard_max)
 				break;
 		} else {
 			list_move_tail(&eb->lru, &tree->lru);
 		}
-		if (nrscan++ > 64)
+		if (nrscan++ > 64 && tree->cache_size < cache_hard_max)
 			break;
 	}
 	return 0;
@@ -572,6 +570,7 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
 		BUG();
 		return NULL;
 	}
+	memset(eb, 0, sizeof(struct extent_buffer) + blocksize);
 
 	eb->start = bytenr;
 	eb->len = blocksize;
@@ -657,7 +656,6 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
 		if (cache) {
 			eb = container_of(cache, struct extent_buffer,
 					  cache_node);
-			BUG_ON(eb->refs != 1);
 			free_extent_buffer(eb);
 		}
 		eb = __alloc_extent_buffer(tree, bytenr, blocksize);
@@ -710,6 +708,9 @@ int clear_extent_buffer_uptodate(struct extent_io_tree *tree,
 
 int extent_buffer_uptodate(struct extent_buffer *eb)
 {
+	if (!eb)
+		return 0;
+
 	if (eb->flags & EXTENT_UPTODATE)
 		return 1;
 	return 0;
diff --git a/file-item.c b/file-item.c
index 9732282..c746b44 100644
--- a/file-item.c
+++ b/file-item.c
@@ -193,7 +193,7 @@ int btrfs_csum_file_block(struct btrfs_trans_handle *trans,
 			  struct btrfs_root *root, u64 alloc_end,
 			  u64 bytenr, char *data, size_t len)
 {
-	int ret;
+	int ret = 0;
 	struct btrfs_key file_key;
 	struct btrfs_key found_key;
 	u64 next_offset = (u64)-1;
@@ -218,6 +218,7 @@ int btrfs_csum_file_block(struct btrfs_trans_handle *trans,
 	item = btrfs_lookup_csum(trans, root, path, bytenr, 1);
 	if (!IS_ERR(item)) {
 		leaf = path->nodes[0];
+		ret = 0;
 		goto found;
 	}
 	ret = PTR_ERR(item);
diff --git a/find-root.c b/find-root.c
new file mode 100644
index 0000000..c0f38b8
--- /dev/null
+++ b/find-root.c
@@ -0,0 +1,458 @@
+/*
+ * Copyright (C) 2011 Red Hat.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#define _XOPEN_SOURCE 500
+#define _GNU_SOURCE 1
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <zlib.h>
+#include "kerncompat.h"
+#include "ctree.h"
+#include "disk-io.h"
+#include "print-tree.h"
+#include "transaction.h"
+#include "list.h"
+#include "version.h"
+#include "volumes.h"
+#include "utils.h"
+#include "crc32c.h"
+
+static int verbose = 0;
+static u16 csum_size = 0;
+static u64 search_objectid = BTRFS_ROOT_TREE_OBJECTID;
+
+static void usage()
+{	/* Synopsis must list every flag main() passes to getopt ("vo:"). */
+	fprintf(stderr, "Usage: find-root [-v] [-o objectid] <device>\n");
+}
+
+int csum_block(void *buf, u32 len)
+{
+	char *result;
+	u32 crc = ~(u32)0;
+	int ret = 0;
+
+	result = malloc(csum_size * sizeof(char));
+	if (!result) {
+		fprintf(stderr, "No memory\n");
+		return 1;
+	}
+
+	len -= BTRFS_CSUM_SIZE;
+	crc = crc32c(crc, buf + BTRFS_CSUM_SIZE, len);
+	btrfs_csum_final(crc, result);
+
+	if (memcmp(buf, result, csum_size))
+		ret = 1;
+	free(result);
+	return ret;
+}
+
+static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
+			u32 stripesize, struct btrfs_root *root,
+			struct btrfs_fs_info *fs_info, u64 objectid)
+{
+	root->node = NULL;
+	root->commit_root = NULL;
+	root->sectorsize = sectorsize;
+	root->nodesize = nodesize;
+	root->leafsize = leafsize;
+	root->stripesize = stripesize;
+	root->ref_cows = 0;
+	root->track_dirty = 0;
+
+	root->fs_info = fs_info;
+	root->objectid = objectid;
+	root->last_trans = 0;
+	root->highest_inode = 0;
+	root->last_inode_alloc = 0;
+
+	INIT_LIST_HEAD(&root->dirty_list);
+	memset(&root->root_key, 0, sizeof(root->root_key));
+	memset(&root->root_item, 0, sizeof(root->root_item));
+	root->root_key.objectid = objectid;
+	return 0;
+}
+
+static int close_all_devices(struct btrfs_fs_info *fs_info)
+{
+	struct list_head *list;
+	struct list_head *next;
+	struct btrfs_device *device;
+	/* NOTE(review): this early return makes the close() loop below dead */
+	return 0;
+	/* unreachable: would close the fd of every device in fs_devices */
+	list = &fs_info->fs_devices->devices;
+	list_for_each(next, list) {
+		device = list_entry(next, struct btrfs_device, dev_list);
+		close(device->fd);
+	}
+	return 0;
+}
+
+static struct btrfs_root *open_ctree_broken(int fd, const char *device)
+{
+	u32 sectorsize;
+	u32 nodesize;
+	u32 leafsize;
+	u32 blocksize;
+	u32 stripesize;
+	u64 generation;
+	struct btrfs_root *tree_root = malloc(sizeof(struct btrfs_root));
+	struct btrfs_root *extent_root = malloc(sizeof(struct btrfs_root));
+	struct btrfs_root *chunk_root = malloc(sizeof(struct btrfs_root));
+	struct btrfs_root *dev_root = malloc(sizeof(struct btrfs_root));
+	struct btrfs_root *csum_root = malloc(sizeof(struct btrfs_root));
+	struct btrfs_fs_info *fs_info = malloc(sizeof(*fs_info));
+	int ret;
+	struct btrfs_super_block *disk_super;
+	struct btrfs_fs_devices *fs_devices = NULL;
+	u64 total_devs;
+	u64 features;
+
+	ret = btrfs_scan_one_device(fd, device, &fs_devices,
+				    &total_devs, BTRFS_SUPER_INFO_OFFSET);
+
+	if (ret) {
+		fprintf(stderr, "No valid Btrfs found on %s\n", device);
+		goto out;
+	}
+
+	if (total_devs != 1) {
+		ret = btrfs_scan_for_fsid(fs_devices, total_devs, 1);
+		if (ret)
+			goto out;
+	}
+
+	memset(fs_info, 0, sizeof(*fs_info));
+	fs_info->tree_root = tree_root;
+	fs_info->extent_root = extent_root;
+	fs_info->chunk_root = chunk_root;
+	fs_info->dev_root = dev_root;
+	fs_info->csum_root = csum_root;
+
+	fs_info->readonly = 1;
+
+	extent_io_tree_init(&fs_info->extent_cache);
+	extent_io_tree_init(&fs_info->free_space_cache);
+	extent_io_tree_init(&fs_info->block_group_cache);
+	extent_io_tree_init(&fs_info->pinned_extents);
+	extent_io_tree_init(&fs_info->pending_del);
+	extent_io_tree_init(&fs_info->extent_ins);
+	cache_tree_init(&fs_info->fs_root_cache);
+
+	cache_tree_init(&fs_info->mapping_tree.cache_tree);
+
+	mutex_init(&fs_info->fs_mutex);
+	fs_info->fs_devices = fs_devices;
+	INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots);
+	INIT_LIST_HEAD(&fs_info->space_info);
+
+	__setup_root(4096, 4096, 4096, 4096, tree_root,
+		     fs_info, BTRFS_ROOT_TREE_OBJECTID);
+
+	ret = btrfs_open_devices(fs_devices, O_RDONLY);
+	if (ret)
+		goto out_cleanup;
+
+	fs_info->super_bytenr = BTRFS_SUPER_INFO_OFFSET;
+	disk_super = &fs_info->super_copy;
+	ret = btrfs_read_dev_super(fs_devices->latest_bdev,
+				   disk_super, BTRFS_SUPER_INFO_OFFSET);
+	if (ret) {
+		printk("No valid btrfs found\n");
+		goto out_devices;
+	}
+
+	memcpy(fs_info->fsid, &disk_super->fsid, BTRFS_FSID_SIZE);
+
+
+	features = btrfs_super_incompat_flags(disk_super) &
+		   ~BTRFS_FEATURE_INCOMPAT_SUPP;
+	if (features) {
+		printk("couldn't open because of unsupported "
+		       "option features (%Lx).\n", features);
+		goto out_devices;
+	}
+
+	features = btrfs_super_incompat_flags(disk_super);
+	if (!(features & BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF)) {
+		features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF;
+		btrfs_set_super_incompat_flags(disk_super, features);
+	}
+
+	nodesize = btrfs_super_nodesize(disk_super);
+	leafsize = btrfs_super_leafsize(disk_super);
+	sectorsize = btrfs_super_sectorsize(disk_super);
+	stripesize = btrfs_super_stripesize(disk_super);
+	tree_root->nodesize = nodesize;
+	tree_root->leafsize = leafsize;
+	tree_root->sectorsize = sectorsize;
+	tree_root->stripesize = stripesize;
+
+	ret = btrfs_read_sys_array(tree_root);
+	if (ret)
+		goto out_devices;
+	blocksize = btrfs_level_size(tree_root,
+				     btrfs_super_chunk_root_level(disk_super));
+	generation = btrfs_super_chunk_root_generation(disk_super);
+
+	__setup_root(nodesize, leafsize, sectorsize, stripesize,
+		     chunk_root, fs_info, BTRFS_CHUNK_TREE_OBJECTID);
+
+	chunk_root->node = read_tree_block(chunk_root,
+					   btrfs_super_chunk_root(disk_super),
+					   blocksize, generation);
+	if (!chunk_root->node) {
+		printk("Couldn't read chunk root\n");
+		goto out_devices;
+	}
+
+	read_extent_buffer(chunk_root->node, fs_info->chunk_tree_uuid,
+	         (unsigned long)btrfs_header_chunk_tree_uuid(chunk_root->node),
+		 BTRFS_UUID_SIZE);
+
+	if (!(btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_METADUMP)) {
+		ret = btrfs_read_chunk_tree(chunk_root);
+		if (ret)
+			goto out_chunk;
+	}
+
+	return fs_info->chunk_root;
+out_chunk:
+	free_extent_buffer(fs_info->chunk_root->node);
+out_devices:
+	close_all_devices(fs_info);
+out_cleanup:
+	extent_io_tree_cleanup(&fs_info->extent_cache);
+	extent_io_tree_cleanup(&fs_info->free_space_cache);
+	extent_io_tree_cleanup(&fs_info->block_group_cache);
+	extent_io_tree_cleanup(&fs_info->pinned_extents);
+	extent_io_tree_cleanup(&fs_info->pending_del);
+	extent_io_tree_cleanup(&fs_info->extent_ins);
+out:
+	free(tree_root);
+	free(extent_root);
+	free(chunk_root);
+	free(dev_root);
+	free(csum_root);
+	free(fs_info);
+	return NULL;
+}
+
+static int search_iobuf(struct btrfs_root *root, void *iobuf,
+			size_t iobuf_size, off_t offset)
+{	/* Scan iobuf block by block for a header matching search_objectid. */
+	u64 gen = btrfs_super_generation(&root->fs_info->super_copy);
+	u64 objectid = search_objectid;
+	u32 size = btrfs_super_nodesize(&root->fs_info->super_copy);
+	u8 level = root->fs_info->super_copy.root_level;
+	size_t block_off = 0;
+	/* Returns 0 after printing a match, 1 when nothing in iobuf matched. */
+	while (size && block_off + size <= iobuf_size) {
+		void *block = iobuf + block_off;
+		struct btrfs_header *header = block;
+		u64 h_byte, h_level, h_gen, h_owner;
+
+		/* whole blocks only: csum_block() reads 'size' bytes */
+		h_byte = le64_to_cpu(header->bytenr);
+		h_owner = le64_to_cpu(header->owner);
+		h_level = header->level;
+		h_gen = le64_to_cpu(header->generation);
+
+		if (h_owner != objectid)
+			goto next;
+		if (h_byte != (offset + block_off))
+			goto next;
+		if (h_level != level)
+			goto next;
+		if (csum_block(block, size)) {
+			fprintf(stderr, "Well block %Lu seems good, "
+				"but the csum doesn't match\n",
+				h_byte);
+			goto next;
+		}
+		if (h_gen != gen) {
+			fprintf(stderr, "Well block %Lu seems great, "
+				"but generation doesn't match, "
+				"have=%Lu, want=%Lu\n", h_byte, h_gen,
+				gen);
+			goto next;
+		}
+		printf("Found tree root at %Lu\n", h_byte);
+		return 0;
+next:
+		block_off += size;
+	}
+
+	return 1;
+}
+
+static int read_physical(struct btrfs_root *root, int fd, u64 offset,
+			 u64 bytenr, u64 len)
+{	/* Read len bytes of fd at file offset bytenr and search them. */
+	char *iobuf = malloc(len);
+	ssize_t done;
+	size_t total_read = 0;
+	int ret = 1;
+	/* Returns search_iobuf()'s result, or -1 on allocation/read failure. */
+	if (!iobuf) {
+		fprintf(stderr, "No memory\n");
+		return -1;
+	}
+
+	while (total_read < len) {
+		done = pread64(fd, iobuf + total_read, len - total_read,
+			       bytenr + total_read);
+		if (done <= 0) {	/* 0 is EOF: bail out, never spin */
+			fprintf(stderr, "Failed to read: %s\n",
+				done ? strerror(errno) : "unexpected EOF");
+			ret = -1;
+			goto out;
+		}
+		total_read += done;
+	}
+
+	ret = search_iobuf(root, iobuf, total_read, offset);
+out:
+	free(iobuf);
+	return ret;
+}
+
+static int find_root(struct btrfs_root *root)
+{	/* Scan the ranges from btrfs_next_metadata() for the tree root. */
+	struct btrfs_multi_bio *multi = NULL;
+	struct btrfs_device *device;
+	u64 metadata_offset = 0, metadata_size = 0;
+	off_t offset = 0;
+	off_t bytenr;
+	int fd;
+	int err;
+	int ret = 1;
+	/* Returns 0 if a root was found, 1 if not, <0 if a read failed. */
+	printf("Super thinks the tree root is at %Lu, chunk root %Lu\n",
+	       btrfs_super_root(&root->fs_info->super_copy),
+	       btrfs_super_chunk_root(&root->fs_info->super_copy));
+
+	err = btrfs_next_metadata(&root->fs_info->mapping_tree,
+				  &metadata_offset, &metadata_size);
+	if (err)
+		return ret;
+
+	offset = metadata_offset;
+	while (1) {
+		u64 map_length = 4096;
+		u64 type;
+
+		if (offset >
+		    btrfs_super_total_bytes(&root->fs_info->super_copy)) {
+			printf("Went past the fs size, exiting\n");
+			break;
+		}
+		if (offset >= (metadata_offset + metadata_size)) {
+			err = btrfs_next_metadata(&root->fs_info->mapping_tree,
+						  &metadata_offset,
+						  &metadata_size);
+			if (err) {
+				printf("No more metadata to scan, exiting\n");
+				break;
+			}
+			offset = metadata_offset;
+		}
+		err = __btrfs_map_block(&root->fs_info->mapping_tree, READ,
+				      offset, &map_length, &type, &multi, 0);
+		if (err) {
+			offset += map_length;
+			continue;
+		}
+		if (!(type & BTRFS_BLOCK_GROUP_METADATA)) {
+			kfree(multi);	/* freed below on the other path */
+			offset += map_length;
+			continue;
+		}
+
+		device = multi->stripes[0].dev;
+		fd = device->fd;
+		bytenr = multi->stripes[0].physical;
+		kfree(multi);
+
+		err = read_physical(root, fd, offset, bytenr, map_length);
+		if (!err) {
+			ret = 0;
+			break;
+		} else if (err < 0) {
+			ret = err;
+			break;
+		}
+		offset += map_length;
+	}
+	return ret;
+}
+
+int main(int argc, char **argv)
+{	/* Parse options, open the device read-only and run find_root(). */
+	struct btrfs_root *root;
+	char *end;
+	int dev_fd;
+	int opt, ret;
+
+	while ((opt = getopt(argc, argv, "vo:")) != -1) {
+		switch(opt) {
+			case 'v':
+				verbose++;
+				break;
+			case 'o':	/* reject empty or non-numeric ids */
+				errno = 0;
+				search_objectid = (u64)strtoll(optarg, &end,
+							       10);
+				if (errno || end == optarg || *end) {
+					fprintf(stderr, "Error parsing "
+						"objectid\n");
+					exit(1);
+				}
+				break;
+			default:
+				usage();
+				exit(1);
+		}
+	}
+
+	if (optind >= argc) {
+		usage();
+		exit(1);
+	}
+
+	dev_fd = open(argv[optind], O_RDONLY);
+	if (dev_fd < 0) {
+		fprintf(stderr, "Failed to open device %s\n", argv[optind]);
+		exit(1);
+	}
+
+	root = open_ctree_broken(dev_fd, argv[optind]);
+	close(dev_fd);
+	if (!root)
+		exit(1);
+
+	csum_size = btrfs_super_csum_size(&root->fs_info->super_copy);
+	ret = find_root(root);
+	close_ctree(root);
+	return ret;
+}
diff --git a/help.c b/help.c
new file mode 100644
index 0000000..6d04293
--- /dev/null
+++ b/help.c
@@ -0,0 +1,214 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "commands.h"
+
+extern char argv0_buf[ARGV0_BUF_SIZE];
+
+#define USAGE_SHORT		1U
+#define USAGE_LONG		2U
+#define USAGE_OPTIONS		4U
+#define USAGE_LISTING		8U
+
+static int do_usage_one_command(const char * const *usagestr,
+				unsigned int flags, FILE *outf)
+{
+	int pad = 4;
+
+	if (!usagestr || !*usagestr)
+		return -1;
+
+	fprintf(outf, "%s%s\n", (flags & USAGE_LISTING) ? "    " : "usage: ",
+		*usagestr++);
+
+	/* a short one-line description (mandatory) */
+	if ((flags & USAGE_SHORT) == 0)
+		return 0;
+	else if (!*usagestr)
+		return -2;
+
+	if (flags & USAGE_LISTING)
+		pad = 8;
+	else
+		fputc('\n', outf);
+
+	fprintf(outf, "%*s%s\n", pad, "", *usagestr++);
+
+	/* a long (possibly multi-line) description (optional) */
+	if (!*usagestr || ((flags & USAGE_LONG) == 0))
+		return 0;
+
+	if (**usagestr)
+		fputc('\n', outf);
+	while (*usagestr && **usagestr)
+		fprintf(outf, "%*s%s\n", pad, "", *usagestr++);
+
+	/* options (optional) */
+	if (!*usagestr || ((flags & USAGE_OPTIONS) == 0))
+		return 0;
+
+	/*
+	 * options (if present) should always (even if there is no long
+	 * description) be prepended with an empty line, skip it
+	 */
+	usagestr++;
+
+	fputc('\n', outf);
+	while (*usagestr)
+		fprintf(outf, "%*s%s\n", pad, "", *usagestr++);
+
+	return 0;
+}
+
+static int usage_command_internal(const char * const *usagestr,
+				  const char *token, int full, int lst,
+				  FILE *outf)
+{	/* Print one command's usage; returns do_usage_one_command()'s code. */
+	unsigned int flags = USAGE_SHORT;
+	int ret;
+
+	if (full)
+		flags |= USAGE_LONG | USAGE_OPTIONS;
+	if (lst)
+		flags |= USAGE_LISTING;
+	if (!token)	/* usage() passes NULL; "%s" with NULL is undefined */
+		token = "(null)";
+	ret = do_usage_one_command(usagestr, flags, outf);
+	switch (ret) {
+	case -1:
+		fprintf(outf, "No usage for '%s'\n", token);
+		break;
+	case -2:
+		fprintf(outf, "No short description for '%s'\n", token);
+		break;
+	}
+	return ret;
+}
+
+static void usage_command_usagestr(const char * const *usagestr,
+				   const char *token, int full, int err)
+{
+	FILE *outf = err ? stderr : stdout;
+	int ret;
+
+	ret = usage_command_internal(usagestr, token, full, 0, outf);
+	if (!ret)
+		fputc('\n', outf);
+}
+
+void usage_command(const struct cmd_struct *cmd, int full, int err)
+{
+	usage_command_usagestr(cmd->usagestr, cmd->token, full, err);
+}
+
+void usage(const char * const *usagestr)
+{
+	usage_command_usagestr(usagestr, NULL, 1, 1);
+	exit(129);
+}
+
+static void usage_command_group_internal(const struct cmd_group *grp, int full,
+					 FILE *outf)
+{
+	const struct cmd_struct *cmd = grp->commands;
+	int do_sep = 0;
+
+	for (; cmd->token; cmd++) {
+		if (cmd->hidden)
+			continue;
+
+		if (full && cmd != grp->commands)
+			fputc('\n', outf);
+
+		if (!cmd->next) {
+			if (do_sep) {
+				fputc('\n', outf);
+				do_sep = 0;
+			}
+
+			usage_command_internal(cmd->usagestr, cmd->token, full,
+					       1, outf);
+			continue;
+		}
+
+		/* this is an entry point to a nested command group */
+
+		if (!full && cmd != grp->commands)
+			fputc('\n', outf);
+
+		usage_command_group_internal(cmd->next, full, outf);
+
+		if (!full)
+			do_sep = 1;
+	}
+}
+
+void usage_command_group(const struct cmd_group *grp, int full, int err)
+{
+	const char * const *usagestr = grp->usagestr;
+	FILE *outf = err ? stderr : stdout;
+
+	if (usagestr && *usagestr) {
+		fprintf(outf, "usage: %s\n", *usagestr++);
+		while (*usagestr)
+			fprintf(outf, "   or: %s\n", *usagestr++);
+	}
+
+	fputc('\n', outf);
+	usage_command_group_internal(grp, full, outf);
+	fputc('\n', outf);
+
+	if (grp->infostr)
+		fprintf(outf, "%s\n", grp->infostr);
+}
+
+void help_unknown_token(const char *arg, const struct cmd_group *grp)
+{
+	fprintf(stderr, "%s: unknown token '%s'\n", argv0_buf, arg);
+	usage_command_group(grp, 0, 1);
+	exit(1);
+}
+
+void help_ambiguous_token(const char *arg, const struct cmd_group *grp)
+{
+	const struct cmd_struct *cmd = grp->commands;
+
+	fprintf(stderr, "%s: ambiguous token '%s'\n", argv0_buf, arg);
+	fprintf(stderr, "\nDid you mean one of these ?\n");
+
+	for (; cmd->token; cmd++) {
+		if (!prefixcmp(cmd->token, arg))
+			fprintf(stderr, "\t%s\n", cmd->token);
+	}
+
+	exit(1);
+}
+
+void help_command_group(const struct cmd_group *grp, int argc, char **argv)
+{
+	int full = 0;
+
+	if (argc > 1) {
+		if (!strcmp(argv[1], "--full"))
+			full = 1;
+	}
+
+	usage_command_group(grp, full, 0);
+}
diff --git a/ioctl-test.c b/ioctl-test.c
new file mode 100644
index 0000000..1c27d61
--- /dev/null
+++ b/ioctl-test.c
@@ -0,0 +1,37 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include "kerncompat.h"
+#include "ioctl.h"
+
+unsigned long ioctls[] = {
+	BTRFS_IOC_SNAP_CREATE,
+	BTRFS_IOC_DEFRAG,
+	BTRFS_IOC_RESIZE,
+	BTRFS_IOC_SCAN_DEV,
+	BTRFS_IOC_TRANS_START,
+	BTRFS_IOC_TRANS_END,
+	BTRFS_IOC_SYNC,
+	BTRFS_IOC_CLONE,
+	BTRFS_IOC_ADD_DEV,
+	BTRFS_IOC_RM_DEV,
+	BTRFS_IOC_BALANCE,
+	BTRFS_IOC_SUBVOL_CREATE,
+	BTRFS_IOC_SNAP_DESTROY,
+	BTRFS_IOC_DEFRAG_RANGE,
+	BTRFS_IOC_TREE_SEARCH,
+	BTRFS_IOC_INO_LOOKUP,
+	BTRFS_IOC_DEFAULT_SUBVOL,
+	BTRFS_IOC_SPACE_INFO,
+	BTRFS_IOC_SNAP_CREATE_V2,
+	0 };
+
+int main(int ac, char **av)
+{
+	int i = 0;
+	while(ioctls[i]) {
+		printf("%lu\n" ,ioctls[i]);
+		i++;
+	}
+	return 0;
+}
+
diff --git a/ioctl.h b/ioctl.h
index a084f33..f2e5d8d 100644
--- a/ioctl.h
+++ b/ioctl.h
@@ -23,13 +23,256 @@
 
 #define BTRFS_IOCTL_MAGIC 0x94
 #define BTRFS_VOL_NAME_MAX 255
-#define BTRFS_PATH_NAME_MAX 4087
 
+/* this should be 4k */
+#define BTRFS_PATH_NAME_MAX 4087
 struct btrfs_ioctl_vol_args {
 	__s64 fd;
 	char name[BTRFS_PATH_NAME_MAX + 1];
 };
 
+#define BTRFS_SUBVOL_RDONLY		(1ULL << 1)
+#define BTRFS_SUBVOL_NAME_MAX 4039
+
+struct btrfs_ioctl_vol_args_v2 {
+	__s64 fd;
+	__u64 transid;
+	__u64 flags;
+	__u64 unused[4];
+	char name[BTRFS_SUBVOL_NAME_MAX + 1];
+};
+
+#define BTRFS_FSID_SIZE 16
+#define BTRFS_UUID_SIZE 16
+
+struct btrfs_scrub_progress {
+	__u64 data_extents_scrubbed;
+	__u64 tree_extents_scrubbed;
+	__u64 data_bytes_scrubbed;
+	__u64 tree_bytes_scrubbed;
+	__u64 read_errors;
+	__u64 csum_errors;
+	__u64 verify_errors;
+	__u64 no_csum;
+	__u64 csum_discards;
+	__u64 super_errors;
+	__u64 malloc_errors;
+	__u64 uncorrectable_errors;
+	__u64 corrected_errors;
+	__u64 last_physical;
+	__u64 unverified_errors;
+};
+
+#define BTRFS_SCRUB_READONLY	1
+struct btrfs_ioctl_scrub_args {
+	__u64 devid;				/* in */
+	__u64 start;				/* in */
+	__u64 end;				/* in */
+	__u64 flags;				/* in */
+	struct btrfs_scrub_progress progress;	/* out */
+	/* pad to 1k */
+	__u64 unused[(1024-32-sizeof(struct btrfs_scrub_progress))/8];
+};
+
+#define BTRFS_DEVICE_PATH_NAME_MAX 1024
+struct btrfs_ioctl_dev_info_args {
+	__u64 devid;				/* in/out */
+	__u8 uuid[BTRFS_UUID_SIZE];		/* in/out */
+	__u64 bytes_used;			/* out */
+	__u64 total_bytes;			/* out */
+	__u64 unused[379];			/* pad to 4k */
+	__u8 path[BTRFS_DEVICE_PATH_NAME_MAX];	/* out */
+};
+
+struct btrfs_ioctl_fs_info_args {
+	__u64 max_id;				/* out */
+	__u64 num_devices;			/* out */
+	__u8 fsid[BTRFS_FSID_SIZE];		/* out */
+	__u64 reserved[124];			/* pad to 1k */
+};
+
+/* balance control ioctl modes */
+#define BTRFS_BALANCE_CTL_PAUSE		1
+#define BTRFS_BALANCE_CTL_CANCEL	2
+#define BTRFS_BALANCE_CTL_RESUME	3
+
+/*
+ * this is packed, because it should be exactly the same as its disk
+ * byte order counterpart (struct btrfs_disk_balance_args)
+ */
+struct btrfs_balance_args {
+	__u64 profiles;
+	__u64 usage;
+	__u64 devid;
+	__u64 pstart;
+	__u64 pend;
+	__u64 vstart;
+	__u64 vend;
+
+	__u64 target;
+
+	__u64 flags;
+
+	__u64 unused[8];
+} __attribute__ ((__packed__));
+
+struct btrfs_balance_progress {
+	__u64 expected;
+	__u64 considered;
+	__u64 completed;
+};
+
+#define BTRFS_BALANCE_STATE_RUNNING	(1ULL << 0)
+#define BTRFS_BALANCE_STATE_PAUSE_REQ	(1ULL << 1)
+#define BTRFS_BALANCE_STATE_CANCEL_REQ	(1ULL << 2)
+
+struct btrfs_ioctl_balance_args {
+	__u64 flags;				/* in/out */
+	__u64 state;				/* out */
+
+	struct btrfs_balance_args data;		/* in/out */
+	struct btrfs_balance_args meta;		/* in/out */
+	struct btrfs_balance_args sys;		/* in/out */
+
+	struct btrfs_balance_progress stat;	/* out */
+
+	__u64 unused[72];			/* pad to 1k */
+};
+
+struct btrfs_ioctl_search_key {
+	/* which root are we searching.  0 is the tree of tree roots */
+	__u64 tree_id;
+
+	/* keys returned will be >= min and <= max */
+	__u64 min_objectid;
+	__u64 max_objectid;
+
+	/* keys returned will be >= min and <= max */
+	__u64 min_offset;
+	__u64 max_offset;
+
+	/* max and min transids to search for */
+	__u64 min_transid;
+	__u64 max_transid;
+
+	/* keys returned will be >= min and <= max */
+	__u32 min_type;
+	__u32 max_type;
+
+	/*
+	 * how many items did userland ask for, and how many are we
+	 * returning
+	 */
+	__u32 nr_items;
+
+	/* align to 64 bits */
+	__u32 unused;
+
+	/* some extra for later */
+	__u64 unused1;
+	__u64 unused2;
+	__u64 unused3;
+	__u64 unused4;
+};
+
+struct btrfs_ioctl_search_header {
+	__u64 transid;
+	__u64 objectid;
+	__u64 offset;
+	__u32 type;
+	__u32 len;
+} __attribute__((may_alias));
+
+#define BTRFS_SEARCH_ARGS_BUFSIZE (4096 - sizeof(struct btrfs_ioctl_search_key))
+/*
+ * the buf is an array of search headers where
+ * each header is followed by the actual item
+ * the type field is expanded to 32 bits for alignment
+ */
+struct btrfs_ioctl_search_args {
+	struct btrfs_ioctl_search_key key;
+	char buf[BTRFS_SEARCH_ARGS_BUFSIZE];
+};
+
+#define BTRFS_INO_LOOKUP_PATH_MAX 4080
+struct btrfs_ioctl_ino_lookup_args {
+	__u64 treeid;
+	__u64 objectid;
+	char name[BTRFS_INO_LOOKUP_PATH_MAX];
+};
+
+/* flags for the defrag range ioctl */
+#define BTRFS_DEFRAG_RANGE_COMPRESS 1
+#define BTRFS_DEFRAG_RANGE_START_IO 2
+
+struct btrfs_ioctl_defrag_range_args {
+	/* start of the defrag operation */
+	__u64 start;
+
+	/* number of bytes to defrag, use (u64)-1 to say all */
+	__u64 len;
+
+	/*
+	 * flags for the operation, which can include turning
+	 * on compression for this one defrag
+	 */
+	__u64 flags;
+
+	/*
+	 * any extent bigger than this will be considered
+	 * already defragged.  Use 0 to take the kernel default
+	 * Use 1 to say every single extent must be rewritten
+	 */
+	__u32 extent_thresh;
+
+	/*
+	 * which compression method to use if turning on compression
+	 * for this defrag operation.  If unspecified, zlib will
+	 * be used
+	 */
+	__u32 compress_type;
+
+	/* spare for later */
+	__u32 unused[4];
+};
+
+struct btrfs_ioctl_space_info {
+	__u64 flags;
+	__u64 total_bytes;
+	__u64 used_bytes;
+};
+
+struct btrfs_ioctl_space_args {
+	__u64 space_slots;
+	__u64 total_spaces;
+	struct btrfs_ioctl_space_info spaces[0];
+};
+
+struct btrfs_data_container {
+	__u32	bytes_left;	/* out -- bytes not needed to deliver output */
+	__u32	bytes_missing;	/* out -- additional bytes needed for result */
+	__u32	elem_cnt;	/* out */
+	__u32	elem_missed;	/* out */
+	__u64	val[0];		/* out */
+};
+
+struct btrfs_ioctl_ino_path_args {
+	__u64				inum;		/* in */
+	__u64				size;		/* in */
+	__u64				reserved[4];
+	/* struct btrfs_data_container	*fspath;	   out */
+	__u64				fspath;		/* out */
+};
+
+struct btrfs_ioctl_logical_ino_args {
+	__u64				logical;	/* in */
+	__u64				size;		/* in */
+	__u64				reserved[4];
+	/* struct btrfs_data_container	*inodes;	out   */
+	__u64				inodes;
+};
+
+/* BTRFS_IOC_SNAP_CREATE is no longer used by the btrfs command */
 #define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \
 				   struct btrfs_ioctl_vol_args)
 #define BTRFS_IOC_DEFRAG _IOW(BTRFS_IOCTL_MAGIC, 2, \
@@ -56,4 +299,36 @@ struct btrfs_ioctl_vol_args {
 /* 13 is for CLONE_RANGE */
 #define BTRFS_IOC_SUBVOL_CREATE _IOW(BTRFS_IOCTL_MAGIC, 14, \
 				   struct btrfs_ioctl_vol_args)
+#define BTRFS_IOC_SNAP_DESTROY _IOW(BTRFS_IOCTL_MAGIC, 15, \
+				   struct btrfs_ioctl_vol_args)
+#define BTRFS_IOC_DEFRAG_RANGE _IOW(BTRFS_IOCTL_MAGIC, 16, \
+				struct btrfs_ioctl_defrag_range_args)
+#define BTRFS_IOC_TREE_SEARCH _IOWR(BTRFS_IOCTL_MAGIC, 17, \
+				   struct btrfs_ioctl_search_args)
+#define BTRFS_IOC_INO_LOOKUP _IOWR(BTRFS_IOCTL_MAGIC, 18, \
+				   struct btrfs_ioctl_ino_lookup_args)
+#define BTRFS_IOC_DEFAULT_SUBVOL _IOW(BTRFS_IOCTL_MAGIC, 19, u64)
+#define BTRFS_IOC_SPACE_INFO _IOWR(BTRFS_IOCTL_MAGIC, 20, \
+				    struct btrfs_ioctl_space_args)
+#define BTRFS_IOC_SNAP_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 23, \
+				   struct btrfs_ioctl_vol_args_v2)
+#define BTRFS_IOC_SCRUB _IOWR(BTRFS_IOCTL_MAGIC, 27, \
+				struct btrfs_ioctl_scrub_args)
+#define BTRFS_IOC_SCRUB_CANCEL _IO(BTRFS_IOCTL_MAGIC, 28)
+#define BTRFS_IOC_SCRUB_PROGRESS _IOWR(BTRFS_IOCTL_MAGIC, 29, \
+					struct btrfs_ioctl_scrub_args)
+#define BTRFS_IOC_DEV_INFO _IOWR(BTRFS_IOCTL_MAGIC, 30, \
+					struct btrfs_ioctl_dev_info_args)
+#define BTRFS_IOC_FS_INFO _IOR(BTRFS_IOCTL_MAGIC, 31, \
+                                 struct btrfs_ioctl_fs_info_args)
+#define BTRFS_IOC_BALANCE_V2 _IOWR(BTRFS_IOCTL_MAGIC, 32, \
+				   struct btrfs_ioctl_balance_args)
+#define BTRFS_IOC_BALANCE_CTL _IOW(BTRFS_IOCTL_MAGIC, 33, int)
+#define BTRFS_IOC_BALANCE_PROGRESS _IOR(BTRFS_IOCTL_MAGIC, 34, \
+					struct btrfs_ioctl_balance_args)
+#define BTRFS_IOC_INO_PATHS _IOWR(BTRFS_IOCTL_MAGIC, 35, \
+					struct btrfs_ioctl_ino_path_args)
+#define BTRFS_IOC_LOGICAL_INO _IOWR(BTRFS_IOCTL_MAGIC, 36, \
+					struct btrfs_ioctl_ino_path_args)
+
 #endif
diff --git a/kerncompat.h b/kerncompat.h
index e4c8ce0..46236cd 100644
--- a/kerncompat.h
+++ b/kerncompat.h
@@ -42,7 +42,11 @@
 #define GFP_NOFS 0
 #define __read_mostly
 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+
+#ifndef ULONG_MAX
 #define ULONG_MAX       (~0UL)
+#endif
+
 #define BUG() abort()
 #ifdef __CHECKER__
 #define __force    __attribute__((force))
diff --git a/man/Makefile b/man/Makefile
index 4e8893b..4a90b75 100644
--- a/man/Makefile
+++ b/man/Makefile
@@ -7,13 +7,16 @@ mandir = $(prefix)/man
 man8dir = $(mandir)/man8
 
 MANPAGES = mkfs.btrfs.8.gz btrfsctl.8.gz btrfsck.8.gz btrfs-image.8.gz \
-	   btrfs-show.8.gz
+	   btrfs-show.8.gz btrfs.8.gz
 
 all: $(MANPAGES)
 
 mkfs.btrfs.8.gz: mkfs.btrfs.8.in
 	$(GZIP) -n -c mkfs.btrfs.8.in > mkfs.btrfs.8.gz
 
+btrfs.8.gz: btrfs.8.in
+	$(GZIP) -n -c btrfs.8.in > btrfs.8.gz
+
 btrfsctl.8.gz: btrfsctl.8.in
 	$(GZIP) -n -c btrfsctl.8.in > btrfsctl.8.gz
 
diff --git a/man/btrfs-show.8.in b/man/btrfs-show.8.in
index dd0b147..cb98b68 100644
--- a/man/btrfs-show.8.in
+++ b/man/btrfs-show.8.in
@@ -3,6 +3,9 @@
 btrfs-show \- scan the /dev directory for btrfs partitions and print results.
 .SH SYNOPSIS
 .B btrfs-show
+.SH NOTE
+.B btrfs-show
+is deprecated. Please consider switching to the btrfs utility.
 .SH DESCRIPTION
 .B btrfs-show
 is used to scan the /dev directory for btrfs partitions and display brief
diff --git a/man/btrfs.8.in b/man/btrfs.8.in
new file mode 100644
index 0000000..be478e0
--- /dev/null
+++ b/man/btrfs.8.in
@@ -0,0 +1,322 @@
+.TH BTRFS 8 "" "btrfs" "btrfs"
+.\"
+.\" Man page written by Goffredo Baroncelli <kreijack@inwind.it> (Feb 2010)
+.\"
+.SH NAME
+btrfs \- control a btrfs filesystem
+.SH SYNOPSIS
+\fBbtrfs\fP \fBsubvolume snapshot\fP\fI [-r] <source> [<dest>/]<name>\fP
+.PP
+\fBbtrfs\fP \fBsubvolume delete\fP\fI <subvolume>\fP
+.PP
+\fBbtrfs\fP \fBsubvolume create\fP\fI [<dest>/]<name>\fP
+.PP
+\fBbtrfs\fP \fBsubvolume list\fP\fI [-p] <path>\fP
+.PP
+\fBbtrfs\fP \fBsubvolume set-default\fP\fI <id> <path>\fP
+.PP
+\fBbtrfs\fP \fBsubvolume get-default\fP\fI <path>\fP
+.PP
+\fBbtrfs\fP \fBfilesystem sync\fP\fI <path> \fP
+.PP
+\fBbtrfs\fP \fBfilesystem resize\fP\fI [+/\-]<size>[gkm]|max <filesystem>\fP
+.PP
+\fBbtrfs\fP \fBfilesystem label\fP\fI <dev> [newlabel]\fP
+.PP
+\fBbtrfs\fP \fBfilesystem defrag\fP\fI [options] <file>|<dir> [<file>|<dir>...]\fP
+.PP
+\fBbtrfs\fP \fBsubvolume find-new\fP\fI <subvolume> <last_gen>\fP
+.PP
+\fBbtrfs\fP \fBfilesystem balance\fP\fI <path> \fP
+.PP
+\fBbtrfs\fP \fBfilesystem defragment\fP\fI <file>|<dir> [<file>|<dir>...]\fP
+.PP
+\fBbtrfs\fP \fBdevice scan\fP\fI [--all-devices|<device> [<device>...]]\fP
+.PP
+\fBbtrfs\fP \fBfilesystem show\fP\fI [--all-devices|<uuid>|<label>]\fP
+.PP
+\fBbtrfs\fP \fBdevice add\fP\fI <device> [<device>...] <path> \fP
+.PP
+\fBbtrfs\fP \fBdevice delete\fP\fI <device> [<device>...] <path> \fP
+.PP
+\fBbtrfs\fP \fBscrub start\fP [-Bdqru] {\fI<path>\fP|\fI<device>\fP}
+.PP
+\fBbtrfs\fP \fBscrub cancel\fP {\fI<path>\fP|\fI<device>\fP}
+.PP
+\fBbtrfs\fP \fBscrub resume\fP [-Bdqru] {\fI<path>\fP|\fI<device>\fP}
+.PP
+\fBbtrfs\fP \fBscrub status\fP [-d] {\fI<path>\fP|\fI<device>\fP}
+.PP
+\fBbtrfs\fP \fBinspect-internal inode-resolve\fP [-v] \fI<inode>\fP \fI<path>\fP
+.PP
+\fBbtrfs\fP \fBinspect-internal logical-resolve\fP
+[-Pv] \fI<logical>\fP \fI<path>\fP
+.PP
+\fBbtrfs\fP \fBhelp|\-\-help|\-h \fP\fI\fP
+.PP
+\fBbtrfs\fP \fB<command> \-\-help \fP\fI\fP
+.PP
+.SH DESCRIPTION
+.B btrfs
+is used to control the filesystem and the files and directories stored. It is
+the tool to create or destroy a snapshot or a subvolume for the
+filesystem, to defrag a file or a directory, flush the data to the disk,
+to resize the filesystem, to scan the device.
+
+It is possible to abbreviate the commands unless the commands  are ambiguous.
+For example: it is possible to run
+.I btrfs sub snaps
+instead of
+.I btrfs subvolume snapshot.
+But
+.I btrfs file s
+is not allowed, because
+.I file s
+may be interpreted both as
+.I filesystem show
+and as
+.I filesystem sync.
+In this case
+.I btrfs
+returns an error.
+If a command is terminated by
+.I --help
+, the detailed help is shown. If the passed command matches more commands,
+detailed help of all the matched commands is shown. For example
+.I btrfs dev --help
+shows the help of all
+.I device*
+commands.
+
+.SH COMMANDS
+.TP
+
+\fBsubvolume snapshot\fR\fI [-r] <source> [<dest>/]<name>\fR
+Create a writable/readonly snapshot of the subvolume \fI<source>\fR with the
+name \fI<name>\fR in the \fI<dest>\fR directory. If \fI<source>\fR is not a
+subvolume, \fBbtrfs\fR returns an error. If \fI-r\fR is given, the snapshot
+will be readonly.
+.TP
+
+\fBsubvolume delete\fR\fI <subvolume>\fR
+Delete the subvolume \fI<subvolume>\fR. If \fI<subvolume>\fR is not a
+subvolume, \fBbtrfs\fR returns an error.
+.TP
+
+\fBsubvolume create\fR\fI [<dest>/]<name>\fR
+Create a subvolume in \fI<dest>\fR (or in the current directory if
+\fI<dest>\fR is omitted).
+.TP
+
+\fBsubvolume list\fR\fI [-p] <path>\fR
+List the subvolumes present in the filesystem \fI<path>\fR. For every
+subvolume the following information is shown by default.
+ID <ID> top level <ID> path <path>
+where path is the relative path of the subvolume to the \fItop level\fR
+subvolume.
+The subvolume's ID may be used by the \fBsubvolume set-default\fR command, or
+at mount time via the \fIsubvol=\fR option.
+If \fI-p\fR is given, then \fIparent <ID>\fR is added to the output between ID
+and top level. The parent's ID may be used at mount time via the
+\fIsubvolrootid=\fR option.
+.TP
+
+\fBsubvolume set-default\fR\fI <id> <path>\fR
+Set the subvolume of the filesystem \fI<path>\fR which is mounted as 
+\fIdefault\fR. The subvolume is identified by \fI<id>\fR, which 
+is returned by the \fBsubvolume list\fR command.
+.TP
+
+\fBsubvolume get-default\fR\fI <path>\fR
+Get the default subvolume of the filesystem \fI<path>\fR. The output format
+is similar to \fBsubvolume list\fR command.
+.TP
+
+\fBfilesystem defragment\fP -c[zlib|lzo] [-l \fIlen\fR] [-s \fIstart\fR] [-t \fIsize\fR] -[vf] <\fIfile\fR>|<\fIdir\fR> [<\fIfile\fR>|<\fIdir\fR>...]
+
+Defragment file data and/or directory metadata. To defragment all files in a
+directory you have to specify each one on its own or use your shell wildcards.
+
+The start position and the number of bytes to defragment can be specified by \fIstart\fR and \fIlen\fR. Any extent bigger than \fIsize\fR will be considered already defragged. Use 0 to take the kernel default, and use 1 to say every single extent must be rewritten. You can also turn on compression in defragment operations.
+
+\fB-v\fP be verbose
+
+\fB-c\fP compress file contents while defragmenting
+
+\fB-f\fP flush filesystem after defragmenting
+
+\fB-s start\fP defragment only from byte \fIstart\fR onward
+
+\fB-l len\fP defragment only up to \fIlen\fR bytes
+
+\fB-t size\fP defragment only files at least \fIsize\fR bytes big
+
+NOTE: defragmenting with kernels up to 2.6.37 will unlink COW-ed copies of data, don't use it
+if you use snapshots, have de-duplicated your data or made copies with \fBcp --reflink\fP.
+.TP
+\fBsubvolume find-new\fR\fI <subvolume> <last_gen>\fR
+List the recently modified files in a subvolume, after \fI<last_gen>\fR ID.
+.TP
+
+\fBfilesystem sync\fR\fI <path> \fR
+Force a sync for the filesystem identified by \fI<path>\fR.
+.TP
+
+.\"
+.\" Some wording are extracted by the resize2fs man page
+.\"
+
+\fBfilesystem resize\fR\fI [+/\-]<size>[gkm]|max <path>\fR
+Resize a filesystem identified by \fI<path>\fR.
+The \fI<size>\fR parameter specifies the new size of the filesystem.
+If the prefix \fI+\fR or \fI\-\fR is present the size is increased or decreased
+by the quantity \fI<size>\fR.
+If no units are specified, the unit of the \fI<size>\fR parameter defaults to
+bytes. Optionally, the size parameter may be suffixed by one of the following
+unit designators: 'K', 'M', or 'G', for kilobytes, megabytes, or gigabytes,
+respectively.
+
+If 'max' is passed, the filesystem will occupy all available space on the
+volume(s).
+
+The \fBresize\fR command \fBdoes not\fR manipulate the size of underlying
+partition.  If you wish to enlarge/reduce a filesystem, you must make sure you
+can expand the partition before enlarging the filesystem and shrink the
+partition after reducing the size of the filesystem.
+.TP
+
+\fBfilesystem label\fP\fI <dev> [newlabel]\fP
+Show or update the label of a filesystem. \fI<dev>\fR is used to identify the
+filesystem. 
+If a \fInewlabel\fR optional argument is passed, the label is changed. The
+following constraints exist for a label:
+.IP
+- the maximum allowable length shall be less than or equal to 256 chars
+.IP
+- the label shall not contain the '/' or '\\' characters.
+
+NOTE: Currently there are the following limitations:
+.IP
+- the filesystem has to be unmounted
+.IP
+- the filesystem should not have more than one device.
+.TP
+
+\fBfilesystem show\fR [--all-devices|<uuid>|<label>]\fR
+Show the btrfs filesystem with some additional info. If no \fIUUID\fP or 
+\fIlabel\fP is passed, \fBbtrfs\fR shows info of all the btrfs filesystems.
+If \fB--all-devices\fP is passed, all the devices under /dev are scanned;
+otherwise the devices list is extracted from the /proc/partitions file.
+.TP
+
+\fBfilesystem balance\fR \fI<path>\fR
+Balance the chunks of the filesystem identified by \fI<path>\fR
+across the devices.
+.TP
+
+\fBdevice add\fR\fI <dev> [<dev>..] <path>\fR
+Add device(s) to the filesystem identified by \fI<path>\fR.
+.TP
+
+\fBdevice delete\fR\fI <dev> [<dev>..] <path>\fR
+Remove device(s) from a filesystem identified by \fI<path>\fR.
+.TP
+
+\fBdevice scan\fR \fI[--all-devices|<device> [<device>...]]\fR
+If one or more devices are passed, these are scanned for a btrfs filesystem. 
+If no devices are passed, \fBbtrfs\fR scans all the block devices listed
+in the /proc/partitions file.
+Finally, if \fB--all-devices\fP is passed, all the devices under /dev are 
+scanned.
+.TP
+
+\fBscrub start\fP [-Bdqru] {\fI<path>\fP|\fI<device>\fP}
+Start a scrub on all devices of the filesystem identified by \fI<path>\fR or on
+a single \fI<device>\fR. Without options, scrub is started as a background
+process. Progress can be obtained with the \fBscrub status\fR command. Scrubbing
+involves reading all data from all disks and verifying checksums. Errors are
+corrected along the way if possible.
+.RS
+
+\fIOptions\fR
+.IP -B 5
+Do not background and print scrub statistics when finished.
+.IP -d 5
+Print separate statistics for each device of the filesystem (-B only).
+.IP -q 5
+Quiet. Omit error messages and statistics.
+.IP -r 5
+Read only mode. Do not attempt to correct anything.
+.IP -u 5
+Scrub unused space as well. (NOT IMPLEMENTED)
+.RE
+.TP
+
+\fBscrub cancel\fP {\fI<path>\fP|\fI<device>\fP}
+If a scrub is running on the filesystem identified by \fI<path>\fR, cancel it.
+Progress is saved in the scrub progress file and scrubbing can be resumed later
+using the \fBscrub resume\fR command.
+If a \fI<device>\fR is given, the corresponding filesystem is found and
+\fBscrub cancel\fP behaves as if it was called on that filesystem.
+.TP
+
+\fBscrub resume\fP [-Bdqru] {\fI<path>\fP|\fI<device>\fP}
+Resume a canceled or interrupted scrub cycle on the filesystem identified by
+\fI<path>\fR or on a given \fI<device>\fR. Does not start a new scrub if the
+last scrub finished successfully.
+.RS
+
+\fIOptions\fR
+.TP
+see \fBscrub start\fP.
+.RE
+.TP
+
+\fBscrub status\fP [-d] {\fI<path>\fP|\fI<device>\fP}
+Show status of a running scrub for the filesystem identified by \fI<path>\fR or
+for the specified \fI<device>\fR.
+If no scrub is running, show statistics of the last finished or canceled scrub
+for that filesystem or device.
+.RS
+
+\fIOptions\fR
+.IP -d 5
+Print separate statistics for each device of the filesystem.
+.RE
+.TP
+
+\fBinspect-internal inode-resolve\fP [-v] \fI<inode>\fP \fI<path>\fP
+Resolves an <inode> in subvolume <path> to all filesystem paths.
+.RS
+
+\fIOptions\fR
+.IP -v 5
+verbose mode. print count of returned paths and ioctl() return value
+.RE
+.TP
+
+\fBinspect-internal logical-resolve\fP [-Pv] \fI<logical>\fP \fI<path>\fP
+Resolves a <logical> address in the filesystem mounted at <path> to all inodes.
+By default, each inode is then resolved to a file system path (similar to the
+\fBinode-resolve\fP subcommand).
+.RS
+
+\fIOptions\fR
+.IP -P 5
+skip the path resolving and print the inodes instead
+.IP -v 5
+verbose mode. print count of returned paths and all ioctl() return values
+.RE
+
+.SH EXIT STATUS
+\fBbtrfs\fR returns a zero exit status if it succeeds. Non zero is returned in
+case of failure.
+
+.SH AVAILABILITY
+.B btrfs
+is part of btrfs-progs. Btrfs filesystem is currently under heavy development,
+and not suitable for any uses other than benchmarking and review.
+Please refer to the btrfs wiki http://btrfs.wiki.kernel.org for
+further details.
+.SH SEE ALSO
+.BR mkfs.btrfs (8)
diff --git a/man/btrfsctl.8.in b/man/btrfsctl.8.in
index c2d4488..8705fa6 100644
--- a/man/btrfsctl.8.in
+++ b/man/btrfsctl.8.in
@@ -10,6 +10,9 @@ btrfsctl \- control a btrfs filesystem
 [ \fB \-A\fP\fI device\fP ]
 [ \fB \-a\fP ]
 [ \fB \-c\fP ]
+.SH NOTE
+.B btrfsctl
+is deprecated. Please consider switching to the btrfs utility.
 .SH DESCRIPTION
 .B btrfsctl
 is used to control the filesystem and the files and directories stored. It is the tool to create a new snapshot for the filesystem.
diff --git a/man/mkfs.btrfs.8.in b/man/mkfs.btrfs.8.in
index 1e14c6c..432db1b 100644
--- a/man/mkfs.btrfs.8.in
+++ b/man/mkfs.btrfs.8.in
@@ -9,6 +9,7 @@ mkfs.btrfs \- create an btrfs filesystem
 [ \fB \-l\fP\fI leafsize\fP ]
 [ \fB \-L\fP\fI label\fP ]
 [ \fB \-m\fP\fI metadata profile\fP ]
+[ \fB \-M\fP ]
 [ \fB \-n\fP\fI nodesize\fP ]
 [ \fB \-s\fP\fI sectorsize\fP ]
 [ \fB \-h\fP ]
@@ -45,6 +46,12 @@ Specify a label for the filesystem.
 Specify how metadata must be spanned across the devices specified. Valid
 values are raid0, raid1, raid10 or single.
 .TP
+\fB\-M\fR, \fB\-\-mixed\fR
+Mix data and metadata chunks together for more efficient space 
+utilization.  This feature incurs a performance penalty in
+larger filesystems.  It is recommended for use with filesystems
+of 1 GiB or smaller.
+.TP
 \fB\-n\fR, \fB\-\-nodesize \fIsize\fR
 Specify the nodesize. By default the value is set to the pagesize.
 .TP
diff --git a/mkfs.c b/mkfs.c
index 2e99b95..c531ef2 100644
--- a/mkfs.c
+++ b/mkfs.c
@@ -29,12 +29,14 @@
 #include <stdlib.h>
 #include <sys/types.h>
 #include <sys/stat.h>
+#include <sys/dir.h>
 #include <fcntl.h>
 #include <unistd.h>
 #include <getopt.h>
 #include <uuid/uuid.h>
 #include <linux/fs.h>
 #include <ctype.h>
+#include <attr/xattr.h>
 #include "kerncompat.h"
 #include "ctree.h"
 #include "disk-io.h"
@@ -43,11 +45,23 @@
 #include "utils.h"
 #include "version.h"
 
+static u64 index_cnt = 2;	/* dir index given to the next directory item */
+
+struct directory_name_entry {	/* a directory, chained to others via ->list */
+	char *dir_name;		/* directory name as handed in by the caller */
+	char *path;		/* malloc()ed path string of the directory */
+	ino_t inum;		/* object id recorded for the directory */
+	struct list_head list;
+};
+
 static u64 parse_size(char *s)
 {
 	int len = strlen(s);
 	char c;
 	u64 mult = 1;
+	u64 ret;
+
+	s = strdup(s);
 
 	if (!isdigit(s[len - 1])) {
 		c = tolower(s[len - 1]);
@@ -66,10 +80,12 @@ static u64 parse_size(char *s)
 		}
 		s[len - 1] = '\0';
 	}
-	return atol(s) * mult;
+	ret = atol(s) * mult;
+	free(s);
+	return ret;
 }
 
-static int make_root_dir(struct btrfs_root *root)
+static int make_root_dir(struct btrfs_root *root, int mixed)
 {
 	struct btrfs_trans_handle *trans;
 	struct btrfs_key location;
@@ -88,30 +104,47 @@ static int make_root_dir(struct btrfs_root *root)
 				     0, BTRFS_MKFS_SYSTEM_GROUP_SIZE);
 	BUG_ON(ret);
 
-	ret = btrfs_alloc_chunk(trans, root->fs_info->extent_root,
-				&chunk_start, &chunk_size,
-				BTRFS_BLOCK_GROUP_METADATA);
-	BUG_ON(ret);
-	ret = btrfs_make_block_group(trans, root, 0,
-				     BTRFS_BLOCK_GROUP_METADATA,
-				     BTRFS_FIRST_CHUNK_TREE_OBJECTID,
-				     chunk_start, chunk_size);
-	BUG_ON(ret);
+	if (mixed) {
+		ret = btrfs_alloc_chunk(trans, root->fs_info->extent_root,
+					&chunk_start, &chunk_size,
+					BTRFS_BLOCK_GROUP_METADATA |
+					BTRFS_BLOCK_GROUP_DATA);
+		BUG_ON(ret);
+		ret = btrfs_make_block_group(trans, root, 0,
+					     BTRFS_BLOCK_GROUP_METADATA |
+					     BTRFS_BLOCK_GROUP_DATA,
+					     BTRFS_FIRST_CHUNK_TREE_OBJECTID,
+					     chunk_start, chunk_size);
+		BUG_ON(ret);
+		printf("Created a data/metadata chunk of size %llu\n", chunk_size);
+	} else {
+		ret = btrfs_alloc_chunk(trans, root->fs_info->extent_root,
+					&chunk_start, &chunk_size,
+					BTRFS_BLOCK_GROUP_METADATA);
+		BUG_ON(ret);
+		ret = btrfs_make_block_group(trans, root, 0,
+					     BTRFS_BLOCK_GROUP_METADATA,
+					     BTRFS_FIRST_CHUNK_TREE_OBJECTID,
+					     chunk_start, chunk_size);
+		BUG_ON(ret);
+	}
 
 	root->fs_info->system_allocs = 0;
 	btrfs_commit_transaction(trans, root);
 	trans = btrfs_start_transaction(root, 1);
 	BUG_ON(!trans);
 
-	ret = btrfs_alloc_chunk(trans, root->fs_info->extent_root,
-				&chunk_start, &chunk_size,
-				BTRFS_BLOCK_GROUP_DATA);
-	BUG_ON(ret);
-	ret = btrfs_make_block_group(trans, root, 0,
-				     BTRFS_BLOCK_GROUP_DATA,
-				     BTRFS_FIRST_CHUNK_TREE_OBJECTID,
-				     chunk_start, chunk_size);
-	BUG_ON(ret);
+	if (!mixed) {
+		ret = btrfs_alloc_chunk(trans, root->fs_info->extent_root,
+					&chunk_start, &chunk_size,
+					BTRFS_BLOCK_GROUP_DATA);
+		BUG_ON(ret);
+		ret = btrfs_make_block_group(trans, root, 0,
+					     BTRFS_BLOCK_GROUP_DATA,
+					     BTRFS_FIRST_CHUNK_TREE_OBJECTID,
+					     chunk_start, chunk_size);
+		BUG_ON(ret);
+	}
 
 	ret = btrfs_make_root_dir(trans, root->fs_info->tree_root,
 			      BTRFS_ROOT_TREE_DIR_OBJECTID);
@@ -200,12 +233,26 @@ static int create_one_raid_group(struct btrfs_trans_handle *trans,
 
 static int create_raid_groups(struct btrfs_trans_handle *trans,
 			      struct btrfs_root *root, u64 data_profile,
-			      u64 metadata_profile)
+			      int data_profile_opt, u64 metadata_profile,
+			      int metadata_profile_opt, int mixed)
 {
 	u64 num_devices = btrfs_super_num_devices(&root->fs_info->super_copy);
 	u64 allowed;
 	int ret;
 
+	/*
+	 * Set default profiles according to number of added devices.
+	 * For mixed groups defaults are single/single.
+	 */
+	if (!metadata_profile_opt && !mixed) {
+		metadata_profile = (num_devices > 1) ?
+			BTRFS_BLOCK_GROUP_RAID1 : BTRFS_BLOCK_GROUP_DUP;
+	}
+	if (!data_profile_opt && !mixed) {
+		data_profile = (num_devices > 1) ?
+			BTRFS_BLOCK_GROUP_RAID0 : 0; /* raid0 or single */
+	}
+
 	if (num_devices == 1)
 		allowed = BTRFS_BLOCK_GROUP_DUP;
 	else if (num_devices >= 4) {
@@ -214,21 +261,38 @@ static int create_raid_groups(struct btrfs_trans_handle *trans,
 	} else
 		allowed = BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1;
 
+	if (metadata_profile & ~allowed) {
+		fprintf(stderr,	"unable to create FS with metadata "
+			"profile %llu (%llu devices)\n", metadata_profile,
+			num_devices);
+		exit(1);
+	}
+	if (data_profile & ~allowed) {
+		fprintf(stderr, "unable to create FS with data "
+			"profile %llu (%llu devices)\n", data_profile,
+			num_devices);
+		exit(1);
+	}
+
 	if (allowed & metadata_profile) {
+		u64 meta_flags = BTRFS_BLOCK_GROUP_METADATA;
+
 		ret = create_one_raid_group(trans, root,
 					    BTRFS_BLOCK_GROUP_SYSTEM |
 					    (allowed & metadata_profile));
 		BUG_ON(ret);
 
-		ret = create_one_raid_group(trans, root,
-					    BTRFS_BLOCK_GROUP_METADATA |
+		if (mixed)
+			meta_flags |= BTRFS_BLOCK_GROUP_DATA;
+
+		ret = create_one_raid_group(trans, root, meta_flags |
 					    (allowed & metadata_profile));
 		BUG_ON(ret);
 
 		ret = recow_roots(trans, root);
 		BUG_ON(ret);
 	}
-	if (num_devices > 1 && (allowed & data_profile)) {
+	if (!mixed && num_devices > 1 && (allowed & data_profile)) {
 		ret = create_one_raid_group(trans, root,
 					    BTRFS_BLOCK_GROUP_DATA |
 					    (allowed & data_profile));
@@ -274,8 +338,10 @@ static void print_usage(void)
 	fprintf(stderr, "\t -l --leafsize size of btree leaves\n");
 	fprintf(stderr, "\t -L --label set a label\n");
 	fprintf(stderr, "\t -m --metadata metadata profile, values like data profile\n");
+	fprintf(stderr, "\t -M --mixed mix metadata and data together\n");
 	fprintf(stderr, "\t -n --nodesize size of btree nodes\n");
 	fprintf(stderr, "\t -s --sectorsize min block allocation\n");
+	fprintf(stderr, "\t -r --rootdir the source directory\n");
 	fprintf(stderr, "%s\n", BTRFS_BUILD_VERSION);
 	exit(1);
 }
@@ -291,15 +357,16 @@ static u64 parse_profile(char *s)
 	if (strcmp(s, "raid0") == 0) {
 		return BTRFS_BLOCK_GROUP_RAID0;
 	} else if (strcmp(s, "raid1") == 0) {
-		return BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_DUP;
+		return BTRFS_BLOCK_GROUP_RAID1;
 	} else if (strcmp(s, "raid10") == 0) {
-		return BTRFS_BLOCK_GROUP_RAID10 | BTRFS_BLOCK_GROUP_DUP;
+		return BTRFS_BLOCK_GROUP_RAID10;
 	} else if (strcmp(s, "single") == 0) {
 		return 0;
 	} else {
 		fprintf(stderr, "Unknown option %s\n", s);
 		print_usage();
 	}
+	/* not reached */
 	return 0;
 }
 
@@ -308,9 +375,9 @@ static char *parse_label(char *input)
 	int i;
 	int len = strlen(input);
 
-	if (len > BTRFS_LABEL_SIZE) {
+	if (len >= BTRFS_LABEL_SIZE) {
 		fprintf(stderr, "Label %s is too long (max %d)\n", input,
-			BTRFS_LABEL_SIZE);
+			BTRFS_LABEL_SIZE - 1);
 		exit(1);
 	}
 	for (i = 0; i < len; i++) {
@@ -328,13 +395,803 @@ static struct option long_options[] = {
 	{ "leafsize", 1, NULL, 'l' },
 	{ "label", 1, NULL, 'L'},
 	{ "metadata", 1, NULL, 'm' },
+	{ "mixed", 0, NULL, 'M' },
 	{ "nodesize", 1, NULL, 'n' },
 	{ "sectorsize", 1, NULL, 's' },
 	{ "data", 1, NULL, 'd' },
 	{ "version", 0, NULL, 'V' },
+	{ "rootdir", 1, NULL, 'r' },
 	{ 0, 0, 0, 0}
 };
 
+/*
+ * Insert the directory entry "name" for inode objectid into the directory
+ * parent_inum, using the current value of the global index_cnt as index.
+ * The index used is stored in *dir_index_cnt; the counter always advances.
+ */
+static int add_directory_items(struct btrfs_trans_handle *trans,
+			       struct btrfs_root *root, u64 objectid,
+			       ino_t parent_inum, const char *name,
+			       struct stat *st, int *dir_index_cnt)
+{
+	struct btrfs_key location;
+	u8 filetype = 0;
+	int name_len = strlen(name);
+	int ret;
+
+	location.objectid = objectid;
+	location.offset = 0;
+	btrfs_set_key_type(&location, BTRFS_INODE_ITEM_KEY);
+
+	/* the three type tests are mutually exclusive; other types stay 0 */
+	if (S_ISLNK(st->st_mode))
+		filetype = BTRFS_FT_SYMLINK;
+	else if (S_ISREG(st->st_mode))
+		filetype = BTRFS_FT_REG_FILE;
+	else if (S_ISDIR(st->st_mode))
+		filetype = BTRFS_FT_DIR;
+
+	ret = btrfs_insert_dir_item(trans, root, name, name_len, parent_inum,
+				    &location, filetype, index_cnt);
+	*dir_index_cnt = index_cnt++;
+	return ret;
+}
+
+/*
+ * Fill the on-disk inode item *dst from host stat data *src; returns 0.
+ */
+static int fill_inode_item(struct btrfs_trans_handle *trans,
+			   struct btrfs_root *root,
+			   struct btrfs_inode_item *dst, struct stat *src)
+{
+	u64 sectorsize = root->sectorsize;
+	u64 blocks;
+
+	/* the item has reserved fields and goes to disk: zero all of it */
+	memset(dst, 0, sizeof(*dst));
+	btrfs_set_stack_inode_generation(dst, trans->transid);
+	btrfs_set_stack_inode_size(dst, src->st_size);
+	btrfs_set_stack_inode_nbytes(dst, 0);
+	btrfs_set_stack_inode_block_group(dst, 0);
+	btrfs_set_stack_inode_nlink(dst, src->st_nlink);
+	btrfs_set_stack_inode_uid(dst, src->st_uid);
+	btrfs_set_stack_inode_gid(dst, src->st_gid);
+	btrfs_set_stack_inode_mode(dst, src->st_mode);
+	btrfs_set_stack_inode_rdev(dst, 0);
+	btrfs_set_stack_inode_flags(dst, 0);
+	btrfs_set_stack_timespec_sec(&dst->atime, src->st_atime);
+	btrfs_set_stack_timespec_nsec(&dst->atime, 0);
+	btrfs_set_stack_timespec_sec(&dst->ctime, src->st_ctime);
+	btrfs_set_stack_timespec_nsec(&dst->ctime, 0);
+	btrfs_set_stack_timespec_sec(&dst->mtime, src->st_mtime);
+	btrfs_set_stack_timespec_nsec(&dst->mtime, 0);
+	btrfs_set_stack_timespec_sec(&dst->otime, 0);
+	btrfs_set_stack_timespec_nsec(&dst->otime, 0);
+
+	switch (src->st_mode & S_IFMT) {
+	case S_IFDIR:
+		/* directories: size 0 and a link count of 1 */
+		btrfs_set_stack_inode_size(dst, 0);
+		btrfs_set_stack_inode_nlink(dst, 1);
+		break;
+	case S_IFREG:
+		if (src->st_size <= BTRFS_MAX_INLINE_DATA_SIZE(root)) {
+			btrfs_set_stack_inode_nbytes(dst, src->st_size);
+			break;
+		}
+		/* otherwise nbytes is the size rounded up to whole sectors */
+		blocks = src->st_size / sectorsize;
+		if (src->st_size % sectorsize)
+			blocks += 1;
+		btrfs_set_stack_inode_nbytes(dst, blocks * sectorsize);
+		break;
+	case S_IFLNK:
+		btrfs_set_stack_inode_nbytes(dst, src->st_size + 1);
+		break;
+	}
+	return 0;
+}
+
+/* scandir() filter: return 0 to skip "." and "..", 1 to keep the entry */
+static int directory_select(const struct direct *entry)
+{
+	const char *name = entry->d_name;
+	size_t max = entry->d_reclen;
+
+	return strncmp(name, ".", max) != 0 && strncmp(name, "..", max) != 0;
+}
+
+/* Free a scandir() style name list; a negative count is a no-op */
+static void free_namelist(struct direct **files, int count)
+{
+	int i = 0;
+
+	if (count < 0)
+		return;
+	while (i < count)
+		free(files[i++]);
+	free(files);
+}
+
+/*
+ * Return twice the summed name lengths of the entries of dirname that
+ * pass directory_select(); 0 when scandir() reports no entries or fails.
+ */
+static u64 calculate_dir_inode_size(char *dirname)
+{
+	struct direct **files;
+	u64 name_bytes = 0;
+	int count;
+	int i;
+
+	count = scandir(dirname, &files, directory_select, NULL);
+	for (i = 0; i < count; i++)
+		name_bytes += strlen(files[i]->d_name);
+	free_namelist(files, count);
+
+	return name_bytes * 2;
+}
+
+/*
+ * Insert the inode item and its back reference for one source file.
+ *
+ * The inode item is built from *st by fill_inode_item(); for directories
+ * the size is replaced by calculate_dir_inode_size(name).  The inode ref
+ * links objectid self_objectid to parent_inum under "name" with index
+ * dir_index_cnt.  On success the inserted item is copied to *inode_ret.
+ * Returns 0 on success or the error of the failing insert.
+ */
+static int add_inode_items(struct btrfs_trans_handle *trans,
+			   struct btrfs_root *root,
+			   struct stat *st, char *name,
+			   u64 self_objectid, ino_t parent_inum,
+			   int dir_index_cnt, struct btrfs_inode_item *inode_ret)
+{
+	struct btrfs_inode_item item;
+	int name_len = strlen(name);
+	int ret;
+
+	fill_inode_item(trans, root, &item, st);
+
+	/* directories: the size is derived from the entry names */
+	if (S_ISDIR(st->st_mode))
+		btrfs_set_stack_inode_size(&item,
+					   calculate_dir_inode_size(name));
+
+	ret = btrfs_insert_inode(trans, root, self_objectid, &item);
+	if (ret)
+		return ret;
+
+	ret = btrfs_insert_inode_ref(trans, root, name, name_len,
+				     self_objectid, parent_inum,
+				     dir_index_cnt);
+	if (ret)
+		return ret;
+
+	*inode_ret = item;
+	return 0;
+}
+
+/*
+ * Copy all extended attributes of file_name to inode objectid.  Returns 0
+ * on success or if xattrs are unsupported (ENOTSUP), non-zero on failure.
+ */
+static int add_xattr_item(struct btrfs_trans_handle *trans,
+			  struct btrfs_root *root, u64 objectid,
+			  const char *file_name)
+{
+	int ret;
+	int cur_name_len;
+	char xattr_list[XATTR_LIST_MAX];
+	char cur_value[XATTR_SIZE_MAX];
+	char *cur_name;
+	char *list_end;
+
+	ret = llistxattr(file_name, xattr_list, XATTR_LIST_MAX);
+	if (ret < 0) {
+		if (errno == ENOTSUP)
+			return 0;
+		fprintf(stderr, "get a list of xattr failed for %s\n",
+			file_name);
+		return ret;
+	}
+
+	/* names are NUL-terminated; only the first ret bytes are valid */
+	list_end = xattr_list + ret;
+	for (cur_name = xattr_list; cur_name < list_end;
+	     cur_name += cur_name_len + 1) {
+		cur_name_len = strlen(cur_name);
+		/* the list was read without following symlinks: match that */
+		ret = lgetxattr(file_name, cur_name, cur_value, XATTR_SIZE_MAX);
+		if (ret < 0) {
+			if (errno == ENOTSUP)
+				return 0;
+			fprintf(stderr, "get a xattr value failed for %s attr %s\n",
+				file_name, cur_name);
+			return ret;
+		}
+
+		ret = btrfs_insert_xattr_item(trans, root, cur_name,
+					      cur_name_len, cur_value,
+					      ret, objectid);
+		if (ret) {
+			fprintf(stderr, "insert a xattr item failed for %s\n",
+				file_name);
+			return ret;	/* report the first failure */
+		}
+	}
+	return 0;
+}
+/*
+ * Find num_bytes of contiguous free space for file data, searching the
+ * free space cache from hint_byte and wrapping around to 0 once.
+ * Ranges that are pinned, cross the end of their block group, or lie in a
+ * SYSTEM or METADATA block group are skipped.  On success the range is
+ * removed from the free space cache, *ins describes it and 0 is returned;
+ * otherwise -ENOSPC.
+ */
+static int custom_alloc_extent(struct btrfs_root *root, u64 num_bytes,
+			       u64 hint_byte, struct btrfs_key *ins)
+{
+	struct btrfs_fs_info *info = root->fs_info;
+	struct btrfs_block_group_cache *cache;
+	u64 start, end, group_end;
+	u64 last = hint_byte;
+	int wrapped = 0;
+
+	for (;;) {
+		if (find_first_extent_bit(&info->free_space_cache, last,
+					  &start, &end, EXTENT_DIRTY)) {
+			/* nothing at or after "last": retry once from 0 */
+			if (wrapped++ != 0)
+				break;
+			last = 0;
+			continue;
+		}
+
+		start = max(last, start);
+		last = end + 1;
+		if (last - start < num_bytes)
+			continue;
+
+		last = start + num_bytes;
+		if (test_range_bit(&info->pinned_extents, start, last - 1,
+				   EXTENT_DIRTY, 0))
+			continue;
+
+		cache = btrfs_lookup_block_group(info, start);
+		BUG_ON(!cache);
+		group_end = cache->key.objectid + cache->key.offset;
+		if (last > group_end || (cache->flags &
+		    (BTRFS_BLOCK_GROUP_SYSTEM | BTRFS_BLOCK_GROUP_METADATA))) {
+			/* unusable block group: resume the search after it */
+			last = group_end;
+			continue;
+		}
+
+		clear_extent_dirty(&info->free_space_cache, start,
+				   start + num_bytes - 1, 0);
+		ins->objectid = start;
+		ins->offset = num_bytes;
+		ins->type = BTRFS_EXTENT_ITEM_KEY;
+		return 0;
+	}
+
+	fprintf(stderr, "not enough free space\n");
+	return -ENOSPC;
+}
+
+/*
+ * Insert a regular file extent item at file offset 0 of inode objectid
+ * mapping num_bytes at disk_bytenr, make sure the extent tree has an item
+ * for that range and add a reference to it.  inode and file_pos are unused.
+ */
+static int record_file_extent(struct btrfs_trans_handle *trans,
+			      struct btrfs_root *root, u64 objectid,
+			      struct btrfs_inode_item *inode,
+			      u64 file_pos, u64 disk_bytenr,
+			      u64 num_bytes)
+{
+	struct btrfs_root *extent_root = root->fs_info->extent_root;
+	struct btrfs_file_extent_item *fi;
+	struct btrfs_extent_item *ei;
+	struct extent_buffer *leaf;
+	struct btrfs_path path;
+	struct btrfs_key key;
+	int ret;
+
+	btrfs_init_path(&path);
+	/* step 1: the EXTENT_DATA item in the tree of "root" */
+	key.objectid = objectid;
+	key.offset = 0;
+	btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
+	ret = btrfs_insert_empty_item(trans, root, &path, &key, sizeof(*fi));
+	if (ret)
+		goto out;
+	leaf = path.nodes[0];
+	fi = btrfs_item_ptr(leaf, path.slots[0],
+			    struct btrfs_file_extent_item);
+	btrfs_set_file_extent_generation(leaf, fi, trans->transid);
+	btrfs_set_file_extent_type(leaf, fi, BTRFS_FILE_EXTENT_REG);
+	btrfs_set_file_extent_disk_bytenr(leaf, fi, disk_bytenr);
+	btrfs_set_file_extent_disk_num_bytes(leaf, fi, num_bytes);
+	btrfs_set_file_extent_offset(leaf, fi, 0);
+	btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes);
+	btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes);
+	btrfs_set_file_extent_compression(leaf, fi, 0);
+	btrfs_set_file_extent_encryption(leaf, fi, 0);
+	btrfs_set_file_extent_other_encoding(leaf, fi, 0);
+	btrfs_mark_buffer_dirty(leaf);
+	btrfs_release_path(root, &path);
+
+	/* step 2: the EXTENT_ITEM in the extent tree (-EEXIST is tolerated) */
+	key.objectid = disk_bytenr;
+	key.offset = num_bytes;
+	key.type = BTRFS_EXTENT_ITEM_KEY;
+	ret = btrfs_insert_empty_item(trans, extent_root, &path, &key,
+				      sizeof(*ei));
+	if (ret && ret != -EEXIST)
+		goto out;
+	if (ret == 0) {
+		leaf = path.nodes[0];
+		ei = btrfs_item_ptr(leaf, path.slots[0],
+				    struct btrfs_extent_item);
+		btrfs_set_extent_refs(leaf, ei, 0);
+		btrfs_set_extent_generation(leaf, ei, trans->transid);
+		btrfs_set_extent_flags(leaf, ei, BTRFS_EXTENT_FLAG_DATA);
+		btrfs_mark_buffer_dirty(leaf);
+		ret = btrfs_update_block_group(trans, root, disk_bytenr,
+					       num_bytes, 1, 0);
+		if (ret)
+			goto out;
+	}
+
+	/* step 3: add the extent reference for this root and objectid */
+	ret = btrfs_inc_extent_ref(trans, root, disk_bytenr, num_bytes, 0,
+				   root->root_key.objectid, objectid, 0);
+out:
+	btrfs_release_path(root, &path);
+	return ret;
+}
+
+/*
+ * Store the NUL-terminated target of symlink path_name as an inline extent
+ * of inode objectid.  Returns 0 on success, non-zero on failure; targets of
+ * sectorsize bytes or more are rejected.
+ */
+static int add_symbolic_link(struct btrfs_trans_handle *trans,
+			     struct btrfs_root *root,
+			     u64 objectid, const char *path_name)
+{
+	u64 sectorsize = root->sectorsize;
+	char *buf = malloc(sectorsize);
+	int ret = readlink(path_name, buf, sectorsize);
+
+	if (ret <= 0) {
+		fprintf(stderr, "readlink failed for %s\n", path_name);
+		ret = -1;	/* a 0-byte result is a failure too */
+	} else if (ret >= sectorsize) {
+		fprintf(stderr, "symlink too long for %s\n", path_name);
+		ret = -1;
+	} else {
+		buf[ret] = '\0'; /* readlink does not do it for us */
+		ret = btrfs_insert_inline_extent(trans, root, objectid, 0,
+						 buf, ret + 1);
+	}
+	free(buf);
+	return ret;
+}
+
+/*
+ * Copy regular file path_name into the image.  Files of at most
+ * BTRFS_MAX_INLINE_DATA_SIZE(root) bytes become one inline extent; larger
+ * ones are written zero-padded, sector by sector, to out_fd in one extent
+ * from custom_alloc_extent() and checksummed.  Returns 0 on success.
+ */
+static int add_file_items(struct btrfs_trans_handle *trans,
+			  struct btrfs_root *root,
+			  struct btrfs_inode_item *btrfs_inode, u64 objectid,
+			  ino_t parent_inum, struct stat *st,
+			  const char *path_name, int out_fd)
+{
+	int ret = -1;
+	ssize_t ret_read;
+	u64 bytes_read;
+	char *buffer = NULL;
+	struct btrfs_key key;
+	u32 sectorsize = root->sectorsize;
+	u64 total_bytes;	/* extent size: st_size rounded up to sectors */
+	u64 first_block;
+	int fd;
+
+	fd = open(path_name, O_RDONLY);
+	if (fd == -1) {
+		fprintf(stderr, "%s open failed\n", path_name);
+		return -1;
+	}
+
+	if (st->st_size <= BTRFS_MAX_INLINE_DATA_SIZE(root)) {
+		buffer = malloc(st->st_size);
+		ret_read = pread64(fd, buffer, st->st_size, 0);
+		if (ret_read == -1) {
+			fprintf(stderr, "%s read failed\n", path_name);
+			goto end;
+		}
+		ret = btrfs_insert_inline_extent(trans, root, objectid, 0,
+						 buffer, st->st_size);
+		goto end;
+	}
+
+	/* round up in 64 bits so sizes of 4GiB and more do not wrap */
+	total_bytes = ((u64)st->st_size + sectorsize - 1) / sectorsize;
+	total_bytes *= sectorsize;
+	buffer = malloc(sectorsize);
+	if (!buffer)
+		goto end;	/* ret is still -1 */
+
+	ret = custom_alloc_extent(root, total_bytes, 0, &key);
+	if (ret)
+		goto end;
+	first_block = key.objectid;
+
+	/*
+	 * Never write past the extent, even if st_size is sector aligned.
+	 */
+	for (bytes_read = 0; bytes_read < total_bytes;
+	     bytes_read += sectorsize) {
+		memset(buffer, 0, sectorsize);
+		ret_read = pread64(fd, buffer, sectorsize, bytes_read);
+		if (ret_read == -1) {
+			fprintf(stderr, "%s read failed\n", path_name);
+			ret = -1;	/* ret may still be 0 here */
+			goto end;
+		}
+
+		ret = pwrite64(out_fd, buffer, sectorsize,
+			       first_block + bytes_read);
+		if (ret != sectorsize) {
+			fprintf(stderr, "output file write failed\n");
+			ret = -1;
+			goto end;
+		}
+
+		/* checksum for file data */
+		ret = btrfs_csum_file_block(trans, root->fs_info->csum_root,
+				first_block + total_bytes,
+				first_block + bytes_read,
+				buffer, sectorsize);
+		if (ret) {
+			fprintf(stderr, "%s checksum failed\n", path_name);
+			goto end;
+		}
+	}
+
+	ret = record_file_extent(trans, root, objectid, btrfs_inode,
+				 first_block, first_block, total_bytes);
+end:
+	free(buffer);
+	close(fd);
+	return ret;
+}
+
+/* Return malloc'ed "dir/name" (caller frees), or NULL if malloc fails. */
+static char *make_path(char *dir, char *name)
+{
+	size_t dir_len = strlen(dir);
+	char *path = malloc(dir_len + strlen(name) + 2);
+
+	if (!path)
+		return NULL;
+	memcpy(path, dir, dir_len + 1);
+	if (dir_len == 0 || dir[dir_len - 1] != '/')	/* no dir[-1] read */
+		strcat(path, "/");
+	return strcat(path, name);
+}
+
+/*
+ * Walk the tree under dir_name breadth-first, with dir_head as the queue of
+ * directories still to visit, adding every entry found to root; regular
+ * file data is written to out_fd.  Returns 0 on success, -1 on failure.
+ */
+static int traverse_directory(struct btrfs_trans_handle *trans,
+			      struct btrfs_root *root, char *dir_name,
+			      struct directory_name_entry *dir_head, int out_fd)
+{
+	int ret = 0;
+	struct btrfs_inode_item cur_inode, *inode_item;
+	int count, i, dir_index_cnt;
+	struct direct **files;
+	struct stat st;
+	struct directory_name_entry *dir_entry, *parent_dir_entry;
+	struct direct *cur_file;
+	ino_t parent_inum, cur_inum, highest_inum = 0;
+	char *parent_dir_name;
+	struct btrfs_path path;
+	struct extent_buffer *leaf;
+	struct btrfs_key root_dir_key;
+
+	/* Add list for source directory */
+	dir_entry = malloc(sizeof(struct directory_name_entry));
+	dir_entry->dir_name = dir_name;
+	dir_entry->path = malloc(strlen(dir_name) + 1);
+	strcpy(dir_entry->path, dir_name);
+
+	parent_inum = highest_inum + BTRFS_FIRST_FREE_OBJECTID;
+	dir_entry->inum = parent_inum;
+	list_add_tail(&dir_entry->list, &dir_head->list);
+
+	btrfs_init_path(&path);
+
+	root_dir_key.objectid = btrfs_root_dirid(&root->root_item);
+	root_dir_key.offset = 0;
+	btrfs_set_key_type(&root_dir_key, BTRFS_INODE_ITEM_KEY);
+	ret = btrfs_lookup_inode(trans, root, &path, &root_dir_key, 1);
+	if (ret) {
+		fprintf(stderr, "root dir lookup error\n");
+		list_del(&dir_entry->list);
+		parent_dir_entry = dir_entry;
+		goto fail_no_files;
+	}
+
+	leaf = path.nodes[0];
+	inode_item = btrfs_item_ptr(leaf, path.slots[0],
+				    struct btrfs_inode_item);
+	btrfs_set_inode_size(leaf, inode_item,
+			     calculate_dir_inode_size(dir_name));
+	btrfs_mark_buffer_dirty(leaf);
+	btrfs_release_path(root, &path);
+
+	do {
+		parent_dir_entry = list_entry(dir_head->list.next,
+					      struct directory_name_entry,
+					      list);
+		list_del(&parent_dir_entry->list);
+
+		parent_inum = parent_dir_entry->inum;
+		parent_dir_name = parent_dir_entry->dir_name;
+		if (chdir(parent_dir_entry->path)) {
+			fprintf(stderr, "chdir error for %s\n",
+				parent_dir_name);
+			goto fail_no_files;
+		}
+
+		count = scandir(parent_dir_entry->path, &files,
+				directory_select, NULL);
+		if (count == -1) {
+			fprintf(stderr, "scandir for %s failed: %s\n",
+				parent_dir_name, strerror (errno));
+			/* files was never filled in, so there is no list to free */
+			goto fail_no_files;
+		}
+
+		for (i = 0; i < count; i++) {
+			cur_file = files[i];
+
+			if (lstat(cur_file->d_name, &st) == -1) {
+				fprintf(stderr, "lstat failed for file %s\n",
+					cur_file->d_name);
+				goto fail;
+			}
+
+			cur_inum = ++highest_inum + BTRFS_FIRST_FREE_OBJECTID;
+			ret = add_directory_items(trans, root,
+						  cur_inum, parent_inum,
+						  cur_file->d_name,
+						  &st, &dir_index_cnt);
+			if (ret) {
+				fprintf(stderr, "add_directory_items failed\n");
+				goto fail;
+			}
+
+			ret = add_inode_items(trans, root, &st,
+					      cur_file->d_name, cur_inum,
+					      parent_inum, dir_index_cnt,
+					      &cur_inode);
+			if (ret) {
+				fprintf(stderr, "add_inode_items failed\n");
+				goto fail;
+			}
+
+			ret = add_xattr_item(trans, root,
+					     cur_inum, cur_file->d_name);
+			if (ret) {
+				fprintf(stderr, "add_xattr_item failed\n");
+				if(ret != -ENOTSUP)
+					goto fail;
+			}
+
+			if (S_ISDIR(st.st_mode)) {
+				dir_entry = malloc(sizeof(struct directory_name_entry));
+				dir_entry->path = make_path(parent_dir_entry->path,
+							    cur_file->d_name);
+				/* d_name dies with files; name the entry by its own path */
+				dir_entry->dir_name = dir_entry->path;
+				dir_entry->inum = cur_inum;
+				list_add_tail(&dir_entry->list,	&dir_head->list);
+			} else if (S_ISREG(st.st_mode)) {
+				ret = add_file_items(trans, root, &cur_inode,
+						     cur_inum, parent_inum, &st,
+						     cur_file->d_name, out_fd);
+				if (ret) {
+					fprintf(stderr, "add_file_items failed\n");
+					goto fail;
+				}
+			} else if (S_ISLNK(st.st_mode)) {
+				ret = add_symbolic_link(trans, root,
+						        cur_inum, cur_file->d_name);
+				if (ret) {
+					fprintf(stderr, "add_symbolic_link failed\n");
+					goto fail;
+				}
+			}
+		}
+
+		free_namelist(files, count);
+		free(parent_dir_entry->path);
+		free(parent_dir_entry);
+		index_cnt = 2;
+	} while (!list_empty(&dir_head->list));
+
+	return 0;
+fail:
+	free_namelist(files, count);
+fail_no_files:
+	free(parent_dir_entry->path);
+	free(parent_dir_entry);
+	return -1;
+}
+
+/* Create or truncate output_name for reading and writing; fd or -1. */
+static int open_target(char *output_name)
+{
+	const mode_t mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH;
+	const int flags = O_CREAT | O_RDWR | O_TRUNC;
+
+	return open(output_name, flags, mode);
+}
+
+/* Allocate the extra metadata chunks and the data chunk for the image. */
+static int create_chunks(struct btrfs_trans_handle *trans,
+			 struct btrfs_root *root, u64 num_of_meta_chunks,
+			 u64 size_of_data)
+{
+	struct btrfs_root *extent_root = root->fs_info->extent_root;
+	const u64 min_data_size = 8 * 1024 * 1024;
+	u64 start, size, left = num_of_meta_chunks;
+	int ret;
+
+	while (left-- > 0) {
+		ret = btrfs_alloc_chunk(trans, extent_root, &start, &size,
+					BTRFS_BLOCK_GROUP_METADATA);
+		BUG_ON(ret);
+		ret = btrfs_make_block_group(trans, extent_root, 0,
+					     BTRFS_BLOCK_GROUP_METADATA,
+					     BTRFS_FIRST_CHUNK_TREE_OBJECTID,
+					     start, size);
+		BUG_ON(ret);
+		set_extent_dirty(&root->fs_info->free_space_cache,
+				 start, start + size - 1, 0);
+	}
+
+	/* the data chunk is never smaller than 8MB */
+	size = size_of_data < min_data_size ? min_data_size : size_of_data;
+	ret = btrfs_alloc_data_chunk(trans, extent_root, &start, size,
+				     BTRFS_BLOCK_GROUP_DATA);
+	BUG_ON(ret);
+	ret = btrfs_make_block_group(trans, extent_root, 0,
+				     BTRFS_BLOCK_GROUP_DATA,
+				     BTRFS_FIRST_CHUNK_TREE_OBJECTID,
+				     start, size);
+	BUG_ON(ret);
+	set_extent_dirty(&root->fs_info->free_space_cache,
+			 start, start + size - 1, 0);
+	return ret;
+}
+
+/*
+ * Add the tree under source_dir to root within one transaction, writing
+ * file data to out_fd.  Returns 0 on success and -1 on failure.
+ */
+static int make_image(char *source_dir, struct btrfs_root *root, int out_fd)
+{
+	struct directory_name_entry dir_head;
+	struct btrfs_trans_handle *trans;
+	struct stat root_st;
+	int ret = -1;
+
+	if (lstat(source_dir, &root_st)) {
+		fprintf(stderr, "unable to lstat the %s\n", source_dir);
+	} else {
+		INIT_LIST_HEAD(&dir_head.list);
+		trans = btrfs_start_transaction(root, 1);
+		ret = traverse_directory(trans, root, source_dir,
+					 &dir_head, out_fd);
+		if (ret)
+			fprintf(stderr, "unable to traverse_directory\n");
+		else
+			btrfs_commit_transaction(trans, root);
+	}
+
+	if (ret) {
+		fprintf(stderr, "Making image is aborted.\n");
+		return -1;
+	}
+	printf("Making image is completed.\n");
+	return 0;
+}
+
+/*
+ * Estimate the image size needed for dir_name by running du(1) with
+ * sectorsize-byte blocks.  Fills in the data size and the number of extra
+ * metadata chunks, returns the total in bytes; exits if du cannot be run.
+ */
+static u64 size_sourcedir(char *dir_name, u64 sectorsize,
+			  u64 *num_of_meta_chunks_ret, u64 *size_of_data_ret)
+{
+	unsigned long long du_blocks = 0;
+	u64 dir_size;
+	int ret;
+	char command[1024];
+	FILE *du;
+	u64 default_chunk_size = 8 * 1024 * 1024;	/* 8MB */
+	u64 allocated_meta_size = 8 * 1024 * 1024;	/* 8MB */
+	u64 allocated_total_size = 20 * 1024 * 1024;	/* 20MB */
+	u64 num_of_meta_chunks;
+	u64 num_of_allocated_meta_chunks =
+			allocated_meta_size / default_chunk_size;
+
+	/* quote the name for the shell; a quote inside it cannot be escaped */
+	ret = snprintf(command, sizeof(command), "du -B %llu -s -- '%s'",
+		       (unsigned long long)sectorsize, dir_name);
+	if (ret < 0 || ret >= (int)sizeof(command) || strchr(dir_name, '\'')) {
+		fprintf(stderr, "bad source dir name %s\n", dir_name);
+		exit(1);
+	}
+
+	/* read du's output through a pipe instead of a temp file in the cwd */
+	du = popen(command, "r");
+	ret = du ? fscanf(du, "%llu", &du_blocks) : EOF;
+	if (!du || pclose(du) != 0 || ret != 1) {
+		fprintf(stderr, "unable to get the size of %s\n", dir_name);
+		exit(1);
+	}
+
+	dir_size = du_blocks * sectorsize;
+	*size_of_data_ret = dir_size;
+
+	/* budget half of the data size for metadata, rounded up to chunks */
+	num_of_meta_chunks = (dir_size / 2 + default_chunk_size - 1) /
+			     default_chunk_size;
+	if (num_of_meta_chunks <= num_of_allocated_meta_chunks)
+		num_of_meta_chunks = 0;
+	else
+		num_of_meta_chunks -= num_of_allocated_meta_chunks;
+	*num_of_meta_chunks_ret = num_of_meta_chunks;
+	return allocated_total_size + dir_size +
+	       (num_of_meta_chunks * default_chunk_size);
+}
+
+/* Zero the size / sectorsize whole sectors at the start of out_fd. */
+static int zero_output_file(int out_fd, u64 size, u32 sectorsize)
+{
+	u64 loop_num = size / sectorsize;	/* u64: not truncated to int */
+	char *buf = malloc(sectorsize);
+	ssize_t written;
+	u64 i;
+
+	if (!buf)
+		return -ENOMEM;
+	memset(buf, 0, sectorsize);
+	for (i = 0; i < loop_num; i++) {
+		written = pwrite64(out_fd, buf, sectorsize, i * sectorsize);
+		if (written != sectorsize) {
+			free(buf);	/* stop at the first failed write */
+			return -EIO;
+		}
+	}
+	free(buf);
+	return 0;
+}
+
 int main(int ac, char **av)
 {
 	char *file;
@@ -346,8 +1203,8 @@ int main(int ac, char **av)
 	u64 dev_block_count = 0;
 	u64 blocks[7];
 	u64 alloc_start = 0;
-	u64 metadata_profile = BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_DUP;
-	u64 data_profile = BTRFS_BLOCK_GROUP_RAID0;
+	u64 metadata_profile = 0;
+	u64 data_profile = 0;
 	u32 leafsize = getpagesize();
 	u32 sectorsize = 4096;
 	u32 nodesize = leafsize;
@@ -355,13 +1212,22 @@ int main(int ac, char **av)
 	int zero_end = 1;
 	int option_index = 0;
 	int fd;
-	int first_fd;
 	int ret;
 	int i;
+	int mixed = 0;
+	int data_profile_opt = 0;
+	int metadata_profile_opt = 0;
+
+	char *source_dir = NULL;
+	int source_dir_set = 0;
+	u64 num_of_meta_chunks = 0;
+	u64 size_of_data = 0;
+	u64 source_dir_size = 0;
+	char *pretty_buf;
 
 	while(1) {
 		int c;
-		c = getopt_long(ac, av, "A:b:l:n:s:m:d:L:V", long_options,
+		c = getopt_long(ac, av, "A:b:l:n:s:m:d:L:r:VM", long_options,
 				&option_index);
 		if (c < 0)
 			break;
@@ -371,8 +1237,11 @@ int main(int ac, char **av)
 				break;
 			case 'd':
 				data_profile = parse_profile(optarg);
+				data_profile_opt = 1;
 				break;
 			case 'l':
+			case 'n':
+				nodesize = parse_size(optarg);
 				leafsize = parse_size(optarg);
 				break;
 			case 'L':
@@ -380,27 +1249,30 @@ int main(int ac, char **av)
 				break;
 			case 'm':
 				metadata_profile = parse_profile(optarg);
+				metadata_profile_opt = 1;
 				break;
-			case 'n':
-				nodesize = parse_size(optarg);
+			case 'M':
+				mixed = 1;
 				break;
 			case 's':
 				sectorsize = parse_size(optarg);
 				break;
 			case 'b':
 				block_count = parse_size(optarg);
-				if (block_count < 256*1024*1024) {
-					fprintf(stderr, "File system size "
-						"%llu bytes is too small, "
-						"256M is required at least\n",
-						(unsigned long long)block_count);
-					exit(1);
+				if (block_count <= 1024*1024*1024) {
+					printf("SMALL VOLUME: forcing mixed "
+					       "metadata/data groups\n");
+					mixed = 1;
 				}
 				zero_end = 0;
 				break;
 			case 'V':
 				print_version();
 				break;
+			case 'r':
+				source_dir = optarg;
+				source_dir_set = 1;
+				break;
 			default:
 				print_usage();
 		}
@@ -421,27 +1293,54 @@ int main(int ac, char **av)
 	printf("\nWARNING! - %s IS EXPERIMENTAL\n", BTRFS_BUILD_VERSION);
 	printf("WARNING! - see http://btrfs.wiki.kernel.org before using\n\n");
 
-	file = av[optind++];
-	ret = check_mounted(file);
-	if (ret < 0) {
-		fprintf(stderr, "error checking %s mount status\n", file);
-		exit(1);
-	}
-	if (ret == 1) {
-		fprintf(stderr, "%s is mounted\n", file);
-		exit(1);
+	if (source_dir == 0) {
+		file = av[optind++];
+		ret = check_mounted(file);
+		if (ret < 0) {
+			fprintf(stderr, "error checking %s mount status\n", file);
+			exit(1);
+		}
+		if (ret == 1) {
+			fprintf(stderr, "%s is mounted\n", file);
+			exit(1);
+		}
+		ac--;
+		fd = open(file, O_RDWR);
+		if (fd < 0) {
+			fprintf(stderr, "unable to open %s\n", file);
+			exit(1);
+		}
+		first_file = file;
+		ret = btrfs_prepare_device(fd, file, zero_end, &dev_block_count, &mixed);
+		if (block_count == 0)
+			block_count = dev_block_count;
+	} else {
+		ac = 0;
+		file = av[optind++];
+		fd = open_target(file);
+		if (fd < 0) {
+			fprintf(stderr, "unable to open the %s\n", file);
+			exit(1);
+		}
+
+		first_file = file;
+		source_dir_size = size_sourcedir(source_dir, sectorsize,
+					     &num_of_meta_chunks, &size_of_data);
+		if(block_count < source_dir_size)
+			block_count = source_dir_size;
+		ret = zero_output_file(fd, block_count, sectorsize);
+		if (ret) {
+			fprintf(stderr, "unable to zero the output file\n");
+			exit(1);
+		}
 	}
-	ac--;
-	fd = open(file, O_RDWR);
-	if (fd < 0) {
-		fprintf(stderr, "unable to open %s\n", file);
-		exit(1);
+	if (mixed) {
+		if (metadata_profile != data_profile) {
+			fprintf(stderr, "With mixed block groups data and metadata "
+				"profiles must be the same\n");
+			exit(1);
+		}
 	}
-	first_fd = fd;
-	first_file = file;
-	ret = btrfs_prepare_device(fd, file, zero_end, &dev_block_count);
-	if (block_count == 0)
-		block_count = dev_block_count;
 
 	blocks[0] = BTRFS_SUPER_INFO_OFFSET;
 	for (i = 1; i < 7; i++) {
@@ -456,10 +1355,15 @@ int main(int ac, char **av)
 		fprintf(stderr, "error during mkfs %d\n", ret);
 		exit(1);
 	}
+
 	root = open_ctree(file, 0, O_RDWR);
+	if (!root) {
+		fprintf(stderr, "ctree init failed\n");
+		exit(1);
+	}
 	root->fs_info->alloc_start = alloc_start;
 
-	ret = make_root_dir(root);
+	ret = make_root_dir(root, mixed);
 	if (ret) {
 		fprintf(stderr, "failed to setup the root directory\n");
 		exit(1);
@@ -471,13 +1375,11 @@ int main(int ac, char **av)
 		goto raid_groups;
 
 	btrfs_register_one_device(file);
-	if (!root) {
-		fprintf(stderr, "ctree init failed\n");
-		return -1;
-	}
 
 	zero_end = 1;
 	while(ac-- > 0) {
+		int old_mixed = mixed;
+
 		file = av[optind++];
 		ret = check_mounted(file);
 		if (ret < 0) {
@@ -503,8 +1405,8 @@ int main(int ac, char **av)
 			continue;
 		}
 		ret = btrfs_prepare_device(fd, file, zero_end,
-					   &dev_block_count);
-
+					   &dev_block_count, &mixed);
+		mixed = old_mixed;
 		BUG_ON(ret);
 
 		ret = btrfs_add_to_fsid(trans, root, fd, file, dev_block_count,
@@ -514,20 +1416,44 @@ int main(int ac, char **av)
 	}
 
 raid_groups:
-	ret = create_raid_groups(trans, root, data_profile,
-				 metadata_profile);
-	BUG_ON(ret);
+	if (!source_dir_set) {
+		ret = create_raid_groups(trans, root, data_profile,
+				 data_profile_opt, metadata_profile,
+				 metadata_profile_opt, mixed);
+		BUG_ON(ret);
+	}
 
 	ret = create_data_reloc_tree(trans, root);
 	BUG_ON(ret);
 
+	if (mixed) {
+		struct btrfs_super_block *super = &root->fs_info->super_copy;
+		u64 flags = btrfs_super_incompat_flags(super);
+
+		flags |= BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS;
+		btrfs_set_super_incompat_flags(super, flags);
+	}
+
 	printf("fs created label %s on %s\n\tnodesize %u leafsize %u "
 	    "sectorsize %u size %s\n",
 	    label, first_file, nodesize, leafsize, sectorsize,
-	    pretty_sizes(btrfs_super_total_bytes(&root->fs_info->super_copy)));
+	    pretty_buf = pretty_sizes(btrfs_super_total_bytes(&root->fs_info->super_copy)));
+	free(pretty_buf);
 
 	printf("%s\n", BTRFS_BUILD_VERSION);
 	btrfs_commit_transaction(trans, root);
+
+	if (source_dir_set) {
+		trans = btrfs_start_transaction(root, 1);
+		ret = create_chunks(trans, root,
+				    num_of_meta_chunks, size_of_data);
+		BUG_ON(ret);
+		btrfs_commit_transaction(trans, root);
+
+		ret = make_image(source_dir, root, fd);
+		BUG_ON(ret);
+	}
+
 	ret = close_ctree(root);
 	BUG_ON(ret);
 
diff --git a/print-tree.c b/print-tree.c
index 59f4358..fc134c0 100644
--- a/print-tree.c
+++ b/print-tree.c
@@ -239,7 +239,7 @@ static void print_extent_item(struct extent_buffer *eb, int slot)
 			       btrfs_shared_data_ref_count(eb, sref));
 			break;
 		default:
-			BUG();
+			return;
 		}
 		ptr += btrfs_extent_inline_ref_size(type);
 	}
@@ -351,6 +351,9 @@ static void print_key_type(u8 type)
 	case BTRFS_DEV_EXTENT_KEY:
 		printf("DEV_EXTENT");
 		break;
+	case BTRFS_BALANCE_ITEM_KEY:
+		printf("BALANCE_ITEM");
+		break;
 	case BTRFS_STRING_ITEM_KEY:
 		printf("STRING_ITEM");
 		break;
@@ -391,6 +394,9 @@ static void print_objectid(unsigned long long objectid, u8 type)
 	case BTRFS_CSUM_TREE_OBJECTID:
 		printf("CSUM_TREE");
 		break;
+	case BTRFS_BALANCE_OBJECTID:
+		printf("BALANCE");
+		break;
 	case BTRFS_ORPHAN_OBJECTID:
 		printf("ORPHAN");
 		break;
@@ -413,8 +419,11 @@ static void print_objectid(unsigned long long objectid, u8 type)
 		printf("MULTIPLE");
 		break;
 	case BTRFS_FIRST_CHUNK_TREE_OBJECTID:
-		printf("FIRST_CHUNK_TREE");
-		break;
+		if (type == BTRFS_CHUNK_ITEM_KEY) {
+			printf("FIRST_CHUNK_TREE");
+			break;
+		}
+		/* fall-thru */
 	default:
 		printf("%llu", objectid);
 	}
@@ -441,7 +450,6 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l)
 	struct btrfs_dir_item *di;
 	struct btrfs_inode_item *ii;
 	struct btrfs_file_extent_item *fi;
-	struct btrfs_csum_item *ci;
 	struct btrfs_block_group_item *bi;
 	struct btrfs_extent_data_ref *dref;
 	struct btrfs_shared_data_ref *sref;
@@ -494,7 +502,7 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l)
 		case BTRFS_DIR_LOG_ITEM_KEY:
 			dlog = btrfs_item_ptr(l, i, struct btrfs_dir_log_item);
 			printf("\t\tdir log end %Lu\n",
-			       btrfs_dir_log_end(l, dlog));
+			       (unsigned long long)btrfs_dir_log_end(l, dlog));
 		       break;
 		case BTRFS_ORPHAN_ITEM_KEY:
 			printf("\t\torphan item\n");
@@ -502,11 +510,12 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l)
 		case BTRFS_ROOT_ITEM_KEY:
 			ri = btrfs_item_ptr(l, i, struct btrfs_root_item);
 			read_extent_buffer(l, &root_item, (unsigned long)ri, sizeof(root_item));
-			printf("\t\troot data bytenr %llu level %d dirid %llu refs %u\n",
+			printf("\t\troot data bytenr %llu level %d dirid %llu refs %u gen %llu\n",
 				(unsigned long long)btrfs_root_bytenr(&root_item),
 				btrfs_root_level(&root_item),
 				(unsigned long long)btrfs_root_dirid(&root_item),
-				btrfs_root_refs(&root_item));
+				btrfs_root_refs(&root_item),
+				(unsigned long long)btrfs_root_generation(&root_item));
 			if (btrfs_root_refs(&root_item) == 0) {
 				struct btrfs_key drop_key;
 				btrfs_disk_key_to_cpu(&drop_key,
@@ -553,11 +562,9 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l)
 #endif
 			break;
 		case BTRFS_CSUM_ITEM_KEY:
-			ci = btrfs_item_ptr(l, i, struct btrfs_csum_item);
 			printf("\t\tcsum item\n");
 			break;
 		case BTRFS_EXTENT_CSUM_KEY:
-			ci = btrfs_item_ptr(l, i, struct btrfs_csum_item);
 			printf("\t\textent csum item\n");
 			break;
 		case BTRFS_EXTENT_DATA_KEY:
@@ -607,7 +614,7 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l)
 	}
 }
 
-void btrfs_print_tree(struct btrfs_root *root, struct extent_buffer *eb)
+void btrfs_print_tree(struct btrfs_root *root, struct extent_buffer *eb, int follow)
 {
 	int i;
 	u32 nr;
@@ -643,6 +650,9 @@ void btrfs_print_tree(struct btrfs_root *root, struct extent_buffer *eb)
 		       (unsigned long long)btrfs_node_ptr_generation(eb, i));
 		fflush(stdout);
 	}
+	if (!follow)
+		return;
+
 	for (i = 0; i < nr; i++) {
 		struct extent_buffer *next = read_tree_block(root,
 					     btrfs_node_blockptr(eb, i),
@@ -660,8 +670,7 @@ void btrfs_print_tree(struct btrfs_root *root, struct extent_buffer *eb)
 		if (btrfs_header_level(next) !=
 			btrfs_header_level(eb) - 1)
 			BUG();
-		btrfs_print_tree(root, next);
+		btrfs_print_tree(root, next, 1);
 		free_extent_buffer(next);
 	}
 }
-
diff --git a/print-tree.h b/print-tree.h
index 4d1a01a..495b81a 100644
--- a/print-tree.h
+++ b/print-tree.h
@@ -19,6 +19,6 @@
 #ifndef __PRINT_TREE_
 #define __PRINT_TREE_
 void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l);
-void btrfs_print_tree(struct btrfs_root *root, struct extent_buffer *t);
+void btrfs_print_tree(struct btrfs_root *root, struct extent_buffer *t, int follow);
 void btrfs_print_key(struct btrfs_disk_key *disk_key);
 #endif
diff --git a/quick-test.c b/quick-test.c
index 351c706..fa6fd83 100644
--- a/quick-test.c
+++ b/quick-test.c
@@ -85,7 +85,7 @@ int main(int ac, char **av) {
 			fprintf(stderr, "search %d:%d\n", num, i);
 		ret = btrfs_search_slot(NULL, root, &ins, &path, 0, 0);
 		if (ret) {
-			btrfs_print_tree(root, root->node);
+			btrfs_print_tree(root, root->node, 1);
 			printf("unable to find %d\n", num);
 			exit(1);
 		}
@@ -148,7 +148,7 @@ int main(int ac, char **av) {
 			fprintf(stderr, "search %d:%d\n", num, i);
 		ret = btrfs_search_slot(NULL, root, &ins, &path, 0, 0);
 		if (ret) {
-			btrfs_print_tree(root, root->node);
+			btrfs_print_tree(root, root->node, 1);
 			printf("unable to find %d\n", num);
 			exit(1);
 		}
@@ -196,7 +196,7 @@ int main(int ac, char **av) {
 	btrfs_commit_transaction(trans, root);
 	printf("tree size is now %d\n", tree_size);
 	printf("root %p commit root %p\n", root->node, root->commit_root);
-	btrfs_print_tree(root, root->node);
+	btrfs_print_tree(root, root->node, 1);
 	close_ctree(root);
 	return 0;
 }
diff --git a/random-test.c b/random-test.c
index 571735d..0003236 100644
--- a/random-test.c
+++ b/random-test.c
@@ -404,7 +404,7 @@ int main(int ac, char **av)
 			if (ret) {
 				fprintf(stderr, "op %d failed %d:%d\n",
 					op, i, iterations);
-				btrfs_print_tree(root, root->node);
+				btrfs_print_tree(root, root->node, 1);
 				fprintf(stderr, "op %d failed %d:%d\n",
 					op, i, iterations);
 				err = ret;
diff --git a/repair.c b/repair.c
new file mode 100644
index 0000000..e640465
--- /dev/null
+++ b/repair.c
@@ -0,0 +1,50 @@
+/*
+ * Copyright (C) 2012 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include "ctree.h"
+#include "extent-cache.h"
+#include "utils.h"
+#include "repair.h"
+
+/* Add one record to info->corrupt_blocks, if set: 0, -ENOMEM or -EEXIST. */
+int btrfs_add_corrupt_extent_record(struct btrfs_fs_info *info,
+				    struct btrfs_key *first_key,
+				    u64 start, u64 len, int level)
+{
+	struct btrfs_corrupt_block *rec;
+	int err;
+
+	if (!info->corrupt_blocks)
+		return 0;
+
+	rec = malloc(sizeof(*rec));
+	if (!rec)
+		return -ENOMEM;
+	rec->key = *first_key;
+	rec->level = level;
+	rec->cache.start = start;
+	rec->cache.size = len;
+
+	err = insert_existing_cache_extent(info->corrupt_blocks, &rec->cache);
+	if (err) {
+		free(rec);
+		BUG_ON(err != -EEXIST);
+	}
+	return err;
+}
+
diff --git a/repair.h b/repair.h
new file mode 100644
index 0000000..3d0dcb9
--- /dev/null
+++ b/repair.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (C) 2012 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __BTRFS_REPAIR__
+#define __BTRFS_REPAIR__
+
+struct btrfs_corrupt_block {
+	struct cache_extent cache;	/* start and size of the recorded extent */
+	struct btrfs_key key;		/* copy of the first_key that was passed */
+	int level;			/* level that was passed for the extent */
+};
+
+int btrfs_add_corrupt_extent_record(struct btrfs_fs_info *info,
+				    struct btrfs_key *first_key,
+				    u64 start, u64 len, int level);
+
+#endif
diff --git a/restore.c b/restore.c
new file mode 100644
index 0000000..250c9d3
--- /dev/null
+++ b/restore.c
@@ -0,0 +1,872 @@
+/*
+ * Copyright (C) 2011 Red Hat.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#define _XOPEN_SOURCE 500
+#define _GNU_SOURCE 1
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <zlib.h>
+#include "kerncompat.h"
+#include "ctree.h"
+#include "disk-io.h"
+#include "print-tree.h"
+#include "transaction.h"
+#include "list.h"
+#include "version.h"
+#include "volumes.h"
+#include "utils.h"
+
+static char path_name[4096];
+static int get_snaps = 0;
+static int verbose = 0;
+static int ignore_errors = 0;
+static int overwrite = 0;
+
+/* Inflate one zlib stream from inbuf into outbuf; returns 0 or -1. */
+static int decompress(char *inbuf, char *outbuf, u64 compress_len,
+		      u64 decompress_len)
+{
+	z_stream strm;
+	int ret;
+
+	memset(&strm, 0, sizeof(strm));
+	ret = inflateInit(&strm);
+	if (ret != Z_OK) {
+		fprintf(stderr, "inflate init returned %d\n", ret);
+		return -1;
+	}
+
+	strm.next_in = (unsigned char *)inbuf;
+	strm.avail_in = compress_len;
+	strm.next_out = (unsigned char *)outbuf;
+	strm.avail_out = decompress_len;
+	ret = inflate(&strm, Z_NO_FLUSH);
+	(void)inflateEnd(&strm);	/* needed on success and failure alike */
+	if (ret != Z_STREAM_END) {
+		fprintf(stderr, "inflate returned %d\n", ret);
+		return -1;
+	}
+
+	return 0;
+}
+
+int next_leaf(struct btrfs_root *root, struct btrfs_path *path)
+{
+	int slot;
+	int level = 1;
+	struct extent_buffer *c;
+	struct extent_buffer *next = NULL;
+	/* Step path to slot 0 of the following leaf: returns 0, or 1 if none. */
+	for (; level < BTRFS_MAX_LEVEL; level++) {	/* lowest level with a node */
+		if (path->nodes[level])
+			break;
+	}
+
+	if (level == BTRFS_MAX_LEVEL)
+		return 1;	/* the path holds no node above the leaf */
+
+	slot = path->slots[level] + 1;
+	/* climb until a node has a slot to the right of the one in the path */
+	while(level < BTRFS_MAX_LEVEL) {
+		if (!path->nodes[level])
+			return 1;	/* ran out of nodes: no next leaf */
+
+		slot = path->slots[level] + 1;
+		c = path->nodes[level];
+		if (slot >= btrfs_header_nritems(c)) {
+			level++;
+			if (level == BTRFS_MAX_LEVEL)
+				return 1;
+			continue;
+		}
+
+		if (next)
+			free_extent_buffer(next);
+
+		if (path->reada)
+			reada_for_search(root, path, level, slot, 0);
+		/* the child at that slot is where the descent below starts */
+		next = read_node_slot(root, c, slot);	/* NOTE(review): result unchecked */
+		break;
+	}
+	path->slots[level] = slot;
+	while(1) {	/* walk down through slot 0 of each node to level 0 */
+		level--;
+		c = path->nodes[level];
+		free_extent_buffer(c);	/* release the node being replaced */
+		path->nodes[level] = next;
+		path->slots[level] = 0;
+		if (!level)
+			break;
+		if (path->reada)
+			reada_for_search(root, path, level, 0, 0);
+		next = read_node_slot(root, next, 0);
+	}
+	return 0;
+}
+
+/*
+ * Write the inline extent at path->slots[0] to fd at offset pos, inflating
+ * it first when it is compressed.  Returns 0 on success, -1 on failure.
+ */
+static int copy_one_inline(int fd, struct btrfs_path *path, u64 pos)
+{
+	struct extent_buffer *leaf = path->nodes[0];
+	struct btrfs_file_extent_item *fi;
+	char *buf, *outbuf = NULL;
+	ssize_t done;
+	int ret = -1;
+	int len, ram_size;
+
+	fi = btrfs_item_ptr(leaf, path->slots[0],
+			    struct btrfs_file_extent_item);
+	len = btrfs_file_extent_inline_item_len(leaf,
+					btrfs_item_nr(leaf, path->slots[0]));
+	buf = malloc(len + 1);	/* fits any item; +1 avoids malloc(0) */
+	if (!buf) {
+		fprintf(stderr, "No memory\n");
+		return -1;
+	}
+	read_extent_buffer(leaf, buf, btrfs_file_extent_inline_start(fi), len);
+
+	if (btrfs_file_extent_compression(leaf, fi) == BTRFS_COMPRESS_NONE) {
+		done = pwrite(fd, buf, len, pos);
+		if (done < len)
+			fprintf(stderr, "Short inline write, wanted %d, did "
+				"%zd: %d\n", len, done, errno);
+		else
+			ret = 0;
+		goto out;
+	}
+
+	ram_size = btrfs_file_extent_ram_bytes(leaf, fi);
+	outbuf = malloc(ram_size);
+	if (!outbuf) {
+		fprintf(stderr, "No memory\n");
+		goto out;
+	}
+	ret = decompress(buf, outbuf, len, ram_size);
+	if (ret)
+		goto out;
+	done = pwrite(fd, outbuf, ram_size, pos);
+	if (done < ram_size) {	/* not len: ram_size bytes were asked for */
+		fprintf(stderr, "Short compressed inline write, wanted %d, "
+			"did %zd: %d\n", ram_size, done, errno);
+		ret = -1;
+	}
+out:
+	free(outbuf);
+	free(buf);
+	return ret;
+}
+
+/*
+ * Copy the extent described by fi to fd at offset pos.  Its disk_num_bytes
+ * are read into memory, one mapping at a time and always from the first
+ * stripe, then written out as they are or, for a compressed extent, after
+ * inflating them to ram_bytes.  An extent with disk_num_bytes of 0 is a hole
+ * and is skipped.  Returns 0 on success and non-zero on failure.
+ */
+static int copy_one_extent(struct btrfs_root *root, int fd,
+			   struct extent_buffer *leaf,
+			   struct btrfs_file_extent_item *fi, u64 pos)
+{
+	struct btrfs_multi_bio *multi = NULL;
+	struct btrfs_device *device;
+	char *inbuf, *outbuf = NULL;
+	char *data;
+	ssize_t done, total = 0;
+	u64 bytenr;
+	u64 ram_size;
+	u64 disk_size;
+	u64 length;
+	u64 size_left;
+	u64 dev_bytenr;
+	u64 count = 0;
+	int compress;
+	int ret;
+	int dev_fd;
+
+	compress = btrfs_file_extent_compression(leaf, fi);
+	bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
+	disk_size = btrfs_file_extent_disk_num_bytes(leaf, fi);
+	ram_size = btrfs_file_extent_ram_bytes(leaf, fi);
+	size_left = disk_size;
+
+	/* we found a hole */
+	if (disk_size == 0)
+		return 0;
+
+	inbuf = malloc(disk_size);
+	if (!inbuf) {
+		fprintf(stderr, "No memory\n");
+		return -1;
+	}
+
+	if (compress != BTRFS_COMPRESS_NONE) {
+		outbuf = malloc(ram_size);
+		if (!outbuf) {
+			fprintf(stderr, "No memory\n");
+			ret = -1;
+			goto out;
+		}
+	}
+
+again:
+	length = size_left;
+	ret = btrfs_map_block(&root->fs_info->mapping_tree, READ,
+			      bytenr, &length, &multi, 0);
+	if (ret) {
+		fprintf(stderr, "Error mapping block %d\n", ret);
+		goto out;
+	}
+	/* only the first stripe of the mapping is read */
+	device = multi->stripes[0].dev;
+	dev_fd = device->fd;
+	device->total_ios++;
+	dev_bytenr = multi->stripes[0].physical;
+	kfree(multi);
+
+	/* never read past the end of the extent */
+	if (size_left < length)
+		length = size_left;
+	size_left -= length;
+
+	done = pread(dev_fd, inbuf+count, length, dev_bytenr);
+	/*
+	 * done is signed and length is not: test the sign separately, or a
+	 * failed read (-1) would compare as a huge value and go unnoticed.
+	 */
+	if (done < 0 || (u64)done < length) {
+		fprintf(stderr, "Short read %d\n", errno);
+		ret = -1;
+		goto out;
+	}
+
+	count += length;
+	bytenr += length;
+	if (size_left)
+		goto again;
+
+	/* uncompressed data is written straight from the read buffer */
+	data = inbuf;
+	if (compress != BTRFS_COMPRESS_NONE) {
+		ret = decompress(inbuf, outbuf, disk_size, ram_size);
+		if (ret)
+			goto out;
+		data = outbuf;
+	}
+
+	while (total < ram_size) {
+		done = pwrite(fd, data+total, ram_size-total, pos+total);
+		if (done < 0) {
+			fprintf(stderr, "Error writing: %d %s\n", errno, strerror(errno));
+			ret = -1;
+			goto out;
+		}
+		total += done;
+	}
+	ret = 0;
+out:
+	free(inbuf);
+	free(outbuf);
+	return ret;
+}
+
+/* Ask whether to keep going on file: returns 0 for yes, 1 for no or EOF. */
+static int ask_to_continue(const char *file)
+{
+	char buf[2];
+	char *ret;
+
+	printf("We seem to be looping a lot on %s, do you want to keep going "
+	       "on ? (y/N): ", file);
+	for (;;) {
+		ret = fgets(buf, 2, stdin);
+		/* fgets returns NULL at end of input: take that as "no" */
+		if (!ret || *ret == '\n' || tolower((unsigned char)*ret) == 'n')
+			return 1;
+		if (tolower((unsigned char)*ret) == 'y')
+			return 0;
+		printf("Please enter either 'y' or 'n': ");
+	}
+}
+
+
+/*
+ * Restore the data of one file into the already opened @fd.
+ *
+ * @root: tree searched for the inode item and its file extent items
+ * @fd:   destination file descriptor handed to the copy_one_* helpers
+ * @key:  key of the inode; its offset and type are overwritten here so it
+ *        can be used to search for BTRFS_EXTENT_DATA_KEY items
+ * @file: destination path, only used in messages
+ *
+ * Inline and regular extents are copied, preallocated extents are skipped.
+ * If the inode item was found and reports a non-zero size, the output file
+ * is finally resized to that size with ftruncate().
+ *
+ * Returns 0 on success (also when the user declines to continue after many
+ * iterations) and a non-zero value on error.
+ */
+static int copy_file(struct btrfs_root *root, int fd, struct btrfs_key *key,
+		     const char *file)
+{
+	struct extent_buffer *leaf;
+	struct btrfs_path *path;
+	struct btrfs_file_extent_item *fi;
+	struct btrfs_inode_item *inode_item;
+	struct btrfs_key found_key;
+	int ret;
+	int extent_type;
+	int compression;
+	int loops = 0;
+	u64 found_size = 0;
+
+	path = btrfs_alloc_path();
+	if (!path) {
+		fprintf(stderr, "Ran out of memory\n");
+		return -1;
+	}
+	path->skip_locking = 1;
+
+	/* Remember the inode size (if the inode item exists) for set_size */
+	ret = btrfs_lookup_inode(NULL, root, path, key, 0);
+	if (ret == 0) {
+		inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
+				    struct btrfs_inode_item);
+		found_size = btrfs_inode_size(path->nodes[0], inode_item);
+	}
+	btrfs_release_path(root, path);
+
+	key->offset = 0;
+	key->type = BTRFS_EXTENT_DATA_KEY;
+
+	ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
+	if (ret < 0) {
+		fprintf(stderr, "Error searching %d\n", ret);
+		btrfs_free_path(path);
+		return ret;
+	}
+
+	/* The search may leave no leaf in the path; advance until one shows up */
+	leaf = path->nodes[0];
+	while (!leaf) {
+		ret = next_leaf(root, path);
+		if (ret < 0) {
+			fprintf(stderr, "Error getting next leaf %d\n",
+				ret);
+			btrfs_free_path(path);
+			return ret;
+		} else if (ret > 0) {
+			/* No more leaves to search */
+			btrfs_free_path(path);
+			return 0;
+		}
+		leaf = path->nodes[0];
+	}
+
+	while (1) {
+		/* Every 1024 iterations let the user decide whether to go on */
+		if (loops++ >= 1024) {
+			ret = ask_to_continue(file);
+			if (ret)
+				break;
+			loops = 0;
+		}
+		if (path->slots[0] >= btrfs_header_nritems(leaf)) {
+			do {
+				ret = next_leaf(root, path);
+				if (ret < 0) {
+					fprintf(stderr, "Error searching %d\n", ret);
+					btrfs_free_path(path);
+					return ret;
+				} else if (ret) {
+					/* No more leaves to search */
+					btrfs_free_path(path);
+					goto set_size;
+				}
+				leaf = path->nodes[0];
+			} while (!leaf);
+			continue;
+		}
+		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+		/* Stop at the first item that is not an extent of this inode */
+		if (found_key.objectid != key->objectid)
+			break;
+		if (found_key.type != key->type)
+			break;
+		fi = btrfs_item_ptr(leaf, path->slots[0],
+				    struct btrfs_file_extent_item);
+		extent_type = btrfs_file_extent_type(leaf, fi);
+		compression = btrfs_file_extent_compression(leaf, fi);
+		if (compression >= BTRFS_COMPRESS_LAST) {
+			fprintf(stderr, "Don't support compression yet %d\n",
+				compression);
+			btrfs_free_path(path);
+			return -1;
+		}
+
+		if (extent_type == BTRFS_FILE_EXTENT_PREALLOC)
+			goto next;
+		if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
+			ret = copy_one_inline(fd, path, found_key.offset);
+			if (ret) {
+				btrfs_free_path(path);
+				return -1;
+			}
+		} else if (extent_type == BTRFS_FILE_EXTENT_REG) {
+			ret = copy_one_extent(root, fd, leaf, fi,
+					      found_key.offset);
+			if (ret) {
+				btrfs_free_path(path);
+				return ret;
+			}
+		} else {
+			printf("Weird extent type %d\n", extent_type);
+		}
+next:
+		path->slots[0]++;
+	}
+
+	btrfs_free_path(path);
+set_size:
+	/* path is already freed on every way in here */
+	if (found_size) {
+		ret = ftruncate(fd, (loff_t)found_size);
+		if (ret) {
+			fprintf(stderr, "Error truncating %s: %d\n", file,
+				errno);
+			return -1;
+		}
+	}
+	return 0;
+}
+
+/*
+ * Restore the contents of one directory, recursing into sub-directories.
+ *
+ * @root: tree that holds the directory
+ * @key:  key of the directory; its offset and type are overwritten here so
+ *        it can be used to search for BTRFS_DIR_INDEX_KEY items
+ * @dir:  output directory the entries are restored into
+ *
+ * Only regular files and directories are restored.  Behaviour is tuned by
+ * the file-level flags verbose, overwrite, ignore_errors and get_snaps, and
+ * the file-level path_name buffer is used to build each output path.
+ *
+ * Returns 0 when the directory has been walked (or no more items exist) and
+ * a non-zero value on an error that is not masked by ignore_errors.
+ */
+static int search_dir(struct btrfs_root *root, struct btrfs_key *key,
+		      const char *dir)
+{
+	struct btrfs_path *path;
+	struct extent_buffer *leaf;
+	struct btrfs_dir_item *dir_item;
+	struct btrfs_key found_key, location;
+	char filename[BTRFS_NAME_LEN + 1];
+	unsigned long name_ptr;
+	int name_len;
+	int ret;
+	int fd;
+	int loops = 0;
+	u8 type;
+
+	path = btrfs_alloc_path();
+	if (!path) {
+		fprintf(stderr, "Ran out of memory\n");
+		return -1;
+	}
+	path->skip_locking = 1;
+
+	key->offset = 0;
+	key->type = BTRFS_DIR_INDEX_KEY;
+
+	ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
+	if (ret < 0) {
+		fprintf(stderr, "Error searching %d\n", ret);
+		btrfs_free_path(path);
+		return ret;
+	}
+
+	leaf = path->nodes[0];
+	while (!leaf) {
+		if (verbose > 1)
+			printf("No leaf after search, looking for the next "
+			       "leaf\n");
+		ret = next_leaf(root, path);
+		if (ret < 0) {
+			fprintf(stderr, "Error getting next leaf %d\n",
+				ret);
+			btrfs_free_path(path);
+			return ret;
+		} else if (ret > 0) {
+			/* No more leaves to search */
+			if (verbose)
+				printf("Reached the end of the tree looking "
+				       "for the directory\n");
+			btrfs_free_path(path);
+			return 0;
+		}
+		leaf = path->nodes[0];
+	}
+
+	while (leaf) {
+		/*
+		 * loops is reset whenever an entry is handled; reaching 1024
+		 * means that many iterations passed without any progress.
+		 */
+		if (loops++ >= 1024) {
+			printf("We have looped trying to restore files in %s "
+			       "too many times to be making progress, "
+			       "stopping\n", dir);
+			break;
+		}
+
+		if (path->slots[0] >= btrfs_header_nritems(leaf)) {
+			do {
+				ret = next_leaf(root, path);
+				if (ret < 0) {
+					fprintf(stderr, "Error searching %d\n",
+						ret);
+					btrfs_free_path(path);
+					return ret;
+				} else if (ret > 0) {
+					/* No more leaves to search */
+					if (verbose)
+						printf("Reached the end of "
+						       "the tree searching the"
+						       " directory\n");
+					btrfs_free_path(path);
+					return 0;
+				}
+				leaf = path->nodes[0];
+			} while (!leaf);
+			continue;
+		}
+		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+		if (found_key.objectid != key->objectid) {
+			if (verbose > 1)
+				printf("Found objectid=%Lu, key=%Lu\n",
+				       found_key.objectid, key->objectid);
+			break;
+		}
+		if (found_key.type != key->type) {
+			if (verbose > 1)
+				printf("Found type=%u, want=%u\n",
+				       found_key.type, key->type);
+			break;
+		}
+		dir_item = btrfs_item_ptr(leaf, path->slots[0],
+					  struct btrfs_dir_item);
+		name_ptr = (unsigned long)(dir_item + 1);
+		name_len = btrfs_dir_name_len(leaf, dir_item);
+		/*
+		 * The length comes from the (possibly damaged) filesystem;
+		 * anything larger than the buffer would overflow filename[].
+		 */
+		if (name_len < 0 || name_len > BTRFS_NAME_LEN) {
+			fprintf(stderr, "Invalid name length %d in %s, "
+				"skipping entry\n", name_len, dir);
+			goto next;
+		}
+		read_extent_buffer(leaf, filename, name_ptr, name_len);
+		filename[name_len] = '\0';
+		type = btrfs_dir_type(leaf, dir_item);
+		btrfs_dir_item_key_to_cpu(leaf, dir_item, &location);
+
+		snprintf(path_name, 4096, "%s/%s", dir, filename);
+
+
+		/*
+		 * At this point we're only going to restore directories and
+		 * files, no symlinks or anything else.
+		 */
+		if (type == BTRFS_FT_REG_FILE) {
+			if (!overwrite) {
+				/* print the "-o" hint only once per run */
+				static int warn = 0;
+				struct stat st;
+
+				ret = stat(path_name, &st);
+				if (!ret) {
+					loops = 0;
+					if (verbose || !warn)
+						printf("Skipping existing file"
+						       " %s\n", path_name);
+					if (warn)
+						goto next;
+					printf("If you wish to overwrite use "
+					       "the -o option to overwrite\n");
+					warn = 1;
+					goto next;
+				}
+				ret = 0;
+			}
+			if (verbose)
+				printf("Restoring %s\n", path_name);
+			fd = open(path_name, O_CREAT|O_WRONLY, 0644);
+			if (fd < 0) {
+				fprintf(stderr, "Error creating %s: %d\n",
+					path_name, errno);
+				if (ignore_errors)
+					goto next;
+				btrfs_free_path(path);
+				return -1;
+			}
+			loops = 0;
+			ret = copy_file(root, fd, &location, path_name);
+			close(fd);
+			if (ret) {
+				if (ignore_errors)
+					goto next;
+				btrfs_free_path(path);
+				return ret;
+			}
+		} else if (type == BTRFS_FT_DIR) {
+			struct btrfs_root *search_root = root;
+			/*
+			 * path_name is overwritten by the recursive call, so
+			 * keep a private copy of this directory's path.
+			 */
+			char *sub_dir = strdup(path_name);
+
+			if (!sub_dir) {
+				fprintf(stderr, "Ran out of memory\n");
+				btrfs_free_path(path);
+				return -1;
+			}
+
+			if (location.type == BTRFS_ROOT_ITEM_KEY) {
+				/*
+				 * If we are a snapshot and this is the index
+				 * object to ourselves just skip it.
+				 */
+				if (location.objectid ==
+				    root->root_key.objectid) {
+					free(sub_dir);
+					goto next;
+				}
+
+				search_root = btrfs_read_fs_root(root->fs_info,
+								 &location);
+				if (IS_ERR(search_root)) {
+					free(sub_dir);
+					fprintf(stderr, "Error reading "
+						"subvolume %s: %lu\n",
+						path_name,
+						PTR_ERR(search_root));
+					if (ignore_errors)
+						goto next;
+					/* do not leak the path on this exit */
+					btrfs_free_path(path);
+					return PTR_ERR(search_root);
+				}
+
+				/*
+				 * A subvolume will have a key.offset of 0, a
+				 * snapshot will have key.offset of a transid.
+				 */
+				if (search_root->root_key.offset != 0 &&
+				    get_snaps == 0) {
+					free(sub_dir);
+					printf("Skipping snapshot %s\n",
+					       filename);
+					goto next;
+				}
+				location.objectid = BTRFS_FIRST_FREE_OBJECTID;
+			}
+
+			if (verbose)
+				printf("Restoring %s\n", path_name);
+
+			errno = 0;
+			ret = mkdir(path_name, 0755);
+			if (ret && errno != EEXIST) {
+				free(sub_dir);
+				fprintf(stderr, "Error mkdiring %s: %d\n",
+					path_name, errno);
+				if (ignore_errors)
+					goto next;
+				btrfs_free_path(path);
+				return -1;
+			}
+			loops = 0;
+			ret = search_dir(search_root, &location, sub_dir);
+			free(sub_dir);
+			if (ret) {
+				if (ignore_errors)
+					goto next;
+				btrfs_free_path(path);
+				return ret;
+			}
+		}
+next:
+		path->slots[0]++;
+	}
+
+	if (verbose)
+		printf("Done searching %s\n", dir);
+	btrfs_free_path(path);
+	return 0;
+}
+
+/*
+ * Print the command line synopsis to stderr.  The option list mirrors the
+ * getopt() string used by main(): "sviot:u:df:".
+ */
+static void usage(void)
+{
+	fprintf(stderr, "Usage: restore [-sviod] [-t tree_location] "
+		"[-f fs_location] [-u super_mirror] <device> "
+		"<directory>\n");
+}
+
+/*
+ * Open the filesystem on @dev, trying the super block copies one after the
+ * other, starting with copy number @super_mirror.
+ *
+ * @root_location is handed unchanged to open_ctree_recovery().
+ *
+ * Returns the first root that could be opened, or NULL when none of the
+ * copies below BTRFS_SUPER_MIRROR_MAX worked.
+ */
+static struct btrfs_root *open_fs(const char *dev, u64 root_location, int super_mirror)
+{
+	struct btrfs_root *fs_root = NULL;
+	int mirror = super_mirror;
+
+	while (mirror < BTRFS_SUPER_MIRROR_MAX) {
+		u64 sb_bytenr = btrfs_sb_offset(mirror);
+
+		fs_root = open_ctree_recovery(dev, sb_bytenr, root_location);
+		if (fs_root)
+			break;
+		fprintf(stderr, "Could not open root, trying backup super\n");
+		mirror++;
+	}
+
+	return fs_root;
+}
+
+/*
+ * Find the first BTRFS_DIR_INDEX_KEY item in @root and store the objectid
+ * of its key in @objectid.
+ *
+ * Returns 0 when an item was found (and *objectid was set), a negative
+ * value on error and a positive value when the tree has no more leaves.
+ */
+static int find_first_dir(struct btrfs_root *root, u64 *objectid)
+{
+	struct btrfs_path *path;
+	struct btrfs_key found_key;
+	struct btrfs_key key;
+	int ret;
+	int i;
+
+	key.objectid = 0;
+	key.type = BTRFS_DIR_INDEX_KEY;
+	key.offset = 0;
+
+	path = btrfs_alloc_path();
+	if (!path) {
+		/* nothing to free yet, so do not go through the out label */
+		fprintf(stderr, "Ran out of memory\n");
+		return -1;
+	}
+
+	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+	if (ret < 0) {
+		fprintf(stderr, "Error searching %d\n", ret);
+		goto out;
+	}
+
+	if (!path->nodes[0]) {
+		fprintf(stderr, "No leaf!\n");
+		/* ret may be 0 here, which would wrongly signal success */
+		ret = -1;
+		goto out;
+	}
+
+	while (1) {
+		/* look through the rest of the current leaf */
+		for (i = path->slots[0];
+		     i < btrfs_header_nritems(path->nodes[0]); i++) {
+			btrfs_item_key_to_cpu(path->nodes[0], &found_key, i);
+			if (found_key.type != key.type)
+				continue;
+
+			printf("Using objectid %Lu for first dir\n",
+			       found_key.objectid);
+			*objectid = found_key.objectid;
+			ret = 0;
+			goto out;
+		}
+
+		/* nothing in this leaf, move on to the next usable one */
+		do {
+			ret = next_leaf(root, path);
+			if (ret < 0) {
+				fprintf(stderr, "Error getting next leaf %d\n",
+					ret);
+				goto out;
+			} else if (ret > 0) {
+				fprintf(stderr, "No more leaves\n");
+				goto out;
+			}
+		} while (!path->nodes[0]);
+	}
+out:
+	btrfs_free_path(path);
+	return ret;
+}
+
+/*
+ * Entry point: restore files from <device> into <directory>.
+ *
+ * Options (see the getopt() string below):
+ *   -s  set get_snaps        -v  increase verbose
+ *   -i  set ignore_errors    -o  set overwrite
+ *   -t  tree location passed to open_fs()
+ *   -f  location of a tree block that replaces root->node
+ *   -u  number of the first super block copy to try
+ *   -d  start from the first directory found by find_first_dir()
+ *
+ * Returns the result of search_dir(), or a non-zero value on earlier errors.
+ */
+int main(int argc, char **argv)
+{
+	struct btrfs_root *root;
+	struct btrfs_key key;
+	char dir_name[128];
+	u64 tree_location = 0;
+	u64 fs_location = 0;
+	int len;
+	int ret;
+	int opt;
+	int super_mirror = 0;
+	int find_dir = 0;
+
+	while ((opt = getopt(argc, argv, "sviot:u:df:")) != -1) {
+		switch (opt) {
+			case 's':
+				get_snaps = 1;
+				break;
+			case 'v':
+				verbose++;
+				break;
+			case 'i':
+				ignore_errors = 1;
+				break;
+			case 'o':
+				overwrite = 1;
+				break;
+			case 't':
+				errno = 0;
+				tree_location = (u64)strtoll(optarg, NULL, 10);
+				if (errno != 0) {
+					fprintf(stderr, "Tree location not valid\n");
+					exit(1);
+				}
+				break;
+			case 'f':
+				errno = 0;
+				fs_location = (u64)strtoll(optarg, NULL, 10);
+				if (errno != 0) {
+					fprintf(stderr, "Fs location not valid\n");
+					exit(1);
+				}
+				break;
+			case 'u':
+				errno = 0;
+				super_mirror = (int)strtol(optarg, NULL, 10);
+				/* a negative copy number is as invalid as a too large one */
+				if (errno != 0 || super_mirror < 0 ||
+				    super_mirror >= BTRFS_SUPER_MIRROR_MAX) {
+					fprintf(stderr, "Super mirror not "
+						"valid\n");
+					exit(1);
+				}
+				break;
+			case 'd':
+				find_dir = 1;
+				break;
+			default:
+				usage();
+				exit(1);
+		}
+	}
+
+	if (optind + 1 >= argc) {
+		usage();
+		exit(1);
+	}
+
+	/* Refuse names that do not fit instead of silently truncating them */
+	if (strlen(argv[optind + 1]) >= sizeof(dir_name)) {
+		fprintf(stderr, "Directory name %s is too long\n",
+			argv[optind + 1]);
+		exit(1);
+	}
+
+	if ((ret = check_mounted(argv[optind])) < 0) {
+		/* ret is a negated errno value, strerror() wants it positive */
+		fprintf(stderr, "Could not check mount status: %s\n",
+			strerror(-ret));
+		return ret;
+	} else if (ret) {
+		fprintf(stderr, "%s is currently mounted.  Aborting.\n", argv[optind]);
+		return -EBUSY;
+	}
+
+	root = open_fs(argv[optind], tree_location, super_mirror);
+	if (root == NULL)
+		return 1;
+
+	if (fs_location != 0) {
+		free_extent_buffer(root->node);
+		root->node = read_tree_block(root, fs_location, 4096, 0);
+		if (!root->node) {
+			fprintf(stderr, "Failed to read fs location\n");
+			/* ret is still 0 from check_mounted(); report failure */
+			ret = 1;
+			goto out;
+		}
+	}
+
+	printf("Root objectid is %Lu\n", root->objectid);
+
+	memset(path_name, 0, 4096);
+
+	strncpy(dir_name, argv[optind + 1], sizeof(dir_name) - 1);
+	dir_name[sizeof(dir_name) - 1] = '\0';
+
+	/* Strip the trailing / on the dir name; "/" becomes the empty string */
+	len = strlen(dir_name);
+	while (len > 0 && dir_name[len - 1] == '/')
+		dir_name[--len] = '\0';
+
+	if (find_dir) {
+		ret = find_first_dir(root, &key.objectid);
+		if (ret)
+			goto out;
+	} else {
+		key.objectid = BTRFS_FIRST_FREE_OBJECTID;
+	}
+
+	ret = search_dir(root->fs_info->fs_root, &key, dir_name);
+
+out:
+	close_ctree(root);
+	return ret;
+}
diff --git a/utils.c b/utils.c
index 2f4c6e1..ee7fa1b 100644
--- a/utils.c
+++ b/utils.c
@@ -31,6 +31,10 @@
 #include <fcntl.h>
 #include <unistd.h>
 #include <mntent.h>
+#include <linux/loop.h>
+#include <linux/major.h>
+#include <linux/kdev_t.h>
+#include <limits.h>
 #include "kerncompat.h"
 #include "radix-tree.h"
 #include "ctree.h"
@@ -46,6 +50,20 @@
 static inline int ioctl(int fd, int define, u64 *size) { return 0; }
 #endif
 
+#ifndef BLKDISCARD
+#define BLKDISCARD	_IO(0x12,119)
+#endif
+
+/*
+ * Issue the BLKDISCARD ioctl on @fd with the range { @start, @len }.
+ * Returns 0 on success or the (positive) errno value when the ioctl fails.
+ */
+static int
+discard_blocks(int fd, u64 start, u64 len)
+{
+	u64 range[2];
+
+	range[0] = start;
+	range[1] = len;
+
+	return (ioctl(fd, BLKDISCARD, &range) < 0) ? errno : 0;
+}
+
 static u64 reference_root_table[] = {
 	[1] =	BTRFS_ROOT_TREE_OBJECTID,
 	[2] =	BTRFS_EXTENT_TREE_OBJECTID,
@@ -103,8 +121,9 @@ int make_btrfs(int fd, const char *device, const char *label,
 	btrfs_set_super_stripesize(&super, stripesize);
 	btrfs_set_super_csum_type(&super, BTRFS_CSUM_TYPE_CRC32);
 	btrfs_set_super_chunk_root_generation(&super, 1);
+	btrfs_set_super_cache_generation(&super, -1);
 	if (label)
-		strcpy(super.label, label);
+		strncpy(super.label, label, BTRFS_LABEL_SIZE - 1);
 
 	buf = malloc(sizeof(*buf) + max(sectorsize, leafsize));
 
@@ -193,6 +212,8 @@ int make_btrfs(int fd, const char *device, const char *label,
 	BUG_ON(ret != leafsize);
 
 	/* create the items for the extent tree */
+	memset(buf->data+sizeof(struct btrfs_header), 0,
+		leafsize-sizeof(struct btrfs_header));
 	nritems = 0;
 	itemoff = __BTRFS_LEAF_DATA_SIZE(leafsize);
 	for (i = 1; i < 7; i++) {
@@ -238,6 +259,8 @@ int make_btrfs(int fd, const char *device, const char *label,
 	BUG_ON(ret != leafsize);
 
 	/* create the chunk tree */
+	memset(buf->data+sizeof(struct btrfs_header), 0,
+		leafsize-sizeof(struct btrfs_header));
 	nritems = 0;
 	item_size = sizeof(*dev_item);
 	itemoff = __BTRFS_LEAF_DATA_SIZE(leafsize) - item_size;
@@ -319,6 +342,8 @@ int make_btrfs(int fd, const char *device, const char *label,
 	ret = pwrite(fd, buf->data, leafsize, blocks[3]);
 
 	/* create the device tree */
+	memset(buf->data+sizeof(struct btrfs_header), 0,
+		leafsize-sizeof(struct btrfs_header));
 	nritems = 0;
 	itemoff = __BTRFS_LEAF_DATA_SIZE(leafsize) -
 		sizeof(struct btrfs_dev_extent);
@@ -352,6 +377,8 @@ int make_btrfs(int fd, const char *device, const char *label,
 	ret = pwrite(fd, buf->data, leafsize, blocks[4]);
 
 	/* create the FS root */
+	memset(buf->data+sizeof(struct btrfs_header), 0,
+		leafsize-sizeof(struct btrfs_header));
 	btrfs_set_header_bytenr(buf, blocks[5]);
 	btrfs_set_header_owner(buf, BTRFS_FS_TREE_OBJECTID);
 	btrfs_set_header_nritems(buf, 0);
@@ -360,6 +387,8 @@ int make_btrfs(int fd, const char *device, const char *label,
 	BUG_ON(ret != leafsize);
 
 	/* finally create the csum root */
+	memset(buf->data+sizeof(struct btrfs_header), 0,
+		leafsize-sizeof(struct btrfs_header));
 	btrfs_set_header_bytenr(buf, blocks[6]);
 	btrfs_set_header_owner(buf, BTRFS_CSUM_TREE_OBJECTID);
 	btrfs_set_header_nritems(buf, 0);
@@ -507,7 +536,8 @@ int btrfs_add_to_fsid(struct btrfs_trans_handle *trans,
 	return 0;
 }
 
-int btrfs_prepare_device(int fd, char *file, int zero_end, u64 *block_count_ret)
+int btrfs_prepare_device(int fd, char *file, int zero_end, u64 *block_count_ret,
+			 int *mixed)
 {
 	u64 block_count;
 	u64 bytenr;
@@ -527,11 +557,17 @@ int btrfs_prepare_device(int fd, char *file, int zero_end, u64 *block_count_ret)
 	}
 	zero_end = 1;
 
-	if (block_count < 256 * 1024 * 1024) {
-		fprintf(stderr, "device %s is too small "
-		        "(must be at least 256 MB)\n", file);
-		exit(1);
+	if (block_count < 1024 * 1024 * 1024 && !(*mixed)) {
+		printf("SMALL VOLUME: forcing mixed metadata/data groups\n");
+		*mixed = 1;
 	}
+
+	/*
+	 * We intentionally ignore errors from the discard ioctl.  It is
+	 * not necessary for the mkfs functionality but just an optimization.
+	 */
+	discard_blocks(fd, 0, block_count);
+
 	ret = zero_dev_start(fd);
 	if (ret) {
 		fprintf(stderr, "failed to zero device start %d\n", ret);
@@ -561,6 +597,7 @@ int btrfs_make_root_dir(struct btrfs_trans_handle *trans,
 {
 	int ret;
 	struct btrfs_inode_item inode_item;
+	time_t now = time(NULL);
 
 	memset(&inode_item, 0, sizeof(inode_item));
 	btrfs_set_stack_inode_generation(&inode_item, trans->transid);
@@ -568,6 +605,14 @@ int btrfs_make_root_dir(struct btrfs_trans_handle *trans,
 	btrfs_set_stack_inode_nlink(&inode_item, 1);
 	btrfs_set_stack_inode_nbytes(&inode_item, root->leafsize);
 	btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0555);
+	btrfs_set_stack_timespec_sec(&inode_item.atime, now);
+	btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
+	btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
+	btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
+	btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
+	btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
+	btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
+	btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
 
 	if (root->fs_info->tree_root == root)
 		btrfs_set_super_root_dir(&root->fs_info->super_copy, objectid);
@@ -586,58 +631,281 @@ error:
 	return ret;
 }
 
+/*
+ * Tells whether @device is a loop device.
+ * Returns 1 for a block device whose major number is LOOP_MAJOR, 0 for
+ * anything else and -errno when stat() fails.
+ */
+int is_loop_device (const char* device) {
+	struct stat st;
+
+	if (stat(device, &st) < 0)
+		return -errno;
+
+	if (!S_ISBLK(st.st_mode))
+		return 0;
+
+	return MAJOR(st.st_rdev) == LOOP_MAJOR;
+}
+
+
+/*
+ * Takes a loop device path (e.g. /dev/loop0) and stores the name of the
+ * associated file (e.g. /images/my_btrfs.img) in @loop_file.
+ *
+ * At most @max_len bytes are written and, for @max_len > 0, the result is
+ * always NUL terminated.
+ *
+ * Returns 0 on success, -errno when the device cannot be opened or the
+ * LOOP_GET_STATUS ioctl fails.
+ */
+int resolve_loop_device(const char* loop_dev, char* loop_file, int max_len)
+{
+	int loop_fd;
+	int ret = 0;
+	struct loop_info loopinfo;
+
+	if ((loop_fd = open(loop_dev, O_RDONLY)) < 0)
+		return -errno;
+
+	/* pick up errno right away, close() below may overwrite it */
+	if (ioctl(loop_fd, LOOP_GET_STATUS, &loopinfo) != 0)
+		ret = -errno;
+	close(loop_fd);
+
+	if (ret)
+		return ret;
+
+	strncpy(loop_file, loopinfo.lo_name, max_len);
+	/* strncpy() does not terminate the string when the source is too long */
+	if (max_len > 0)
+		loop_file[max_len - 1] = '\0';
+
+	return 0;
+}
+
+/*
+ * Compares two paths.  Returns 1 when they resolve to the same real path,
+ * name the same block device, or share device and inode number (hard
+ * links).  Returns 0 when they differ or when stat() reports ENOENT, and
+ * -errno on any other failure of realpath() or stat().
+ */
+int is_same_blk_file(const char* a, const char* b)
+{
+	struct stat sa, sb;
+	char path_a[PATH_MAX];
+	char path_b[PATH_MAX];
+	int same_blkdev;
+	int same_inode;
+
+	if (realpath(a, path_a) == NULL)
+		return -errno;
+	if (realpath(b, path_b) == NULL)
+		return -errno;
+
+	/* Identical path? */
+	if (!strcmp(path_a, path_b))
+		return 1;
+
+	if (stat(a, &sa) < 0 || stat(b, &sb) < 0)
+		return (errno == ENOENT) ? 0 : -errno;
+
+	/* Same blockdevice? */
+	same_blkdev = S_ISBLK(sa.st_mode) && S_ISBLK(sb.st_mode) &&
+		      sa.st_rdev == sb.st_rdev;
+
+	/* Hardlink? */
+	same_inode = sa.st_dev == sb.st_dev && sa.st_ino == sb.st_ino;
+
+	return same_blkdev || same_inode;
+}
+
+/*
+ * Like is_same_blk_file(), but every argument that is a loop device is
+ * first replaced by the file name reported by resolve_loop_device().
+ *
+ * Returns 0 as soon as is_loop_device() reports -ENOENT for an argument,
+ * any other negative value from the helpers as is, and otherwise the result
+ * of is_same_blk_file() on the (resolved) names.
+ */
+int is_same_loop_file(const char* a, const char* b)
+{
+	const char *given[2] = { a, b };
+	const char *final[2];
+	char resolved[2][PATH_MAX];
+	int ret;
+	int i;
+
+	/* a is handled completely before b is looked at */
+	for (i = 0; i < 2; i++) {
+		ret = is_loop_device(given[i]);
+		if (ret == -ENOENT)
+			return 0;
+		if (ret < 0)
+			return ret;
+
+		if (!ret) {
+			/* not a loop device, use the name as it is */
+			final[i] = given[i];
+			continue;
+		}
+
+		ret = resolve_loop_device(given[i], resolved[i],
+					  sizeof(resolved[i]));
+		if (ret < 0)
+			return ret;
+		final[i] = resolved[i];
+	}
+
+	return is_same_blk_file(final[0], final[1]);
+}
+
+/*
+ * Returns 1 when @filename exists and is a block device or a regular file,
+ * 0 when it is something else or stat() reports ENOENT, and -errno when
+ * stat() fails for any other reason.
+ */
+int is_existing_blk_or_reg_file(const char* filename)
+{
+	struct stat info;
+
+	if (stat(filename, &info) < 0)
+		return (errno == ENOENT) ? 0 : -errno;
+
+	if (S_ISBLK(info.st_mode))
+		return 1;
+	return S_ISREG(info.st_mode) ? 1 : 0;
+}
+
+/*
+ * Compares @file against the name of every device on @fs_devices->devices
+ * with is_same_loop_file().
+ *
+ * Returns the first non-zero result (positive for a match, negative for an
+ * error) or 0 when the whole list was walked without one.
+ */
+int blk_file_in_dev_list(struct btrfs_fs_devices* fs_devices, const char* file)
+{
+	struct list_head *pos;
+	int ret = 0;
+
+	list_for_each(pos, &fs_devices->devices) {
+		struct btrfs_device *dev;
+
+		dev = list_entry(pos, struct btrfs_device, dev_list);
+		ret = is_same_loop_file(dev->name, file);
+		if (ret)
+			break;
+	}
+
+	return ret;
+}
+
 /*
  * returns 1 if the device was mounted, < 0 on error or 0 if everything
- * is safe to continue.  TODO, this should also scan multi-device filesystems
+ * is safe to continue.
  */
-int check_mounted(char *file)
+int check_mounted(const char* file)
 {
-	struct mntent *mnt;
-	struct stat st_buf;
-	dev_t file_dev = 0;
-	dev_t file_rdev = 0;
-	ino_t file_ino = 0;
+	int fd;
+	int ret;
+	/* the descriptor is only needed for check_mounted_where() */
+	fd = open(file, O_RDONLY);
+	if (fd < 0) {
+		fprintf (stderr, "check_mounted(): Could not open %s\n", file);
+		return -errno;
+	}
+	/* no mount point buffer and no device list are requested */
+	ret =  check_mounted_where(fd, file, NULL, 0, NULL);
+	close(fd);
+
+	return ret;
+}
+/*
+ * Checks /proc/mounts for an entry that uses @file (already opened as @fd).
+ *
+ * @file is scanned with btrfs_scan_one_device(); a non-negative result is
+ * taken to mean that it holds a btrfs filesystem.  In that case only mount
+ * entries of type "btrfs" are looked at and each is compared against the
+ * scanned device list.  Otherwise every entry whose source is an existing
+ * block or regular file is compared with @file directly.
+ *
+ * @where/@size: optional buffer that receives the mount directory of the
+ *               matching entry via strncpy(), so it is not NUL terminated
+ *               when the directory name is @size bytes or longer
+ * @fs_dev_ret:  optional, receives the device list filled in by the scan
+ *
+ * Returns 1 if a matching entry was found, 0 if not, and a non-zero error
+ * value from the helpers otherwise.
+ */
+int check_mounted_where(int fd, const char *file, char *where, int size,
+			struct btrfs_fs_devices **fs_dev_ret)
+{
+	int ret;
+	u64 total_devs = 1;
+	int is_btrfs;
+	struct btrfs_fs_devices *fs_devices_mnt = NULL;
 	FILE *f;
-	int ret = 0;
+	struct mntent *mnt;
+
+	/* scan the initial device */
+	ret = btrfs_scan_one_device(fd, file, &fs_devices_mnt,
+				    &total_devs, BTRFS_SUPER_INFO_OFFSET);
+	is_btrfs = (ret >= 0); /* a failed scan means: not treated as btrfs */
 
+	/* scan other devices */
+	if (is_btrfs && total_devs > 1) {
+		if((ret = btrfs_scan_for_fsid(fs_devices_mnt, total_devs, 1)))
+			return ret;
+	}
+
+	/* iterate over the list of currently mounted filesystems */
 	if ((f = setmntent ("/proc/mounts", "r")) == NULL)
 		return -errno;
 
-	if (stat(file, &st_buf) < 0) {
-		return -errno;
-	} else {
-		if (S_ISBLK(st_buf.st_mode)) {
-			file_rdev = st_buf.st_rdev;
+	while ((mnt = getmntent (f)) != NULL) {
+		if(is_btrfs) {
+			if(strcmp(mnt->mnt_type, "btrfs") != 0)
+				continue;
+
+			ret = blk_file_in_dev_list(fs_devices_mnt, mnt->mnt_fsname);
 		} else {
-			file_dev = st_buf.st_dev;
-			file_ino = st_buf.st_ino;
+			/* ignore entries in the mount table that are not
+			   associated with a file*/
+			if((ret = is_existing_blk_or_reg_file(mnt->mnt_fsname)) < 0)
+				goto out_mntloop_err;
+			else if(!ret)
+				continue;
+
+			ret = is_same_loop_file(file, mnt->mnt_fsname);
 		}
-	}
 
-	while ((mnt = getmntent (f)) != NULL) {
-		if (strcmp(file, mnt->mnt_fsname) == 0)
+		if(ret < 0)
+			goto out_mntloop_err;
+		else if(ret) /* match: leave the loop with mnt still set */
 			break;
-
-		if (stat(mnt->mnt_fsname, &st_buf) == 0) {
-			if (S_ISBLK(st_buf.st_mode)) {
-				if (file_rdev && (file_rdev == st_buf.st_rdev))
-					break;
-			} else if (file_dev && ((file_dev == st_buf.st_dev) &&
-						(file_ino == st_buf.st_ino))) {
-					break;
-			}
-		}
 	}
 
-	if (mnt) {
-		/* found an entry in mnt table */
-		ret = 1;
-	}
+	/* Did we find an entry in mnt table? */
+	if (mnt && size && where)
+		strncpy(where, mnt->mnt_dir, size);
+	if (fs_dev_ret)
+		*fs_dev_ret = fs_devices_mnt;
+
+	ret = (mnt != NULL); /* mnt is NULL when the table was exhausted */
 
+out_mntloop_err:
 	endmntent (f);
+	/* on the error path ret still holds the helper's negative result */
 	return ret;
 }
 
+/*
+ * Gets the mount point of the filesystem that is using the specified device.
+ *
+ * /proc/mounts is searched for the first entry whose source name equals
+ * @dev; its mount directory is copied to @mntpt.  At most @size bytes are
+ * written and, for @size > 0, the result is always NUL terminated.
+ *
+ * Returns 0 if everything is good, <0 if we have an error (-errno when
+ * /proc/mounts cannot be opened, -1 when no entry matches).
+ * TODO: Fix this function and check_mounted to work with multiple drive BTRFS
+ * setups.
+ */
+int get_mountpt(char *dev, char *mntpt, size_t size)
+{
+	struct mntent *mnt;
+	FILE *f;
+	/* We report an error unless an entry is found below */
+	int ret = -1;
+
+	f = setmntent("/proc/mounts", "r");
+	if (f == NULL)
+		return -errno;
+
+	while ((mnt = getmntent(f)) != NULL) {
+		if (strcmp(dev, mnt->mnt_fsname) != 0)
+			continue;
+
+		if (size > 0) {
+			/* strncpy() alone would not terminate a long name */
+			strncpy(mntpt, mnt->mnt_dir, size - 1);
+			mntpt[size - 1] = '\0';
+		}
+		ret = 0;
+		break;
+	}
+
+	/* release the stream opened by setmntent() */
+	endmntent(f);
+
+	return ret;
+}
+
 struct pending_dir {
 	struct list_head list;
 	char name[256];
@@ -648,6 +916,7 @@ void btrfs_register_one_device(char *fname)
 	struct btrfs_ioctl_vol_args args;
 	int fd;
 	int ret;
+	int e;
 
 	fd = open("/dev/btrfs-control", O_RDONLY);
 	if (fd < 0) {
@@ -655,8 +924,13 @@ void btrfs_register_one_device(char *fname)
 			"skipping device registration\n");
 		return;
 	}
-	strcpy(args.name, fname);
+	strncpy(args.name, fname, BTRFS_PATH_NAME_MAX);
 	ret = ioctl(fd, BTRFS_IOC_SCAN_DEV, &args);
+	e = errno;
+	if(ret<0){
+		fprintf(stderr, "ERROR: unable to scan the device '%s' - %s\n",
+			fname, strerror(e));
+	}
 	close(fd);
 }
 
@@ -694,7 +968,7 @@ again:
 	}
 	dirp = opendir(dirname);
 	if (!dirp) {
-		fprintf(stderr, "Unable to open /sys/block for scanning\n");
+		fprintf(stderr, "Unable to open %s for scanning\n", dirname);
 		return -ENOENT;
 	}
 	while(1) {
@@ -729,7 +1003,8 @@ again:
 		}
 		fd = open(fullpath, O_RDONLY);
 		if (fd < 0) {
-			fprintf(stderr, "failed to read %s\n", fullpath);
+			fprintf(stderr, "failed to read %s: %s\n", fullpath,
+					strerror(errno));
 			continue;
 		}
 		ret = btrfs_scan_one_device(fd, fullpath, &tmp_devices,
@@ -759,7 +1034,12 @@ fail:
 int btrfs_scan_for_fsid(struct btrfs_fs_devices *fs_devices, u64 total_devs,
 			int run_ioctls)
 {
-	return btrfs_scan_one_dir("/dev", run_ioctls);
+	int ret;
+
+	ret = btrfs_scan_block_devices(run_ioctls);
+	if (ret)
+		ret = btrfs_scan_one_dir("/dev", run_ioctls);
+	return ret;
 }
 
 int btrfs_device_already_in_root(struct btrfs_root *root, int fd,
@@ -798,6 +1078,7 @@ static char *size_strs[] = { "", "KB", "MB", "GB", "TB",
 char *pretty_sizes(u64 size)
 {
 	int num_divs = 0;
+        int pretty_len = 16;
 	u64 last_size = size;
 	u64 fract_size = size;
 	float fraction;
@@ -815,8 +1096,113 @@ char *pretty_sizes(u64 size)
 		return NULL;
 
 	fraction = (float)fract_size / 1024;
-	pretty = malloc(16);
-	sprintf(pretty, "%.2f%s", fraction, size_strs[num_divs-1]);
+	pretty = malloc(pretty_len);
+	snprintf(pretty, pretty_len, "%.2f%s", fraction, size_strs[num_divs-1]);
 	return pretty;
 }
 
+/*
+ * Checks to make sure that the label matches our requirements.
+ *
+ * The label has to fit, together with its terminating NUL, into
+ * BTRFS_LABEL_SIZE bytes; make_btrfs() copies at most BTRFS_LABEL_SIZE - 1
+ * characters of it.
+ *
+ * Returns:
+ *    0    if everything is safe and usable
+ *   -1    if the label is too long
+ *   -2    if the label contains an invalid character ('/' or '\')
+ */
+int check_label(char *input)
+{
+	int i;
+	int len = strlen(input);
+
+	/* leave room for the terminating NUL */
+	if (len > BTRFS_LABEL_SIZE - 1)
+		return -1;
+
+	for (i = 0; i < len; i++) {
+		if (input[i] == '/' || input[i] == '\\')
+			return -2;
+	}
+
+	return 0;
+}
+
+/*
+ * Scans the devices listed in /proc/partitions for btrfs filesystems.
+ *
+ * The list is read twice: the first pass handles every name except
+ * /dev/dm-* and /dev/md*, the second pass handles only those.  Each name
+ * that is a block device and can be opened is handed to
+ * btrfs_scan_one_device(); if that returns 0 and @run_ioctl is > 0 the
+ * device is also passed to btrfs_register_one_device().
+ *
+ * Returns 0 after both passes, -ENOENT when /proc/partitions cannot be
+ * opened or its two header lines cannot be read.
+ */
+int btrfs_scan_block_devices(int run_ioctl)
+{
+
+	struct stat st;
+	int ret;
+	int fd;
+	struct btrfs_fs_devices *tmp_devices;
+	u64 num_devices;
+	FILE *proc_partitions;
+	int i;
+	char buf[1024];
+	char fullpath[110];
+	int scans = 0;
+	int special;
+
+scan_again:
+	proc_partitions = fopen("/proc/partitions","r");
+	if (!proc_partitions) {
+		fprintf(stderr, "Unable to open '/proc/partitions' for scanning\n");
+		return -ENOENT;
+	}
+	/* skip the header */
+	for (i = 0; i < 2; i++) {
+		if (!fgets(buf, sizeof(buf), proc_partitions)) {
+			fprintf(stderr, "Unable to read '/proc/partitions' for scanning\n");
+			fclose(proc_partitions);
+			return -ENOENT;
+		}
+	}
+
+	strcpy(fullpath,"/dev/");
+	while (fgets(buf, sizeof(buf), proc_partitions)) {
+		/*
+		 * Without a device name on the line fullpath would still
+		 * hold the previous name, which would then be scanned again.
+		 */
+		if (sscanf(buf," %*d %*d %*d %99s", fullpath+5) != 1)
+			continue;
+
+		/*
+		 * multipath and MD devices may register as a btrfs filesystem
+		 * both through the original block device and through
+		 * the special (/dev/mapper or /dev/mdX) entry.
+		 * This scans the special entries last
+		 */
+		special = strncmp(fullpath, "/dev/dm-", strlen("/dev/dm-")) == 0;
+		if (!special)
+			special = strncmp(fullpath, "/dev/md", strlen("/dev/md")) == 0;
+
+		if (scans == 0 && special)
+			continue;
+		if (scans > 0 && !special)
+			continue;
+
+		ret = lstat(fullpath, &st);
+		if (ret < 0) {
+			fprintf(stderr, "failed to stat %s\n", fullpath);
+			continue;
+		}
+		if (!S_ISBLK(st.st_mode)) {
+			continue;
+		}
+
+		fd = open(fullpath, O_RDONLY);
+		if (fd < 0) {
+			fprintf(stderr, "failed to read %s\n", fullpath);
+			continue;
+		}
+		ret = btrfs_scan_one_device(fd, fullpath, &tmp_devices,
+					    &num_devices,
+					    BTRFS_SUPER_INFO_OFFSET);
+		if (ret == 0 && run_ioctl > 0) {
+			btrfs_register_one_device(fullpath);
+		}
+		close(fd);
+	}
+
+	fclose(proc_partitions);
+
+	/* second pass: only the special (dm/md) entries */
+	if (scans == 0) {
+		scans++;
+		goto scan_again;
+	}
+	return 0;
+}
+
diff --git a/utils.h b/utils.h
index 7ff542b..c5f55e1 100644
--- a/utils.h
+++ b/utils.h
@@ -27,7 +27,7 @@ int make_btrfs(int fd, const char *device, const char *label,
 int btrfs_make_root_dir(struct btrfs_trans_handle *trans,
 			struct btrfs_root *root, u64 objectid);
 int btrfs_prepare_device(int fd, char *file, int zero_end,
-			 u64 *block_count_ret);
+			 u64 *block_count_ret, int *mixed);
 int btrfs_add_to_fsid(struct btrfs_trans_handle *trans,
 		      struct btrfs_root *root, int fd, char *path,
 		      u64 block_count, u32 io_width, u32 io_align,
@@ -36,8 +36,14 @@ int btrfs_scan_for_fsid(struct btrfs_fs_devices *fs_devices, u64 total_devs,
 			int run_ioctls);
 void btrfs_register_one_device(char *fname);
 int btrfs_scan_one_dir(char *dirname, int run_ioctl);
-int check_mounted(char *devicename);
+int check_mounted(const char *devicename);
+int check_mounted_where(int fd, const char *file, char *where, int size,
+			struct btrfs_fs_devices **fs_devices_mnt);
 int btrfs_device_already_in_root(struct btrfs_root *root, int fd,
 				 int super_offset);
 char *pretty_sizes(u64 size);
+int check_label(char *input);
+int get_mountpt(char *dev, char *mntpt, size_t size);
+
+int btrfs_scan_block_devices(int run_ioctl);
 #endif
diff --git a/volumes.c b/volumes.c
index 7671855..8dca5e1 100644
--- a/volumes.c
+++ b/volumes.c
@@ -35,18 +35,6 @@ struct stripe {
 	u64 physical;
 };
 
-struct map_lookup {
-	struct cache_extent ce;
-	u64 type;
-	int io_align;
-	int io_width;
-	int stripe_len;
-	int sector_size;
-	int num_stripes;
-	int sub_stripes;
-	struct btrfs_bio_stripe stripes[];
-};
-
 #define map_lookup_size(n) (sizeof(struct map_lookup) + \
 			    (sizeof(struct btrfs_bio_stripe) * (n)))
 
@@ -128,7 +116,14 @@ static int device_list_add(const char *path,
 			btrfs_stack_device_bytes_used(&disk_super->dev_item);
 		list_add(&device->dev_list, &fs_devices->devices);
 		device->fs_devices = fs_devices;
-	}
+	} else if (!device->name || strcmp(device->name, path)) {
+		char *name = strdup(path);
+                if (!name)
+                        return -ENOMEM;
+                kfree(device->name);
+                device->name = name;
+        }
+
 
 	if (found_transid > fs_devices->latest_trans) {
 		fs_devices->latest_devid = devid;
@@ -643,7 +638,6 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
 	struct list_head *cur;
 	struct map_lookup *map;
 	int min_stripe_size = 1 * 1024 * 1024;
-	u64 physical;
 	u64 calc_size = 8 * 1024 * 1024;
 	u64 min_free;
 	u64 max_chunk_size = 4 * calc_size;
@@ -811,7 +805,6 @@ again:
 		btrfs_set_stack_stripe_devid(stripe, device->devid);
 		btrfs_set_stack_stripe_offset(stripe, dev_offset);
 		memcpy(stripe->dev_uuid, device->uuid, BTRFS_UUID_SIZE);
-		physical = dev_offset;
 		index++;
 	}
 	BUG_ON(!list_empty(&private_devs));
@@ -857,6 +850,118 @@ again:
 	return ret;
 }
 
+/*
+ * Allocate a single-stripe chunk of exactly @num_bytes, backed by one device
+ * extent on the first device in the filesystem's device list.
+ *
+ * The chunk item is inserted into the chunk tree, the matching map_lookup is
+ * added to the in-memory mapping tree, and the logical start of the new chunk
+ * is stored in @start.
+ *
+ * Returns 0 on success, -ENODEV if the device list is empty, -ENOMEM if an
+ * allocation fails, or the error reported by find_next_chunk().  Failures of
+ * the device extent, device update or tree/cache insertions trip a BUG_ON().
+ */
+int btrfs_alloc_data_chunk(struct btrfs_trans_handle *trans,
+			   struct btrfs_root *extent_root, u64 *start,
+			   u64 num_bytes, u64 type)
+{
+	u64 dev_offset;
+	struct btrfs_fs_info *info = extent_root->fs_info;
+	struct btrfs_root *chunk_root = info->chunk_root;
+	struct btrfs_stripe *stripes;
+	struct btrfs_device *device;
+	struct btrfs_chunk *chunk;
+	struct list_head *dev_list = &info->fs_devices->devices;
+	struct map_lookup *map;
+	int num_stripes = 1;
+	int sub_stripes = 0;
+	int ret;
+	int index;
+	int stripe_len = 64 * 1024;
+	struct btrfs_key key;
+
+	/* list_entry() on an empty list would yield a bogus device pointer */
+	if (list_empty(dev_list))
+		return -ENODEV;
+
+	key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
+	key.type = BTRFS_CHUNK_ITEM_KEY;
+	ret = find_next_chunk(chunk_root, BTRFS_FIRST_CHUNK_TREE_OBJECTID,
+			      &key.offset);
+	if (ret)
+		return ret;
+
+	chunk = kmalloc(btrfs_chunk_item_size(num_stripes), GFP_NOFS);
+	if (!chunk)
+		return -ENOMEM;
+
+	map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
+	if (!map) {
+		kfree(chunk);
+		return -ENOMEM;
+	}
+
+	stripes = &chunk->stripe;
+
+	/* every stripe is placed on the first device in the list */
+	device = list_entry(dev_list->next, struct btrfs_device, dev_list);
+
+	for (index = 0; index < num_stripes; index++) {
+		struct btrfs_stripe *stripe = stripes + index;
+
+		ret = btrfs_alloc_dev_extent(trans, device,
+			     chunk_root->root_key.objectid,
+			     BTRFS_FIRST_CHUNK_TREE_OBJECTID, key.offset,
+			     num_bytes, &dev_offset);
+		BUG_ON(ret);
+
+		device->bytes_used += num_bytes;
+		ret = btrfs_update_device(trans, device);
+		BUG_ON(ret);
+
+		map->stripes[index].dev = device;
+		map->stripes[index].physical = dev_offset;
+		btrfs_set_stack_stripe_devid(stripe, device->devid);
+		btrfs_set_stack_stripe_offset(stripe, dev_offset);
+		memcpy(stripe->dev_uuid, device->uuid, BTRFS_UUID_SIZE);
+	}
+
+	/* key was set above */
+	btrfs_set_stack_chunk_length(chunk, num_bytes);
+	btrfs_set_stack_chunk_owner(chunk, extent_root->root_key.objectid);
+	btrfs_set_stack_chunk_stripe_len(chunk, stripe_len);
+	btrfs_set_stack_chunk_type(chunk, type);
+	btrfs_set_stack_chunk_num_stripes(chunk, num_stripes);
+	btrfs_set_stack_chunk_io_align(chunk, stripe_len);
+	btrfs_set_stack_chunk_io_width(chunk, stripe_len);
+	btrfs_set_stack_chunk_sector_size(chunk, extent_root->sectorsize);
+	btrfs_set_stack_chunk_sub_stripes(chunk, sub_stripes);
+	map->sector_size = extent_root->sectorsize;
+	map->stripe_len = stripe_len;
+	map->io_align = stripe_len;
+	map->io_width = stripe_len;
+	map->type = type;
+	map->num_stripes = num_stripes;
+	map->sub_stripes = sub_stripes;
+
+	ret = btrfs_insert_item(trans, chunk_root, &key, chunk,
+				btrfs_chunk_item_size(num_stripes));
+	BUG_ON(ret);
+	*start = key.offset;
+
+	map->ce.start = key.offset;
+	map->ce.size = num_bytes;
+
+	ret = insert_existing_cache_extent(
+			   &info->mapping_tree.cache_tree,
+			   &map->ce);
+	BUG_ON(ret);
+
+	kfree(chunk);
+	return ret;
+}
+
 void btrfs_mapping_init(struct btrfs_mapping_tree *tree)
 {
 	cache_tree_init(&tree->cache_tree);
@@ -867,14 +962,12 @@ int btrfs_num_copies(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len)
 	struct cache_extent *ce;
 	struct map_lookup *map;
 	int ret;
-	u64 offset;
 
 	ce = find_first_cache_extent(&map_tree->cache_tree, logical);
 	BUG_ON(!ce);
 	BUG_ON(ce->start > logical || ce->start + ce->size < logical);
 	map = container_of(ce, struct map_lookup, ce);
 
-	offset = logical - ce->start;
 	if (map->type & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1))
 		ret = map->num_stripes;
 	else if (map->type & BTRFS_BLOCK_GROUP_RAID10)
@@ -884,6 +977,39 @@ int btrfs_num_copies(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len)
 	return ret;
 }
 
+/*
+ * Starting from the cache extent that find_first_cache_extent() returns for
+ * *@logical, walk forward and report the first later extent whose chunk type
+ * has BTRFS_BLOCK_GROUP_METADATA set.  The starting extent itself is never
+ * reported.
+ *
+ * On success, 0 is returned and *@logical / *@size receive the start and size
+ * of that extent.  -ENOENT is returned when no such extent exists.
+ */
+int btrfs_next_metadata(struct btrfs_mapping_tree *map_tree, u64 *logical,
+			u64 *size)
+{
+	struct cache_extent *ce;
+
+	ce = find_first_cache_extent(&map_tree->cache_tree, *logical);
+	if (!ce)
+		return -ENOENT;
+
+	for (ce = next_cache_extent(ce); ce; ce = next_cache_extent(ce)) {
+		struct map_lookup *map;
+
+		map = container_of(ce, struct map_lookup, ce);
+		if (!(map->type & BTRFS_BLOCK_GROUP_METADATA))
+			continue;
+
+		*logical = ce->start;
+		*size = ce->size;
+		return 0;
+	}
+
+	return -ENOENT;
+}
+
 int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
 		     u64 chunk_start, u64 physical, u64 devid,
 		     u64 **logical, int *naddrs, int *stripe_len)
@@ -944,6 +1061,14 @@ int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
 		    u64 logical, u64 *length,
 		    struct btrfs_multi_bio **multi_ret, int mirror_num)
 {
+	return __btrfs_map_block(map_tree, rw, logical, length, NULL,
+				 multi_ret, mirror_num);
+}
+
+int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
+		    u64 logical, u64 *length, u64 *type,
+		    struct btrfs_multi_bio **multi_ret, int mirror_num)
+{
 	struct cache_extent *ce;
 	struct map_lookup *map;
 	u64 offset;
@@ -959,16 +1084,24 @@ int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
 		stripes_allocated = 1;
 	}
 again:
+	ce = find_first_cache_extent(&map_tree->cache_tree, logical);
+	if (!ce) {
+		if (multi)
+			kfree(multi);
+		return -ENOENT;
+	}
+	if (ce->start > logical || ce->start + ce->size < logical) {
+		if (multi)
+			kfree(multi);
+		return -ENOENT;
+	}
+
 	if (multi_ret) {
 		multi = kzalloc(btrfs_multi_bio_size(stripes_allocated),
 				GFP_NOFS);
 		if (!multi)
 			return -ENOMEM;
 	}
-
-	ce = find_first_cache_extent(&map_tree->cache_tree, logical);
-	BUG_ON(!ce);
-	BUG_ON(ce->start > logical || ce->start + ce->size < logical);
 	map = container_of(ce, struct map_lookup, ce);
 	offset = logical - ce->start;
 
@@ -1032,8 +1165,6 @@ again:
 			multi->num_stripes = map->sub_stripes;
 		else if (mirror_num)
 			stripe_index += mirror_num - 1;
-		else
-			stripe_index = stripe_nr % map->sub_stripes;
 
 		stripe_nr = stripe_nr / factor;
 	} else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
@@ -1060,6 +1191,8 @@ again:
 		stripe_index++;
 	}
 	*multi_ret = multi;
+	if (type)
+		*type = map->type;
 out:
 	return 0;
 }
@@ -1159,6 +1292,25 @@ int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset)
 	return readonly;
 }
 
+/*
+ * Allocate a zero-initialized placeholder btrfs_device for a device id that
+ * could not be found.  Only @devid is recorded and fd is set to -1.
+ *
+ * Returns the new device, or NULL if the allocation fails.
+ */
+static struct btrfs_device *fill_missing_device(u64 devid)
+{
+	struct btrfs_device *device;
+
+	device = kzalloc(sizeof(*device), GFP_NOFS);
+	if (!device)
+		return NULL;
+
+	device->devid = devid;
+	device->fd = -1;
+	return device;
+}
+
 static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
 			  struct extent_buffer *leaf,
 			  struct btrfs_chunk *chunk)
@@ -1209,8 +1361,14 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
 		map->stripes[i].dev = btrfs_find_device(root, devid, uuid,
 							NULL);
 		if (!map->stripes[i].dev) {
-			kfree(map);
-			return -EIO;
+			map->stripes[i].dev = fill_missing_device(devid);
+			if (!map->stripes[i].dev) {
+				/* out of memory while building the placeholder */
+				kfree(map);
+				return -ENOMEM;
+			}
+			printf("warning, device %llu is missing\n",
+			       (unsigned long long)devid);
 		}
 
 	}
@@ -1333,7 +1477,7 @@ int btrfs_read_sys_array(struct btrfs_root *root)
 	u8 *ptr;
 	unsigned long sb_ptr;
 	u32 cur;
-	int ret;
+	int ret = 0;
 
 	sb = btrfs_find_create_tree_block(root, BTRFS_SUPER_INFO_OFFSET,
 					  BTRFS_SUPER_INFO_SIZE);
@@ -1364,7 +1508,8 @@ int btrfs_read_sys_array(struct btrfs_root *root)
 		if (key.type == BTRFS_CHUNK_ITEM_KEY) {
 			chunk = (struct btrfs_chunk *)sb_ptr;
 			ret = read_one_chunk(root, &key, sb, chunk);
-			BUG_ON(ret);
+			if (ret)
+				break;
 			num_stripes = btrfs_chunk_num_stripes(sb, chunk);
 			len = btrfs_chunk_item_size(num_stripes);
 		} else {
@@ -1375,7 +1520,7 @@ int btrfs_read_sys_array(struct btrfs_root *root)
 		cur += len;
 	}
 	free_extent_buffer(sb);
-	return 0;
+	return ret;
 }
 
 int btrfs_read_chunk_tree(struct btrfs_root *root)
diff --git a/volumes.h b/volumes.h
index bb78751..9ff6182 100644
--- a/volumes.h
+++ b/volumes.h
@@ -18,6 +18,7 @@
 
 #ifndef __BTRFS_VOLUMES_
 #define __BTRFS_VOLUMES_
+
 struct btrfs_device {
 	struct list_head dev_list;
 	struct btrfs_root *dev_root;
@@ -88,17 +89,65 @@ struct btrfs_multi_bio {
 	struct btrfs_bio_stripe stripes[];
 };
 
+struct map_lookup {
+	struct cache_extent ce;
+	u64 type;
+	int io_align;
+	int io_width;
+	int stripe_len;
+	int sector_size;
+	int num_stripes;
+	int sub_stripes;
+	struct btrfs_bio_stripe stripes[];
+};
+
 #define btrfs_multi_bio_size(n) (sizeof(struct btrfs_multi_bio) + \
 			    (sizeof(struct btrfs_bio_stripe) * (n)))
 
+/*
+ * Restriper's general type filter
+ */
+#define BTRFS_BALANCE_DATA		(1ULL << 0)
+#define BTRFS_BALANCE_SYSTEM		(1ULL << 1)
+#define BTRFS_BALANCE_METADATA		(1ULL << 2)
+
+#define BTRFS_BALANCE_TYPE_MASK		(BTRFS_BALANCE_DATA |	    \
+					 BTRFS_BALANCE_SYSTEM |	    \
+					 BTRFS_BALANCE_METADATA)
+
+#define BTRFS_BALANCE_FORCE		(1ULL << 3)
+#define BTRFS_BALANCE_RESUME		(1ULL << 4)
+
+/*
+ * Balance filters
+ */
+#define BTRFS_BALANCE_ARGS_PROFILES	(1ULL << 0)
+#define BTRFS_BALANCE_ARGS_USAGE	(1ULL << 1)
+#define BTRFS_BALANCE_ARGS_DEVID	(1ULL << 2)
+#define BTRFS_BALANCE_ARGS_DRANGE	(1ULL << 3)
+#define BTRFS_BALANCE_ARGS_VRANGE	(1ULL << 4)
+
+/*
+ * Profile changing flags.  When SOFT is set we won't relocate chunk if
+ * it already has the target profile (even though it may be
+ * half-filled).
+ */
+#define BTRFS_BALANCE_ARGS_CONVERT	(1ULL << 8)
+#define BTRFS_BALANCE_ARGS_SOFT		(1ULL << 9)
+
 int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans,
 			   struct btrfs_device *device,
 			   u64 chunk_tree, u64 chunk_objectid,
 			   u64 chunk_offset,
 			   u64 num_bytes, u64 *start);
+int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
+		      u64 logical, u64 *length, u64 *type,
+		      struct btrfs_multi_bio **multi_ret, int mirror_num);
 int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
 		    u64 logical, u64 *length,
 		    struct btrfs_multi_bio **multi_ret, int mirror_num);
+int btrfs_next_metadata(struct btrfs_mapping_tree *map_tree, u64 *logical,
+			u64 *size);
 int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
 		     u64 chunk_start, u64 physical, u64 devid,
 		     u64 **logical, int *naddrs, int *stripe_len);
@@ -107,6 +156,9 @@ int btrfs_read_chunk_tree(struct btrfs_root *root);
 int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
 		      struct btrfs_root *extent_root, u64 *start,
 		      u64 *num_bytes, u64 type);
+int btrfs_alloc_data_chunk(struct btrfs_trans_handle *trans,
+			   struct btrfs_root *extent_root, u64 *start,
+			   u64 num_bytes, u64 type);
 int btrfs_read_super_device(struct btrfs_root *root, struct extent_buffer *buf);
 int btrfs_add_device(struct btrfs_trans_handle *trans,
 		     struct btrfs_root *root,