From a2c4efbb5b968a80eb552757308c2fb2f28157c6 Mon Sep 17 00:00:00 2001
From: Marcel Apfelbaum <marcel.a@redhat.com>
Date: Sun, 19 Jan 2014 13:07:36 +0100
Subject: [PATCH 11/11] exec: separate sections and nodes per address space
RH-Author: Marcel Apfelbaum <marcel.a@redhat.com>
Message-id: <1390136856-7024-3-git-send-email-marcel.a@redhat.com>
Patchwork-id: 56811
O-Subject: [RHEL-7 qemu-kvm PATCH v2 2/2] exec: separate sections and nodes per address space
Bugzilla: 1003535
RH-Acked-by: Michael S. Tsirkin <mst@redhat.com>
RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
RH-Acked-by: Markus Armbruster <armbru@redhat.com>
Every address space has its own nodes and sections, but
it uses the same global arrays of nodes/sections.
This limits the number of devices that can be attached
to the guest to 20-30 devices. It happens because:
- The sections array is limited to 2^12 entries.
- The main memory has at least 100 sections.
- Each device address space is actually an alias to
main memory, multiplying its number of nodes/sections.
Remove the limitation by using separate arrays of
nodes and sections for each address space.
Closest upstream commit: 53cb28cbfea038f8ad50132dc8a684e638c7d48b
Signed-off-by: Marcel Apfelbaum <marcel.a@redhat.com>
---
v1 -> v2:
- The series conflicted with Juan's series:
- [RHEL7 qemu-kvm PATCH 00/40] bitmap optmization
- Conflicts solved:
- AddressSpaceDispatch was moved to exec.c
- PhysPageEntry was moved to exec.c
- Moved also PhysPageMap to exec.c
exec.c | 166 ++++++++++++++++++++++++++++++++++-------------------------------
1 file changed, 86 insertions(+), 80 deletions(-)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
exec.c | 166 +++++++++++++++++++++++++++++++++-------------------------------
1 files changed, 86 insertions(+), 80 deletions(-)
diff --git a/exec.c b/exec.c
index 01c74cd..ce9310c 100644
--- a/exec.c
+++ b/exec.c
@@ -91,25 +91,32 @@ struct PhysPageEntry {
uint16_t ptr : 15;
};
+typedef PhysPageEntry Node[L2_SIZE];
+
+typedef struct PhysPageMap {
+ unsigned sections_nb;
+ unsigned sections_nb_alloc;
+ unsigned nodes_nb;
+ unsigned nodes_nb_alloc;
+ Node *nodes;
+ MemoryRegionSection *sections;
+} PhysPageMap;
+
struct AddressSpaceDispatch {
/* This is a multi-level map on the physical address space.
* The bottom level has pointers to MemoryRegionSections.
*/
PhysPageEntry phys_map;
+ PhysPageMap map;
MemoryListener listener;
+ AddressSpace *as;
};
-static MemoryRegionSection *phys_sections;
-static unsigned phys_sections_nb, phys_sections_nb_alloc;
#define PHYS_SECTION_UNASSIGNED 0
#define PHYS_SECTION_NOTDIRTY 1
#define PHYS_SECTION_ROM 2
#define PHYS_SECTION_WATCH 3
-/* Simple allocator for PhysPageEntry nodes */
-static PhysPageEntry (*phys_map_nodes)[L2_SIZE];
-static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc;
-
#define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)
static void io_mem_init(void);
@@ -121,41 +128,38 @@ static MemoryRegion io_mem_watch;
#if !defined(CONFIG_USER_ONLY)
-static void phys_map_node_reserve(unsigned nodes)
+static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
{
- if (phys_map_nodes_nb + nodes > phys_map_nodes_nb_alloc) {
- typedef PhysPageEntry Node[L2_SIZE];
- phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc * 2, 16);
- phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc,
- phys_map_nodes_nb + nodes);
- phys_map_nodes = g_renew(Node, phys_map_nodes,
- phys_map_nodes_nb_alloc);
+ if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
+ map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
+ map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
+ map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
}
}
-static uint16_t phys_map_node_alloc(void)
+static uint16_t phys_map_node_alloc(PhysPageMap *map)
{
unsigned i;
uint16_t ret;
- ret = phys_map_nodes_nb++;
+ ret = map->nodes_nb++;
assert(ret != PHYS_MAP_NODE_NIL);
- assert(ret != phys_map_nodes_nb_alloc);
+ assert(ret != map->nodes_nb_alloc);
for (i = 0; i < L2_SIZE; ++i) {
- phys_map_nodes[ret][i].is_leaf = 0;
- phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
+ map->nodes[ret][i].is_leaf = 0;
+ map->nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
}
return ret;
}
-static void phys_map_nodes_reset(void)
+static void phys_map_nodes_reset(PhysPageMap *map)
{
- phys_map_nodes_nb = 0;
+ map->nodes_nb = 0;
}
-static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
- hwaddr *nb, uint16_t leaf,
+static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
+ hwaddr *index, hwaddr *nb, uint16_t leaf,
int level)
{
PhysPageEntry *p;
@@ -163,8 +167,8 @@ static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
hwaddr step = (hwaddr)1 << (level * L2_BITS);
if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) {
- lp->ptr = phys_map_node_alloc();
- p = phys_map_nodes[lp->ptr];
+ lp->ptr = phys_map_node_alloc(map);
+ p = map->nodes[lp->ptr];
if (level == 0) {
for (i = 0; i < L2_SIZE; i++) {
p[i].is_leaf = 1;
@@ -172,7 +176,7 @@ static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
}
}
} else {
- p = phys_map_nodes[lp->ptr];
+ p = map->nodes[lp->ptr];
}
lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)];
@@ -183,7 +187,7 @@ static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
*index += step;
*nb -= step;
} else {
- phys_page_set_level(lp, index, nb, leaf, level - 1);
+ phys_page_set_level(map, lp, index, nb, leaf, level - 1);
}
++lp;
}
@@ -194,9 +198,10 @@ static void phys_page_set(AddressSpaceDispatch *d,
uint16_t leaf)
{
/* Wildly overreserve - it doesn't matter much. */
- phys_map_node_reserve(3 * P_L2_LEVELS);
+ phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
- phys_page_set_level(&d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
+ phys_page_set_level(&d->map, &d->phys_map, &index,
+ &nb, leaf, P_L2_LEVELS - 1);
}
MemoryRegionSection *phys_page_find(AddressSpaceDispatch *d, hwaddr index)
@@ -210,13 +215,13 @@ MemoryRegionSection *phys_page_find(AddressSpaceDispatch *d, hwaddr index)
if (lp.ptr == PHYS_MAP_NODE_NIL) {
goto not_found;
}
- p = phys_map_nodes[lp.ptr];
+ p = d->map.nodes[lp.ptr];
lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
}
s_index = lp.ptr;
not_found:
- return &phys_sections[s_index];
+ return &d->map.sections[s_index];
}
bool memory_region_is_unassigned(MemoryRegion *mr)
@@ -657,7 +662,7 @@ hwaddr memory_region_section_get_iotlb(CPUArchState *env,
and avoid full address decoding in every device.
We can't use the high bits of pd for this because
IO_MEM_ROMD uses these as a ram address. */
- iotlb = section - phys_sections;
+ iotlb = section - address_space_memory.dispatch->map.sections;
iotlb += memory_region_section_addr(section, paddr);
}
@@ -683,13 +688,14 @@ hwaddr memory_region_section_get_iotlb(CPUArchState *env,
#define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
typedef struct subpage_t {
MemoryRegion iomem;
+ AddressSpace *as;
hwaddr base;
uint16_t sub_section[TARGET_PAGE_SIZE];
} subpage_t;
static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
uint16_t section);
-static subpage_t *subpage_init(hwaddr base);
+static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
static void *(*phys_mem_alloc)(size_t size) = qemu_anon_ram_alloc;
@@ -703,9 +709,9 @@ void phys_mem_set_alloc(void *(*alloc)(size_t))
phys_mem_alloc = alloc;
}
-static void destroy_page_desc(uint16_t section_index)
+static void destroy_page_desc(PhysPageMap *map, uint16_t section_index)
{
- MemoryRegionSection *section = &phys_sections[section_index];
+ MemoryRegionSection *section = &map->sections[section_index];
MemoryRegion *mr = section->mr;
if (mr->subpage) {
@@ -715,7 +721,8 @@ static void destroy_page_desc(uint16_t section_index)
}
}
-static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
+static void destroy_l2_mapping(PhysPageMap *map, PhysPageEntry *lp,
+ unsigned level)
{
unsigned i;
PhysPageEntry *p;
@@ -724,12 +731,12 @@ static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
return;
}
- p = phys_map_nodes[lp->ptr];
+ p = map->nodes[lp->ptr];
for (i = 0; i < L2_SIZE; ++i) {
if (!p[i].is_leaf) {
- destroy_l2_mapping(&p[i], level - 1);
+ destroy_l2_mapping(map, &p[i], level - 1);
} else {
- destroy_page_desc(p[i].ptr);
+ destroy_page_desc(map, p[i].ptr);
}
}
lp->is_leaf = 0;
@@ -738,24 +745,25 @@ static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
static void destroy_all_mappings(AddressSpaceDispatch *d)
{
- destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
- phys_map_nodes_reset();
+ destroy_l2_mapping(&d->map, &d->phys_map, P_L2_LEVELS - 1);
+ phys_map_nodes_reset(&d->map);
}
-static uint16_t phys_section_add(MemoryRegionSection *section)
+static uint16_t phys_section_add(PhysPageMap *map,
+ MemoryRegionSection *section)
{
- if (phys_sections_nb == phys_sections_nb_alloc) {
- phys_sections_nb_alloc = MAX(phys_sections_nb_alloc * 2, 16);
- phys_sections = g_renew(MemoryRegionSection, phys_sections,
- phys_sections_nb_alloc);
+ if (map->sections_nb == map->sections_nb_alloc) {
+ map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
+ map->sections = g_renew(MemoryRegionSection, map->sections,
+ map->sections_nb_alloc);
}
- phys_sections[phys_sections_nb] = *section;
- return phys_sections_nb++;
+ map->sections[map->sections_nb] = *section;
+ return map->sections_nb++;
}
-static void phys_sections_clear(void)
+static void phys_sections_clear(PhysPageMap *map)
{
- phys_sections_nb = 0;
+ map->sections_nb = 0;
}
static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
@@ -773,16 +781,16 @@ static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *secti
assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
if (!(existing->mr->subpage)) {
- subpage = subpage_init(base);
+ subpage = subpage_init(d->as, base);
subsection.mr = &subpage->iomem;
phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
- phys_section_add(&subsection));
+ phys_section_add(&d->map, &subsection));
} else {
subpage = container_of(existing->mr, subpage_t, iomem);
}
start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
end = start + section->size - 1;
- subpage_register(subpage, start, end, phys_section_add(section));
+ subpage_register(subpage, start, end, phys_section_add(&d->map, section));
}
@@ -791,7 +799,7 @@ static void register_multipage(AddressSpaceDispatch *d, MemoryRegionSection *sec
hwaddr start_addr = section->offset_within_address_space;
ram_addr_t size = section->size;
hwaddr addr;
- uint16_t section_index = phys_section_add(section);
+ uint16_t section_index = phys_section_add(&d->map, section);
assert(size);
@@ -1619,7 +1627,7 @@ static uint64_t subpage_read(void *opaque, hwaddr addr,
mmio, len, addr, idx);
#endif
- section = &phys_sections[mmio->sub_section[idx]];
+ section = &mmio->as->dispatch->map.sections[mmio->sub_section[idx]];
addr += mmio->base;
addr -= section->offset_within_address_space;
addr += section->offset_within_region;
@@ -1638,7 +1646,7 @@ static void subpage_write(void *opaque, hwaddr addr,
__func__, mmio, len, addr, idx, value);
#endif
- section = &phys_sections[mmio->sub_section[idx]];
+ section = &mmio->as->dispatch->map.sections[mmio->sub_section[idx]];
addr += mmio->base;
addr -= section->offset_within_address_space;
addr += section->offset_within_region;
@@ -1696,10 +1704,10 @@ static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
printf("%s: %p start %08x end %08x idx %08x eidx %08x mem %ld\n", __func__,
mmio, start, end, idx, eidx, memory);
#endif
- if (memory_region_is_ram(phys_sections[section].mr)) {
- MemoryRegionSection new_section = phys_sections[section];
+ if (memory_region_is_ram(mmio->as->dispatch->map.sections[section].mr)) {
+ MemoryRegionSection new_section = mmio->as->dispatch->map.sections[section];
new_section.mr = &io_mem_subpage_ram;
- section = phys_section_add(&new_section);
+ section = phys_section_add(&mmio->as->dispatch->map, &new_section);
}
for (; idx <= eidx; idx++) {
mmio->sub_section[idx] = section;
@@ -1708,12 +1716,13 @@ static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
return 0;
}
-static subpage_t *subpage_init(hwaddr base)
+static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
{
subpage_t *mmio;
mmio = g_malloc0(sizeof(subpage_t));
+ mmio->as = as;
mmio->base = base;
memory_region_init_io(&mmio->iomem, &subpage_ops, mmio,
"subpage", TARGET_PAGE_SIZE);
@@ -1727,7 +1736,7 @@ static subpage_t *subpage_init(hwaddr base)
return mmio;
}
-static uint16_t dummy_section(MemoryRegion *mr)
+static uint16_t dummy_section(PhysPageMap *map, MemoryRegion *mr)
{
MemoryRegionSection section = {
.mr = mr,
@@ -1736,12 +1745,13 @@ static uint16_t dummy_section(MemoryRegion *mr)
.size = UINT64_MAX,
};
- return phys_section_add(§ion);
+ return phys_section_add(map, §ion);
}
MemoryRegion *iotlb_to_region(hwaddr index)
{
- return phys_sections[index & ~TARGET_PAGE_MASK].mr;
+ return address_space_memory.dispatch->map.sections[
+ index & ~TARGET_PAGE_MASK].mr;
}
static void io_mem_init(void)
@@ -1761,23 +1771,19 @@ static void io_mem_init(void)
static void mem_begin(MemoryListener *listener)
{
AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
+ uint16_t n;
destroy_all_mappings(d);
d->phys_map.ptr = PHYS_MAP_NODE_NIL;
-}
-
-static void core_begin(MemoryListener *listener)
-{
- uint16_t n;
- phys_sections_clear();
- n = dummy_section(&io_mem_unassigned);
+ phys_sections_clear(&d->map);
+ n = dummy_section(&d->map, &io_mem_unassigned);
assert(n == PHYS_SECTION_UNASSIGNED);
- n = dummy_section(&io_mem_notdirty);
+ n = dummy_section(&d->map, &io_mem_notdirty);
assert(n == PHYS_SECTION_NOTDIRTY);
- n = dummy_section(&io_mem_rom);
+ n = dummy_section(&d->map, &io_mem_rom);
assert(n == PHYS_SECTION_ROM);
- n = dummy_section(&io_mem_watch);
+ n = dummy_section(&d->map, &io_mem_watch);
assert(n == PHYS_SECTION_WATCH);
}
@@ -1822,7 +1828,6 @@ static void io_region_del(MemoryListener *listener,
}
static MemoryListener core_memory_listener = {
- .begin = core_begin,
.log_global_start = core_log_global_start,
.log_global_stop = core_log_global_stop,
.priority = 1,
@@ -1840,7 +1845,7 @@ static MemoryListener tcg_memory_listener = {
void address_space_init_dispatch(AddressSpace *as)
{
- AddressSpaceDispatch *d = g_new(AddressSpaceDispatch, 1);
+ AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
d->listener = (MemoryListener) {
@@ -1849,6 +1854,7 @@ void address_space_init_dispatch(AddressSpace *as)
.region_nop = mem_add,
.priority = 0,
};
+ d->as = as;
as->dispatch = d;
memory_listener_register(&d->listener, as);
}
@@ -1858,7 +1864,7 @@ void address_space_destroy_dispatch(AddressSpace *as)
AddressSpaceDispatch *d = as->dispatch;
memory_listener_unregister(&d->listener);
- destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
+ destroy_l2_mapping(&d->map, &d->phys_map, P_L2_LEVELS - 1);
g_free(d);
as->dispatch = NULL;
}
@@ -2446,7 +2452,7 @@ void stl_phys_notdirty(hwaddr addr, uint32_t val)
if (!memory_region_is_ram(section->mr) || section->readonly) {
addr = memory_region_section_addr(section, addr);
if (memory_region_is_ram(section->mr)) {
- section = &phys_sections[PHYS_SECTION_ROM];
+ section = &address_space_memory.dispatch->map.sections[PHYS_SECTION_ROM];
}
io_mem_write(section->mr, addr, val, 4);
} else {
@@ -2479,7 +2485,7 @@ void stq_phys_notdirty(hwaddr addr, uint64_t val)
if (!memory_region_is_ram(section->mr) || section->readonly) {
addr = memory_region_section_addr(section, addr);
if (memory_region_is_ram(section->mr)) {
- section = &phys_sections[PHYS_SECTION_ROM];
+ section = &address_space_memory.dispatch->map.sections[PHYS_SECTION_ROM];
}
#ifdef TARGET_WORDS_BIGENDIAN
io_mem_write(section->mr, addr, val >> 32, 4);
@@ -2508,7 +2514,7 @@ static inline void stl_phys_internal(hwaddr addr, uint32_t val,
if (!memory_region_is_ram(section->mr) || section->readonly) {
addr = memory_region_section_addr(section, addr);
if (memory_region_is_ram(section->mr)) {
- section = &phys_sections[PHYS_SECTION_ROM];
+ section = &address_space_memory.dispatch->map.sections[PHYS_SECTION_ROM];
}
#if defined(TARGET_WORDS_BIGENDIAN)
if (endian == DEVICE_LITTLE_ENDIAN) {
@@ -2575,7 +2581,7 @@ static inline void stw_phys_internal(hwaddr addr, uint32_t val,
if (!memory_region_is_ram(section->mr) || section->readonly) {
addr = memory_region_section_addr(section, addr);
if (memory_region_is_ram(section->mr)) {
- section = &phys_sections[PHYS_SECTION_ROM];
+ section = &address_space_memory.dispatch->map.sections[PHYS_SECTION_ROM];
}
#if defined(TARGET_WORDS_BIGENDIAN)
if (endian == DEVICE_LITTLE_ENDIAN) {
--
1.7.1