From: Chris Lalancette <clalance@redhat.com> Date: Mon, 3 Nov 2008 09:09:07 +0100 Subject: [xen] remove contiguous_bitmap Message-id: 490EB1A3.9000405@redhat.com O-Subject: [RHEL5.3 PATCH 2/2]: Remove Xen contiguous_bitmap Bugzilla: 463500 RH-Acked-by: Mark McLoughlin <markmc@redhat.com> RH-Acked-by: Bill Burns <bburns@redhat.com> RH-Acked-by: Prarit Bhargava <prarit@redhat.com> RH-Acked-by: Rik van Riel <riel@redhat.com> When the RHEL-5 kernel is running under Xen, it can ask the hypervisor to allocate a set of physically contiguous pages on its behalf (for instance, so dom0 can do DMA operations to devices). The kernel tracks where these contiguous pages are via a contiguous_bitmap. In the swiotlb path, we check this bitmap to see if pages are physically contiguous; if they aren't, then we know we have to split this request. Unfortunately, on i386, this bitmap is allocated based on max_low_pfn. For the most part, this is fine; if your page is below max_low_pfn, then checking in the bitmap is just fine. Also, due to some other checking before checking the bitmap, if the request is for just one page, regardless of where that page is, we know we only have to do one request. The problem comes in when we get a request for multiple pages (I've seen requests for 2), starting at a high memory address. In this case, we try to access the contiguous_bitmap, but we end up taking a fatal page fault in the kernel because we are looking too far above max_low_pfn. It turns out that the contiguous_bitmap is a dubious optimization of a more general problem that Stephen solved for 5.2. For that reason, upstream decided to rip out the contiguous_bitmap. By doing that, we completely avoid the fatal page fault on i386, and we really shouldn't be losing any performance. This patch is a straightforward backport of upstream linux-2.6.18-xen.hg c/s 707. 
diff --git a/arch/i386/kernel/pci-dma-xen.c b/arch/i386/kernel/pci-dma-xen.c index 14f3539..7bf796e 100644 --- a/arch/i386/kernel/pci-dma-xen.c +++ b/arch/i386/kernel/pci-dma-xen.c @@ -130,17 +130,11 @@ static int check_pages_physically_contiguous(unsigned long pfn, int range_straddles_page_boundary(paddr_t p, size_t size) { - extern unsigned long *contiguous_bitmap; unsigned long pfn = p >> PAGE_SHIFT; unsigned int offset = p & ~PAGE_MASK; - if (offset + size <= PAGE_SIZE) - return 0; - if (test_bit(pfn, contiguous_bitmap)) - return 0; - if (check_pages_physically_contiguous(pfn, offset, size)) - return 0; - return 1; + return ((offset + size > PAGE_SIZE) && + !check_pages_physically_contiguous(pfn, offset, size)); } int diff --git a/arch/i386/mm/hypervisor.c b/arch/i386/mm/hypervisor.c index d09507b..88b4dab 100644 --- a/arch/i386/mm/hypervisor.c +++ b/arch/i386/mm/hypervisor.c @@ -214,54 +214,6 @@ void xen_set_ldt(unsigned long ptr, unsigned long len) BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0); } -/* - * Bitmap is indexed by page number. If bit is set, the page is part of a - * xen_create_contiguous_region() area of memory. 
- */ -unsigned long *contiguous_bitmap; - -static void contiguous_bitmap_set( - unsigned long first_page, unsigned long nr_pages) -{ - unsigned long start_off, end_off, curr_idx, end_idx; - - curr_idx = first_page / BITS_PER_LONG; - start_off = first_page & (BITS_PER_LONG-1); - end_idx = (first_page + nr_pages) / BITS_PER_LONG; - end_off = (first_page + nr_pages) & (BITS_PER_LONG-1); - - if (curr_idx == end_idx) { - contiguous_bitmap[curr_idx] |= - ((1UL<<end_off)-1) & -(1UL<<start_off); - } else { - contiguous_bitmap[curr_idx] |= -(1UL<<start_off); - while ( ++curr_idx < end_idx ) - contiguous_bitmap[curr_idx] = ~0UL; - contiguous_bitmap[curr_idx] |= (1UL<<end_off)-1; - } -} - -static void contiguous_bitmap_clear( - unsigned long first_page, unsigned long nr_pages) -{ - unsigned long start_off, end_off, curr_idx, end_idx; - - curr_idx = first_page / BITS_PER_LONG; - start_off = first_page & (BITS_PER_LONG-1); - end_idx = (first_page + nr_pages) / BITS_PER_LONG; - end_off = (first_page + nr_pages) & (BITS_PER_LONG-1); - - if (curr_idx == end_idx) { - contiguous_bitmap[curr_idx] &= - -(1UL<<end_off) | ((1UL<<start_off)-1); - } else { - contiguous_bitmap[curr_idx] &= (1UL<<start_off)-1; - while ( ++curr_idx != end_idx ) - contiguous_bitmap[curr_idx] = 0; - contiguous_bitmap[curr_idx] &= -(1UL<<end_off); - } -} - /* Protected by balloon_lock. */ #define MAX_CONTIG_ORDER 9 /* 2MB */ static unsigned long discontig_frames[1<<MAX_CONTIG_ORDER]; @@ -355,10 +307,6 @@ int xen_create_contiguous_region( if (HYPERVISOR_multicall(cr_mcl, i)) BUG(); - if (success) - contiguous_bitmap_set(__pa(vstart) >> PAGE_SHIFT, - 1UL << order); - balloon_unlock(flags); return success ? 
0 : -ENOMEM; @@ -384,8 +332,7 @@ void xen_destroy_contiguous_region(unsigned long vstart, unsigned int order) } }; - if (xen_feature(XENFEAT_auto_translated_physmap) || - !test_bit(__pa(vstart) >> PAGE_SHIFT, contiguous_bitmap)) + if (xen_feature(XENFEAT_auto_translated_physmap)) return; if (unlikely(order > MAX_CONTIG_ORDER)) @@ -398,8 +345,6 @@ void xen_destroy_contiguous_region(unsigned long vstart, unsigned int order) balloon_lock(flags); - contiguous_bitmap_clear(__pa(vstart) >> PAGE_SHIFT, 1UL << order); - /* 1. Find start MFN of contiguous extent. */ in_frame = pfn_to_mfn(__pa(vstart) >> PAGE_SHIFT); diff --git a/arch/i386/mm/init-xen.c b/arch/i386/mm/init-xen.c index a7c09dc..1fd6d79 100644 --- a/arch/i386/mm/init-xen.c +++ b/arch/i386/mm/init-xen.c @@ -47,8 +47,6 @@ #include <asm/hypervisor.h> #include <asm/swiotlb.h> -extern unsigned long *contiguous_bitmap; - unsigned int __VMALLOC_RESERVE = 128 << 20; DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); @@ -647,11 +645,6 @@ void __init mem_init(void) int bad_ppro; unsigned long pfn; - contiguous_bitmap = alloc_bootmem_low_pages( - (max_low_pfn + 2*BITS_PER_LONG) >> 3); - BUG_ON(!contiguous_bitmap); - memset(contiguous_bitmap, 0, (max_low_pfn + 2*BITS_PER_LONG) >> 3); - #if defined(CONFIG_SWIOTLB) swiotlb_init(); #endif diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index f96c41d..cc3c02b 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -699,9 +699,6 @@ setup_arch (char **cmdline_p) platform_setup(cmdline_p); check_sal_cache_flush(); paging_init(); -#ifdef CONFIG_XEN - xen_contiguous_bitmap_init(max_pfn); -#endif } /* diff --git a/arch/ia64/xen/hypervisor.c b/arch/ia64/xen/hypervisor.c index 6224ca8..7c22682 100644 --- a/arch/ia64/xen/hypervisor.c +++ b/arch/ia64/xen/hypervisor.c @@ -52,152 +52,6 @@ static int p2m_expose_init(void); EXPORT_SYMBOL(__hypercall); -//XXX same as i386, x86_64 contiguous_bitmap_set(), contiguous_bitmap_clear() -// move those to 
lib/contiguous_bitmap? -//XXX discontigmem/sparsemem - -/* - * Bitmap is indexed by page number. If bit is set, the page is part of a - * xen_create_contiguous_region() area of memory. - */ -unsigned long *contiguous_bitmap; - -#ifdef CONFIG_VIRTUAL_MEM_MAP -/* Following logic is stolen from create_mem_map_table() for virtual memmap */ -static int -create_contiguous_bitmap(u64 start, u64 end, void *arg) -{ - unsigned long address, start_page, end_page; - unsigned long bitmap_start, bitmap_end; - unsigned char *bitmap; - int node; - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - pte_t *pte; - - bitmap_start = (unsigned long)contiguous_bitmap + - ((__pa(start) >> PAGE_SHIFT) >> 3); - bitmap_end = (unsigned long)contiguous_bitmap + - (((__pa(end) >> PAGE_SHIFT) + 2 * BITS_PER_LONG) >> 3); - - start_page = bitmap_start & PAGE_MASK; - end_page = PAGE_ALIGN(bitmap_end); - node = paddr_to_nid(__pa(start)); - - bitmap = alloc_bootmem_pages_node(NODE_DATA(node), - end_page - start_page); - BUG_ON(!bitmap); - memset(bitmap, 0, end_page - start_page); - - for (address = start_page; address < end_page; address += PAGE_SIZE) { - pgd = pgd_offset_k(address); - if (pgd_none(*pgd)) - pgd_populate(&init_mm, pgd, - alloc_bootmem_pages_node(NODE_DATA(node), - PAGE_SIZE)); - pud = pud_offset(pgd, address); - - if (pud_none(*pud)) - pud_populate(&init_mm, pud, - alloc_bootmem_pages_node(NODE_DATA(node), - PAGE_SIZE)); - pmd = pmd_offset(pud, address); - - if (pmd_none(*pmd)) - pmd_populate_kernel(&init_mm, pmd, - alloc_bootmem_pages_node - (NODE_DATA(node), PAGE_SIZE)); - pte = pte_offset_kernel(pmd, address); - - if (pte_none(*pte)) - set_pte(pte, - pfn_pte(__pa(bitmap + (address - start_page)) - >> PAGE_SHIFT, PAGE_KERNEL)); - } - return 0; -} -#endif - -static void -__contiguous_bitmap_init(unsigned long size) -{ - contiguous_bitmap = alloc_bootmem_pages(size); - BUG_ON(!contiguous_bitmap); - memset(contiguous_bitmap, 0, size); -} - -void -xen_contiguous_bitmap_init(unsigned long 
end_pfn) -{ - unsigned long size = (end_pfn + 2 * BITS_PER_LONG) >> 3; -#ifndef CONFIG_VIRTUAL_MEM_MAP - __contiguous_bitmap_init(size); -#else - unsigned long max_gap = 0; - - efi_memmap_walk(find_largest_hole, (u64*)&max_gap); - if (max_gap < LARGE_GAP) { - __contiguous_bitmap_init(size); - } else { - unsigned long map_size = PAGE_ALIGN(size); - vmalloc_end -= map_size; - contiguous_bitmap = (unsigned long*)vmalloc_end; - efi_memmap_walk(create_contiguous_bitmap, NULL); - } -#endif -} - -#if 0 -int -contiguous_bitmap_test(void* p) -{ - return test_bit(__pa(p) >> PAGE_SHIFT, contiguous_bitmap); -} -#endif - -static void contiguous_bitmap_set( - unsigned long first_page, unsigned long nr_pages) -{ - unsigned long start_off, end_off, curr_idx, end_idx; - - curr_idx = first_page / BITS_PER_LONG; - start_off = first_page & (BITS_PER_LONG-1); - end_idx = (first_page + nr_pages) / BITS_PER_LONG; - end_off = (first_page + nr_pages) & (BITS_PER_LONG-1); - - if (curr_idx == end_idx) { - contiguous_bitmap[curr_idx] |= - ((1UL<<end_off)-1) & -(1UL<<start_off); - } else { - contiguous_bitmap[curr_idx] |= -(1UL<<start_off); - while ( ++curr_idx < end_idx ) - contiguous_bitmap[curr_idx] = ~0UL; - contiguous_bitmap[curr_idx] |= (1UL<<end_off)-1; - } -} - -static void contiguous_bitmap_clear( - unsigned long first_page, unsigned long nr_pages) -{ - unsigned long start_off, end_off, curr_idx, end_idx; - - curr_idx = first_page / BITS_PER_LONG; - start_off = first_page & (BITS_PER_LONG-1); - end_idx = (first_page + nr_pages) / BITS_PER_LONG; - end_off = (first_page + nr_pages) & (BITS_PER_LONG-1); - - if (curr_idx == end_idx) { - contiguous_bitmap[curr_idx] &= - -(1UL<<end_off) | ((1UL<<start_off)-1); - } else { - contiguous_bitmap[curr_idx] &= (1UL<<start_off)-1; - while ( ++curr_idx != end_idx ) - contiguous_bitmap[curr_idx] = 0; - contiguous_bitmap[curr_idx] &= -(1UL<<end_off); - } -} - // __xen_create_contiguous_region(), __xen_destroy_contiguous_region() // are based on i386 
xen_create_contiguous_region(), // xen_destroy_contiguous_region() @@ -273,8 +127,6 @@ __xen_create_contiguous_region(unsigned long vstart, } else success = 1; } - if (success) - contiguous_bitmap_set(start_gpfn, num_gpfn); #if 0 if (success) { unsigned long mfn; @@ -333,9 +185,6 @@ __xen_destroy_contiguous_region(unsigned long vstart, unsigned int order) }; - if (!test_bit(start_gpfn, contiguous_bitmap)) - return; - if (unlikely(order > MAX_CONTIG_ORDER)) return; @@ -346,8 +195,6 @@ __xen_destroy_contiguous_region(unsigned long vstart, unsigned int order) balloon_lock(flags); - contiguous_bitmap_clear(start_gpfn, num_gpfn); - /* Do the exchange for non-contiguous MFNs. */ in_frame = start_gpfn; for (i = 0; i < num_gpfn; i++) { diff --git a/arch/ia64/xen/xen_dma.c b/arch/ia64/xen/xen_dma.c index 9b277ed..41bcb3d 100644 --- a/arch/ia64/xen/xen_dma.c +++ b/arch/ia64/xen/xen_dma.c @@ -55,7 +55,6 @@ static int check_pages_physically_contiguous(unsigned long pfn, int range_straddles_page_boundary(paddr_t p, size_t size) { - extern unsigned long *contiguous_bitmap; unsigned long pfn = p >> PAGE_SHIFT; unsigned int offset = p & ~PAGE_MASK; @@ -64,8 +63,6 @@ int range_straddles_page_boundary(paddr_t p, size_t size) if (offset + size <= PAGE_SIZE) return 0; - if (test_bit(pfn, contiguous_bitmap)) - return 0; if (check_pages_physically_contiguous(pfn, offset, size)) return 0; return 1; diff --git a/arch/x86_64/mm/init-xen.c b/arch/x86_64/mm/init-xen.c index 08d9e8d..42bc9d8 100644 --- a/arch/x86_64/mm/init-xen.c +++ b/arch/x86_64/mm/init-xen.c @@ -54,8 +54,6 @@ struct dma_mapping_ops* dma_ops; EXPORT_SYMBOL(dma_ops); -extern unsigned long *contiguous_bitmap; - static unsigned long dma_reserve __initdata; DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); @@ -1058,11 +1056,6 @@ void __init mem_init(void) long codesize, reservedpages, datasize, initsize; unsigned long pfn; - contiguous_bitmap = alloc_bootmem_low_pages( - (end_pfn + 2*BITS_PER_LONG) >> 3); - 
BUG_ON(!contiguous_bitmap); - memset(contiguous_bitmap, 0, (end_pfn + 2*BITS_PER_LONG) >> 3); - pci_iommu_alloc(); /* How many end-of-memory variables you have, grandma! */ diff --git a/include/asm-ia64/hypervisor.h b/include/asm-ia64/hypervisor.h index 0230101..0bb0441 100644 --- a/include/asm-ia64/hypervisor.h +++ b/include/asm-ia64/hypervisor.h @@ -147,7 +147,6 @@ int privcmd_mmap(struct file * file, struct vm_area_struct * vma); #define pfn_pte_ma(_x,_y) __pte_ma(0) /* unmodified use */ #ifndef CONFIG_VMX_GUEST -void xen_contiguous_bitmap_init(unsigned long end_pfn); int __xen_create_contiguous_region(unsigned long vstart, unsigned int order, unsigned int address_bits); static inline int xen_create_contiguous_region(unsigned long vstart,