From: Chris Lalancette <clalance@redhat.com>
Date: Wed, 6 May 2009 09:38:24 +0200
Subject: [ia64] xen: switch from flipping to copying interface
Message-id: 4A013E70.1000108@redhat.com
O-Subject: [RHEL5.4 PATCH 1/3 v2]: Switch from a flipping to a copying interface
Bugzilla: 479754
RH-Acked-by: Rik van Riel <riel@redhat.com>
RH-Acked-by: Don Dutile <ddutile@redhat.com>

This first patch changes Xen netback to use page copying rather than page
flipping by default. It also removes the calls to
xen_create_contiguous_region(), which were causing the original bug. With
this patch in place, the original problem in bz 479754 seems to be solved.
This is a backport of upstream xen-unstable c/s 12007 and 13494. A short
sketch of the frontend side of the rx-copy negotiation follows the diff.

diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 4e33a1d..38ffa20 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -616,9 +616,6 @@ if XEN
 config XEN_UTIL
 	default n
 
-config HAVE_ARCH_ALLOC_SKB
-	default y
-
 config HAVE_ARCH_DEV_ALLOC_SKB
 	default y
 
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index 907d4b5..0fa7f62 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -238,10 +238,6 @@ config XEN_COMPAT_030002
 
 endmenu
 
-config HAVE_ARCH_ALLOC_SKB
-	bool
-	default y
-
 config HAVE_ARCH_DEV_ALLOC_SKB
 	bool
 	default y
diff --git a/drivers/xen/core/skbuff.c b/drivers/xen/core/skbuff.c
index 37b8e4c..ab2111f 100644
--- a/drivers/xen/core/skbuff.c
+++ b/drivers/xen/core/skbuff.c
@@ -1,3 +1,9 @@
+/*
+ * clalance: In the bare-metal kernel __dev_alloc_skb is an inline function.
+ * Unfortunately, in the Xen kernel, it's a regular function, and somehow
+ * found its way onto the Xen kernel kABI whitelist. We just keep a copy
+ * of the bare-metal version here to maintain kABI.
+ */
 #include <linux/module.h>
 #include <linux/version.h>
 
@@ -13,132 +19,15 @@
 #include <asm/page.h>
 #include <asm/hypervisor.h>
 
-/* Referenced in netback.c. */
 /*static*/ kmem_cache_t *skbuff_cachep;
 EXPORT_SYMBOL(skbuff_cachep);
 
-/* Allow up to 64kB or page-sized packets (whichever is greater). */
-#if PAGE_SHIFT < 16
-#define MAX_SKBUFF_ORDER (16 - PAGE_SHIFT)
-#else
-#define MAX_SKBUFF_ORDER 0
-#endif
-static kmem_cache_t *skbuff_order_cachep[MAX_SKBUFF_ORDER + 1];
-
-static struct {
-	int size;
-	kmem_cache_t *cachep;
-} skbuff_small[] = { { 512, NULL }, { 2048, NULL } };
-
-struct sk_buff *__alloc_skb(unsigned int length, gfp_t gfp_mask,
-			    int fclone)
-{
-	int order, i;
-	kmem_cache_t *cachep;
-
-	length = SKB_DATA_ALIGN(length) + sizeof(struct skb_shared_info);
-
-	if (length <= skbuff_small[ARRAY_SIZE(skbuff_small)-1].size) {
-		for (i = 0; skbuff_small[i].size < length; i++)
-			continue;
-		cachep = skbuff_small[i].cachep;
-	} else {
-		order = get_order(length);
-		if (order > MAX_SKBUFF_ORDER) {
-			printk(KERN_ALERT "Attempt to allocate order %d "
-			       "skbuff. Increase MAX_SKBUFF_ORDER.\n", order);
-			return NULL;
-		}
-		cachep = skbuff_order_cachep[order];
-	}
-
-	length -= sizeof(struct skb_shared_info);
-
-	return alloc_skb_from_cache(cachep, length, gfp_mask, fclone);
-}
-
 struct sk_buff *__dev_alloc_skb(unsigned int length, gfp_t gfp_mask)
 {
-	struct sk_buff *skb;
-	int order;
-
-	length = SKB_DATA_ALIGN(length + 16);
-	order = get_order(length + sizeof(struct skb_shared_info));
-	if (order > MAX_SKBUFF_ORDER) {
-		printk(KERN_ALERT "Attempt to allocate order %d skbuff. "
-		       "Increase MAX_SKBUFF_ORDER.\n", order);
-		return NULL;
-	}
-
-	skb = alloc_skb_from_cache(
-		skbuff_order_cachep[order], length, gfp_mask, 0);
-	if (skb != NULL)
-		skb_reserve(skb, 16);
-
+	struct sk_buff *skb = alloc_skb(length + NET_SKB_PAD, gfp_mask);
+	if (likely(skb))
+		skb_reserve(skb, NET_SKB_PAD);
 	return skb;
 }
 
-static void skbuff_ctor(void *buf, kmem_cache_t *cachep, unsigned long unused)
-{
-	int order = 0;
-
-	while (skbuff_order_cachep[order] != cachep)
-		order++;
-
-	/* Do our best to allocate contiguous memory but fall back to IOMMU. */
-	if (order != 0)
-		(void)xen_create_contiguous_region(
-			(unsigned long)buf, order, 0);
-
-	scrub_pages(buf, 1 << order);
-}
-
-static void skbuff_dtor(void *buf, kmem_cache_t *cachep, unsigned long unused)
-{
-	int order = 0;
-
-	while (skbuff_order_cachep[order] != cachep)
-		order++;
-
-	if (order != 0)
-		xen_destroy_contiguous_region((unsigned long)buf, order);
-}
-
-static int __init skbuff_init(void)
-{
-	static char name[MAX_SKBUFF_ORDER + 1][20];
-	static char small_name[ARRAY_SIZE(skbuff_small)][20];
-	unsigned long size;
-	int i, order;
-
-	for (i = 0; i < ARRAY_SIZE(skbuff_small); i++) {
-		size = skbuff_small[i].size;
-		sprintf(small_name[i], "xen-skb-%lu", size);
-		/*
-		 * No ctor/dtor: objects do not span page boundaries, and they
-		 * are only used on transmit path so no need for scrubbing.
-		 */
-		skbuff_small[i].cachep = kmem_cache_create(
-			small_name[i], size, size, 0, NULL, NULL);
-	}
-
-	for (order = 0; order <= MAX_SKBUFF_ORDER; order++) {
-		size = PAGE_SIZE << order;
-		sprintf(name[order], "xen-skb-%lu", size);
-		if (is_running_on_xen() && is_initial_xendomain())
-			skbuff_order_cachep[order] = kmem_cache_create(
-				name[order], size, size, 0,
-				skbuff_ctor, skbuff_dtor);
-		else
-			skbuff_order_cachep[order] = kmem_cache_create(
-				name[order], size, size, 0, NULL, NULL);
-
-	}
-
-	skbuff_cachep = skbuff_order_cachep[0];
-
-	return 0;
-}
-core_initcall(skbuff_init);
-
 EXPORT_SYMBOL(__dev_alloc_skb);
diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
index 1468fb8..8fb8927 100644
--- a/drivers/xen/netback/netback.c
+++ b/drivers/xen/netback/netback.c
@@ -137,42 +137,6 @@ static inline void maybe_schedule_tx_action(void)
 		tasklet_schedule(&net_tx_tasklet);
 }
 
-/*
- * A gross way of confirming the origin of an skb data page. The slab
- * allocator abuses a field in the page struct to cache the kmem_cache_t ptr.
- */
-static inline int is_xen_skb(struct sk_buff *skb)
-{
-	extern kmem_cache_t *skbuff_cachep;
-	kmem_cache_t *cp = (kmem_cache_t *)virt_to_page(skb->head)->lru.next;
-	return (cp == skbuff_cachep);
-}
-
-/*
- * We can flip without copying the packet unless:
- * 1. The data is not allocated from our special cache; or
- * 2. The main data area is shared; or
- * 3. One or more fragments are shared; or
- * 4. There are chained fragments.
- */
-static inline int is_flippable_skb(struct sk_buff *skb)
-{
-	int frag;
-
-	if (!is_xen_skb(skb) || skb_cloned(skb))
-		return 0;
-
-	for (frag = 0; frag < skb_shinfo(skb)->nr_frags; frag++) {
-		if (page_count(skb_shinfo(skb)->frags[frag].page) > 1)
-			return 0;
-	}
-
-	if (skb_shinfo(skb)->frag_list != NULL)
-		return 0;
-
-	return 1;
-}
-
 static struct sk_buff *netbk_copy_skb(struct sk_buff *skb)
 {
 	struct skb_shared_info *ninfo;
@@ -286,7 +250,7 @@ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	 * Copy the packet here if it's destined for a flipping interface
 	 * but isn't flippable (e.g. extra references to data).
 	 */
-	if (!netif->copying_receiver && !is_flippable_skb(skb)) {
+	if (!netif->copying_receiver) {
 		struct sk_buff *nskb = netbk_copy_skb(skb);
 		if ( unlikely(nskb == NULL) )
 			goto drop;
diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c
index d15ce95..9026df0 100644
--- a/drivers/xen/netback/xenbus.c
+++ b/drivers/xen/netback/xenbus.c
@@ -93,10 +93,22 @@ static int netback_probe(struct xenbus_device *dev,
 			goto abort_transaction;
 		}
 
+		/* We support rx-copy path. */
 		err = xenbus_printf(xbt, dev->nodename,
 				    "feature-rx-copy", "%d", 1);
 		if (err) {
-			message = "writing feature-copying";
+			message = "writing feature-rx-copy";
+			goto abort_transaction;
+		}
+
+		/*
+		 * We don't support rx-flip path (except old guests who don't
+		 * grok this feature flag).
+		 */
+		err = xenbus_printf(xbt, dev->nodename,
+				    "feature-rx-flip", "%d", 0);
+		if (err) {
+			message = "writing feature-rx-flip";
 			goto abort_transaction;
 		}
 
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 56a9f81..17c03bf 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -360,8 +360,7 @@ static inline struct sk_buff *alloc_skb_fclone(unsigned int size,
 
 extern struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp,
 					    unsigned int size,
-					    gfp_t priority,
-					    int fclone);
+					    gfp_t priority);
 extern void kfree_skbmem(struct sk_buff *skb);
 extern struct sk_buff *skb_clone(struct sk_buff *skb,
 				 gfp_t priority);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 360c22e..19c7222 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -140,7 +140,6 @@ EXPORT_SYMBOL(skb_truesize_bug);
  *	Buffers may only be allocated from interrupts using a @gfp_mask of
  *	%GFP_ATOMIC.
  */
-#ifndef CONFIG_HAVE_ARCH_ALLOC_SKB
 struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 			    int fclone)
 {
@@ -196,7 +195,6 @@ nodata:
 	skb = NULL;
 	goto out;
 }
-#endif /* !CONFIG_HAVE_ARCH_ALLOC_SKB */
 
 /**
  *	alloc_skb_from_cache	-	allocate a network buffer
@@ -214,17 +212,14 @@ nodata:
  */
 struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp,
 				     unsigned int size,
-				     gfp_t gfp_mask,
-				     int fclone)
+				     gfp_t gfp_mask)
 {
-	kmem_cache_t *cache;
 	struct sk_buff *skb;
 	u8 *data;
 
-	cache = fclone ? skbuff_fclone_cache : skbuff_head_cache;
-
 	/* Get the HEAD */
-	skb = kmem_cache_alloc(cache, gfp_mask & ~__GFP_DMA);
+	skb = kmem_cache_alloc(skbuff_head_cache,
+			       gfp_mask & ~__GFP_DMA);
 	if (!skb)
 		goto out;
 
@@ -249,20 +244,10 @@ struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp,
 	skb_shinfo(skb)->gso_segs = 0;
 	skb_shinfo(skb)->gso_type = 0;
 	skb_shinfo(skb)->frag_list = NULL;
-
-	if (fclone) {
-		struct sk_buff *child = skb + 1;
-		atomic_t *fclone_ref = (atomic_t *) (child + 1);
-
-		skb->fclone = SKB_FCLONE_ORIG;
-		atomic_set(fclone_ref, 1);
-
-		child->fclone = SKB_FCLONE_UNAVAILABLE;
-	}
 out:
 	return skb;
 nodata:
-	kmem_cache_free(cache, skb);
+	kmem_cache_free(skbuff_head_cache, skb);
 	skb = NULL;
 	goto out;
 }
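
For reference, the rx-copy negotiation this patch enables looks roughly as
follows from the frontend side. This is a minimal sketch under assumptions,
not part of the patch: netfront_select_rx_mode() is an invented name used
only for illustration, while xenbus_scanf(), XBT_NIL, and dev->otherend are
the existing xenbus interfaces.

#include <xen/xenbus.h>

/*
 * Hypothetical helper: decide whether the receive path should run in
 * copying mode.  Returns 1 for rx-copy, 0 for the legacy rx-flip path.
 */
static int netfront_select_rx_mode(struct xenbus_device *dev)
{
	unsigned int feature_rx_copy;

	/*
	 * Backends that predate the flag never write it, so a failed
	 * read means "flip only".  xenbus_scanf() returns the number
	 * of values it parsed.
	 */
	if (xenbus_scanf(XBT_NIL, dev->otherend,
			 "feature-rx-copy", "%u", &feature_rx_copy) != 1)
		feature_rx_copy = 0;

	/*
	 * With this patch netback advertises feature-rx-copy = 1 and
	 * feature-rx-flip = 0, so any flag-aware frontend ends up in
	 * copying mode; only old guests keep flipping.
	 */
	return feature_rx_copy ? 1 : 0;
}

Old flip-only guests keep working because netback now runs every skb bound
for a non-copying receiver through netbk_copy_skb() (see the netback.c hunk
above), at the cost of a copy in the backend.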