From: Chris Lalancette <clalance@redhat.com>
Date: Wed, 6 May 2009 09:38:24 +0200
Subject: [ia64] xen: switch from flipping to copying interface
Message-id: 4A013E70.1000108@redhat.com
O-Subject: [RHEL5.4 PATCH 1/3 v2]: Switch from a flipping to a copying interface
Bugzilla: 479754
RH-Acked-by: Rik van Riel <riel@redhat.com>
RH-Acked-by: Don Dutile <ddutile@redhat.com>

This first patch changes Xen netback to use page copying rather than page
flipping by default. It also removes the calls to
xen_create_contiguous_region(), which were causing the original bug. With
this patch in place, the original problem in bz 479754 seems to be solved.
This is a backport of upstream xen-unstable c/s 12007 and 13494. A short
sketch of the frontend side of the rx-copy negotiation follows the diff.

diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 4e33a1d..38ffa20 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -616,9 +616,6 @@ if XEN
 config XEN_UTIL
 	default n
 
-config HAVE_ARCH_ALLOC_SKB
-	default y
-
 config HAVE_ARCH_DEV_ALLOC_SKB
 	default y
 
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index 907d4b5..0fa7f62 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -238,10 +238,6 @@ config XEN_COMPAT_030002
 
 endmenu
 
-config HAVE_ARCH_ALLOC_SKB
-	bool
-	default y
-
 config HAVE_ARCH_DEV_ALLOC_SKB
 	bool
 	default y
diff --git a/drivers/xen/core/skbuff.c b/drivers/xen/core/skbuff.c
index 37b8e4c..ab2111f 100644
--- a/drivers/xen/core/skbuff.c
+++ b/drivers/xen/core/skbuff.c
@@ -1,3 +1,9 @@
+/*
+ * clalance: In the bare-metal kernel __dev_alloc_skb is an inline function.
+ * Unfortunately, in the Xen kernel, it's a regular function, and somehow
+ * found its way onto the Xen kernel kABI whitelist. We just keep a copy
+ * of the bare-metal version here to maintain kABI.
+ */
 #include <linux/module.h>
 #include <linux/version.h>
 
@@ -13,132 +19,15 @@
 #include <asm/page.h>
 #include <asm/hypervisor.h>
 
-/* Referenced in netback.c. */
 /*static*/ kmem_cache_t *skbuff_cachep;
 EXPORT_SYMBOL(skbuff_cachep);
 
-/* Allow up to 64kB or page-sized packets (whichever is greater). */
-#if PAGE_SHIFT < 16
-#define MAX_SKBUFF_ORDER (16 - PAGE_SHIFT)
-#else
-#define MAX_SKBUFF_ORDER 0
-#endif
-static kmem_cache_t *skbuff_order_cachep[MAX_SKBUFF_ORDER + 1];
-
-static struct {
-	int size;
-	kmem_cache_t *cachep;
-} skbuff_small[] = { { 512, NULL }, { 2048, NULL } };
-
-struct sk_buff *__alloc_skb(unsigned int length, gfp_t gfp_mask,
-			    int fclone)
-{
-	int order, i;
-	kmem_cache_t *cachep;
-
-	length = SKB_DATA_ALIGN(length) + sizeof(struct skb_shared_info);
-
-	if (length <= skbuff_small[ARRAY_SIZE(skbuff_small)-1].size) {
-		for (i = 0; skbuff_small[i].size < length; i++)
-			continue;
-		cachep = skbuff_small[i].cachep;
-	} else {
-		order = get_order(length);
-		if (order > MAX_SKBUFF_ORDER) {
-			printk(KERN_ALERT "Attempt to allocate order %d "
-			       "skbuff. Increase MAX_SKBUFF_ORDER.\n", order);
-			return NULL;
-		}
-		cachep = skbuff_order_cachep[order];
-	}
-
-	length -= sizeof(struct skb_shared_info);
-
-	return alloc_skb_from_cache(cachep, length, gfp_mask, fclone);
-}
-
 struct sk_buff *__dev_alloc_skb(unsigned int length, gfp_t gfp_mask)
 {
-	struct sk_buff *skb;
-	int order;
-
-	length = SKB_DATA_ALIGN(length + 16);
-	order = get_order(length + sizeof(struct skb_shared_info));
-	if (order > MAX_SKBUFF_ORDER) {
-		printk(KERN_ALERT "Attempt to allocate order %d skbuff. "
-		       "Increase MAX_SKBUFF_ORDER.\n", order);
-		return NULL;
-	}
-
-	skb = alloc_skb_from_cache(
-		skbuff_order_cachep[order], length, gfp_mask, 0);
-	if (skb != NULL)
-		skb_reserve(skb, 16);
-
+	struct sk_buff *skb = alloc_skb(length + NET_SKB_PAD, gfp_mask);
+	if (likely(skb))
+		skb_reserve(skb, NET_SKB_PAD);
 	return skb;
 }
 
-static void skbuff_ctor(void *buf, kmem_cache_t *cachep, unsigned long unused)
-{
-	int order = 0;
-
-	while (skbuff_order_cachep[order] != cachep)
-		order++;
-
-	/* Do our best to allocate contiguous memory but fall back to IOMMU. */
-	if (order != 0)
-		(void)xen_create_contiguous_region(
-			(unsigned long)buf, order, 0);
-
-	scrub_pages(buf, 1 << order);
-}
-
-static void skbuff_dtor(void *buf, kmem_cache_t *cachep, unsigned long unused)
-{
-	int order = 0;
-
-	while (skbuff_order_cachep[order] != cachep)
-		order++;
-
-	if (order != 0)
-		xen_destroy_contiguous_region((unsigned long)buf, order);
-}
-
-static int __init skbuff_init(void)
-{
-	static char name[MAX_SKBUFF_ORDER + 1][20];
-	static char small_name[ARRAY_SIZE(skbuff_small)][20];
-	unsigned long size;
-	int i, order;
-
-	for (i = 0; i < ARRAY_SIZE(skbuff_small); i++) {
-		size = skbuff_small[i].size;
-		sprintf(small_name[i], "xen-skb-%lu", size);
-		/*
-		 * No ctor/dtor: objects do not span page boundaries, and they
-		 * are only used on transmit path so no need for scrubbing.
-		 */
-		skbuff_small[i].cachep = kmem_cache_create(
-			small_name[i], size, size, 0, NULL, NULL);
-	}
-
-	for (order = 0; order <= MAX_SKBUFF_ORDER; order++) {
-		size = PAGE_SIZE << order;
-		sprintf(name[order], "xen-skb-%lu", size);
-		if (is_running_on_xen() && is_initial_xendomain())
-			skbuff_order_cachep[order] = kmem_cache_create(
-				name[order], size, size, 0,
-				skbuff_ctor, skbuff_dtor);
-		else
-			skbuff_order_cachep[order] = kmem_cache_create(
-				name[order], size, size, 0, NULL, NULL);
-
-	}
-
-	skbuff_cachep = skbuff_order_cachep[0];
-
-	return 0;
-}
-core_initcall(skbuff_init);
-
 EXPORT_SYMBOL(__dev_alloc_skb);
diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
index 1468fb8..8fb8927 100644
--- a/drivers/xen/netback/netback.c
+++ b/drivers/xen/netback/netback.c
@@ -137,42 +137,6 @@ static inline void maybe_schedule_tx_action(void)
 		tasklet_schedule(&net_tx_tasklet);
 }
 
-/*
- * A gross way of confirming the origin of an skb data page. The slab
- * allocator abuses a field in the page struct to cache the kmem_cache_t ptr.
- */
-static inline int is_xen_skb(struct sk_buff *skb)
-{
-	extern kmem_cache_t *skbuff_cachep;
-	kmem_cache_t *cp = (kmem_cache_t *)virt_to_page(skb->head)->lru.next;
-	return (cp == skbuff_cachep);
-}
-
-/*
- * We can flip without copying the packet unless:
- * 1. The data is not allocated from our special cache; or
- * 2. The main data area is shared; or
- * 3. One or more fragments are shared; or
- * 4. There are chained fragments.
- */
-static inline int is_flippable_skb(struct sk_buff *skb)
-{
-	int frag;
-
-	if (!is_xen_skb(skb) || skb_cloned(skb))
-		return 0;
-
-	for (frag = 0; frag < skb_shinfo(skb)->nr_frags; frag++) {
-		if (page_count(skb_shinfo(skb)->frags[frag].page) > 1)
-			return 0;
-	}
-
-	if (skb_shinfo(skb)->frag_list != NULL)
-		return 0;
-
-	return 1;
-}
-
 static struct sk_buff *netbk_copy_skb(struct sk_buff *skb)
 {
 	struct skb_shared_info *ninfo;
@@ -286,7 +250,7 @@ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	 * Copy the packet here if it's destined for a flipping interface
 	 * but isn't flippable (e.g. extra references to data).
 	 */
-	if (!netif->copying_receiver && !is_flippable_skb(skb)) {
+	if (!netif->copying_receiver) {
 		struct sk_buff *nskb = netbk_copy_skb(skb);
 		if ( unlikely(nskb == NULL) )
 			goto drop;
diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c
index d15ce95..9026df0 100644
--- a/drivers/xen/netback/xenbus.c
+++ b/drivers/xen/netback/xenbus.c
@@ -93,10 +93,22 @@ static int netback_probe(struct xenbus_device *dev,
 			goto abort_transaction;
 		}
 
+		/* We support rx-copy path. */
 		err = xenbus_printf(xbt, dev->nodename,
 				    "feature-rx-copy", "%d", 1);
 		if (err) {
-			message = "writing feature-copying";
+			message = "writing feature-rx-copy";
+			goto abort_transaction;
+		}
+
+		/*
+		 * We don't support rx-flip path (except old guests who don't
+		 * grok this feature flag).
+		 */
+		err = xenbus_printf(xbt, dev->nodename,
+				    "feature-rx-flip", "%d", 0);
+		if (err) {
+			message = "writing feature-rx-flip";
 			goto abort_transaction;
 		}
 
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 56a9f81..17c03bf 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -360,8 +360,7 @@ static inline struct sk_buff *alloc_skb_fclone(unsigned int size,
 
 extern struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp,
 					    unsigned int size,
-					    gfp_t priority,
-					    int fclone);
+					    gfp_t priority);
 extern void kfree_skbmem(struct sk_buff *skb);
 extern struct sk_buff *skb_clone(struct sk_buff *skb,
 				 gfp_t priority);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 360c22e..19c7222 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -140,7 +140,6 @@ EXPORT_SYMBOL(skb_truesize_bug);
  *	Buffers may only be allocated from interrupts using a @gfp_mask of
  *	%GFP_ATOMIC.
  */
-#ifndef CONFIG_HAVE_ARCH_ALLOC_SKB
 struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 			    int fclone)
 {
@@ -196,7 +195,6 @@ nodata:
 	skb = NULL;
 	goto out;
 }
-#endif /* !CONFIG_HAVE_ARCH_ALLOC_SKB */
 
 /**
  *	alloc_skb_from_cache	-	allocate a network buffer
@@ -214,17 +212,14 @@ nodata:
  */
 struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp,
 				     unsigned int size,
-				     gfp_t gfp_mask,
-				     int fclone)
+				     gfp_t gfp_mask)
 {
-	kmem_cache_t *cache;
 	struct sk_buff *skb;
 	u8 *data;
 
-	cache = fclone ? skbuff_fclone_cache : skbuff_head_cache;
-
 	/* Get the HEAD */
-	skb = kmem_cache_alloc(cache, gfp_mask & ~__GFP_DMA);
+	skb = kmem_cache_alloc(skbuff_head_cache,
+			       gfp_mask & ~__GFP_DMA);
 	if (!skb)
 		goto out;
 
@@ -249,20 +244,10 @@ struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp,
 	skb_shinfo(skb)->gso_segs = 0;
 	skb_shinfo(skb)->gso_type = 0;
 	skb_shinfo(skb)->frag_list = NULL;
-
-	if (fclone) {
-		struct sk_buff *child = skb + 1;
-		atomic_t *fclone_ref = (atomic_t *) (child + 1);
-
-		skb->fclone = SKB_FCLONE_ORIG;
-		atomic_set(fclone_ref, 1);
-
-		child->fclone = SKB_FCLONE_UNAVAILABLE;
-	}
 out:
 	return skb;
 nodata:
-	kmem_cache_free(cache, skb);
+	kmem_cache_free(skbuff_head_cache, skb);
 	skb = NULL;
 	goto out;
 }
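
For reference, the rx-copy negotiation this patch enables looks roughly as
follows from the frontend side. This is a minimal sketch under assumptions,
not part of the patch: netfront_select_rx_mode() is an invented name used
only for illustration, while xenbus_scanf(), XBT_NIL, and dev->otherend are
the existing xenbus interfaces.

#include <xen/xenbus.h>

/*
 * Hypothetical helper: decide whether the receive path should run in
 * copying mode.  Returns 1 for rx-copy, 0 for the legacy rx-flip path.
 */
static int netfront_select_rx_mode(struct xenbus_device *dev)
{
	unsigned int feature_rx_copy;

	/*
	 * Backends that predate the flag never write it, so a failed
	 * read means "flip only".  xenbus_scanf() returns the number
	 * of values it parsed.
	 */
	if (xenbus_scanf(XBT_NIL, dev->otherend,
			 "feature-rx-copy", "%u", &feature_rx_copy) != 1)
		feature_rx_copy = 0;

	/*
	 * With this patch netback advertises feature-rx-copy = 1 and
	 * feature-rx-flip = 0, so any flag-aware frontend ends up in
	 * copying mode; only old guests keep flipping.
	 */
	return feature_rx_copy ? 1 : 0;
}

Old flip-only guests keep working because netback now runs every skb bound
for a non-copying receiver through netbk_copy_skb() (see the netback.c hunk
above), at the cost of a copy in the backend.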