From: Neil Horman <nhorman@redhat.com> Date: Fri, 12 Sep 2008 09:53:21 -0400 Subject: [net] ipv6: extra sysctls for additional TAHI tests Message-id: 20080912135321.GE29710@hmsendeavour.rdu.redhat.com O-Subject: [RHEL 5.3 PATCH]: ipv6: add extra ipv6 sysctls to support additional TAHI tests (bz 458270) Bugzilla: 458270 RH-Acked-by: Thomas Graf <tgraf@redhat.com> Hey all- This is a patch to add the accept_dad and disable_ipv6 per-interface sysctls to the RHEL5 kernel. We need them to pass a few TAHI ipv6 certification tests. Unfortunately, since these sysctls live in the ipv6_devconf structure, we can't directly backport them from upstream, since that's a big ABI breaker. So I've had to shuffle the stuff off into an extension on the net_device structure like we did in RHEL4 for netpoll. I'm not a big fan of doing that, but there's no other way without really messing up the user space interface that I can see. Plus we have a few other potential patches I think that may want to make use of an extended net_device struct. Tested by our TAHI test group extensively, satisfies bz 458270. This is flagged as an exception so reviews ASAP appreciated. Regards Neil diff --git a/include/linux/if.h b/include/linux/if.h index 92faf44..554e1a8 100644 --- a/include/linux/if.h +++ b/include/linux/if.h @@ -61,6 +61,7 @@ #define IFF_MASTER_ALB 0x10 /* bonding master, balance-alb. 
*/ #define IFF_BONDING 0x20 /* bonding master or slave */ #define IFF_SLAVE_NEEDARP 0x40 /* need ARPs for validation */ +#define IFF_EXTENDED 0x80 /* Extended data available */ #define IF_GET_IFACE 0x0001 /* for querying only */ #define IF_GET_PROTO 0x0002 diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 254c6a7..073ad8e 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -182,6 +182,8 @@ enum { DEVCONF_ACCEPT_RA_RTR_PREF, DEVCONF_RTR_PROBE_INTERVAL, DEVCONF_ACCEPT_RA_RT_INFO_MAX_PLEN, + DEVCONF_DISABLE_IPV6, + DEVCONF_ACCEPT_DAD, DEVCONF_MAX }; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d8e82c9..8d93f17 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -535,6 +535,23 @@ struct net_device struct class_device class_dev; /* space for optional statistics and wireless sysfs groups */ struct attribute_group *sysfs_groups[3]; +#ifndef __GENKSYMS__ + unsigned short priv_len; +#endif +}; + +/* + * struct net_device can't be modified without breaking ABI, so we + * add net_device_extended to the end in alloc_netdev. Anything that + * needs to be added to a net_device can be appended here + */ +struct ipv6_devconf_extensions { + s32 disable_ipv6; + s32 accept_dad; +}; + +struct net_device_extended { + struct ipv6_devconf_extensions ipv6_devconf_ext; }; #define NETDEV_ALIGN 32 @@ -547,6 +564,14 @@ static inline void *netdev_priv(struct net_device *dev) & ~NETDEV_ALIGN_CONST); } +static inline struct net_device_extended *dev_extended(struct net_device *dev) +{ + if (!(dev->priv_flags & IFF_EXTENDED)) + return NULL; + return (struct net_device_extended *) ((char *) netdev_priv(dev) + + ((dev->priv_len + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST)); +} + #define SET_MODULE_OWNER(dev) do { } while (0) /* Set the sysfs physical device reference for the network logical device * if set prior to registration will cause a symlink during initialization. 
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 24d477b..d4b00d4 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -585,6 +585,9 @@ enum { NET_IPV6_ACCEPT_RA_RTR_PREF=20, NET_IPV6_RTR_PROBE_INTERVAL=21, NET_IPV6_ACCEPT_RA_RT_INFO_MAX_PLEN=22, +#define NET_IPV6_SYSCTL_EXTENDED_BOUNDARY NET_IPV6_ACCEPT_RA_RT_INFO_MAX_PLEN + NET_IPV6_DISABLE=23, + NET_IPV6_ACCEPT_DAD=24, __NET_IPV6_MAX }; diff --git a/net/core/dev.c b/net/core/dev.c index 7cb219c..a63500a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3257,7 +3257,8 @@ struct net_device *alloc_netdev(int sizeof_priv, const char *name, /* ensure 32-byte alignment of both the device and private area */ alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST; - alloc_size += sizeof_priv + NETDEV_ALIGN_CONST; + alloc_size += (sizeof_priv + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST; + alloc_size += sizeof(struct net_device_extended) + NETDEV_ALIGN_CONST; p = kzalloc(alloc_size, GFP_KERNEL); if (!p) { @@ -3274,6 +3275,8 @@ struct net_device *alloc_netdev(int sizeof_priv, const char *name, setup(dev); strcpy(dev->name, name); + dev->priv_flags |= IFF_EXTENDED; + dev->priv_len = sizeof_priv; return dev; } EXPORT_SYMBOL(alloc_netdev); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 2db3e94..3744762 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -131,6 +131,7 @@ static void addrconf_leave_anycast(struct inet6_ifaddr *ifp); static int addrconf_ifdown(struct net_device *dev, int how); +static int ipv6_generate_eui64(u8 *eui, struct net_device *dev); static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags); static void addrconf_dad_timer(unsigned long data); static void addrconf_dad_completed(struct inet6_ifaddr *ifp); @@ -176,6 +177,11 @@ struct ipv6_devconf ipv6_devconf = { #endif }; +struct ipv6_devconf_extensions ipv6_devconf_extensions = { + .disable_ipv6 = 0, + .accept_dad = 1, +}; + static struct ipv6_devconf ipv6_devconf_dflt = 
{ .forwarding = 0, .hop_limit = IPV6_DEFAULT_HOPLIMIT, @@ -206,6 +212,11 @@ static struct ipv6_devconf ipv6_devconf_dflt = { #endif }; +static struct ipv6_devconf_extensions ipv6_devconf_extensions_dflt = { + .disable_ipv6 = 0, + .accept_dad = 1, +}; + int sysctl_ip6_odad = 0; /* IPv6 Wildcard Address and Loopback Address defined by RFC2553 */ @@ -345,6 +356,7 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev) { struct inet6_dev *ndev; struct in6_addr maddr; + struct net_device_extended *ext = dev_extended(dev); ASSERT_RTNL(); @@ -359,6 +371,10 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev) rwlock_init(&ndev->lock); ndev->dev = dev; memcpy(&ndev->cnf, &ipv6_devconf_dflt, sizeof(ndev->cnf)); + if (ext != NULL) { + memcpy(&ext->ipv6_devconf_ext, &ipv6_devconf_extensions_dflt, + sizeof(ext->ipv6_devconf_ext)); + } ndev->cnf.mtu6 = dev->mtu; ndev->cnf.sysctl = NULL; ndev->nd_parms = neigh_parms_alloc(dev, &nd_tbl); @@ -394,6 +410,9 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev) */ in6_dev_hold(ndev); + if (ext && (dev->flags & (IFF_NOARP | IFF_LOOPBACK))) + ext->ipv6_devconf_ext.accept_dad = -1; + #ifdef CONFIG_IPV6_PRIVACY init_timer(&ndev->regen_timer); ndev->regen_timer.function = ipv6_regen_rndid; @@ -1313,6 +1332,25 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp) void addrconf_dad_failure(struct inet6_ifaddr *ifp) { + struct inet6_dev *idev = ifp->idev; + struct net_device_extended *ext; + + ext = (idev->dev == NULL) ? 
NULL : dev_extended(idev->dev); + + if (ext && ext->ipv6_devconf_ext.accept_dad > 1 && + !ext->ipv6_devconf_ext.disable_ipv6) { + struct in6_addr addr; + + addr.s6_addr32[0] = htonl(0xfe800000); + addr.s6_addr32[1] = 0; + + if (!ipv6_generate_eui64(addr.s6_addr + 8, idev->dev) && + ipv6_addr_equal(&ifp->addr, &addr)) { + /* DAD failed for link-local based on MAC address */ + ext->ipv6_devconf_ext.disable_ipv6 = 1; + } + } + if (net_ratelimit()) printk(KERN_INFO "%s: duplicate address detected!\n", ifp->idev->dev->name); addrconf_dad_stop(ifp); @@ -2597,12 +2635,21 @@ static void addrconf_dad_timer(unsigned long data) struct inet6_dev *idev = ifp->idev; struct in6_addr unspec; struct in6_addr mcaddr; + struct net_device_extended *ext = (idev == NULL) ? NULL : dev_extended(idev->dev); read_lock_bh(&idev->lock); if (idev->dead) { read_unlock_bh(&idev->lock); goto out; } + + if (ext && ext->ipv6_devconf_ext.accept_dad > 1 && + ext->ipv6_devconf_ext.disable_ipv6) { + read_unlock_bh(&idev->lock); + addrconf_dad_failure(ifp); + return; + } + spin_lock_bh(&ifp->lock); if (ifp->probes == 0) { /* @@ -3348,7 +3395,7 @@ static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa) netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_IFADDR, GFP_ATOMIC); } -static void inline ipv6_store_devconf(struct ipv6_devconf *cnf, +static void inline ipv6_store_devconf(struct ipv6_devconf *cnf, struct ipv6_devconf_extensions *xcnf, __s32 *array, int bytes) { memset(array, 0, bytes); @@ -3380,6 +3427,10 @@ static void inline ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_ACCEPT_RA_RT_INFO_MAX_PLEN] = cnf->accept_ra_rt_info_max_plen; #endif #endif + if (xcnf) { + array[DEVCONF_DISABLE_IPV6] = xcnf->disable_ipv6; + array[DEVCONF_ACCEPT_DAD] = xcnf->accept_dad; + } } /* Maximum length of ifinfomsg attributes */ @@ -3404,6 +3455,8 @@ static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev, struct rtattr *subattr; __u32 mtu = dev->mtu; struct ifla_cacheinfo ci; + struct 
net_device_extended *ext = (dev == NULL) ? NULL : dev_extended(dev); + struct ipv6_devconf_extensions *xcnf = (ext == NULL) ? NULL : &ext->ipv6_devconf_ext; nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*r), flags); r = NLMSG_DATA(nlh); @@ -3441,7 +3494,7 @@ static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev, /* return the device sysctl params */ if ((array = kmalloc(DEVCONF_MAX * sizeof(*array), GFP_ATOMIC)) == NULL) goto rtattr_failure; - ipv6_store_devconf(&idev->cnf, array, DEVCONF_MAX * sizeof(*array)); + ipv6_store_devconf(&idev->cnf, xcnf, array, DEVCONF_MAX * sizeof(*array)); RTA_PUT(skb, IFLA_INET6_CONF, DEVCONF_MAX * sizeof(*array), array); /* XXX - Statistics/MC not implemented */ @@ -3813,6 +3866,22 @@ static struct addrconf_sysctl_table .mode = 0644, .proc_handler = &proc_dointvec, }, + { + .ctl_name = NET_IPV6_DISABLE, + .procname = "disable_ipv6", + .data = &ipv6_devconf_extensions.disable_ipv6, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = NET_IPV6_ACCEPT_DAD, + .procname = "accept_dad", + .data = &ipv6_devconf_extensions.accept_dad, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, #ifdef CONFIG_IPV6_PRIVACY { .ctl_name = NET_IPV6_USE_TEMPADDR, @@ -3963,6 +4032,8 @@ static void addrconf_sysctl_register(struct inet6_dev *idev, struct ipv6_devconf int i; struct net_device *dev = idev ? 
idev->dev : NULL; struct addrconf_sysctl_table *t; + struct net_device_extended *ext; + struct ipv6_devconf_extensions *dext; char *dev_name = NULL; t = kmalloc(sizeof(*t), GFP_KERNEL); @@ -3970,9 +4041,44 @@ static void addrconf_sysctl_register(struct inet6_dev *idev, struct ipv6_devconf return; memcpy(t, &addrconf_sysctl, sizeof(*t)); for (i=0; t->addrconf_vars[i].data; i++) { - t->addrconf_vars[i].data += (char*)p - (char*)&ipv6_devconf; - t->addrconf_vars[i].de = NULL; - t->addrconf_vars[i].extra1 = idev; /* embedded; no ref */ + if (t->addrconf_vars[i].ctl_name <= NET_IPV6_SYSCTL_EXTENDED_BOUNDARY) { + t->addrconf_vars[i].data += (char*)p - (char*)&ipv6_devconf; + t->addrconf_vars[i].de = NULL; + t->addrconf_vars[i].extra1 = idev; /* embedded; no ref */ + } else { + ext = (dev == NULL) ? NULL : dev_extended(dev); + if (ext == NULL) { + dext = &ipv6_devconf_extensions_dflt; + if (p != &ipv6_devconf_dflt) + /* + * We're registering an interface here that + * doesn't have an extended segment, so we + * register the extra sysctls, but we give them + * no permissions so that we can't write to them + */ + t->addrconf_vars[i].mode = 0000; + } else + dext = &ext->ipv6_devconf_ext; + + if (dext == NULL) + continue; + /* + * Common settings for the extensions + */ + t->addrconf_vars[i].de = NULL; + t->addrconf_vars[i].extra1 = idev; + switch (t->addrconf_vars[i].ctl_name) { + case NET_IPV6_DISABLE: + t->addrconf_vars[i].data = &dext->disable_ipv6; + break; + case NET_IPV6_ACCEPT_DAD: + t->addrconf_vars[i].data = &dext->accept_dad; + break; + default: + BUG(); + } + } + } if (dev) { dev_name = dev->name; diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 48bf675..7c02a9c 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -61,6 +61,7 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt struct ipv6hdr *hdr; u32 pkt_len; struct inet6_dev *idev; + struct net_device_extended *ext = dev_extended(dev); if (skb->pkt_type == 
PACKET_OTHERHOST) { kfree_skb(skb); @@ -73,7 +74,8 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt IP6_INC_STATS_BH(idev, IPSTATS_MIB_INRECEIVES); - if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) { + if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL || + (ext && unlikely(ext->ipv6_devconf_ext.disable_ipv6))) { IP6_INC_STATS_BH(idev, IPSTATS_MIB_INDISCARDS); rcu_read_unlock(); goto out; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 920625f..94cf058 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -149,6 +149,15 @@ static int ip6_output2(struct sk_buff *skb) int ip6_output(struct sk_buff *skb) { + struct inet6_dev *idev = ip6_dst_idev(skb->dst); + struct net_device_extended *ext = dev_extended(idev->dev); + + if (unlikely(ext && ext->ipv6_devconf_ext.disable_ipv6)) { + IP6_INC_STATS(idev, IPSTATS_MIB_OUTDISCARDS); + kfree_skb(skb); + return 0; + } + if ((skb->len > dst_mtu(skb->dst) && !skb_is_gso(skb)) || dst_allfrag(skb->dst)) return ip6_fragment(skb, ip6_output2);