From: Anton Arapov <aarapov@redhat.com> Date: Fri, 13 Jun 2008 16:34:53 +0200 Subject: [net] do liberal tracking for picked up connections Message-id: 4852858D.8010902@redhat.com O-Subject: [RHEL5.3 PATCH] BZ448328: connections hangs after running "service iptables restart" Bugzilla: 448328 RH-Acked-by: David S. Miller <davem@redhat.com> Bugzilla: 448328 Details: (from the upstream patch comment) Do liberal tracking (only RSTs need to be in-window) for connections picked up without seeing a SYN to deal with window scaling. Also change logging of invalid packets not to log packets accepted by liberal tracking to avoid spamming the logs. Upstream status: the issue is addressed by upstream commit a09113c2c8ec59a5cc228efa5869aade2b8f13f7 Test status: built and successfully tested http://brewweb.devel.redhat.com/brew/taskinfo?taskID=1355444 Notice: I've backported the whole patch, including the logging part, to stay aligned with upstream and to avoid spamming the logs in RHEL as well, given the introduced change. "u_int8_t loose;" of "enum tcp_conntrack" is no longer used, but is kept in order to preserve the kABI. == include/linux/netfilter/nf_conntrack_tcp.h | 4 ++- net/ipv4/netfilter/ip_conntrack_proto_tcp.c | 42 ++++++++++---------------- net/netfilter/nf_conntrack_proto_tcp.c | 40 +++++++++---------------- 3 files changed, 34 insertions(+), 52 deletions(-) include/linux/netfilter/nf_conntrack_tcp.h | 3 ++ net/ipv4/netfilter/ip_conntrack_proto_tcp.c | 42 ++++++++++---------------- net/netfilter/nf_conntrack_proto_tcp.c | 40 +++++++++---------------- 3 files changed, 34 insertions(+), 51 deletions(-) diff --git a/include/linux/netfilter/nf_conntrack_tcp.h b/include/linux/netfilter/nf_conntrack_tcp.h index b2feeff..0eccc65 100644 --- a/include/linux/netfilter/nf_conntrack_tcp.h +++ b/include/linux/netfilter/nf_conntrack_tcp.h @@ -2,6 +2,9 @@ #define _NF_CONNTRACK_TCP_H /* TCP tracking. 
*/ +/* Be liberal in window checking */ +#define IP_CT_TCP_FLAG_BE_LIBERAL 0x08 + /* This is exposed to userspace (ctnetlink) */ enum tcp_conntrack { TCP_CONNTRACK_NONE, diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c index fb920e7..2cec6ce 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c @@ -50,12 +50,9 @@ static DEFINE_RWLOCK(tcp_lock); If it's non-zero, we mark only out of window RST segments as INVALID. */ int ip_ct_tcp_be_liberal = 0; -/* When connection is picked up from the middle, how many packets are required - to pass in each direction when we assume we are in sync - if any side uses - window scaling, we lost the game. - If it is set to zero, we disable picking up already established +/* If it is set to zero, we disable picking up already established connections. */ -int ip_ct_tcp_loose = 3; +int ip_ct_tcp_loose = 1; /* Max number of the retransmitted packets without receiving an (acceptable) ACK from the destination. If this number is reached, a shorter timer @@ -693,12 +690,10 @@ static int tcp_in_window(struct ip_ct_tcp *state, after(end, sender->td_end - receiver->td_maxwin - 1), before(sack, receiver->td_end + 1), after(ack, receiver->td_end - MAXACKWINDOW(sender))); - - if (sender->loose || receiver->loose || - (before(seq, sender->td_maxend + 1) && - after(end, sender->td_end - receiver->td_maxwin - 1) && - before(sack, receiver->td_end + 1) && - after(ack, receiver->td_end - MAXACKWINDOW(sender)))) { + if (before(seq, sender->td_maxend + 1) && + after(end, sender->td_end - receiver->td_maxwin - 1) && + before(sack, receiver->td_end + 1) && + after(ack, receiver->td_end - MAXACKWINDOW(sender))) { /* * Take into account window scaling (RFC 1323). 
*/ @@ -741,15 +736,13 @@ static int tcp_in_window(struct ip_ct_tcp *state, state->retrans = 0; } } - /* - * Close the window of disabled window tracking :-) - */ - if (sender->loose) - sender->loose--; - res = 1; } else { - if (LOG_INVALID(IPPROTO_TCP)) + res = 0; + if (sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL || + ip_ct_tcp_be_liberal) + res = 1; + if (!res && LOG_INVALID(IPPROTO_TCP)) nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, "ip_ct_tcp: %s ", before(seq, sender->td_maxend + 1) ? @@ -760,8 +753,6 @@ static int tcp_in_window(struct ip_ct_tcp *state, : "ACK is over the upper bound (ACKed data not seen yet)" : "SEQ is under the lower bound (already ACKed data retransmitted)" : "SEQ is over the upper bound (over the window of the receiver)"); - - res = ip_ct_tcp_be_liberal; } DEBUGP("tcp_in_window: res=%i sender end=%u maxend=%u maxwin=%u " @@ -1104,8 +1095,6 @@ static int tcp_new(struct ip_conntrack *conntrack, tcp_options(skb, iph, th, &conntrack->proto.tcp.seen[0]); conntrack->proto.tcp.seen[1].flags = 0; - conntrack->proto.tcp.seen[0].loose = - conntrack->proto.tcp.seen[1].loose = 0; } else if (ip_ct_tcp_loose == 0) { /* Don't try to pick up connections. */ return 0; @@ -1126,11 +1115,12 @@ static int tcp_new(struct ip_conntrack *conntrack, conntrack->proto.tcp.seen[0].td_maxwin; conntrack->proto.tcp.seen[0].td_scale = 0; - /* We assume SACK. Should we assume window scaling too? 
*/ + /* We assume SACK and liberal window checking to handle + * window scaling */ conntrack->proto.tcp.seen[0].flags = - conntrack->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM; - conntrack->proto.tcp.seen[0].loose = - conntrack->proto.tcp.seen[1].loose = ip_ct_tcp_loose; + conntrack->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM | + IP_CT_TCP_FLAG_BE_LIBERAL; + } conntrack->proto.tcp.seen[1].td_end = 0; diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index af8adcb..10a57e6 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -59,12 +59,9 @@ static DEFINE_RWLOCK(tcp_lock); If it's non-zero, we mark only out of window RST segments as INVALID. */ int nf_ct_tcp_be_liberal = 0; -/* When connection is picked up from the middle, how many packets are required - to pass in each direction when we assume we are in sync - if any side uses - window scaling, we lost the game. - If it is set to zero, we disable picking up already established +/* If it is set to zero, we disable picking up already established connections. */ -int nf_ct_tcp_loose = 3; +int nf_ct_tcp_loose = 1; /* Max number of the retransmitted packets without receiving an (acceptable) ACK from the destination. If this number is reached, a shorter timer @@ -651,11 +648,10 @@ static int tcp_in_window(struct ip_ct_tcp *state, before(sack, receiver->td_end + 1), after(ack, receiver->td_end - MAXACKWINDOW(sender))); - if (sender->loose || receiver->loose || - (before(seq, sender->td_maxend + 1) && - after(end, sender->td_end - receiver->td_maxwin - 1) && - before(sack, receiver->td_end + 1) && - after(ack, receiver->td_end - MAXACKWINDOW(sender)))) { + if (before(seq, sender->td_maxend + 1) && + after(end, sender->td_end - receiver->td_maxwin - 1) && + before(sack, receiver->td_end + 1) && + after(ack, receiver->td_end - MAXACKWINDOW(sender))) { /* * Take into account window scaling (RFC 1323). 
*/ @@ -698,15 +694,13 @@ static int tcp_in_window(struct ip_ct_tcp *state, state->retrans = 0; } } - /* - * Close the window of disabled window tracking :-) - */ - if (sender->loose) - sender->loose--; - res = 1; } else { - if (LOG_INVALID(IPPROTO_TCP)) + res = 0; + if (sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL || + nf_ct_tcp_be_liberal) + res = 1; + if (!res && LOG_INVALID(IPPROTO_TCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: %s ", before(seq, sender->td_maxend + 1) ? @@ -717,8 +711,6 @@ static int tcp_in_window(struct ip_ct_tcp *state, : "ACK is over the upper bound (ACKed data not seen yet)" : "SEQ is under the lower bound (already ACKed data retransmitted)" : "SEQ is over the upper bound (over the window of the receiver)"); - - res = nf_ct_tcp_be_liberal; } DEBUGP("tcp_in_window: res=%i sender end=%u maxend=%u maxwin=%u " @@ -1063,8 +1055,6 @@ static int tcp_new(struct nf_conn *conntrack, tcp_options(skb, dataoff, th, &conntrack->proto.tcp.seen[0]); conntrack->proto.tcp.seen[1].flags = 0; - conntrack->proto.tcp.seen[0].loose = - conntrack->proto.tcp.seen[1].loose = 0; } else if (nf_ct_tcp_loose == 0) { /* Don't try to pick up connections. */ return 0; @@ -1085,11 +1075,11 @@ static int tcp_new(struct nf_conn *conntrack, conntrack->proto.tcp.seen[0].td_maxwin; conntrack->proto.tcp.seen[0].td_scale = 0; - /* We assume SACK. Should we assume window scaling too? */ + /* We assume SACK and liberal window checking to handle + * window scaling */ conntrack->proto.tcp.seen[0].flags = - conntrack->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM; - conntrack->proto.tcp.seen[0].loose = - conntrack->proto.tcp.seen[1].loose = nf_ct_tcp_loose; + conntrack->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM | + IP_CT_TCP_FLAG_BE_LIBERAL; } conntrack->proto.tcp.seen[1].td_end = 0;