diff --git a/kernel.spec b/kernel.spec index 23389a5..2b0a14d 100644 --- a/kernel.spec +++ b/kernel.spec @@ -783,12 +783,12 @@ Patch25132: rt2800usb-slow-down-TX-status-polling.patch #rhbz 1015558 Patch25133: fix-buslogic.patch -#rhbz 989251 -Patch25134: tcp-fix-incorrect-ca_state-in-tail-loss-probe.patch - #rhbz 1023413 Patch25135: alps-Support-for-Dell-XT2-model.patch +#CVE-2013-XXXX rhbz 1023477 1023495 +Patch25136: net_311.mbox + # END OF PATCH DEFINITIONS %endif @@ -1509,12 +1509,12 @@ ApplyPatch rt2800usb-slow-down-TX-status-polling.patch #rhbz 1015558 ApplyPatch fix-buslogic.patch -#rhbz 989251 -ApplyPatch tcp-fix-incorrect-ca_state-in-tail-loss-probe.patch - #rhbz 1023413 ApplyPatch alps-Support-for-Dell-XT2-model.patch +#CVE-2013-XXXX rhbz 1023477 1023495 +ApplyPatch net_311.mbox + # END OF PATCH APPLICATIONS %endif @@ -2357,6 +2357,7 @@ fi # || || %changelog * Fri Oct 25 2013 Josh Boyer +- CVE-2013-XXXX net: memory corruption with UDP_CORK and UFO (rhbz 1023477 1023495) - Add touchpad support for Dell XT2 (rhbz 1023413) * Tue Oct 22 2013 Josh Boyer diff --git a/net_311.mbox b/net_311.mbox new file mode 100644 index 0000000..d420777 --- /dev/null +++ b/net_311.mbox @@ -0,0 +1,3794 @@ +From 5444e381f5784d32d741864312909d2a6afe428e Mon Sep 17 00:00:00 2001 +From: Eric Dumazet +Date: Tue, 27 Aug 2013 05:46:32 -0700 +Subject: [PATCH 01/47] tcp: TSO packets automatic sizing + +[ Upstream commits 6d36824e730f247b602c90e8715a792003e3c5a7, + 02cf4ebd82ff0ac7254b88e466820a290ed8289a, and parts of + 7eec4174ff29cd42f2acfae8112f51c228545d40 ] + +After hearing many people over past years complaining against TSO being +bursty or even buggy, we are proud to present automatic sizing of TSO +packets. 
+ +One part of the problem is that tcp_tso_should_defer() uses a heuristic +relying on upcoming ACKs instead of a timer, but more generally, having +big TSO packets makes little sense for low rates, as it tends to create +micro bursts on the network, and general consensus is to reduce the +buffering amount. + +This patch introduces a per socket sk_pacing_rate, that approximates +the current sending rate, and allows us to size the TSO packets so +that we try to send one packet every ms. + +This field could be set by other transports. + +Patch has no impact for high speed flows, where having large TSO packets +makes sense to reach line rate. + +For other flows, this helps better packet scheduling and ACK clocking. + +This patch increases performance of TCP flows in lossy environments. + +A new sysctl (tcp_min_tso_segs) is added, to specify the +minimal size of a TSO packet (default being 2). + +A follow-up patch will provide a new packet scheduler (FQ), using +sk_pacing_rate as an input to perform optional per flow pacing. + +This explains why we chose to set sk_pacing_rate to twice the current +rate, allowing 'slow start' ramp up. + +sk_pacing_rate = 2 * cwnd * mss / srtt + +v2: Neal Cardwell reported a suspect deferring of last two segments on +initial write of 10 MSS, I had to change tcp_tso_should_defer() to take +into account tp->xmit_size_goal_segs + +Signed-off-by: Eric Dumazet +Cc: Neal Cardwell +Cc: Yuchung Cheng +Cc: Van Jacobson +Cc: Tom Herbert +Acked-by: Yuchung Cheng +Acked-by: Neal Cardwell +Signed-off-by: David S. 
Miller +--- + Documentation/networking/ip-sysctl.txt | 9 +++++++++ + include/net/sock.h | 2 ++ + include/net/tcp.h | 1 + + net/core/sock.c | 1 + + net/ipv4/sysctl_net_ipv4.c | 10 ++++++++++ + net/ipv4/tcp.c | 28 +++++++++++++++++++++++----- + net/ipv4/tcp_input.c | 34 +++++++++++++++++++++++++++++++++- + net/ipv4/tcp_output.c | 2 +- + 8 files changed, 80 insertions(+), 7 deletions(-) + +diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt +index 1074290..b522883 100644 +--- a/Documentation/networking/ip-sysctl.txt ++++ b/Documentation/networking/ip-sysctl.txt +@@ -478,6 +478,15 @@ tcp_syn_retries - INTEGER + tcp_timestamps - BOOLEAN + Enable timestamps as defined in RFC1323. + ++tcp_min_tso_segs - INTEGER ++ Minimal number of segments per TSO frame. ++ Since linux-3.12, TCP does an automatic sizing of TSO frames, ++ depending on flow rate, instead of filling 64Kbytes packets. ++ For specific usages, it's possible to force TCP to build big ++ TSO frames. Note that TCP stack might split too big TSO packets ++ if available window is too small. ++ Default: 2 ++ + tcp_tso_win_divisor - INTEGER + This allows control over what percentage of the congestion window + can be consumed by a single TSO frame. 
+diff --git a/include/net/sock.h b/include/net/sock.h +index 31d5cfb..04e148f 100644 +--- a/include/net/sock.h ++++ b/include/net/sock.h +@@ -232,6 +232,7 @@ struct cg_proto; + * @sk_napi_id: id of the last napi context to receive data for sk + * @sk_ll_usec: usecs to busypoll when there is no data + * @sk_allocation: allocation mode ++ * @sk_pacing_rate: Pacing rate (if supported by transport/packet scheduler) + * @sk_sndbuf: size of send buffer in bytes + * @sk_flags: %SO_LINGER (l_onoff), %SO_BROADCAST, %SO_KEEPALIVE, + * %SO_OOBINLINE settings, %SO_TIMESTAMPING settings +@@ -361,6 +362,7 @@ struct sock { + kmemcheck_bitfield_end(flags); + int sk_wmem_queued; + gfp_t sk_allocation; ++ u32 sk_pacing_rate; /* bytes per second */ + netdev_features_t sk_route_caps; + netdev_features_t sk_route_nocaps; + int sk_gso_type; +diff --git a/include/net/tcp.h b/include/net/tcp.h +index d198005..46cb8a4 100644 +--- a/include/net/tcp.h ++++ b/include/net/tcp.h +@@ -284,6 +284,7 @@ extern int sysctl_tcp_thin_dupack; + extern int sysctl_tcp_early_retrans; + extern int sysctl_tcp_limit_output_bytes; + extern int sysctl_tcp_challenge_ack_limit; ++extern int sysctl_tcp_min_tso_segs; + + extern atomic_long_t tcp_memory_allocated; + extern struct percpu_counter tcp_sockets_allocated; +diff --git a/net/core/sock.c b/net/core/sock.c +index 2c097c5..8729d91 100644 +--- a/net/core/sock.c ++++ b/net/core/sock.c +@@ -2297,6 +2297,7 @@ void sock_init_data(struct socket *sock, struct sock *sk) + sk->sk_ll_usec = sysctl_net_busy_read; + #endif + ++ sk->sk_pacing_rate = ~0U; + /* + * Before updating sk_refcnt, we must commit prior changes to memory + * (Documentation/RCU/rculist_nulls.txt for details) +diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c +index 610e324..6900b8b 100644 +--- a/net/ipv4/sysctl_net_ipv4.c ++++ b/net/ipv4/sysctl_net_ipv4.c +@@ -29,6 +29,7 @@ + static int zero; + static int one = 1; + static int four = 4; ++static int gso_max_segs = GSO_MAX_SEGS; + 
static int tcp_retr1_max = 255; + static int ip_local_port_range_min[] = { 1, 1 }; + static int ip_local_port_range_max[] = { 65535, 65535 }; +@@ -754,6 +755,15 @@ static struct ctl_table ipv4_table[] = { + .extra2 = &four, + }, + { ++ .procname = "tcp_min_tso_segs", ++ .data = &sysctl_tcp_min_tso_segs, ++ .maxlen = sizeof(int), ++ .mode = 0644, ++ .proc_handler = proc_dointvec_minmax, ++ .extra1 = &zero, ++ .extra2 = &gso_max_segs, ++ }, ++ { + .procname = "udp_mem", + .data = &sysctl_udp_mem, + .maxlen = sizeof(sysctl_udp_mem), +diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c +index 95544e4..ec586e5 100644 +--- a/net/ipv4/tcp.c ++++ b/net/ipv4/tcp.c +@@ -283,6 +283,8 @@ + + int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT; + ++int sysctl_tcp_min_tso_segs __read_mostly = 2; ++ + struct percpu_counter tcp_orphan_count; + EXPORT_SYMBOL_GPL(tcp_orphan_count); + +@@ -789,12 +791,28 @@ static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now, + xmit_size_goal = mss_now; + + if (large_allowed && sk_can_gso(sk)) { +- xmit_size_goal = ((sk->sk_gso_max_size - 1) - +- inet_csk(sk)->icsk_af_ops->net_header_len - +- inet_csk(sk)->icsk_ext_hdr_len - +- tp->tcp_header_len); ++ u32 gso_size, hlen; ++ ++ /* Maybe we should/could use sk->sk_prot->max_header here ? */ ++ hlen = inet_csk(sk)->icsk_af_ops->net_header_len + ++ inet_csk(sk)->icsk_ext_hdr_len + ++ tp->tcp_header_len; ++ ++ /* Goal is to send at least one packet per ms, ++ * not one big TSO packet every 100 ms. ++ * This preserves ACK clocking and is consistent ++ * with tcp_tso_should_defer() heuristic. 
++ */ ++ gso_size = sk->sk_pacing_rate / (2 * MSEC_PER_SEC); ++ gso_size = max_t(u32, gso_size, ++ sysctl_tcp_min_tso_segs * mss_now); ++ ++ xmit_size_goal = min_t(u32, gso_size, ++ sk->sk_gso_max_size - 1 - hlen); + +- /* TSQ : try to have two TSO segments in flight */ ++ /* TSQ : try to have at least two segments in flight ++ * (one in NIC TX ring, another in Qdisc) ++ */ + xmit_size_goal = min_t(u32, xmit_size_goal, + sysctl_tcp_limit_output_bytes >> 1); + +diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c +index 3ca2139..2f0e94b 100644 +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -688,6 +688,34 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt) + } + } + ++/* Set the sk_pacing_rate to allow proper sizing of TSO packets. ++ * Note: TCP stack does not yet implement pacing. ++ * FQ packet scheduler can be used to implement cheap but effective ++ * TCP pacing, to smooth the burst on large writes when packets ++ * in flight is significantly lower than cwnd (or rwin) ++ */ ++static void tcp_update_pacing_rate(struct sock *sk) ++{ ++ const struct tcp_sock *tp = tcp_sk(sk); ++ u64 rate; ++ ++ /* set sk_pacing_rate to 200 % of current rate (mss * cwnd / srtt) */ ++ rate = (u64)tp->mss_cache * 2 * (HZ << 3); ++ ++ rate *= max(tp->snd_cwnd, tp->packets_out); ++ ++ /* Correction for small srtt : minimum srtt being 8 (1 jiffy << 3), ++ * be conservative and assume srtt = 1 (125 us instead of 1.25 ms) ++ * We probably need usec resolution in the future. ++ * Note: This also takes care of possible srtt=0 case, ++ * when tcp_rtt_estimator() was not yet called. ++ */ ++ if (tp->srtt > 8 + 2) ++ do_div(rate, tp->srtt); ++ ++ sk->sk_pacing_rate = min_t(u64, rate, ~0U); ++} ++ + /* Calculate rto without backoff. This is the second half of Van Jacobson's + * routine referred to above. 
+ */ +@@ -3269,7 +3297,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) + u32 ack_seq = TCP_SKB_CB(skb)->seq; + u32 ack = TCP_SKB_CB(skb)->ack_seq; + bool is_dupack = false; +- u32 prior_in_flight; ++ u32 prior_in_flight, prior_cwnd = tp->snd_cwnd, prior_rtt = tp->srtt; + u32 prior_fackets; + int prior_packets = tp->packets_out; + const int prior_unsacked = tp->packets_out - tp->sacked_out; +@@ -3375,6 +3403,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) + + if (icsk->icsk_pending == ICSK_TIME_RETRANS) + tcp_schedule_loss_probe(sk); ++ if (tp->srtt != prior_rtt || tp->snd_cwnd != prior_cwnd) ++ tcp_update_pacing_rate(sk); + return 1; + + no_queue: +@@ -5671,6 +5701,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, + } else + tcp_init_metrics(sk); + ++ tcp_update_pacing_rate(sk); ++ + /* Prevent spurious tcp_cwnd_restart() on first data packet */ + tp->lsndtime = tcp_time_stamp; + +diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c +index 170737a..7b263c3 100644 +--- a/net/ipv4/tcp_output.c ++++ b/net/ipv4/tcp_output.c +@@ -1628,7 +1628,7 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb) + + /* If a full-sized TSO skb can be sent, do it. */ + if (limit >= min_t(unsigned int, sk->sk_gso_max_size, +- sk->sk_gso_max_segs * tp->mss_cache)) ++ tp->xmit_size_goal_segs * tp->mss_cache)) + goto send_now; + + /* Middle in queue won't get any more data, full sendable already? */ +-- +1.7.11.7 + + +From 1b6c7d9979e1db1d42bd0545452a9d204c019582 Mon Sep 17 00:00:00 2001 +From: Eric Dumazet +Date: Fri, 27 Sep 2013 03:28:54 -0700 +Subject: [PATCH 02/47] tcp: TSQ can use a dynamic limit + +[ Upstream commit c9eeec26e32e087359160406f96e0949b3cc6f10 ] + +When TCP Small Queues was added, we used a sysctl to limit amount of +packets queues on Qdisc/device queues for a given TCP flow. + +Problem is this limit is either too big for low rates, or too small +for high rates. 
+ +Now TCP stack has rate estimation in sk->sk_pacing_rate, and TSO +auto sizing, it can better control number of packets in Qdisc/device +queues. + +New limit is two packets or at least 1 to 2 ms worth of packets. + +Low rates flows benefit from this patch by having even smaller +number of packets in queues, allowing for faster recovery, +better RTT estimations. + +High rates flows benefit from this patch by allowing more than 2 packets +in flight as we had reports this was a limiting factor to reach line +rate. [ In particular if TX completion is delayed because of coalescing +parameters ] + +Example for a single flow on 10Gbps link controlled by FQ/pacing + +14 packets in flight instead of 2 + +$ tc -s -d qd +qdisc fq 8001: dev eth0 root refcnt 32 limit 10000p flow_limit 100p +buckets 1024 quantum 3028 initial_quantum 15140 + Sent 1168459366606 bytes 771822841 pkt (dropped 0, overlimits 0 +requeues 6822476) + rate 9346Mbit 771713pps backlog 953820b 14p requeues 6822476 + 2047 flow, 2046 inactive, 1 throttled, delay 15673 ns + 2372 gc, 0 highprio, 0 retrans, 9739249 throttled, 0 flows_plimit + +Note that sk_pacing_rate is currently set to twice the actual rate, but +this might be refined in the future when a flow is in congestion +avoidance. + +Additional change : skb->destructor should be set to tcp_wfree(). + +A future patch (for linux 3.13+) might remove tcp_limit_output_bytes + +Signed-off-by: Eric Dumazet +Cc: Wei Liu +Cc: Cong Wang +Cc: Yuchung Cheng +Cc: Neal Cardwell +Acked-by: Neal Cardwell +Signed-off-by: David S. 
Miller +--- + net/ipv4/tcp_output.c | 17 +++++++++++------ + 1 file changed, 11 insertions(+), 6 deletions(-) + +diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c +index 7b263c3..fe897ed 100644 +--- a/net/ipv4/tcp_output.c ++++ b/net/ipv4/tcp_output.c +@@ -892,8 +892,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, + + skb_orphan(skb); + skb->sk = sk; +- skb->destructor = (sysctl_tcp_limit_output_bytes > 0) ? +- tcp_wfree : sock_wfree; ++ skb->destructor = tcp_wfree; + atomic_add(skb->truesize, &sk->sk_wmem_alloc); + + /* Build TCP header and checksum it. */ +@@ -1837,7 +1836,6 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, + while ((skb = tcp_send_head(sk))) { + unsigned int limit; + +- + tso_segs = tcp_init_tso_segs(sk, skb, mss_now); + BUG_ON(!tso_segs); + +@@ -1866,13 +1864,20 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, + break; + } + +- /* TSQ : sk_wmem_alloc accounts skb truesize, +- * including skb overhead. But thats OK. ++ /* TCP Small Queues : ++ * Control number of packets in qdisc/devices to two packets / or ~1 ms. ++ * This allows for : ++ * - better RTT estimation and ACK scheduling ++ * - faster recovery ++ * - high rates + */ +- if (atomic_read(&sk->sk_wmem_alloc) >= sysctl_tcp_limit_output_bytes) { ++ limit = max(skb->truesize, sk->sk_pacing_rate >> 10); ++ ++ if (atomic_read(&sk->sk_wmem_alloc) > limit) { + set_bit(TSQ_THROTTLED, &tp->tsq_flags); + break; + } ++ + limit = mss_now; + if (tso_segs > 1 && !tcp_urg_mode(tp)) + limit = tcp_mss_split_point(sk, skb, mss_now, +-- +1.7.11.7 + + +From 4f25abff83e2780265eaa17d437b7659ea543bd5 Mon Sep 17 00:00:00 2001 +From: Eric Dumazet +Date: Tue, 15 Oct 2013 11:54:30 -0700 +Subject: [PATCH 03/47] tcp: must unclone packets before mangling them + +[ Upstream commit c52e2421f7368fd36cbe330d2cf41b10452e39a9 ] + +TCP stack should make sure it owns skbs before mangling them. 
+ +We had various crashes using bnx2x, and it turned out gso_size +was cleared right before bnx2x driver was populating TC descriptor +of the _previous_ packet send. TCP stack can sometime retransmit +packets that are still in Qdisc. + +Of course we could make bnx2x driver more robust (using +ACCESS_ONCE(shinfo->gso_size) for example), but the bug is TCP stack. + +We have identified two points where skb_unclone() was needed. + +This patch adds a WARN_ON_ONCE() to warn us if we missed another +fix of this kind. + +Kudos to Neal for finding the root cause of this bug. Its visible +using small MSS. + +Signed-off-by: Eric Dumazet +Signed-off-by: Neal Cardwell +Cc: Yuchung Cheng +Signed-off-by: David S. Miller +--- + net/ipv4/tcp_output.c | 9 ++++++--- + 1 file changed, 6 insertions(+), 3 deletions(-) + +diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c +index fe897ed..28c0d6a 100644 +--- a/net/ipv4/tcp_output.c ++++ b/net/ipv4/tcp_output.c +@@ -981,6 +981,9 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb) + static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb, + unsigned int mss_now) + { ++ /* Make sure we own this skb before messing gso_size/gso_segs */ ++ WARN_ON_ONCE(skb_cloned(skb)); ++ + if (skb->len <= mss_now || !sk_can_gso(sk) || + skb->ip_summed == CHECKSUM_NONE) { + /* Avoid the costly divide in the normal +@@ -1062,9 +1065,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, + if (nsize < 0) + nsize = 0; + +- if (skb_cloned(skb) && +- skb_is_nonlinear(skb) && +- pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) ++ if (skb_unclone(skb, GFP_ATOMIC)) + return -ENOMEM; + + /* Get a new skb... force flag on. 
*/ +@@ -2339,6 +2340,8 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) + int oldpcount = tcp_skb_pcount(skb); + + if (unlikely(oldpcount > 1)) { ++ if (skb_unclone(skb, GFP_ATOMIC)) ++ return -ENOMEM; + tcp_init_tso_segs(sk, skb, cur_mss); + tcp_adjust_pcount(sk, skb, oldpcount - tcp_skb_pcount(skb)); + } +-- +1.7.11.7 + + +From 8731e25f7527ca851045eb0715d998d1ac07aadb Mon Sep 17 00:00:00 2001 +From: Eric Dumazet +Date: Fri, 4 Oct 2013 10:31:41 -0700 +Subject: [PATCH 04/47] tcp: do not forget FIN in tcp_shifted_skb() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +[ Upstream commit 5e8a402f831dbe7ee831340a91439e46f0d38acd ] + +Yuchung found following problem : + + There are bugs in the SACK processing code, merging part in + tcp_shift_skb_data(), that incorrectly resets or ignores the sacked + skbs FIN flag. When a receiver first SACK the FIN sequence, and later + throw away ofo queue (e.g., sack-reneging), the sender will stop + retransmitting the FIN flag, and hangs forever. + +Following packetdrill test can be used to reproduce the bug. + +$ cat sack-merge-bug.pkt +`sysctl -q net.ipv4.tcp_fack=0` + +// Establish a connection and send 10 MSS. +0.000 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 ++.000 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 ++.000 bind(3, ..., ...) = 0 ++.000 listen(3, 1) = 0 + ++.050 < S 0:0(0) win 32792 ++.000 > S. 0:0(0) ack 1 ++.001 < . 1:1(0) ack 1 win 1024 ++.000 accept(3, ..., ...) = 4 + ++.100 write(4, ..., 12000) = 12000 ++.000 shutdown(4, SHUT_WR) = 0 ++.000 > . 1:10001(10000) ack 1 ++.050 < . 1:1(0) ack 2001 win 257 ++.000 > FP. 10001:12001(2000) ack 1 ++.050 < . 1:1(0) ack 2001 win 257 ++.050 < . 1:1(0) ack 2001 win 257 +// SACK reneg ++.050 < . 1:1(0) ack 12001 win 257 ++0 %{ print "unacked: ",tcpi_unacked }% ++5 %{ print "" }% + +First, a typo inverted left/right of one OR operation, then +code forgot to advance end_seq if the merged skb carried FIN. 
+ +Bug was added in 2.6.29 by commit 832d11c5cd076ab +("tcp: Try to restore large SKBs while SACK processing") + +Signed-off-by: Eric Dumazet +Signed-off-by: Yuchung Cheng +Acked-by: Neal Cardwell +Cc: Ilpo Järvinen +Acked-by: Ilpo Järvinen +Signed-off-by: David S. Miller +--- + net/ipv4/tcp_input.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c +index 2f0e94b..61e2360 100644 +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -1279,7 +1279,10 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, + tp->lost_cnt_hint -= tcp_skb_pcount(prev); + } + +- TCP_SKB_CB(skb)->tcp_flags |= TCP_SKB_CB(prev)->tcp_flags; ++ TCP_SKB_CB(prev)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags; ++ if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) ++ TCP_SKB_CB(prev)->end_seq++; ++ + if (skb == tcp_highest_sack(sk)) + tcp_advance_highest_sack(sk, skb); + +-- +1.7.11.7 + + +From bfc0a00d669a4fa0835c417f01c50c18996d1e60 Mon Sep 17 00:00:00 2001 +From: Yuchung Cheng +Date: Sat, 12 Oct 2013 10:16:27 -0700 +Subject: [PATCH 05/47] tcp: fix incorrect ca_state in tail loss probe + +[ Upstream commit 031afe4990a7c9dbff41a3a742c44d3e740ea0a1 ] + +On receiving an ACK that covers the loss probe sequence, TLP +immediately sets the congestion state to Open, even though some packets +are not recovered and retransmisssion are on the way. The later ACks +may trigger a WARN_ON check in step D of tcp_fastretrans_alert(), e.g., +https://bugzilla.redhat.com/show_bug.cgi?id=989251 + +The fix is to follow the similar procedure in recovery by calling +tcp_try_keep_open(). The sender switches to Open state if no packets +are retransmissted. Otherwise it goes to Disorder and let subsequent +ACKs move the state to Recovery or Open. + +Reported-By: Michael Sterrett +Tested-By: Dormando +Signed-off-by: Yuchung Cheng +Acked-by: Neal Cardwell +Signed-off-by: David S. 
Miller +--- + net/ipv4/tcp_input.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c +index 61e2360..723951a 100644 +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -3284,7 +3284,7 @@ static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag) + tcp_init_cwnd_reduction(sk, true); + tcp_set_ca_state(sk, TCP_CA_CWR); + tcp_end_cwnd_reduction(sk); +- tcp_set_ca_state(sk, TCP_CA_Open); ++ tcp_try_keep_open(sk); + NET_INC_STATS_BH(sock_net(sk), + LINUX_MIB_TCPLOSSPROBERECOVERY); + } +-- +1.7.11.7 + + +From 05c9fdfad860abd64136d8ccd88dbf84e40bd5f5 Mon Sep 17 00:00:00 2001 +From: Eric Dumazet +Date: Tue, 1 Oct 2013 21:04:11 -0700 +Subject: [PATCH 06/47] net: do not call sock_put() on TIMEWAIT sockets + +[ Upstream commit 80ad1d61e72d626e30ebe8529a0455e660ca4693 ] + +commit 3ab5aee7fe84 ("net: Convert TCP & DCCP hash tables to use RCU / +hlist_nulls") incorrectly used sock_put() on TIMEWAIT sockets. + +We should instead use inet_twsk_put() + +Signed-off-by: Eric Dumazet +Signed-off-by: David S. 
Miller +--- + net/ipv4/inet_hashtables.c | 2 +- + net/ipv6/inet6_hashtables.c | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c +index 7bd8983..96da9c7 100644 +--- a/net/ipv4/inet_hashtables.c ++++ b/net/ipv4/inet_hashtables.c +@@ -287,7 +287,7 @@ begintw: + if (unlikely(!INET_TW_MATCH(sk, net, acookie, + saddr, daddr, ports, + dif))) { +- sock_put(sk); ++ inet_twsk_put(inet_twsk(sk)); + goto begintw; + } + goto out; +diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c +index 32b4a16..066640e 100644 +--- a/net/ipv6/inet6_hashtables.c ++++ b/net/ipv6/inet6_hashtables.c +@@ -116,7 +116,7 @@ begintw: + } + if (unlikely(!INET6_TW_MATCH(sk, net, saddr, daddr, + ports, dif))) { +- sock_put(sk); ++ inet_twsk_put(inet_twsk(sk)); + goto begintw; + } + goto out; +-- +1.7.11.7 + + +From bc7fd34d31c17b0e4c100013e77277a2ed7e15cf Mon Sep 17 00:00:00 2001 +From: Matthias Schiffer +Date: Fri, 27 Sep 2013 18:03:39 +0200 +Subject: [PATCH 07/47] batman-adv: set up network coding packet handlers + during module init + +[ Upstream commit 6c519bad7b19a2c14a075b400edabaa630330123 ] + +batman-adv saves its table of packet handlers as a global state, so handlers +must be set up only once (and setting them up a second time will fail). + +The recently-added network coding support tries to set up its handler each time +a new softif is registered, which obviously fails when more that one softif is +used (and in consequence, the softif creation fails). + +Fix this by splitting up batadv_nc_init into batadv_nc_init (which is called +only once) and batadv_nc_mesh_init (which is called for each softif); in +addition batadv_nc_free is renamed to batadv_nc_mesh_free to keep naming +consistent. 
+ +Signed-off-by: Matthias Schiffer +Signed-off-by: Marek Lindner +Signed-off-by: Antonio Quartulli +--- + net/batman-adv/main.c | 5 +++-- + net/batman-adv/network-coding.c | 28 ++++++++++++++++++---------- + net/batman-adv/network-coding.h | 14 ++++++++++---- + 3 files changed, 31 insertions(+), 16 deletions(-) + +diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c +index 08125f3..c8e0671 100644 +--- a/net/batman-adv/main.c ++++ b/net/batman-adv/main.c +@@ -61,6 +61,7 @@ static int __init batadv_init(void) + batadv_recv_handler_init(); + + batadv_iv_init(); ++ batadv_nc_init(); + + batadv_event_workqueue = create_singlethread_workqueue("bat_events"); + +@@ -138,7 +139,7 @@ int batadv_mesh_init(struct net_device *soft_iface) + if (ret < 0) + goto err; + +- ret = batadv_nc_init(bat_priv); ++ ret = batadv_nc_mesh_init(bat_priv); + if (ret < 0) + goto err; + +@@ -163,7 +164,7 @@ void batadv_mesh_free(struct net_device *soft_iface) + batadv_vis_quit(bat_priv); + + batadv_gw_node_purge(bat_priv); +- batadv_nc_free(bat_priv); ++ batadv_nc_mesh_free(bat_priv); + batadv_dat_free(bat_priv); + batadv_bla_free(bat_priv); + +diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c +index a487d46..4ecc0b6 100644 +--- a/net/batman-adv/network-coding.c ++++ b/net/batman-adv/network-coding.c +@@ -35,6 +35,20 @@ static int batadv_nc_recv_coded_packet(struct sk_buff *skb, + struct batadv_hard_iface *recv_if); + + /** ++ * batadv_nc_init - one-time initialization for network coding ++ */ ++int __init batadv_nc_init(void) ++{ ++ int ret; ++ ++ /* Register our packet type */ ++ ret = batadv_recv_handler_register(BATADV_CODED, ++ batadv_nc_recv_coded_packet); ++ ++ return ret; ++} ++ ++/** + * batadv_nc_start_timer - initialise the nc periodic worker + * @bat_priv: the bat priv with all the soft interface information + */ +@@ -45,10 +59,10 @@ static void batadv_nc_start_timer(struct batadv_priv *bat_priv) + } + + /** +- * batadv_nc_init - initialise coding 
hash table and start house keeping ++ * batadv_nc_mesh_init - initialise coding hash table and start house keeping + * @bat_priv: the bat priv with all the soft interface information + */ +-int batadv_nc_init(struct batadv_priv *bat_priv) ++int batadv_nc_mesh_init(struct batadv_priv *bat_priv) + { + bat_priv->nc.timestamp_fwd_flush = jiffies; + bat_priv->nc.timestamp_sniffed_purge = jiffies; +@@ -70,11 +84,6 @@ int batadv_nc_init(struct batadv_priv *bat_priv) + batadv_hash_set_lock_class(bat_priv->nc.coding_hash, + &batadv_nc_decoding_hash_lock_class_key); + +- /* Register our packet type */ +- if (batadv_recv_handler_register(BATADV_CODED, +- batadv_nc_recv_coded_packet) < 0) +- goto err; +- + INIT_DELAYED_WORK(&bat_priv->nc.work, batadv_nc_worker); + batadv_nc_start_timer(bat_priv); + +@@ -1721,12 +1730,11 @@ free_nc_packet: + } + + /** +- * batadv_nc_free - clean up network coding memory ++ * batadv_nc_mesh_free - clean up network coding memory + * @bat_priv: the bat priv with all the soft interface information + */ +-void batadv_nc_free(struct batadv_priv *bat_priv) ++void batadv_nc_mesh_free(struct batadv_priv *bat_priv) + { +- batadv_recv_handler_unregister(BATADV_CODED); + cancel_delayed_work_sync(&bat_priv->nc.work); + + batadv_nc_purge_paths(bat_priv, bat_priv->nc.coding_hash, NULL); +diff --git a/net/batman-adv/network-coding.h b/net/batman-adv/network-coding.h +index 85a4ec8..ddfa618 100644 +--- a/net/batman-adv/network-coding.h ++++ b/net/batman-adv/network-coding.h +@@ -22,8 +22,9 @@ + + #ifdef CONFIG_BATMAN_ADV_NC + +-int batadv_nc_init(struct batadv_priv *bat_priv); +-void batadv_nc_free(struct batadv_priv *bat_priv); ++int batadv_nc_init(void); ++int batadv_nc_mesh_init(struct batadv_priv *bat_priv); ++void batadv_nc_mesh_free(struct batadv_priv *bat_priv); + void batadv_nc_update_nc_node(struct batadv_priv *bat_priv, + struct batadv_orig_node *orig_node, + struct batadv_orig_node *orig_neigh_node, +@@ -46,12 +47,17 @@ int 
batadv_nc_init_debugfs(struct batadv_priv *bat_priv); + + #else /* ifdef CONFIG_BATMAN_ADV_NC */ + +-static inline int batadv_nc_init(struct batadv_priv *bat_priv) ++static inline int batadv_nc_init(void) + { + return 0; + } + +-static inline void batadv_nc_free(struct batadv_priv *bat_priv) ++static inline int batadv_nc_mesh_init(struct batadv_priv *bat_priv) ++{ ++ return 0; ++} ++ ++static inline void batadv_nc_mesh_free(struct batadv_priv *bat_priv) + { + return; + } +-- +1.7.11.7 + + +From 8be4005ed947924104df5850944a20b7f6570137 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Fran=C3=A7ois=20Cachereul?= +Date: Wed, 2 Oct 2013 10:16:02 +0200 +Subject: [PATCH 08/47] l2tp: fix kernel panic when using IPv4-mapped IPv6 + addresses +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +[ Upstream commit e18503f41f9b12132c95d7c31ca6ee5155e44e5c ] + +IPv4 mapped addresses cause kernel panic. +The patch juste check whether the IPv6 address is an IPv4 mapped +address. If so, use IPv4 API instead of IPv6. 
+ +[ 940.026915] general protection fault: 0000 [#1] +[ 940.026915] Modules linked in: l2tp_ppp l2tp_netlink l2tp_core pppox ppp_generic slhc loop psmouse +[ 940.026915] CPU: 0 PID: 3184 Comm: memcheck-amd64- Not tainted 3.11.0+ #1 +[ 940.026915] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2007 +[ 940.026915] task: ffff880007130e20 ti: ffff88000737e000 task.ti: ffff88000737e000 +[ 940.026915] RIP: 0010:[] [] ip6_xmit+0x276/0x326 +[ 940.026915] RSP: 0018:ffff88000737fd28 EFLAGS: 00010286 +[ 940.026915] RAX: c748521a75ceff48 RBX: ffff880000c30800 RCX: 0000000000000000 +[ 940.026915] RDX: ffff88000075cc4e RSI: 0000000000000028 RDI: ffff8800060e5a40 +[ 940.026915] RBP: ffff8800060e5a40 R08: 0000000000000000 R09: ffff88000075cc90 +[ 940.026915] R10: 0000000000000000 R11: 0000000000000000 R12: ffff88000737fda0 +[ 940.026915] R13: 0000000000000000 R14: 0000000000002000 R15: ffff880005d3b580 +[ 940.026915] FS: 00007f163dc5e800(0000) GS:ffffffff81623000(0000) knlGS:0000000000000000 +[ 940.026915] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 940.026915] CR2: 00000004032dc940 CR3: 0000000005c25000 CR4: 00000000000006f0 +[ 940.026915] Stack: +[ 940.026915] ffff88000075cc4e ffffffff81694e90 ffff880000c30b38 0000000000000020 +[ 940.026915] 11000000523c4bac ffff88000737fdb4 0000000000000000 ffff880000c30800 +[ 940.026915] ffff880005d3b580 ffff880000c30b38 ffff8800060e5a40 0000000000000020 +[ 940.026915] Call Trace: +[ 940.026915] [] ? inet6_csk_xmit+0xa4/0xc4 +[ 940.026915] [] ? l2tp_xmit_skb+0x503/0x55a [l2tp_core] +[ 940.026915] [] ? pskb_expand_head+0x161/0x214 +[ 940.026915] [] ? pppol2tp_xmit+0xf2/0x143 [l2tp_ppp] +[ 940.026915] [] ? ppp_channel_push+0x36/0x8b [ppp_generic] +[ 940.026915] [] ? ppp_write+0xaf/0xc5 [ppp_generic] +[ 940.026915] [] ? vfs_write+0xa2/0x106 +[ 940.026915] [] ? SyS_write+0x56/0x8a +[ 940.026915] [] ? 
system_call_fastpath+0x16/0x1b +[ 940.026915] Code: 00 49 8b 8f d8 00 00 00 66 83 7c 11 02 00 74 60 49 +8b 47 58 48 83 e0 fe 48 8b 80 18 01 00 00 48 85 c0 74 13 48 8b 80 78 02 +00 00 <48> ff 40 28 41 8b 57 68 48 01 50 30 48 8b 54 24 08 49 c7 c1 51 +[ 940.026915] RIP [] ip6_xmit+0x276/0x326 +[ 940.026915] RSP +[ 940.057945] ---[ end trace be8aba9a61c8b7f3 ]--- +[ 940.058583] Kernel panic - not syncing: Fatal exception in interrupt + +Signed-off-by: François CACHEREUL +Signed-off-by: David S. Miller +--- + net/l2tp/l2tp_core.c | 27 +++++++++++++++++++++++---- + net/l2tp/l2tp_core.h | 3 +++ + 2 files changed, 26 insertions(+), 4 deletions(-) + +diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c +index feae495..aedaa2c 100644 +--- a/net/l2tp/l2tp_core.c ++++ b/net/l2tp/l2tp_core.c +@@ -496,6 +496,7 @@ out: + static inline int l2tp_verify_udp_checksum(struct sock *sk, + struct sk_buff *skb) + { ++ struct l2tp_tunnel *tunnel = (struct l2tp_tunnel *)sk->sk_user_data; + struct udphdr *uh = udp_hdr(skb); + u16 ulen = ntohs(uh->len); + __wsum psum; +@@ -504,7 +505,7 @@ static inline int l2tp_verify_udp_checksum(struct sock *sk, + return 0; + + #if IS_ENABLED(CONFIG_IPV6) +- if (sk->sk_family == PF_INET6) { ++ if (sk->sk_family == PF_INET6 && !tunnel->v4mapped) { + if (!uh->check) { + LIMIT_NETDEBUG(KERN_INFO "L2TP: IPv6: checksum is 0\n"); + return 1; +@@ -1128,7 +1129,7 @@ static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, + /* Queue the packet to IP for output */ + skb->local_df = 1; + #if IS_ENABLED(CONFIG_IPV6) +- if (skb->sk->sk_family == PF_INET6) ++ if (skb->sk->sk_family == PF_INET6 && !tunnel->v4mapped) + error = inet6_csk_xmit(skb, NULL); + else + #endif +@@ -1255,7 +1256,7 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len + + /* Calculate UDP checksum if configured to do so */ + #if IS_ENABLED(CONFIG_IPV6) +- if (sk->sk_family == PF_INET6) ++ if (sk->sk_family == PF_INET6 && !tunnel->v4mapped) + 
l2tp_xmit_ipv6_csum(sk, skb, udp_len); + else + #endif +@@ -1704,6 +1705,24 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 + if (cfg != NULL) + tunnel->debug = cfg->debug; + ++#if IS_ENABLED(CONFIG_IPV6) ++ if (sk->sk_family == PF_INET6) { ++ struct ipv6_pinfo *np = inet6_sk(sk); ++ ++ if (ipv6_addr_v4mapped(&np->saddr) && ++ ipv6_addr_v4mapped(&np->daddr)) { ++ struct inet_sock *inet = inet_sk(sk); ++ ++ tunnel->v4mapped = true; ++ inet->inet_saddr = np->saddr.s6_addr32[3]; ++ inet->inet_rcv_saddr = np->rcv_saddr.s6_addr32[3]; ++ inet->inet_daddr = np->daddr.s6_addr32[3]; ++ } else { ++ tunnel->v4mapped = false; ++ } ++ } ++#endif ++ + /* Mark socket as an encapsulation socket. See net/ipv4/udp.c */ + tunnel->encap = encap; + if (encap == L2TP_ENCAPTYPE_UDP) { +@@ -1712,7 +1731,7 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 + udp_sk(sk)->encap_rcv = l2tp_udp_encap_recv; + udp_sk(sk)->encap_destroy = l2tp_udp_encap_destroy; + #if IS_ENABLED(CONFIG_IPV6) +- if (sk->sk_family == PF_INET6) ++ if (sk->sk_family == PF_INET6 && !tunnel->v4mapped) + udpv6_encap_enable(); + else + #endif +diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h +index 66a559b..6f251cb 100644 +--- a/net/l2tp/l2tp_core.h ++++ b/net/l2tp/l2tp_core.h +@@ -194,6 +194,9 @@ struct l2tp_tunnel { + struct sock *sock; /* Parent socket */ + int fd; /* Parent fd, if tunnel socket + * was created by userspace */ ++#if IS_ENABLED(CONFIG_IPV6) ++ bool v4mapped; ++#endif + + struct work_struct del_work; + +-- +1.7.11.7 + + +From 0ec2b01190b1a2ba020241ab89730bf7e7d77b9c Mon Sep 17 00:00:00 2001 +From: "David S. Miller" +Date: Tue, 8 Oct 2013 15:44:26 -0400 +Subject: [PATCH 09/47] l2tp: Fix build warning with ipv6 disabled. 
+MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +[ Upstream commit 8d8a51e26a6d415e1470759f2cf5f3ee3ee86196 ] + +net/l2tp/l2tp_core.c: In function ‘l2tp_verify_udp_checksum’: +net/l2tp/l2tp_core.c:499:22: warning: unused variable ‘tunnel’ [-Wunused-variable] + +Create a helper "l2tp_tunnel()" to facilitate this, and as a side +effect get rid of a bunch of unnecessary void pointer casts. + +Signed-off-by: David S. Miller +--- + net/l2tp/l2tp_core.c | 13 ++++++++----- + 1 file changed, 8 insertions(+), 5 deletions(-) + +diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c +index aedaa2c..b076e83 100644 +--- a/net/l2tp/l2tp_core.c ++++ b/net/l2tp/l2tp_core.c +@@ -115,6 +115,11 @@ struct l2tp_net { + static void l2tp_session_set_header_len(struct l2tp_session *session, int version); + static void l2tp_tunnel_free(struct l2tp_tunnel *tunnel); + ++static inline struct l2tp_tunnel *l2tp_tunnel(struct sock *sk) ++{ ++ return sk->sk_user_data; ++} ++ + static inline struct l2tp_net *l2tp_pernet(struct net *net) + { + BUG_ON(!net); +@@ -496,7 +501,6 @@ out: + static inline int l2tp_verify_udp_checksum(struct sock *sk, + struct sk_buff *skb) + { +- struct l2tp_tunnel *tunnel = (struct l2tp_tunnel *)sk->sk_user_data; + struct udphdr *uh = udp_hdr(skb); + u16 ulen = ntohs(uh->len); + __wsum psum; +@@ -505,7 +509,7 @@ static inline int l2tp_verify_udp_checksum(struct sock *sk, + return 0; + + #if IS_ENABLED(CONFIG_IPV6) +- if (sk->sk_family == PF_INET6 && !tunnel->v4mapped) { ++ if (sk->sk_family == PF_INET6 && !l2tp_tunnel(sk)->v4mapped) { + if (!uh->check) { + LIMIT_NETDEBUG(KERN_INFO "L2TP: IPv6: checksum is 0\n"); + return 1; +@@ -1305,10 +1309,9 @@ EXPORT_SYMBOL_GPL(l2tp_xmit_skb); + */ + static void l2tp_tunnel_destruct(struct sock *sk) + { +- struct l2tp_tunnel *tunnel; ++ struct l2tp_tunnel *tunnel = l2tp_tunnel(sk); + struct l2tp_net *pn; + +- tunnel = sk->sk_user_data; + if (tunnel == NULL) + goto end; + +@@ -1676,7 
+1679,7 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 + } + + /* Check if this socket has already been prepped */ +- tunnel = (struct l2tp_tunnel *)sk->sk_user_data; ++ tunnel = l2tp_tunnel(sk); + if (tunnel != NULL) { + /* This socket has already been prepped */ + err = -EBUSY; +-- +1.7.11.7 + + +From 35e64a9e465a85ffacd373439c1caa757e407656 Mon Sep 17 00:00:00 2001 +From: Sebastian Hesselbarth +Date: Wed, 2 Oct 2013 12:57:20 +0200 +Subject: [PATCH 10/47] net: mv643xx_eth: update statistics timer from timer + context only + +[ Upstream commit 041b4ddb84989f06ff1df0ca869b950f1ee3cb1c ] + +Each port driver installs a periodic timer to update port statistics +by calling mib_counters_update. As mib_counters_update is also called +from non-timer context, we should not reschedule the timer there but +rather move it to timer-only context. + +Signed-off-by: Sebastian Hesselbarth +Acked-by: Jason Cooper +Signed-off-by: David S. Miller +--- + drivers/net/ethernet/marvell/mv643xx_eth.c | 4 +--- + 1 file changed, 1 insertion(+), 3 deletions(-) + +diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c +index c35db73..51c138b 100644 +--- a/drivers/net/ethernet/marvell/mv643xx_eth.c ++++ b/drivers/net/ethernet/marvell/mv643xx_eth.c +@@ -1131,15 +1131,13 @@ static void mib_counters_update(struct mv643xx_eth_private *mp) + p->rx_discard += rdlp(mp, RX_DISCARD_FRAME_CNT); + p->rx_overrun += rdlp(mp, RX_OVERRUN_FRAME_CNT); + spin_unlock_bh(&mp->mib_counters_lock); +- +- mod_timer(&mp->mib_counters_timer, jiffies + 30 * HZ); + } + + static void mib_counters_timer_wrapper(unsigned long _mp) + { + struct mv643xx_eth_private *mp = (void *)_mp; +- + mib_counters_update(mp); ++ mod_timer(&mp->mib_counters_timer, jiffies + 30 * HZ); + } + + +-- +1.7.11.7 + + +From b6b20d9c54b23ba35c5807e45ff7d9579503bffa Mon Sep 17 00:00:00 2001 +From: Sebastian Hesselbarth +Date: Wed, 2 Oct 2013 12:57:21 +0200 +Subject: 
[PATCH 11/47] net: mv643xx_eth: fix orphaned statistics timer crash + +[ Upstream commit f564412c935111c583b787bcc18157377b208e2e ] + +The periodic statistics timer gets started at port _probe() time, but +is stopped on _stop() only. In a modular environment, this can cause +the timer to access already deallocated memory, if the module is unloaded +without starting the eth device. To fix this, we add the timer right +before the port is started, instead of at _probe() time. + +Signed-off-by: Sebastian Hesselbarth +Acked-by: Jason Cooper +Signed-off-by: David S. Miller +--- + drivers/net/ethernet/marvell/mv643xx_eth.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c +index 51c138b..39334d4 100644 +--- a/drivers/net/ethernet/marvell/mv643xx_eth.c ++++ b/drivers/net/ethernet/marvell/mv643xx_eth.c +@@ -2235,6 +2235,7 @@ static int mv643xx_eth_open(struct net_device *dev) + mp->int_mask |= INT_TX_END_0 << i; + } + ++ add_timer(&mp->mib_counters_timer); + port_start(mp); + + wrlp(mp, INT_MASK_EXT, INT_EXT_LINK_PHY | INT_EXT_TX); +@@ -2914,7 +2915,6 @@ static int mv643xx_eth_probe(struct platform_device *pdev) + mp->mib_counters_timer.data = (unsigned long)mp; + mp->mib_counters_timer.function = mib_counters_timer_wrapper; + mp->mib_counters_timer.expires = jiffies + 30 * HZ; +- add_timer(&mp->mib_counters_timer); + + spin_lock_init(&mp->mib_counters_lock); + +-- +1.7.11.7 + + +From b8baf1c21a214c1b836eef390c9d6e153293fef9 Mon Sep 17 00:00:00 2001 +From: Dan Carpenter +Date: Thu, 3 Oct 2013 00:27:20 +0300 +Subject: [PATCH 12/47] net: heap overflow in __audit_sockaddr() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +[ Upstream commit 1661bf364ae9c506bc8795fef70d1532931be1e8 ] + +We need to cap ->msg_namelen or it leads to a buffer overflow when we +do the memcpy() in __audit_sockaddr(). 
It requires CAP_AUDIT_CONTROL to +exploit this bug. + +The call tree is: +___sys_recvmsg() + move_addr_to_user() + audit_sockaddr() + __audit_sockaddr() + +Reported-by: Jüri Aedla +Signed-off-by: Dan Carpenter +Signed-off-by: David S. Miller +--- + net/compat.c | 2 ++ + net/socket.c | 24 ++++++++++++++++++++---- + 2 files changed, 22 insertions(+), 4 deletions(-) + +diff --git a/net/compat.c b/net/compat.c +index f0a1ba6..8903258 100644 +--- a/net/compat.c ++++ b/net/compat.c +@@ -71,6 +71,8 @@ int get_compat_msghdr(struct msghdr *kmsg, struct compat_msghdr __user *umsg) + __get_user(kmsg->msg_controllen, &umsg->msg_controllen) || + __get_user(kmsg->msg_flags, &umsg->msg_flags)) + return -EFAULT; ++ if (kmsg->msg_namelen > sizeof(struct sockaddr_storage)) ++ return -EINVAL; + kmsg->msg_name = compat_ptr(tmp1); + kmsg->msg_iov = compat_ptr(tmp2); + kmsg->msg_control = compat_ptr(tmp3); +diff --git a/net/socket.c b/net/socket.c +index b2d7c62..4b94643 100644 +--- a/net/socket.c ++++ b/net/socket.c +@@ -1973,6 +1973,16 @@ struct used_address { + unsigned int name_len; + }; + ++static int copy_msghdr_from_user(struct msghdr *kmsg, ++ struct msghdr __user *umsg) ++{ ++ if (copy_from_user(kmsg, umsg, sizeof(struct msghdr))) ++ return -EFAULT; ++ if (kmsg->msg_namelen > sizeof(struct sockaddr_storage)) ++ return -EINVAL; ++ return 0; ++} ++ + static int ___sys_sendmsg(struct socket *sock, struct msghdr __user *msg, + struct msghdr *msg_sys, unsigned int flags, + struct used_address *used_address) +@@ -1991,8 +2001,11 @@ static int ___sys_sendmsg(struct socket *sock, struct msghdr __user *msg, + if (MSG_CMSG_COMPAT & flags) { + if (get_compat_msghdr(msg_sys, msg_compat)) + return -EFAULT; +- } else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr))) +- return -EFAULT; ++ } else { ++ err = copy_msghdr_from_user(msg_sys, msg); ++ if (err) ++ return err; ++ } + + if (msg_sys->msg_iovlen > UIO_FASTIOV) { + err = -EMSGSIZE; +@@ -2200,8 +2213,11 @@ static int 
___sys_recvmsg(struct socket *sock, struct msghdr __user *msg, + if (MSG_CMSG_COMPAT & flags) { + if (get_compat_msghdr(msg_sys, msg_compat)) + return -EFAULT; +- } else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr))) +- return -EFAULT; ++ } else { ++ err = copy_msghdr_from_user(msg_sys, msg); ++ if (err) ++ return err; ++ } + + if (msg_sys->msg_iovlen > UIO_FASTIOV) { + err = -EMSGSIZE; +-- +1.7.11.7 + + +From 6e24497ef79e18f5b1ddce66712d55093a6cf3e9 Mon Sep 17 00:00:00 2001 +From: Willem de Bruijn +Date: Tue, 22 Oct 2013 10:59:18 -0400 +Subject: [PATCH 13/47] sit: amend "allow to use rtnl ops on fb tunnel" + +Amend backport to 3.11.y of + + [ Upstream commit 205983c43700ac3a81e7625273a3fa83cd2759b5 ] + +The discussion thread in the upstream commit mentions that in +backports to stable-* branches, the line + + - unregister_netdevice_queue(sitn->fb_tunnel_dev, &list); + +must be omitted if that branch does not have commit 5e6700b3bf98 +("sit: add support of x-netns"). This line has correctly been omitted +in the backport to 3.10, which indeed does not have that commit. + +It was also removed in the backport to 3.11.y, which does have that +commit. + +This causes the following steps to hit a BUG at net/core/dev.c:5039: + + `modprobe sit; rmmod sit` + +The bug demonstrates that it causes a device to be unregistered twice. +The simple fix is to apply the one line in the upstream commit that +was dropped in the backport to 3.11 (3783100374653e2e7fbdf68c710f5). +This brings the logic in line with upstream linux, net and net-next +branches. 
+ +Signed-off-by: Willem de Bruijn +Acked-by: Nicolas Dichtel +Reviewed-by: Veaceslav Falico +--- + net/ipv6/sit.c | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c +index 86f639b..a51ad07 100644 +--- a/net/ipv6/sit.c ++++ b/net/ipv6/sit.c +@@ -1708,7 +1708,6 @@ static void __net_exit sit_exit_net(struct net *net) + + rtnl_lock(); + sit_destroy_tunnels(sitn, &list); +- unregister_netdevice_queue(sitn->fb_tunnel_dev, &list); + unregister_netdevice_many(&list); + rtnl_unlock(); + } +-- +1.7.11.7 + + +From 6c7e3c3382670fe98debedf2ddaff8abf2944bb4 Mon Sep 17 00:00:00 2001 +From: Mathias Krause +Date: Mon, 30 Sep 2013 22:03:06 +0200 +Subject: [PATCH 14/47] proc connector: fix info leaks + +[ Upstream commit e727ca82e0e9616ab4844301e6bae60ca7327682 ] + +Initialize event_data for all possible message types to prevent leaking +kernel stack contents to userland (up to 20 bytes). Also set the flags +member of the connector message to 0 to prevent leaking two more stack +bytes this way. + +Cc: stable@vger.kernel.org # v2.6.15+ +Signed-off-by: Mathias Krause +Signed-off-by: David S. 
Miller +--- + drivers/connector/cn_proc.c | 18 ++++++++++++++++++ + 1 file changed, 18 insertions(+) + +diff --git a/drivers/connector/cn_proc.c b/drivers/connector/cn_proc.c +index 08ae128..c73fc2b 100644 +--- a/drivers/connector/cn_proc.c ++++ b/drivers/connector/cn_proc.c +@@ -65,6 +65,7 @@ void proc_fork_connector(struct task_struct *task) + + msg = (struct cn_msg *)buffer; + ev = (struct proc_event *)msg->data; ++ memset(&ev->event_data, 0, sizeof(ev->event_data)); + get_seq(&msg->seq, &ev->cpu); + ktime_get_ts(&ts); /* get high res monotonic timestamp */ + put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns); +@@ -80,6 +81,7 @@ void proc_fork_connector(struct task_struct *task) + memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id)); + msg->ack = 0; /* not used */ + msg->len = sizeof(*ev); ++ msg->flags = 0; /* not used */ + /* If cn_netlink_send() failed, the data is not sent */ + cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL); + } +@@ -96,6 +98,7 @@ void proc_exec_connector(struct task_struct *task) + + msg = (struct cn_msg *)buffer; + ev = (struct proc_event *)msg->data; ++ memset(&ev->event_data, 0, sizeof(ev->event_data)); + get_seq(&msg->seq, &ev->cpu); + ktime_get_ts(&ts); /* get high res monotonic timestamp */ + put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns); +@@ -106,6 +109,7 @@ void proc_exec_connector(struct task_struct *task) + memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id)); + msg->ack = 0; /* not used */ + msg->len = sizeof(*ev); ++ msg->flags = 0; /* not used */ + cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL); + } + +@@ -122,6 +126,7 @@ void proc_id_connector(struct task_struct *task, int which_id) + + msg = (struct cn_msg *)buffer; + ev = (struct proc_event *)msg->data; ++ memset(&ev->event_data, 0, sizeof(ev->event_data)); + ev->what = which_id; + ev->event_data.id.process_pid = task->pid; + ev->event_data.id.process_tgid = task->tgid; +@@ -145,6 +150,7 @@ void proc_id_connector(struct task_struct *task, int 
which_id) + memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id)); + msg->ack = 0; /* not used */ + msg->len = sizeof(*ev); ++ msg->flags = 0; /* not used */ + cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL); + } + +@@ -160,6 +166,7 @@ void proc_sid_connector(struct task_struct *task) + + msg = (struct cn_msg *)buffer; + ev = (struct proc_event *)msg->data; ++ memset(&ev->event_data, 0, sizeof(ev->event_data)); + get_seq(&msg->seq, &ev->cpu); + ktime_get_ts(&ts); /* get high res monotonic timestamp */ + put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns); +@@ -170,6 +177,7 @@ void proc_sid_connector(struct task_struct *task) + memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id)); + msg->ack = 0; /* not used */ + msg->len = sizeof(*ev); ++ msg->flags = 0; /* not used */ + cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL); + } + +@@ -185,6 +193,7 @@ void proc_ptrace_connector(struct task_struct *task, int ptrace_id) + + msg = (struct cn_msg *)buffer; + ev = (struct proc_event *)msg->data; ++ memset(&ev->event_data, 0, sizeof(ev->event_data)); + get_seq(&msg->seq, &ev->cpu); + ktime_get_ts(&ts); /* get high res monotonic timestamp */ + put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns); +@@ -203,6 +212,7 @@ void proc_ptrace_connector(struct task_struct *task, int ptrace_id) + memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id)); + msg->ack = 0; /* not used */ + msg->len = sizeof(*ev); ++ msg->flags = 0; /* not used */ + cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL); + } + +@@ -218,6 +228,7 @@ void proc_comm_connector(struct task_struct *task) + + msg = (struct cn_msg *)buffer; + ev = (struct proc_event *)msg->data; ++ memset(&ev->event_data, 0, sizeof(ev->event_data)); + get_seq(&msg->seq, &ev->cpu); + ktime_get_ts(&ts); /* get high res monotonic timestamp */ + put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns); +@@ -229,6 +240,7 @@ void proc_comm_connector(struct task_struct *task) + memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id)); + 
msg->ack = 0; /* not used */ + msg->len = sizeof(*ev); ++ msg->flags = 0; /* not used */ + cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL); + } + +@@ -244,6 +256,7 @@ void proc_coredump_connector(struct task_struct *task) + + msg = (struct cn_msg *)buffer; + ev = (struct proc_event *)msg->data; ++ memset(&ev->event_data, 0, sizeof(ev->event_data)); + get_seq(&msg->seq, &ev->cpu); + ktime_get_ts(&ts); /* get high res monotonic timestamp */ + put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns); +@@ -254,6 +267,7 @@ void proc_coredump_connector(struct task_struct *task) + memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id)); + msg->ack = 0; /* not used */ + msg->len = sizeof(*ev); ++ msg->flags = 0; /* not used */ + cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL); + } + +@@ -269,6 +283,7 @@ void proc_exit_connector(struct task_struct *task) + + msg = (struct cn_msg *)buffer; + ev = (struct proc_event *)msg->data; ++ memset(&ev->event_data, 0, sizeof(ev->event_data)); + get_seq(&msg->seq, &ev->cpu); + ktime_get_ts(&ts); /* get high res monotonic timestamp */ + put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns); +@@ -281,6 +296,7 @@ void proc_exit_connector(struct task_struct *task) + memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id)); + msg->ack = 0; /* not used */ + msg->len = sizeof(*ev); ++ msg->flags = 0; /* not used */ + cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL); + } + +@@ -304,6 +320,7 @@ static void cn_proc_ack(int err, int rcvd_seq, int rcvd_ack) + + msg = (struct cn_msg *)buffer; + ev = (struct proc_event *)msg->data; ++ memset(&ev->event_data, 0, sizeof(ev->event_data)); + msg->seq = rcvd_seq; + ktime_get_ts(&ts); /* get high res monotonic timestamp */ + put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns); +@@ -313,6 +330,7 @@ static void cn_proc_ack(int err, int rcvd_seq, int rcvd_ack) + memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id)); + msg->ack = rcvd_ack + 1; + msg->len = sizeof(*ev); ++ msg->flags = 0; /* not used 
*/ + cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL); + } + +-- +1.7.11.7 + + +From f3d398e2465b3b74987a3a2fc42ea3e8c83d2166 Mon Sep 17 00:00:00 2001 +From: Jiri Benc +Date: Fri, 4 Oct 2013 17:04:48 +0200 +Subject: [PATCH 15/47] ipv4: fix ineffective source address selection + +[ Upstream commit 0a7e22609067ff524fc7bbd45c6951dd08561667 ] + +When sending out multicast messages, the source address in inet->mc_addr is +ignored and rewritten by an autoselected one. This is caused by a typo in +commit 813b3b5db831 ("ipv4: Use caller's on-stack flowi as-is in output +route lookups"). + +Signed-off-by: Jiri Benc +Acked-by: Eric Dumazet +Signed-off-by: David S. Miller +--- + net/ipv4/route.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/net/ipv4/route.c b/net/ipv4/route.c +index a9a54a2..2de16d9 100644 +--- a/net/ipv4/route.c ++++ b/net/ipv4/route.c +@@ -2074,7 +2074,7 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4) + RT_SCOPE_LINK); + goto make_route; + } +- if (fl4->saddr) { ++ if (!fl4->saddr) { + if (ipv4_is_multicast(fl4->daddr)) + fl4->saddr = inet_select_addr(dev_out, 0, + fl4->flowi4_scope); +-- +1.7.11.7 + + +From 8fd516716afeb4631cf790a2be7ca30d0a664b01 Mon Sep 17 00:00:00 2001 +From: Marc Kleine-Budde +Date: Sat, 5 Oct 2013 21:25:17 +0200 +Subject: [PATCH 16/47] can: dev: fix nlmsg size calculation in can_get_size() + +[ Upstream commit fe119a05f8ca481623a8d02efcc984332e612528 ] + +This patch fixes the calculation of the nlmsg size, by adding the missing +nla_total_size(). + +Signed-off-by: Marc Kleine-Budde +Signed-off-by: David S. 
Miller +--- + drivers/net/can/dev.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c +index f9cba41..1870c47 100644 +--- a/drivers/net/can/dev.c ++++ b/drivers/net/can/dev.c +@@ -705,14 +705,14 @@ static size_t can_get_size(const struct net_device *dev) + size_t size; + + size = nla_total_size(sizeof(u32)); /* IFLA_CAN_STATE */ +- size += sizeof(struct can_ctrlmode); /* IFLA_CAN_CTRLMODE */ ++ size += nla_total_size(sizeof(struct can_ctrlmode)); /* IFLA_CAN_CTRLMODE */ + size += nla_total_size(sizeof(u32)); /* IFLA_CAN_RESTART_MS */ +- size += sizeof(struct can_bittiming); /* IFLA_CAN_BITTIMING */ +- size += sizeof(struct can_clock); /* IFLA_CAN_CLOCK */ ++ size += nla_total_size(sizeof(struct can_bittiming)); /* IFLA_CAN_BITTIMING */ ++ size += nla_total_size(sizeof(struct can_clock)); /* IFLA_CAN_CLOCK */ + if (priv->do_get_berr_counter) /* IFLA_CAN_BERR_COUNTER */ +- size += sizeof(struct can_berr_counter); ++ size += nla_total_size(sizeof(struct can_berr_counter)); + if (priv->bittiming_const) /* IFLA_CAN_BITTIMING_CONST */ +- size += sizeof(struct can_bittiming_const); ++ size += nla_total_size(sizeof(struct can_bittiming_const)); + + return size; + } +-- +1.7.11.7 + + +From 1b3231ca7e26084580145c904dd10a60cac35c63 Mon Sep 17 00:00:00 2001 +From: Fabio Estevam +Date: Sat, 5 Oct 2013 17:56:59 -0300 +Subject: [PATCH 17/47] net: secure_seq: Fix warning when CONFIG_IPV6 and + CONFIG_INET are not selected + +[ Upstream commit cb03db9d0e964568407fb08ea46cc2b6b7f67587 ] + +net_secret() is only used when CONFIG_IPV6 or CONFIG_INET are selected. 
+ +Building a defconfig with both of these symbols unselected (Using the ARM +at91sam9rl_defconfig, for example) leads to the following build warning: + +$ make at91sam9rl_defconfig +# +# configuration written to .config +# + +$ make net/core/secure_seq.o +scripts/kconfig/conf --silentoldconfig Kconfig + CHK include/config/kernel.release + CHK include/generated/uapi/linux/version.h + CHK include/generated/utsrelease.h +make[1]: `include/generated/mach-types.h' is up to date. + CALL scripts/checksyscalls.sh + CC net/core/secure_seq.o +net/core/secure_seq.c:17:13: warning: 'net_secret_init' defined but not used [-Wunused-function] + +Fix this warning by protecting the definition of net_secret() with these +symbols. + +Reported-by: Olof Johansson +Signed-off-by: Fabio Estevam +Signed-off-by: David S. Miller +--- + net/core/secure_seq.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c +index 3f1ec15..8d9d05e 100644 +--- a/net/core/secure_seq.c ++++ b/net/core/secure_seq.c +@@ -10,6 +10,7 @@ + + #include + ++#if IS_ENABLED(CONFIG_IPV6) || IS_ENABLED(CONFIG_INET) + #define NET_SECRET_SIZE (MD5_MESSAGE_BYTES / 4) + + static u32 net_secret[NET_SECRET_SIZE] ____cacheline_aligned; +@@ -29,6 +30,7 @@ static void net_secret_init(void) + cmpxchg(&net_secret[--i], 0, tmp); + } + } ++#endif + + #ifdef CONFIG_INET + static u32 seq_scale(u32 seq) +-- +1.7.11.7 + + +From 538680b534f30fe6531099f87267bb676c935351 Mon Sep 17 00:00:00 2001 +From: Paul Durrant +Date: Tue, 8 Oct 2013 14:56:44 +0100 +Subject: [PATCH 18/47] xen-netback: Don't destroy the netdev until the vif is + shut down + +[ upstream commit id: 279f438e36c0a70b23b86d2090aeec50155034a9 ] + +Without this patch, if a frontend cycles through states Closing +and Closed (which Windows frontends need to do) then the netdev +will be destroyed and requires re-invocation of hotplug scripts +to restore state before the frontend can move to Connected. 
Thus +when udev is not in use the backend gets stuck in InitWait. + +With this patch, the netdev is left alone whilst the backend is +still online and is only de-registered and freed just prior to +destroying the vif (which is also nicely symmetrical with the +netdev allocation and registration being done during probe) so +no re-invocation of hotplug scripts is required. + +Signed-off-by: Paul Durrant +Cc: David Vrabel +Cc: Wei Liu +Cc: Ian Campbell +--- + drivers/net/xen-netback/common.h | 1 + + drivers/net/xen-netback/interface.c | 23 +++++++++-------------- + drivers/net/xen-netback/xenbus.c | 17 ++++++++++++----- + 3 files changed, 22 insertions(+), 19 deletions(-) + +diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h +index 8a4d77e..4d9a5e7 100644 +--- a/drivers/net/xen-netback/common.h ++++ b/drivers/net/xen-netback/common.h +@@ -120,6 +120,7 @@ int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref, + unsigned long rx_ring_ref, unsigned int tx_evtchn, + unsigned int rx_evtchn); + void xenvif_disconnect(struct xenvif *vif); ++void xenvif_free(struct xenvif *vif); + + void xenvif_get(struct xenvif *vif); + void xenvif_put(struct xenvif *vif); +diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c +index 087d2db..73336c1 100644 +--- a/drivers/net/xen-netback/interface.c ++++ b/drivers/net/xen-netback/interface.c +@@ -326,6 +326,9 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid, + } + + netdev_dbg(dev, "Successfully created xenvif\n"); ++ ++ __module_get(THIS_MODULE); ++ + return vif; + } + +@@ -413,12 +416,6 @@ void xenvif_carrier_off(struct xenvif *vif) + + void xenvif_disconnect(struct xenvif *vif) + { +- /* Disconnect funtion might get called by generic framework +- * even before vif connects, so we need to check if we really +- * need to do a module_put. 
+- */ +- int need_module_put = 0; +- + if (netif_carrier_ok(vif->dev)) + xenvif_carrier_off(vif); + +@@ -432,18 +429,16 @@ void xenvif_disconnect(struct xenvif *vif) + unbind_from_irqhandler(vif->tx_irq, vif); + unbind_from_irqhandler(vif->rx_irq, vif); + } +- /* vif->irq is valid, we had a module_get in +- * xenvif_connect. +- */ +- need_module_put = 1; + } + +- unregister_netdev(vif->dev); +- + xen_netbk_unmap_frontend_rings(vif); ++} ++ ++void xenvif_free(struct xenvif *vif) ++{ ++ unregister_netdev(vif->dev); + + free_netdev(vif->dev); + +- if (need_module_put) +- module_put(THIS_MODULE); ++ module_put(THIS_MODULE); + } +diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c +index 1fe48fe3..a53782e 100644 +--- a/drivers/net/xen-netback/xenbus.c ++++ b/drivers/net/xen-netback/xenbus.c +@@ -42,7 +42,7 @@ static int netback_remove(struct xenbus_device *dev) + if (be->vif) { + kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE); + xenbus_rm(XBT_NIL, dev->nodename, "hotplug-status"); +- xenvif_disconnect(be->vif); ++ xenvif_free(be->vif); + be->vif = NULL; + } + kfree(be); +@@ -213,9 +213,18 @@ static void disconnect_backend(struct xenbus_device *dev) + { + struct backend_info *be = dev_get_drvdata(&dev->dev); + ++ if (be->vif) ++ xenvif_disconnect(be->vif); ++} ++ ++static void destroy_backend(struct xenbus_device *dev) ++{ ++ struct backend_info *be = dev_get_drvdata(&dev->dev); ++ + if (be->vif) { ++ kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE); + xenbus_rm(XBT_NIL, dev->nodename, "hotplug-status"); +- xenvif_disconnect(be->vif); ++ xenvif_free(be->vif); + be->vif = NULL; + } + } +@@ -246,14 +255,11 @@ static void frontend_changed(struct xenbus_device *dev, + case XenbusStateConnected: + if (dev->state == XenbusStateConnected) + break; +- backend_create_xenvif(be); + if (be->vif) + connect(be); + break; + + case XenbusStateClosing: +- if (be->vif) +- kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE); + disconnect_backend(dev); + 
xenbus_switch_state(dev, XenbusStateClosing); + break; +@@ -262,6 +268,7 @@ static void frontend_changed(struct xenbus_device *dev, + xenbus_switch_state(dev, XenbusStateClosed); + if (xenbus_dev_is_online(dev)) + break; ++ destroy_backend(dev); + /* fall through if not online */ + case XenbusStateUnknown: + device_unregister(&dev->dev); +-- +1.7.11.7 + + +From 29bb21656d747e62d55b9e1929b23eadcd6be324 Mon Sep 17 00:00:00 2001 +From: Amir Vadai +Date: Mon, 7 Oct 2013 13:38:12 +0200 +Subject: [PATCH 19/47] net/mlx4_en: Rename name of mlx4_en_rx_alloc members + +[ Upstream commit 70fbe0794393829d9acd686428d87c27b6f6984b ] + +Add page prefix to page related members: @size and @offset into +@page_size and @page_offset + +CC: Eric Dumazet +Signed-off-by: Amir Vadai +Acked-by: Eric Dumazet +Signed-off-by: David S. Miller +--- + drivers/net/ethernet/mellanox/mlx4/en_rx.c | 40 ++++++++++++++++------------ + drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 4 +-- + 2 files changed, 25 insertions(+), 19 deletions(-) + +diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c +index dec455c..066fc27 100644 +--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c ++++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c +@@ -70,14 +70,15 @@ static int mlx4_alloc_pages(struct mlx4_en_priv *priv, + put_page(page); + return -ENOMEM; + } +- page_alloc->size = PAGE_SIZE << order; ++ page_alloc->page_size = PAGE_SIZE << order; + page_alloc->page = page; + page_alloc->dma = dma; +- page_alloc->offset = frag_info->frag_align; ++ page_alloc->page_offset = frag_info->frag_align; + /* Not doing get_page() for each frag is a big win + * on asymetric workloads. 
+ */ +- atomic_set(&page->_count, page_alloc->size / frag_info->frag_stride); ++ atomic_set(&page->_count, ++ page_alloc->page_size / frag_info->frag_stride); + return 0; + } + +@@ -96,16 +97,19 @@ static int mlx4_en_alloc_frags(struct mlx4_en_priv *priv, + for (i = 0; i < priv->num_frags; i++) { + frag_info = &priv->frag_info[i]; + page_alloc[i] = ring_alloc[i]; +- page_alloc[i].offset += frag_info->frag_stride; +- if (page_alloc[i].offset + frag_info->frag_stride <= ring_alloc[i].size) ++ page_alloc[i].page_offset += frag_info->frag_stride; ++ ++ if (page_alloc[i].page_offset + frag_info->frag_stride <= ++ ring_alloc[i].page_size) + continue; ++ + if (mlx4_alloc_pages(priv, &page_alloc[i], frag_info, gfp)) + goto out; + } + + for (i = 0; i < priv->num_frags; i++) { + frags[i] = ring_alloc[i]; +- dma = ring_alloc[i].dma + ring_alloc[i].offset; ++ dma = ring_alloc[i].dma + ring_alloc[i].page_offset; + ring_alloc[i] = page_alloc[i]; + rx_desc->data[i].addr = cpu_to_be64(dma); + } +@@ -117,7 +121,7 @@ out: + frag_info = &priv->frag_info[i]; + if (page_alloc[i].page != ring_alloc[i].page) { + dma_unmap_page(priv->ddev, page_alloc[i].dma, +- page_alloc[i].size, PCI_DMA_FROMDEVICE); ++ page_alloc[i].page_size, PCI_DMA_FROMDEVICE); + page = page_alloc[i].page; + atomic_set(&page->_count, 1); + put_page(page); +@@ -132,9 +136,10 @@ static void mlx4_en_free_frag(struct mlx4_en_priv *priv, + { + const struct mlx4_en_frag_info *frag_info = &priv->frag_info[i]; + +- if (frags[i].offset + frag_info->frag_stride > frags[i].size) +- dma_unmap_page(priv->ddev, frags[i].dma, frags[i].size, +- PCI_DMA_FROMDEVICE); ++ if (frags[i].page_offset + frag_info->frag_stride > ++ frags[i].page_size) ++ dma_unmap_page(priv->ddev, frags[i].dma, frags[i].page_size, ++ PCI_DMA_FROMDEVICE); + + if (frags[i].page) + put_page(frags[i].page); +@@ -161,7 +166,7 @@ out: + + page_alloc = &ring->page_alloc[i]; + dma_unmap_page(priv->ddev, page_alloc->dma, +- page_alloc->size, PCI_DMA_FROMDEVICE); ++ 
page_alloc->page_size, PCI_DMA_FROMDEVICE); + page = page_alloc->page; + atomic_set(&page->_count, 1); + put_page(page); +@@ -184,10 +189,11 @@ static void mlx4_en_destroy_allocator(struct mlx4_en_priv *priv, + i, page_count(page_alloc->page)); + + dma_unmap_page(priv->ddev, page_alloc->dma, +- page_alloc->size, PCI_DMA_FROMDEVICE); +- while (page_alloc->offset + frag_info->frag_stride < page_alloc->size) { ++ page_alloc->page_size, PCI_DMA_FROMDEVICE); ++ while (page_alloc->page_offset + frag_info->frag_stride < ++ page_alloc->page_size) { + put_page(page_alloc->page); +- page_alloc->offset += frag_info->frag_stride; ++ page_alloc->page_offset += frag_info->frag_stride; + } + page_alloc->page = NULL; + } +@@ -478,7 +484,7 @@ static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv, + /* Save page reference in skb */ + __skb_frag_set_page(&skb_frags_rx[nr], frags[nr].page); + skb_frag_size_set(&skb_frags_rx[nr], frag_info->frag_size); +- skb_frags_rx[nr].page_offset = frags[nr].offset; ++ skb_frags_rx[nr].page_offset = frags[nr].page_offset; + skb->truesize += frag_info->frag_stride; + frags[nr].page = NULL; + } +@@ -517,7 +523,7 @@ static struct sk_buff *mlx4_en_rx_skb(struct mlx4_en_priv *priv, + + /* Get pointer to first fragment so we could copy the headers into the + * (linear part of the) skb */ +- va = page_address(frags[0].page) + frags[0].offset; ++ va = page_address(frags[0].page) + frags[0].page_offset; + + if (length <= SMALL_PACKET_SIZE) { + /* We are copying all relevant data to the skb - temporarily +@@ -645,7 +651,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud + dma_sync_single_for_cpu(priv->ddev, dma, sizeof(*ethh), + DMA_FROM_DEVICE); + ethh = (struct ethhdr *)(page_address(frags[0].page) + +- frags[0].offset); ++ frags[0].page_offset); + + if (is_multicast_ether_addr(ethh->h_dest)) { + struct mlx4_mac_entry *entry; +diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h 
b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +index 5e0aa56..bf06e36 100644 +--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h ++++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +@@ -237,8 +237,8 @@ struct mlx4_en_tx_desc { + struct mlx4_en_rx_alloc { + struct page *page; + dma_addr_t dma; +- u32 offset; +- u32 size; ++ u32 page_offset; ++ u32 page_size; + }; + + struct mlx4_en_tx_ring { +-- +1.7.11.7 + + +From 4bd2cc99115d31513bfe3c2bd7bcfe67fc081ae8 Mon Sep 17 00:00:00 2001 +From: Amir Vadai +Date: Mon, 7 Oct 2013 13:38:13 +0200 +Subject: [PATCH 20/47] net/mlx4_en: Fix pages never dma unmapped on rx + +[ Upstream commit 021f1107ffdae7a82af6c53f4c52654062e365c6 ] + +This patch fixes a bug introduced by commit 51151a16 (mlx4: allow +order-0 memory allocations in RX path). + +dma_unmap_page never reached because condition to detect last fragment +in page is wrong. offset+frag_stride can't be greater than size, need to +make sure no additional frag will fit in page => compare offset + +frag_stride + next_frag_size instead. +next_frag_size is the same as the current one, since page is shared only +with frags of the same size. + +CC: Eric Dumazet +Signed-off-by: Amir Vadai +Acked-by: Eric Dumazet +Signed-off-by: David S. 
Miller +--- + drivers/net/ethernet/mellanox/mlx4/en_rx.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c +index 066fc27..afe2efa 100644 +--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c ++++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c +@@ -135,9 +135,10 @@ static void mlx4_en_free_frag(struct mlx4_en_priv *priv, + int i) + { + const struct mlx4_en_frag_info *frag_info = &priv->frag_info[i]; ++ u32 next_frag_end = frags[i].page_offset + 2 * frag_info->frag_stride; + +- if (frags[i].page_offset + frag_info->frag_stride > +- frags[i].page_size) ++ ++ if (next_frag_end > frags[i].page_size) + dma_unmap_page(priv->ddev, frags[i].dma, frags[i].page_size, + PCI_DMA_FROMDEVICE); + +-- +1.7.11.7 + + +From af64f33fff313187ca01ddb7db09b537a89208dd Mon Sep 17 00:00:00 2001 +From: Marc Kleine-Budde +Date: Mon, 7 Oct 2013 23:19:58 +0200 +Subject: [PATCH 21/47] net: vlan: fix nlmsg size calculation in + vlan_get_size() + +[ Upstream commit c33a39c575068c2ea9bffb22fd6de2df19c74b89 ] + +This patch fixes the calculation of the nlmsg size, by adding the missing +nla_total_size(). + +Cc: Patrick McHardy +Signed-off-by: Marc Kleine-Budde +Signed-off-by: David S. 
Miller +--- + net/8021q/vlan_netlink.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c +index 3091297..c7e634a 100644 +--- a/net/8021q/vlan_netlink.c ++++ b/net/8021q/vlan_netlink.c +@@ -171,7 +171,7 @@ static size_t vlan_get_size(const struct net_device *dev) + + return nla_total_size(2) + /* IFLA_VLAN_PROTOCOL */ + nla_total_size(2) + /* IFLA_VLAN_ID */ +- sizeof(struct ifla_vlan_flags) + /* IFLA_VLAN_FLAGS */ ++ nla_total_size(sizeof(struct ifla_vlan_flags)) + /* IFLA_VLAN_FLAGS */ + vlan_qos_map_size(vlan->nr_ingress_mappings) + + vlan_qos_map_size(vlan->nr_egress_mappings); + } +-- +1.7.11.7 + + +From 74869292aeb07213144e34b0e21e23f7e3c9f61f Mon Sep 17 00:00:00 2001 +From: Vlad Yasevich +Date: Thu, 10 Oct 2013 15:57:59 -0400 +Subject: [PATCH 22/47] bridge: update mdb expiration timer upon reports. + +[ Upstream commit f144febd93d5ee534fdf23505ab091b2b9088edc ] + +commit 9f00b2e7cf241fa389733d41b615efdaa2cb0f5b + bridge: only expire the mdb entry when query is received +changed the mdb expiration timer to be armed only when QUERY is +received. However, this causes issues in an environment where +the multicast server socket comes and goes very fast while a client +is trying to send traffic to it. + +The root cause is a race where a sequence of LEAVE followed by REPORT +messages can race against QUERY messages generated in response to LEAVE. +The QUERY ends up starting the expiration timer, and that timer can +potentially expire after the new REPORT message has been received signaling +the new join operation. This leads to a significant drop in multicast +traffic and possible complete stall. + +The solution is to have REPORT messages update the expiration timer +on entries that already exist. + +CC: Cong Wang +CC: Herbert Xu +CC: Stephen Hemminger +Signed-off-by: Vlad Yasevich +Acked-by: Herbert Xu +Signed-off-by: David S. 
Miller +--- + net/bridge/br_multicast.c | 9 ++++++++- + 1 file changed, 8 insertions(+), 1 deletion(-) + +diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c +index bbcb435..0e3fea7 100644 +--- a/net/bridge/br_multicast.c ++++ b/net/bridge/br_multicast.c +@@ -610,6 +610,9 @@ rehash: + break; + + default: ++ /* If we have an existing entry, update it's expire timer */ ++ mod_timer(&mp->timer, ++ jiffies + br->multicast_membership_interval); + goto out; + } + +@@ -679,8 +682,12 @@ static int br_multicast_add_group(struct net_bridge *br, + for (pp = &mp->ports; + (p = mlock_dereference(*pp, br)) != NULL; + pp = &p->next) { +- if (p->port == port) ++ if (p->port == port) { ++ /* We already have a portgroup, update the timer. */ ++ mod_timer(&p->timer, ++ jiffies + br->multicast_membership_interval); + goto out; ++ } + if ((unsigned long)p->port < (unsigned long)port) + break; + } +-- +1.7.11.7 + + +From d9f02cfe59400677feea276d4b27981f6d91825a Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Linus=20L=C3=BCssing?= +Date: Sun, 20 Oct 2013 00:58:57 +0200 +Subject: [PATCH 23/47] Revert "bridge: only expire the mdb entry when query + is received" +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +[ Upstream commit 454594f3b93a49ef568cd190c5af31376b105a7b ] + +While this commit was a good attempt to fix issues occuring when no +multicast querier is present, this commit still has two more issues: + +1) There are cases where mdb entries do not expire even if there is a +querier present. The bridge will unnecessarily continue flooding +multicast packets on the according ports. + +2) Never removing an mdb entry could be exploited for a Denial of +Service by an attacker on the local link, slowly, but steadily eating up +all memory. + +Actually, this commit became obsolete with +"bridge: disable snooping if there is no querier" (b00589af3b) +which included fixes for a few more cases. 
+ +Therefore reverting the following commits (the commit stated in the +commit message plus three of its follow up fixes): + +==================== +Revert "bridge: update mdb expiration timer upon reports." +This reverts commit f144febd93d5ee534fdf23505ab091b2b9088edc. +Revert "bridge: do not call setup_timer() multiple times" +This reverts commit 1faabf2aab1fdaa1ace4e8c829d1b9cf7bfec2f1. +Revert "bridge: fix some kernel warning in multicast timer" +This reverts commit c7e8e8a8f7a70b343ca1e0f90a31e35ab2d16de1. +Revert "bridge: only expire the mdb entry when query is received" +This reverts commit 9f00b2e7cf241fa389733d41b615efdaa2cb0f5b. +==================== + +CC: Cong Wang +Signed-off-by: Linus Lüssing +Reviewed-by: Vlad Yasevich +Signed-off-by: David S. Miller +--- + net/bridge/br_mdb.c | 2 +- + net/bridge/br_multicast.c | 47 +++++++++++++++++++++++++++-------------------- + net/bridge/br_private.h | 1 - + 3 files changed, 28 insertions(+), 22 deletions(-) + +diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c +index 6319c43..de3a0e7 100644 +--- a/net/bridge/br_mdb.c ++++ b/net/bridge/br_mdb.c +@@ -451,7 +451,7 @@ static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry) + call_rcu_bh(&p->rcu, br_multicast_free_pg); + err = 0; + +- if (!mp->ports && !mp->mglist && mp->timer_armed && ++ if (!mp->ports && !mp->mglist && + netif_running(br->dev)) + mod_timer(&mp->timer, jiffies); + break; +diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c +index 0e3fea7..fbad619 100644 +--- a/net/bridge/br_multicast.c ++++ b/net/bridge/br_multicast.c +@@ -271,7 +271,7 @@ static void br_multicast_del_pg(struct net_bridge *br, + del_timer(&p->timer); + call_rcu_bh(&p->rcu, br_multicast_free_pg); + +- if (!mp->ports && !mp->mglist && mp->timer_armed && ++ if (!mp->ports && !mp->mglist && + netif_running(br->dev)) + mod_timer(&mp->timer, jiffies); + +@@ -610,9 +610,6 @@ rehash: + break; + + default: +- /* If we have an existing entry, update it's 
expire timer */ +- mod_timer(&mp->timer, +- jiffies + br->multicast_membership_interval); + goto out; + } + +@@ -622,7 +619,6 @@ rehash: + + mp->br = br; + mp->addr = *group; +- + setup_timer(&mp->timer, br_multicast_group_expired, + (unsigned long)mp); + +@@ -662,6 +658,7 @@ static int br_multicast_add_group(struct net_bridge *br, + struct net_bridge_mdb_entry *mp; + struct net_bridge_port_group *p; + struct net_bridge_port_group __rcu **pp; ++ unsigned long now = jiffies; + int err; + + spin_lock(&br->multicast_lock); +@@ -676,18 +673,15 @@ static int br_multicast_add_group(struct net_bridge *br, + + if (!port) { + mp->mglist = true; ++ mod_timer(&mp->timer, now + br->multicast_membership_interval); + goto out; + } + + for (pp = &mp->ports; + (p = mlock_dereference(*pp, br)) != NULL; + pp = &p->next) { +- if (p->port == port) { +- /* We already have a portgroup, update the timer. */ +- mod_timer(&p->timer, +- jiffies + br->multicast_membership_interval); +- goto out; +- } ++ if (p->port == port) ++ goto found; + if ((unsigned long)p->port < (unsigned long)port) + break; + } +@@ -698,6 +692,8 @@ static int br_multicast_add_group(struct net_bridge *br, + rcu_assign_pointer(*pp, p); + br_mdb_notify(br->dev, port, group, RTM_NEWMDB); + ++found: ++ mod_timer(&p->timer, now + br->multicast_membership_interval); + out: + err = 0; + +@@ -1197,9 +1193,6 @@ static int br_ip4_multicast_query(struct net_bridge *br, + if (!mp) + goto out; + +- mod_timer(&mp->timer, now + br->multicast_membership_interval); +- mp->timer_armed = true; +- + max_delay *= br->multicast_last_member_count; + + if (mp->mglist && +@@ -1276,9 +1269,6 @@ static int br_ip6_multicast_query(struct net_bridge *br, + if (!mp) + goto out; + +- mod_timer(&mp->timer, now + br->multicast_membership_interval); +- mp->timer_armed = true; +- + max_delay *= br->multicast_last_member_count; + if (mp->mglist && + (timer_pending(&mp->timer) ? 
+@@ -1364,7 +1354,7 @@ static void br_multicast_leave_group(struct net_bridge *br, + call_rcu_bh(&p->rcu, br_multicast_free_pg); + br_mdb_notify(br->dev, port, group, RTM_DELMDB); + +- if (!mp->ports && !mp->mglist && mp->timer_armed && ++ if (!mp->ports && !mp->mglist && + netif_running(br->dev)) + mod_timer(&mp->timer, jiffies); + } +@@ -1376,12 +1366,30 @@ static void br_multicast_leave_group(struct net_bridge *br, + br->multicast_last_member_interval; + + if (!port) { +- if (mp->mglist && mp->timer_armed && ++ if (mp->mglist && + (timer_pending(&mp->timer) ? + time_after(mp->timer.expires, time) : + try_to_del_timer_sync(&mp->timer) >= 0)) { + mod_timer(&mp->timer, time); + } ++ ++ goto out; ++ } ++ ++ for (p = mlock_dereference(mp->ports, br); ++ p != NULL; ++ p = mlock_dereference(p->next, br)) { ++ if (p->port != port) ++ continue; ++ ++ if (!hlist_unhashed(&p->mglist) && ++ (timer_pending(&p->timer) ? ++ time_after(p->timer.expires, time) : ++ try_to_del_timer_sync(&p->timer) >= 0)) { ++ mod_timer(&p->timer, time); ++ } ++ ++ break; + } + out: + spin_unlock(&br->multicast_lock); +@@ -1798,7 +1806,6 @@ void br_multicast_stop(struct net_bridge *br) + hlist_for_each_entry_safe(mp, n, &mdb->mhash[i], + hlist[ver]) { + del_timer(&mp->timer); +- mp->timer_armed = false; + call_rcu_bh(&mp->rcu, br_multicast_free_group); + } + } +diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h +index cde1eb1..aa05bd8 100644 +--- a/net/bridge/br_private.h ++++ b/net/bridge/br_private.h +@@ -126,7 +126,6 @@ struct net_bridge_mdb_entry + struct timer_list timer; + struct br_ip addr; + bool mglist; +- bool timer_armed; + }; + + struct net_bridge_mdb_htable +-- +1.7.11.7 + + +From 40420baad983147cd23e6de95c958c96b96be727 Mon Sep 17 00:00:00 2001 +From: Christophe Gouault +Date: Tue, 8 Oct 2013 17:21:22 +0200 +Subject: [PATCH 24/47] vti: get rid of nf mark rule in prerouting + +[ Upstream commit 7263a5187f9e9de45fcb51349cf0e031142c19a1 ] + +This patch fixes and improves 
the use of vti interfaces (while +lightly changing the way of configuring them). + +Currently: + +- it is necessary to identify and mark inbound IPsec + packets destined to each vti interface, via netfilter rules in + the mangle table at prerouting hook. + +- the vti module cannot retrieve the right tunnel in input since + commit b9959fd3: vti tunnels all have an i_key, but the tunnel lookup + is done with flag TUNNEL_NO_KEY, so there is no chance to retrieve them. + +- the i_key is used by the outbound processing as a mark to lookup + for the right SP and SA bundle. + +This patch uses the o_key to store the vti mark (instead of i_key) and +enables: + +- to avoid the need for previously marking the inbound skbuffs via a + netfilter rule. +- to properly retrieve the right tunnel in input, only based on the IPsec + packet outer addresses. +- to properly perform an inbound policy check (using the tunnel o_key + as a mark). +- to properly perform an outbound SPD and SAD lookup (using the tunnel + o_key as a mark). +- to keep the current mark of the skbuff. The skbuff mark is neither + used nor changed by the vti interface. Only the vti interface o_key + is used. + +SAs have a wildcard mark. +SPs have a mark equal to the vti interface o_key. + +The vti interface must be created as follows (i_key = 0, o_key = mark): + + ip link add vti1 mode vti local 1.1.1.1 remote 2.2.2.2 okey 1 + +The SPs attached to vti1 must be created as follows (mark = vti1 o_key): + + ip xfrm policy add dir out mark 1 tmpl src 1.1.1.1 dst 2.2.2.2 \ + proto esp mode tunnel + ip xfrm policy add dir in mark 1 tmpl src 2.2.2.2 dst 1.1.1.1 \ + proto esp mode tunnel + +The SAs are created with the default wildcard mark. There is no +distinction between global vs. vti SAs. 
Just their addresses will +possibly link them to a vti interface: + + ip xfrm state add src 1.1.1.1 dst 2.2.2.2 proto esp spi 1000 mode tunnel \ + enc "cbc(aes)" "azertyuiopqsdfgh" + + ip xfrm state add src 2.2.2.2 dst 1.1.1.1 proto esp spi 2000 mode tunnel \ + enc "cbc(aes)" "sqbdhgqsdjqjsdfh" + +To avoid matching "global" (not vti) SPs in vti interfaces, global SPs +should not use the default wildcard mark, but explicitly match mark 0. + +To avoid a double SPD lookup in input and output (in global and vti SPDs), +the NOPOLICY and NOXFRM options should be set on the vti interfaces: + + echo 1 > /proc/sys/net/ipv4/conf/vti1/disable_policy + echo 1 > /proc/sys/net/ipv4/conf/vti1/disable_xfrm + +The outgoing traffic is steered to vti1 by a route via the vti interface: + + ip route add 192.168.0.0/16 dev vti1 + +The incoming IPsec traffic is steered to vti1 because its outer addresses +match the vti1 tunnel configuration. + +Signed-off-by: Christophe Gouault +Signed-off-by: David S. Miller +--- + net/ipv4/ip_vti.c | 14 +++++++++++--- + 1 file changed, 11 insertions(+), 3 deletions(-) + +diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c +index 17cc0ff..0656041 100644 +--- a/net/ipv4/ip_vti.c ++++ b/net/ipv4/ip_vti.c +@@ -285,8 +285,17 @@ static int vti_rcv(struct sk_buff *skb) + tunnel = vti_tunnel_lookup(dev_net(skb->dev), iph->saddr, iph->daddr); + if (tunnel != NULL) { + struct pcpu_tstats *tstats; ++ u32 oldmark = skb->mark; ++ int ret; + +- if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) ++ ++ /* temporarily mark the skb with the tunnel o_key, to ++ * only match policies with this mark. 
++ */ ++ skb->mark = be32_to_cpu(tunnel->parms.o_key); ++ ret = xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb); ++ skb->mark = oldmark; ++ if (!ret) + return -1; + + tstats = this_cpu_ptr(tunnel->dev->tstats); +@@ -295,7 +304,6 @@ static int vti_rcv(struct sk_buff *skb) + tstats->rx_bytes += skb->len; + u64_stats_update_end(&tstats->syncp); + +- skb->mark = 0; + secpath_reset(skb); + skb->dev = tunnel->dev; + return 1; +@@ -327,7 +335,7 @@ static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) + + memset(&fl4, 0, sizeof(fl4)); + flowi4_init_output(&fl4, tunnel->parms.link, +- be32_to_cpu(tunnel->parms.i_key), RT_TOS(tos), ++ be32_to_cpu(tunnel->parms.o_key), RT_TOS(tos), + RT_SCOPE_UNIVERSE, + IPPROTO_IPIP, 0, + dst, tiph->saddr, 0, 0); +-- +1.7.11.7 + + +From d74d8a563ec79425464d7a8aeaa1796724fea7bc Mon Sep 17 00:00:00 2001 +From: Eric Dumazet +Date: Thu, 10 Oct 2013 06:30:09 -0700 +Subject: [PATCH 25/47] l2tp: must disable bh before calling l2tp_xmit_skb() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +[ Upstream commit 455cc32bf128e114455d11ad919321ab89a2c312 ] + +François Cachereul made a very nice bug report and suspected +the bh_lock_sock() / bh_unlok_sock() pair used in l2tp_xmit_skb() from +process context was not good. + +This problem was added by commit 6af88da14ee284aaad6e4326da09a89191ab6165 +("l2tp: Fix locking in l2tp_core.c"). + +l2tp_eth_dev_xmit() runs from BH context, so we must disable BH +from other l2tp_xmit_skb() users. + +[ 452.060011] BUG: soft lockup - CPU#1 stuck for 23s! [accel-pppd:6662] +[ 452.061757] Modules linked in: l2tp_ppp l2tp_netlink l2tp_core pppoe pppox +ppp_generic slhc ipv6 ext3 mbcache jbd virtio_balloon xfs exportfs dm_mod +virtio_blk ata_generic virtio_net floppy ata_piix libata virtio_pci virtio_ring virtio [last unloaded: scsi_wait_scan] +[ 452.064012] CPU 1 +[ 452.080015] BUG: soft lockup - CPU#2 stuck for 23s! 
[accel-pppd:6643] +[ 452.080015] CPU 2 +[ 452.080015] +[ 452.080015] Pid: 6643, comm: accel-pppd Not tainted 3.2.46.mini #1 Bochs Bochs +[ 452.080015] RIP: 0010:[] [] do_raw_spin_lock+0x17/0x1f +[ 452.080015] RSP: 0018:ffff88007125fc18 EFLAGS: 00000293 +[ 452.080015] RAX: 000000000000aba9 RBX: ffffffff811d0703 RCX: 0000000000000000 +[ 452.080015] RDX: 00000000000000ab RSI: ffff8800711f6896 RDI: ffff8800745c8110 +[ 452.080015] RBP: ffff88007125fc18 R08: 0000000000000020 R09: 0000000000000000 +[ 452.080015] R10: 0000000000000000 R11: 0000000000000280 R12: 0000000000000286 +[ 452.080015] R13: 0000000000000020 R14: 0000000000000240 R15: 0000000000000000 +[ 452.080015] FS: 00007fdc0cc24700(0000) GS:ffff8800b6f00000(0000) knlGS:0000000000000000 +[ 452.080015] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 452.080015] CR2: 00007fdb054899b8 CR3: 0000000074404000 CR4: 00000000000006a0 +[ 452.080015] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +[ 452.080015] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 +[ 452.080015] Process accel-pppd (pid: 6643, threadinfo ffff88007125e000, task ffff8800b27e6dd0) +[ 452.080015] Stack: +[ 452.080015] ffff88007125fc28 ffffffff81256559 ffff88007125fc98 ffffffffa01b2bd1 +[ 452.080015] ffff88007125fc58 000000000000000c 00000000029490d0 0000009c71dbe25e +[ 452.080015] 000000000000005c 000000080000000e 0000000000000000 ffff880071170600 +[ 452.080015] Call Trace: +[ 452.080015] [] _raw_spin_lock+0xe/0x10 +[ 452.080015] [] l2tp_xmit_skb+0x189/0x4ac [l2tp_core] +[ 452.080015] [] pppol2tp_sendmsg+0x15e/0x19c [l2tp_ppp] +[ 452.080015] [] __sock_sendmsg_nosec+0x22/0x24 +[ 452.080015] [] sock_sendmsg+0xa1/0xb6 +[ 452.080015] [] ? __schedule+0x5c1/0x616 +[ 452.080015] [] ? __dequeue_signal+0xb7/0x10c +[ 452.080015] [] ? fget_light+0x75/0x89 +[ 452.080015] [] ? 
sockfd_lookup_light+0x20/0x56 +[ 452.080015] [] sys_sendto+0x10c/0x13b +[ 452.080015] [] system_call_fastpath+0x16/0x1b +[ 452.080015] Code: 81 48 89 e5 72 0c 31 c0 48 81 ff 45 66 25 81 0f 92 c0 5d c3 55 b8 00 01 00 00 48 89 e5 f0 66 0f c1 07 0f b6 d4 38 d0 74 06 f3 90 <8a> 07 eb f6 5d c3 90 90 55 48 89 e5 9c 58 0f 1f 44 00 00 5d c3 +[ 452.080015] Call Trace: +[ 452.080015] [] _raw_spin_lock+0xe/0x10 +[ 452.080015] [] l2tp_xmit_skb+0x189/0x4ac [l2tp_core] +[ 452.080015] [] pppol2tp_sendmsg+0x15e/0x19c [l2tp_ppp] +[ 452.080015] [] __sock_sendmsg_nosec+0x22/0x24 +[ 452.080015] [] sock_sendmsg+0xa1/0xb6 +[ 452.080015] [] ? __schedule+0x5c1/0x616 +[ 452.080015] [] ? __dequeue_signal+0xb7/0x10c +[ 452.080015] [] ? fget_light+0x75/0x89 +[ 452.080015] [] ? sockfd_lookup_light+0x20/0x56 +[ 452.080015] [] sys_sendto+0x10c/0x13b +[ 452.080015] [] system_call_fastpath+0x16/0x1b +[ 452.064012] +[ 452.064012] Pid: 6662, comm: accel-pppd Not tainted 3.2.46.mini #1 Bochs Bochs +[ 452.064012] RIP: 0010:[] [] do_raw_spin_lock+0x19/0x1f +[ 452.064012] RSP: 0018:ffff8800b6e83ba0 EFLAGS: 00000297 +[ 452.064012] RAX: 000000000000aaa9 RBX: ffff8800b6e83b40 RCX: 0000000000000002 +[ 452.064012] RDX: 00000000000000aa RSI: 000000000000000a RDI: ffff8800745c8110 +[ 452.064012] RBP: ffff8800b6e83ba0 R08: 000000000000c802 R09: 000000000000001c +[ 452.064012] R10: ffff880071096c4e R11: 0000000000000006 R12: ffff8800b6e83b18 +[ 452.064012] R13: ffffffff8125d51e R14: ffff8800b6e83ba0 R15: ffff880072a589c0 +[ 452.064012] FS: 00007fdc0b81e700(0000) GS:ffff8800b6e80000(0000) knlGS:0000000000000000 +[ 452.064012] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 452.064012] CR2: 0000000000625208 CR3: 0000000074404000 CR4: 00000000000006a0 +[ 452.064012] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +[ 452.064012] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 +[ 452.064012] Process accel-pppd (pid: 6662, threadinfo ffff88007129a000, task ffff8800744f7410) +[ 
452.064012] Stack: +[ 452.064012] ffff8800b6e83bb0 ffffffff81256559 ffff8800b6e83bc0 ffffffff8121c64a +[ 452.064012] ffff8800b6e83bf0 ffffffff8121ec7a ffff880072a589c0 ffff880071096c62 +[ 452.064012] 0000000000000011 ffffffff81430024 ffff8800b6e83c80 ffffffff8121f276 +[ 452.064012] Call Trace: +[ 452.064012] +[ 452.064012] [] _raw_spin_lock+0xe/0x10 +[ 452.064012] [] spin_lock+0x9/0xb +[ 452.064012] [] udp_queue_rcv_skb+0x186/0x269 +[ 452.064012] [] __udp4_lib_rcv+0x297/0x4ae +[ 452.064012] [] ? raw_rcv+0xe9/0xf0 +[ 452.064012] [] udp_rcv+0x1a/0x1c +[ 452.064012] [] ip_local_deliver_finish+0x12b/0x1a5 +[ 452.064012] [] ip_local_deliver+0x53/0x84 +[ 452.064012] [] ip_rcv_finish+0x2bc/0x2f3 +[ 452.064012] [] ip_rcv+0x210/0x269 +[ 452.064012] [] ? kvm_clock_get_cycles+0x9/0xb +[ 452.064012] [] __netif_receive_skb+0x3a5/0x3f7 +[ 452.064012] [] netif_receive_skb+0x57/0x5e +[ 452.064012] [] ? __netdev_alloc_skb+0x1f/0x3b +[ 452.064012] [] virtnet_poll+0x4ba/0x5a4 [virtio_net] +[ 452.064012] [] net_rx_action+0x73/0x184 +[ 452.064012] [] ? l2tp_xmit_skb+0x27a/0x4ac [l2tp_core] +[ 452.064012] [] __do_softirq+0xc3/0x1a8 +[ 452.064012] [] ? ack_APIC_irq+0x10/0x12 +[ 452.064012] [] ? _raw_spin_lock+0xe/0x10 +[ 452.064012] [] call_softirq+0x1c/0x26 +[ 452.064012] [] do_softirq+0x45/0x82 +[ 452.064012] [] irq_exit+0x42/0x9c +[ 452.064012] [] do_IRQ+0x8e/0xa5 +[ 452.064012] [] common_interrupt+0x6e/0x6e +[ 452.064012] +[ 452.064012] [] ? kfree+0x8a/0xa3 +[ 452.064012] [] ? l2tp_xmit_skb+0x27a/0x4ac [l2tp_core] +[ 452.064012] [] ? l2tp_xmit_skb+0x1dd/0x4ac [l2tp_core] +[ 452.064012] [] pppol2tp_sendmsg+0x15e/0x19c [l2tp_ppp] +[ 452.064012] [] __sock_sendmsg_nosec+0x22/0x24 +[ 452.064012] [] sock_sendmsg+0xa1/0xb6 +[ 452.064012] [] ? __schedule+0x5c1/0x616 +[ 452.064012] [] ? __dequeue_signal+0xb7/0x10c +[ 452.064012] [] ? fget_light+0x75/0x89 +[ 452.064012] [] ? 
sockfd_lookup_light+0x20/0x56 +[ 452.064012] [] sys_sendto+0x10c/0x13b +[ 452.064012] [] system_call_fastpath+0x16/0x1b +[ 452.064012] Code: 89 e5 72 0c 31 c0 48 81 ff 45 66 25 81 0f 92 c0 5d c3 55 b8 00 01 00 00 48 89 e5 f0 66 0f c1 07 0f b6 d4 38 d0 74 06 f3 90 8a 07 f6 5d c3 90 90 55 48 89 e5 9c 58 0f 1f 44 00 00 5d c3 55 48 +[ 452.064012] Call Trace: +[ 452.064012] [] _raw_spin_lock+0xe/0x10 +[ 452.064012] [] spin_lock+0x9/0xb +[ 452.064012] [] udp_queue_rcv_skb+0x186/0x269 +[ 452.064012] [] __udp4_lib_rcv+0x297/0x4ae +[ 452.064012] [] ? raw_rcv+0xe9/0xf0 +[ 452.064012] [] udp_rcv+0x1a/0x1c +[ 452.064012] [] ip_local_deliver_finish+0x12b/0x1a5 +[ 452.064012] [] ip_local_deliver+0x53/0x84 +[ 452.064012] [] ip_rcv_finish+0x2bc/0x2f3 +[ 452.064012] [] ip_rcv+0x210/0x269 +[ 452.064012] [] ? kvm_clock_get_cycles+0x9/0xb +[ 452.064012] [] __netif_receive_skb+0x3a5/0x3f7 +[ 452.064012] [] netif_receive_skb+0x57/0x5e +[ 452.064012] [] ? __netdev_alloc_skb+0x1f/0x3b +[ 452.064012] [] virtnet_poll+0x4ba/0x5a4 [virtio_net] +[ 452.064012] [] net_rx_action+0x73/0x184 +[ 452.064012] [] ? l2tp_xmit_skb+0x27a/0x4ac [l2tp_core] +[ 452.064012] [] __do_softirq+0xc3/0x1a8 +[ 452.064012] [] ? ack_APIC_irq+0x10/0x12 +[ 452.064012] [] ? _raw_spin_lock+0xe/0x10 +[ 452.064012] [] call_softirq+0x1c/0x26 +[ 452.064012] [] do_softirq+0x45/0x82 +[ 452.064012] [] irq_exit+0x42/0x9c +[ 452.064012] [] do_IRQ+0x8e/0xa5 +[ 452.064012] [] common_interrupt+0x6e/0x6e +[ 452.064012] [] ? kfree+0x8a/0xa3 +[ 452.064012] [] ? l2tp_xmit_skb+0x27a/0x4ac [l2tp_core] +[ 452.064012] [] ? l2tp_xmit_skb+0x1dd/0x4ac [l2tp_core] +[ 452.064012] [] pppol2tp_sendmsg+0x15e/0x19c [l2tp_ppp] +[ 452.064012] [] __sock_sendmsg_nosec+0x22/0x24 +[ 452.064012] [] sock_sendmsg+0xa1/0xb6 +[ 452.064012] [] ? __schedule+0x5c1/0x616 +[ 452.064012] [] ? __dequeue_signal+0xb7/0x10c +[ 452.064012] [] ? fget_light+0x75/0x89 +[ 452.064012] [] ? 
sockfd_lookup_light+0x20/0x56 +[ 452.064012] [] sys_sendto+0x10c/0x13b +[ 452.064012] [] system_call_fastpath+0x16/0x1b + +Reported-by: François Cachereul +Tested-by: François Cachereul +Signed-off-by: Eric Dumazet +Cc: James Chapman +Signed-off-by: David S. Miller +--- + net/l2tp/l2tp_ppp.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c +index 5ebee2d..8c46b27 100644 +--- a/net/l2tp/l2tp_ppp.c ++++ b/net/l2tp/l2tp_ppp.c +@@ -353,7 +353,9 @@ static int pppol2tp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msgh + goto error_put_sess_tun; + } + ++ local_bh_disable(); + l2tp_xmit_skb(session, skb, session->hdr_len); ++ local_bh_enable(); + + sock_put(ps->tunnel_sock); + sock_put(sk); +@@ -422,7 +424,9 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb) + skb->data[0] = ppph[0]; + skb->data[1] = ppph[1]; + ++ local_bh_disable(); + l2tp_xmit_skb(session, skb, session->hdr_len); ++ local_bh_enable(); + + sock_put(sk_tun); + sock_put(sk); +-- +1.7.11.7 + + +From 5bf1c228293765ff84e4121cf2f92395403b7e33 Mon Sep 17 00:00:00 2001 +From: stephen hemminger +Date: Sun, 6 Oct 2013 15:15:33 -0700 +Subject: [PATCH 26/47] netem: update backlog after drop +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +[ Upstream commit 638a52b801e40ed276ceb69b73579ad99365361a ] + +When packet is dropped from rb-tree netem the backlog statistic should +also be updated. + +Reported-by: Сергеев Сергей +Signed-off-by: Stephen Hemminger +Acked-by: Eric Dumazet +Signed-off-by: David S. 
Miller +--- + net/sched/sch_netem.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c +index 82f6016..7dc79940 100644 +--- a/net/sched/sch_netem.c ++++ b/net/sched/sch_netem.c +@@ -523,6 +523,7 @@ static unsigned int netem_drop(struct Qdisc *sch) + skb->next = NULL; + skb->prev = NULL; + len = qdisc_pkt_len(skb); ++ sch->qstats.backlog -= len; + kfree_skb(skb); + } + } +-- +1.7.11.7 + + +From ddc30868db0e31c0c2ab4691131a050f9136f3bf Mon Sep 17 00:00:00 2001 +From: stephen hemminger +Date: Sun, 6 Oct 2013 15:16:49 -0700 +Subject: [PATCH 27/47] netem: free skb's in tree on reset +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +[ Upstream commit ff704050f2fc0f3382b5a70bba56a51a3feca79d ] + +Netem can leak memory because packets get stored in red-black +tree and it is not cleared on reset. + +Reported by: Сергеев Сергей +Signed-off-by: Stephen Hemminger +Signed-off-by: David S. Miller +--- + net/sched/sch_netem.c | 16 ++++++++++++++++ + 1 file changed, 16 insertions(+) + +diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c +index 7dc79940..3626010 100644 +--- a/net/sched/sch_netem.c ++++ b/net/sched/sch_netem.c +@@ -358,6 +358,21 @@ static psched_time_t packet_len_2_sched_time(unsigned int len, struct netem_sche + return PSCHED_NS2TICKS(ticks); + } + ++static void tfifo_reset(struct Qdisc *sch) ++{ ++ struct netem_sched_data *q = qdisc_priv(sch); ++ struct rb_node *p; ++ ++ while ((p = rb_first(&q->t_root))) { ++ struct sk_buff *skb = netem_rb_to_skb(p); ++ ++ rb_erase(p, &q->t_root); ++ skb->next = NULL; ++ skb->prev = NULL; ++ kfree_skb(skb); ++ } ++} ++ + static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch) + { + struct netem_sched_data *q = qdisc_priv(sch); +@@ -613,6 +628,7 @@ static void netem_reset(struct Qdisc *sch) + struct netem_sched_data *q = qdisc_priv(sch); + + qdisc_reset_queue(sch); ++ tfifo_reset(sch); + if (q->qdisc) + 
qdisc_reset(q->qdisc); + qdisc_watchdog_cancel(&q->watchdog); +-- +1.7.11.7 + + +From c871c477136615360e283471acdb33df95d70470 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Salva=20Peir=C3=B3?= +Date: Fri, 11 Oct 2013 12:50:03 +0300 +Subject: [PATCH 28/47] farsync: fix info leak in ioctl + +[ Upstream commit 96b340406724d87e4621284ebac5e059d67b2194 ] + +The fst_get_iface() code fails to initialize the two padding bytes of +struct sync_serial_settings after the ->loopback member. Add an explicit +memset(0) before filling the structure to avoid the info leak. + +Signed-off-by: Dan Carpenter +Signed-off-by: David S. Miller +--- + drivers/net/wan/farsync.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/drivers/net/wan/farsync.c b/drivers/net/wan/farsync.c +index 3f0c4f2..bcfff0d 100644 +--- a/drivers/net/wan/farsync.c ++++ b/drivers/net/wan/farsync.c +@@ -1972,6 +1972,7 @@ fst_get_iface(struct fst_card_info *card, struct fst_port_info *port, + } + + i = port->index; ++ memset(&sync, 0, sizeof(sync)); + sync.clock_rate = FST_RDL(card, portConfig[i].lineSpeed); + /* Lucky card and linux use same encoding here */ + sync.clock_type = FST_RDB(card, portConfig[i].internalClock) == +-- +1.7.11.7 + + +From e69ccba66791d0edd0d596520de268369aaab610 Mon Sep 17 00:00:00 2001 +From: Mathias Krause +Date: Mon, 30 Sep 2013 22:05:40 +0200 +Subject: [PATCH 29/47] unix_diag: fix info leak + +[ Upstream commit 6865d1e834be84ddd5808d93d5035b492346c64a ] + +When filling the netlink message we miss to wipe the pad field, +therefore leak one byte of heap memory to userland. Fix this by +setting pad to 0. + +Signed-off-by: Mathias Krause +Signed-off-by: David S. 
Miller +--- + net/unix/diag.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/net/unix/diag.c b/net/unix/diag.c +index d591091..86fa0f3 100644 +--- a/net/unix/diag.c ++++ b/net/unix/diag.c +@@ -124,6 +124,7 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_r + rep->udiag_family = AF_UNIX; + rep->udiag_type = sk->sk_type; + rep->udiag_state = sk->sk_state; ++ rep->pad = 0; + rep->udiag_ino = sk_ino; + sock_diag_save_cookie(sk, rep->udiag_cookie); + +-- +1.7.11.7 + + +From 00fa721e6873ccbb36fc008558bb7d23e9e3c21f Mon Sep 17 00:00:00 2001 +From: Mathias Krause +Date: Mon, 30 Sep 2013 22:03:07 +0200 +Subject: [PATCH 30/47] connector: use nlmsg_len() to check message length + +[ Upstream commit 162b2bedc084d2d908a04c93383ba02348b648b0 ] + +The current code tests the length of the whole netlink message to be +at least as long to fit a cn_msg. This is wrong as nlmsg_len includes +the length of the netlink message header. Use nlmsg_len() instead to +fix this "off-by-NLMSG_HDRLEN" size check. + +Cc: stable@vger.kernel.org # v2.6.14+ +Signed-off-by: Mathias Krause +Signed-off-by: David S. 
Miller +--- + drivers/connector/connector.c | 7 ++++--- + 1 file changed, 4 insertions(+), 3 deletions(-) + +diff --git a/drivers/connector/connector.c b/drivers/connector/connector.c +index 6ecfa75..0daa11e 100644 +--- a/drivers/connector/connector.c ++++ b/drivers/connector/connector.c +@@ -157,17 +157,18 @@ static int cn_call_callback(struct sk_buff *skb) + static void cn_rx_skb(struct sk_buff *__skb) + { + struct nlmsghdr *nlh; +- int err; + struct sk_buff *skb; ++ int len, err; + + skb = skb_get(__skb); + + if (skb->len >= NLMSG_HDRLEN) { + nlh = nlmsg_hdr(skb); ++ len = nlmsg_len(nlh); + +- if (nlh->nlmsg_len < sizeof(struct cn_msg) || ++ if (len < (int)sizeof(struct cn_msg) || + skb->len < nlh->nlmsg_len || +- nlh->nlmsg_len > CONNECTOR_MAX_MSG_SIZE) { ++ len > CONNECTOR_MAX_MSG_SIZE) { + kfree_skb(skb); + return; + } +-- +1.7.11.7 + + +From d99d51100021c9f8b335fc1931880618eaa448e3 Mon Sep 17 00:00:00 2001 +From: Eric Dumazet +Date: Sat, 12 Oct 2013 14:08:34 -0700 +Subject: [PATCH 31/47] bnx2x: record rx queue for LRO packets + +[ Upstream commit 60e66fee56b2256dcb1dc2ea1b2ddcb6e273857d ] + +RPS support is kind of broken on bnx2x, because only non LRO packets +get proper rx queue information. This triggers reorders, as it seems +bnx2x like to generate a non LRO packet for segment including TCP PUSH +flag : (this might be pure coincidence, but all the reorders I've +seen involve segments with a PUSH) + +11:13:34.335847 IP A > B: . 415808:447136(31328) ack 1 win 457 +11:13:34.335992 IP A > B: . 447136:448560(1424) ack 1 win 457 +11:13:34.336391 IP A > B: . 448560:479888(31328) ack 1 win 457 +11:13:34.336425 IP A > B: P 511216:512640(1424) ack 1 win 457 +11:13:34.336423 IP A > B: . 479888:511216(31328) ack 1 win 457 +11:13:34.336924 IP A > B: . 512640:543968(31328) ack 1 win 457 +11:13:34.336963 IP A > B: . 
543968:575296(31328) ack 1 win 457 + +We must call skb_record_rx_queue() to properly give to RPS (and more +generally for TX queue selection on forward path) the receive queue +information. + +Similar fix is needed for skb_mark_napi_id(), but will be handled +in a separate patch to ease stable backports. + +Signed-off-by: Eric Dumazet +Cc: Willem de Bruijn +Cc: Eilon Greenstein +Acked-by: Dmitry Kravkov +Signed-off-by: David S. Miller +--- + drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +index 0cc2611..4b0877e 100644 +--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c ++++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +@@ -676,6 +676,7 @@ static void bnx2x_gro_receive(struct bnx2x *bp, struct bnx2x_fastpath *fp, + } + } + #endif ++ skb_record_rx_queue(skb, fp->rx_queue); + napi_gro_receive(&fp->napi, skb); + } + +-- +1.7.11.7 + + +From 3f1db36c01909701d0e34cd2413a1127e144bcc3 Mon Sep 17 00:00:00 2001 +From: Jason Wang +Date: Tue, 15 Oct 2013 11:18:58 +0800 +Subject: [PATCH 32/47] virtio-net: don't respond to cpu hotplug notifier if + we're not ready + +[ Upstream commit 3ab098df35f8b98b6553edc2e40234af512ba877 ] + +We're trying to re-configure the affinity unconditionally in cpu hotplug +callback. This may lead the issue during resuming from s3/s4 since + +- virt queues haven't been allocated at that time. +- it's unnecessary since thaw method will re-configure the affinity. + +Fix this issue by checking the config_enable and do nothing is we're not ready. + +The bug were introduced by commit 8de4b2f3ae90c8fc0f17eeaab87d5a951b66ee17 +(virtio-net: reset virtqueue affinity when doing cpu hotplug). + +Cc: Rusty Russell +Cc: Michael S. Tsirkin +Cc: Wanlong Gao +Acked-by: Michael S. Tsirkin +Reviewed-by: Wanlong Gao +Signed-off-by: Jason Wang +Signed-off-by: David S. 
Miller +--- + drivers/net/virtio_net.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c +index 3d2a90a..43a71d9 100644 +--- a/drivers/net/virtio_net.c ++++ b/drivers/net/virtio_net.c +@@ -1094,6 +1094,11 @@ static int virtnet_cpu_callback(struct notifier_block *nfb, + { + struct virtnet_info *vi = container_of(nfb, struct virtnet_info, nb); + ++ mutex_lock(&vi->config_lock); ++ ++ if (!vi->config_enable) ++ goto done; ++ + switch(action & ~CPU_TASKS_FROZEN) { + case CPU_ONLINE: + case CPU_DOWN_FAILED: +@@ -1106,6 +1111,9 @@ static int virtnet_cpu_callback(struct notifier_block *nfb, + default: + break; + } ++ ++done: ++ mutex_unlock(&vi->config_lock); + return NOTIFY_OK; + } + +-- +1.7.11.7 + + +From 24ef3b7cfd16ce5ac263deebfecb661d1c784670 Mon Sep 17 00:00:00 2001 +From: Jason Wang +Date: Tue, 15 Oct 2013 11:18:59 +0800 +Subject: [PATCH 33/47] virtio-net: refill only when device is up during + setting queues + +[ Upstream commit 35ed159bfd96a7547ec277ed8b550c7cbd9841b6 ] + +We used to schedule the refill work unconditionally after changing the +number of queues. This may lead an issue if the device is not +up. Since we only try to cancel the work in ndo_stop(), this may cause +the refill work still work after removing the device. Fix this by only +schedule the work when device is up. + +The bug were introduce by commit 9b9cd8024a2882e896c65222aa421d461354e3f2. +(virtio-net: fix the race between channels setting and refill) + +Cc: Rusty Russell +Cc: Michael S. Tsirkin +Signed-off-by: Jason Wang +Signed-off-by: David S. 
Miller +--- + drivers/net/virtio_net.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c +index 43a71d9..1d01534 100644 +--- a/drivers/net/virtio_net.c ++++ b/drivers/net/virtio_net.c +@@ -916,7 +916,9 @@ static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs) + return -EINVAL; + } else { + vi->curr_queue_pairs = queue_pairs; +- schedule_delayed_work(&vi->refill, 0); ++ /* virtnet_open() will refill when device is going to up. */ ++ if (dev->flags & IFF_UP) ++ schedule_delayed_work(&vi->refill, 0); + } + + return 0; +@@ -1714,7 +1716,9 @@ static int virtnet_restore(struct virtio_device *vdev) + vi->config_enable = true; + mutex_unlock(&vi->config_lock); + ++ rtnl_lock(); + virtnet_set_queues(vi, vi->curr_queue_pairs); ++ rtnl_unlock(); + + return 0; + } +-- +1.7.11.7 + + +From d616bd8bf902f82ea742462a29bf4080aaa8f497 Mon Sep 17 00:00:00 2001 +From: Vlad Yasevich +Date: Tue, 15 Oct 2013 14:57:45 -0400 +Subject: [PATCH 34/47] bridge: Correctly clamp MAX forward_delay when + enabling STP + +[ Upstream commit 4b6c7879d84ad06a2ac5b964808ed599187a188d ] + +Commit be4f154d5ef0ca147ab6bcd38857a774133f5450 + bridge: Clamp forward_delay when enabling STP +had a typo when attempting to clamp maximum forward delay. + +It is possible to set bridge_forward_delay to be higher then +permitted maximum when STP is off. When turning STP on, the +higher then allowed delay has to be clamed down to max value. + +CC: Herbert Xu +CC: Stephen Hemminger +Signed-off-by: Vlad Yasevich +Reviewed-by: Veaceslav Falico +Acked-by: Herbert Xu +Signed-off-by: David S. 
Miller +--- + net/bridge/br_stp_if.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c +index 108084a..656a6f3 100644 +--- a/net/bridge/br_stp_if.c ++++ b/net/bridge/br_stp_if.c +@@ -134,7 +134,7 @@ static void br_stp_start(struct net_bridge *br) + + if (br->bridge_forward_delay < BR_MIN_FORWARD_DELAY) + __br_set_forward_delay(br, BR_MIN_FORWARD_DELAY); +- else if (br->bridge_forward_delay < BR_MAX_FORWARD_DELAY) ++ else if (br->bridge_forward_delay > BR_MAX_FORWARD_DELAY) + __br_set_forward_delay(br, BR_MAX_FORWARD_DELAY); + + if (r == 0) { +-- +1.7.11.7 + + +From 803490b7c577add0b976aa08e4bbfdd95f505270 Mon Sep 17 00:00:00 2001 +From: Vlad Yasevich +Date: Tue, 15 Oct 2013 22:01:29 -0400 +Subject: [PATCH 35/47] net: dst: provide accessor function to dst->xfrm + +[ Upstream commit e87b3998d795123b4139bc3f25490dd236f68212 ] + +dst->xfrm is conditionally defined. Provide accessor funtion that +is always available. + +Signed-off-by: Vlad Yasevich +Acked-by: Neil Horman +Signed-off-by: David S. 
Miller +--- + include/net/dst.h | 12 ++++++++++++ + 1 file changed, 12 insertions(+) + +diff --git a/include/net/dst.h b/include/net/dst.h +index 1f8fd10..e0c97f5 100644 +--- a/include/net/dst.h ++++ b/include/net/dst.h +@@ -477,10 +477,22 @@ static inline struct dst_entry *xfrm_lookup(struct net *net, + { + return dst_orig; + } ++ ++static inline struct xfrm_state *dst_xfrm(const struct dst_entry *dst) ++{ ++ return NULL; ++} ++ + #else + extern struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig, + const struct flowi *fl, struct sock *sk, + int flags); ++ ++/* skb attached with this dst needs transformation if dst->xfrm is valid */ ++static inline struct xfrm_state *dst_xfrm(const struct dst_entry *dst) ++{ ++ return dst->xfrm; ++} + #endif + + #endif /* _NET_DST_H */ +-- +1.7.11.7 + + +From 371a65903ccb75fc71fd42b30a310a28c42e54a3 Mon Sep 17 00:00:00 2001 +From: Fan Du +Date: Tue, 15 Oct 2013 22:01:30 -0400 +Subject: [PATCH 36/47] sctp: Use software crc32 checksum when xfrm transform + will happen. + +[ Upstream commit 27127a82561a2a3ed955ce207048e1b066a80a2a ] + +igb/ixgbe have hardware sctp checksum support, when this feature is enabled +and also IPsec is armed to protect sctp traffic, ugly things happened as +xfrm_output checks CHECKSUM_PARTIAL to do checksum operation(sum every thing +up and pack the 16bits result in the checksum field). The result is fail +establishment of sctp communication. + +Cc: Neil Horman +Cc: Steffen Klassert +Signed-off-by: Fan Du +Signed-off-by: Vlad Yasevich +Acked-by: Neil Horman +Signed-off-by: David S. Miller +--- + net/sctp/output.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/net/sctp/output.c b/net/sctp/output.c +index a46d1eb..a06a9b6 100644 +--- a/net/sctp/output.c ++++ b/net/sctp/output.c +@@ -542,7 +542,8 @@ int sctp_packet_transmit(struct sctp_packet *packet) + * by CRC32-C as described in . 
+ */ + if (!sctp_checksum_disable) { +- if (!(dst->dev->features & NETIF_F_SCTP_CSUM)) { ++ if (!(dst->dev->features & NETIF_F_SCTP_CSUM) || ++ (dst_xfrm(dst) != NULL)) { + __u32 crc32 = sctp_start_cksum((__u8 *)sh, cksum_buf_len); + + /* 3) Put the resultant value into the checksum field in the +-- +1.7.11.7 + + +From 9067790bb296fb5818894222d7e85407238e9843 Mon Sep 17 00:00:00 2001 +From: Vlad Yasevich +Date: Tue, 15 Oct 2013 22:01:31 -0400 +Subject: [PATCH 37/47] sctp: Perform software checksum if packet has to be + fragmented. + +[ Upstream commit d2dbbba77e95dff4b4f901fee236fef6d9552072 ] + +IP/IPv6 fragmentation knows how to compute only TCP/UDP checksum. +This causes problems if SCTP packets has to be fragmented and +ipsummed has been set to PARTIAL due to checksum offload support. +This condition can happen when retransmitting after MTU discover, +or when INIT or other control chunks are larger then MTU. +Check for the rare fragmentation condition in SCTP and use software +checksum calculation in this case. + +CC: Fan Du +Signed-off-by: Vlad Yasevich +Acked-by: Neil Horman +Signed-off-by: David S. 
Miller +--- + net/sctp/output.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/net/sctp/output.c b/net/sctp/output.c +index a06a9b6..013a07d 100644 +--- a/net/sctp/output.c ++++ b/net/sctp/output.c +@@ -543,7 +543,7 @@ int sctp_packet_transmit(struct sctp_packet *packet) + */ + if (!sctp_checksum_disable) { + if (!(dst->dev->features & NETIF_F_SCTP_CSUM) || +- (dst_xfrm(dst) != NULL)) { ++ (dst_xfrm(dst) != NULL) || packet->ipfragok) { + __u32 crc32 = sctp_start_cksum((__u8 *)sh, cksum_buf_len); + + /* 3) Put the resultant value into the checksum field in the +-- +1.7.11.7 + + +From 22e825ed8144360271614511563166f37fef9f90 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Salva=20Peir=C3=B3?= +Date: Wed, 16 Oct 2013 12:46:50 +0200 +Subject: [PATCH 38/47] wanxl: fix info leak in ioctl +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +[ Upstream commit 2b13d06c9584b4eb773f1e80bbaedab9a1c344e1 ] + +The wanxl_ioctl() code fails to initialize the two padding bytes of +struct sync_serial_settings after the ->loopback member. Add an explicit +memset(0) before filling the structure to avoid the info leak. + +Signed-off-by: Salva Peiró +Signed-off-by: David S. 
Miller +--- + drivers/net/wan/wanxl.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/drivers/net/wan/wanxl.c b/drivers/net/wan/wanxl.c +index 6a24a5a..4c0a697 100644 +--- a/drivers/net/wan/wanxl.c ++++ b/drivers/net/wan/wanxl.c +@@ -355,6 +355,7 @@ static int wanxl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) + ifr->ifr_settings.size = size; /* data size wanted */ + return -ENOBUFS; + } ++ memset(&line, 0, sizeof(line)); + line.clock_type = get_status(port)->clocking; + line.clock_rate = 0; + line.loopback = 0; +-- +1.7.11.7 + + +From b16dd2cff7a4eb3881f43371d71ed242332877dc Mon Sep 17 00:00:00 2001 +From: Vasundhara Volam +Date: Thu, 17 Oct 2013 11:47:14 +0530 +Subject: [PATCH 39/47] be2net: pass if_id for v1 and V2 versions of TX_CREATE + cmd + +[ Upstream commit 0fb88d61bc60779dde88b0fc268da17eb81d0412 ] + +It is a required field for all TX_CREATE cmd versions > 0. +This fixes a driver initialization failure, caused by recent SH-R Firmwares +(versions > 10.0.639.0) failing the TX_CREATE cmd when if_id field is +not passed. + +Signed-off-by: Sathya Perla +Signed-off-by: David S. 
Miller +--- + drivers/net/ethernet/emulex/benet/be_cmds.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c +index 8ec5d74..13ac104 100644 +--- a/drivers/net/ethernet/emulex/benet/be_cmds.c ++++ b/drivers/net/ethernet/emulex/benet/be_cmds.c +@@ -1150,7 +1150,6 @@ int be_cmd_txq_create(struct be_adapter *adapter, struct be_tx_obj *txo) + + if (lancer_chip(adapter)) { + req->hdr.version = 1; +- req->if_id = cpu_to_le16(adapter->if_handle); + } else if (BEx_chip(adapter)) { + if (adapter->function_caps & BE_FUNCTION_CAPS_SUPER_NIC) + req->hdr.version = 2; +@@ -1158,6 +1157,8 @@ int be_cmd_txq_create(struct be_adapter *adapter, struct be_tx_obj *txo) + req->hdr.version = 2; + } + ++ if (req->hdr.version > 0) ++ req->if_id = cpu_to_le16(adapter->if_handle); + req->num_pages = PAGES_4K_SPANNED(q_mem->va, q_mem->size); + req->ulp_num = BE_ULP1_NUM; + req->type = BE_ETH_TX_RING_TYPE_STANDARD; +-- +1.7.11.7 + + +From 9829aac8208e7a31e4e42e7d2e7e165593c05202 Mon Sep 17 00:00:00 2001 +From: Daniel Borkmann +Date: Thu, 17 Oct 2013 22:51:31 +0200 +Subject: [PATCH 40/47] net: unix: inherit SOCK_PASS{CRED, SEC} flags from + socket to fix race + +[ Upstream commit 90c6bd34f884cd9cee21f1d152baf6c18bcac949 ] + +In the case of credentials passing in unix stream sockets (dgram +sockets seem not affected), we get a rather sparse race after +commit 16e5726 ("af_unix: dont send SCM_CREDENTIALS by default"). + +We have a stream server on receiver side that requests credential +passing from senders (e.g. nc -U). Since we need to set SO_PASSCRED +on each spawned/accepted socket on server side to 1 first (as it's +not inherited), it can happen that in the time between accept() and +setsockopt() we get interrupted, the sender is being scheduled and +continues with passing data to our receiver. 
At that time SO_PASSCRED +is neither set on sender nor receiver side, hence in cmsg's +SCM_CREDENTIALS we get eventually pid:0, uid:65534, gid:65534 +(== overflow{u,g}id) instead of what we actually would like to see. + +On the sender side, here nc -U, the tests in maybe_add_creds() +invoked through unix_stream_sendmsg() would fail, as at that exact +time, as mentioned, the sender has neither SO_PASSCRED on his side +nor sees it on the server side, and we have a valid 'other' socket +in place. Thus, sender believes it would just look like a normal +connection, not needing/requesting SO_PASSCRED at that time. + +As reverting 16e5726 would not be an option due to the significant +performance regression reported when having creds always passed, +one way/trade-off to prevent that would be to set SO_PASSCRED on +the listener socket and allow inheriting these flags to the spawned +socket on server side in accept(). It seems also logical to do so +if we'd tell the listener socket to pass those flags onwards, and +would fix the race. + +Before, strace: + +recvmsg(4, {msg_name(0)=NULL, msg_iov(1)=[{"blub\n", 4096}], + msg_controllen=32, {cmsg_len=28, cmsg_level=SOL_SOCKET, + cmsg_type=SCM_CREDENTIALS{pid=0, uid=65534, gid=65534}}, + msg_flags=0}, 0) = 5 + +After, strace: + +recvmsg(4, {msg_name(0)=NULL, msg_iov(1)=[{"blub\n", 4096}], + msg_controllen=32, {cmsg_len=28, cmsg_level=SOL_SOCKET, + cmsg_type=SCM_CREDENTIALS{pid=11580, uid=1000, gid=1000}}, + msg_flags=0}, 0) = 5 + +Signed-off-by: Daniel Borkmann +Cc: Eric Dumazet +Cc: Eric W. Biederman +Signed-off-by: David S. 
Miller +--- + net/unix/af_unix.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c +index c4ce243..e64bbcf 100644 +--- a/net/unix/af_unix.c ++++ b/net/unix/af_unix.c +@@ -1246,6 +1246,15 @@ static int unix_socketpair(struct socket *socka, struct socket *sockb) + return 0; + } + ++static void unix_sock_inherit_flags(const struct socket *old, ++ struct socket *new) ++{ ++ if (test_bit(SOCK_PASSCRED, &old->flags)) ++ set_bit(SOCK_PASSCRED, &new->flags); ++ if (test_bit(SOCK_PASSSEC, &old->flags)) ++ set_bit(SOCK_PASSSEC, &new->flags); ++} ++ + static int unix_accept(struct socket *sock, struct socket *newsock, int flags) + { + struct sock *sk = sock->sk; +@@ -1280,6 +1289,7 @@ static int unix_accept(struct socket *sock, struct socket *newsock, int flags) + /* attach accepted sock to socket */ + unix_state_lock(tsk); + newsock->state = SS_CONNECTED; ++ unix_sock_inherit_flags(sock, newsock); + sock_graft(tsk, newsock); + unix_state_unlock(tsk); + return 0; +-- +1.7.11.7 + + +From 7b48750febb4c3387db39fd0b547936c53ba7364 Mon Sep 17 00:00:00 2001 +From: Seif Mazareeb +Date: Thu, 17 Oct 2013 20:33:21 -0700 +Subject: [PATCH 41/47] net: fix cipso packet validation when !NETLABEL + +[ Upstream commit f2e5ddcc0d12f9c4c7b254358ad245c9dddce13b ] + +When CONFIG_NETLABEL is disabled, the cipso_v4_validate() function could loop +forever in the main loop if opt[opt_iter +1] == 0, this will causing a kernel +crash in an SMP system, since the CPU executing this function will +stall /not respond to IPIs. + +This problem can be reproduced by running the IP Stack Integrity Checker +(http://isic.sourceforge.net) using the following command on a Linux machine +connected to DUT: + +"icmpsic -s rand -d -r 123456" +wait (1-2 min) + +Signed-off-by: Seif Mazareeb +Acked-by: Paul Moore +Signed-off-by: David S. 
Miller +--- + include/net/cipso_ipv4.h | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +diff --git a/include/net/cipso_ipv4.h b/include/net/cipso_ipv4.h +index a7a683e..a8c2ef6 100644 +--- a/include/net/cipso_ipv4.h ++++ b/include/net/cipso_ipv4.h +@@ -290,6 +290,7 @@ static inline int cipso_v4_validate(const struct sk_buff *skb, + unsigned char err_offset = 0; + u8 opt_len = opt[1]; + u8 opt_iter; ++ u8 tag_len; + + if (opt_len < 8) { + err_offset = 1; +@@ -302,11 +303,12 @@ static inline int cipso_v4_validate(const struct sk_buff *skb, + } + + for (opt_iter = 6; opt_iter < opt_len;) { +- if (opt[opt_iter + 1] > (opt_len - opt_iter)) { ++ tag_len = opt[opt_iter + 1]; ++ if ((tag_len == 0) || (opt[opt_iter + 1] > (opt_len - opt_iter))) { + err_offset = opt_iter + 1; + goto out; + } +- opt_iter += opt[opt_iter + 1]; ++ opt_iter += tag_len; + } + + out: +-- +1.7.11.7 + + +From 27e33640a8905b1aeefe9998242551caf24e84a6 Mon Sep 17 00:00:00 2001 +From: Hannes Frederic Sowa +Date: Tue, 22 Oct 2013 00:07:47 +0200 +Subject: [PATCH 42/47] inet: fix possible memory corruption with UDP_CORK and + UFO + +[ This is a simplified -stable version of a set of upstream commits. ] + +This is a replacement patch only for stable which does fix the problems +handled by the following two commits in -net: + +"ip_output: do skb ufo init for peeked non ufo skb as well" (e93b7d748be887cd7639b113ba7d7ef792a7efb9) +"ip6_output: do skb ufo init for peeked non ufo skb as well" (c547dbf55d5f8cf615ccc0e7265e98db27d3fb8b) + +Three frames are written on a corked udp socket for which the output +netdevice has UFO enabled. If the first and third frame are smaller than +the mtu and the second one is bigger, we enqueue the second frame with +skb_append_datato_frags without initializing the gso fields. This leads +to the third frame appended regulary and thus constructing an invalid skb. 
+ +This fixes the problem by always using skb_append_datato_frags as soon +as the first frag got enqueued to the skb without marking the packet +as SKB_GSO_UDP. + +The problem with only two frames for ipv6 was fixed by "ipv6: udp +packets following an UFO enqueued packet need also be handled by UFO" +(2811ebac2521ceac84f2bdae402455baa6a7fb47). + +Cc: Jiri Pirko +Cc: Eric Dumazet +Cc: David Miller +Signed-off-by: Hannes Frederic Sowa +--- + include/linux/skbuff.h | 5 +++++ + net/ipv4/ip_output.c | 2 +- + net/ipv6/ip6_output.c | 2 +- + 3 files changed, 7 insertions(+), 2 deletions(-) + +diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h +index 3b71a4e..6bd165b 100644 +--- a/include/linux/skbuff.h ++++ b/include/linux/skbuff.h +@@ -1316,6 +1316,11 @@ static inline int skb_pagelen(const struct sk_buff *skb) + return len + skb_headlen(skb); + } + ++static inline bool skb_has_frags(const struct sk_buff *skb) ++{ ++ return skb_shinfo(skb)->nr_frags; ++} ++ + /** + * __skb_fill_page_desc - initialise a paged fragment in an skb + * @skb: buffer containing fragment to be initialised +diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c +index a04d872..7f4ab5d 100644 +--- a/net/ipv4/ip_output.c ++++ b/net/ipv4/ip_output.c +@@ -836,7 +836,7 @@ static int __ip_append_data(struct sock *sk, + csummode = CHECKSUM_PARTIAL; + + cork->length += length; +- if (((length > mtu) || (skb && skb_is_gso(skb))) && ++ if (((length > mtu) || (skb && skb_has_frags(skb))) && + (sk->sk_protocol == IPPROTO_UDP) && + (rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len) { + err = ip_ufo_append_data(sk, queue, getfrag, from, length, +diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c +index 44df1c9..2e542d0 100644 +--- a/net/ipv6/ip6_output.c ++++ b/net/ipv6/ip6_output.c +@@ -1252,7 +1252,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, + skb = skb_peek_tail(&sk->sk_write_queue); + cork->length += length; + if (((length > mtu) || +- (skb && 
skb_is_gso(skb))) && ++ (skb && skb_has_frags(skb))) && + (sk->sk_protocol == IPPROTO_UDP) && + (rt->dst.dev->features & NETIF_F_UFO)) { + err = ip6_ufo_append_data(sk, getfrag, from, length, +-- +1.7.11.7 + + +From 689f77d13532698739438b2288ec8eac2f667584 Mon Sep 17 00:00:00 2001 +From: Julian Anastasov +Date: Sun, 20 Oct 2013 15:43:03 +0300 +Subject: [PATCH 43/47] ipv6: always prefer rt6i_gateway if present + +[ Upstream commit 96dc809514fb2328605198a0602b67554d8cce7b ] + +In v3.9 6fd6ce2056de2709 ("ipv6: Do not depend on rt->n in +ip6_finish_output2()." changed the behaviour of ip6_finish_output2() +such that the recently introduced rt6_nexthop() is used +instead of an assigned neighbor. + +As rt6_nexthop() prefers rt6i_gateway only for gatewayed +routes this causes a problem for users like IPVS, xt_TEE and +RAW(hdrincl) if they want to use different address for routing +compared to the destination address. + +Another case is when redirect can create RTF_DYNAMIC +route without RTF_GATEWAY flag, we ignore the rt6i_gateway +in rt6_nexthop(). + +Fix the above problems by considering the rt6i_gateway if +present, so that traffic routed to address on local subnet is +not wrongly diverted to the destination address. + +Thanks to Simon Horman and Phil Oester for spotting the +problematic commit. + +Thanks to Hannes Frederic Sowa for his review and help in testing. + +Reported-by: Phil Oester +Reported-by: Mark Brooks +Signed-off-by: Julian Anastasov +Acked-by: Hannes Frederic Sowa +Signed-off-by: David S. 
Miller +--- + include/net/ip6_route.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h +index f667248..0aaf0ec 100644 +--- a/include/net/ip6_route.h ++++ b/include/net/ip6_route.h +@@ -198,7 +198,7 @@ static inline int ip6_skb_dst_mtu(struct sk_buff *skb) + + static inline struct in6_addr *rt6_nexthop(struct rt6_info *rt, struct in6_addr *dest) + { +- if (rt->rt6i_flags & RTF_GATEWAY) ++ if (rt->rt6i_flags & RTF_GATEWAY || !ipv6_addr_any(&rt->rt6i_gateway)) + return &rt->rt6i_gateway; + return dest; + } +-- +1.7.11.7 + + +From 471dd605429d6645f990becd29c877740d3b32e7 Mon Sep 17 00:00:00 2001 +From: Julian Anastasov +Date: Sun, 20 Oct 2013 15:43:04 +0300 +Subject: [PATCH 44/47] ipv6: fill rt6i_gateway with nexthop address + +[ Upstream commit 550bab42f83308c9d6ab04a980cc4333cef1c8fa ] + +Make sure rt6i_gateway contains nexthop information in +all routes returned from lookup or when routes are directly +attached to skb for generated ICMP packets. + +The effect of this patch should be a faster version of +rt6_nexthop() and the consideration of local addresses as +nexthop. + +Signed-off-by: Julian Anastasov +Acked-by: Hannes Frederic Sowa +Signed-off-by: David S. 
Miller +--- + include/net/ip6_route.h | 6 ++---- + net/ipv6/ip6_output.c | 4 ++-- + net/ipv6/route.c | 8 ++++++-- + 3 files changed, 10 insertions(+), 8 deletions(-) + +diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h +index 0aaf0ec..c7b8860 100644 +--- a/include/net/ip6_route.h ++++ b/include/net/ip6_route.h +@@ -196,11 +196,9 @@ static inline int ip6_skb_dst_mtu(struct sk_buff *skb) + skb_dst(skb)->dev->mtu : dst_mtu(skb_dst(skb)); + } + +-static inline struct in6_addr *rt6_nexthop(struct rt6_info *rt, struct in6_addr *dest) ++static inline struct in6_addr *rt6_nexthop(struct rt6_info *rt) + { +- if (rt->rt6i_flags & RTF_GATEWAY || !ipv6_addr_any(&rt->rt6i_gateway)) +- return &rt->rt6i_gateway; +- return dest; ++ return &rt->rt6i_gateway; + } + + #endif +diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c +index 2e542d0..5b25f85 100644 +--- a/net/ipv6/ip6_output.c ++++ b/net/ipv6/ip6_output.c +@@ -130,7 +130,7 @@ static int ip6_finish_output2(struct sk_buff *skb) + } + + rcu_read_lock_bh(); +- nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr); ++ nexthop = rt6_nexthop((struct rt6_info *)dst); + neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop); + if (unlikely(!neigh)) + neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false); +@@ -899,7 +899,7 @@ static int ip6_dst_lookup_tail(struct sock *sk, + */ + rt = (struct rt6_info *) *dst; + rcu_read_lock_bh(); +- n = __ipv6_neigh_lookup_noref(rt->dst.dev, rt6_nexthop(rt, &fl6->daddr)); ++ n = __ipv6_neigh_lookup_noref(rt->dst.dev, rt6_nexthop(rt)); + err = n && !(n->nud_state & NUD_VALID) ? 
-EINVAL : 0; + rcu_read_unlock_bh(); + +diff --git a/net/ipv6/route.c b/net/ipv6/route.c +index 8d9a93ed..08e6c40 100644 +--- a/net/ipv6/route.c ++++ b/net/ipv6/route.c +@@ -852,7 +852,6 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, + if (ort->rt6i_dst.plen != 128 && + ipv6_addr_equal(&ort->rt6i_dst.addr, daddr)) + rt->rt6i_flags |= RTF_ANYCAST; +- rt->rt6i_gateway = *daddr; + } + + rt->rt6i_flags |= RTF_CACHE; +@@ -1270,6 +1269,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, + rt->dst.flags |= DST_HOST; + rt->dst.output = ip6_output; + atomic_set(&rt->dst.__refcnt, 1); ++ rt->rt6i_gateway = fl6->daddr; + rt->rt6i_dst.addr = fl6->daddr; + rt->rt6i_dst.plen = 128; + rt->rt6i_idev = idev; +@@ -1824,7 +1824,10 @@ static struct rt6_info *ip6_rt_copy(struct rt6_info *ort, + in6_dev_hold(rt->rt6i_idev); + rt->dst.lastuse = jiffies; + +- rt->rt6i_gateway = ort->rt6i_gateway; ++ if (ort->rt6i_flags & RTF_GATEWAY) ++ rt->rt6i_gateway = ort->rt6i_gateway; ++ else ++ rt->rt6i_gateway = *dest; + rt->rt6i_flags = ort->rt6i_flags; + if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) == + (RTF_DEFAULT | RTF_ADDRCONF)) +@@ -2111,6 +2114,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, + else + rt->rt6i_flags |= RTF_LOCAL; + ++ rt->rt6i_gateway = *addr; + rt->rt6i_dst.addr = *addr; + rt->rt6i_dst.plen = 128; + rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL); +-- +1.7.11.7 + + +From d01c3be45be54261f56ba63197d94e3d756befdf Mon Sep 17 00:00:00 2001 +From: Julian Anastasov +Date: Sun, 20 Oct 2013 15:43:05 +0300 +Subject: [PATCH 45/47] netfilter: nf_conntrack: fix rt6i_gateway checks for + H.323 helper + +[ Upstream commit 56e42441ed54b092d6c7411138ce60d049e7c731 ] + +Now when rt6_nexthop() can return nexthop address we can use it +for proper nexthop comparison of directly connected destinations. +For more information refer to commit bbb5823cf742a7 +("netfilter: nf_conntrack: fix rt_gateway checks for H.323 helper"). 
+ +Signed-off-by: Julian Anastasov +Acked-by: Hannes Frederic Sowa +Signed-off-by: David S. Miller +--- + net/netfilter/nf_conntrack_h323_main.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c +index bdebd03..70866d1 100644 +--- a/net/netfilter/nf_conntrack_h323_main.c ++++ b/net/netfilter/nf_conntrack_h323_main.c +@@ -778,8 +778,8 @@ static int callforward_do_filter(const union nf_inet_addr *src, + flowi6_to_flowi(&fl1), false)) { + if (!afinfo->route(&init_net, (struct dst_entry **)&rt2, + flowi6_to_flowi(&fl2), false)) { +- if (!memcmp(&rt1->rt6i_gateway, &rt2->rt6i_gateway, +- sizeof(rt1->rt6i_gateway)) && ++ if (ipv6_addr_equal(rt6_nexthop(rt1), ++ rt6_nexthop(rt2)) && + rt1->dst.dev == rt2->dst.dev) + ret = 1; + dst_release(&rt2->dst); +-- +1.7.11.7 + + +From 1d98ddb501bedeee62c916d3d6999109f0a22198 Mon Sep 17 00:00:00 2001 +From: Hannes Frederic Sowa +Date: Mon, 21 Oct 2013 06:17:15 +0200 +Subject: [PATCH 46/47] ipv6: probe routes asynchronous in rt6_probe + +[ Upstream commit c2f17e827b419918c856131f592df9521e1a38e3 ] + +Routes need to be probed asynchronous otherwise the call stack gets +exhausted when the kernel attemps to deliver another skb inline, like +e.g. xt_TEE does, and we probe at the same time. + +We update neigh->updated still at once, otherwise we would send to +many probes. + +Cc: Julian Anastasov +Signed-off-by: Hannes Frederic Sowa +Signed-off-by: David S. 
Miller +--- + net/ipv6/route.c | 38 +++++++++++++++++++++++++++++++------- + 1 file changed, 31 insertions(+), 7 deletions(-) + +diff --git a/net/ipv6/route.c b/net/ipv6/route.c +index 08e6c40..1e32d5c 100644 +--- a/net/ipv6/route.c ++++ b/net/ipv6/route.c +@@ -477,6 +477,24 @@ out: + } + + #ifdef CONFIG_IPV6_ROUTER_PREF ++struct __rt6_probe_work { ++ struct work_struct work; ++ struct in6_addr target; ++ struct net_device *dev; ++}; ++ ++static void rt6_probe_deferred(struct work_struct *w) ++{ ++ struct in6_addr mcaddr; ++ struct __rt6_probe_work *work = ++ container_of(w, struct __rt6_probe_work, work); ++ ++ addrconf_addr_solict_mult(&work->target, &mcaddr); ++ ndisc_send_ns(work->dev, NULL, &work->target, &mcaddr, NULL); ++ dev_put(work->dev); ++ kfree(w); ++} ++ + static void rt6_probe(struct rt6_info *rt) + { + struct neighbour *neigh; +@@ -500,17 +518,23 @@ static void rt6_probe(struct rt6_info *rt) + + if (!neigh || + time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) { +- struct in6_addr mcaddr; +- struct in6_addr *target; ++ struct __rt6_probe_work *work; ++ ++ work = kmalloc(sizeof(*work), GFP_ATOMIC); + +- if (neigh) { ++ if (neigh && work) + neigh->updated = jiffies; ++ ++ if (neigh) + write_unlock(&neigh->lock); +- } + +- target = (struct in6_addr *)&rt->rt6i_gateway; +- addrconf_addr_solict_mult(target, &mcaddr); +- ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL); ++ if (work) { ++ INIT_WORK(&work->work, rt6_probe_deferred); ++ work->target = rt->rt6i_gateway; ++ dev_hold(rt->dst.dev); ++ work->dev = rt->dst.dev; ++ schedule_work(&work->work); ++ } + } else { + out: + write_unlock(&neigh->lock); +-- +1.7.11.7 + + +From d7710f5e65b37ec3ac09dde758141e81fa47315d Mon Sep 17 00:00:00 2001 +From: Mariusz Ceier +Date: Mon, 21 Oct 2013 19:45:04 +0200 +Subject: [PATCH 47/47] davinci_emac.c: Fix IFF_ALLMULTI setup + +[ Upstream commit d69e0f7ea95fef8059251325a79c004bac01f018 ] + +When IFF_ALLMULTI flag is set on interface and 
IFF_PROMISC isn't, +emac_dev_mcast_set should only enable RX of multicasts and reset +MACHASH registers. + +It does this, but afterwards it either sets up multicast MACs +filtering or disables RX of multicasts and resets MACHASH registers +again, rendering IFF_ALLMULTI flag useless. + +This patch fixes emac_dev_mcast_set, so that multicast MACs filtering and +disabling of RX of multicasts are skipped when IFF_ALLMULTI flag is set. + +Tested with kernel 2.6.37. + +Signed-off-by: Mariusz Ceier +Acked-by: Mugunthan V N +Signed-off-by: David S. Miller +--- + drivers/net/ethernet/ti/davinci_emac.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c +index 1a222bce..45c167f 100644 +--- a/drivers/net/ethernet/ti/davinci_emac.c ++++ b/drivers/net/ethernet/ti/davinci_emac.c +@@ -876,8 +876,7 @@ static void emac_dev_mcast_set(struct net_device *ndev) + netdev_mc_count(ndev) > EMAC_DEF_MAX_MULTICAST_ADDRESSES) { + mbp_enable = (mbp_enable | EMAC_MBP_RXMCAST); + emac_add_mcast(priv, EMAC_ALL_MULTI_SET, NULL); +- } +- if (!netdev_mc_empty(ndev)) { ++ } else if (!netdev_mc_empty(ndev)) { + struct netdev_hw_addr *ha; + + mbp_enable = (mbp_enable | EMAC_MBP_RXMCAST); +-- +1.7.11.7 + diff --git a/tcp-fix-incorrect-ca_state-in-tail-loss-probe.patch b/tcp-fix-incorrect-ca_state-in-tail-loss-probe.patch deleted file mode 100644 index 1a1264f..0000000 --- a/tcp-fix-incorrect-ca_state-in-tail-loss-probe.patch +++ /dev/null @@ -1,107 +0,0 @@ -Path: news.gmane.org!not-for-mail -From: Yuchung Cheng -Newsgroups: gmane.linux.network -Subject: [PATCH net] tcp: fix incorrect ca_state in tail loss probe -Date: Sat, 12 Oct 2013 10:16:27 -0700 -Lines: 34 -Approved: news@gmane.org -Message-ID: <1381598187-9681-1-git-send-email-ycheng@google.com> -NNTP-Posting-Host: plane.gmane.org -X-Trace: ger.gmane.org 1381598242 29686 80.91.229.3 (12 Oct 2013 17:17:22 GMT) -X-Complaints-To: usenet@ger.gmane.org 
-NNTP-Posting-Date: Sat, 12 Oct 2013 17:17:22 +0000 (UTC) -Cc: netdev@vger.kernel.org, michael@sterretts.net, - jwboyer@fedoraproject.org, sesse@google.com, dormando@rydia.net, - Yuchung Cheng -To: davem@davemloft.net, ncardwell@google.com, nanditad@google.com -Original-X-From: netdev-owner@vger.kernel.org Sat Oct 12 19:17:23 2013 -Return-path: -Envelope-to: linux-netdev-2@plane.gmane.org -Original-Received: from vger.kernel.org ([209.132.180.67]) - by plane.gmane.org with esmtp (Exim 4.69) - (envelope-from ) - id 1VV2od-0004tp-02 - for linux-netdev-2@plane.gmane.org; Sat, 12 Oct 2013 19:17:23 +0200 -Original-Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand - id S1753183Ab3JLRRU (ORCPT ); - Sat, 12 Oct 2013 13:17:20 -0400 -Original-Received: from mail-pb0-f74.google.com ([209.85.160.74]:35839 "EHLO - mail-pb0-f74.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org - with ESMTP id S1752493Ab3JLRRS (ORCPT - ); Sat, 12 Oct 2013 13:17:18 -0400 -Original-Received: by mail-pb0-f74.google.com with SMTP id rq2so543459pbb.1 - for ; Sat, 12 Oct 2013 10:17:18 -0700 (PDT) -DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; - d=google.com; s=20120113; - h=from:to:cc:subject:date:message-id; - bh=YSBIMZEgVuqyP2cau1199a1sz5d28JA7LPPsF6w9FYQ=; - b=cCkXgePT7f0kRy+VBGvs3DZSLhVn0z7O74B7OHYpdZkQBznhNZ2b6ZGbkDqaKJXyLT - GEsq/JXCgtwpC7aGSz9dPdAZU6kondKOAmfhh54u6f2+ymcZJ4zHpoA6mWuKJ4zlTF2w - 6tRhnT+/N5RkfIfYD/mcDx97X41kRT3NKJ6bsCoiNJIO2+6j8SrOi8C27InOkdIRY/AT - I1uu2bvai1CfrC5yQ6UfpKUg2jioFDOi7i5nSEon+JnWeJavHpO01JMHuar7ZeGnAKJg - kVLwyiRujU9Fz0CKIMPZihAngQu/0OgqORQIjygeqz+GPgtTxDGQP7IUNR/d+JOPVUse - XlSA== -X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; - d=1e100.net; s=20130820; - h=x-gm-message-state:from:to:cc:subject:date:message-id; - bh=YSBIMZEgVuqyP2cau1199a1sz5d28JA7LPPsF6w9FYQ=; - b=d95i7RXY0ff5vnWvrGqxWfSvvAE8SC6YAaBn3ZqbARIZm5GgynIAB/WYnrIOqpqGV6 - 56jVM40bfzLrols1UZzyJWqPIgxee1zPrESh+WrSsDP2tTdYKl/zk13lbt/u7nOn9o3u - 
HrAo2aY4DtV3P0ABEq1lKdazmmPACTc6256QQ2nxtHs5n7s7P1ERkpX7NGNqNf1zDBSv - 60xeoswRpMkh0G5ZUgpPYsIbXws9F64n5ytq34O2UDZPv5oPEd8I7P34HpqWkNsLoEBs - XXTxs1SLc8TI3vdduhaQ+rmEvcE5vTaqjVCQAT2mMKTJJ9xIFueF5zExfI892PHAcJQ8 - jiaw== -X-Gm-Message-State: ALoCoQkeL+3MY64KlpZKI1BuYMU+yTQcYF1C+U5u+kPpqROoekUMzIaH45qERBARAi/0vgJ5YM1Cwm+43d66vZMn/WdHPurbMHfFn3PYqeZSAzOEeuSA2jGTSZUkpuH8YwFqiNhABtj93ahsBXrA6POrXb531UvuahU+rnFLTGNLxVHv/08PW3l5PbN8UaTNpUI1qcf6O6MarFcB+fZLYPb339v4EIrLxg== -X-Received: by 10.66.5.226 with SMTP id v2mr8825633pav.22.1381598238410; - Sat, 12 Oct 2013 10:17:18 -0700 (PDT) -Original-Received: from corp2gmr1-2.hot.corp.google.com (corp2gmr1-2.hot.corp.google.com [172.24.189.93]) - by gmr-mx.google.com with ESMTPS id a24si3247317yhl.1.1969.12.31.16.00.00 - (version=TLSv1.1 cipher=AES128-SHA bits=128/128); - Sat, 12 Oct 2013 10:17:18 -0700 (PDT) -Original-Received: from blast2.mtv.corp.google.com (blast2.mtv.corp.google.com [172.17.132.164]) - by corp2gmr1-2.hot.corp.google.com (Postfix) with ESMTP id 2F2B45A41A0; - Sat, 12 Oct 2013 10:17:18 -0700 (PDT) -Original-Received: by blast2.mtv.corp.google.com (Postfix, from userid 5463) - id C6A85220C26; Sat, 12 Oct 2013 10:17:17 -0700 (PDT) -X-Mailer: git-send-email 1.8.4 -Original-Sender: netdev-owner@vger.kernel.org -Precedence: bulk -List-ID: -X-Mailing-List: netdev@vger.kernel.org -Xref: news.gmane.org gmane.linux.network:286793 -Archived-At: - -On receiving an ACK that covers the loss probe sequence, TLP -immediately sets the congestion state to Open, even though some packets -are not recovered and retransmisssion are on the way. The later ACks -may trigger a WARN_ON check in step D of tcp_fastretrans_alert(), e.g., -https://bugzilla.redhat.com/show_bug.cgi?id=989251 - -The fix is to follow the similar procedure in recovery by calling -tcp_try_keep_open(). The sender switches to Open state if no packets -are retransmissted. Otherwise it goes to Disorder and let subsequent -ACKs move the state to Recovery or Open. 
- -Reported-By: Michael Sterrett -Tested-By: Dormando -Signed-off-by: Yuchung Cheng ---- - net/ipv4/tcp_input.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c -index 113dc5f..53974c7 100644 ---- a/net/ipv4/tcp_input.c -+++ b/net/ipv4/tcp_input.c -@@ -3291,7 +3291,7 @@ static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag) - tcp_init_cwnd_reduction(sk, true); - tcp_set_ca_state(sk, TCP_CA_CWR); - tcp_end_cwnd_reduction(sk); -- tcp_set_ca_state(sk, TCP_CA_Open); -+ tcp_try_keep_open(sk); - NET_INC_STATS_BH(sock_net(sk), - LINUX_MIB_TCPLOSSPROBERECOVERY); - } --- -1.8.4 -