Skip to content

Commit 6f458df

Browse files
Eric Dumazet authored; committed by davem330 (David S. Miller)
tcp: improve latencies of timer triggered events
Modern TCP stack highly depends on tcp_write_timer() having a small latency, but current implementation doesn't exactly meet the expectations. When a timer fires but finds the socket is owned by the user, it rearms itself for an additional delay hoping next run will be more successful. tcp_write_timer() for example uses a 50ms delay for next try, and it defeats many attempts to get predictable TCP behavior in terms of latencies. Use the recently introduced tcp_release_cb(), so that the user owning the socket will call various handlers right before socket release. This will permit us to post a followup patch to address the tcp_tso_should_defer() syndrome (some deferred packets have to wait RTO timer to be transmitted, while cwnd should allow us to send them sooner) Signed-off-by: Eric Dumazet <[email protected]> Cc: Tom Herbert <[email protected]> Cc: Yuchung Cheng <[email protected]> Cc: Neal Cardwell <[email protected]> Cc: Nandita Dukkipati <[email protected]> Cc: H.K. Jerry Chu <[email protected]> Cc: John Heffner <[email protected]> Signed-off-by: David S. Miller <[email protected]>
1 parent 9dc2741 commit 6f458df

File tree

4 files changed

+71
-51
lines changed

4 files changed

+71
-51
lines changed

include/linux/tcp.h

+3-1
Original file line numberDiff line numberDiff line change
@@ -515,7 +515,9 @@ struct tcp_sock {
515515
enum tsq_flags {
516516
TSQ_THROTTLED,
517517
TSQ_QUEUED,
518-
TSQ_OWNED, /* tcp_tasklet_func() found socket was locked */
518+
TCP_TSQ_DEFERRED, /* tcp_tasklet_func() found socket was owned */
519+
TCP_WRITE_TIMER_DEFERRED, /* tcp_write_timer() found socket was owned */
520+
TCP_DELACK_TIMER_DEFERRED, /* tcp_delack_timer() found socket was owned */
519521
};
520522

521523
static inline struct tcp_sock *tcp_sk(const struct sock *sk)

include/net/tcp.h

+2
Original file line numberDiff line numberDiff line change
@@ -350,6 +350,8 @@ extern int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
350350
extern int tcp_sendpage(struct sock *sk, struct page *page, int offset,
351351
size_t size, int flags);
352352
extern void tcp_release_cb(struct sock *sk);
353+
extern void tcp_write_timer_handler(struct sock *sk);
354+
extern void tcp_delack_timer_handler(struct sock *sk);
353355
extern int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg);
354356
extern int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
355357
const struct tcphdr *th, unsigned int len);

net/ipv4/tcp_output.c

+29-17
Original file line numberDiff line numberDiff line change
@@ -837,6 +837,13 @@ struct tsq_tasklet {
837837
};
838838
static DEFINE_PER_CPU(struct tsq_tasklet, tsq_tasklet);
839839

840+
static void tcp_tsq_handler(struct sock *sk)
841+
{
842+
if ((1 << sk->sk_state) &
843+
(TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_CLOSING |
844+
TCPF_CLOSE_WAIT | TCPF_LAST_ACK))
845+
tcp_write_xmit(sk, tcp_current_mss(sk), 0, 0, GFP_ATOMIC);
846+
}
840847
/*
841848
* One tasklet per cpu tries to send more skbs.
842849
* We run in tasklet context but need to disable irqs when
@@ -864,16 +871,10 @@ static void tcp_tasklet_func(unsigned long data)
864871
bh_lock_sock(sk);
865872

866873
if (!sock_owned_by_user(sk)) {
867-
if ((1 << sk->sk_state) &
868-
(TCPF_ESTABLISHED | TCPF_FIN_WAIT1 |
869-
TCPF_CLOSING | TCPF_CLOSE_WAIT | TCPF_LAST_ACK))
870-
tcp_write_xmit(sk,
871-
tcp_current_mss(sk),
872-
0, 0,
873-
GFP_ATOMIC);
874+
tcp_tsq_handler(sk);
874875
} else {
875876
/* defer the work to tcp_release_cb() */
876-
set_bit(TSQ_OWNED, &tp->tsq_flags);
877+
set_bit(TCP_TSQ_DEFERRED, &tp->tsq_flags);
877878
}
878879
bh_unlock_sock(sk);
879880

@@ -882,6 +883,9 @@ static void tcp_tasklet_func(unsigned long data)
882883
}
883884
}
884885

886+
#define TCP_DEFERRED_ALL ((1UL << TCP_TSQ_DEFERRED) | \
887+
(1UL << TCP_WRITE_TIMER_DEFERRED) | \
888+
(1UL << TCP_DELACK_TIMER_DEFERRED))
885889
/**
886890
* tcp_release_cb - tcp release_sock() callback
887891
* @sk: socket
@@ -892,16 +896,24 @@ static void tcp_tasklet_func(unsigned long data)
892896
void tcp_release_cb(struct sock *sk)
893897
{
894898
struct tcp_sock *tp = tcp_sk(sk);
899+
unsigned long flags, nflags;
895900

896-
if (test_and_clear_bit(TSQ_OWNED, &tp->tsq_flags)) {
897-
if ((1 << sk->sk_state) &
898-
(TCPF_ESTABLISHED | TCPF_FIN_WAIT1 |
899-
TCPF_CLOSING | TCPF_CLOSE_WAIT | TCPF_LAST_ACK))
900-
tcp_write_xmit(sk,
901-
tcp_current_mss(sk),
902-
0, 0,
903-
GFP_ATOMIC);
904-
}
901+
/* perform an atomic operation only if at least one flag is set */
902+
do {
903+
flags = tp->tsq_flags;
904+
if (!(flags & TCP_DEFERRED_ALL))
905+
return;
906+
nflags = flags & ~TCP_DEFERRED_ALL;
907+
} while (cmpxchg(&tp->tsq_flags, flags, nflags) != flags);
908+
909+
if (flags & (1UL << TCP_TSQ_DEFERRED))
910+
tcp_tsq_handler(sk);
911+
912+
if (flags & (1UL << TCP_WRITE_TIMER_DEFERRED))
913+
tcp_write_timer_handler(sk);
914+
915+
if (flags & (1UL << TCP_DELACK_TIMER_DEFERRED))
916+
tcp_delack_timer_handler(sk);
905917
}
906918
EXPORT_SYMBOL(tcp_release_cb);
907919

net/ipv4/tcp_timer.c

+37-33
Original file line numberDiff line numberDiff line change
@@ -32,17 +32,6 @@ int sysctl_tcp_retries2 __read_mostly = TCP_RETR2;
3232
int sysctl_tcp_orphan_retries __read_mostly;
3333
int sysctl_tcp_thin_linear_timeouts __read_mostly;
3434

35-
static void tcp_write_timer(unsigned long);
36-
static void tcp_delack_timer(unsigned long);
37-
static void tcp_keepalive_timer (unsigned long data);
38-
39-
void tcp_init_xmit_timers(struct sock *sk)
40-
{
41-
inet_csk_init_xmit_timers(sk, &tcp_write_timer, &tcp_delack_timer,
42-
&tcp_keepalive_timer);
43-
}
44-
EXPORT_SYMBOL(tcp_init_xmit_timers);
45-
4635
static void tcp_write_err(struct sock *sk)
4736
{
4837
sk->sk_err = sk->sk_err_soft ? : ETIMEDOUT;
@@ -205,21 +194,11 @@ static int tcp_write_timeout(struct sock *sk)
205194
return 0;
206195
}
207196

208-
static void tcp_delack_timer(unsigned long data)
197+
void tcp_delack_timer_handler(struct sock *sk)
209198
{
210-
struct sock *sk = (struct sock *)data;
211199
struct tcp_sock *tp = tcp_sk(sk);
212200
struct inet_connection_sock *icsk = inet_csk(sk);
213201

214-
bh_lock_sock(sk);
215-
if (sock_owned_by_user(sk)) {
216-
/* Try again later. */
217-
icsk->icsk_ack.blocked = 1;
218-
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED);
219-
sk_reset_timer(sk, &icsk->icsk_delack_timer, jiffies + TCP_DELACK_MIN);
220-
goto out_unlock;
221-
}
222-
223202
sk_mem_reclaim_partial(sk);
224203

225204
if (sk->sk_state == TCP_CLOSE || !(icsk->icsk_ack.pending & ICSK_ACK_TIMER))
@@ -260,7 +239,21 @@ static void tcp_delack_timer(unsigned long data)
260239
out:
261240
if (sk_under_memory_pressure(sk))
262241
sk_mem_reclaim(sk);
263-
out_unlock:
242+
}
243+
244+
static void tcp_delack_timer(unsigned long data)
245+
{
246+
struct sock *sk = (struct sock *)data;
247+
248+
bh_lock_sock(sk);
249+
if (!sock_owned_by_user(sk)) {
250+
tcp_delack_timer_handler(sk);
251+
} else {
252+
inet_csk(sk)->icsk_ack.blocked = 1;
253+
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED);
254+
/* delegate our work to tcp_release_cb() */
255+
set_bit(TCP_DELACK_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags);
256+
}
264257
bh_unlock_sock(sk);
265258
sock_put(sk);
266259
}
@@ -450,19 +443,11 @@ void tcp_retransmit_timer(struct sock *sk)
450443
out:;
451444
}
452445

453-
static void tcp_write_timer(unsigned long data)
446+
void tcp_write_timer_handler(struct sock *sk)
454447
{
455-
struct sock *sk = (struct sock *)data;
456448
struct inet_connection_sock *icsk = inet_csk(sk);
457449
int event;
458450

459-
bh_lock_sock(sk);
460-
if (sock_owned_by_user(sk)) {
461-
/* Try again later */
462-
sk_reset_timer(sk, &icsk->icsk_retransmit_timer, jiffies + (HZ / 20));
463-
goto out_unlock;
464-
}
465-
466451
if (sk->sk_state == TCP_CLOSE || !icsk->icsk_pending)
467452
goto out;
468453

@@ -485,7 +470,19 @@ static void tcp_write_timer(unsigned long data)
485470

486471
out:
487472
sk_mem_reclaim(sk);
488-
out_unlock:
473+
}
474+
475+
static void tcp_write_timer(unsigned long data)
476+
{
477+
struct sock *sk = (struct sock *)data;
478+
479+
bh_lock_sock(sk);
480+
if (!sock_owned_by_user(sk)) {
481+
tcp_write_timer_handler(sk);
482+
} else {
483+
/* deleguate our work to tcp_release_cb() */
484+
set_bit(TCP_WRITE_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags);
485+
}
489486
bh_unlock_sock(sk);
490487
sock_put(sk);
491488
}
@@ -602,3 +599,10 @@ static void tcp_keepalive_timer (unsigned long data)
602599
bh_unlock_sock(sk);
603600
sock_put(sk);
604601
}
602+
603+
void tcp_init_xmit_timers(struct sock *sk)
604+
{
605+
inet_csk_init_xmit_timers(sk, &tcp_write_timer, &tcp_delack_timer,
606+
&tcp_keepalive_timer);
607+
}
608+
EXPORT_SYMBOL(tcp_init_xmit_timers);

0 commit comments

Comments
 (0)