Skip to content

Commit a8006bd

Browse files
committed
Merge branch 'timers-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull timer fixes from Ingo Molnar: "Fix four timer locking races: two were noticed by Linus while reviewing the code while chasing for a corruption bug, and two from fixing spurious USB timeouts" * 'timers-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: timers: Prevent base clock corruption when forwarding timers: Prevent base clock rewind when forwarding clock timers: Lock base for same bucket optimization timers: Plug locking race vs. timer migration
2 parents 965c4b7 + 6bad6bc commit a8006bd

1 file changed

Lines changed: 44 additions & 30 deletions

File tree

kernel/time/timer.c

Lines changed: 44 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -878,7 +878,7 @@ static inline struct timer_base *get_timer_base(u32 tflags)
878878

879879
#ifdef CONFIG_NO_HZ_COMMON
880880
static inline struct timer_base *
881-
__get_target_base(struct timer_base *base, unsigned tflags)
881+
get_target_base(struct timer_base *base, unsigned tflags)
882882
{
883883
#ifdef CONFIG_SMP
884884
if ((tflags & TIMER_PINNED) || !base->migration_enabled)
@@ -891,40 +891,34 @@ __get_target_base(struct timer_base *base, unsigned tflags)
891891

892892
static inline void forward_timer_base(struct timer_base *base)
893893
{
894+
unsigned long jnow = READ_ONCE(jiffies);
895+
894896
/*
895897
* We only forward the base when it's idle and we have a delta between
896898
* base clock and jiffies.
897899
*/
898-
if (!base->is_idle || (long) (jiffies - base->clk) < 2)
900+
if (!base->is_idle || (long) (jnow - base->clk) < 2)
899901
return;
900902

901903
/*
902904
* If the next expiry value is > jiffies, then we fast forward to
903905
* jiffies otherwise we forward to the next expiry value.
904906
*/
905-
if (time_after(base->next_expiry, jiffies))
906-
base->clk = jiffies;
907+
if (time_after(base->next_expiry, jnow))
908+
base->clk = jnow;
907909
else
908910
base->clk = base->next_expiry;
909911
}
910912
#else
911913
static inline struct timer_base *
912-
__get_target_base(struct timer_base *base, unsigned tflags)
914+
get_target_base(struct timer_base *base, unsigned tflags)
913915
{
914916
return get_timer_this_cpu_base(tflags);
915917
}
916918

917919
static inline void forward_timer_base(struct timer_base *base) { }
918920
#endif
919921

920-
static inline struct timer_base *
921-
get_target_base(struct timer_base *base, unsigned tflags)
922-
{
923-
struct timer_base *target = __get_target_base(base, tflags);
924-
925-
forward_timer_base(target);
926-
return target;
927-
}
928922

929923
/*
930924
* We are using hashed locking: Holding per_cpu(timer_bases[x]).lock means
@@ -943,7 +937,14 @@ static struct timer_base *lock_timer_base(struct timer_list *timer,
943937
{
944938
for (;;) {
945939
struct timer_base *base;
946-
u32 tf = timer->flags;
940+
u32 tf;
941+
942+
/*
943+
* We need to use READ_ONCE() here, otherwise the compiler
944+
* might re-read @tf between the check for TIMER_MIGRATING
945+
* and spin_lock().
946+
*/
947+
tf = READ_ONCE(timer->flags);
947948

948949
if (!(tf & TIMER_MIGRATING)) {
949950
base = get_timer_base(tf);
@@ -964,6 +965,8 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
964965
unsigned long clk = 0, flags;
965966
int ret = 0;
966967

968+
BUG_ON(!timer->function);
969+
967970
/*
968971
* This is a common optimization triggered by the networking code - if
969972
* the timer is re-modified to have the same timeout or ends up in the
@@ -972,13 +975,16 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
972975
if (timer_pending(timer)) {
973976
if (timer->expires == expires)
974977
return 1;
978+
975979
/*
976-
* Take the current timer_jiffies of base, but without holding
977-
* the lock!
980+
* We lock timer base and calculate the bucket index right
981+
* here. If the timer ends up in the same bucket, then we
982+
* just update the expiry time and avoid the whole
983+
* dequeue/enqueue dance.
978984
*/
979-
base = get_timer_base(timer->flags);
980-
clk = base->clk;
985+
base = lock_timer_base(timer, &flags);
981986

987+
clk = base->clk;
982988
idx = calc_wheel_index(expires, clk);
983989

984990
/*
@@ -988,14 +994,14 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
988994
*/
989995
if (idx == timer_get_idx(timer)) {
990996
timer->expires = expires;
991-
return 1;
997+
ret = 1;
998+
goto out_unlock;
992999
}
1000+
} else {
1001+
base = lock_timer_base(timer, &flags);
9931002
}
9941003

9951004
timer_stats_timer_set_start_info(timer);
996-
BUG_ON(!timer->function);
997-
998-
base = lock_timer_base(timer, &flags);
9991005

10001006
ret = detach_if_pending(timer, base, false);
10011007
if (!ret && pending_only)
@@ -1025,12 +1031,16 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
10251031
}
10261032
}
10271033

1034+
/* Try to forward a stale timer base clock */
1035+
forward_timer_base(base);
1036+
10281037
timer->expires = expires;
10291038
/*
10301039
* If 'idx' was calculated above and the base time did not advance
1031-
* between calculating 'idx' and taking the lock, only enqueue_timer()
1032-
* and trigger_dyntick_cpu() is required. Otherwise we need to
1033-
* (re)calculate the wheel index via internal_add_timer().
1040+
* between calculating 'idx' and possibly switching the base, only
1041+
* enqueue_timer() and trigger_dyntick_cpu() is required. Otherwise
1042+
* we need to (re)calculate the wheel index via
1043+
* internal_add_timer().
10341044
*/
10351045
if (idx != UINT_MAX && clk == base->clk) {
10361046
enqueue_timer(base, timer, idx);
@@ -1510,12 +1520,16 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem)
15101520
is_max_delta = (nextevt == base->clk + NEXT_TIMER_MAX_DELTA);
15111521
base->next_expiry = nextevt;
15121522
/*
1513-
* We have a fresh next event. Check whether we can forward the base:
1523+
* We have a fresh next event. Check whether we can forward the
1524+
* base. We can only do that when @basej is past base->clk
1525+
* otherwise we might rewind base->clk.
15141526
*/
1515-
if (time_after(nextevt, jiffies))
1516-
base->clk = jiffies;
1517-
else if (time_after(nextevt, base->clk))
1518-
base->clk = nextevt;
1527+
if (time_after(basej, base->clk)) {
1528+
if (time_after(nextevt, basej))
1529+
base->clk = basej;
1530+
else if (time_after(nextevt, base->clk))
1531+
base->clk = nextevt;
1532+
}
15191533

15201534
if (time_before_eq(nextevt, basej)) {
15211535
expires = basem;

0 commit comments

Comments
 (0)