@@ -878,7 +878,7 @@ static inline struct timer_base *get_timer_base(u32 tflags)
 
 #ifdef CONFIG_NO_HZ_COMMON
 static inline struct timer_base *
-__get_target_base(struct timer_base *base, unsigned tflags)
+get_target_base(struct timer_base *base, unsigned tflags)
 {
 #ifdef CONFIG_SMP
 	if ((tflags & TIMER_PINNED) || !base->migration_enabled)
@@ -891,40 +891,34 @@ __get_target_base(struct timer_base *base, unsigned tflags)
 
 static inline void forward_timer_base(struct timer_base *base)
 {
+	unsigned long jnow = READ_ONCE(jiffies);
+
 	/*
 	 * We only forward the base when it's idle and we have a delta between
 	 * base clock and jiffies.
 	 */
-	if (!base->is_idle || (long) (jiffies - base->clk) < 2)
+	if (!base->is_idle || (long) (jnow - base->clk) < 2)
 		return;
 
 	/*
 	 * If the next expiry value is > jiffies, then we fast forward to
 	 * jiffies otherwise we forward to the next expiry value.
 	 */
-	if (time_after(base->next_expiry, jiffies))
-		base->clk = jiffies;
+	if (time_after(base->next_expiry, jnow))
+		base->clk = jnow;
 	else
 		base->clk = base->next_expiry;
 }
 #else
 static inline struct timer_base *
-__get_target_base(struct timer_base *base, unsigned tflags)
+get_target_base(struct timer_base *base, unsigned tflags)
 {
 	return get_timer_this_cpu_base(tflags);
 }
 
 static inline void forward_timer_base(struct timer_base *base) { }
 #endif
 
-static inline struct timer_base *
-get_target_base(struct timer_base *base, unsigned tflags)
-{
-	struct timer_base *target = __get_target_base(base, tflags);
-
-	forward_timer_base(target);
-	return target;
-}
 
 /*
  * We are using hashed locking: Holding per_cpu(timer_bases[x]).lock means
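
Note on the jnow snapshot introduced above: jiffies advances concurrently with this function, so reading it more than once could make the delta check and the fast-forward act on different values. The userspace sketch below is a minimal model, not kernel code; the names ticks and toy_forward are invented, and the plain > comparison stands in for the kernel's wrap-safe time_after().

#include <stdio.h>

static volatile unsigned long ticks;	/* stands in for the global jiffies */

static void toy_forward(unsigned long *clk, unsigned long next_expiry)
{
	unsigned long jnow = ticks;	/* one snapshot, like READ_ONCE(jiffies) */

	if ((long)(jnow - *clk) < 2)
		return;			/* no delta worth forwarding */

	/* Both branches use the same jnow, so the decision stays
	 * consistent even if 'ticks' has advanced since the snapshot. */
	if (next_expiry > jnow)
		*clk = jnow;
	else
		*clk = next_expiry;
}

int main(void)
{
	unsigned long clk = 100;

	ticks = 105;
	toy_forward(&clk, 110);		/* next expiry lies beyond 'now' */
	printf("clk = %lu\n", clk);	/* prints 105: fast-forwarded to now */
	return 0;
}
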
@@ -943,7 +937,14 @@ static struct timer_base *lock_timer_base(struct timer_list *timer,
 {
 	for (;;) {
 		struct timer_base *base;
-		u32 tf = timer->flags;
+		u32 tf;
+
+		/*
+		 * We need to use READ_ONCE() here, otherwise the compiler
+		 * might re-read @tf between the check for TIMER_MIGRATING
+		 * and spin_lock().
+		 */
+		tf = READ_ONCE(timer->flags);
 
 		if (!(tf & TIMER_MIGRATING)) {
 			base = get_timer_base(tf);
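
The new comment spells out the hazard: with a plain load the compiler may refetch timer->flags after the TIMER_MIGRATING test, so get_timer_base() could be handed flags from a different migration state than the ones that passed the check. A minimal userspace model of the idiom follows; READ_ONCE_SIM, TOY_MIGRATING, and pick_base are invented names for illustration only.

#include <stdio.h>

#define READ_ONCE_SIM(x) (*(volatile __typeof__(x) *)&(x))

#define TOY_MIGRATING	0x1u

struct toy_timer {
	unsigned int flags;		/* may be rewritten concurrently */
};

static unsigned int pick_base(struct toy_timer *t)
{
	/* One volatile load: the same tf value feeds both the MIGRATING
	 * test and the base lookup below. A plain read would let the
	 * compiler fetch t->flags twice. */
	unsigned int tf = READ_ONCE_SIM(t->flags);

	if (!(tf & TOY_MIGRATING))
		return tf >> 1;		/* toy "base index" derived from tf */
	return ~0u;			/* migrating: caller must retry */
}

int main(void)
{
	struct toy_timer t = { .flags = 4 };	/* not migrating, base 2 */

	printf("base = %u\n", pick_base(&t));
	return 0;
}
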
@@ -964,6 +965,8 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
 	unsigned long clk = 0, flags;
 	int ret = 0;
 
+	BUG_ON(!timer->function);
+
 	/*
 	 * This is a common optimization triggered by the networking code - if
 	 * the timer is re-modified to have the same timeout or ends up in the
@@ -972,13 +975,16 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
 	if (timer_pending(timer)) {
 		if (timer->expires == expires)
 			return 1;
+
 		/*
-		 * Take the current timer_jiffies of base, but without holding
-		 * the lock!
+		 * We lock the timer base and calculate the bucket index right
+		 * here. If the timer ends up in the same bucket, then we
+		 * just update the expiry time and avoid the whole
+		 * dequeue/enqueue dance.
 		 */
-		base = get_timer_base(timer->flags);
-		clk = base->clk;
+		base = lock_timer_base(timer, &flags);
 
+		clk = base->clk;
 		idx = calc_wheel_index(expires, clk);
 
 		/*
@@ -988,14 +994,14 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
 		 */
 		if (idx == timer_get_idx(timer)) {
 			timer->expires = expires;
-			return 1;
+			ret = 1;
+			goto out_unlock;
 		}
+	} else {
+		base = lock_timer_base(timer, &flags);
 	}
 
 	timer_stats_timer_set_start_info(timer);
-	BUG_ON(!timer->function);
-
-	base = lock_timer_base(timer, &flags);
 
 	ret = detach_if_pending(timer, base, false);
 	if (!ret && pending_only)
@@ -1025,12 +1031,16 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
 		}
 	}
 
+	/* Try to forward a stale timer base clock */
+	forward_timer_base(base);
+
 	timer->expires = expires;
 	/*
 	 * If 'idx' was calculated above and the base time did not advance
-	 * between calculating 'idx' and taking the lock, only enqueue_timer()
-	 * and trigger_dyntick_cpu() is required. Otherwise we need to
-	 * (re)calculate the wheel index via internal_add_timer().
+	 * between calculating 'idx' and possibly switching the base, only
+	 * enqueue_timer() and trigger_dyntick_cpu() are required. Otherwise
+	 * we need to (re)calculate the wheel index via
+	 * internal_add_timer().
 	 */
 	if (idx != UINT_MAX && clk == base->clk) {
 		enqueue_timer(base, timer, idx);
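
Both the same-bucket shortcut taken earlier in this function and the clk == base->clk recheck above follow from how wheel indices are computed. The toy model below uses invented geometry (8 slots per level, a 3-bit shift per level; the kernel's real LVL_* constants differ) to show (a) two far-out expiries a few jiffies apart sharing a bucket, the case where only timer->expires needs updating, and (b) the same expiry mapping to a different slot once the base clock advances, which is why a stale idx must be discarded and internal_add_timer() used instead.

#include <stdio.h>

/* Toy stand-in for calc_wheel_index(): granularity coarsens by 8x per
 * level, so the slot depends on the distance to 'clk' (level choice)
 * and on 'expires' shifted to that level's granularity. */
static unsigned toy_wheel_index(unsigned long expires, unsigned long clk)
{
	unsigned long delta = expires - clk;
	unsigned lvl = 0;

	while (delta >= 8) {		/* 8 slots per level in this toy */
		delta >>= 3;		/* each level is 8x coarser */
		lvl++;
	}
	return lvl * 8 + (unsigned)((expires >> (3 * lvl)) & 7);
}

int main(void)
{
	/* (a) 3 jiffies apart, same bucket: the dequeue/enqueue dance
	 * can be skipped and only the expiry value updated. */
	printf("%u %u\n", toy_wheel_index(1200, 1000),
			  toy_wheel_index(1203, 1000));	/* 18 18 */

	/* (b) same expiry, but the base clock moved: the precomputed
	 * index is stale and must be recalculated. */
	printf("%u %u\n", toy_wheel_index(1008, 1000),
			  toy_wheel_index(1008, 1002));	/* 14 0 */
	return 0;
}
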
@@ -1510,12 +1520,16 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem)
 	is_max_delta = (nextevt == base->clk + NEXT_TIMER_MAX_DELTA);
 	base->next_expiry = nextevt;
 	/*
-	 * We have a fresh next event. Check whether we can forward the base:
+	 * We have a fresh next event. Check whether we can forward the
+	 * base. We can only do that when @basej is past base->clk,
+	 * otherwise we might rewind base->clk.
 	 */
-	if (time_after(nextevt, jiffies))
-		base->clk = jiffies;
-	else if (time_after(nextevt, base->clk))
-		base->clk = nextevt;
+	if (time_after(basej, base->clk)) {
+		if (time_after(nextevt, basej))
+			base->clk = basej;
+		else if (time_after(nextevt, base->clk))
+			base->clk = nextevt;
+	}
 
 	if (time_before_eq(nextevt, basej)) {
 		expires = basem;
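
The guard added here matters because @basej is the caller's jiffies snapshot and can be older than a base->clk that was already forwarded elsewhere (for instance by the forward_timer_base() call added to __mod_timer() above). A minimal numeric sketch with invented values, using the same wrap-safe signed-difference idiom as the kernel's time_after():

#include <stdio.h>

/* Same trick as time_after() in include/linux/jiffies.h: 'a' is after
 * 'b' when the signed difference b - a is negative. */
#define TIME_AFTER(a, b) ((long)((b) - (a)) < 0)

int main(void)
{
	unsigned long clk = 1005;	/* base->clk, already forwarded */
	unsigned long basej = 1002;	/* stale jiffies snapshot */
	unsigned long nextevt = 1050;	/* fresh next event */

	/* The old code would set clk to the snapshot (1002) and rewind
	 * it. With the guard, a stale basej leaves clk untouched. */
	if (TIME_AFTER(basej, clk)) {
		if (TIME_AFTER(nextevt, basej))
			clk = basej;
		else if (TIME_AFTER(nextevt, clk))
			clk = nextevt;
	}
	printf("clk = %lu\n", clk);	/* prints 1005: no rewind */
	return 0;
}
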