diff --git a/lib/proportions.c b/lib/proportions.c
index 332d8c58184d4964406dfe71179b877fd9f927f6..9508d9a7af3ef9ba482925a9d1cdc34ef2fddd0e 100644
--- a/lib/proportions.c
+++ b/lib/proportions.c
@@ -190,6 +190,12 @@ prop_adjust_shift(int *pl_shift, unsigned long *pl_period, int new_shift)
  * PERCPU
  */
 
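+/*
+ * Batch size for the local percpu counter: larger on bigger machines,
+ * trading percpu_counter_read() accuracy for cheaper updates.
+ */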
+#define PROP_BATCH (8*(1+ilog2(nr_cpu_ids)))
+
 int prop_local_init_percpu(struct prop_local_percpu *pl)
 {
 	spin_lock_init(&pl->lock);
@@ -230,31 +236,34 @@ void prop_norm_percpu(struct prop_global *pg, struct prop_local_percpu *pl)
 
 	spin_lock_irqsave(&pl->lock, flags);
 	prop_adjust_shift(&pl->shift, &pl->period, pg->shift);
+
 	/*
 	 * For each missed period, we halve the local counter.
 	 * basically:
 	 *   pl->events >> (global_period - pl->period);
-	 *
-	 * but since the distributed nature of percpu counters make division
-	 * rather hard, use a regular subtraction loop. This is safe, because
-	 * the events will only every be incremented, hence the subtraction
-	 * can never result in a negative number.
 	 */
-	while (pl->period != global_period) {
-		unsigned long val = percpu_counter_read(&pl->events);
-		unsigned long half = (val + 1) >> 1;
-
-		/*
-		 * Half of zero won't be much less, break out.
-		 * This limits the loop to shift iterations, even
-		 * if we missed a million.
-		 */
-		if (!val)
-			break;
-
-		percpu_counter_add(&pl->events, -half);
-		pl->period += period;
-	}
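+	/*
+	 * Instead of the old subtraction loop, decay in a single step:
+	 * shift out one bit per missed period.  Once BITS_PER_LONG
+	 * periods have been missed, nothing of the old count survives.
+	 */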
+	period = (global_period - pl->period) >> (pg->shift - 1);
+	if (period < BITS_PER_LONG) {
+		s64 val = percpu_counter_read(&pl->events);
+
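+		/*
+		 * percpu_counter_read() may be off by as much as
+		 * nr_cpu_ids * PROP_BATCH; when the value is small enough
+		 * for that error to matter, take the exact (slower) sum.
+		 */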
+		if (val < (nr_cpu_ids * PROP_BATCH))
+			val = percpu_counter_sum(&pl->events);
+
+		__percpu_counter_add(&pl->events, -val + (val >> period),
+					PROP_BATCH);
+	} else
+		percpu_counter_set(&pl->events, 0);
+
 	pl->period = global_period;
 	spin_unlock_irqrestore(&pl->lock, flags);
 }
@@ -267,7 +276,8 @@ void __prop_inc_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl)
 	struct prop_global *pg = prop_get_global(pd);
 
 	prop_norm_percpu(pg, pl);
-	percpu_counter_add(&pl->events, 1);
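+	/* batch the add so the shared counter is touched less often */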
+	__percpu_counter_add(&pl->events, 1, PROP_BATCH);
 	percpu_counter_add(&pg->events, 1);
 	prop_put_global(pd, pg);
 }
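
For reference, a minimal standalone userspace sketch (an illustration, not
part of the patch) of why the single shift is equivalent to the removed
loop: each pass took val to val - ((val + 1) >> 1), which is exactly
val >> 1, so N missed periods collapse into one shift by N.

#include <assert.h>
#include <stdio.h>

int main(void)
{
	unsigned long val;

	for (val = 0; val < 100000; val++) {
		unsigned long missed, loop_val = val;

		for (missed = 0; missed < 8; missed++) {
			/* one pass of the old loop: subtract the rounded-up half */
			loop_val -= (loop_val + 1) >> 1;
			/* must match the new single-shift decay */
			assert(loop_val == val >> (missed + 1));
		}
	}
	printf("loop decay matches single shift for all tested values\n");
	return 0;
}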