Skip to content

Commit 1e7f583

Browse files
committed
random: make /dev/urandom scalable for silly userspace programs
On a 4 socket (NUMA) system where a large number of application threads were all trying to read from /dev/urandom, this can result in the system spending 80% of its time contending on the global urandom spinlock. The application should have used its own PRNG, but let's try to keep it from running, lemming-like, straight over the locking cliff. Reported-by: Andi Kleen <[email protected]> Signed-off-by: Theodore Ts'o <[email protected]>
1 parent e192be9 commit 1e7f583

File tree

1 file changed

+58
-4
lines changed

1 file changed

+58
-4
lines changed

drivers/char/random.c

+58-4
Original file line numberDiff line numberDiff line change
@@ -436,6 +436,8 @@ static int crng_init = 0;
436436
#define crng_ready() (likely(crng_init > 0))
437437
static int crng_init_cnt = 0;
438438
#define CRNG_INIT_CNT_THRESH (2*CHACHA20_KEY_SIZE)
439+
static void _extract_crng(struct crng_state *crng,
440+
__u8 out[CHACHA20_BLOCK_SIZE]);
439441
static void extract_crng(__u8 out[CHACHA20_BLOCK_SIZE]);
440442
static void process_random_ready_list(void);
441443

@@ -756,6 +758,16 @@ static void credit_entropy_bits_safe(struct entropy_store *r, int nbits)
756758

757759
static DECLARE_WAIT_QUEUE_HEAD(crng_init_wait);
758760

761+
#ifdef CONFIG_NUMA
762+
/*
763+
* Hack to deal with crazy userspace programs when they are all trying
764+
* to access /dev/urandom in parallel. The programs are almost
765+
* certainly doing something terribly wrong, but we'll work around
766+
* their brain damage.
767+
*/
768+
static struct crng_state **crng_node_pool __read_mostly;
769+
#endif
770+
759771
static void crng_initialize(struct crng_state *crng)
760772
{
761773
int i;
@@ -815,7 +827,7 @@ static void crng_reseed(struct crng_state *crng, struct entropy_store *r)
815827
if (num == 0)
816828
return;
817829
} else
818-
extract_crng(buf.block);
830+
_extract_crng(&primary_crng, buf.block);
819831
spin_lock_irqsave(&primary_crng.lock, flags);
820832
for (i = 0; i < 8; i++) {
821833
unsigned long rv;
@@ -835,19 +847,26 @@ static void crng_reseed(struct crng_state *crng, struct entropy_store *r)
835847
spin_unlock_irqrestore(&primary_crng.lock, flags);
836848
}
837849

850+
static inline void maybe_reseed_primary_crng(void)
851+
{
852+
if (crng_init > 2 &&
853+
time_after(jiffies, primary_crng.init_time + CRNG_RESEED_INTERVAL))
854+
crng_reseed(&primary_crng, &input_pool);
855+
}
856+
838857
/*
 * Wait on crng_init_wait until crng_ready() holds.  The result of
 * wait_event_interruptible() is discarded, so the caller is not told
 * whether the condition was actually met.
 * NOTE(review): presumably an interruptible wait can end early on a
 * signal -- confirm callers tolerate a not-yet-ready CRNG afterwards.
 */
static inline void crng_wait_ready(void)
839858
{
840859
wait_event_interruptible(crng_init_wait, crng_ready());
841860
}
842861

843-
static void extract_crng(__u8 out[CHACHA20_BLOCK_SIZE])
862+
static void _extract_crng(struct crng_state *crng,
863+
__u8 out[CHACHA20_BLOCK_SIZE])
844864
{
845865
unsigned long v, flags;
846-
struct crng_state *crng = &primary_crng;
847866

848867
if (crng_init > 1 &&
849868
time_after(jiffies, crng->init_time + CRNG_RESEED_INTERVAL))
850-
crng_reseed(crng, &input_pool);
869+
crng_reseed(crng, crng == &primary_crng ? &input_pool : NULL);
851870
spin_lock_irqsave(&crng->lock, flags);
852871
if (arch_get_random_long(&v))
853872
crng->state[14] ^= v;
@@ -857,6 +876,19 @@ static void extract_crng(__u8 out[CHACHA20_BLOCK_SIZE])
857876
spin_unlock_irqrestore(&crng->lock, flags);
858877
}
859878

879+
/*
 * Fill @out with one block of CRNG output.
 *
 * On CONFIG_NUMA kernels the per-node state for the current node
 * (crng_node_pool[numa_node_id()]) is used when the pool has been
 * published and that slot is non-NULL; in every other case the request
 * is served from primary_crng.  The actual work is done by
 * _extract_crng().
 */
static void extract_crng(__u8 out[CHACHA20_BLOCK_SIZE])
{
	struct crng_state *state = &primary_crng;

#ifdef CONFIG_NUMA
	if (crng_node_pool) {
		struct crng_state *node_state = crng_node_pool[numa_node_id()];

		if (node_state != NULL)
			state = node_state;
	}
#endif
	_extract_crng(state, out);
}
891+
860892
static ssize_t extract_crng_user(void __user *buf, size_t nbytes)
861893
{
862894
ssize_t ret = 0, i;
@@ -1575,9 +1607,31 @@ static void init_std_data(struct entropy_store *r)
15751607
*/
15761608
static int rand_initialize(void)
15771609
{
1610+
#ifdef CONFIG_NUMA
1611+
int i;
1612+
int num_nodes = num_possible_nodes();
1613+
struct crng_state *crng;
1614+
struct crng_state **pool;
1615+
#endif
1616+
15781617
init_std_data(&input_pool);
15791618
init_std_data(&blocking_pool);
15801619
crng_initialize(&primary_crng);
1620+
1621+
#ifdef CONFIG_NUMA
1622+
pool = kmalloc(num_nodes * sizeof(void *),
1623+
GFP_KERNEL|__GFP_NOFAIL|__GFP_ZERO);
1624+
for (i=0; i < num_nodes; i++) {
1625+
crng = kmalloc_node(sizeof(struct crng_state),
1626+
GFP_KERNEL | __GFP_NOFAIL, i);
1627+
spin_lock_init(&crng->lock);
1628+
crng_initialize(crng);
1629+
pool[i] = crng;
1630+
1631+
}
1632+
mb();
1633+
crng_node_pool = pool;
1634+
#endif
15811635
return 0;
15821636
}
15831637
early_initcall(rand_initialize);

0 commit comments

Comments
 (0)