match.probability <- function( seed, M, n ) { set.seed( seed ) match <- rep( NA, M ) count <- rep( NA, M ) for ( m in 1:M ) { s <- sum( ( 1:n ) == sample( n ) ) match[ m ] <- 0 + ( s > 0 ) count[ m ] <- s if ( ( m %% 100000 ) == 0 ) print( m ) } p.hat <- mean( match ) y.bar <- mean( count ) return( list( match.probability = p.hat, expected.number.of.matches = y.bar, distribution.of.number.of.matches = table( count ) ) ) } match.probability( 1, 100000, 1 ) $match.probability [1] 1 $expected.number.of.matches [1] 1 match.probability( 1, 100000, 2 ) $match.probability [1] 0.49974 $expected.number.of.matches [1] 0.99948 match.probability( 1, 100000, 10 ) $match.probability [1] 0.63083 $expected.number.of.matches [1] 0.9999 match.probability( 1, 100000, 52 ) $match.probability [1] 0.63367 $expected.number.of.matches [1] 1.00179 system.time( print( match.probability( 1, 100000, 52 ) ) ) $match.probability [1] 0.63367 $expected.number.of.matches [1] 1.00179 user system elapsed 0.82 0.03 0.84 system.time( print( match.probability( 2, 100000, 52 ) ) ) $match.probability [1] 0.63311 $expected.number.of.matches [1] 1.0023 user system elapsed 0.83 0.01 0.84 system.time( print( match.probability( 2, 100000, 1000 ) ) ) $match.probability [1] 0.63297 $expected.number.of.matches [1] 1.00367 user system elapsed 1.97 0.03 2.00 # unbuffer output system.time( print( match.probability( 1, 1000000, 1000 ) ) ) [1] 100000 [1] 200000 [1] 300000 [1] 400000 [1] 500000 [1] 600000 [1] 700000 [1] 800000 [1] 900000 [1] 1000000 $match.probability [1] 0.631489 $expected.number.of.matches [1] 0.997748 $distribution.of.number.of.matches count 0 1 2 3 4 5 6 7 8 9 368511 367989 183758 61031 15078 3074 462 81 15 1 user system elapsed 19.93 0.04 20.00