simulating order statistics

Order Statistics
library(mosaic)
# Fix the RNG seed so the random draws that follow are reproducible.
set.seed(seed = 32589)

the "simulate UNIF(0,1), sort, choose" method

quick test

# Draw ten values from Uniform(0, 1) and show them in the order drawn.
x <- runif(n = 10)
x
##  [1] 0.53404167 0.98872010 0.29462759 0.32835362 0.31431749 0.21766789
##  [7] 0.06159685 0.29928402 0.77719811 0.62361296
# Ascending sort: element k of the result is the k-th smallest draw.
sort(x, decreasing = FALSE)
##  [1] 0.06159685 0.21766789 0.29462759 0.29928402 0.31431749 0.32835362
##  [7] 0.53404167 0.62361296 0.77719811 0.98872010

using do() from mosaic package

# Repeat the "draw 10 uniforms, then sort" experiment 100,000 times with
# mosaic's do(). Each replicate becomes one row of the result, with the
# sorted values stored left to right in columns V1, ..., V10.
ssims <- do(1e5) * sort(runif(n = 10))
# Peek at the first few replicates.
head(ssims)
##           V1         V2        V3        V4        V5        V6        V7
## 1 0.06455245 0.06495090 0.1851600 0.3079420 0.5330111 0.7427528 0.7732001
## 2 0.09437625 0.12534694 0.1686205 0.1889929 0.4539027 0.5177352 0.6206473
## 3 0.24087267 0.49718261 0.6001031 0.6023546 0.6538635 0.6559504 0.7728461
## 4 0.18553682 0.19880923 0.2450878 0.6159899 0.6565422 0.6784628 0.7192482
## 5 0.08861735 0.11106265 0.1947217 0.3066704 0.3532580 0.4119642 0.4310873
## 6 0.02271287 0.05534569 0.2303783 0.3378925 0.3784268 0.4731310 0.5289918
##          V8        V9       V10
## 1 0.8012919 0.9162480 0.9995782
## 2 0.6672826 0.9293018 0.9490545
## 3 0.8249644 0.9572544 0.9637433
## 4 0.8319460 0.8581806 0.9322516
## 5 0.5836478 0.5894738 0.9802935
## 6 0.5627342 0.6196189 0.9827886

each column represents an order statistic: column Vk holds \(X_{(k)}\), the k-th smallest of the 10 draws in that row.

histogram of \(X_{(7)}\) with mean of data as orange vertical line and theoretical mean in grey:

# Histogram of the simulated 7th order statistic. Bins are 0.025 wide and
# aligned so that one bin edge sits exactly at 7/11.
ggplot(data = ssims, mapping = aes(x = V7)) +
  geom_histogram(binwidth = 0.025, boundary = 7 / 11) +
  # grey line: theoretical mean 7/11
  geom_vline(xintercept = 7 / 11, colour = "grey50", alpha = 0.5) +
  # orange line: mean of the simulated V7 values
  geom_vline(xintercept = mean(ssims[["V7"]]), colour = "orange", alpha = 0.5)

# Simulated mean of the 7th order statistic ...
mean(ssims[["V7"]])
## [1] 0.6362227
# ... compared with the theoretical value 7/11.
7 / 11
## [1] 0.6363636
# Kernel density estimate of V7 (blue) overlaid with the Beta(7, 4)
# density curve (orange).
ggplot(data = ssims, mapping = aes(x = V7)) +
  geom_density(colour = "blue", size = 1) +
  stat_function(
    fun = dbeta,
    args = list(shape1 = 7, shape2 = 4),
    colour = "orange",
    size = 1
  )

looking at \((X_{(3)}, X_{(8)})\) pairs

density plot of X3 in magenta, X8 in green by sort method

# Marginal densities from the sorting method:
# V3 drawn in magenta, V8 drawn in green.
ggplot(data = ssims) +
  geom_density(mapping = aes(x = V3), colour = "#ff00ff") +
  geom_density(mapping = aes(x = V8), colour = "#00ff00") +
  theme_minimal() +
  labs(x = "X value")

scatterplot of joint distribution:

# Joint distribution of (V3, V8) from the sorting method. Points are drawn
# as nearly transparent open circles so dense regions show up darker; both
# axes are pinned to [0, 1].
ggplot(data = ssims, mapping = aes(x = V3, y = V8)) +
  geom_point(alpha = 0.05, shape = 1) +
  xlim(0, 1) +
  ylim(0, 1) +
  theme_minimal()

no sorting method

# Simulate (X(3), X(8)) directly, without sorting any uniform samples.
# With n = 10, the 3rd order statistic of Uniform(0, 1) draws is
# Beta(3, 8).
X3 <- rbeta(n = 1e5, shape1 = 3, shape2 = 8)
# Given X3, place X8 in the interval (X3, 1): a Beta(5, 3) draw (the 5th
# smallest of the 7 values above X3) is rescaled from (0, 1) to (X3, 1).
X8 <- X3 + (1 - X3) * rbeta(n = 1e5, shape1 = 5, shape2 = 3)
# One row per simulated pair, columns named X3 and X8.
nsims <- data.frame(X3 = X3, X8 = X8)
head(nsims)
##          X3        X8
## 1 0.3362019 0.8330321
## 2 0.1941811 0.8148522
## 3 0.1548368 0.5210800
## 4 0.2731315 0.6193608
## 5 0.2982546 0.7919736
## 6 0.2387811 0.6832124

density plot of X3 in magenta, X8 in green by no sort method

# Marginal densities from the no-sorting method:
# X3 drawn in magenta, X8 drawn in green.
ggplot(data = nsims) +
  geom_density(mapping = aes(x = X3), colour = "#ff00ff") +
  geom_density(mapping = aes(x = X8), colour = "#00ff00") +
  theme_minimal() +
  labs(x = "X value")

side-by-side of density plots of X3 and X8 with different methods:

# Same pair of density curves for each method, titled so the two plots can
# be compared side by side. First: values obtained by sorting.
ggplot(data = ssims) +
  geom_density(mapping = aes(x = V3), colour = "#ff00ff") +
  geom_density(mapping = aes(x = V8), colour = "#00ff00") +
  theme_minimal() +
  labs(x = "X value", title = "sorting")

# Second: values simulated directly from Beta draws.
ggplot(data = nsims) +
  geom_density(mapping = aes(x = X3), colour = "#ff00ff") +
  geom_density(mapping = aes(x = X8), colour = "#00ff00") +
  theme_minimal() +
  labs(x = "X value", title = "no sorting")

looking at \((X_{(3)}, X_{(8)})\) pairs

scatterplot of joint distribution for no sorting method:

# Joint distribution of (X3, X8) from the no-sorting method, axes pinned
# to [0, 1].
ggplot(data = nsims, mapping = aes(x = X3, y = X8)) +
  geom_point(alpha = 0.05, shape = 1) +
  xlim(0, 1) +
  ylim(0, 1) +
  theme_minimal()

# The two methods again with fainter points (alpha 0.015) and a title on
# each plot for direct comparison.
ggplot(data = ssims, mapping = aes(x = V3, y = V8)) +
  geom_point(alpha = 0.015, shape = 1) +
  theme_minimal() +
  xlim(0, 1) +
  ylim(0, 1) +
  labs(title = "sorting")
ggplot(data = nsims, mapping = aes(x = X3, y = X8)) +
  geom_point(alpha = 0.015, shape = 1) +
  theme_minimal() +
  xlim(0, 1) +
  ylim(0, 1) +
  labs(title = "no sorting")

second set of sims

# Second, independent batch of simulations using both methods.
# No-sorting method: Beta(3, 8) draw for the 3rd order statistic, then a
# Beta(5, 3) draw rescaled onto (X3b, 1) for the 8th.
X3b <- rbeta(n = 1e5, shape1 = 3, shape2 = 8)
X8b <- X3b + (1 - X3b) * rbeta(n = 1e5, shape1 = 5, shape2 = 3)
nsimsb <- data.frame(X3b = X3b, X8b = X8b)
# Sorting method: 100,000 sorted samples of 10 uniforms, one per row.
ssimsb <- do(1e5) * sort(runif(n = 10))
# Scatterplots of the new batch, one per method, axes pinned to [0, 1].
ggplot(data = ssimsb, mapping = aes(x = V3, y = V8)) +
  geom_point(alpha = 0.015, shape = 1) +
  theme_minimal() +
  xlim(0, 1) +
  ylim(0, 1) +
  labs(title = "sorting")
ggplot(data = nsimsb, mapping = aes(x = X3b, y = X8b)) +
  geom_point(alpha = 0.015, shape = 1) +
  theme_minimal() +
  xlim(0, 1) +
  ylim(0, 1) +
  labs(title = "no sorting")

plotting both types of samples on top of each other (not sure this is helpful):

# Collect every simulated 3rd / 8th order-statistic sample in one data
# frame: V3/V8 and V3b/V8b come from the two sorting runs, X3/X8 and
# X3b/X8b from the two no-sorting runs.
# X3 and X8 are stored explicitly because the plot below maps them; if the
# columns were missing, ggplot2 would only find them by falling back to
# same-named variables in the calling environment. They are appended last
# so the positions of the other columns are unchanged.
combined <- data.frame(
  V3 = ssims$V3, V8 = ssims$V8,
  V3b = ssimsb$V3, V8b = ssimsb$V8,
  X3b, X8b,
  X3, X8
)
# Overlay all eight density curves: every 3rd-order-statistic sample in
# magenta, every 8th-order-statistic sample in green.
ggplot(combined) +
  geom_density(aes(V3), color = "#ff00ff") +
  geom_density(aes(V3b), color = "#ff00ff") +
  geom_density(aes(V8), color = "#00ff00") +
  geom_density(aes(V8b), color = "#00ff00") +
  geom_density(aes(X3), color = "#ff00ff") +
  geom_density(aes(X3b), color = "#ff00ff") +
  geom_density(aes(X8), color = "#00ff00") +
  geom_density(aes(X8b), color = "#00ff00") +
  theme_minimal() +
  xlab("X value") + ggtitle("all X3 (magenta) and X8 (green) samples")

plotting both types of samples on top of each other in scatterplot (not sure this is helpful):

# Overlay the joint samples from both methods on one scatterplot:
# sorting pairs (V3, V8) as squares (shape 22), no-sorting pairs (X3, X8)
# as circles (shape 21). ggplot2 resolves X3 and X8 from `combined` when
# it has those columns, otherwise from same-named variables in the calling
# environment.
ggplot(data = combined) +
  geom_point(mapping = aes(x = V3, y = V8), alpha = 0.05, shape = 22) +
  geom_point(mapping = aes(x = X3, y = X8), alpha = 0.05, shape = 21) +
  xlim(0, 1) +
  ylim(0, 1) +
  theme_minimal()