Categories
日常应用

中心极限定理的Monte Carlo模拟

中心极限定理版本一堆,每一个都牵扯一堆数学公式什么的...而与我而言,其核心就是,样本足够大的时候,可以无视其本身分布(只要均值、方差存在),(独立同分布的)样本均值将服从正态分布。这样一来,就可以使用正态分布的一系列良好性质,比如两个正态分布之间的检验什么的...

按说中心极限定理(下简称CLT)整天都在用,可是后面渐渐的习惯了计量那些矩阵推导渐进性质之类的,往往就忘了一些基本的统计量或者区间估计是怎么计算出来的...呃,眼高手低,还是老老实实的不时回头复习一下基础知识比较靠谱。

记得Yihui曾经在animation包做过一个动画展现CLT...相比而言,我就比较懒了,简单的做个模拟看看最终结果就好了。本来这种模拟应该扔给Matlab去做的,可惜啊现在电脑上米有,只能用R了。R里面可以产生随机数的分布有很多,一个个试呗...在基础的stats包里面,有一堆以r开头的函数,对应不同的分布(wiki页面建议看英文,中文长度完全不在一个量级啊...)。

  • rbeta: The Beta Distribution (wiki link)
  • rbinom: The Binomial Distribution (wiki link) (二项分布)
  • rcauchy: The Cauchy Distribution (wiki link) (柯西分布,N阶矩都不存在的分布...)
  • rchisq: The (non-central) Chi-Squared Distribution (wiki link) (卡方分布,正态分布平方的分布)
  • rexp: The Exponential Distribution (wiki link) (指数分布,独立随机事件发生的时间间隔)
  • rf: The F Distribution (wiki link) (F分布,两个卡方分布除以各自自由度)
  • rgamma: The Gamma Distribution (wiki link) (伽玛分布)
  • rgeom: The Geometric Distribution (wiki link) (几何分布,在第n次伯努利试验中,试验k次才得到第一次成功的机率)
  • rhyper: The Hypergeometric Distribution (wiki link) (超几何分布)
  • rlnorm: The Log Normal Distribution (wiki link) (对数正态分布,正态分布的指数的分布)
  • rlogis: The Logistic Distribution (wiki link) (逻辑分布)
  • rmultinom: The Multinomial Distribution (wiki link) (多变量正态分布)
  • rnbinom: The Negative Binomial Distribution (wiki link) (负二项分布)
  • rnorm: The Normal Distribution (wiki link) (正态分布)
  • rpois: The Poisson Distribution (wiki link) (泊松分布,单位时间内随机事件发生的次数)

那就...一个个试呗。计算机就是会让人的生活变得简单...结果如下。

CLT_SIMULATION中心极限定理模拟

源代码如下:

###simple Monte Carlo for means ####
sample_size = 10000

grid.arrange( ncol=2
  #Beta Distribution
 , ggplot(data.frame(obs=rbeta(n=sample_size,shape1=2,shape2=5)),aes(x=obs),fill="red")+
    geom_histogram(aes(y = ..density..),fill="gray33")+
    geom_density(color="deeppink3",size=0.5)+
    opts(title="Beta Distribution")
  #mean of random samples from Beta Distribution
  ,ggplot(data.frame(means=replicate(sample_size,mean(rbeta(n=sample_size/10, shape1=2,shape2=5)))),aes(x=means),fill="red")+
    geom_histogram(aes(y = ..density..),fill="gray33")+
    geom_density(color="deeppink3",size=0.5)+
    opts(title="mean of random samples from Beta Distribution")

  #Binomial distribution
  ,ggplot(data.frame(obs=rbinom(n=sample_size,size=1,prob=0.5)),aes(x=obs),fill="red")+
    geom_histogram(aes(y = ..density..),fill="gray33")+
    #geom_density(color="deeppink3",size=1)+
    opts(title="Binomial Distribution")
  #mean of random samples from Binomial Distribution
  ,ggplot(data.frame(means=replicate(sample_size,mean(rbinom(n=sample_size/10,size=1,prob=0.5)))),aes(x=means),fill="red")+
    geom_histogram(aes(y = ..density..),fill="gray33")+
    geom_density(color="deeppink3",size=0.5)+
    opts(title="mean of random samples from Binomial Distribution")

  #Cauchy  Distribution
 , ggplot(data.frame(obs=rcauchy(n=sample_size,location=0,scale=2)),aes(x=obs),fill="red")+
    geom_histogram(aes(y = ..density..),fill="gray33")+
    geom_density(color="deeppink3",size=0.5)+
    opts(title="Cauchy Distribution")
  #mean of random samples from Cauchy Distribution
 , ggplot(data.frame(means=replicate(sample_size,mean(rcauchy(n=sample_size/10, location=0,scale=2)))),aes(x=means),fill="red")+
    geom_histogram(aes(y = ..density..),fill="gray33")+
    geom_density(color="deeppink3",size=0.5)+
    opts(title="mean of random samples from Cauchy Distribution")

  #Chi-squared Distribution
  ,ggplot(data.frame(obs=rchisq(n=sample_size,df=3)),aes(x=obs),fill="red")+
    geom_histogram(aes(y = ..density..),fill="gray33")+
    geom_density(color="deeppink3",size=0.5)+
    opts(title="Chi-squared Distribution")
  #mean of random samples from Chi-squared Distribution
 , ggplot(data.frame(means=replicate(sample_size,mean(rchisq(n=sample_size/10,df=3)))),aes(x=means),fill="red")+
    geom_histogram(aes(y = ..density..),fill="gray33")+
    geom_density(color="deeppink3",size=0.5)+
    opts(title="mean of random samples from Chi-squared Distribution")

  #Exponential Distribution
  ,ggplot(data.frame(obs=rexp(n=sample_size,rate = 1.5)),aes(x=obs),fill="red")+
    geom_histogram(aes(y = ..density..),fill="gray33")+
    geom_density(color="deeppink3",size=0.5)+
    opts(title="Exponential Distribution")
  #mean of random samples from Exponential Distribution
 , ggplot(data.frame(means=replicate(sample_size,mean(rexp(n=sample_size/10, rate = 1.5)))),aes(x=means),fill="red")+
    geom_histogram(aes(y = ..density..),fill="gray33")+
    geom_density(color="deeppink3",size=0.5)+
    opts(title="mean of random samples from Exponential Distribution")

  #F Distribution
  ,ggplot(data.frame(obs=rf(n=sample_size,df1=100,df2=100)),aes(x=obs),fill="red")+
    geom_histogram(aes(y = ..density..),fill="gray33")+
    geom_density(color="deeppink3",size=0.5)+
    opts(title="F Distribution")
  #mean of random samples from F Distribution
 , ggplot(data.frame(means=replicate(sample_size,mean(rf(n=sample_size/10, df1=100,df2=100)))),aes(x=means),fill="red")+
    geom_histogram(aes(y = ..density..),fill="gray33")+
    geom_density(color="deeppink3",size=0.5)+
    opts(title="mean of random samples from F Distribution")

  #Gamma Distribution
 , ggplot(data.frame(obs=rgamma(n=sample_size,shape=2, rate=2)),aes(x=obs),fill="red")+
    geom_histogram(aes(y = ..density..),fill="gray33")+
    geom_density(color="deeppink3",size=0.5)+
    opts(title="Gamma Distribution")
  #mean of random samples from Gamma Distribution
 , ggplot(data.frame(means=replicate(sample_size,mean(rgamma(n=sample_size/10, shape=2, rate=2)))),aes(x=means),fill="red")+
    geom_histogram(aes(y = ..density..),fill="gray33")+
    geom_density(color="deeppink3",size=0.5)+
    opts(title="mean of random samples from Gamma Distribution")

  #Geometric Distribution
  ,ggplot(data.frame(obs=rgeom(n=sample_size,prob=0.5)),aes(x=obs),fill="red")+
    geom_histogram(aes(y = ..density..),fill="gray33")+
    geom_density(color="deeppink3",size=0.5)+
    opts(title="Geometric Distribution")
  #mean of random samples from Geometric Distribution
 , ggplot(data.frame(means=replicate(sample_size,mean(rgeom(n=sample_size/10, prob=0.5)))),aes(x=means),fill="red")+
    geom_histogram(aes(y = ..density..),fill="gray33")+
    geom_density(color="deeppink3",size=0.5)+
    opts(title="mean of random samples from Geometric Distribution")

  #Log-normal Distribution
  ,ggplot(data.frame(obs=rlnorm(n=sample_size)),aes(x=obs),fill="red")+
    geom_histogram(aes(y = ..density..),fill="gray33")+
    geom_density(color="deeppink3",size=0.5)+
    opts(title="Log-normal Distribution")
  #mean of random samples from Log-normal Distribution
  ,ggplot(data.frame(means=replicate(sample_size,mean(rlnorm(n=sample_size/10)))),aes(x=means),fill="red")+
    geom_histogram(aes(y = ..density..),fill="gray33")+
    geom_density(color="deeppink3",size=0.5)+
    opts(title="mean of random samples from Log-normal Distribution")

  #Logistic Distribution
  ,ggplot(data.frame(obs=rlogis(n=sample_size)),aes(x=obs),fill="red")+
    geom_histogram(aes(y = ..density..),fill="gray33")+
    geom_density(color="deeppink3",size=0.5)+
    opts(title="Logistic Distribution")
  #mean of random samples from Logistic Distribution
  ,ggplot(data.frame(means=replicate(sample_size,mean(rlogis(n=sample_size/10)))),aes(x=means),fill="red")+
    geom_histogram(aes(y = ..density..),fill="gray33")+
    geom_density(color="deeppink3",size=0.5)+
    opts(title="mean of random samples from Logistic Distribution")

  #Negative Binomial Distribution
  ,ggplot(data.frame(obs=rnbinom(n=sample_size,size=1,prob=0.5)),aes(x=obs),fill="red")+
    geom_histogram(aes(y = ..density..),fill="gray33")+
    geom_density(color="deeppink3",size=0.5)+
    opts(title="Negative Binomial Distribution")
  #mean of random samples from Negative Binomial Distribution
  ,ggplot(data.frame(means=replicate(sample_size,mean(rnbinom(n=sample_size/10,size=1,prob=0.5)))),aes(x=means),fill="red")+
    geom_histogram(aes(y = ..density..),fill="gray33")+
    geom_density(color="deeppink3",size=0.5)+
    opts(title="mean of random samples from Negative Binomial Distribution")

  #Normal Distribution
  ,ggplot(data.frame(obs=rnorm(n=sample_size)),aes(x=obs),fill="red")+
    geom_histogram(aes(y = ..density..),fill="gray33")+
    geom_density(color="deeppink3",size=0.5)+
    opts(title="Normal Distribution")
  #mean of random samples from Normal Distribution
  ,ggplot(data.frame(means=replicate(sample_size,mean(rnorm(n=sample_size/10)))),aes(x=means),fill="red")+
    geom_histogram(aes(y = ..density..),fill="gray33")+
    geom_density(color="deeppink3",size=0.5)+
    opts(title="mean of random samples from Normal Distribution")

  #Poisson Distribution
  ,ggplot(data.frame(obs=rpois(n=sample_size,lambda=4)),aes(x=obs),fill="red")+
    geom_histogram(aes(y = ..density..),fill="gray33")+
    geom_density(color="deeppink3",size=0.5)+
    opts(title="Poisson Distribution")
  #mean of random samples from Poisson Distribution
  ,ggplot(data.frame(means=replicate(sample_size,mean(rpois(n=sample_size/10,lambda=4)))),aes(x=means),fill="red")+
    geom_histogram(aes(y = ..density..),fill="gray33")+
    geom_density(color="deeppink3",size=0.5)+
    opts(title="mean of random samples from Poisson Distribution")

)

 

13 replies on “中心极限定理的Monte Carlo模拟”

也可以写一个function,把ggolot那堆东西扔进去....

不过这个代码重复率还是可以接受的...主要是敦促自己耐心的一个个看完随机数的函数,算是复习一下各种分布的来源吧...要不也不会那么耐心的一一贴出来wiki的地址了...

我写blog嘛,一半是给大家分享知识,一般是敦促自己学习...还是蛮自私的嗯。

eval(parse(text = ...))通常太暗黑了,没事儿建议绕远,它可以给程序带来各种不可预知的副作用。就这个例子而言,最好还是写个函数,两个参数,一个是数据,一个是标题。

太云最近不知云游何方去了,否则你这篇文章可以作为 http://vis.supstat.com/ 的第一篇稿件啊。我们打算整一个替代R Graph Gallery的网站,一切自动生成。投稿就是pull request,算是实现了我几个月前的“自由期刊”想法。

刚刚还看到他刷微博回yixuan呢~果然他眼中只有某人啊...

我这就是闲着无聊了...在落园灌灌水而已...

我不知道texteval,Tao大人说在session包中,我猜想是这样:texteval = function(x, ...) eval(parse(text = x), ...),你可以自己下载那个包看看源代码。

Comments are closed.