Load the packages you need

library(dplyr)
## Warning: package 'dplyr' was built under R version 3.2.2
library(RCurl)
library(grid)
library(ggplot2)
library(downloader)
library(gridExtra)
library(extrafont)
library(Hmisc)

Download the data

We follow the data-reading approach used in the original code that reproduces the paper's analyses.

download("https://osf.io/fgjvw/?action=download",destfile="../data/rpp_data.csv")
download("https://osf.io/bhcsf/?action=download",destfile="../data/rpp_data_codebook.csv")
date_downloaded = date()
date()
## [1] "Thu Sep 17 14:33:12 2015"

Reproducing Figure 3 from the paper

First we read in the data, following lines 30-43 of the original code and updating it because we had trouble getting the functions to work from OSF.

## Here we follow lines 30-43 of the original code

dat = read.csv("../data/rpp_data.csv", stringsAsFactors = FALSE)
fdat = dplyr::filter(dat, !is.na(T_pval_USE..O.), !is.na(T_pval_USE..R.))

## 99 expected
nrow(fdat)
## [1] 99
idOK <- complete.cases(fdat$T_r..O.,fdat$T_r..R.)

## 97 expected
sum(idOK)
## [1] 97
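The two studies dropped by complete.cases() have usable p-values but a missing effect size; to see which rows they are (a quick check, our addition):

## Inspect the two rows that fail complete.cases (our addition)
fdat[!idOK, c("T_r..O.", "T_r..R.")]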

Next we make the plot following lines 192-250 of the original code, again updating it because we had trouble getting the functions to work from OSF.

## The helper functions gg.theme() and plotHolder() come from
## https://github.com/FredHasselman/toolboxR/blob/master/C-3PR.R; we copied them
## into a local utils.R because we had trouble sourcing that file directly

source("utils.R")
mytheme <- gg.theme("clean")

## As in line 149 of the original code, Power..R. must be coerced to numeric
fdat$Power..R. <- as.numeric(fdat$Power..R.)
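## Note (our addition): as.numeric() on a character column silently turns
## non-numeric entries into NA, so check how many values were lost
sum(is.na(fdat$Power..R.))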

fdat$oriSig = "Not Significant"
fdat$oriSig[fdat$T_pval_USE..O.<=.06] = "Significant"
fdat$oriSig = factor(fdat$oriSig)

fdat$repSig = "Not Significant"
fdat$repSig[fdat$T_pval_USE..R.<=.05] = "Significant"
fdat$repSig = factor(fdat$repSig)
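
## Quick look (our addition): cross-tabulate original vs. replication significance
table(fdat$oriSig, fdat$repSig)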

blankPlot <- plotHolder()


xDense <- ggplot(fdat, aes(x=T_r..O., fill=oriSig)) + 
  geom_density(aes(y= ..count..),trim=F,alpha=.5) + 
  xlab("") + ylab("") + xlim(0,1) +
  gg.theme("noax") + 
  theme(legend.position = "none",plot.margin = unit(c(0,0,0,4), "lines"))


yDense <- ggplot(fdat, aes(x=T_r..R., fill=repSig)) + 
  geom_density(aes(y= ..count..),trim=F,alpha=.5) + 
  xlab("") + ylab("") + xlim(-.5,1) + 
  coord_flip() + 
  gg.theme("noax") + 
  theme(legend.position = "none", plot.margin = unit(c(0,0,3,0), "lines")) 


scatterP<-
  ggplot(fdat,aes(x=T_r..O.,y=T_r..R.)) +  
  geom_hline(aes(yintercept=0),linetype=2) +
  geom_abline(intercept=0,slope=1,color="Grey60")+
  geom_point(aes(size=Power..R.,fill=repSig),color="Grey30",shape=21,alpha=.8) + 
  geom_rug(aes(color=oriSig),size=1,sides="b",alpha=.6) + 
  geom_rug(aes(color=repSig),size=1,sides="l",alpha=.6) + 
  scale_x_continuous(name="Original Effect Size",limits=c(0,1),breaks=c(0,.25,.5,.75,1)) + 
  scale_y_continuous(name="Replication Effect Size",limits=c(-.5,1),breaks=c(-.5,-.25,0,.25,.5,.75,1)) + 
  ggtitle("") + xlab("") + ylab("") + 
  scale_size_continuous(name="Replication Power",range=c(2,9)) + 
  scale_color_discrete(name="p-value") +
  scale_fill_discrete(name="p-value") +
  gg.theme("clean") + 
  theme(legend.position=c(.9,.6), plot.margin = unit(c(-2,-1.5,2,2), "lines")) 

grid.arrange(xDense, blankPlot, scatterP, yDense, ncol=2, nrow=2, widths=c(4, 1.4), heights=c(1.4, 4))
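To save the assembled figure rather than just display it, one option (a sketch, our addition; the output path is our own choice) is to wrap the same grid.arrange() call in a graphics device:

## Save the composite figure to a file (sketch; path is arbitrary)
png("rpp_figure3.png", width = 900, height = 900)
grid.arrange(xDense, blankPlot, scatterP, yDense, ncol=2, nrow=2, widths=c(4, 1.4), heights=c(1.4, 4))
dev.off()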

dim(fdat)
## [1]  99 140

Now we transform to Fisher's z

We follow lines 490-516 of the original code to get the Fisher transforms, including the degrees-of-freedom adjustments used by the original authors.

colnames(dat)[1] = "ID"

cor_orig = dat$T_r..O.
cor_rep = dat$T_r..R.

n_orig = dat$T_df2..O. + 2
n_rep = dat$T_df2..R. + 2


### Partial correlation, so degrees of freedom plus 2 in order to get N
### (the original code indexes these rows by position, which coincides with study ID)
n_orig[dat$ID == 82] <- dat$T_df1..O.[82]+2
n_rep[dat$ID == 82] <- dat$T_df1..R.[82]+2

### Correlation
n_orig[dat$ID == 120] <- dat$T_N..O.[120]
n_rep[dat$ID == 120] <- dat$T_N..R.[120]
n_orig[dat$ID == 154] <- dat$T_N..O.[154]
n_rep[dat$ID == 154] <- dat$T_N..R.[154]
n_orig[dat$ID == 155] <- dat$T_N..O.[155]
n_rep[dat$ID == 155] <- dat$T_N..R.[155]

### t
n_orig[dat$ID == 121] <- dat$T_N..O.[121]
n_rep[dat$ID == 121] <- dat$T_N..R.[121]
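
## Sanity check (our addition): the 1/(n-3) variance formula used below
## requires n > 3, so confirm no reconstructed sample size is that small
summary(n_orig)
summary(n_rep)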

### Transform to Fisher's z
fish_orig = atanh(cor_orig)
fish_rep = atanh(cor_rep)
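As a reminder, atanh() is exactly Fisher's z transform, z = 0.5 * log((1 + r) / (1 - r)), and tanh() is its inverse. A one-line check (our addition):

## atanh() is the Fisher transform and tanh() its inverse (our addition)
r <- 0.3
all.equal(atanh(r), 0.5 * log((1 + r) / (1 - r)))  # TRUE
all.equal(tanh(atanh(r)), r)                       # TRUE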

Calculate the prediction interval

Here we calculate 95% prediction intervals on the Fisher z scale. The standard error combines the sampling variability of both studies, se_total = sqrt(1/(n_orig - 3) + 1/(n_rep - 3)), and the interval tanh(fish_orig ± 1.96 * se_total) is then back-transformed to the correlation scale.

se_total <- sqrt(1/(n_orig-3) + 1/(n_rep-3))
low = tanh(fish_orig - se_total * 1.96)
high = tanh(fish_orig + se_total * 1.96)
too_high = (cor_rep > high)
too_low = (cor_rep < low)
use_index = (!is.na(dat$T_pval_USE..O.) & !is.na(dat$T_pval_USE..R.))

## val codes each study relative to its prediction interval:
## 1 = inside, 2 = below, 3 = above (indexes the plot colors later)
pi_dat = data.frame(cor_orig, cor_rep, low, high, se_total, too_low, too_high, n_orig, n_rep, val = (too_low + 2*too_high + 1), use_index)

pi_dat = dplyr::filter(pi_dat, use_index > 0)
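
A quick tally of where the replication estimates fall relative to their prediction intervals (our addition; NAs come from missing sample sizes):

## Count studies inside / below / above their intervals (our addition)
table(factor(pi_dat$val, levels = 1:3, labels = c("inside", "below", "above")), useNA = "ifany")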

Here we make a plot. Note that we have not yet filtered out the cases where the test has more than one degree of freedom.

## Drop rows with any missing values so every plotted point has a complete interval
pi_dat_nona = pi_dat[rowSums(is.na(pi_dat))==0,]
cols1 = c("grey","hotpink","dodgerblue")  ## indexed by val: in / below / above
plot(pi_dat_nona$cor_orig,pi_dat_nona$cor_rep,ylim=c(-1,1),xlim=c(0,1),
     xlab="Original Effect",ylab="Replication Effect",col=cols1[pi_dat_nona$val],pch=19)
abline(c(0,1),lwd=3,lty=2)
abline(h=0,lwd=3,lty=2)
segments(pi_dat_nona$cor_orig,pi_dat_nona$low,pi_dat_nona$cor_orig,pi_dat_nona$high,col=cols1[pi_dat_nona$val],lwd=0.75)
legend(0.55,-0.5,col=cols1,pch=19,
       legend=c("In prediction interval","Below prediction interval","Above prediction interval"))