Here is code for those who want to play around with the Bayesian analysis used in the Pfizer vaccine trial study (Related blog post here). I got a request for Python code so here it is. The charts I published the other day were made in R.
# Python code for Pfizer vaccine trial analysis
import pandas as pd
import numpy as np
from scipy.stats import beta
import matplotlib.pyplot as plt
# functions
def ve2vsc (ve):
if isinstance(ve, pd.Series) is False:
ve = pd.Series(ve)
return ((1-ve)/(2-ve)).round(3)
def vsc2ve (vsc):
if isinstance(vsc, pd.Series) is False:
vsc = pd.Series(vsc)
return ((1-2*vsc)/(1-vsc)).round(3)
ve = pd.Series(np.linspace(0,1,11))
vsc = ve2vsc(ve)
# line chart showing VSC and VE relationship
plt.plot(ve, vsc, color="darkgray", lw=3)
plt.title("Relationship between Vaccine Efficacy and Vaccine's Share of Cases")
plt.xlabel("ve")
plt.ylabel("vsc")
ax = plt.gca()
ax.spines['right'].set_color("none")
ax.spines['top'].set_color("none")
ax.spines['bottom'].set_position(('data',0))
ax.spines['left'].set_position(('data',0))
xlabels = (ve*100).astype('int').astype('str')+"%"
xlabels[list(range(1,11,2))] = ""
plt.xticks(ve, xlabels)
plt.yticks(pd.Series(np.linspace(0,0.5,6)), (pd.Series(np.linspace(0,50,6))).astype('int').astype("str")+"%")
ax.hlines(ve2vsc(0.5), 0, 0.5, color="brown", ls="dotted")
ax.vlines(0.5, 0, ve2vsc(0.5), color="brown", ls="dotted")
ax.hlines(ve2vsc(0.9), 0, 0.9, color="brown", ls="dotted")
ax.vlines(0.9, 0, ve2vsc(0.9), color="brown", ls="dotted")
plt.text(0.02,ve2vsc(0.5)[0]+0.02,(ve2vsc(0.5)[0]*100).astype("int").astype("str")+"%", color="brown", fontsize=8)
plt.text(0.02,ve2vsc(0.9)[0]+0.02,(ve2vsc(0.9)[0]*100).astype("int").astype("str")+"%", color="brown", fontsize=8)
plt.show()
# plotting the posterior
a1 = 0.700102 # From protocol
b1 = 1 # From protocol
data_x = 8 # Number of cases in vaccine arm
data_n = 94 # Total number of cases
def a2 (a1, x):
return a1 + x
def b2 (b1, n, x):
return b1 + n - x
def vsc_posterior(samp, data_x):
return pd.Series(beta.pdf(samp, a2(a1,data_x), b2(b1,data_n, data_x)))
vsc_sample = pd.Series(np.linspace(0,0.5,72))
# single posterior curve vs vsc
plt.plot(vsc_sample, vsc_posterior(vsc_sample, 8), lw=3)
plt.title("Predicted Prob. of Vaccine's Share of Cases")
plt.xlabel("VSC")
plt.ylabel("Probability")
ax = plt.gca()
ax.spines['right'].set_color("none")
ax.spines['top'].set_color("none")
ax.spines['bottom'].set_position(('data',0))
ax.spines['left'].set_position(('data',0))
plt.xticks(pd.Series(np.linspace(0,0.5,6)), (pd.Series(np.linspace(0,50,6))).astype('int').astype('str')+"%")
plt.yticks(pd.Series(range(0,21,4)), [""]*6)
plt.show()
# posterior vs ve
plt.plot(vsc2ve(vsc_sample), vsc_posterior(vsc_sample, 8), lw=3)
xlabels = (pd.Series(range(0,110,10))).astype("str")+"%"
xlabels[list(range(1,11,2))] = ""
xlabels
plt.xticks(pd.Series(np.linspace(0,1.0,11)), xlabels)
plt.yticks(pd.Series(range(0,21,4)), [""]*6)
plt.title("Predicted Prob. of Vaccine Efficacy")
plt.xlabel("ve")
plt.ylabel("probability")
plt.show()
# three posterior curves vs ve
plt.plot(vsc2ve(vsc_sample), vsc_posterior(vsc_sample, 3), color="blue", lw=1.5, linestyle="dashed", alpha=0.4)
plt.plot(vsc2ve(vsc_sample), vsc_posterior(vsc_sample, 47), color="purple", lw=1.5, linestyle="dashed", alpha=0.4)
plt.plot(vsc2ve(vsc_sample), vsc_posterior(vsc_sample, 8),
color="darkgray", lw=3)
ax = plt.gca()
ax.spines['right'].set_color("none")
ax.spines['top'].set_color("none")
ax.spines['bottom'].set_position(('data',0))
ax.spines['left'].set_position(('data',0))
xlabels = (pd.Series(np.linspace(0,100,11))).astype("int").astype("str")+"%"
xlabels[list(range(1,10,2))] = ""
xlabels
plt.xticks(pd.Series(np.linspace(0,1.0,11)), xlabels)
plt.yticks(pd.Series(range(0,21,4)), [""]*6)
plt.title("Predicted Prob. of Vaccine Efficacy")
plt.xlabel("ve")
plt.ylabel("probability")
plt.text(0.02,vsc_posterior(vsc_sample, 47).max().round(2)+0.2,"47-47", color="purple", fontsize=8)
plt.text(0.02,vsc_posterior(vsc_sample, 47).max().round(2)+1,"No Effect", color="purple", fontsize=8)
plt.text(0.88,vsc_posterior(vsc_sample, 3).max().round(2)-0.2,"3-91", color="blue", fontsize=8)
plt.text(0.88,vsc_posterior(vsc_sample, 3).max().round(2)+0.5,"BEST", color="blue", fontsize=8)
plt.text(0.82,vsc_posterior(vsc_sample, 8).max().round(2)+0.2,"8-86", color="darkgray", fontsize=8)
plt.text(0.82,vsc_posterior(vsc_sample, 8).max().round(2)+1,"DATA", color="darkgray", fontsize=8, weight="bold")
plt.show()
# prior and posterior vs ve
vsc_prior = pd.Series(beta.pdf(vsc_sample, a1, b1))
plt.plot(vsc2ve(vsc_sample), vsc_prior, color="magenta", lw=1.5, ls="dashed", alpha=0.5)
plt.plot(vsc2ve(vsc_sample), vsc_posterior(vsc_sample, 8),
color="darkgray", lw=3)
ax = plt.gca()
ax.spines['right'].set_color("none")
ax.spines['top'].set_color("none")
ax.spines['bottom'].set_position(('data',0))
ax.spines['left'].set_position(('data',0))
xlabels = (pd.Series(np.linspace(0,100,11))).astype("int").astype("str")+"%"
xlabels[list(range(1,10,2))] = ""
xlabels
plt.xticks(pd.Series(np.linspace(0,1.0,11)), xlabels)
plt.yticks(pd.Series(range(0,21,4)), [""]*6)
plt.title("Prior and Posterior Prob. of Vaccine Efficacy")
plt.xlabel("ve")
plt.ylabel("probability")
plt.text(0.15, 2,"prior", color="magenta", fontsize=8)
plt.text(0.76,12,"posterior", color="darkgray", fontsize=8)
plt.show()
# compute probability VE > 30%, 70% and 90% given 8-86 split of cases
beta.cdf(ve2vsc(0.3), a2(a1,8), b2(b1,data_n, 8))[0].round(3)
beta.cdf(ve2vsc(0.7), a2(a1,8), b2(b1,data_n, 8))[0].round(3)
beta.cdf(ve2vsc(0.9), a2(a1,8), b2(b1,data_n, 8))[0].round(3)
Recent Comments