# mesofas.r # copyright André Berchtold, 2011 # version: 14.10.2011 # generic function mesofas <- function(x, ...) UseMethod("mesofas") # default method mesofas.default <- function(x, y, ...) { table <- "TRUE" CT <- xtabs(~x+y) title <- "default" value <- "NaN" df <- "NaN" p <- "NaN" n <- length(x) call <- match.call() remark <- "" type <- "NaN" structure(list(table=table,CT=CT,title=title,value=value,df=df,p=p,n=n,call=call, remark=remark,type=type),class="mesofas") } # print method print.mesofas <- function(x, ...) { cat("\n") cat("Call: ") print(x$call) cat("\n") if(x$table=="TRUE"){ print(x$CT) cat("\n") } if(x$title!="default"){ if(x$type=="asym1") { cat(x$title,": asymmetric measure") } else { cat(x$title,": symmetric measure") } cat("\n") cat("number of observations: ",x$n) cat("\n") cat("value = ",x$value) if (x$df!="NaN"){ cat(", df = ",x$df) } if (x$p!="NaN"){ cat(", p-value = ",x$p) } if (x$CI_inf!="NaN"){ cat("\n") cat("95% confidence interval: [",x$CI_inf,";",x$CI_sup,"]") } cat("\n\n") if (x$remark!=""){ cat(x$remark) cat("\n") } } if(x$type=="asym") { cat("asymmetric measure: Y|X") cat("\n") cat("value = ",x$value1,", p-value = ",x$p1) if (x$CI1_inf!="NaN"){ cat("\n") cat("95% confidence interval: [",x$CI1_inf,";",x$CI1_sup,"]") } cat("\n\n") cat("asymmetric measure: X|Y") cat("\n") cat("value = ",x$value2,", p-value = ",x$p2) cat("\n") if (x$CI2_inf!="NaN"){ cat("95% confidence interval: [",x$CI2_inf,";",x$CI2_sup,"]") cat("\n") cat("\n") } } } # summary method summary.mesofas <- function(x, ...) { # Init options(digits=3) if (x$var!="NaN") { sd <- sqrt(x$var) } else { sd <- "NaN" } if(x$type=="asym") { if (x$var1!="NaN") { sd1 <- sqrt(x$var1) } else { sd1 <- "NaN" } if (x$var2!="NaN") { sd2 <- sqrt(x$var2) } else { sd2 <- "NaN" } } # Display cat("\n") cat(x$title) cat("\n") cat(" Value sd p 95% CI ") cat("\n") cat("-----------------------------------------------") cat("\n") if(x$type=="asym1") { cat("Asym ",x$value,sd,x$p,x$CI_inf,x$CI_sup) } else { cat("Sym ",x$value,sd,x$p,x$CI_inf,x$CI_sup) } cat("\n") cat("-----------------------------------------------") cat("\n") if(x$type=="asym") { cat("Y|X ",x$value1,sd1,x$p1,x$CI1_inf,x$CI1_sup) cat("\n") cat("X|Y ",x$value2,sd2,x$p2,x$CI2_inf,x$CI2_sup) cat("\n") cat("-----------------------------------------------") cat("\n") } } # chi-2 method mesofas.chi2 <- function(x,y,table,...) { options(warn=-1) if(missing(table)) { table <- "FALSE" } CT <- xtabs(~x+y) OUT <- summary(CT) title <- "Chi-2 independence test" value <- OUT$statistic df <- OUT$parameter p <- OUT$p.value n <- OUT$n.cases call <- match.call() remark <- "" type <- "sym" var <- "NaN" CI_inf <- qchisq(0.025,df,value) CI_sup <- qchisq(0.975,df,value) structure(list(table=table,CT=CT,title=title,value=value,df=df,p=p,n=n,call=call, remark=remark,type=type,CI_inf=CI_inf,CI_sup=CI_sup,var=var),class="mesofas") } # Spearman's rank correlation method mesofas.spearman <- function(x,y,table,...) { options(warn=-1) if(missing(table)) { table <- "FALSE" } CT <- xtabs(~x+y) if (is.numeric(x)=="FALSE"){ x <- as.numeric(x)} if (is.numeric(y)=="FALSE"){ y <- as.numeric(y)} # ranking x <- rank(x,ties.method="average",na.last="keep") y <- rank(y,ties.method="average",na.last="keep") OUT <- cor.test(x,y,method="pearson") title <- "Spearman's rank correlation" value <- OUT$estimate df <- "NaN" p <- OUT$p.value n <- length(x) call <- match.call() remark <- "Cannot compute exact p-values with ties" type <- "sym" var <- "NaN" CI_inf <- OUT$conf.int[1] CI_sup <- OUT$conf.int[2] structure(list(table=table,CT=CT,title=title,value=value,df=df,p=p,n=n,call=call, remark=remark,type=type,CI_inf=CI_inf,CI_sup=CI_sup,var=var),class="mesofas") } # Pearson's correlation method mesofas.pearson <- function(x,y,table,...) { if(missing(table)) { table <- "FALSE" } CT <- xtabs(~x+y) if (is.numeric(x) & is.numeric(y)){ OUT <- cor.test(x,y,method="pearson") value <- OUT$estimate p <- OUT$p.value CI_inf <- OUT$conf.int[1] CI_sup <- OUT$conf.int[2] remark <- "" } else { OUT <- "NaN" value <- "NaN" p <- "NaN" CI_inf <- "NaN" CI_sup <- "NaN" remark <- "Valid only for numerical variables." } title <- "Pearson's correlation" df <- "NaN" n <- length(x) call <- match.call() type <- "sym" var <- "NaN" structure(list(table=table,CT=CT,title=title,value=value,df=df,p=p,n=n,call=call, remark=remark,type=type,CI_inf=CI_inf,CI_sup=CI_sup,var=var),class="mesofas") } # Cramer's V method mesofas.V <- function(x,y,table,...) { options(warn=-1) if(missing(table)) { table <- "FALSE" } CT <- xtabs(~x+y) OUT <- summary(CT) title <- "Cramer's V" K2 <- OUT$statistic maxK2 <- OUT$n.cases*(min(nrow(CT),ncol(CT))-1) value <- sqrt(K2/maxK2) df <- OUT$parameter p <- OUT$p.value n <- OUT$n.cases call <- match.call() remark <- "" type <- "sym" var <- "NaN" CI_inf <- sqrt((qchisq(0.025,df,K2)+df)/maxK2) CI_sup <- sqrt((qchisq(0.975,df,K2)+df)/maxK2) structure(list(table=table,CT=CT,title=title,value=value,df=df,p=p,n=n,call=call, remark=remark,type=type,CI_inf=CI_inf,CI_sup=CI_sup,var=var),class="mesofas") } # Pearson's phi method mesofas.phi <- function(x,y,table,...) { options(warn=-1) if(missing(table)) { table <- "FALSE" } CT <- xtabs(~x+y) OUT <- summary(CT) title <- "Pearson's phi" K2 <- OUT$statistic value <- sqrt(K2/OUT$n.cases) df <- OUT$parameter p <- OUT$p.value n <- OUT$n.cases call <- match.call() remark <- "" type <- "sym" var <- "NaN" CI_inf <- "NaN" CI_sup <- "NaN" structure(list(table=table,CT=CT,title=title,value=value,df=df,p=p,n=n,call=call, remark=remark,type=type,var=var,CI_inf=CI_inf,CI_sup=CI_sup),class="mesofas") } # Pearson's C contingency coefficient method mesofas.C <- function(x,y,table,...) { options(warn=-1) if(missing(table)) { table <- "FALSE" } CT <- xtabs(~x+y) OUT <- summary(CT) title <- "Pearson's C contingency coefficient" K2 <- OUT$statistic value <- sqrt(K2/(K2+OUT$n.cases)) df <- OUT$parameter p <- OUT$p.value n <- OUT$n.cases call <- match.call() remark <- "" type <- "sym" var <- "NaN" CI_inf <- "NaN" CI_sup <- "NaN" structure(list(table=table,CT=CT,title=title,value=value,df=df,p=p,n=n,call=call, remark=remark,type=type,var=var,CI_inf=CI_inf,CI_sup=CI_sup),class="mesofas") } # Sakoda's adjusted Pearson's C contingency coefficient method mesofas.Sakoda <- function(x,y,table,...) { options(warn=-1) if(missing(table)) { table <- "FALSE" } CT <- xtabs(~x+y) OUT <- summary(CT) title <- "Sakoda's adjusted Pearson's C" K2 <- OUT$statistic k <- min(nrow(CT),ncol(CT)) n <- OUT$n.cases value <- sqrt((K2*k)/((K2+n)*(k-1))) df <- OUT$parameter p <- OUT$p.value call <- match.call() remark <- "" type <- "sym" var <- "NaN" CI_inf <- "NaN" CI_sup <- "NaN" structure(list(table=table,CT=CT,title=title,value=value,df=df,p=p,n=n,call=call, remark=remark,type=type,var=var,CI_inf=CI_inf,CI_sup=CI_sup),class="mesofas") } # Tschuprow's T method mesofas.T <- function(x,y,table,...) { options(warn=-1) if(missing(table)) { table <- "FALSE" } CT <- xtabs(~x+y) OUT <- summary(CT) title <- "Tschuprow's T" K2 <- OUT$statistic denom <- OUT$n.cases*sqrt((nrow(CT)-1)*(ncol(CT)-1)) value <- sqrt(K2/denom) df <- OUT$parameter p <- OUT$p.value n <- OUT$n.cases call <- match.call() remark <- "" type <- "sym" var <- "NaN" CI_inf <- "NaN" CI_sup <- "NaN" structure(list(table=table,CT=CT,title=title,value=value,df=df,p=p,n=n,call=call, remark=remark,type=type,var=var,CI_inf=CI_inf,CI_sup=CI_sup),class="mesofas") } # Goodman and Kruskal's lambda method mesofas.lambda <- function(x,y,table,...) { options(warn=-1) if(missing(table)) { table <- "FALSE" } CT <- xtabs(~x+y) OUT <- summary(CT) title <- "Goodman and Kruskal's lambda" n <- OUT$n.cases call <- match.call() remark <- "" type <- "asym" df <- "NaN" ######## # A. Y|X # value t1 <- 0 for (i in 1:nrow(CT)) { t1 <- t1+max(CT[i,]) } t2 <- max(colSums(CT)) value1 <- (t1-t2)/(OUT$n.cases-t2) # variance # Rem: in case of ties, we take the average. colmax <- which(colSums(CT)==max(colSums(CT))) t3<-c(rep(0,length(colmax))) for (j in 1:length(colmax)) { for (i in 1:nrow(CT)){ if (CT[i,colmax[j]]==max(CT[i,])){ t3[j] <- t3[j]+CT[i,colmax[j]] } } } t3 <- mean(t3) var1 <- ((OUT$n.cases-t1)*(t1+t2-2*t3))/((OUT$n.cases-t2)**3) # test z1 <- value1/sqrt(var1) p1 <- 1-pnorm(z1) # 95% CI CI1_inf <- value1-1.96*sqrt(var1) CI1_sup <- value1+1.96*sqrt(var1) ######## # B. X|Y # value t4 <- 0 for (i in 1:ncol(CT)) { t4 <- t4+max(CT[,i]) } t5 <- max(rowSums(CT)) value2 <- (t4-t5)/(OUT$n.cases-t5) # variance # Rem: in case of ties, we take the average. rowmax <- which(rowSums(CT)==max(rowSums(CT))) t6<-c(rep(0,length(rowmax))) for (j in 1:length(rowmax)) { for (i in 1:ncol(CT)){ if (CT[rowmax[j],i]==max(CT[,i])){ t6[j] <- t6[j]+CT[rowmax[j],i] } } } t6 <- mean(t6) var2 <- ((OUT$n.cases-t4)*(t4+t5-2*t6))/((OUT$n.cases-t5)**3) # test z2 <- value2/sqrt(var2) p2 <- 1-pnorm(z2) # 95% CI CI2_inf <- value2-1.96*sqrt(var2) CI2_sup <- value2+1.96*sqrt(var2) ############# # C. symetric # value value <- (t1+t4-t2-t5)/(2*OUT$n.cases-t2-t5) # variance (to be checked: computation of t9: peut donner des valeurs négatives!!) t7 <- 2*OUT$n.cases-t2-t5 t8 <- 2*OUT$n.cases-t1-t4 t9 <- 8*OUT$n.cases-t7-t8-2*(t3+t6+t2+t4) colmax <- which(colSums(CT)==max(colSums(CT))) rowmax <- which(rowSums(CT)==max(rowSums(CT))) t10<-CT[rowmax,colmax] t10 <- mean(t10) t11 <- max(CT) var <- (1/t7**4)*(t7*t8*t9-2*(t7**2)*(OUT$n.cases-t10)-2*(t8**2)*(OUT$n.cases-t11)) #test z <- value/sqrt(var) p <- 2*(1-pnorm(z)) # 95% CI CI_inf <- value-1.96*sqrt(var) CI_sup <- value+1.96*sqrt(var) structure(list(table=table,CT=CT,title=title,value=value,var=var,df=df,p=p,n=n,call=call, remark=remark,type=type,value1=value1,var1=var1,p1=p1,value2=value2,var2=var2,p2=p2, CI_inf=CI_inf,CI_sup=CI_sup,CI1_inf=CI1_inf,CI1_sup=CI1_sup,CI2_inf=CI2_inf,CI2_sup=CI2_sup), class="mesofas") } # Concordance and discordance method mesofas.condis <- function(CT,...) { CT <- as.matrix(CT) nr <- nrow(CT) nc <- ncol(CT) q <- min(nr,nc) # grand total n <- sum(CT) # number of pairs T <- n*(n-1)/2 # number of concordant pairs Nc <- 0 for (i in 1:(nr-1)) { for (j in 1:(nc-1)) { Nc <- Nc+CT[i,j]*sum(CT[(i+1):nr,(j+1):nc]) } } # number of discordant pairs Nd <- 0 for (i in 1:(nr-1)) { for (j in 2:nc) { Nd <- Nd+CT[i,j]*sum(CT[(i+1):nr,1:(j-1)]) } } # number of pairs with ties on the row Tr <- 0 for (i in 1:nr) { for (j in 1:(nc-1)) { Tr <- Tr+CT[i,j]*sum(CT[i,(j+1):nc]) } } # number of pairs with ties on the column Tc <- 0 for (j in 1:nc) { for (i in 1:(nr-1)) { Tc <- Tc+CT[i,j]*sum(CT[(i+1):nr,j]) } } structure(list(nr=nr,nc=nc,q=q,n=n,T=T,Nc=Nc,Nd=Nd,Tr=Tr,Tc=Tc)) } # Kendall's tau_a method mesofas.tau_a <- function(x,y,table,...) { options(warn=-1) if(missing(table)) { table <- "FALSE" } CT <- xtabs(~x+y) cd <- mesofas.condis(CT) title <- "Kendall's tau a" value <- (cd$Nc-cd$Nd)/cd$T var <- "NaN" df <- "NaN" p <- "NaN" n <- cd$n call <- match.call() remark <- "" type <- "sym" CI_inf <- "NaN" CI_sup <- "NaN" structure(list(table=table,CT=CT,title=title,value=value,var=var,df=df,p=p,n=n,call=call, remark=remark,type=type,CI_inf=CI_inf,CI_sup=CI_sup),class="mesofas") } # Kendall's tau_b method mesofas.tau_b <- function(x,y,table,...) { options(warn=-1) if(missing(table)) { table <- "FALSE" } CT <- xtabs(~x+y) cd <- mesofas.condis(CT) title <- "Kendall's tau b" value <- (cd$Nc-cd$Nd)/sqrt((cd$Nc+cd$Nd+cd$Tc)*(cd$Nc+cd$Nd+cd$Tr)) var <- "NaN" df <- "NaN" p <- "NaN" n <- cd$n call <- match.call() remark <- "" type <- "sym" CI_inf <- "NaN" CI_sup <- "NaN" structure(list(table=table,CT=CT,title=title,value=value,var=var,df=df,p=p,n=n,call=call, remark=remark,type=type,CI_inf=CI_inf,CI_sup=CI_sup),class="mesofas") } # Kendall-Stuart's tau_c method mesofas.tau_c <- function(x,y,table,...) { options(warn=-1) if(missing(table)) { table <- "FALSE" } CT <- xtabs(~x+y) cd <- mesofas.condis(CT) title <- "Kendall-Stuart's tau c" value <- 2*(cd$Nc-cd$Nd)/(cd$n**2)*(cd$q/(cd$q-1)) var <- "NaN" df <- "NaN" p <- "NaN" n <- cd$n call <- match.call() remark <- "" type <- "sym" CI_inf <- "NaN" CI_sup <- "NaN" structure(list(table=table,CT=CT,title=title,value=value,var=var,df=df,p=p,n=n,call=call, remark=remark,type=type,CI_inf=CI_inf,CI_sup=CI_sup),class="mesofas") } # Goodman & Kruskal's gamma method mesofas.gamma <- function(x,y,table,...) { options(warn=-1) if(missing(table)) { table <- "FALSE" } CT <- xtabs(~x+y) cd <- mesofas.condis(CT) title <- "Goodman & Kruskal's gamma" value <- (cd$Nc-cd$Nd)/(cd$Nc+cd$Nd) var <- "NaN" df <- "NaN" p <- "NaN" n <- cd$n call <- match.call() remark <- "" type <- "sym" CI_inf <- "NaN" CI_sup <- "NaN" structure(list(table=table,CT=CT,title=title,value=value,var=var,df=df,p=p,n=n,call=call, remark=remark,type=type,CI_inf=CI_inf,CI_sup=CI_sup),class="mesofas") } # Somers' d method mesofas.d <- function(x,y,table,...) { options(warn=-1) if(missing(table)) { table <- "FALSE" } CT <- xtabs(~x+y) cd <- mesofas.condis(CT) title <- "Somers' d" df <- "NaN" n <- cd$n call <- match.call() remark <- "" type <- "asym" ######## # A. Y|X # value value1 <- (cd$Nc-cd$Nd)/(cd$Nc+cd$Nd+cd$Tc) # variance nr <- nrow(CT) nc <- ncol(CT) AD <- array(0,dim=c(nr,nc)) for (i in 1:(nr-1)) { for (j in 1:(nc-1)) { AD[i,j] <- AD[i,j]+sum(CT[(i+1):nr,(j+1):nc]) } } for (i in 2:nr) { for (j in 2:nc) { AD[i,j] <- AD[i,j]+sum(CT[1:(i-1),1:(j-1)]) } } for (i in 2:nr) { for (j in 1:(nc-1)) { AD[i,j] <- AD[i,j]-sum(CT[1:(i-1),(j+1):nc]) } } for (i in 1:(nr-1)) { for (j in 2:nc) { AD[i,j] <- AD[i,j]-sum(CT[(i+1):nr,1:(j-1)]) } } TT <- 0 for (i in 1:nr){ for (j in 1:nc){ TT <- TT+CT[i,j]*((2*(cd$Nc+cd$Nd+cd$Tc)*AD[i,j]-(2*cd$Nc-2*cd$Nd)*(cd$n-sum(CT[i,])))**2) } } var1 <- (4/((2*(cd$Nc+cd$Nd+cd$Tc))**4))*TT # test z1 <- value1/sqrt(var1) p1 <- 2*(1-pnorm(abs(z1))) # 95% CI CI1_inf <- value1-1.96*sqrt(var1) CI1_sup <- value1+1.96*sqrt(var1) ######## # B. X|Y # value value2 <- (cd$Nc-cd$Nd)/(cd$Nc+cd$Nd+cd$Tr) # variance: à vérifier CT2 <- t(CT) AD <- t(AD) TT <- 0 for (j in 1:nr){ for (i in 1:nc){ TT <- TT+CT2[i,j]*((2*(cd$Nc+cd$Nd+cd$Tr)*AD[i,j]-(2*cd$Nd-2*cd$Nc)*(cd$n-sum(CT2[i,])))**2) } } var2 <- (4/((2*(cd$Nc+cd$Nd+cd$Tr))**4))*TT # test z2 <- value2/sqrt(var2) p2 <- 2*(1-pnorm(abs(z2))) # 95% CI CI2_inf <- value2-1.96*sqrt(var2) CI2_sup <- value2+1.96*sqrt(var2) ############# # C. symetric # value value <- ((cd$Nc+cd$Nd+cd$Tc)*value1+(cd$Nc+cd$Nd+cd$Tr)*value2)/(2*cd$Nc+2*cd$Nd+cd$Tc+cd$Tr) # variance var <- "NaN" #test z <- "NaN" p <- "NaN" # 95% CI CI_inf <- "NaN" CI_sup <- "NaN" structure(list(table=table,CT=CT,title=title,value=value,var=var,df=df,p=p,n=n,call=call, remark=remark,type=type,value1=value1,var1=var1,p1=p1,value2=value2,var2=var2,p2=p2, CI_inf=CI_inf,CI_sup=CI_sup,CI1_inf=CI1_inf,CI1_sup=CI1_sup,CI2_inf=CI2_inf,CI2_sup=CI2_sup), class="mesofas") } # Goodman & Kruskal's tau method mesofas.tau <- function(x,y,table,...) { options(warn=-1) if(missing(table)) { table <- "FALSE" } CT <- xtabs(~x+y) OUT <- summary(CT) title <- "Goodman & Kruskal's tau" df <- "NaN" n <- OUT$n.cases call <- match.call() remark <- "" type <- "asym" # Preliminary computations nr <- nrow(CT) nc <- ncol(CT) rs <- rowSums(CT) cs <- colSums(CT) ######## # A. Y|X # value t1 <- 0 for (i in 1:nr){ for (j in 1:nc){ t1 <- t1+(CT[i,j]**2)/rs[i] } } t2 <- sum(cs**2) value1 <- (n*t1-t2)/(n**2-t2) # variance (under the multinomial sampling model) var1 <- 0 tt1 <- (1/n)*(n-t1) tt2 <- (1/n**2)*(n**2-t2) tt3 <- tt2*(tt1+1)-2*tt1 tt4 <- matrix(0,nrow=nr,ncol=nc) for (i in 1:nr){ for (j in 1:nc){ tt4[i,j] <- -2*tt1*cs[j]/n+2*tt2*(CT[i,j]/n)*(n/rs[i]) tt5 <- 0 for (k in 1:nc){ tt5 <- tt5+(CT[i,k]/rs[i])**2 } tt4[i,j] <- tt4[i,j]-tt2*tt5 } } for (i in 1:nr){ for (j in 1:nc){ var1 <- var1+CT[i,j]*(tt4[i,j]-tt3)**2 } } var1 <- var1/((n**2)*(tt2**4)) # test z1 <- value1/sqrt(var1) p1 <- 1-pnorm(z1) # 95% CI CI1_inf <- value1-1.96*sqrt(var1) CI1_sup <- value1+1.96*sqrt(var1) ######## # B. X|Y # value t3 <- 0 for (i in 1:nr){ for (j in 1:nc){ t3 <- t3+(CT[i,j]**2)/cs[j] } } t4 <- sum(rs**2) value2 <- (n*t3-t4)/(n**2-t4) # variance (under the multinomial sampling model) var2 <- 0 tt1 <- (1/n)*(n-t3) tt2 <- (1/n**2)*(n**2-t4) tt3 <- tt2*(tt1+1)-2*tt1 tt4 <- matrix(0,nrow=nr,ncol=nc) for (i in 1:nr){ for (j in 1:nc){ tt4[i,j] <- -2*tt1*rs[i]/n+2*tt2*(CT[i,j]/n)*(n/cs[j]) tt5 <- 0 for (k in 1:nr){ tt5 <- tt5+(CT[k,j]/cs[j])**2 } tt4[i,j] <- tt4[i,j]-tt2*tt5 } } for (i in 1:nr){ for (j in 1:nc){ var2 <- var2+CT[i,j]*(tt4[i,j]-tt3)**2 } } var2 <- var2/((n**2)*(tt2**4)) # test z2 <- value2/sqrt(var2) p2 <- 1-pnorm(z2) # 95% CI CI2_inf <- value2-1.96*sqrt(var2) CI2_sup <- value2+1.96*sqrt(var2) ############# # C. symetric # value value <- ((n**2-t2)*value1+(n**2-t4)*value2)/(2*n**2-t2-t4) # variance var <- "NaN" #test z <- "NaN" p <- "NaN" # 95% CI CI_inf <- "NaN" CI_sup <- "NaN" structure(list(table=table,CT=CT,title=title,value=value,var=var,df=df,p=p,n=n,call=call, remark=remark,type=type,value1=value1,var1=var1,p1=p1,value2=value2,var2=var2,p2=p2, CI_inf=CI_inf,CI_sup=CI_sup,CI1_inf=CI1_inf,CI1_sup=CI1_sup,CI2_inf=CI2_inf,CI2_sup=CI2_sup), class="mesofas") } # Theil's u method mesofas.u <- function(x,y,table,...) { options(warn=-1) if(missing(table)) { table <- "FALSE" } CT <- xtabs(~x+y) OUT <- summary(CT) title <- "Theil's u (uncertainty coefficient)" df <- "NaN" n <- OUT$n.cases call <- match.call() remark <- "" type <- "asym" # Preliminary computations nr <- nrow(CT) nc <- ncol(CT) rs <- rowSums(CT) cs <- colSums(CT) ######## # A. Y|X # value t1 <- 0 for (i in 1:nr){ for (j in 1:nc){ t1 <- t1+CT[i,j]*log((rs[i]*cs[j])/(n*CT[i,j])) } } t2 <- 0 for (j in 1:nc){ t2 <- t2+cs[j]*log(cs[j]/n) } value1 <- t1/t2 # variance var1 <- 0 for (i in 1:nr){ for (j in 1:nc){ var1 <- var1+CT[i,j]*(t2*log(CT[i,j]/rs[i])+(t1-t2)*log(cs[j]/n))**2 } } var1 <- var1/((t2**4)) # test z1 <- value1/sqrt(var1) p1 <- 1-pnorm(z1) # 95% CI CI1_inf <- value1-1.96*sqrt(var1) CI1_sup <- value1+1.96*sqrt(var1) ######## # B. X|Y # value t3 <- 0 for (i in 1:nr){ t3 <- t3+rs[i]*log(rs[i]/n) } value2 <- t1/t3 # variance var2 <- 0 for (i in 1:nr){ for (j in 1:nc){ var2 <- var2+CT[i,j]*(t3*log(CT[i,j]/cs[j])+(t1-t3)*log(rs[i]/n))**2 } } var2 <- var2/((t3**4)) # test z2 <- value2/sqrt(var2) p2 <- 1-pnorm(z2) # 95% CI CI2_inf <- value2-1.96*sqrt(var2) CI2_sup <- value2+1.96*sqrt(var2) ############# # C. symetric # value value <- (t2*value1+t3*value2)/(t2+t3) # variance: à recalculer: certainement faux!! var <- 0 for (i in 1:nr){ for (j in 1:nc){ var <- var+CT[i,j]*(((t1-t2-t3)*log((rs[i]*cs[j])/(n**2)) -(t2+t3)*log(CT[i,j]/n))**2) } } var <- 4*var/((n**2)*((t2+t3)**4)) var <- "NaN" #test z <- "NaN" p <- "NaN" # 95% CI CI_inf <- "NaN" CI_sup <- "NaN" structure(list(table=table,CT=CT,title=title,value=value,var=var,df=df,p=p,n=n,call=call, remark=remark,type=type,value1=value1,var1=var1,p1=p1,value2=value2,var2=var2,p2=p2, CI_inf=CI_inf,CI_sup=CI_sup,CI1_inf=CI1_inf,CI1_sup=CI1_sup,CI2_inf=CI2_inf,CI2_sup=CI2_sup), class="mesofas") } # Eta method mesofas.eta <- function(x,y,table,...) { options(warn=-1) if(missing(table)) { table <- "FALSE" } CT <- xtabs(~x+y) if (is.numeric(y)){ OUT <- aov(y~x) SS <- summary(OUT)[[1]]$'Sum Sq' value <- sqrt(SS[1]/(SS[1]+SS[2])) p <- "NaN" CI_inf <- "NaN" CI_sup <- "NaN" remark <- "" } else { OUT <- "NaN" value <- "NaN" p <- "NaN" CI_inf <- "NaN" CI_sup <- "NaN" remark <- "The dependent variable must be numerical." } title <- "Eta" df <- "NaN" n <- length(x) call <- match.call() type <- "asym1" var <- "NaN" structure(list(table=table,CT=CT,title=title,value=value,df=df,p=p,n=n,call=call, remark=remark,type=type,CI_inf=CI_inf,CI_sup=CI_sup,var=var),class="mesofas") } # Odds ratio method mesofas.or <- function(x,y,table,...) { options(warn=-1) if(missing(table)) { table <- "FALSE" } CT <- xtabs(~x+y) OUT <- summary(CT) title <- "Odds ratio" n <- OUT$n.cases call <- match.call() type <- "sym" df <- "NaN" nr <- nrow(CT) nc <- ncol(CT) if (nr!=2 | nc!=2){ remark <- "This is not a 2x2 table. No computation." p <- "NaN" var <- "NaN" CI_inf <- "NaN" CI_sup <- "NaN" } else { CT2 <- CT+0.5 value <- (CT2[1,1]*CT2[2,2])/(CT2[1,2]*CT2[2,1]) SE <- sqrt(1/CT2[1,1]+1/CT2[1,2]+1/CT2[2,1]+1/CT2[2,2]) L <- log(value) p <- 2*(1-pnorm(abs(L)/SE)) remark <- "The odds ratio is computed with 0.5 added to each cell." var <- (value**2)*(1/CT2[1,1]+1/CT2[1,2]+1/CT2[2,1]+1/CT2[2,2]) CI_inf <- exp(L-1.96*SE) CI_sup <- exp(L+1.96*SE) } structure(list(table=table,CT=CT,title=title,value=value,df=df,p=p,n=n,call=call, remark=remark,type=type,CI_inf=CI_inf,CI_sup=CI_sup,var=var),class="mesofas") } # Yule's Q method mesofas.q <- function(x,y,table,...) { options(warn=-1) if(missing(table)) { table <- "FALSE" } CT <- xtabs(~x+y) OUT <- summary(CT) title <- "Yule's Q" n <- OUT$n.cases call <- match.call() type <- "sym" df <- "NaN" nr <- nrow(CT) nc <- ncol(CT) if (nr!=2 | nc!=2){ remark <- "This is not a 2x2 table. No computation." p <- "NaN" var <- "NaN" CI_inf <- "NaN" CI_sup <- "NaN" } else { CT2 <- CT+0.5 v <- (CT2[1,1]*CT2[2,2])/(CT2[1,2]*CT2[2,1]) value <- (v-1)/(v+1) p <- "NaN" remark <- "Yule's Q is computed with 0.5 added to each cell." var <- ((1-value**2)**2)/4*(1/CT2[1,1]+1/CT2[1,2]+1/CT2[2,1]+1/CT2[2,2]) CI_inf <- "NaN" CI_sup <- "NaN" } structure(list(table=table,CT=CT,title=title,value=value,df=df,p=p,n=n,call=call, remark=remark,type=type,CI_inf=CI_inf,CI_sup=CI_sup,var=var),class="mesofas") } # Yule's Y method mesofas.y <- function(x,y,table,...) { options(warn=-1) if(missing(table)) { table <- "FALSE" } CT <- xtabs(~x+y) OUT <- summary(CT) title <- "Yule's Y" n <- OUT$n.cases call <- match.call() type <- "sym" df <- "NaN" nr <- nrow(CT) nc <- ncol(CT) if (nr!=2 | nc!=2){ remark <- "This is not a 2x2 table. No computation." p <- "NaN" var <- "NaN" CI_inf <- "NaN" CI_sup <- "NaN" } else { CT2 <- CT+0.5 v <- (CT2[1,1]*CT2[2,2])/(CT2[1,2]*CT2[2,1]) value <- (sqrt(v)-1)/(sqrt(v)+1) p <- "NaN" remark <- "Yule's Y is computed with 0.5 added to each cell." var <- ((1-value**2)**2)/16*(1/CT2[1,1]+1/CT2[1,2]+1/CT2[2,1]+1/CT2[2,2]) CI_inf <- "NaN" CI_sup <- "NaN" } structure(list(table=table,CT=CT,title=title,value=value,df=df,p=p,n=n,call=call, remark=remark,type=type,CI_inf=CI_inf,CI_sup=CI_sup,var=var),class="mesofas") } # Relative risk method mesofas.rr <- function(x,y,table,modality,...) { options(warn=-1) call <- match.call() if(missing(table)) { table <- "FALSE" } if(missing(modality)) { modality <- 1 } if(modality!=1 & modality!=2) { modality <- 1 } CT <- xtabs(~x+y) OUT <- summary(CT) n <- OUT$n.cases type <- "asym1" df <- "NaN" if (modality==1){ CT2 <- CT title <- "Relative risk for the first column modality" } else { CT2 <- CT CT2[,1] <- CT[,2] CT2[,2] <- CT[,1] title <- "Relative risk for the second column modality" } nr <- nrow(CT) nc <- ncol(CT) if (nr!=2 | nc!=2){ remark <- "This is not a 2x2 table. No computation." value <- "NaN" p <- "NaN" var <- "NaN" CI_inf <- "NaN" CI_sup <- "NaN" } else { value <- ((CT2[1,1]+0.5)/(CT2[1,1]+CT2[1,2]+0.5))/((CT2[2,1]+0.5)/(CT2[2,1]+CT2[2,2]+0.5)) var <- "NaN" p <- "NaN" L <- log(value) tt <- (1/(CT2[1,1]+0.5)-1/(CT2[1,1]+CT2[1,2]+0.5)+1/(CT2[2,1]+0.5)-1/(CT2[2,1]+CT2[2,2]+0.5))**0.5 CI_inf <- exp(L-1.96*tt) CI_sup <- exp(L+1.96*tt) remark <- "The relative risk is computed with 0.5 added to relevant cells." } structure(list(table=table,CT=CT,title=title,value=value,df=df,p=p,n=n,call=call, remark=remark,type=type,CI_inf=CI_inf,CI_sup=CI_sup,var=var),class="mesofas") } # Cohen's kappa method mesofas.kappa <- function(x,y,table,...) { options(warn=-1) if(missing(table)) { table <- "FALSE" } CT <- xtabs(~x+y) OUT <- summary(CT) title <- "Cohen's kappa measure of agreement" n <- OUT$n.cases call <- match.call() type <- "sym" df <- "NaN" nr <- nrow(CT) nc <- ncol(CT) if (nr!=nc){ remark <- "This is not a squared table. No computation." p <- "NaN" var <- "NaN" CI_inf <- "NaN" CI_sup <- "NaN" } else { remark <- "" rs <- rowSums(CT) cs <- colSums(CT) theta1 <-0 for (i in 1:nr){ theta1 <- theta1+CT[i,i] } theta1 <- theta1/n theta2 <- 0 for (i in 1:nr){ theta2 <- theta2+rs[i]*cs[i] } theta2 <- theta2/(n**2) value <- (theta1-theta2)/(1-theta2) # test using the correct variance under the independence hypothesis theta5 <- 0 for (i in 1:nr){ theta5 <- theta5+rs[i]*cs[i]*(rs[i]/n+cs[i]/n) } theta5 <- theta5/(n**2) var0 <- (theta2+theta2**2-theta5)/(n*(1-theta2)**2) z <- value/sqrt(var0) p <- 2*(1-pnorm(abs(z))) # CI theta3 <-0 for (i in 1:nr){ theta3 <- theta3+CT[i,i]*(rs[i]/n+cs[i]/n) } theta3 <- theta3/n theta4 <-0 for (i in 1:nr){ for (j in 1:nc){ theta4 <- theta4+CT[i,j]*(rs[j]/n+cs[i]/n)**2 } } theta4 <- theta4/n v1 <- (theta1*(1-theta1))/((1-theta2)**2) v2 <- (2*(1-theta1)*(2*theta1*theta2-theta3))/((1-theta2)**3) v3 <- (((1-theta1)**2)*(theta4-4*(theta2**2)))/((1-theta2)**4) var <- (v1+v2+v3)/n CI_inf <- value-1.96*sqrt(var) CI_sup <- value+1.96*sqrt(var) } structure(list(table=table,CT=CT,title=title,value=value,df=df,p=p,n=n,call=call, remark=remark,type=type,CI_inf=CI_inf,CI_sup=CI_sup,var=var),class="mesofas") }