>Example 3.2 Government vs Private Salaries > gov.data <- read.table("http://www.rohan.sdsu.edu/~babailey/stat672/t3-2.txt", header=T) > gov.data private government 1 12500 11750 2 22300 20900 3 14500 14800 4 32300 29900 5 20800 21500 6 19200 18400 7 15800 14500 8 17500 17900 9 23300 21400 10 42100 43200 11 16800 15200 12 14500 14200 > attach(gov.data) > #From Charlie Geyer's code: Wilcoxon Sign Rank Test > > mu <- 0 # hypothesized value of median > z <- sort(private - government) > z <- z - mu > z <- z[z != 0] > n <- length(z) > r <- rank(abs(z)) > print(rbind(z, r)) [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] z -1100 -700 -400 -300.0 300.0 750 800 1300 1400 1600 1900 2400 r 7 4 3 1.5 1.5 5 6 8 9 10 11 12 > print(tplus <- sum(r[z > 0])) [1] 62.5 > psignrank(floor(tplus) - 1, n, lower.tail=FALSE) [1] 0.03857422 > > #Note there are ties, so wrong p-value! > > #Sample median of Walsh averages > > print(z <- sort(private - government)) [1] -1100 -700 -400 -300 300 750 800 1300 1400 1600 1900 2400 > walsh <- outer(z, z, "+") / 2 > print(walsh <- sort(walsh[lower.tri(walsh, diag = TRUE)])) [1] -1100 -900 -750 -700 -700 -550 -500 -400 -400 -350 -300 -200 [13] -175 -150 -50 0 25 50 100 150 175 200 225 250 [25] 250 300 300 350 400 450 450 500 500 525 550 550 [37] 600 600 650 650 750 750 775 800 800 800 850 850 [49] 950 1000 1025 1050 1050 1075 1100 1100 1175 1200 1300 1325 [61] 1350 1350 1350 1400 1450 1500 1575 1600 1600 1600 1650 1750 [73] 1850 1900 1900 2000 2150 2400 > median(walsh) [1] 650 > > #Confidence Intervals > conf.level <- 0.95 > print(z <- sort(private - government)) [1] -1100 -700 -400 -300 300 750 800 1300 1400 1600 1900 2400 > print(n <- length(z)) [1] 12 > walsh <- outer(z, z, "+") / 2 > print(walsh <- sort(walsh[lower.tri(walsh, diag = TRUE)])) [1] -1100 -900 -750 -700 -700 -550 -500 -400 -400 -350 -300 -200 [13] -175 -150 -50 0 25 50 100 150 175 200 225 250 [25] 250 300 300 350 400 450 450 500 500 525 550 550 [37] 600 600 650 650 750 750 775 800 800 800 850 850 [49] 950 1000 1025 1050 1050 1075 1100 1100 1175 1200 1300 1325 [61] 1350 1350 1350 1400 1450 1500 1575 1600 1600 1600 1650 1750 [73] 1850 1900 1900 2000 2150 2400 > print(m <- length(walsh)) [1] 78 > alpha <- 1 - conf.level > k <- qsignrank(alpha / 2, n) > if (k == 0) k <- k + 1 > print(k) [1] 14 > cat("achieved confidence level:",1 - 2 * psignrank(k - 1, n), "\n") achieved confidence level: 0.9575195 > c(walsh[k], walsh[m + 1 - k]) [1] -150 1450 > > > #or can use: > wilcox.test(private, government, paired=TRUE, alternative="greater") Wilcoxon signed rank test with continuity correction data: private and government V = 62.5, p-value = 0.03554 alternative hypothesis: true location shift is greater than 0 Warning message: In wilcox.test.default(private, government, paired = TRUE, alternative = "greater") : cannot compute exact p-value with ties > #Note there are ties, so wrong p-value! > wilcox.test(private, government, paired=TRUE, conf.int=TRUE) Wilcoxon signed rank test with continuity correction data: private and government V = 62.5, p-value = 0.07108 alternative hypothesis: true location shift is not equal to 0 95 percent confidence interval: -150.0000 1400.0000 sample estimates: (pseudo)median 650 Warning messages: 1: In wilcox.test.default(private, government, paired = TRUE, conf.int = TRUE) : cannot compute exact p-value with ties 2: In wilcox.test.default(private, government, paired = TRUE, conf.int = TRUE) : cannot compute exact confidence interval with ties > #or the exact test: > library(exactRankTests) Package ‘exactRankTests’ is no longer under development. Please consider using package ‘coin’ instead. > wilcox.exact(private, government, paired = TRUE, alternative = "greater") Exact Wilcoxon signed rank test data: private and government V = 62.5, p-value = 0.03345 alternative hypothesis: true mu is greater than 0 > wilcox.exact(private, government, paired = TRUE, conf.int=TRUE) Exact Wilcoxon signed rank test data: private and government V = 62.5, p-value = 0.0669 alternative hypothesis: true mu is not equal to 0 95 percent confidence interval: -150 1400 sample estimates: (pseudo)median 625 > > > proc.time() user system elapsed 2.008 0.060 2.083