# 4.3 Univariate Analysis: Ordinal Variable ----

# import the relevant datafile (it must exist as the object `Datafile`
# before this section is run)
# view the datafile in script editor pane (top left)
View(Datafile)
# attach the datafile to access the objects in the datafile
attach(Datafile)

# tell the software that the variable "Variable" is categorical
# (this creates a copy in the global environment that masks the attached one)
Variable = as.factor(Variable)
# Naming the categories (assumes exactly five levels are present, in this
# order)
Variable = factor(
  Variable,
  labels = c("Highly Dissatisfied", "Dissatisfied", "Neutral",
             "Satisfied", "Highly Satisfied")
)

# absolute frequencies
summary(Variable)
# another way for absolute frequencies
# NOTE: `t`, `c` and `l` shadow base names; calls such as c(...) and t(...)
# still work because R skips non-function objects when resolving a call.
t = table(Variable)
t
# total observations (sample size)
l = length(Variable)
l
# relative frequencies (= absolute frequencies / sample size)
t / l
# rounding off the relative frequencies till three decimal places
round(t / l, 3)
# cumulative frequencies
c = cumsum(t)
c
# cumulative frequency percentages with round off till three decimal places
c_perc = round((c / l) * 100, 3)
c_perc

# putting the same in a dataframe (columns are named "Variable" and "Freq")
Data_frame = data.frame(t)
Data_frame
# dataframe for cumulative frequencies
Data_frame$cumulative = cumsum(Data_frame$Freq)
Data_frame
# dataframe for cumulative frequency percentages
Data_frame$c_perc = Data_frame$cumulative / sum(t) * 100
Data_frame

# "agrmt" package required to estimate measure of consensus
library(agrmt)
consensus(t)

# making only the bar graph (simple)
barplot(
  t,
  col = c("indianred3", "indianred1", "grey", "aquamarine2", "aquamarine4"),
  ylim = c(0, 400),
  xlab = "Satisfaction Level"
)

# the bar-chart (using ggplot2 package)
# "ggplot2" package required for graphical representation
library(ggplot2)
# the category column of Data_frame is called "Variable" (inherited from the
# table), so that is the name mapped to the x-axis
ggplot(Data_frame, aes(x = Variable)) +
  geom_col(
    aes(y = Freq),
    fill = c("indianred3", "indianred1", "grey", "aquamarine2", "aquamarine4")
  )

# a line chart
# the 'group = 1' is added so it knows which dots need to be connected;
# we want all of them to be connected so 'group them as 1'.
ggplot(Data_frame, aes(x = Variable)) +
  geom_point(aes(y = c_perc)) +
  geom_path(aes(y = c_perc), group = 1)

# now to get both in one graph, we need to convert the scale on the left to
# the one on the right:
# the maximum frequency should equal the maximum cumulative percent, and the
# maximum cumulative percent is 100, so the scaling factor is
# 100 / (maximum frequency)
scaleRight = 100 / max(Data_frame$Freq)
scaleRight

ggplot(Data_frame, aes(x = Variable)) +
  geom_col(
    aes(y = Freq),
    fill = c("indianred3", "indianred1", "grey", "aquamarine2", "aquamarine4")
  ) +
  geom_point(aes(y = c_perc / scaleRight)) +
  geom_path(aes(y = c_perc / scaleRight), group = 1, colour = "blue",
            size = 0.9) +
  scale_y_continuous(
    sec.axis = sec_axis(~ . * scaleRight, name = "Cumulative (%)")
  ) +
  labs(title = "Dual Axis Chart", x = "name for x-axis", y = "Count") +
  theme_classic()

# apply Wilcoxon Test (for testing hypothesis)
# wilcox.test(name of the variable, mu = central value on scale)
# wilcox.test() needs a numeric vector, so the factor is converted to its
# level codes (1 = "Highly Dissatisfied", ..., 5 = "Highly Satisfied")
wilcox.test(as.numeric(Variable), mu = 3, correct = FALSE)

# Ordinal Logistic Regression (1 DV with more than two categories where order
# matters and no IDV)
# "MASS" package required for ordinal logistic regression
library(MASS)
regression = polr(Variable ~ 1, Hess = TRUE)
# summary of above regression equation
summary(regression)

# computing z-value from Wilcoxon test statistic to be used for effect size
wtest = wilcox.test(as.numeric(Variable), mu = 3, correct = FALSE)
Z = abs(qnorm(wtest$p.value / 2))
Z
# computing effect size (r = Z / sqrt(n))
Eff_Size = Z / sqrt(l)
Eff_Size

# detach the datafile to remove the datafile
detach(Datafile)


# 5.3 Bivariate Analysis: Binary = f (Metric) ----

# import the relevant datafile (it must exist as the object `Datafile`)
# view the datafile in script editor pane (top left)
View(Datafile)
# attach the datafile to access the objects in the datafile
attach(Datafile)

# tell the software that the variable 'DV' is categorical
DV = as.factor(DV)

# Binary Logistic Regression (1 DV with two categories and 1 IDV which is
# metric in nature)
x = glm(DV ~ IDV, family = "binomial")
# summary of above regression equation
summary(x)
# extracting only the coefficients from the above results
summary(x)$coefficients
# finding the exponential of the coefficients (odds ratios)
exp(coefficients(x))

# dataframe in ascending order for Binary Logistic Curve
y = data.frame(
  IDV = seq(min(Datafile$IDV), max(Datafile$IDV), length.out = 100)
)
# predicted probabilities over the grid of IDV values
y$DV = predict(x, newdata = y, type = "response")
# plot the original data points
# NOTE(review): this reads DV from Datafile (not the factor copy above);
# presumably it is coded 0/1 so it shares the probability axis - confirm
plot(DV ~ IDV, data = Datafile, col = "red4")
# fitting the regression curve
lines(DV ~ IDV, y, col = "green4", lwd = 2)

# detach the datafile to remove the datafile
detach(Datafile)


# 9.2 Two Paired Nominal Samples ----

# import the relevant datafile (it must exist as the object `Datafile`)
# view the datafile in script editor pane (top left)
View(Datafile)
# attach the datafile to access the objects in the datafile
attach(Datafile)

# tell the software that the sample 'Choice_Before' is categorical
Choice_Before = as.factor(Choice_Before)
# tell the software that the sample 'Choice_After' is categorical
Choice_After = as.factor(Choice_After)

# contingency table for absolute frequencies
# (rows = Choice_Before, columns = Choice_After)
t = table(Choice_Before, Choice_After)
t
# row totals
margin.table(t, 1)
# column totals
margin.table(t, 2)
# relative frequencies in percentage: out of total
prop.table(t) * 100
# relative frequencies in percentage: row-wise
row_pc = round(prop.table(t, 1) * 100, 2)
row_pc
# relative frequencies in percentage: column-wise
column_pc = round(prop.table(t, 2) * 100, 2)
column_pc

# heatmap plot
# re-label the table as a plain 4 x 4 matrix (template assumes four
# categories, labelled A-D)
t = matrix(
  c(t),
  nrow = 4,
  byrow = FALSE,
  dimnames = list(c("A", "B", "C", "D"), c("A", "B", "C", "D"))
)
t
# ggplot2 and RColorBrewer packages required
library(ggplot2)
library(RColorBrewer)
# create a grayscale color palette
# reverse the gray palette to get darker shades for higher frequency
gray_palette = rev(gray.colors(100))
# create a data frame for plotting; expand.grid() varies the first factor
# fastest, which matches the column-major order of c(t)
df = expand.grid(Choice_Before = rownames(t), Choice_After = colnames(t))
df$Frequency = c(t)
# create heatmap plot with shades of grey and adjusted font size
ggplot(df, aes(x = Choice_Before, y = Choice_After, fill = Frequency)) +
  geom_tile(color = "white") +
  scale_fill_gradientn(colors = gray_palette, guide = "none") +
  labs(x = "Choice: Before", y = "Choice: After") +
  theme_classic() +
  theme(
    axis.text.x = element_text(angle = 0, hjust = 1),
    axis.text = element_text(size = 11)
  ) +
  geom_text(aes(label = paste("n =", Frequency)), color = "black", size = 4)

# the compound bar chart
# create bartable first
bf = prop.table(table(Choice_Before))
af = prop.table(table(Choice_After))
# t(...) here is the base transpose function; the matrix named `t` does not
# interfere with the call
bartable = t(rbind(bf, af))
colnames(bartable) = c("Choice: Before", "Choice: After")
rownames(bartable) = c("A", "B", "C", "D")
bartable
# plot bartable
barplot(
  bartable,
  col = c("cadetblue1", "beige", "lavender", "rosybrown1"),
  legend.text = TRUE,
  args.legend = list(x = "bottom", inset = c(0, -0.2), horiz = TRUE),
  horiz = TRUE
)

# spine plot
spineplot(
  Choice_Before, Choice_After,
  xlab = "Choice_Before", ylab = "Choice_After",
  col = c("cadetblue1", "beige", "lavender", "rosybrown1")
)

# "irr" package required for applying Bhapkar Test (for testing hypothesis)
library(irr)
bhapkar(data.frame(Choice_Before, Choice_After))

# applying McNemar Test
library(stats)
mcnemar.test(t)

# post-hoc pairwise comparison
# For every pair of categories (i, j) the discordant cells t[i, j] and
# t[j, i] are compared with an exact binomial test and with the
# chi-square approximation; both are Bonferroni-adjusted for the number of
# pairs and capped at 1.
nRows = dim(t)[1]
nCols = dim(t)[2]
nPairs = choose(nRows, 2)
pair = character(nPairs)
# pairs without any discordant observation cannot be tested and stay NA
Exact.sig. = rep(NA_real_, nPairs)
Appr.sig. = rep(NA_real_, nPairs)
k = 0
for (i in seq_len(nRows - 1)) {
  for (j in (i + 1):nCols) {
    k = k + 1
    pair[k] = paste(row.names(t)[i], "-", row.names(t)[j])
    discordant = t[i, j] + t[j, i]
    if (discordant > 0) {
      Exact.sig.[k] = binom.test(t[i, j], discordant)$p.value
      ChiVal = (t[i, j] - t[j, i])^2 / discordant
      Appr.sig.[k] = pchisq(ChiVal, 1, lower.tail = FALSE)
    }
  }
}
# Bonferroni adjustment; a p-value can never exceed 1
adj.Exact.sig. = pmin(1, Exact.sig. * nPairs)
adj.Appr.sig. = pmin(1, Appr.sig. * nPairs)
result = data.frame(pair, Exact.sig., adj.Exact.sig., Appr.sig.,
                    adj.Appr.sig.)
result

# detach the datafile to remove the datafile
detach(Datafile)


# 13.3 Multivariate Analysis: Metric = f (Metric) ----

# import the relevant datafile (it must exist as the object `Datafile`)
# view the datafile in script editor pane (top left)
View(Datafile)
# attach the datafile to access the objects in the datafile
attach(Datafile)

# packages required for the descriptive statistics and the plots below
library(psych)
library(ggplot2)
library(dplyr)
library(hrbrthemes)
library(viridis)

# descriptive statistics
# describe() summarises a single variable; the columns are taken from
# Datafile explicitly so that a leftover global object with the same name
# (e.g. the factor `DV` created in section 5.3) cannot mask them
describe(Datafile$DV)
describe(Datafile$IDV_1)
describe(Datafile$IDV_2)

# mean of IDV_2 within each (IDV_1, DV) bin
ggplot(Datafile, aes(x = IDV_1, y = DV, z = IDV_2)) +
  stat_summary_2d(fun = "mean") +
  labs(fill = "IDV_2") +
  theme_classic()

# bubble chart: the largest bubbles are drawn first so they do not hide the
# small ones
Datafile %>%
  arrange(desc(IDV_2)) %>%
  ggplot(aes(x = IDV_1, y = DV, size = IDV_2, fill = "grey92")) +
  geom_point(alpha = 0.3, shape = 21, color = "black") +
  scale_size(range = c(.1, 12), name = "IDV_2") +
  scale_fill_viridis(discrete = TRUE, guide = "none", option = "A") +
  theme(legend.position = "bottom") +
  ylab("DV") +
  xlab("IDV_1")

# Linear Regression (with two continuous IDVs without an interaction term)
x = lm(DV ~ IDV_1 + IDV_2, data = Datafile)
summary(x)
# Linear Regression (with two continuous IDVs with an interaction term)
x1 = lm(DV ~ IDV_1 * IDV_2, data = Datafile)
summary(x1)
# same model with the predictors swapped (changes which IDV is on the x-axis
# of the effect plot)
x2 = lm(DV ~ IDV_2 * IDV_1, data = Datafile)

# plots of regression results
# "effects" package required for plotting
library("effects")
# without interaction term
plot(allEffects(x))
# with interaction term
plot(allEffects(x1))
plot(allEffects(x2))

# detach the datafile to remove the datafile
detach(Datafile)