# 4.3 Univariate Analysis: Ordinal Variable
# Import the relevant datafile before running this section.
# Open the datafile in the viewer (script editor pane, top left).
View(Datafile)
# Attach the datafile so that its columns can be referenced by bare name.
attach(Datafile)
# Declare "Variable" as categorical and name its five categories.
Variable <- factor(
  as.factor(Variable),
  labels = c(
    "Highly Dissatisfied", "Dissatisfied", "Neutral",
    "Satisfied", "Highly Satisfied"
  )
)
# Absolute frequencies.
summary(Variable)
# Absolute frequencies again, this time kept as a table object for reuse.
t <- table(Variable)
t
# Total observations (sample size).
l <- length(Variable)
l
# Relative frequencies = absolute frequencies / sample size.
t / l
# Relative frequencies rounded to three decimal places.
round(t / l, 3)
# Cumulative frequencies.
c <- cumsum(t)
c
# Cumulative frequency percentages, rounded to three decimal places.
c_perc <- round((c / l) * 100, 3)
c_perc
# The same information collected in a data frame, built up column by column:
# first the categories with their absolute frequencies ...
Data_frame <- data.frame(t)
Data_frame
# ... then the cumulative frequencies ...
Data_frame$cumulative <- cumsum(Data_frame$Freq)
Data_frame
# ... and finally the cumulative frequency percentages.
Data_frame$c_perc <- Data_frame$cumulative / sum(t) * 100
Data_frame
# "agrmt" package required to estimate the measure of consensus.
library(agrmt)
consensus(t)
# Colours for the five satisfaction categories, in level order
# (reused by every bar chart below).
bar_colours <- c(
  "indianred3", "indianred1", "grey", "aquamarine2", "aquamarine4"
)
# Simple bar graph of the absolute frequencies (base graphics).
barplot(
  t,
  col = bar_colours,
  ylim = c(0, 400),
  xlab = "Satisfaction Level"
)
# "ggplot2" package required for the remaining charts.
library(ggplot2)
# Bar chart with ggplot2.
# data.frame(table(Variable)) names the category column "Variable", so that
# is the column that has to be mapped to the x-axis.
ggplot(Data_frame, aes(x = Variable)) +
  geom_col(aes(y = Freq), fill = bar_colours) +
  xlab("Satisfaction")
# Line chart of the cumulative percentages.
# 'group = 1' tells ggplot that all points belong to one series, so that
# every point is connected by the path.
ggplot(Data_frame, aes(x = Variable)) +
  geom_point(aes(y = c_perc)) +
  geom_path(aes(y = c_perc), group = 1) +
  xlab("Satisfaction")
# To show both in one graph, the cumulative percentages (right axis) have to
# be expressed on the scale of the frequencies (left axis).
# The largest frequency should line up with the largest cumulative percent,
# which is 100, so the conversion factor from left to right axis is:
scaleRight <- 100 / max(Data_frame$Freq)
scaleRight
# Dual-axis chart: bars use the left axis (counts); the cumulative line is
# divided by scaleRight for drawing and the secondary axis multiplies it back.
ggplot(Data_frame, aes(x = Variable)) +
  geom_col(aes(y = Freq), fill = bar_colours) +
  geom_point(aes(y = c_perc / scaleRight)) +
  geom_path(
    aes(y = c_perc / scaleRight),
    group = 1, colour = "blue", size = 0.9
  ) +
  scale_y_continuous(
    sec.axis = sec_axis(~ . * scaleRight, name = "Cumulative (%)")
  ) +
  labs(title = "Dual Axis Chart", x = "name for x-axis", y = "Count") +
  theme_classic()
# Apply the one-sample Wilcoxon signed-rank test (for testing hypothesis).
# wilcox.test(numeric variable, mu = central value on the scale)
# "Variable" is a factor at this point and wilcox.test() only accepts numeric
# input, so the test is run on the level codes (1 = "Highly Dissatisfied" ...
# 5 = "Highly Satisfied"); mu = 3 is then the middle category.
# The result is stored so it can be reused for the effect size below.
wtest <- wilcox.test(as.numeric(Variable), mu = 3, correct = FALSE)
wtest
# Ordinal Logistic Regression (1 DV with more than two categories where order
# matters and no IDV).
# "MASS" package required for ordinal logistic regression.
library(MASS)
regression <- polr(Variable ~ 1, Hess = TRUE)
# Summary of the above regression equation.
summary(regression)
# Compute the z-value from the two-sided Wilcoxon p-value, to be used for the
# effect size.
Z <- abs(qnorm(wtest$p.value / 2))
Z
# Effect size r = Z / sqrt(sample size).
Eff_Size <- Z / sqrt(l)
Eff_Size
# Detach the datafile to remove it from the search path.
detach(Datafile)


# 5.3 Bivariate Analysis: Binary = f (Metric)
# Import the relevant datafile before running this section.
# Open the datafile in the viewer (script editor pane, top left).
View(Datafile)
# Attach the datafile so that its columns can be referenced by bare name.
attach(Datafile)
# Tell the software that the variable 'DV' is categorical.
DV <- as.factor(DV)
# Binary Logistic Regression (1 DV with two categories and 1 IDV which is
# metric in nature).
x <- glm(DV ~ IDV, family = "binomial")
# Summary of the above regression equation.
summary(x)
# Extract only the coefficient table from the above results.
summary(x)$coefficients
# Exponential of the coefficients (odds ratios).
exp(coefficients(x))
# Grid of 100 evenly spaced IDV values, in ascending order, over the observed
# range; used to draw the fitted logistic curve.
# (length.out is spelled out in full instead of relying on partial matching.)
y <- data.frame(
  IDV = seq(min(Datafile$IDV), max(Datafile$IDV), length.out = 100)
)
# Predicted probabilities on the response scale for each grid value.
y$DV <- predict(x, newdata = y, type = "response")
# Plot the original data points (DV as stored in the datafile).
plot(DV ~ IDV, data = Datafile, col = "red4")
# Add the fitted regression curve.
lines(DV ~ IDV, data = y, col = "green4", lwd = 2)
# Detach the datafile to remove it from the search path.
detach(Datafile)


# 9.2 Two Paired Nominal Samples
# Import the relevant datafile before running this section.
# Open the datafile in the viewer (script editor pane, top left).
View(Datafile)
# Attach the datafile so that its columns can be referenced by bare name.
attach(Datafile)
# Tell the software that the sample 'Choice_Before' is categorical.
Choice_Before <- as.factor(Choice_Before)
# Tell the software that the sample 'Choice_After' is categorical.
Choice_After <- as.factor(Choice_After)
# Contingency table for absolute frequencies
# (rows = Choice_Before, columns = Choice_After).
# The table must be built from the two factors declared above; there are no
# objects called 'Before' or 'After' in this script.
t <- table(Choice_Before, Choice_After)
t
# Row totals.
margin.table(t, 1)
# Column totals.
margin.table(t, 2)
# Relative frequencies in percentage: out of total.
prop.table(t) * 100
# Relative frequencies in percentage: row-wise.
row_pc <- round(prop.table(t, 1) * 100, 2)
row_pc
# Relative frequencies in percentage: column-wise.
column_pc <- round(prop.table(t, 2) * 100, 2)
column_pc
# Heatmap plot.
# Re-shape the contingency table into a plain 4 x 4 matrix (filled column by
# column) whose rows and columns are both labelled A to D.
t <- matrix(
  as.vector(t),
  nrow = 4,
  byrow = FALSE,
  dimnames = list(c("A", "B", "C", "D"), c("A", "B", "C", "D"))
)
t
# ggplot2 and RColorBrewer packages required.
library(ggplot2)
library(RColorBrewer)
# Grayscale palette of 100 shades, reversed so that higher frequencies get
# darker shades.
gray_palette <- rev(gray.colors(100))
# Long-format data frame for plotting: one row per (before, after) cell, with
# the rows of t varying fastest so that they line up with as.vector(t).
df <- expand.grid(Choice_Before = rownames(t), Choice_After = colnames(t))
df$Frequency <- as.vector(t)
# Heatmap in shades of grey, each tile annotated with its frequency; axis
# text size is adjusted and the colour legend is suppressed.
ggplot(df, aes(x = Choice_Before, y = Choice_After, fill = Frequency)) +
  geom_tile(color = "white") +
  geom_text(aes(label = paste("n =", Frequency)), color = "black", size = 4) +
  scale_fill_gradientn(colors = gray_palette, guide = "none") +
  labs(x = "Choice: Before", y = "Choice: After") +
  theme_classic() +
  theme(
    axis.text.x = element_text(angle = 0, hjust = 1),
    axis.text = element_text(size = 11)
  )
# Compound (stacked) bar chart.
# First build the table to plot: relative frequencies of each choice, one
# column for the "before" sample and one for the "after" sample.
bf <- prop.table(table(Choice_Before))
af <- prop.table(table(Choice_After))
bartable <- t(rbind(bf, af))
dimnames(bartable) <- list(
  c("A", "B", "C", "D"),
  c("Choice: Before", "Choice: After")
)
bartable
# Plot the table as horizontal stacked bars with a horizontal legend placed
# below the plot.
barplot(
  bartable,
  col = c("cadetblue1", "beige", "lavender", "rosybrown1"),
  legend.text = TRUE,
  args.legend = list(x = "bottom", inset = c(0, -0.2), horiz = TRUE),
  horiz = TRUE
)
# Spine plot of the "after" choice conditional on the "before" choice.
spineplot(
  Choice_Before, Choice_After,
  xlab = "Choice_Before",
  ylab = "Choice_After",
  col = c("cadetblue1", "beige", "lavender", "rosybrown1")
)
# "irr" package required for applying the Bhapkar test (for testing
# hypothesis).
library(irr)
bhapkar(data.frame(Choice_Before, Choice_After))
# Applying the McNemar test to the square contingency table.
library(stats)
mcnemar.test(t)
# Post-hoc pairwise comparison.
# For every pair of categories (i, j) only the discordant cells t[i, j] and
# t[j, i] are compared, once with an exact binomial test and once with the
# chi-square approximation. Both p-values are Bonferroni-adjusted for the
# number of category pairs.
nRows <- dim(t)[1]
nCols <- dim(t)[2]
nPairs <- choose(nRows, 2)
# Result vectors are allocated up front instead of being grown in the loop.
# P-values start as NA and stay NA for pairs that cannot be tested.
pair <- character(nPairs)
Exact.sig. <- rep(NA_real_, nPairs)
Appr.sig. <- rep(NA_real_, nPairs)
k <- 0
for (i in seq_len(nRows - 1)) {
  for (j in (i + 1):nCols) {
    k <- k + 1
    pair[k] <- paste(row.names(t)[i], "-", row.names(t)[j])
    discordant <- t[i, j] + t[j, i]
    # With no discordant observations binom.test() stops with an error and
    # the chi-square statistic is 0/0, so such pairs are left as NA.
    if (discordant > 0) {
      Exact.sig.[k] <- binom.test(t[i, j], discordant)$p.value
      ChiVal <- (t[i, j] - t[j, i])^2 / discordant
      Appr.sig.[k] <- pchisq(ChiVal, df = 1, lower.tail = FALSE)
    }
  }
}
# Bonferroni adjustment; an adjusted p-value is a probability, so it is
# capped at 1.
adj.Exact.sig. <- pmin(1, Exact.sig. * nPairs)
adj.Appr.sig. <- pmin(1, Appr.sig. * nPairs)
result <- data.frame(
  pair, Exact.sig., adj.Exact.sig., Appr.sig., adj.Appr.sig.
)
result
# Detach the datafile to remove it from the search path.
detach(Datafile)


# 13.3 Multivariate Analysis: Metric = f (Metric)
# Import the relevant datafile before running this section.
# Open the datafile in the viewer (script editor pane, top left).
View(Datafile)
# Attach the datafile so that its columns can be referenced by bare name.
attach(Datafile)
# Packages required for this part; all of them are loaded before first use.
library(psych)
library(ggplot2)
library(dplyr)
library(hrbrthemes)
library(viridis)
# Descriptive statistics.
# describe() is used because there is no grouping variable; describe.by() is
# deprecated and only makes sense with a group.
describe(DV)
describe(IDV_1)
describe(IDV_2)
# Binned 2-d summary: each (IDV_1, DV) bin is filled with the mean of IDV_2.
ggplot(Datafile, aes(x = IDV_1, y = DV, z = IDV_2)) +
  stat_summary_2d(fun = "mean") +
  labs(fill = "IDV_2") +
  theme_classic()
# Bubble chart: DV against IDV_1 with the point size showing IDV_2.
# Rows are sorted by descending IDV_2 so that large bubbles are drawn first
# and do not hide small ones.
# The outline colour is fixed to black in geom_point(), so no colour
# aesthetic is mapped (the datafile has no 'color' column to map).
Datafile %>%
  arrange(desc(IDV_2)) %>%
  ggplot(aes(x = IDV_1, y = DV, size = IDV_2, fill = "grey92")) +
  geom_point(alpha = 0.3, shape = 21, color = "black") +
  scale_size(range = c(.1, 12), name = "IDV_2") +
  # guide = "none" replaces the deprecated guide = FALSE
  scale_fill_viridis(discrete = TRUE, guide = "none", option = "A") +
  ylab("DV") +
  xlab("IDV_1") +
  # "topright" is not a valid legend.position, so the legend is kept at the
  # bottom
  theme(legend.position = "bottom")
# Linear Regression (with two continuous IDVs without an interaction term).
x <- lm(DV ~ IDV_1 + IDV_2)
summary(x)
# Linear Regression (with two continuous IDVs with an interaction term).
# Stored under its own name so that the additive model 'x' is not
# overwritten and both models remain available for plotting.
x1 <- lm(DV ~ IDV_1 * IDV_2)
summary(x1)
# Same interaction model with the IDVs in the other order; the fit is the
# same, but the effect plot is conditioned on the other variable.
x2 <- lm(DV ~ IDV_2 * IDV_1)
# Plots of regression results.
# "effects" package required for plotting.
library("effects")
# Without interaction term.
plot(allEffects(x))
# With interaction term.
plot(allEffects(x1))
plot(allEffects(x2))
# Detach the datafile to remove it from the search path.
detach(Datafile)