library(irrCAC)
library(tidyverse)
Multi-label agreement
Load libraries
(install if you don’t have already)
Define functions to calculate MASI
Show the code defining the functions
#' Parse string into a character vector
#'
#' @param x string, e.g. "label_1, label_2"
#' @param sep separator, e.g. ", "
#'
#' @return character vector of labels, e.g. c("label_1", "label_2")
#' @export
#'
#' @examples
#' elements_from_string("l1, l2, l3", sep = ", ")
<- function(x, sep = ", ") {str_split(x,sep,simplify = F)[[1]]}
elements_from_string
#' Measuring Agreement on Set-valued Items (MASI) distance from text string
#' MASI Similarity or Distance (pairwise)
#'
#' @param x Person x string of labels such as "label_1, label_2, label_3"
#' @param y Person y string of labels such as "label_4, label_1, label_5, label_7"
#' @param sep Label separator in the string, default = ", "
#' @param jaccard_only Only return Jaccard index instead of MASI (default = FALSE)
#' @param type one of "dist" or "sim" (default) for a distance or similarity score.
#'
#' @return Jaccard Distance between the two sets
#' @export
#'
#' @examples
#' masi("l1, l2, l3", "l7, l2")
<- function(x,y,sep = ", ", jaccard_only = F, type = "sim"){
masi # Define the labels for each rater
<- elements_from_string(x)
lab_x <- elements_from_string(y)
lab_y
# compute set diff and intersection size
<- length(setdiff(lab_x,lab_y)) # number of elements in set x but not in set y
diff_xy_size <- length(setdiff(lab_y,lab_x)) # number of elements in set y but not in set x
diff_yx_size <- length(intersect(lab_x,lab_y)) # number of elements in common between two sets
intersection_size
# monotonicity simillarity coefficient, M, see http://www.lrec-conf.org/proceedings/lrec2006/pdf/636_pdf.pdf Rebecca Passonneau. 2006. Measuring Agreement on Set-valued Items (MASI) for Semantic and Pragmatic Annotation. In Proceedings of the Fifth International Conference on Language Resources and Evaluation (LREC’06), Genoa, Italy. European Language Resources Association (ELRA).
<- case_when(
m_sim == 0) & (diff_yx_size == 0) ~ 1, # the sets are identical, return 1
(diff_xy_size == 0) | (diff_yx_size == 0) ~ 2/3, # one set is a subset of the other, return 2/3
(diff_xy_size != 0) & (diff_yx_size != 0) & (intersection_size !=0) ~ 1/3, # some overlap, some non-overlap in each set, return 1/3
(diff_xy_size ==0 ~ 0 # disjoint sets, return 0
intersection_size
)
# Calculate Jaccard simmilarity; J=1 means same, J=0 means no overlap at all. See https://en.wikipedia.org/wiki/Jaccard_index
<- intersection_size/(length(lab_x) + length(lab_y) - intersection_size)
jaccard_sim
#MASI sim is M*J; MASI dist is 1-M*J
<- if_else(jaccard_only,
masi_sim
jaccard_sim,*jaccard_sim)
m_sim
return(if_else(type == "sim",
masi_sim,1-masi_sim))
}
<- function(df, sep = ", ") {
MASI_simmilarity_matrix <- sort(unique(unlist(df))) # alphabetical sorted list of all strings of labels
labels_all_combos
<- length(labels_all_combos) # number of combinations above
num_label_combos
<- matrix(nrow = num_label_combos,
masi_sim_mat ncol = num_label_combos,
dimnames = list(labels_all_combos,
labels_all_combos))
for(i in 1:num_label_combos){
for(j in 1:num_label_combos)
{<- masi(x = labels_all_combos[i],
masi_sim_mat[i,j] y = labels_all_combos[j],
sep = sep)
}}
return(masi_sim_mat)
}
Test data
#creating the dataset as dataframe
#you'll want to load in your data frame / tibble from a csv file instead
<- tribble(
dataset ~Coder1, ~Coder2, ~Coder3,
"l1, l2", "l1", "l2",
"l1, l2", "l1, l2", "l1, l2",
"l1", "l1", "l1",
"l3", "l3", NA_character_,
"l3", "l1, l3", "l1, l3",
"l4", "l4", "l4",
"l2", "l4", "l5",
"l1, l2", "l1", "l2",
"l1, l2", "l1, l2, l3", "l1, l2, l3, l9",
"l1", "l2, l4", "l1",
"l1", "l1", "l5"
)
Calculate Inter-rater reliability
# calculate MASI set difference between each pair of labels
<- MASI_simmilarity_matrix(dataset, sep = ", ")
wt
# calculating krippendorff alpha
<- krippen.alpha.raw(ratings = dataset,
ka weights = wt,
categ.labels = rownames(wt),
conflev = 0.95
)
# calculating fleiss' kappa
<- fleiss.kappa.raw(ratings = dataset,
fk weights = wt,
categ.labels = rownames(wt),
conflev = 0.95
)
bind_rows(fk$est,ka$est)
coeff.name pa pe coeff.val coeff.se conf.int
1 Fleiss' Kappa 0.5538721 0.2471891 0.40738 0.15383 (0.065,0.75)
2 Krippendorff's Alpha 0.5543077 0.2539822 0.40257 0.15186 (0.064,0.741)
p.value w.name
1 0.02438394 Custom Weights
2 0.02427498 Cutsom Weights
So Krippendorff’s Alpha is
<- ka$est$coeff.val) (kav
[1] 0.40257
And the 95% confidence interval is
$est$conf.int ka
[1] "(0.064,0.741)"
And Fleiss’ Kappa is
<- fk$est$coeff.val) (fkv
[1] 0.40738
And sampling 500 reshuffles to see what the coefficient looks like:
Code
# "randomly" reshuffled data
<- function(df){
reshuffle %>%
df %>%
unlist sample(.,size = length(.),replace = F)} %>%
{matrix(ncol = ncol(df),
dimnames = list(row.names(df),
names(df))) %>%
as_tibble()
}#reshuffled <- reshuffle(dataset)
#calculating krippendorff alpha
= c()
shuffle_ka_vec for (i in 1:500){
<- krippen.alpha.raw(ratings = reshuffle(dataset),
ka_r weights = wt,
categ.labels = rownames(wt),
conflev = 0.95
)<- ka_r$est$coeff.val
shuffle_ka_vec[i]
}
#calculating fleiss' kappa
= c()
shuffle_fk_vec for (i in 1:500){
<- fleiss.kappa.raw(ratings = reshuffle(dataset),
fk_r weights = wt,
categ.labels = rownames(wt),
conflev = 0.95
)<- fk_r$est$coeff.val
shuffle_fk_vec[i] }
Plot random reshuffle vs the actual result you got.
Code
hist(shuffle_ka_vec,
xlim = c(min(c(shuffle_ka_vec,kav)),1),
main = "Krippendorff's alpha",
xlab = "Krippendorff's alpha")
abline(v = kav,col="red")
text(x = c(0,kav,1),
y=c(10,10,10),
col=c("black","red","black"),
labels = c("random",paste0("value = ",round(kav,3)),"agree")
)
Code
hist(shuffle_fk_vec,
xlim = c(min(c(shuffle_fk_vec,fkv)),1),
main = "Fleiss's Kappa",
xlab = "Fleiss' Kappa")
abline(v = fkv,col="blue")
text(x = c(0,fkv,1),
y=c(10,10,10),
col=c("black","blue","black"),
labels = c("random",paste0("value = ",round(fkv,3)),"agree")
)