Section 2 Model training
2.1 Simulation experiment with 99% unlabeled data set.
2.1.1 Analysis
In Python:
from sklearn.datasets import make_classification
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
import numpy as np
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import MinMaxScaler
from scipy.spatial import distance
from purf.pu_ensemble import PURandomForestClassifier
import pickle
import os
import re
import session_info
# Simulate five classification data sets that differ only in the random seed
# passed to make_classification (random_state = 1..5).
X_list = []
y_list = []
for i in range(5):
    X, y = make_classification(
        n_samples=5000,
        n_features=300,
        n_informative=250,
        n_redundant=40,
        n_repeated=10,
        n_classes=2,
        n_clusters_per_class=1,
        class_sep=2,
        random_state=i + 1)
    X = pd.DataFrame(X)
    # Check the contents of the set
    print('%d data points and %d features' % (X.shape))
    print('%d positive out of %d total' % (sum(y), len(y)))
    X_list.append(X)
    y_list.append(y)
# Turn every simulated data set into positive-unlabeled (PU) data and record
# the true labels, a reference random-forest OOB score and the PU labels.
y_orig_list = []
res_ = pd.DataFrame({'index': range(5000)})
for i, (X, y) in enumerate(zip(X_list, y_list)):
    # Keep the original targets safe for later usage. The copy is taken
    # before `y` is modified in place below.
    y_orig = y.copy()
    y_orig_list.append(y_orig)
    res_['label_' + str(i + 1)] = y_orig

    # Reference random forest trained on the true labels; only its
    # out-of-bag score for class 1 is stored.
    rf = RandomForestClassifier(
        n_estimators=1000,
        max_samples=int(min(np.sum(y_orig == 0), np.sum(y_orig == 1))),
        oob_score=True,
        n_jobs=-1,
        random_state=30
    )
    rf.fit(X, y_orig)
    res_['rf_' + str(i + 1)] = rf.oob_decision_function_[:, 1]

    # 99% unlabeled: keep 50 randomly chosen true positives as labeled
    # positives and mark everything else as unlabeled (0).
    # The seed is reset on every iteration, as in the original.
    np.random.seed(0)
    labeled = np.random.choice(np.where(y_orig == 1)[0], replace=False, size=50)
    y[:] = 0
    y[labeled] = 1
    res_['pu_label_' + str(i + 1)] = y

    # Check the new contents of the set
    print('%d positive out of %d total' % (sum(y), len(y)))
    y_list[i] = y

res_.to_csv('./other_data/simulation_labels.csv')
def train_purf(features, outcome, res_path, pickle_path='./tmp.pkl', pos_level=0.5, save_model=False):
    """Train a PURF model and write its out-of-bag scores to ``res_path``.

    Parameters
    ----------
    features : pandas.DataFrame
        Feature table; missing values are replaced by the column median.
    outcome : array-like
        Label per row of ``features``, passed to ``fit`` unchanged.
    res_path : str
        Destination CSV for labels, OOB scores and imputed features.
    pickle_path : str
        Destination of the pickled model; used only when ``save_model`` is set.
    pos_level : float
        Forwarded to ``PURandomForestClassifier(pos_level=...)``.
    save_model : bool
        Whether to pickle the fitted model.
    """
    # Imputation
    imputer = SimpleImputer(strategy='median')
    X = pd.DataFrame(imputer.fit_transform(features),
                     index=features.index, columns=features.columns)
    y = outcome
    features = X

    # Training PURF
    purf = PURandomForestClassifier(
        n_estimators=10000,
        oob_score=True,
        n_jobs=-1,
        random_state=42,
        pos_level=pos_level
    )
    purf.fit(X, y)

    # Storing results: one row per index value, joined with the imputed features
    res = pd.DataFrame({'protein_id': X.index, 'antigen_label': y})
    res['OOB score'] = purf.oob_decision_function_[:, 1]
    res = res.groupby('protein_id').mean().merge(features, left_index=True, right_index=True)
    res.to_csv(res_path)

    if save_model:
        with open(pickle_path, 'wb') as out:
            pickle.dump(purf, out, pickle.HIGHEST_PROTOCOL)
# Train one PURF model per simulated PU data set; results are numbered from 1.
for i, (X, y) in enumerate(zip(X_list, y_list)):
    train_purf(X, y, res_path='./other_data/simulation_res_' + str(i + 1) + '.csv')
2.1.2 Plotting
In R:
library(rlist)
library(pROC)
library(mixR)
library(pracma)
library(reshape2)
library(ggplot2)
library(ggpubr)
library(rstatix)
library(cowplot)
# Load the simulated labels and the PURF out-of-bag (OOB) scores of the five
# simulation runs, then derive ROC curves and AUROC values (a) against the
# true labels, (b) against the PU labels and (c) from a two-component mixture
# model fitted to the scores alone.
run_ids <- seq_len(5)

labels <- read.csv("./other_data/simulation_labels.csv")
tmp <- lapply(run_ids, function(i) {
  read.csv(
    paste0("./other_data/simulation_res_", i, ".csv"),
    check.names = FALSE
  )[["OOB score"]]
})
names(tmp) <- paste0("OOB_score_", run_ids)
tmp <- as.data.frame(tmp)
# NOTE(review): rows of the result files are assumed to be in the same order
# as the rows of the label file - confirm.
data <- cbind(
  tmp,
  labels[, c(paste0("label_", run_ids), paste0("pu_label_", run_ids))]
)

# Empirical ROC curve (fpr/tpr data frame) and AUROC computed by pROC::roc().
empirical_roc <- function(response, predictor) {
  roc_ <- roc(response = response, predictor = predictor)
  list(
    curve = data.frame(
      fpr = 1 - roc_$specificities,
      tpr = roc_$sensitivities
    ),
    auroc = as.numeric(roc_$auc)
  )
}

# ROC curve for putative positive and negative samples, taken from a
# two-component mixture fitted to the scores. Component 1 is used as the
# negative and component 2 as the positive distribution; both are evaluated
# with pnorm(), i.e. as normal distributions.
mixture_roc <- function(scores) {
  fit <- mixfit(scores, ncomp = 2)
  x <- seq(-0.5, 1.5, by = 0.01)
  neg_cum <- pnorm(x, mean = fit$mu[1], sd = fit$sd[1])
  pos_cum <- pnorm(x, mean = fit$mu[2], sd = fit$sd[2])
  # false positive / (false positive + true negative); the denominator
  # (1 - neg_cum) + neg_cum is 1
  fpr <- 1 - neg_cum
  # true positive / (true positive + false negative); same simplification
  tpr <- 1 - pos_cum
  list(
    curve = data.frame(fpr = fpr, tpr = tpr),
    # fpr decreases along x, so integrate over -fpr to get a positive area
    auroc = as.numeric(trapz(-fpr, tpr))
  )
}

res_true <- lapply(run_ids, function(i) {
  empirical_roc(data[[paste0("label_", i)]], data[[paste0("OOB_score_", i)]])
})
roc_true_labels <- lapply(res_true, `[[`, "curve")
auroc_true_labels <- vapply(res_true, `[[`, numeric(1), "auroc")

res_pu <- lapply(run_ids, function(i) {
  empirical_roc(data[[paste0("pu_label_", i)]], data[[paste0("OOB_score_", i)]])
})
roc_pu_labels <- lapply(res_pu, `[[`, "curve")
auroc_pu_labels <- vapply(res_pu, `[[`, numeric(1), "auroc")

res_est <- lapply(run_ids, function(i) {
  mixture_roc(data[[paste0("OOB_score_", i)]])
})
roc_estimated <- lapply(res_est, `[[`, "curve")
auroc_estimated <- vapply(res_est, `[[`, numeric(1), "auroc")
# Standard error of the mean: sd(x) divided by the square root of the number
# of non-missing values. `na.rm` is forwarded to sd().
sem <- function(x, na.rm = TRUE) {
  sd(x, na.rm = na.rm) / sqrt(length(na.omit(x)))
}
# Panel a: all ROC curves, coloured by label source, one line per run.
curve_sets <- list(
  "True labels" = roc_true_labels,
  "PU labels" = roc_pu_labels,
  "Estimated" = roc_estimated
)
# Stack the curves and tag every row with its group and per-run sub-group.
# Tagging each curve before binding avoids depending on whether sapply()
# simplifies equal-length results to a matrix.
ds <- do.call(rbind, lapply(names(curve_sets), function(label) {
  curves <- curve_sets[[label]]
  do.call(rbind, lapply(seq_along(curves), function(i) {
    cbind(curves[[i]], group = label, sub_group = paste(label, i))
  }))
}))
ds$group <- factor(ds$group)
ds$sub_group <- factor(ds$sub_group)

# Legend entry of the form "<name> (AUROC = mean ± sem)".
auroc_label <- function(name, auroc) {
  paste0(
    name, " (AUROC = ", round(mean(auroc), 3), " ± ",
    round(sem(auroc), 3), ")"
  )
}

p1 <- ggplot(ds, aes(x = fpr, y = tpr, colour = group, group = sub_group)) +
  # `linewidth` replaces the deprecated `size` for lines (ggplot2 >= 3.4.0)
  geom_line(linewidth = 0.2) +
  scale_colour_manual(
    values = c("#E41A1C", "#377EB8", "#4DAF4A"),
    breaks = c("Estimated", "True labels", "PU labels"),
    labels = c(
      auroc_label("Estimated", auroc_estimated),
      auroc_label("True labels", auroc_true_labels),
      auroc_label("PU labels", auroc_pu_labels)
    )
  ) +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.title = element_text(colour = "black"),
    axis.text = element_text(colour = "black"),
    plot.title = element_text(hjust = 0.5, colour = "black"),
    plot.margin = ggplot2::margin(5, 5, 5, 5, "pt"),
    legend.title = element_blank(),
    legend.text = element_text(colour = "black"),
    legend.position = c(0.6, 0.2),
    legend.background = element_blank()
  ) +
  xlab("False positive rate") +
  ylab("True positive rate")
# Panel b: mean AUROC (with standard error) per label source, with pairwise
# Wilcoxon tests adjusted by Benjamini-Hochberg.
ds <- data.frame("AUROC" = c(auroc_true_labels, auroc_pu_labels, auroc_estimated))
ds$group <- c(
  rep("True labels", length(auroc_true_labels)),
  rep("PU labels", length(auroc_pu_labels)),
  rep("Estimated", length(auroc_estimated))
)
ds$group <- factor(ds$group, levels = c("Estimated", "True labels", "PU labels"))

stats <- wilcox_test(ds, AUROC ~ group, p.adjust.method = "BH")
stats <- add_xy_position(stats, fun = "mean_se", x = "group")

p2 <- ggbarplot(ds,
  x = "group", y = "AUROC", add = "mean_se", fill = "group", alpha = 0.2,
  size = 0.3, add.params = list(size = 0.3)
) +
  # NOTE(review): the bars are mapped to `fill`, so this colour scale may have
  # no visible effect; scale_fill_manual() may have been intended - confirm.
  scale_colour_manual(
    values = c("#E41A1C", "#377EB8", "#4DAF4A"),
    breaks = c("Estimated", "True labels", "PU labels")
  ) +
  stat_pvalue_manual(stats, label = "p = {p.adj}", size = 3) +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.title = element_text(colour = "black"),
    axis.text = element_text(colour = "black"),
    plot.title = element_text(hjust = 0.5, colour = "black"),
    plot.margin = ggplot2::margin(5, 5, 5, 5, "pt"),
    legend.title = element_blank(),
    legend.text = element_text(colour = "black"),
    legend.position = "none",
    legend.background = element_blank()
  ) +
  xlab("")
Final plot
# Place both panels side by side and print the combined figure.
p_combined <- plot_grid(p1, p2, nrow = 1, labels = c("a", "b"))
p_combined
2.2 Hyper-parameter tuning for PURF
2.2.1 Analysis
In Python:
import pandas as pd
import numpy as np
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import MinMaxScaler
from scipy.spatial import distance
from purf.pu_ensemble import PURandomForestClassifier
import pickle
import os
import re
# input set (5393, 1 + 272)
data = pd.read_csv('./data/supplementary_data_3_pf_ml_input.csv', index_col=0)
# NOTE(review): dropping the first column assumes it is `antigen_label` - confirm.
pf3d7_features = data.iloc[:, 1:]
pf3d7_outcome = np.array(data.antigen_label)
def train_purf(features, outcome, pos_level, res_path, pickle_path):
    """Train a PURF model at one positive level and store scores and model.

    Parameters
    ----------
    features : pandas.DataFrame
        Feature table; missing values are replaced by the column median.
        NOTE(review): the merge below needs an index (level) named
        'protein_id' - confirm against the input file.
    outcome : array-like
        Label per row of ``features``, passed to ``fit`` unchanged.
    pos_level : float
        Forwarded to ``PURandomForestClassifier(pos_level=...)``.
    res_path : str
        Destination TSV holding 'antigen_label' and 'OOB score'.
    pickle_path : str
        Destination of the pickled model.

    A leading ``~`` in either path is expanded to the home directory.
    """
    # Imputation
    imputer = SimpleImputer(strategy='median')
    X = pd.DataFrame(imputer.fit_transform(features),
                     index=features.index, columns=features.columns)
    y = outcome
    features = X

    # Training PURF
    purf = PURandomForestClassifier(
        n_estimators=100000,
        oob_score=True,
        n_jobs=64,
        random_state=42,
        pos_level=pos_level
    )
    purf.fit(X, y)

    # Storing results
    res = pd.DataFrame({'protein_id': X.index, 'antigen_label': y})
    res['OOB score'] = purf.oob_decision_function_[:, 1]
    res = res.groupby('protein_id').mean().merge(features, left_index=True, right_on='protein_id')
    res = res[['antigen_label', 'OOB score']]
    # Write the score table; the original wrote `features` here, so the file
    # lacked the two columns that are read back when the results are merged.
    res.to_csv(os.path.expanduser(res_path), sep='\t')
    # open() does not expand '~' by itself
    with open(os.path.expanduser(pickle_path), 'wb') as out:
        pickle.dump(purf, out, pickle.HIGHEST_PROTOCOL)
# Train one model per positive level, then collect the OOB scores of all
# levels into a single table (one column per level).
# The home directory is expanded once here because os/open do not expand '~'.
res_dir = os.path.expanduser('~/Downloads/pos_level/without_weighting/')
pos_levels = [0.5, 0.4, 0.6, 0.3, 0.7, 0.2, 0.8, 0.1, 0.9]

for pos_level in pos_levels:
    train_purf(pf3d7_features, pf3d7_outcome, pos_level=pos_level,
               res_path=res_dir + '%.1f_res.tsv' % pos_level,
               pickle_path=res_dir + '%.1f_purf.pkl' % pos_level)

# Column order 0.1 .. 0.9. The trained levels are reused instead of a float
# np.arange so that file names always match what was written.
level_names = ['%.1f' % pos_level for pos_level in sorted(pos_levels)]
tmp = pd.read_csv(res_dir + '0.1_res.tsv', sep='\t', index_col=0)['antigen_label']

# The score column is written as 'OOB score' (singular) by train_purf.
data_frames = [pd.read_csv(res_dir + name + '_res.tsv', sep='\t', index_col=0)['OOB score']
               for name in level_names]
merged_df = pd.concat([tmp] + data_frames, join='outer', axis=1)
colnames = ['antigen_label'] + level_names
merged_df.columns = colnames
merged_df.to_csv('./other_data/pos_level_parameter_tuning_wo_weighting.csv')
2.2.2 Plotting
In R:
library(mixR)
library(pracma)
library(rlist)
library(ggplot2)
library(cowplot)
library(grid)
library(gridExtra)
# For every positive level build two panels: the fitted two-component score
# mixture of the unlabeled proteins (plot_list) and the percent-rank curve of
# the labeled positives (plot_list2).
data <- read.csv(
  "./other_data/pos_level_parameter_tuning_wo_weighting.csv",
  header = TRUE, row.names = 1, check.names = FALSE
)
# Extract data with only unlabeled proteins
data_unl <- data[data$antigen_label == 0, ]

# Theme elements shared by every panel; only the first panel shows a legend.
panel_theme <- theme(
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank(),
  axis.title = element_blank(),
  axis.text = element_text(colour = "black"),
  plot.title = element_text(hjust = 0.5, colour = "black")
)
no_legend <- theme(legend.position = "none")

# Levels are built from integers so that column lookup and the first-panel
# test do not rely on accumulated floating-point steps.
pos_levels <- seq_len(9) / 10
plot_list <- vector("list", length(pos_levels))
plot_list2 <- vector("list", length(pos_levels))

for (k in seq_along(pos_levels)) {
  i <- pos_levels[k]
  score_col <- sprintf("%.1f", i) # column names were written with '%.1f'
  show_legend <- k == 1

  fit <- mixfit(data_unl[[score_col]], ncomp = 2)

  # Calculate receiver operating characteristic (ROC) curve
  # for putative positive and negative samples
  x <- seq(-0.5, 1.5, by = 0.01)
  neg_cum <- pnorm(x, mean = fit$mu[1], sd = fit$sd[1])
  pos_cum <- pnorm(x, mean = fit$mu[2], sd = fit$sd[2])
  # false positive / (false positive + true negative); the denominator is 1
  fpr <- 1 - neg_cum
  # true positive / (true positive + false negative); the denominator is 1
  tpr <- 1 - pos_cum

  legend_theme <- if (show_legend) {
    theme(
      legend.title = element_blank(),
      legend.text = element_text(colour = "black"),
      legend.position = c(0.7, 0.85),
      legend.background = element_blank()
    )
  } else {
    no_legend
  }
  p <- plot(fit, title = paste0(
    "Positive level = ", i,
    " (AUROC = ", round(trapz(-fpr, tpr), 2), ")"
  )) +
    scale_fill_manual(
      values = c("blue", "red"),
      labels = c("Putative negative", "Putative positive")
    ) +
    theme_bw() +
    panel_theme +
    legend_theme +
    xlim(-0.8, 1.5)

  # Calculate percent rank for labeled positives
  data_ <- data[c("antigen_label", score_col)]
  colnames(data_) <- c("antigen_label", "OOB score")
  data_$percent_rank <- rank(data_[["OOB score"]]) / nrow(data)
  data_ <- data_[data$antigen_label == 1, ]
  data_ <- data_[order(-data_$percent_rank), ]
  data_$x <- seq_len(nrow(data_)) / nrow(data_)
  # Share of labeled positives scoring at least 0.5
  cat(paste0("EPR: ", sum(data_$`OOB score` >= 0.5) / nrow(data_), "\n"))

  rank_legend_theme <- if (show_legend) {
    theme(
      legend.title = element_text(hjust = 0.5, colour = "black", angle = 0),
      legend.text = element_text(colour = "black"),
      legend.position = c(0.4, 0.15),
      legend.background = element_blank()
    )
  } else {
    no_legend
  }
  # Adding NULL to a ggplot is a no-op, so these stay NULL without a legend.
  rank_guides <- if (show_legend) {
    guides(colour = guide_colourbar(title.position = "top", direction = "horizontal"))
  }
  rank_labs <- if (show_legend) labs(colour = "Score (proportion of votes)")

  p2 <- ggplot(data_, aes(x = x, y = percent_rank)) +
    geom_hline(yintercept = 0.5, linetype = "dashed", color = "black") +
    geom_line() +
    geom_point(aes(color = `OOB score`), size = 1) +
    scale_colour_gradient2(
      low = "blue", mid = "purple", high = "red",
      midpoint = 0.5, limits = c(0, 1)
    ) +
    theme_bw() +
    panel_theme +
    rank_legend_theme +
    rank_guides +
    rank_labs +
    ggtitle(paste0(
      "Positive level = ", i,
      " (AUC = ", round(trapz(c(0, data_$x, 1), c(1, data_$percent_rank, 0)), 2), ")"
    )) +
    ylim(0, 1)

  plot_list[[k]] <- p
  plot_list2[[k]] <- p2
}
2.3 Variable space weighting
2.3.1 Analysis
In Python:
from sklearn.datasets import make_classification
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
import numpy as np
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import MinMaxScaler
from scipy.spatial import distance
from purf.pu_ensemble import PURandomForestClassifier
import pickle
import os
import re
import session_info
# input set (5393, 1 + 272)
data = pd.read_csv('./data/supplementary_data_3_pf_ml_input.csv', index_col=0)
# NOTE(review): dropping the first column assumes it is `antigen_label` - confirm.
pf3d7_features = data.iloc[:, 1:]
pf3d7_outcome = np.array(data.antigen_label)
def train_purf(features, outcome, pos_level, res_path, pickle_path):
    """Train PURF with variable space weighting and store scores and model.

    Labeled positives are replicated 1 to 10 times according to their
    Euclidean distance from the per-feature median of all labeled positives
    (min-max scaled to [1, 10] and rounded), so the most distant positives
    get the most copies.

    Parameters
    ----------
    features : pandas.DataFrame
        Feature table; missing values are replaced by the column median.
        NOTE(review): the merge below needs an index (level) named
        'protein_id' - confirm against the input file.
    outcome : array-like
        Label per row of ``features``; 1 marks labeled positives, 0 unlabeled.
    pos_level : float
        Forwarded to ``PURandomForestClassifier(pos_level=...)``.
    res_path : str
        Destination TSV holding 'antigen_label' and 'OOB score'.
    pickle_path : str
        Destination of the pickled model.

    A leading ``~`` in either path is expanded to the home directory.
    """
    # Imputation
    imputer = SimpleImputer(strategy='median')
    X = pd.DataFrame(imputer.fit_transform(features),
                     index=features.index, columns=features.columns)
    y = np.asarray(outcome)
    features = X
    print('There are %d positives out of %d samples before variable space weighting.' % (sum(y), len(y)))

    # variable space weighting (the original computed this preamble twice)
    lab_pos = X.loc[y == 1, :]
    median = np.median(lab_pos, axis=0)
    scaler = MinMaxScaler(feature_range=(1, 10))
    # Euclidean distance of every labeled positive to the median profile
    dist = np.linalg.norm(lab_pos.to_numpy() - median, axis=1).reshape(-1, 1)
    counts = np.round(scaler.fit_transform(dist)).astype(np.int64)[:, 0]

    # Unlabeled rows first, then each labeled positive repeated counts[i]
    # times, in the original row order.
    X_unl = X.loc[y == 0, :]
    X_rep = lab_pos.iloc[np.repeat(np.arange(len(counts)), counts)]
    X = pd.concat([X_unl, X_rep])
    y = np.asarray([0] * X_unl.shape[0] + [1] * int(counts.sum()))
    print('There are %d positives out of %d samples after variable space weighting.' % (sum(y), len(y)))

    # Training PURF
    purf = PURandomForestClassifier(
        n_estimators=100000,
        oob_score=True,
        n_jobs=64,
        random_state=42,
        pos_level=pos_level
    )
    purf.fit(X, y)

    # Storing results: replicated positives are averaged back to one row each
    res = pd.DataFrame({'protein_id': X.index, 'antigen_label': y})
    res['OOB score'] = purf.oob_decision_function_[:, 1]
    res = res.groupby('protein_id').mean().merge(features, left_index=True, right_on='protein_id')
    res = res[['antigen_label', 'OOB score']]
    # Write the score table; the original wrote `features` here, so the file
    # lacked the two columns that are read back when the results are merged.
    res.to_csv(os.path.expanduser(res_path), sep='\t')
    # open() does not expand '~' by itself
    with open(os.path.expanduser(pickle_path), 'wb') as out:
        pickle.dump(purf, out, pickle.HIGHEST_PROTOCOL)
# Train one weighted model per positive level, then collect the OOB scores of
# all levels into a single table (one column per level).
# The home directory is expanded once here because os/open do not expand '~'.
res_dir = os.path.expanduser('~/Downloads/pos_level/with_weighting/')
pos_levels = [0.5, 0.4, 0.6, 0.3, 0.7, 0.2, 0.8, 0.1, 0.9]

for pos_level in pos_levels:
    train_purf(pf3d7_features, pf3d7_outcome, pos_level=pos_level,
               res_path=res_dir + '%.1f_res.tsv' % pos_level,
               pickle_path=res_dir + '%.1f_purf.pkl' % pos_level)

# Column order 0.1 .. 0.9. The trained levels are reused instead of a float
# np.arange so that file names always match what was written.
level_names = ['%.1f' % pos_level for pos_level in sorted(pos_levels)]
tmp = pd.read_csv(res_dir + '0.1_res.tsv', sep='\t', index_col=0)['antigen_label']

# The score column is written as 'OOB score' (singular) by train_purf.
data_frames = [pd.read_csv(res_dir + name + '_res.tsv', sep='\t', index_col=0)['OOB score']
               for name in level_names]
merged_df = pd.concat([tmp] + data_frames, join='outer', axis=1)
colnames = ['antigen_label'] + level_names
merged_df.columns = colnames
merged_df.to_csv('./other_data/pos_level_parameter_tuning_w_weighting.csv')
2.3.2 Plotting
In R:
library(mixR)
library(pracma)
library(rlist)
library(ggplot2)
library(cowplot)
library(grid)
library(gridExtra)
# For every positive level build two panels: the fitted two-component score
# mixture of the unlabeled proteins (plot_list) and the percent-rank curve of
# the labeled positives (plot_list2).
data <- read.csv(
  "./other_data/pos_level_parameter_tuning_w_weighting.csv",
  header = TRUE, row.names = 1, check.names = FALSE
)
# Extract data with only unlabeled proteins
data_unl <- data[data$antigen_label == 0, ]

# Theme elements shared by every panel; only the first panel shows a legend.
panel_theme <- theme(
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank(),
  axis.title = element_blank(),
  axis.text = element_text(colour = "black"),
  plot.title = element_text(hjust = 0.5, colour = "black")
)
no_legend <- theme(legend.position = "none")

# Levels are built from integers so that column lookup and the first-panel
# test do not rely on accumulated floating-point steps.
pos_levels <- seq_len(9) / 10
plot_list <- vector("list", length(pos_levels))
plot_list2 <- vector("list", length(pos_levels))

for (k in seq_along(pos_levels)) {
  i <- pos_levels[k]
  score_col <- sprintf("%.1f", i) # column names were written with '%.1f'
  show_legend <- k == 1

  fit <- mixfit(data_unl[[score_col]], ncomp = 2)

  # Calculate receiver operating characteristic (ROC) curve
  # for putative positive and negative samples
  x <- seq(-0.5, 1.5, by = 0.01)
  neg_cum <- pnorm(x, mean = fit$mu[1], sd = fit$sd[1])
  pos_cum <- pnorm(x, mean = fit$mu[2], sd = fit$sd[2])
  # false positive / (false positive + true negative); the denominator is 1
  fpr <- 1 - neg_cum
  # true positive / (true positive + false negative); the denominator is 1
  tpr <- 1 - pos_cum

  legend_theme <- if (show_legend) {
    theme(
      legend.title = element_blank(),
      legend.text = element_text(colour = "black"),
      legend.position = c(0.7, 0.85),
      legend.background = element_blank()
    )
  } else {
    no_legend
  }
  p <- plot(fit, title = paste0(
    "Positive level = ", i,
    " (AUROC = ", round(trapz(-fpr, tpr), 2), ")"
  )) +
    scale_fill_manual(
      values = c("blue", "red"),
      labels = c("Putative negative", "Putative positive")
    ) +
    theme_bw() +
    panel_theme +
    legend_theme +
    xlim(-0.8, 1.5)

  # Calculate percent rank for labeled positives
  data_ <- data[c("antigen_label", score_col)]
  colnames(data_) <- c("antigen_label", "OOB score")
  data_$percent_rank <- rank(data_[["OOB score"]]) / nrow(data)
  data_ <- data_[data$antigen_label == 1, ]
  data_ <- data_[order(-data_$percent_rank), ]
  data_$x <- seq_len(nrow(data_)) / nrow(data_)
  # Share of labeled positives scoring at least 0.5
  cat(paste0("EPR: ", sum(data_$`OOB score` >= 0.5) / nrow(data_), "\n"))

  rank_legend_theme <- if (show_legend) {
    theme(
      legend.title = element_text(hjust = 0.5, colour = "black", angle = 0),
      legend.text = element_text(colour = "black"),
      legend.position = c(0.4, 0.15),
      legend.background = element_blank()
    )
  } else {
    no_legend
  }
  # Adding NULL to a ggplot is a no-op, so these stay NULL without a legend.
  rank_guides <- if (show_legend) {
    guides(colour = guide_colourbar(title.position = "top", direction = "horizontal"))
  }
  rank_labs <- if (show_legend) labs(colour = "Score (proportion of votes)")

  p2 <- ggplot(data_, aes(x = x, y = percent_rank)) +
    geom_hline(yintercept = 0.5, linetype = "dashed", color = "black") +
    geom_line() +
    geom_point(aes(color = `OOB score`), size = 1) +
    scale_colour_gradient2(
      low = "blue", mid = "purple", high = "red",
      midpoint = 0.5, limits = c(0, 1)
    ) +
    theme_bw() +
    panel_theme +
    rank_legend_theme +
    rank_guides +
    rank_labs +
    ggtitle(paste0(
      "Positive level = ", i,
      " (AUC = ", round(trapz(c(0, data_$x, 1), c(1, data_$percent_rank, 0)), 2), ")"
    )) +
    ylim(0, 1)

  plot_list[[k]] <- p
  plot_list2[[k]] <- p2
}
2.3.2.1 Bimodal distribution plot
# Arrange the mixture-density panels in three columns with shared axis titles.
x_grob <- textGrob("Score (proportion of votes)", gp = gpar(fontsize = 15))
y_grob <- textGrob("Density", gp = gpar(fontsize = 15), rot = 90)
grid.arrange(arrangeGrob(
  plot_grid(plotlist = plot_list, ncol = 3),
  left = y_grob, bottom = x_grob
))
2.3.2.2 Known antigen ranking
# Arrange the percent-rank panels in three columns with shared axis titles.
x_grob2 <- textGrob("Ranked known antigens (scaled)", gp = gpar(fontsize = 15))
y_grob2 <- textGrob("Percent rank", gp = gpar(fontsize = 15), rot = 90)
grid.arrange(arrangeGrob(
  plot_grid(plotlist = plot_list2, ncol = 3),
  left = y_grob2, bottom = x_grob2
))
sessionInfo()
## R version 4.2.3 (2023-03-15)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur ... 10.16
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] grid stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] gridExtra_2.3 cowplot_1.1.1 rstatix_0.7.0 ggpubr_0.4.0 ggplot2_3.4.2
## [6] reshape2_1.4.4 pracma_2.3.8 mixR_0.2.0 pROC_1.18.0 rlist_0.4.6.2
##
## loaded via a namespace (and not attached):
## [1] Rcpp_1.0.9 lattice_0.20-45 tidyr_1.2.0 png_0.1-7
## [5] assertthat_0.2.1 digest_0.6.29 utf8_1.2.2 R6_2.5.1
## [9] plyr_1.8.7 backports_1.4.1 evaluate_0.16 highr_0.9
## [13] pillar_1.8.1 rlang_1.1.0 rstudioapi_0.14 data.table_1.14.2
## [17] car_3.1-0 jquerylib_0.1.4 R.utils_2.12.0 R.oo_1.25.0
## [21] Matrix_1.5-3 reticulate_1.25 rmarkdown_2.16 styler_1.8.0
## [25] stringr_1.4.1 munsell_0.5.0 broom_1.0.0 compiler_4.2.3
## [29] xfun_0.32 pkgconfig_2.0.3 htmltools_0.5.3 tidyselect_1.1.2
## [33] tibble_3.1.8 bookdown_0.28 codetools_0.2-19 fansi_1.0.3
## [37] dplyr_1.0.9 withr_2.5.0 R.methodsS3_1.8.2 jsonlite_1.8.0
## [41] gtable_0.3.0 lifecycle_1.0.3 DBI_1.1.3 magrittr_2.0.3
## [45] scales_1.2.1 carData_3.0-5 cli_3.6.1 stringi_1.7.8
## [49] cachem_1.0.6 ggsignif_0.6.3 bslib_0.4.0 generics_0.1.3
## [53] vctrs_0.6.2 tools_4.2.3 R.cache_0.16.0 glue_1.6.2
## [57] purrr_0.3.4 abind_1.4-5 fastmap_1.1.0 yaml_2.3.5
## [61] colorspace_2.0-3 knitr_1.40 sass_0.4.2