Dorado Run Analyzer (Doradora)
The Bioinformatics team generated this report includes stats and diagnostic plots of the Oxford Nanopore Sequencing data.
An analytical platform designed to quantify and compare Nanopore sequencing data quality across multiple experimental conditions, enabling detection of batch-specific variability and ensuring consistency in sequencing performance.
# Do not run
baseDir <- "WORKING_DIRECTORY" # where the Nanopore stats files are located
setwd(baseDir)
outDir <- file.path(baseDir, "Results")
# Load libraries
library(dplyr)
library(ggplot2)
library(ggrepel)
library(ggridges)
library(ggsankey)
library(stringr)
library(reshape2)
library(RColorBrewer)
library(viridis)
library(cowplot)
tsvFiles <- list.files(path = file.path(baseDir, "Data"), pattern = "tsv", full.names = TRUE)
# sequencing summary
summaryL <- lapply(seq_along(tsvFiles), function(idx) {
tsvFile <- tsvFiles[idx]
tsv <- read.table(tsvFile, header = TRUE, stringsAsFactor = FALSE, sep = "\t")
tsv$sample <- gsub(".tsv", "", basename(tsvFile))
return(tsv)
})
names(summaryL) <- gsub(".tsv", "", basename(tsvFiles))
summaryL <- summaryL[sampleNames] # reorder
summary <- do.call(rbind, summaryL)
summary$sample <- factor(summary$sample, levels = sampleNames)
# pore activity
csvFiles <- list.files(path = file.path(baseDir, "Data"), pattern = "csv", full.names = TRUE)
poreL <- lapply(seq_along(csvFiles), function(idx) {
csvFile <- csvFiles[idx]
csv <- read.csv(csvFile, header = TRUE)
csv$sample <- gsub(".csv", "", basename(csvFile))
return(csv)
})
names(poreL) <- gsub(".csv", "", basename(csvFiles))
basename(tsvFiles)
## [1] "Donors.tsv" "Plate01.tsv" "Plate02.tsv" "Plate03.tsv" "Plate04.tsv"
## [6] "Plate05.tsv" "Plate06.tsv" "Plate07.tsv" "Plate08.tsv" "Plate09.tsv"
## [11] "Plate10.tsv" "Plate11.tsv" "Plate12.tsv" "Plate13.tsv" "Plate14.tsv"
readLengthMax <- max(summary$sequence_length_template)
readQualityMax <- max(summary$mean_qscore_template)
for (sampleName in sampleNames) {
stat <- summaryL[[sampleName]]
readDf <- data.frame(
readLength = stat$sequence_length_template,
readQuality = stat$mean_qscore_template
)
readDf <- densityColors(df = readDf, cols = heatCols)
cat("#### ", sampleName, "\n")
cat("\n")
cat(" * Number of reads: ", format(nrow(readDf), big.mark=",",scientific = FALSE), "\n")
plot(log10(readDf$readLength), readDf$readQuality, col = readDf$Col, xlab = "Length, log10", ylab = "Quality, phred", pch = 20, xlim = c(0, log10(readLengthMax)), ylim = c(0, readQualityMax))
cat("\n\n")
}
| Min. | 1st Qu. | Median | Mean | 3rd Qu. | Max. | |
|---|---|---|---|---|---|---|
| Donors | 1 | 17.83 | 22.90 | 22.83 | 27.62 | 50 |
| Plate01 | 1 | 17.31 | 21.95 | 22.24 | 26.49 | 50 |
| Plate02 | 1 | 16.76 | 21.40 | 22.12 | 26.52 | 50 |
| Plate03 | 1 | 14.00 | 19.26 | 20.63 | 25.70 | 50 |
| Plate04 | 1 | 15.38 | 20.23 | 20.78 | 25.10 | 50 |
| Plate05 | 1 | 16.08 | 20.12 | 20.55 | 24.47 | 50 |
| Plate06 | 1 | 16.87 | 21.24 | 22.22 | 26.65 | 50 |
| Plate07 | 1 | 15.08 | 19.63 | 20.09 | 24.31 | 50 |
| Plate08 | 1 | 16.86 | 21.61 | 22.50 | 27.35 | 50 |
| Plate09 | 1 | 15.67 | 19.96 | 20.48 | 24.27 | 50 |
| Plate10 | 1 | 16.53 | 21.20 | 21.87 | 26.17 | 50 |
| Plate11 | 1 | 15.99 | 20.62 | 21.91 | 26.67 | 50 |
| Plate12 | 1 | 16.63 | 21.15 | 22.05 | 26.33 | 50 |
| Plate13 | 1 | 16.45 | 21.03 | 22.03 | 26.79 | 50 |
| Plate14 | 1 | 16.11 | 21.54 | 22.40 | 27.89 | 50 |
ridgeQuality <- ggplot(summary, aes(x = mean_qscore_template, y = sample, fill = ..x..)) +
geom_density_ridges_gradient(scale = 3, rel_min_height = 0.01) +
scale_fill_viridis(name = "Quality, phred", option = "C") +
labs(
x = "Quality, phred",
y = "Plate"
) +
theme_bw() +
theme(
axis.line = element_line(colour = "black"),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.text.x = element_text(angle = 0, vjust = 0, hjust = 0.5),
text = element_text(size = 12)
) +
geom_vline(xintercept = 20, linetype = "dashed", color = "red", size = 1)
print(ridgeQuality)
| Min. | 1st Qu. | Median | Mean | 3rd Qu. | Max. | |
|---|---|---|---|---|---|---|
| Donors | 5 | 312 | 575 | 1,647.89 | 1,673 | 1,625,012 |
| Plate01 | 5 | 247 | 514 | 1,712.78 | 1,730 | 1,348,333 |
| Plate02 | 5 | 210 | 336 | 968.91 | 716 | 2,490,141 |
| Plate03 | 5 | 186 | 283 | 652.25 | 547 | 1,436,315 |
| Plate04 | 5 | 230 | 383 | 1,224.34 | 943 | 1,415,156 |
| Plate05 | 5 | 226 | 321 | 650.05 | 565 | 1,380,165 |
| Plate06 | 5 | 216 | 360 | 1,035.02 | 852 | 1,606,542 |
| Plate07 | 5 | 226 | 376 | 1,120.82 | 910 | 1,457,070 |
| Plate08 | 5 | 202 | 317 | 911.83 | 675 | 2,165,164 |
| Plate09 | 5 | 240 | 418 | 1,242.15 | 1,094 | 1,482,838 |
| Plate10 | 5 | 225 | 412 | 1,415.85 | 1,240 | 1,617,295 |
| Plate11 | 5 | 196 | 298 | 713.64 | 591 | 1,435,036 |
| Plate12 | 5 | 211 | 343 | 976.20 | 771 | 1,843,158 |
| Plate13 | 5 | 216 | 344 | 854.97 | 703 | 1,540,083 |
| Plate14 | 5 | 206 | 311 | 786.36 | 600 | 1,387,738 |
ridgeLength <- ggplot(summary, aes(x = log10(sequence_length_template + 1), y = sample, fill = ..x..)) +
geom_density_ridges_gradient(scale = 3, rel_min_height = 0.01) +
scale_fill_viridis(name = "Length, log10") +
labs(
x = "Length, log10",
y = "Plate"
) +
theme_bw() +
theme(
axis.line = element_line(colour = "black"),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.text.x = element_text(angle = 0, vjust = 0, hjust = 0.5),
text = element_text(size = 12)
)
print(ridgeLength)
for (sampleName in sampleNames) {
if (sampleName %in% names(poreL) & sampleName %in% names(summaryL)) {
summDf <- summaryL[[sampleName]]
sec2HalfDayBreaks <- seq(-1, (max(summDf$start_time) + 43200), 43200)
summDf$TimeInGroup <- cut(summDf$start_time, breaks = sec2HalfDayBreaks, labels = as.character(seq(0.5, (length(sec2HalfDayBreaks)-1)/2, 0.5)))
tsvDf <- data.frame(
TimeInGroup = summDf$TimeInGroup,
Value = summDf$mean_qscore_template / 50 * 100,
Category = "Quality"
)
poreDf <- poreL[[sampleName]]
poreDf$Active <- "No"
poreDf$Active[which(poreDf$Channel.State %in% c("strand", "adapter"))] <- "Yes"
poreAct <- poreDf %>% group_by(Experiment.Time..minutes.) %>% summarise(Total = sum(State.Time..samples.), Live = sum(State.Time..samples.[Active == "Yes"]))
min2HalfDayBreaks <- seq(-1, (max(poreAct$Experiment.Time..minutes.) + 720), 720)
poreAct$TimeInGroup <- cut(poreAct$Experiment.Time..minutes., breaks = min2HalfDayBreaks, labels = as.character(seq(0.5, (length(min2HalfDayBreaks)-1)/2, 0.5)))
poreAct <- poreAct %>% mutate(Prop = Live / Total)
csvDf <- data.frame(
TimeInGroup = poreAct$TimeInGroup,
Value = poreAct$Prop * 100,
Category = "Active pores"
)
poreActivityPlot <- poreActivity(df = rbind(csvDf, tsvDf), cols = c("#1B9E77", "#D95F02"))
cat("#### ", sampleName, "\n")
cat("\n")
cat(" * Read quality and pore activity over time.\n")
print(poreActivityPlot)
cat("\n\n")
} else {
summDf <- summaryL[[sampleName]]
sec2HalfDayBreaks <- seq(-1, (max(summDf$start_time) + 43200), 43200)
summDf$TimeInGroup <- cut(summDf$start_time, breaks = sec2HalfDayBreaks, labels = as.character(seq(0.5, (length(sec2HalfDayBreaks)-1)/2, 0.5)))
tsvDf <- data.frame(
TimeInGroup = summDf$TimeInGroup,
Value = summDf$mean_qscore_template / 50 * 100,
Category = "Quality"
)
csvDf <- data.frame( # dummy data for consistent visualization
TimeInGroup = as.character(seq(0.5, (length(sec2HalfDayBreaks)-1)/2, 0.5)),
Value = rep(0, length(as.character(seq(0.5, (length(sec2HalfDayBreaks)-1)/2, 0.5)))),
Category = "Active pores"
)
poreActivityPlot <- poreActivity(df = rbind(csvDf, tsvDf), cols = c("#1B9E77", "#D95F02"))
cat("#### ", sampleName, "\n")
cat("\n")
cat(" * _MinKNOW_ failed to produce a pore activity report.\n")
print(poreActivityPlot)
cat("\n\n")
}
}
data <- read.table(file.path(baseDir, "Data", "NanoStats_summary.txt"), header = T, stringsAsFactors = F, sep = "\t")
data$Plate <- sapply(str_split(data$Plate_Barcode, "_"), "[[", 1)
data$Plate <- factor(data$Plate, levels = c("Donors", "Plate01", "Plate02", "Plate03", "Plate04", "Plate05", "Plate06", "Plate07", "Plate08", "Plate09", "Plate10", "Plate11", "Plate12", "Plate13", "Plate14"))
data$Barcode <- sapply(str_split(data$Plate_Barcode, "_"), "[[", 2)
head(data[,c("Plate", "Barcode", "number_of_reads", "n50")])
## Plate Barcode number_of_reads n50
## 1 Donors barcode09 3581458 4913
## 2 Donors barcode10 4325293 4940
## 3 Donors barcode11 7151873 3846
## 4 Donors barcode12 8050965 2692
## 5 Donors barcode13 6104907 4512
## 6 Donors barcode14 4970546 3838
ggplot(data = data, aes(x = Plate, y = log10(number_of_reads + 1), fill = Plate)) +
geom_violin(position = dodge, size = 0) +
geom_boxplot(width = 0.1, position = dodge, fill="white") +
scale_fill_manual(values = sampleCols) +
labs(
x = "Plate",
y = "Number of reads, log10"
) +
theme_bw() +
theme(
axis.line = element_line(colour = "black"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.text.x = element_text(angle = 0, vjust = 0, hjust = 0.5),
legend.position = "none",
text = element_text(size = 12)
)
ggplot(data = data, aes(x = Plate, y = number_of_bases/number_of_reads, fill = Plate)) +
geom_violin(position = dodge, size = 0) +
geom_boxplot(width = 0.1, position = dodge, fill="white") +
scale_fill_manual(values = sampleCols) +
labs(
x = "Plate",
y = "Number of bases per read"
) +
theme_bw() +
theme(
axis.line = element_line(colour = "black"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.text.x = element_text(angle = 0, vjust = 0, hjust = 0.5),
legend.position = "none",
text = element_text(size = 12)
)
ggplot(data, aes(x = n50, y = Plate, fill = ..x..)) +
geom_density_ridges_gradient(scale = 3, rel_min_height = 0.01) +
scale_fill_viridis(name = "N50", option = "F") +
labs(
x = "N50",
y = "Plate"
) +
theme_bw() +
theme(
axis.line = element_line(colour = "black"),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.text.x = element_text(angle = 0, vjust = 0, hjust = 0.5),
text = element_text(size = 12)
)
qualDf <- data.frame(
Plate = data$Plate,
Barcode = data$Barcode,
Lt10 = data$number_of_reads - data$Reads..Q10._count,
Gt10Lt15 = data$Reads..Q10._count - data$Reads..Q15._count,
Gt15lt20 = data$Reads..Q15._count - data$Reads..Q20._count,
Gt20lt25 = data$Reads..Q20._count - data$Reads..Q25._count,
Gt25lt30 = data$Reads..Q25._count - data$Reads..Q30._count,
Gt30 = data$Reads..Q30._count
)
qDf <- melt(qualDf, value.name = "Count")
colnames(qDf) <- c("Plate", "Barcode", "Quality", "Count")
qDf$Quality <- factor(qDf$Quality, levels = c("Gt30", "Gt25lt30", "Gt20lt25", "Gt15lt20", "Gt10Lt15", "Lt10"), labels = c(">=30", ">=25 and <30", ">=20 and <25", ">=15 and <20", ">=10 and <15", "<10"))
ggplot(qDf, aes(x = Plate, y = Count, fill = Quality)) +
geom_bar(position="fill", stat="identity") +
labs(
x = "Plate",
y = "Proportion"
) +
theme_bw() +
theme(
axis.line = element_line(colour = "black"),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.text.x = element_text(angle = 0, vjust = 0, hjust = 0.5),
text = element_text(size = 12)
) + scale_fill_grey()
ggplot(data, aes(x = median_qual, y = Plate, fill = ..x..)) +
geom_density_ridges_gradient(scale = 3, rel_min_height = 0.01) +
scale_fill_viridis(name = "Phred", option = "C") +
labs(
x = "Quality, median",
y = "Plate"
) +
theme_bw() +
theme(
axis.line = element_line(colour = "black"),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.text.x = element_text(angle = 0, vjust = 0, hjust = 0.5),
text = element_text(size = 12)
)
ggplot(data, aes(x = mean_qual, y = Plate, fill = ..x..)) +
geom_density_ridges_gradient(scale = 3, rel_min_height = 0.01) +
scale_fill_viridis(name = "Phred", option = "C") +
labs(
x = "Quality, mean",
y = "Plate"
) +
theme_bw() +
theme(
axis.line = element_line(colour = "black"),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.text.x = element_text(angle = 0, vjust = 0, hjust = 0.5),
text = element_text(size = 12)
)
ggplot(data, aes(x = median_read_length, y = Plate, fill = ..x..)) +
geom_density_ridges_gradient(scale = 3, rel_min_height = 0.01) +
scale_fill_viridis(name = "bp", option = "D") +
labs(
x = "Length, median",
y = "Plate"
) +
theme_bw() +
theme(
axis.line = element_line(colour = "black"),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.text.x = element_text(angle = 0, vjust = 0, hjust = 0.5),
text = element_text(size = 12)
)
ggplot(data, aes(x = mean_read_length, y = Plate, fill = ..x..)) +
geom_density_ridges_gradient(scale = 3, rel_min_height = 0.01) +
scale_fill_viridis(name = "bp", option = "D") +
labs(
x = "Length, mean",
y = "Plate"
) +
theme_bw() +
theme(
axis.line = element_line(colour = "black"),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.text.x = element_text(angle = 0, vjust = 0, hjust = 0.5),
text = element_text(size = 12)
)
sampleAnnot <- read.table(file.path(baseDir, "Data", "sample_annot.txt"), header = T, stringsAsFactors = F, sep = "\t")
annotDf <- sampleAnnot %>% make_long(DonorID, MouseSex, Week, Type)
ggplot(annotDf, aes(x = x, next_x = next_x, node = node, next_node = next_node, fill = factor(node), label = node)) +
geom_sankey(flow.alpha = .6, node.color = "gray30") +
geom_sankey_label(size = 3, color = "black", fill = "white") +
scale_fill_viridis_d(option = "A", alpha = 0.95) +
theme_sankey(base_size = 18) +
labs(
x = NULL,
y = NULL
) +
theme_bw() +
theme(
axis.line = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.text.x = element_text(angle = 0, vjust = 0, hjust = 0.5),
axis.ticks.x = element_blank(),
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
legend.position = "none",
text = element_text(size = 12)
)
sessioninfo::session_info(pkgs = "attached")
## ─ Session info ───────────────────────────────────────────────────────────────────────
## setting value
## version R version 4.5.1 (2025-06-13)
## os macOS Tahoe 26.0.1
## system aarch64, darwin20
## ui X11
## language (EN)
## collate en_CA.UTF-8
## ctype en_CA.UTF-8
## tz America/Edmonton
## date 2025-11-06
## pandoc 3.8.2 @ /opt/homebrew/bin/ (via rmarkdown)
##
## ─ Packages ───────────────────────────────────────────────────────────────────────────
## package * version date (UTC) lib source
## cowplot * 1.2.0 2025-07-07 [1] CRAN (R 4.5.0)
## dplyr * 1.1.4 2023-11-17 [1] CRAN (R 4.5.0)
## ggplot2 * 4.0.0 2025-09-11 [1] CRAN (R 4.5.0)
## ggrepel * 0.9.6 2024-09-07 [1] CRAN (R 4.5.0)
## ggridges * 0.5.7 2025-08-27 [1] CRAN (R 4.5.0)
## ggsankey * 0.0.99999 2025-11-04 [1] Github (davidsjoberg/ggsankey@b675d0d)
## knitr * 1.50 2025-03-16 [1] CRAN (R 4.5.0)
## RColorBrewer * 1.1-3 2022-04-03 [1] CRAN (R 4.5.0)
## reshape2 * 1.4.4 2020-04-09 [1] CRAN (R 4.5.0)
## sessioninfo * 1.2.3 2025-02-05 [1] CRAN (R 4.5.0)
## stringr * 1.5.2 2025-09-08 [1] CRAN (R 4.5.0)
## viridis * 0.6.5 2024-01-29 [1] CRAN (R 4.5.0)
## viridisLite * 0.4.2 2023-05-02 [1] CRAN (R 4.5.0)
## yaml * 2.3.10 2024-07-26 [1] CRAN (R 4.5.0)
##
## [1] /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/library
## * ── Packages attached to the search path.
##
## ──────────────────────────────────────────────────────────────────────────────────────