diff --git a/NAMESPACE b/NAMESPACE index 158d263..c028684 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,7 +4,6 @@ export("reduced<-") export(ConnectedComponents) export(PSM) export(addFragments) -export(calculateFragments2) export(ccMatrix) export(connectedComponents) export(defaultNeutralLoss) diff --git a/R/fragments-add.R b/R/fragments-add.R index 72e9f97..f37639a 100644 --- a/R/fragments-add.R +++ b/R/fragments-add.R @@ -80,6 +80,12 @@ addFragments <- function(x, x_data <- Spectra::peaksData(x)[[1L]] y_data <- calculateFragments(y, verbose = FALSE, ...) y_data <- y_data[order(y_data$mz), ] + + ## stop if variable modifications used + ## Temporary check to allow plotSpectra to work fine + ## Will need to be removed once plotSpectra accepts variable modifications + ## See issue: https://github.com/rformassspectrometry/Spectra/issues/346 + stopifnot(length(unique(y_data[["peptide"]])) == 1) ## Find common peaks and prepare annotations idx <- which(MsCoreUtils::common(x_data[, "mz"], y_data[, "mz"], diff --git a/R/fragments-calculate.R b/R/fragments-calculate.R index 4b1cec6..0a66f3e 100644 --- a/R/fragments-calculate.R +++ b/R/fragments-calculate.R @@ -1,12 +1,12 @@ -#' @title Calculate ions produced by fragmentation -#' -#' @aliases defaultNeutralLoss calculateFragments calculateFragments,character,missing-method +#' @title Calculate ions produced by fragmentation with variable modifications +#' +#' @aliases calculateFragments modificationPositions defaultNeutralLoss calculateFragments,character,missing-method #' #' @name calculateFragments -#' +#' #' @description #' -#' These method calculates a-, b-, c-, x-, y- and z-ions produced by +#' This method calculates a-, b-, c-, x-, y- and z-ions produced by #' fragmentation. #' #' Available methods @@ -14,33 +14,35 @@ #' - The default method with signature `sequence = "character"` and #' `object = "missing"` calculates the theoretical fragments for a #' peptide sequence. It returns a `data.frame` with the columns -#' `mz`, `ion`, `type`, `pos`, `z` and `seq`. +#' `mz`, `ion`, `type`, `pos`, `z`, `seq` and `peptide`. #' #' - Additional method can be defined that will adapt their behaviour #' based on spectra defined in `object`. See for example the MSnbase #' package that implements a method for objects of class #' `Spectrum2`. -#' -#' @return The methods with `oject = "missing"` returns a -#' `data.frame`. -#' -#' @param sequence character() providing a peptide sequence. -#' -#' @param type `character` vector of target ions; possible values: -#' `c("a", "b", "c", "x", "y", "z")`. Default is `type = c("b", -#' "y")`. -#' -#' @param z `numeric` with desired charge state; default is 1. -#' -#' @param modifications A named `numeric` vector of used -#' modifications. The name must correspond to the one-letter-code -#' of the modified amino acid and the `numeric` value must -#' represent the mass that should be added to the original amino -#' accid mass, default: Carbamidomethyl `modifications = c(C = -#' 57.02146)`. Use `Nterm` or `Cterm` as names for modifications -#' that should be added to the amino respectively -#' carboxyl-terminus. -#' +#' +#' @param sequence `character()` providing a peptide sequence. +#' +#' @param type `character` vector of target ions; possible values: +#' `c("a", "b", "c", "x", "y", "z")`. Default is `type = c("b", "y")`. +#' +#' @param z numeric with a desired charge state; default is 1. +#' +#' @param fixed_modifications A named `numeric` vector of used fixed modifications. +#' The name must correspond to the one-letter-code of the modified amino acid +#' and the numeric value must represent the mass that should be added to the +#' original amino accid mass, default: Carbamidomethyl modifications = +#' c(C = 57.02146). Use Nterm or Cterm as names for modifications that should +#' be added to the amino respectively carboxyl-terminus. +#' +#' @param variable_modifications A named `numeric` vector of variable modifications. +#' Depending on the maximum number of modifications (`max_mods`), all possible +#' combinations are returned. +#' +#' @param max_mods A numeric indicating the maximum number of variable modifications +#' allowed on the sequence at once. Does not include fixed modifications. +#' Default value is positive infinity. +#' #' @param neutralLoss `list`, it has to have two named elments, #' namely `water` and `ammonia` that contain a `character` vector #' which type of neutral loss should be calculated. Currently @@ -53,26 +55,44 @@ #' the correct list. It has two arguments `disableWaterLoss` and #' `disableAmmoniaLoss` to remove single neutral loss options. See #' the example section for use cases. -#' -#' @param verbose `logical(1)`. If `TRUE` (default) the used -#' modifications are printed. -#' +#' +#' @param verbose `logical(1)`. If `TRUE` (default) the used modifications are printed. +#' +#' @param modifications Named `numeric()`. Deprecated modifications parameter. +#' Will override `fixed_modifications` but is set to `NULL` by default. Please +#' refrain from using it, opt for `fixed_modifications` instead. +#' +#' @return A `data.frame` showing all the +#' ions produced by fragmentation with all possible combinations of modifications. +#' The used variable modifications are displayed in the `peptide` column through the +#' use of amino acids followed by the modification within brackets. +#' Fixed modifications are not displayed. +#' #' @author Sebastian Gibb +#' +#' @author Guillaume Deflandre #' #' @importFrom ProtGenerics calculateFragments #' #' @exportMethod calculateFragments -#' +#' #' @examples -#' +#' ## General use +#' calculateFragments(sequence = "ARGSHKATC", type = c("b", "y"), z = 1, +#' fixed_modifications = c(C = 57), variable_modifications = c(S = 79, Y = 79, T = 79), +#' max_mods = 2) +#' #' ## calculate fragments for ACE with default modification -#' calculateFragments("ACE", modifications = c(C = 57.02146)) +#' calculateFragments("ACE", fixed_modifications = c(C = 57.02146)) #' -#' ## calculate fragments for ACE with an addition N-terminal modification -#' calculateFragments("ACE", modifications = c(C = 57.02146, Nterm = 229.1629)) +#' #' ## calculate fragments for ACE with an added variable modification +#' calculateFragments("ACE", variable_modifications = c(A = 43.25)) +#' +#' ## calculate fragments for ACE with an added N-terminal modification +#' calculateFragments("ACE", fixed_modifications = c(C = 57.02146, Nterm = 229.1629)) #' #' ## calculate fragments for ACE without any modifications -#' calculateFragments("ACE", modifications = NULL) +#' calculateFragments("ACE", fixed_modifications = NULL) #' #' calculateFragments("VESITARHGEVLQLRPK", #' type = c("a", "b", "c", "x", "y", "z"), @@ -93,16 +113,25 @@ #' #' ## disable neutral loss completely #' calculateFragments("PQR", neutralLoss=NULL) +#' setMethod("calculateFragments", c("character", "missing"), function(sequence, type = c("b", "y"), z = 1, - modifications = c(C = 57.02146), + fixed_modifications = c(C = 57.02146), + variable_modifications = numeric(), + max_mods = Inf, neutralLoss = defaultNeutralLoss(), - verbose = TRUE) { - l <- lapply(sequence, .calculateFragments, - type = type, z = z, modifications = modifications, - neutralLoss = neutralLoss, verbose = verbose) - return(do.call(rbind, l)) - }) + verbose = TRUE, + modifications = NULL) { + l <- lapply(sequence, .calculateFragments, + type = type, z = z, + fixed_modifications = fixed_modifications, + variable_modifications = variable_modifications, + max_mods = Inf, + neutralLoss = neutralLoss, + verbose = verbose, + modifications = modifications) + return(do.call(rbind, l)) + }) #' calculate fragments from a peptide sequence @@ -113,27 +142,72 @@ setMethod("calculateFragments", c("character", "missing"), #' #' @param z charge #' -#' @param modifications a named (amino acid one-letter-code; upper -#' case) vector of mass that should be added (default: -#' Carbamidomethyl 57.02146 is added to Cystein (C) and results in -#' 160.030649). -#' -#' @param neutralLoss list, currently water and ammonia loss are supported -#' -#' @param verbose verbose output? +#' @param fixed_modifications A named `numeric` vector of used fixed modifications. +#' The name must correspond to the one-letter-code of the modified amino acid +#' and the numeric value must represent the mass that should be added to the +#' original amino accid mass, default: Carbamidomethyl modifications = +#' c(C = 57.02146). Use Nterm or Cterm as names for modifications that should +#' be added to the amino respectively carboxyl-terminus. +#' +#' @param variable_modifications A named `numeric` vector of variable modifications. +#' Depending on the maximum number of modifications (`max_mods`), all possible +#' combinations are returned. +#' +#' @param max_mods A numeric indicating the maximum number of variable modifications +#' allowed on the sequence at once. Does not include fixed modifications. +#' Default value is positive infinity. +#' +#' @param neutralLoss `list`, it has to have two named elments, +#' namely `water` and `ammonia` that contain a `character` vector +#' which type of neutral loss should be calculated. Currently +#' neutral loss on the C terminal `"Cterm"`, at the amino acids +#' `c("D", "E", "S", "T")` for `"water"` (shown with an `_`) and +#' `c("K", "N", "Q", "R")` for `"ammonia"` (shown with an `*`) are +#' supported. #' +#' There is a helper function `defaultNeutralLoss()` that returns +#' the correct list. It has two arguments `disableWaterLoss` and +#' `disableAmmoniaLoss` to remove single neutral loss options. See +#' the example section for use cases. +#' +#' @param verbose `logical(1)`. If `TRUE` (default) the used modifications are printed. +#' +#' @param modifications Named `numeric()`. Deprecated modifications parameter. +#' Will override `fixed_modifications` but is set to `NULL` by default. Please +#' refrain from using it, opt for `fixed_modifications` instead. +#' #' @importFrom stats setNames #' #' @noRd -.calculateFragments <- function(sequence, type = c("b", "y"), z = 1, - modifications = c(C = 57.02146), +.calculateFragments <- function(sequence, + type = c("b", "y"), + z = 1, + fixed_modifications = c(C = 57.02146), + variable_modifications = numeric(), + max_mods = Inf, neutralLoss = defaultNeutralLoss(), - verbose = TRUE) { + verbose = TRUE, + modifications = NULL) { if (nchar(sequence) <= 1L) { stop("'sequence' has to have two or more residues.") } - - type <- match.arg(type, choices=c("a", "b", "c", "x", "y", "z"), several.ok=TRUE) + + if (!is.null(modifications)) { + warning("'modifications' is deprecated, please use 'fixed_modifications' instead.") + fixed_modifications <- modifications + } + + ## split peptide sequence into aa + fragment.seq <- strsplit(sequence, "")[[1]] + fn <- length(fragment.seq) + + mod_combinations <- + .modificationPositions(fragment.seq, + variable_modifications, + max_mods) + type <- match.arg(type, + choices = c("a", "b", "c", "x", "y", "z"), + several.ok=TRUE) type <- sort(type) ## constants mass <- getAtomicMass() @@ -154,61 +228,82 @@ setMethod("calculateFragments", c("character", "missing"), x=mass["C"]+2*mass["O"], # + CO + OH y=2*mass["H"]+mass["O"], # + H2 + OH z=-(mass["N"]+mass["H"])+mass["O"]) # + NH + OH - + aa <- getAminoAcids() aamass <- setNames(aa$ResidueMass, aa$AA) - - ## replace default mass by modifications - if (length(modifications)) { - aamass[names(modifications)] <- aamass[names(modifications)] + modifications + + ## replace default mass by masses with fixed modifications + if (length(fixed_modifications)) { + aamass[names(fixed_modifications)] <- + aamass[names(fixed_modifications)] + fixed_modifications } - + + ## message used modifications if (verbose) { - if (length(modifications)) { - mods <- paste0(names(modifications), "=", modifications, collapse=", ") + if (length(fixed_modifications)) { + mods <-paste0(names(fixed_modifications), + "=", + fixed_modifications, + collapse=", ") } else { mods <- "None" } - message("Modifications used: ", mods) + if (length(variable_modifications)) { + mods2 <- paste0(names(variable_modifications), + "=", + variable_modifications, + collapse=", ") + } else { + mods2 <- "None" + } + message("Fixed modifications used: ", mods, + "\nVariable modifications used: ", mods2) } - - ## split peptide sequence into aa - fragment.seq <- strsplit(sequence, "")[[1]] - fn <- length(fragment.seq) - + ## calculate cumulative mass starting at the amino-terminus (for a, b, c) amz <- cumsum(aamass[fragment.seq[-fn]]) ## calculate cumulative mass starting at the carboxyl-terminus (for x, y, z) cmz <- cumsum(aamass[rev(fragment.seq[-1L])]) - + ## calculate fragment mass (amino-terminus) tn <- length(amz) atype <- c("a", "b", "c") %in% type nat <- sum(atype) - amz <- rep(amz, nat) + rep(add[1:3][atype], each=tn) - ## calculate fragment mass (carboxyl-terminus) ctype <- c("x", "y", "z") %in% type nct <- sum(ctype) - cmz <- rep(cmz, nct) + rep(add[4:6][ctype], each=tn) - + ## devide by charge zn <- length(z) - amz <- rep(amz, each = zn)/z - cmz <- rep(cmz, each = zn)/z - - ## add protons (H+) - amz <- amz + mass["p"] - cmz <- cmz + mass["p"] - + ## fragment seq (amino-terminus) aseq <- rep(rep(substring(sequence, rep(1L, fn - 1L), 1L:(fn - 1L)), each = zn), nat) - + ## fragment seq (carboxyl-terminus) cseq <- rep(rep(rev(substring(sequence, 2L:fn, rep(fn, fn - 1L))), each=zn), nct) - + + ## add the variable modifications and apply steps above + amz_mod <- vector("list", length(mod_combinations)) + cmz_mod <- vector("list", length(mod_combinations)) + df <- vector("list", length(mod_combinations)) + + for (i in 1:length(mod_combinations)) { + amz_mod[[i]] <- .cumsumFragmentMasses(mod_combinations[[i]], amz) + cmz_mod[[i]] <- .cumsumFragmentMasses(rev(mod_combinations[[i]]), cmz) + + amz_mod[[i]] <- rep(amz_mod[[i]], nat) + rep(add[1:3][atype], each=tn) + cmz_mod[[i]] <- rep(cmz_mod[[i]], nct) + rep(add[4:6][ctype], each=tn) + + amz_mod[[i]] <- rep(amz_mod[[i]], each = zn)/z + cmz_mod[[i]] <- rep(cmz_mod[[i]], each = zn)/z + + ## add protons (H+) + amz_mod[[i]] <- amz_mod[[i]] + mass["p"] + cmz_mod[[i]] <- cmz_mod[[i]] + mass["p"] + } + ## fragment str (amino-terminus) atype <- rep(c("a", "b", "c")[atype], each = tn * zn) pos <- rep(1L:tn, each = zn) @@ -217,7 +312,7 @@ setMethod("calculateFragments", c("character", "missing"), } else { aion <- character() } - + ## fragment str (carboxyl-terminus) ctype <- rep(c("x", "y", "z")[ctype], each = tn * zn) if (length(ctype)) { @@ -225,23 +320,114 @@ setMethod("calculateFragments", c("character", "missing"), } else { cion <- character() } - - df <- data.frame(mz = c(amz, cmz), - ion = c(aion, cion), - type = c(atype, ctype), - pos = pos, - z = z, - seq = c(aseq, cseq), - stringsAsFactors = FALSE) - df <- .neutralLoss(df, water = neutralLoss$water, ammonia = neutralLoss$ammonia) - df <- .terminalModifications(df, modifications = modifications) + + ## generate unique dataframe with all fragments and modifications + for (i in 1:length(mod_combinations)) { + df[[i]] <- data.frame(mz = c(amz_mod[[i]], cmz_mod[[i]]), + ion = c(aion, cion), + type = c(atype, ctype), + pos = pos, + z = z, + seq = c(aseq, cseq), + stringsAsFactors = FALSE) + df[[i]] <- .neutralLoss(df[[i]], + water = neutralLoss$water, + ammonia = neutralLoss$ammonia) + df[[i]] <- .terminalModifications(df[[i]], + modifications = fixed_modifications) + rownames(df[[i]]) <- NULL + non_zero <- mod_combinations[[i]] != 0 + names(mod_combinations[[i]])[non_zero] <- + paste0(names(mod_combinations[[i]])[non_zero], + "[", + mod_combinations[[i]][non_zero], + "]") + df[[i]][["peptide"]] <- paste(names(mod_combinations[[i]]), + collapse = "") + } + + df <- do.call(rbind, df) rownames(df) <- NULL df } +#' @title Generates list of possible combinations of modifications +#' +#' @param sequence Character. A peptide sequence that may have modifications or not +#' +#' @param fixed_modifications Named numeric. Specifies which fixed modifications are used +#' +#' @param variable_modifications Named numeric. Specifies which variable modifications are used +#' +#' @param max_mods Numeric. Indicates how many modifications can be applied at once. +#' +#' @return list with all possible combinations of modifications +#' +#' @author Guillaume Deflandre +#' +#' @importFrom utils combn +#' +#' @noRd +#' +#' @examples +#' .modificationPositions("ARGHKA", variable_modifications = c(A = 4, K = 5, S = 8), max_mods = 3) +#' +.modificationPositions <- function(fragment.seq, + variable_modifications = numeric(), + max_mods = Inf) { + modifiable_positions_var <- + which(fragment.seq %in% names(variable_modifications)) + + l <- length(modifiable_positions_var) + + ## take the maximum amount of modifications possible + max_mods <- min(max_mods, l) + + if (!length(variable_modifications) || max_mods <= 0) + return( + list(setNames(integer(length(fragment.seq)), fragment.seq)) + ) + + .mod <- function(cmb, + seq_split = fragment.seq, + var_mods = variable_modifications) { + m <- setNames(integer(length(seq_split)), seq_split) + m[cmb] <- var_mods[seq_split[cmb]] + m + } + + c( + list(setNames(integer(length(fragment.seq)), fragment.seq)), + if (length(modifiable_positions_var) == 1) + lapply(modifiable_positions_var, .mod) + else + unlist( + lapply(seq_len(max_mods), + function(n)combn( + modifiable_positions_var, n, + FUN = .mod, + simplify = FALSE + ) + ), + recursive = FALSE + ) + ) +} + +.cumsumFragmentMasses <- function(modificationCombination, fragmentMasses) { + + modificationCombination <- + modificationCombination[-NROW(modificationCombination)] + + fragmentMasses + cumsum(modificationCombination) +} + #' adds neutral loss to data.frame generated by .calculateFragments +#' #' @param df data.frame generated by. calculateFragments +#' #' @return data.frame neutral loss rows added +#' #' @noRd .neutralLoss <- function(df, water = c("Cterm", "D", "E", "S", "T"), @@ -249,13 +435,13 @@ setMethod("calculateFragments", c("character", "missing"), ## see "Low energy peptide fragmentation pathways" by Hugh-G. Patterton, Ph.D. ## http://cbio.ufs.ac.za/fgap/download/fragmentation_review.pdf ## see also discussion #47: https://github.com/lgatto/MSnbase/issues/47 - + ## constants mass <- getAtomicMass() - + widx <- double() aidx <- double() - + .removeNeutralLoss <- function(df, idx, mass, ion) { if (length(idx)) { loss <- df[idx, ] @@ -266,32 +452,32 @@ setMethod("calculateFragments", c("character", "missing"), df } } - + if (length(water)) { ## N-term D/E, internal S/T rules <- c(D = "^D.", E = "^E.", S = ".S.", T = ".T.") rules <- rules[intersect(c("D", "E", "S", "T"), water)] - + if (length(rules)) { widx <- grep(paste0(rules, collapse = "|"), df$seq) } - + ## C-term COOH (all x, y, z fragments) if ("Cterm" %in% water) { widx <- unique(c(widx, grep("[xyz]", df$type))) } } - + if (length(ammonia)) { ## N-term/internal K/N/Q, internal R rules <- c(K = "^.*K.", N = "^.*N.", Q = "^.*Q.", R = ".R.") rules <- rules[intersect(c("K", "N", "Q", "R"), ammonia)] - + if (length(rules)) { aidx <- grep(paste0(rules, collapse="|"), df$seq) } } - + if (length(widx)) { df <- .removeNeutralLoss(df, idx = widx, mass = 2*mass["H"]+mass["O"], ion = "_") } @@ -310,23 +496,23 @@ setMethod("calculateFragments", c("character", "missing"), #' #' @noRd .terminalModifications <- function(df, modifications) { - + if ("Nterm" %in% names(modifications)) { isABC <- grep("[abc]", df$type) - + if (length(isABC)) { df$mz[isABC] <- df$mz[isABC] + modifications["Nterm"] / df$z[isABC] } } - + if ("Cterm" %in% names(modifications)) { isXYZ <- grep("[xyz]", df$type) - + if (length(isXYZ)) { df$mz[isXYZ] <- df$mz[isXYZ] + modifications["Cterm"] / df$z[isXYZ] } } - + df } @@ -343,4 +529,4 @@ setMethod("calculateFragments", c("character", "missing"), defaultNeutralLoss <- function(disableWaterLoss = NULL, disableAmmoniaLoss = NULL) { list(water = setdiff(c("Cterm", "D", "E", "S", "T"), disableWaterLoss), ammonia = setdiff(c("K", "N", "Q", "R"), disableAmmoniaLoss)) -} +} \ No newline at end of file diff --git a/R/fragments-calculate2.R b/R/fragments-calculate2.R deleted file mode 100644 index a79967b..0000000 --- a/R/fragments-calculate2.R +++ /dev/null @@ -1,291 +0,0 @@ -#' @title Calculate ions produced by fragmentation with variable modifications -#' -#' @aliases calculateFragments2, modificationPositions, cumsumFragmentMasses, character,missing-method -#' -#' @name calculateFragments2 -#' -#' @param sequence character() providing a peptide sequence. -#' -#' @param type character vector of target ions; possible values: -#' c("a", "b", "c", "x", "y", "z"). Default is type = c("b", "y"). -#' -#' @param z numeric with a desired charge state; default is 1. -#' -#' @param fixed_modifications A named numeric vector of used fixed modifications. -#' The name must correspond to the one-letter-code of the modified amino acid -#' and the numeric value must represent the mass that should be added to the -#' original amino accid mass, default: Carbamidomethyl modifications = -#' c(C = 57.02146). Use Nterm or Cterm as names for modifications that should -#' be added to the amino respectively carboxyl-terminus. -#' -#' @param variable_modifications A named numeric vector of variable modifications. -#' Depending on the maximum number of modifications (`max_mods`), all possible -#' combinations are returned. -#' -#' @param max_mods A numeric indicating the maximum number of variable modifications -#' allowed on the sequence at once. Does not include fixed modifications. -#' Default value is positive infinity. -#' -#' @param neutralLoss list, it has to have two named elments, namely water and -#' ammonia that contain a character vector which type of neutral loss should be -#' calculated. Currently neutral loss on the C terminal "Cterm", at the amino -#' acids c("D", "E", "S", "T") for "water" (shown with an ⁠_⁠) an -#' d c("K", "N", "Q", "R") for "ammonia" (shown with an *) are supported. -#' -#' @param verbose logical(1). If TRUE (default) the used modifications are printed. -#' -#' @param modifications Named `numeric()`. Deprecated modifications parameter. -#' Will override `fixed_modifications` but is set to `NULL` by default. Please -#' refrain from using it, opt for `fixed_modifications` instead. -#' -#' @return A dataframe showing all the -#' ions produced by fragmentation with all possible combinations of modifications. -#' The used variable modifications are displayed in the peptide column through the -#' use of amino acids within brackets. Fixed modifications are not displayed. -#' -#' -#' @examples -#' calculateFragments2(sequence = "ARGSHKATC", type = c("b", "y"), z = 1, -#' fixed_modifications = c(C = 57), variable_modifications = c(S = 79, Y = 79, T = 79), -#' max_mods = 2) -#' @export -calculateFragments2 <- function(sequence, - type = c("b", "y"), - z = 1, - fixed_modifications = c(C = 57.02146), - variable_modifications = numeric(), - max_mods = Inf, - neutralLoss = defaultNeutralLoss(), - verbose = TRUE, - modifications = NULL) { - if (nchar(sequence) <= 1L) { - stop("'sequence' has to have two or more residues.") - } - - if (!is.null(modifications)) { - warning("'modifications' is deprecated, please use 'fixed_modifications' instead.") - fixed_modifications <- modifications - } - - ## split peptide sequence into aa - fragment.seq <- strsplit(sequence, "")[[1]] - fn <- length(fragment.seq) - - mod_combinations <- - .modificationPositions(fragment.seq, - variable_modifications, - max_mods) - type <- match.arg(type, - choices = c("a", "b", "c", "x", "y", "z"), - several.ok=TRUE) - type <- sort(type) - ## constants - mass <- getAtomicMass() - ## according to Table 1 of: - ## Johnson, R. S., Martin, S. A., Biemann, K., Stults, J. T., and - ## Watson, J. T. (1987). - ## Novel fragmentation process of peptides by collision-induced - ## decomposition in a tandem mass spectrometer: differentiation of leucine - ## and isoleucine. - ## Analytical Chemistry, 59(21), 2621-2625. - ## https://doi.org/10.1021/ac00148a019 - ## - ## a proton (H+) is added later - ## (after calculation of the different charge states) - add <- c(a=-(mass["C"]+mass["O"]), # + H - CO - b=0, # + H - c=mass["N"]+3*mass["H"], # + H + NH3 - x=mass["C"]+2*mass["O"], # + CO + OH - y=2*mass["H"]+mass["O"], # + H2 + OH - z=-(mass["N"]+mass["H"])+mass["O"]) # + NH + OH - - aa <- getAminoAcids() - aamass <- setNames(aa$ResidueMass, aa$AA) - - ## replace default mass by masses with fixed modifications - if (length(fixed_modifications)) { - aamass[names(fixed_modifications)] <- - aamass[names(fixed_modifications)] + fixed_modifications - } - - ## message used modifications - if (verbose) { - if (length(fixed_modifications)) { - mods <-paste0(names(fixed_modifications), - "=", - fixed_modifications, - collapse=", ") - } else { - mods <- "None" - } - if (length(variable_modifications)) { - mods2 <- paste0(names(variable_modifications), - "=", - variable_modifications, - collapse=", ") - } else { - mods2 <- "None" - } - message("Fixed modifications used: ", mods, - "\nVariable modifications used: ", mods2) - } - - ## calculate cumulative mass starting at the amino-terminus (for a, b, c) - amz <- cumsum(aamass[fragment.seq[-fn]]) - ## calculate cumulative mass starting at the carboxyl-terminus (for x, y, z) - cmz <- cumsum(aamass[rev(fragment.seq[-1L])]) - - ## calculate fragment mass (amino-terminus) - tn <- length(amz) - atype <- c("a", "b", "c") %in% type - nat <- sum(atype) - ## calculate fragment mass (carboxyl-terminus) - ctype <- c("x", "y", "z") %in% type - nct <- sum(ctype) - - ## devide by charge - zn <- length(z) - - ## fragment seq (amino-terminus) - aseq <- rep(rep(substring(sequence, rep(1L, fn - 1L), - 1L:(fn - 1L)), each = zn), nat) - - ## fragment seq (carboxyl-terminus) - cseq <- rep(rep(rev(substring(sequence, 2L:fn, - rep(fn, fn - 1L))), each=zn), nct) - - ## add the variable modifications and apply steps above - amz_mod <- vector("list", length(mod_combinations)) - cmz_mod <- vector("list", length(mod_combinations)) - df <- vector("list", length(mod_combinations)) - - for (i in 1:length(mod_combinations)) { - amz_mod[[i]] <- .cumsumFragmentMasses(mod_combinations[[i]], amz) - cmz_mod[[i]] <- .cumsumFragmentMasses(rev(mod_combinations[[i]]), cmz) - - amz_mod[[i]] <- rep(amz_mod[[i]], nat) + rep(add[1:3][atype], each=tn) - cmz_mod[[i]] <- rep(cmz_mod[[i]], nct) + rep(add[4:6][ctype], each=tn) - - amz_mod[[i]] <- rep(amz_mod[[i]], each = zn)/z - cmz_mod[[i]] <- rep(cmz_mod[[i]], each = zn)/z - - ## add protons (H+) - amz_mod[[i]] <- amz_mod[[i]] + mass["p"] - cmz_mod[[i]] <- cmz_mod[[i]] + mass["p"] - } - - ## fragment str (amino-terminus) - atype <- rep(c("a", "b", "c")[atype], each = tn * zn) - pos <- rep(1L:tn, each = zn) - if (length(atype)) { - aion <- paste0(atype, pos) - } else { - aion <- character() - } - - ## fragment str (carboxyl-terminus) - ctype <- rep(c("x", "y", "z")[ctype], each = tn * zn) - if (length(ctype)) { - cion <- paste0(ctype, pos) - } else { - cion <- character() - } - - ## generate unique dataframe with all fragments and modifications - for (i in 1:length(mod_combinations)) { - df[[i]] <- data.frame(mz = c(amz_mod[[i]], cmz_mod[[i]]), - ion = c(aion, cion), - type = c(atype, ctype), - pos = pos, - z = z, - seq = c(aseq, cseq), - stringsAsFactors = FALSE) - df[[i]] <- .neutralLoss(df[[i]], - water = neutralLoss$water, - ammonia = neutralLoss$ammonia) - df[[i]] <- .terminalModifications(df[[i]], - modifications = fixed_modifications) - rownames(df[[i]]) <- NULL - non_zero <- mod_combinations[[i]] != 0 - names(mod_combinations[[i]])[non_zero] <- - paste0(names(mod_combinations[[i]])[non_zero], - "[", - mod_combinations[[i]][non_zero], - "]") - df[[i]][["peptide"]] <- paste(names(mod_combinations[[i]]), - collapse = "") - } - - df <- do.call(rbind, df) - rownames(df) <- NULL - df -} - -#' @title Generates list of possible combinations of modifications -#' -#' @param sequence Character. A peptide sequence that may have modifications or not -#' -#' @param fixed_modifications Named numeric. Specifies which fixed modifications are used -#' -#' @param variable_modifications Named numeric. Specifies which variable modifications are used -#' -#' @param max_mods Numeric. Indicates how many modifications can be applied at once. -#' -#' @return list with all possible combinations of modifications -#' -#' @importFrom utils combn -#' -#' @noRd -#' -#' @examples -#' .modificationPositions("ARGHKA", variable_modifications = c(A = 4, K = 5, S = 8), max_mods = 3) -#' -.modificationPositions <- function(fragment.seq, - variable_modifications = numeric(), - max_mods = Inf) { - modifiable_positions_var <- - which(fragment.seq %in% names(variable_modifications)) - - l <- length(modifiable_positions_var) - - ## take the maximum amount of modifications possible - max_mods <- min(max_mods, l) - - if (!length(variable_modifications) || max_mods <= 0) - return( - list(setNames(integer(length(fragment.seq)), fragment.seq)) - ) - - .mod <- function(cmb, - seq_split = fragment.seq, - var_mods = variable_modifications) { - m <- setNames(integer(length(seq_split)), seq_split) - m[cmb] <- var_mods[seq_split[cmb]] - m - } - - c( - list(setNames(integer(length(fragment.seq)), fragment.seq)), - if (length(modifiable_positions_var) == 1) - lapply(modifiable_positions_var, .mod) - else - unlist( - lapply(seq_len(max_mods), - function(n)combn( - modifiable_positions_var, n, - FUN = .mod, - simplify = FALSE - ) - ), - recursive = FALSE - ) - ) -} - -.cumsumFragmentMasses <- function(modificationCombination, fragmentMasses) { - - modificationCombination <- - modificationCombination[-NROW(modificationCombination)] - - fragmentMasses + cumsum(modificationCombination) -} diff --git a/man/calculateFragments.Rd b/man/calculateFragments.Rd index a669f31..67de741 100644 --- a/man/calculateFragments.Rd +++ b/man/calculateFragments.Rd @@ -2,34 +2,45 @@ % Please edit documentation in R/fragments-calculate.R \name{calculateFragments} \alias{calculateFragments} +\alias{modificationPositions} \alias{defaultNeutralLoss} \alias{calculateFragments,character,missing-method} -\title{Calculate ions produced by fragmentation} +\title{Calculate ions produced by fragmentation with variable modifications} \usage{ \S4method{calculateFragments}{character,missing}( sequence, type = c("b", "y"), z = 1, - modifications = c(C = 57.02146), + fixed_modifications = c(C = 57.02146), + variable_modifications = numeric(), + max_mods = Inf, neutralLoss = defaultNeutralLoss(), - verbose = TRUE + verbose = TRUE, + modifications = NULL ) } \arguments{ -\item{sequence}{character() providing a peptide sequence.} +\item{sequence}{\code{character()} providing a peptide sequence.} \item{type}{\code{character} vector of target ions; possible values: \code{c("a", "b", "c", "x", "y", "z")}. Default is \code{type = c("b", "y")}.} -\item{z}{\code{numeric} with desired charge state; default is 1.} +\item{z}{numeric with a desired charge state; default is 1.} -\item{modifications}{A named \code{numeric} vector of used -modifications. The name must correspond to the one-letter-code -of the modified amino acid and the \code{numeric} value must -represent the mass that should be added to the original amino -accid mass, default: Carbamidomethyl \code{modifications = c(C = 57.02146)}. Use \code{Nterm} or \code{Cterm} as names for modifications -that should be added to the amino respectively -carboxyl-terminus.} +\item{fixed_modifications}{A named \code{numeric} vector of used fixed modifications. +The name must correspond to the one-letter-code of the modified amino acid +and the numeric value must represent the mass that should be added to the +original amino accid mass, default: Carbamidomethyl modifications = +c(C = 57.02146). Use Nterm or Cterm as names for modifications that should +be added to the amino respectively carboxyl-terminus.} + +\item{variable_modifications}{A named \code{numeric} vector of variable modifications. +Depending on the maximum number of modifications (\code{max_mods}), all possible +combinations are returned.} + +\item{max_mods}{A numeric indicating the maximum number of variable modifications +allowed on the sequence at once. Does not include fixed modifications. +Default value is positive infinity.} \item{neutralLoss}{\code{list}, it has to have two named elments, namely \code{water} and \code{ammonia} that contain a \code{character} vector @@ -45,15 +56,21 @@ the correct list. It has two arguments `disableWaterLoss` and the example section for use cases. }\if{html}{\out{}}} -\item{verbose}{\code{logical(1)}. If \code{TRUE} (default) the used -modifications are printed.} +\item{verbose}{\code{logical(1)}. If \code{TRUE} (default) the used modifications are printed.} + +\item{modifications}{Named \code{numeric()}. Deprecated modifications parameter. +Will override \code{fixed_modifications} but is set to \code{NULL} by default. Please +refrain from using it, opt for \code{fixed_modifications} instead.} } \value{ -The methods with \code{oject = "missing"} returns a -\code{data.frame}. +A \code{data.frame} showing all the +ions produced by fragmentation with all possible combinations of modifications. +The used variable modifications are displayed in the \code{peptide} column through the +use of amino acids followed by the modification within brackets. +Fixed modifications are not displayed. } \description{ -These method calculates a-, b-, c-, x-, y- and z-ions produced by +This method calculates a-, b-, c-, x-, y- and z-ions produced by fragmentation. Available methods @@ -61,7 +78,7 @@ Available methods \item The default method with signature \code{sequence = "character"} and \code{object = "missing"} calculates the theoretical fragments for a peptide sequence. It returns a \code{data.frame} with the columns -\code{mz}, \code{ion}, \code{type}, \code{pos}, \code{z} and \code{seq}. +\code{mz}, \code{ion}, \code{type}, \code{pos}, \code{z}, \code{seq} and \code{peptide}. \item Additional method can be defined that will adapt their behaviour based on spectra defined in \code{object}. See for example the MSnbase package that implements a method for objects of class @@ -69,15 +86,22 @@ package that implements a method for objects of class } } \examples{ +## General use +calculateFragments(sequence = "ARGSHKATC", type = c("b", "y"), z = 1, +fixed_modifications = c(C = 57), variable_modifications = c(S = 79, Y = 79, T = 79), +max_mods = 2) ## calculate fragments for ACE with default modification -calculateFragments("ACE", modifications = c(C = 57.02146)) +calculateFragments("ACE", fixed_modifications = c(C = 57.02146)) -## calculate fragments for ACE with an addition N-terminal modification -calculateFragments("ACE", modifications = c(C = 57.02146, Nterm = 229.1629)) +#' ## calculate fragments for ACE with an added variable modification +calculateFragments("ACE", variable_modifications = c(A = 43.25)) + +## calculate fragments for ACE with an added N-terminal modification +calculateFragments("ACE", fixed_modifications = c(C = 57.02146, Nterm = 229.1629)) ## calculate fragments for ACE without any modifications -calculateFragments("ACE", modifications = NULL) +calculateFragments("ACE", fixed_modifications = NULL) calculateFragments("VESITARHGEVLQLRPK", type = c("a", "b", "c", "x", "y", "z"), @@ -98,7 +122,10 @@ calculateFragments("PQR", ## disable neutral loss completely calculateFragments("PQR", neutralLoss=NULL) + } \author{ Sebastian Gibb \href{mailto:mail@sebastiangibb.de}{mail@sebastiangibb.de} + +Guillaume Deflandre \href{mailto:guillaume.deflandre@uclouvain.be}{guillaume.deflandre@uclouvain.be} } diff --git a/man/calculateFragments2.Rd b/man/calculateFragments2.Rd deleted file mode 100644 index 74e3dad..0000000 --- a/man/calculateFragments2.Rd +++ /dev/null @@ -1,71 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/fragments-calculate2.R -\name{calculateFragments2} -\alias{calculateFragments2} -\alias{calculateFragments2,} -\alias{modificationPositions,} -\alias{cumsumFragmentMasses,} -\alias{character,missing-method} -\title{Calculate ions produced by fragmentation with variable modifications} -\usage{ -calculateFragments2( - sequence, - type = c("b", "y"), - z = 1, - fixed_modifications = c(C = 57.02146), - variable_modifications = numeric(), - max_mods = Inf, - neutralLoss = defaultNeutralLoss(), - verbose = TRUE, - modifications = NULL -) -} -\arguments{ -\item{sequence}{character() providing a peptide sequence.} - -\item{type}{character vector of target ions; possible values: -c("a", "b", "c", "x", "y", "z"). Default is type = c("b", "y").} - -\item{z}{numeric with a desired charge state; default is 1.} - -\item{fixed_modifications}{A named numeric vector of used fixed modifications. -The name must correspond to the one-letter-code of the modified amino acid -and the numeric value must represent the mass that should be added to the -original amino accid mass, default: Carbamidomethyl modifications = -c(C = 57.02146). Use Nterm or Cterm as names for modifications that should -be added to the amino respectively carboxyl-terminus.} - -\item{variable_modifications}{A named numeric vector of variable modifications. -Depending on the maximum number of modifications (\code{max_mods}), all possible -combinations are returned.} - -\item{max_mods}{A numeric indicating the maximum number of variable modifications -allowed on the sequence at once. Does not include fixed modifications. -Default value is positive infinity.} - -\item{neutralLoss}{list, it has to have two named elments, namely water and -ammonia that contain a character vector which type of neutral loss should be -calculated. Currently neutral loss on the C terminal "Cterm", at the amino -acids c("D", "E", "S", "T") for "water" (shown with an ⁠_⁠) an -d c("K", "N", "Q", "R") for "ammonia" (shown with an *) are supported.} - -\item{verbose}{logical(1). If TRUE (default) the used modifications are printed.} - -\item{modifications}{Named \code{numeric()}. Deprecated modifications parameter. -Will override \code{fixed_modifications} but is set to \code{NULL} by default. Please -refrain from using it, opt for \code{fixed_modifications} instead.} -} -\value{ -A dataframe showing all the -ions produced by fragmentation with all possible combinations of modifications. -The used variable modifications are displayed in the peptide column through the -use of amino acids within brackets. Fixed modifications are not displayed. -} -\description{ -Calculate ions produced by fragmentation with variable modifications -} -\examples{ -calculateFragments2(sequence = "ARGSHKATC", type = c("b", "y"), z = 1, -fixed_modifications = c(C = 57), variable_modifications = c(S = 79, Y = 79, T = 79), -max_mods = 2) -} diff --git a/tests/testthat/test_fragments.R b/tests/testthat/test_fragments.R index 25d9bff..26e7ff0 100644 --- a/tests/testthat/test_fragments.R +++ b/tests/testthat/test_fragments.R @@ -18,8 +18,9 @@ test_that("calculateFragments", { seq = c(rep(c("P", "PQ"), 3), rep(c("R", "QR"), 4), "QR"), + peptide = rep("PQR", 15), stringsAsFactors=FALSE) - + ace <- data.frame( mz = c(22.528, 102.544, # a 36.526, 116.541, # b @@ -34,14 +35,12 @@ test_that("calculateFragments", { z = 2, seq = c(rep(c("A", "AC"), 3), rep(c("E", "CE"), 3)), + peptide = rep("ACE", 12), stringsAsFactors=FALSE) - - expect_message(calculateFragments("PQR", verbose = TRUE), - "Modifications used: C=57.02146") - expect_message(calculateFragments("PQR", modifications = NULL, - verbose = TRUE), - "Modifications used: None") - + + expect_warning(calculateFragments("PQR", modifications = c(P=2)), + "'modifications' is deprecated, please use 'fixed_modifications' instead.") + expect_equal(pqr[1:12,], calculateFragments("PQR", type = c("a", "b", "c", "x", "y", "z"), @@ -56,13 +55,13 @@ test_that("calculateFragments", { calculateFragments("PQR", type = c("x", "z"), neutralLoss = NULL, verbose = FALSE), check.attributes = FALSE, tolerance = 1e-5) - + ## neutral loss ## rownames always differ expect_equal(pqr[c(3:4, 9:10, 13:15),], calculateFragments("PQR", verbose = FALSE), check.attributes = FALSE, tolerance = 1e-5) - + ## neutral loss (water=cterm disabled), ## rownames always differ expect_equal(pqr[c(3:4, 9:10, 15),], @@ -70,7 +69,7 @@ test_that("calculateFragments", { neutralLoss = defaultNeutralLoss(disableWaterLoss = "Cterm"), verbose = FALSE), check.attributes = FALSE, tolerance = 1e-5) - + ## neutral loss (ammonia=Q disabled), ## rownames always differ expect_equal(pqr[c(3:4, 9:10, 13:14),], @@ -78,24 +77,24 @@ test_that("calculateFragments", { neutralLoss = defaultNeutralLoss(disableAmmoniaLoss = "Q"), verbose = FALSE), check.attributes = FALSE, tolerance = 1e-5) - + ## neutral loss + nterm mod, rownames always differ tpqr <- pqr[c(3:4, 9:10, 13:15),] tpqr$mz[1:2] <- tpqr$mz[1:2] + 229 expect_equal(tpqr, - calculateFragments("PQR", modifications = c(C = 57.02146, Nterm = 229), + calculateFragments("PQR", fixed_modifications = c(C = 57.02146, Nterm = 229), verbose = FALSE), check.attributes = FALSE, tolerance = 1e-5) - + ## neutral loss + nterm + cterm mod, rownames always differ tpqr$mz[3:7] <- tpqr$mz[3:7] - 100 expect_equal(tpqr, - calculateFragments("PQR", modifications = c(C = 57.02146, - Nterm = 229, - Cterm = -100), + calculateFragments("PQR", fixed_modifications = c(C = 57.02146, + Nterm = 229, + Cterm = -100), verbose = FALSE), check.attributes = FALSE, tolerance = 1e-5) - + expect_equal(ace, calculateFragments("ACE", type = c("a", "b", "c", "x", "y", "z"), z = 2, neutralLoss = NULL, verbose = FALSE), @@ -103,25 +102,25 @@ test_that("calculateFragments", { expect_equal(ace[1:6,], calculateFragments("ACE", type = letters[1:3], z = 2, verbose = FALSE), tolerance = 1e-5) - + expect_error(calculateFragments("A"), "two or more residues") - - ## issue #200 (mz are not calculated correctly for terminal modifications + + ## issue #200 (mz are not calculated correctly for terminal fixed_modifications ## and z > 1) p <- getAtomicMass()["p"] expect_equal(calculateFragments("AA", z = 2, - modifications = c(Nterm = 10), + fixed_modifications = c(Nterm = 10), type = "b")$mz - p, - (calculateFragments("AA", z = 1, - modifications = c(Nterm = 10), - type = "b")$mz - p )/ 2) + (calculateFragments("AA", z = 1, + fixed_modifications = c(Nterm = 10), + type = "b")$mz - p )/ 2) expect_equal(calculateFragments("AA", z = 2, neutralLoss = NULL, - modifications = c(Cterm = 10), + fixed_modifications = c(Cterm = 10), type = "y")$mz - p, - (calculateFragments("AA", z = 1, neutralLoss = NULL, - modifications = c(Cterm = 10), - type = "y")$mz - p) / 2) - + (calculateFragments("AA", z = 1, neutralLoss = NULL, + fixed_modifications = c(Cterm = 10), + type = "y")$mz - p) / 2) + ## See issue 573 in MSnbase (charge is ignored in neutral loss ## calculation) expect_equal( @@ -132,11 +131,12 @@ test_that("calculateFragments", { type = c("b", "b_"), pos = 7L, z = 3, - seq = "PEPTIDE" + seq = "PEPTIDE", + peptide = "PEPTIDEE" ), check.attributes = FALSE # row.names differ ) - + }) test_that("defaultNeutralLoss", { @@ -149,7 +149,166 @@ test_that("defaultNeutralLoss", { disableAmmoniaLoss = c("K", "Q")), list(water = c("Cterm"), ammonia = c("N", "R"))) expect_equal(defaultNeutralLoss(disableWaterLoss = c("Cterm", - "T", "E", "S", "D"), + "T", "E", "S", "D"), disableAmmoniaLoss = c("K", "N", "Q", "R")), list(water = character(), ammonia = character())) }) + +## For additional tests, refer to calculateFragments from PSMatch package +test_that("calculateFragments: Default behaviour without modifications", { + + ## Test 1: Default behavior without modifications + sequence <- "PQR" + result <- calculateFragments( + sequence = sequence, + type = c("b", "y"), + z = 1, + fixed_modifications = NULL, + variable_modifications = numeric(), + max_mods = Inf, + neutralLoss = defaultNeutralLoss(), + verbose = FALSE + ) + + ## Check unique peptide without modifications + expect_identical(unique(result$peptide), "PQR") + }) + +test_that("calculateFragments: Behaviour with fixed modifications", { + ## Test 2: Fixed modifications + sequence <- "PQR" + result <- calculateFragments( + sequence = sequence, + type = c("b", "y"), + z = 1, + fixed_modifications = NULL, + variable_modifications = NULL, + max_mods = Inf, + neutralLoss = list(water = c(), ammonia = c()), + verbose = FALSE, + modifications = NULL + ) + fixed_modifications <- c(P = 79.966) + result_fixed <- calculateFragments( + sequence = sequence, + type = c("b", "y"), + z = 1, + fixed_modifications = fixed_modifications, + variable_modifications = NULL, + max_mods = 0, + neutralLoss = list(water = c(), ammonia = c()), + verbose = FALSE, + modifications = NULL + ) + + ## Fixed modifications do not produce additional unique peptides + expect_identical(unique(result_fixed$peptide), "PQR") + expect_identical(nrow(result), nrow(result_fixed)) + + ## Fixed modifications do change the fragment masses + expect_false(all(result$mz == result_fixed$mz)) +}) + +test_that("calculateFragments: Behaviour with variable modifications", { + ## Test 3: Variable modifications only + sequence <- "PQR" + result <- calculateFragments( + sequence = sequence, + type = c("b", "y"), + z = 1, + fixed_modifications = NULL, + variable_modifications = NULL, + max_mods = Inf, + neutralLoss = list(water = c(), ammonia = c()), + verbose = FALSE + ) + + fixed_modifications <- c(P = 79.966) + result_fixed <- calculateFragments( + sequence = sequence, + type = c("b", "y"), + z = 1, + fixed_modifications = fixed_modifications, + variable_modifications = NULL, + max_mods = 0, + neutralLoss = list(water = c(), ammonia = c()), + verbose = FALSE + ) + + variable_modifications <- c(P = 79.966, Q = 20, R = 10) + max_mods <- 2 + result_var <- calculateFragments( + sequence = sequence, + type = c("b", "y"), + z = 1, + fixed_modifications = NULL, + variable_modifications = variable_modifications, + max_mods = max_mods, + neutralLoss = list(water = c(), ammonia = c()), + verbose = FALSE + ) + + ## Calculate expected combinations + expected_combinations <- + choose(3, 0) + choose(3, 1) + choose(3, 2) + choose(3, 3) + + ## Check if number of unique peptides matches expectations + expect_equal(length(unique(result_var$peptide)), expected_combinations) + + ## Check if it's true in case there are less modifications than max_mods + variable_modifications <- c(P = 79.966) + max_mods <- 2 + result_var <- calculateFragments( + sequence = sequence, + type = c("b", "y"), + z = 1, + fixed_modifications = NULL, + variable_modifications = variable_modifications, + max_mods = max_mods, + neutralLoss = list(water = c(), ammonia = c()), + verbose = FALSE + ) + + ## Calculate expected combinations + expected_combinations <- choose(1, 0) + choose(1,1) + + ## Check if number of unique peptides matches expectations + expect_equal(length(unique(result_var$peptide)), expected_combinations) + + ## Test 4: Fixed and variable modifications combined + result_combined <- calculateFragments( + sequence = sequence, + type = c("b", "y"), + z = 1, + fixed_modifications = fixed_modifications, + variable_modifications = variable_modifications, + max_mods = max_mods, + neutralLoss = list(water = c(), ammonia = c()), + verbose = FALSE + ) + + ## Check equal mass of variable mods fragments and no mods fragments + expect_true(all(result$mz == result_var[result_var$peptide == "PQR","mz"])) + + ## Check equal mass of variable mods fragments and fixed mods fragments + expect_true(all(result_fixed$mz == result_var[result_var$peptide == "[P]QR","mz"])) +}) + +test_that(".cumsumFragmentMasses: Behaviour with any modification", { + ## Test4: Check behaviour of .cumsumFragmentMasses function + + ## Modifications used + mods_forward <- c(P = 5, Q = 0, R = 7) + mods_backward <- c(R = 7, Q = 0, P = 5) + + ## theoretical masses P = 15, Q = 25, R = 10) + fragments_forward <- c(P = 15, Q = 40) ## representing cumsum forward ions + fragments_backward <- c(R = 10, Q = 35) ## representing cumsum backward ions + + result_forward <- .cumsumFragmentMasses(mods_forward, fragments_forward) + result_backward <- .cumsumFragmentMasses(mods_backward, fragments_backward) + + expect_identical(c(P = 20, Q = 45), result_forward) + expect_identical(c(R = 17, Q = 42), result_backward) +}) + diff --git a/tests/testthat/test_fragments2.R b/tests/testthat/test_fragments2.R deleted file mode 100644 index 3305a80..0000000 --- a/tests/testthat/test_fragments2.R +++ /dev/null @@ -1,164 +0,0 @@ -## For additional tests, refer to calculateFragments from PSMatch package -test_that("calculateFragments2: Default behaviour without modifications", { - - ## Test 1: Default behavior without modifications - sequence <- "PQR" - original_result <- calculateFragments( - sequence = sequence, - type = c("b", "y"), - z = 1, - modifications = NULL, - neutralLoss = defaultNeutralLoss(), - verbose = FALSE - ) - result <- calculateFragments2( - sequence = sequence, - type = c("b", "y"), - z = 1, - fixed_modifications = NULL, - variable_modifications = NULL, - max_mods = Inf, - neutralLoss = defaultNeutralLoss(), - verbose = FALSE - ) - - ## Check unique peptide without modifications - expect_identical(unique(result$peptide), "PQR") - expect_identical(result[, names(original_result)], - original_result) - }) - -test_that("calculateFragments2: Behaviour with fixed modifications", { - ## Test 2: Fixed modifications - sequence <- "PQR" - result <- calculateFragments2( - sequence = sequence, - type = c("b", "y"), - z = 1, - fixed_modifications = NULL, - variable_modifications = NULL, - max_mods = Inf, - neutralLoss = list(water = c(), ammonia = c()), - verbose = FALSE - ) - fixed_modifications <- c(P = 79.966) - result_fixed <- calculateFragments2( - sequence = sequence, - type = c("b", "y"), - z = 1, - fixed_modifications = fixed_modifications, - variable_modifications = NULL, - max_mods = 0, - neutralLoss = list(water = c(), ammonia = c()), - verbose = FALSE - ) - - ## Fixed modifications do not produce additional unique peptides - expect_identical(unique(result_fixed$peptide), "PQR") - expect_identical(nrow(result), nrow(result_fixed)) - - ## Fixed modifications do change the fragment masses - expect_false(all(result$mz == result_fixed$mz)) -}) - -test_that("calculateFragments2: Behaviour with variable modifications", { - ## Test 3: Variable modifications only - sequence <- "PQR" - result <- calculateFragments2( - sequence = sequence, - type = c("b", "y"), - z = 1, - fixed_modifications = NULL, - variable_modifications = NULL, - max_mods = Inf, - neutralLoss = list(water = c(), ammonia = c()), - verbose = FALSE - ) - - fixed_modifications <- c(P = 79.966) - result_fixed <- calculateFragments2( - sequence = sequence, - type = c("b", "y"), - z = 1, - fixed_modifications = fixed_modifications, - variable_modifications = NULL, - max_mods = 0, - neutralLoss = list(water = c(), ammonia = c()), - verbose = FALSE - ) - - variable_modifications <- c(P = 79.966, Q = 20, R = 10) - max_mods <- 2 - result_var <- calculateFragments2( - sequence = sequence, - type = c("b", "y"), - z = 1, - fixed_modifications = NULL, - variable_modifications = variable_modifications, - max_mods = max_mods, - neutralLoss = list(water = c(), ammonia = c()), - verbose = FALSE - ) - - ## Calculate expected combinations - expected_combinations <- choose(3, 0) + choose(3, 1) + choose(3, 2) - - ## Check if number of unique peptides matches expectations - expect_equal(length(unique(result_var$peptide)), expected_combinations) - - ## Check if it's true in case there are less modifications than max_mods - variable_modifications <- c(P = 79.966) - max_mods <- 2 - result_var <- calculateFragments2( - sequence = sequence, - type = c("b", "y"), - z = 1, - fixed_modifications = NULL, - variable_modifications = variable_modifications, - max_mods = max_mods, - neutralLoss = list(water = c(), ammonia = c()), - verbose = FALSE - ) - - ## Calculate expected combinations - expected_combinations <- choose(1, 0) + choose(1,1) - - ## Check if number of unique peptides matches expectations - expect_equal(length(unique(result_var$peptide)), expected_combinations) - - ## Test 4: Fixed and variable modifications combined - result_combined <- calculateFragments2( - sequence = sequence, - type = c("b", "y"), - z = 1, - fixed_modifications = fixed_modifications, - variable_modifications = variable_modifications, - max_mods = max_mods, - neutralLoss = list(water = c(), ammonia = c()), - verbose = FALSE - ) - - ## Check equal mass of variable mods fragments and no mods fragments - expect_true(all(result$mz == result_var[result_var$peptide == "PQR","mz"])) - - ## Check equal mass of variable mods fragments and fixed mods fragments - expect_true(all(result_fixed$mz == result_var[result_var$peptide == "[P]QR","mz"])) -}) - -test_that(".cumsumFragmentMasses: Behaviour with any modification", { - ## Test4: Check behaviour of .cumsumFragmentMasses function - - ## Modifications used - mods_forward <- c(P = 5, Q = 0, R = 7) - mods_backward <- c(R = 7, Q = 0, P = 5) - - ## theoretical masses P = 15, Q = 25, R = 10) - fragments_forward <- c(P = 15, Q = 40) ## representing cumsum forward ions - fragments_backward <- c(R = 10, Q = 35) ## representing cumsum backward ions - - result_forward <- .cumsumFragmentMasses(mods_forward, fragments_forward) - result_backward <- .cumsumFragmentMasses(mods_backward, fragments_backward) - - expect_identical(c(P = 20, Q = 45), result_forward) - expect_identical(c(R = 17, Q = 42), result_backward) -})