compomics · RalfG · Dec 14, 2021 · Aug 20, 2021 · Aug 27, 2021 · Sep 1, 2021
diff --git a/ms2rescore/__init__.py b/ms2rescore/__init__.py
@@ -8,7 +8,7 @@
 from multiprocessing import cpu_count
 from typing import Dict, Optional, Union
 
-import pandas as pd
+from pandas.errors import EmptyDataError
 
 from ms2rescore import id_file_parser, rescore_core, setup_logging
 from ms2rescore._exceptions import MS2ReScoreError
@@ -108,21 +108,21 @@ def _infer_pipeline(identification_file: str):
 
     def _select_pipeline(self):
         """Select specific rescoring pipeline."""
-        if self.config["general"]["pipeline"] == "infer":
+        if self.config["general"]["pipeline"].lower() == "infer":
             pipeline = self._infer_pipeline(
                 self.config["general"]["identification_file"]
             )
-        elif self.config["general"]["pipeline"] == "pin":
+        elif self.config["general"]["pipeline"].lower() == "pin":
             pipeline = id_file_parser.PinPipeline
-        elif self.config["general"]["pipeline"] == "maxquant":
+        elif self.config["general"]["pipeline"].lower() == "maxquant":
             pipeline = id_file_parser.MaxQuantPipeline
-        elif self.config["general"]["pipeline"] == "msgfplus":
+        elif self.config["general"]["pipeline"].lower() == "msgfplus":
             pipeline = id_file_parser.MSGFPipeline
-        elif self.config["general"]["pipeline"] == "tandem":
+        elif self.config["general"]["pipeline"].lower() == "tandem":
             pipeline = id_file_parser.TandemPipeline
-        elif self.config["general"]["pipeline"] == "peptideshaker":
+        elif self.config["general"]["pipeline"].lower() == "peptideshaker":
             pipeline = id_file_parser.PeptideShakerPipeline
-        elif self.config["general"]["pipeline"] == "Peaks":
+        elif self.config["general"]["pipeline"].lower() == "peaks":
             pipeline = id_file_parser.PeaksPipeline
         else:
             raise NotImplementedError(self.config["general"]["pipeline"])
@@ -254,39 +254,41 @@ def run(self):
         if self.config["general"]["run_percolator"]:
             self._run_percolator()
 
-        logger.info("Generating Rescore plots")
-        if self.config["general"]["plotting"]:
+            # Only use plotting module when run_percolator is true
+            if self.config["general"]["plotting"]:
+                logger.info("Generating Rescore plots")
 
-            plotting.PIN(
-                peprec_filename, self.config["general"]["output_filename"]
-            )
-
-            for fset in self.config["general"]["feature_sets"]:
-                pout_file = (
-                    self.config["general"]["output_filename"]
-                    + "_"
-                    + "_".join(fset)
-                    + "_features.pout"
+                plotting.PIN(
+                    peprec_filename, self.config["general"]["output_filename"]
                 )
-                pout_decoy_file = (
-                    self.config["general"]["output_filename"]
-                    + "_"
-                    + "_".join(fset)
-                    + "_features.pout_dec"
-                )
-                try:
-                    plotting.POUT(
-                        pout_file,
-                        pout_decoy_file,
-                        self.config["general"]["output_filename"],
-                        " ".join(fset)
-                    )
-                except pd.errors.EmptyDataError:
-                    continue
 
-            plotting.RescoreRecord.save_plots_to_pdf(
-                self.config["general"]["output_filename"] + "_plots.pdf",
-                FDR_thresholds=[0.01, 0.001],
-            )
+                for fset in self.config["general"]["feature_sets"]:
+                    pout_file = (
+                        self.config["general"]["output_filename"]
+                        + "_"
+                        + "_".join(fset)
+                        + "_features.pout"
+                    )
+                    pout_decoy_file = (
+                        self.config["general"]["output_filename"]
+                        + "_"
+                        + "_".join(fset)
+                        + "_features.pout_dec"
+                    )
+                    try:
+                        plotting.POUT(
+                            pout_file,
+                            pout_decoy_file,
+                            self.config["general"]["output_filename"],
+                            " ".join(fset)
+                        )
+                    except EmptyDataError:
+                        logger.warn(f"Feature set: {'_'.join(fset)} returned empty pout file")
+                        continue
+
+                plotting.RescoreRecord.save_plots_to_pdf(
+                    self.config["general"]["output_filename"] + "_plots.pdf",
+                    FDR_thresholds=[0.01, 0.001],
+                )
 
         logger.info("MS²ReScore finished!")
diff --git a/ms2rescore/id_file_parser.py b/ms2rescore/id_file_parser.py
@@ -8,8 +8,7 @@
 
 import numpy as np
 import pandas as pd
-from pyteomics import tandem
-from pyteomics import mzid
+from pyteomics import tandem, mzid
 from tqdm import tqdm
 
 from ms2rescore._exceptions import MS2ReScoreError
@@ -67,6 +66,7 @@ def __init__(self, config: Dict, output_basename: Union[str, os.PathLike]) -> No
             "generic": r".+_([0-9]+)_[0-9]+_[0-9]+",
             "tandem": r".+_([0-9]+)_[0-9]+_[0-9]+",
             "msgfplus": r".+_SII_([0-9]+)_[0-9]+_[0-9]+_[0-9]+",
+            "USI": r"mzspec:PXD[0-9]{6}:[^\s\:]*:scan:([0-9]+)"
         }
 
         # Private attributes specific to pipeline, override these in each subclass
@@ -562,8 +562,6 @@ def read_df_from_mzid(self) -> pd.DataFrame:
                 retrieved_data["charge"] = flat_dict[
                     "SpectrumIdentificationItem_chargeState"
                 ]
-                # TODO: create class for SpectrumIdentificationItem
-                # print(spectrum_identification_result)
                 retrieved_data["protein_list"] = [
                     d["accession"]
                     for d in spectrum_identification_result[

diff --git a/ms2rescore/package_data/config_schema.json b/ms2rescore/package_data/config_schema.json
@@ -13,7 +13,7 @@
                 "pipeline": {
                     "description": "Pipeline to use, depending on input format",
                     "type": "string",
-                    "enum": ["infer", "pin", "tandem", "maxquant", "msgfplus", "peptideshaker", "Peaks"],
+                    "enum": ["infer", "pin", "tandem", "maxquant", "msgfplus", "peptideshaker", "peaks"],
                     "default": "infer"
                 },
                 "feature_sets": {

diff --git a/ms2rescore/percolator.py b/ms2rescore/percolator.py
@@ -250,13 +250,13 @@ def _get_sequence_column(self) -> pd.Series:
 
     def _get_charge_column(self) -> pd.Series:
         """Get charge column from one-hot encoded `ChargeX` columns."""
-        charge_cols = [col for col in self.df.columns if col.startswith("charge")]
+        charge_cols = [col for col in self.df.columns if col.lower().startswith("charge")]
         if not (self.df[charge_cols] == 1).any(axis=1).all():
             raise PercolatorInError("Not all PSMs have an assigned charge state.")
         return (
             self.df[charge_cols]
             .rename(
-                columns={col: int(col.replace("charge", "")) for col in charge_cols}
+                columns={col: int(col.lower().replace("charge", "")) for col in charge_cols}
             )
             .idxmax(1)
         )
@@ -424,6 +424,7 @@ def get_feature_table(self) -> pd.DataFrame:
         feature_cols = [col for col in self.df.columns if col not in non_feature_cols]
         return self.df[feature_cols]
 
+    # TODO: If a USI is used, extract_spectrum_index should be False.
     def to_peptide_record(
         self,
         extract_spectrum_index: Optional[bool] = True,