Noble-Lab · Lilferrit · Feb 7, 2025 · Feb 10, 2025
diff --git a/casanovo/casanovo.py b/casanovo/casanovo.py
@@ -181,7 +181,8 @@ def sequence(
     config, model = setup_model(
         model, config, output_path, output_root_name, False
     )
-    start_time = time.time()
+    start_time = time.perf_counter()
+    utils.log_intro_report(start_time=start_time)
     with ModelRunner(
         config,
         model,
@@ -202,7 +203,10 @@ def sequence(
             evaluate=evaluate,
         )
         utils.log_annotate_report(
-            runner.writer.psms, start_time=start_time, end_time=time.time()
+            runner.writer.psms,
+            start_time=start_time,
+            sequence_start_time=runner.sequence_start_time,
+            end_time=time.perf_counter(),
         )
 
 
@@ -241,7 +245,8 @@ def db_search(
     config, model = setup_model(
         model, config, output_path, output_root_name, False
     )
-    start_time = time.time()
+    start_time = time.perf_counter()
+    utils.log_intro_report(start_time=start_time)
     with ModelRunner(
         config,
         model,
@@ -262,7 +267,9 @@ def db_search(
             str((output_path / output_root_name).with_suffix(".mztab")),
         )
         utils.log_annotate_report(
-            runner.writer.psms, start_time=start_time, end_time=time.time()
+            runner.writer.psms,
+            start_time=start_time,
+            end_time=time.perf_counter(),
         )
 
 
@@ -306,7 +313,8 @@ def train(
     config, model = setup_model(
         model, config, output_path, output_root_name, True
     )
-    start_time = time.time()
+    start_time = time.perf_counter()
+    utils.log_intro_report(start_time=start_time)
     with ModelRunner(
         config,
         model,
@@ -326,7 +334,9 @@ def train(
             logger.info("  %s", peak_file)
 
         runner.train(train_peak_path, validation_peak_path)
-        utils.log_run_report(start_time=start_time, end_time=time.time())
+        utils.log_run_report(
+            start_time=start_time, end_time=time.perf_counter()
+        )
 
 
 @main.command()

diff --git a/casanovo/data/psm.py b/casanovo/data/psm.py
@@ -1,7 +1,7 @@
 """Peptide spectrum match dataclass."""
 
 import dataclasses
-from typing import Tuple, Iterable
+from typing import Optional, Tuple, Iterable
 
 
 @dataclasses.dataclass
@@ -31,6 +31,8 @@ class PepSpecMatch:
     aa_scores : Iterable[float]
         A list of scores for individual amino acids in the peptide
         sequence, where len(aa_scores) == len(sequence).
+    sequence_time : Optional[float], default=None
+        System time in seconds at inference completion
     protein : str
         Protein associated with the peptide sequence (for db mode).
     """
@@ -42,4 +44,5 @@ class PepSpecMatch:
     calc_mz: float
     exp_mz: float
     aa_scores: Iterable[float]
+    sequence_time: Optional[float] = None
     protein: str = "null"
diff --git a/casanovo/denovo/model.py b/casanovo/denovo/model.py
@@ -4,6 +4,7 @@
 import heapq
 import itertools
 import logging
+import time
 import warnings
 from pathlib import Path
 from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
@@ -18,7 +19,7 @@
 
 from . import evaluate
 from .. import config
-from ..data import ms_io
+from ..data import ms_io, psm
 
 logger = logging.getLogger("casanovo")
 
@@ -822,7 +823,7 @@ def validation_step(
 
     def predict_step(
         self, batch: Tuple[torch.Tensor, torch.Tensor, torch.Tensor], *args
-    ) -> List[ms_io.PepSpecMatch]:
+    ) -> List[psm.PepSpecMatch]:
         """
         A single prediction step.
 
@@ -834,7 +835,7 @@ def predict_step(
 
         Returns
         -------
-        predictions: List[ms_io.PepSpecMatch]
+        predictions: List[psm.PepSpecMatch]
             Predicted PSMs for the given batch of spectra.
         """
         predictions = []
@@ -843,15 +844,17 @@ def predict_step(
             precursor_mz,
             spectrum_i,
             spectrum_preds,
+            batch_time,
         ) in zip(
             batch[1][:, 1].cpu().detach().numpy(),
             batch[1][:, 2].cpu().detach().numpy(),
             batch[2],
             self.forward(batch[0], batch[1]),
+            itertools.repeat(time.perf_counter()),
         ):
             for peptide_score, aa_scores, peptide in spectrum_preds:
                 predictions.append(
-                    ms_io.PepSpecMatch(
+                    psm.PepSpecMatch(
                         sequence=peptide,
                         spectrum_id=tuple(spectrum_i),
                         peptide_score=peptide_score,
@@ -861,6 +864,7 @@ def predict_step(
                         ),
                         exp_mz=precursor_mz,
                         aa_scores=aa_scores,
+                        sequence_time=batch_time,
                     )
                 )
 
@@ -901,7 +905,7 @@ def on_validation_epoch_end(self) -> None:
         self._log_history()
 
     def on_predict_batch_end(
-        self, outputs: List[ms_io.PepSpecMatch], *args
+        self, outputs: List[psm.PepSpecMatch], *args
     ) -> None:
         """
         Write the predicted peptide sequences and amino acid scores to
@@ -1000,7 +1004,7 @@ def predict_step(
         self,
         batch: Tuple[torch.Tensor, torch.Tensor, np.ndarray, np.ndarray],
         *args,
-    ) -> List[ms_io.PepSpecMatch]:
+    ) -> List[psm.PepSpecMatch]:
         """
         A single prediction step.
 
@@ -1012,7 +1016,7 @@ def predict_step(
 
         Returns
         -------
-        predictions: List[ms_io.PepSpecMatch]
+        predictions: List[psm.PepSpecMatch]
             Predicted PSMs for the given batch of spectra.
         """
         predictions_all = collections.defaultdict(list)
@@ -1044,7 +1048,7 @@ def predict_step(
             ):
                 spectrum_i = tuple(spectrum_i)
                 predictions_all[spectrum_i].append(
-                    ms_io.PepSpecMatch(
+                    psm.PepSpecMatch(
                         sequence=peptide,
                         spectrum_id=spectrum_i,
                         peptide_score=peptide_score,

diff --git a/casanovo/denovo/model_runner.py b/casanovo/denovo/model_runner.py
@@ -5,6 +5,7 @@
 import logging
 import os
 import tempfile
+import time
 import uuid
 import warnings
 from pathlib import Path
@@ -73,6 +74,7 @@ def __init__(
         self.model = None
         self.loaders = None
         self.writer = None
+        self.sequence_start_time = None
 
         if output_dir is None:
             self.callbacks = []
@@ -285,6 +287,7 @@ def predict(
         self.writer.set_ms_run(test_index.ms_files)
         self.initialize_data_module(test_index=test_index)
         self.loaders.setup(stage="test", annotated=False)
+        self.sequence_start_time = time.perf_counter()
         self.trainer.predict(self.model, self.loaders.test_dataloader())
 
         if evaluate:

diff --git a/casanovo/utils.py b/casanovo/utils.py
@@ -161,6 +161,30 @@
     }
 
 
+def log_intro_report(start_time: Optional[float] = None) -> None:
+    """
+    log start of run report
+
+    parameters
+    ----------
+    start_time : Optional[float], default=None
+        The start time of the sequencing run in seconds since the epoch.
+    """
+    if start_time is not None:
+        start_datetime = datetime.fromtimestamp(start_time)
+        logger.info(
+            "Run Start Time: %s",
+            start_datetime.strftime("%y/%m/%d %H:%M:%S"),
+        )
+
+        logger.info("Executing Command: %s", " ".join(sys.argv))
+        logger.info("Executing on Host Machine: %s", socket.gethostname())
+
+    if torch.cuda.is_available():
+        gpu_name = torch.cuda.get_device_name()
+        logger.info("Executing on GPU: %s", gpu_name)
+
+
 def log_run_report(
     start_time: Optional[float] = None, end_time: Optional[float] = None
 ) -> None:
@@ -174,22 +198,16 @@
     end_time : Optional[float], default=None
         The end time of the sequencing run in seconds since the epoch.
     """
-    logger.info("======= End of Run Report =======")
-    if start_time is not None and end_time is not None:
-        start_datetime = datetime.fromtimestamp(start_time)
+    if end_time is not None:
         end_datetime = datetime.fromtimestamp(end_time)
-        delta_datetime = end_datetime - start_datetime
-        logger.info(
-            "Run Start Time: %s",
-            start_datetime.strftime("%y/%m/%d %H:%M:%S"),
-        )
         logger.info(
             "Run End Time: %s", end_datetime.strftime("%y/%m/%d %H:%M:%S")
         )
-        logger.info("Time Elapsed: %s", delta_datetime)
 
-    logger.info("Executed Command: %s", " ".join(sys.argv))
-    logger.info("Executed on Host Machine: %s", socket.gethostname())
+        if start_time is not None:
+            start_datetime = datetime.fromtimestamp(start_time)
+            delta_datetime = end_datetime - start_datetime
+            logger.info("Time Elapsed: %s", delta_datetime)
 
     if torch.cuda.is_available():
         gpu_util = torch.cuda.max_memory_allocated()
@@ -199,6 +217,7 @@
 def log_annotate_report(
     predictions: List[PepSpecMatch],
     start_time: Optional[float] = None,
+    sequence_start_time: Optional[float] = None,
     end_time: Optional[float] = None,
     score_bins: Iterable[float] = SCORE_BINS,
 ) -> None:
@@ -211,6 +230,8 @@
         PSM predictions.
     start_time : Optional[float], default=None
         The start time of the sequencing run in seconds since the epoch.
+    sequence_start_time : Optional[float], default=None
+        The inference start time in seconds since the epoch.
     end_time : Optional[float], default=None
         The end time of the sequencing run in seconds since the epoch.
     score_bins: Iterable[float], Optional
@@ -254,6 +275,32 @@
             "Median Peptide Length: %d", run_report["median_sequence_length"]
         )
 
+        sequence_end_time = predictions[-1].sequence_time
+        if sequence_start_time is not None and sequence_end_time is not None:
+            sequence_start_time = datetime.fromtimestamp(sequence_start_time)
+            sequence_end_time = datetime.fromtimestamp(sequence_end_time)
+            sequence_time_elapsed = sequence_end_time - sequence_start_time
+
+            if start_time is not None:
+                start_time = datetime.fromtimestamp(start_time)
+                setup_time = sequence_start_time - start_time
+
+                logger.info("Setup Time Elapsed: %s", setup_time)
+
+            logger.info(
+                "Sequencing Start Time: %s",
+                sequence_start_time.strftime("%y/%m/%d %H:%M:%S"),
+            )
+            logger.info(
+                "Sequencing End Time: %s",
+                sequence_end_time.strftime("%y/%m/%d %H:%M:%S"),
+            )
+            logger.info("Sequencing Time Elapsed: %s", sequence_time_elapsed)
+            logger.info(
+                "Spectra Sequenced Per Second: %.4f spectra/s",
+                len(predictions) / sequence_time_elapsed.total_seconds(),
+            )
+
 
 def check_dir_file_exists(
     dir: pathlib.Path, file_patterns: Iterable[str] | str