Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Log Average Throughput and Inference Run Time #431

Open
wants to merge 2 commits into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 16 additions & 6 deletions casanovo/casanovo.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,8 @@ def sequence(
config, model = setup_model(
model, config, output_path, output_root_name, False
)
start_time = time.time()
start_time = time.perf_counter()
utils.log_intro_report(start_time=start_time)
with ModelRunner(
config,
model,
Expand All @@ -202,7 +203,10 @@ def sequence(
evaluate=evaluate,
)
utils.log_annotate_report(
runner.writer.psms, start_time=start_time, end_time=time.time()
runner.writer.psms,
start_time=start_time,
sequence_start_time=runner.sequence_start_time,
end_time=time.perf_counter(),
)


Expand Down Expand Up @@ -241,7 +245,8 @@ def db_search(
config, model = setup_model(
model, config, output_path, output_root_name, False
)
start_time = time.time()
start_time = time.perf_counter()
utils.log_intro_report(start_time=start_time)
with ModelRunner(
config,
model,
Expand All @@ -262,7 +267,9 @@ def db_search(
str((output_path / output_root_name).with_suffix(".mztab")),
)
utils.log_annotate_report(
runner.writer.psms, start_time=start_time, end_time=time.time()
runner.writer.psms,
start_time=start_time,
end_time=time.perf_counter(),
)


Expand Down Expand Up @@ -306,7 +313,8 @@ def train(
config, model = setup_model(
model, config, output_path, output_root_name, True
)
start_time = time.time()
start_time = time.perf_counter()
utils.log_intro_report(start_time=start_time)
with ModelRunner(
config,
model,
Expand All @@ -326,7 +334,9 @@ def train(
logger.info(" %s", peak_file)

runner.train(train_peak_path, validation_peak_path)
utils.log_run_report(start_time=start_time, end_time=time.time())
utils.log_run_report(
start_time=start_time, end_time=time.perf_counter()
)


@main.command()
Expand Down
5 changes: 4 additions & 1 deletion casanovo/data/psm.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""Peptide spectrum match dataclass."""

import dataclasses
from typing import Tuple, Iterable
from typing import Optional, Tuple, Iterable


@dataclasses.dataclass
Expand Down Expand Up @@ -31,6 +31,8 @@ class PepSpecMatch:
aa_scores : Iterable[float]
A list of scores for individual amino acids in the peptide
sequence, where len(aa_scores) == len(sequence).
sequence_time : Optional[float], default=None
System time in seconds at inference completion
protein : str
Protein associated with the peptide sequence (for db mode).
"""
Expand All @@ -42,4 +44,5 @@ class PepSpecMatch:
calc_mz: float
exp_mz: float
aa_scores: Iterable[float]
sequence_time: Optional[float] = None
protein: str = "null"
20 changes: 12 additions & 8 deletions casanovo/denovo/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import heapq
import itertools
import logging
import time
import warnings
from pathlib import Path
from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
Expand All @@ -18,7 +19,7 @@

from . import evaluate
from .. import config
from ..data import ms_io
from ..data import ms_io, psm

logger = logging.getLogger("casanovo")

Expand Down Expand Up @@ -822,7 +823,7 @@ def validation_step(

def predict_step(
self, batch: Tuple[torch.Tensor, torch.Tensor, torch.Tensor], *args
) -> List[ms_io.PepSpecMatch]:
) -> List[psm.PepSpecMatch]:
"""
A single prediction step.

Expand All @@ -834,7 +835,7 @@ def predict_step(

Returns
-------
predictions: List[ms_io.PepSpecMatch]
predictions: List[psm.PepSpecMatch]
Predicted PSMs for the given batch of spectra.
"""
predictions = []
Expand All @@ -843,15 +844,17 @@ def predict_step(
precursor_mz,
spectrum_i,
spectrum_preds,
batch_time,
) in zip(
batch[1][:, 1].cpu().detach().numpy(),
batch[1][:, 2].cpu().detach().numpy(),
batch[2],
self.forward(batch[0], batch[1]),
itertools.repeat(time.perf_counter()),
):
for peptide_score, aa_scores, peptide in spectrum_preds:
predictions.append(
ms_io.PepSpecMatch(
psm.PepSpecMatch(
sequence=peptide,
spectrum_id=tuple(spectrum_i),
peptide_score=peptide_score,
Expand All @@ -861,6 +864,7 @@ def predict_step(
),
exp_mz=precursor_mz,
aa_scores=aa_scores,
sequence_time=batch_time,
)
)

Expand Down Expand Up @@ -901,7 +905,7 @@ def on_validation_epoch_end(self) -> None:
self._log_history()

def on_predict_batch_end(
self, outputs: List[ms_io.PepSpecMatch], *args
self, outputs: List[psm.PepSpecMatch], *args
) -> None:
"""
Write the predicted peptide sequences and amino acid scores to
Expand Down Expand Up @@ -1000,7 +1004,7 @@ def predict_step(
self,
batch: Tuple[torch.Tensor, torch.Tensor, np.ndarray, np.ndarray],
*args,
) -> List[ms_io.PepSpecMatch]:
) -> List[psm.PepSpecMatch]:
"""
A single prediction step.

Expand All @@ -1012,7 +1016,7 @@ def predict_step(

Returns
-------
predictions: List[ms_io.PepSpecMatch]
predictions: List[psm.PepSpecMatch]
Predicted PSMs for the given batch of spectra.
"""
predictions_all = collections.defaultdict(list)
Expand Down Expand Up @@ -1044,7 +1048,7 @@ def predict_step(
):
spectrum_i = tuple(spectrum_i)
predictions_all[spectrum_i].append(
ms_io.PepSpecMatch(
psm.PepSpecMatch(
sequence=peptide,
spectrum_id=spectrum_i,
peptide_score=peptide_score,
Expand Down
3 changes: 3 additions & 0 deletions casanovo/denovo/model_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import logging
import os
import tempfile
import time
import uuid
import warnings
from pathlib import Path
Expand Down Expand Up @@ -73,6 +74,7 @@ def __init__(
self.model = None
self.loaders = None
self.writer = None
self.sequence_start_time = None

if output_dir is None:
self.callbacks = []
Expand Down Expand Up @@ -285,6 +287,7 @@ def predict(
self.writer.set_ms_run(test_index.ms_files)
self.initialize_data_module(test_index=test_index)
self.loaders.setup(stage="test", annotated=False)
self.sequence_start_time = time.perf_counter()
self.trainer.predict(self.model, self.loaders.test_dataloader())

if evaluate:
Expand Down
69 changes: 58 additions & 11 deletions casanovo/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,30 @@
}


def log_intro_report(start_time: Optional[float] = None) -> None:
"""
log start of run report

parameters
----------
start_time : Optional[float], default=None
The start time of the sequencing run in seconds since the epoch.
"""
if start_time is not None:
start_datetime = datetime.fromtimestamp(start_time)
logger.info(
"Run Start Time: %s",
start_datetime.strftime("%y/%m/%d %H:%M:%S"),
)

logger.info("Executing Command: %s", " ".join(sys.argv))
logger.info("Executing on Host Machine: %s", socket.gethostname())

if torch.cuda.is_available():
gpu_name = torch.cuda.get_device_name()
logger.info("Executing on GPU: %s", gpu_name)

Check warning on line 185 in casanovo/utils.py

View check run for this annotation

Codecov / codecov/patch

casanovo/utils.py#L184-L185

Added lines #L184 - L185 were not covered by tests


def log_run_report(
start_time: Optional[float] = None, end_time: Optional[float] = None
) -> None:
Expand All @@ -174,22 +198,16 @@
end_time : Optional[float], default=None
The end time of the sequencing run in seconds since the epoch.
"""
logger.info("======= End of Run Report =======")
if start_time is not None and end_time is not None:
start_datetime = datetime.fromtimestamp(start_time)
if end_time is not None:
end_datetime = datetime.fromtimestamp(end_time)
delta_datetime = end_datetime - start_datetime
logger.info(
"Run Start Time: %s",
start_datetime.strftime("%y/%m/%d %H:%M:%S"),
)
logger.info(
"Run End Time: %s", end_datetime.strftime("%y/%m/%d %H:%M:%S")
)
logger.info("Time Elapsed: %s", delta_datetime)

logger.info("Executed Command: %s", " ".join(sys.argv))
logger.info("Executed on Host Machine: %s", socket.gethostname())
if start_time is not None:
start_datetime = datetime.fromtimestamp(start_time)
delta_datetime = end_datetime - start_datetime
logger.info("Time Elapsed: %s", delta_datetime)

if torch.cuda.is_available():
gpu_util = torch.cuda.max_memory_allocated()
Expand All @@ -199,6 +217,7 @@
def log_annotate_report(
predictions: List[PepSpecMatch],
start_time: Optional[float] = None,
sequence_start_time: Optional[float] = None,
end_time: Optional[float] = None,
score_bins: Iterable[float] = SCORE_BINS,
) -> None:
Expand All @@ -211,6 +230,8 @@
PSM predictions.
start_time : Optional[float], default=None
The start time of the sequencing run in seconds since the epoch.
sequence_start_time : Optional[float], default=None
The inference start time in seconds since the epoch.
end_time : Optional[float], default=None
The end time of the sequencing run in seconds since the epoch.
score_bins: Iterable[float], Optional
Expand Down Expand Up @@ -254,6 +275,32 @@
"Median Peptide Length: %d", run_report["median_sequence_length"]
)

sequence_end_time = predictions[-1].sequence_time
if sequence_start_time is not None and sequence_end_time is not None:
sequence_start_time = datetime.fromtimestamp(sequence_start_time)
sequence_end_time = datetime.fromtimestamp(sequence_end_time)
sequence_time_elapsed = sequence_end_time - sequence_start_time

if start_time is not None:
start_time = datetime.fromtimestamp(start_time)
setup_time = sequence_start_time - start_time

logger.info("Setup Time Elapsed: %s", setup_time)

logger.info(
"Sequencing Start Time: %s",
sequence_start_time.strftime("%y/%m/%d %H:%M:%S"),
)
logger.info(
"Sequencing End Time: %s",
sequence_end_time.strftime("%y/%m/%d %H:%M:%S"),
)
logger.info("Sequencing Time Elapsed: %s", sequence_time_elapsed)
logger.info(
"Spectra Sequenced Per Second: %.4f spectra/s",
len(predictions) / sequence_time_elapsed.total_seconds(),
)


def check_dir_file_exists(
dir: pathlib.Path, file_patterns: Iterable[str] | str
Expand Down
Loading