Skip to content

Commit

Permalink
Make bcf::IndexedReader::fetch functionality inline with htslib regio…
Browse files Browse the repository at this point in the history
…ns (#309)

* add docs for bcf Record::format

* make bcf indexreader fetch inline with htslib functionality

* update changelog
  • Loading branch information
mbhall88 authored Jul 5, 2021
1 parent e60cea2 commit cd6a29f
Show file tree
Hide file tree
Showing 3 changed files with 88 additions and 11 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@ This project adheres to [Semantic Versioning](http://semver.org/).
### Added
- `bcf::Record` methods `end`, `clear`, and `rlen` (@mbhall88)

### Changes
- `bcf::IndexReader::fetch` parameter `end` is now an `Option<u64>`. This is inline with
htslib regions, which do not require an end position (@mbhall88)

[Unreleased]: https://github.com/rust-bio/rust-htslib/compare/v0.36.0...HEAD

## [0.36.0] - 2020-11-23
Expand Down
62 changes: 52 additions & 10 deletions src/bcf/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -257,7 +257,7 @@ pub struct IndexedReader {
header: Rc<HeaderView>,

/// The position of the previous fetch, if any.
current_region: Option<(u32, u64, u64)>,
current_region: Option<(u32, u64, Option<u64>)>,
}

unsafe impl Send for IndexedReader {}
Expand Down Expand Up @@ -318,10 +318,14 @@ impl IndexedReader {
///
/// * `rid` - numeric ID of the reference to jump to; use `HeaderView::name2rid` for resolving
/// contig name to ID.
/// * `start` - `0`-based start coordinate of region on reference.
/// * `end` - `0`-based end coordinate of region on reference.
pub fn fetch(&mut self, rid: u32, start: u64, end: u64) -> Result<()> {
let contig = self.header.rid2name(rid).unwrap();
/// * `start` - `0`-based **inclusive** start coordinate of region on reference.
/// * `end` - Optional `0`-based **inclusive** end coordinate of region on reference. If `None`
/// is given, records are fetched from `start` until the end of the contig.
///
/// # Note
/// The entire contig can be fetched by setting `start` to `0` and `end` to `None`.
pub fn fetch(&mut self, rid: u32, start: u64, end: Option<u64>) -> Result<()> {
let contig = self.header.rid2name(rid)?;
let contig = ffi::CString::new(contig).unwrap();
if unsafe { htslib::bcf_sr_seek(self.inner, contig.as_ptr(), start as i64) } != 0 {
Err(Error::GenomicSeek {
Expand Down Expand Up @@ -367,10 +371,11 @@ impl Read for IndexedReader {

match self.current_region {
Some((rid, _start, end)) => {
if record.rid().is_some()
&& rid == record.rid().unwrap()
&& record.pos() as u64 <= end
{
let endpos = match end {
Some(e) => e,
None => u64::MAX,
};
if Some(rid) == record.rid() && record.pos() as u64 <= endpos {
Some(Ok(()))
} else {
None
Expand Down Expand Up @@ -925,10 +930,47 @@ mod tests {
.header()
.name2rid(b"1")
.expect("Translating from contig '1' to ID failed.");
bcf.fetch(rid, 10_033, 10_060).expect("Fetching failed");
bcf.fetch(rid, 10_033, Some(10_060))
.expect("Fetching failed");
assert_eq!(bcf.records().count(), 28);
}

#[test]
fn test_fetch_all() {
let mut bcf = IndexedReader::from_path(&"test/test.bcf").expect("Error opening file.");
bcf.set_threads(2).unwrap();
let rid = bcf
.header()
.name2rid(b"1")
.expect("Translating from contig '1' to ID failed.");
bcf.fetch(rid, 0, None).expect("Fetching failed");
assert_eq!(bcf.records().count(), 62);
}

#[test]
fn test_fetch_open_ended() {
let mut bcf = IndexedReader::from_path(&"test/test.bcf").expect("Error opening file.");
bcf.set_threads(2).unwrap();
let rid = bcf
.header()
.name2rid(b"1")
.expect("Translating from contig '1' to ID failed.");
bcf.fetch(rid, 10077, None).expect("Fetching failed");
assert_eq!(bcf.records().count(), 6);
}

#[test]
fn test_fetch_start_greater_than_last_vcf_pos() {
let mut bcf = IndexedReader::from_path(&"test/test.bcf").expect("Error opening file.");
bcf.set_threads(2).unwrap();
let rid = bcf
.header()
.name2rid(b"1")
.expect("Translating from contig '1' to ID failed.");
bcf.fetch(rid, 20077, None).expect("Fetching failed");
assert_eq!(bcf.records().count(), 0);
}

#[test]
fn test_write() {
let mut bcf = Reader::from_path(&"test/test_multi.bcf").expect("Error opening file.");
Expand Down
33 changes: 32 additions & 1 deletion src/bcf/record.rs
Original file line number Diff line number Diff line change
Expand Up @@ -558,6 +558,37 @@ impl Record {
})
}

/// Retrieve data for a `FORMAT` field
///
/// # Example
/// *Note: some boilerplate for the example is hidden for clarity. See [module documentation](../index.html#example-writing)
/// for an example of the setup used here.*
///
/// ```rust
/// # use rust_htslib::bcf::{Format, Writer};
/// # use rust_htslib::bcf::header::Header;
/// #
/// # // Create minimal VCF header with a single sample
/// # let mut header = Header::new();
/// header.push_sample(b"sample1").push_sample(b"sample2").push_record(br#"##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">"#);
/// #
/// # // Write uncompressed VCF to stdout with above header and get an empty record
/// # let mut vcf = Writer::from_stdout(&header, true, Format::VCF).unwrap();
/// # let mut record = vcf.empty_record();
/// record.push_format_integer(b"DP", &[20, 12]).expect("Failed to set DP format field");
///
/// let read_depths = record.format(b"DP").integer().expect("Couldn't retrieve DP field");
/// let sample1_depth = read_depths[0];
/// assert_eq!(sample1_depth, &[20]);
/// let sample2_depth = read_depths[1];
/// assert_eq!(sample2_depth, &[12])
/// ```
///
/// # Errors
/// **Attention:** the returned [`BufferBacked`] from [`integer()`](Format::integer)
/// (`read_depths`), which holds the data, has to be kept in scope as long as the data is
/// accessed. If parts of the data are accessed after the `BufferBacked` object is been
/// dropped, you will access unallocated memory.
pub fn format<'a>(&'a self, tag: &'a [u8]) -> Format<'a, Buffer> {
self.format_shared_buffer(tag, Buffer::new())
}
Expand Down Expand Up @@ -1222,7 +1253,7 @@ impl<'a, 'b, B: BorrowMut<Buffer> + Borrow<Buffer> + 'b> Format<'a, B> {
/// Get format data as integers.
///
/// **Attention:** the returned BufferBacked which holds the data has to be kept in scope
/// as along as the data is accessed. If parts of the data are accessed while
/// as long as the data is accessed. If parts of the data are accessed while
/// the BufferBacked object is already dropped, you will access unallocated
/// memory.
pub fn integer(mut self) -> Result<BufferBacked<'b, Vec<&'b [i32]>, B>> {
Expand Down

0 comments on commit cd6a29f

Please sign in to comment.