Skip to content

Commit

Permalink
GFA walks with no interval; fixes vgteam/vg#4517
Browse files Browse the repository at this point in the history
  • Loading branch information
jltsiren committed Feb 4, 2025
1 parent ebb77a1 commit a0da8ea
Show file tree
Hide file tree
Showing 4 changed files with 91 additions and 45 deletions.
2 changes: 1 addition & 1 deletion LICENSE
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Copyright (c) 2019, 2020, 2021, 2022, 2023, 2024 Jouni Siren and other authors
Copyright (c) 2019, 2020, 2021, 2022, 2023, 2024, 2025 Jouni Siren and other authors

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
Expand Down
13 changes: 10 additions & 3 deletions src/utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -869,10 +869,17 @@ MetadataBuilder::add_walk(const std::string& sample, const std::string& haplotyp

// Start position as fragment identifier.
size_t phase_block = PathMetadata::NO_PHASE_BLOCK;
try { phase_block = std::stoul(start); }
catch(const std::invalid_argument&)
if (start == "*")
{
throw std::runtime_error("MetadataBuilder: Invalid start position " + start);
phase_block = 0;
}
else
{
try { phase_block = std::stoul(start); }
catch(const std::invalid_argument&)
{
throw std::runtime_error("MetadataBuilder: Invalid start position " + start);
}
}

// Add as a haplotype
Expand Down
19 changes: 19 additions & 0 deletions tests/test_gfa.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -873,6 +873,25 @@ TEST_F(GBWTMetadata, Walks)
this->check_metadata(index.metadata, expected_metadata);
}

// Builds a GBWT from the "no interval" walks fixture and compares the
// resulting metadata against a hand-written expectation.
// NOTE(review): the fixture presumably contains W-lines whose interval is
// given as "*" (inferred from the file name) -- confirm against the GFA file.
TEST_F(GBWTMetadata, WalksNoInterval)
{
  auto parsed = gfa_to_gbwt("gfas/components_walks_no_interval.gfa");
  const gbwt::GBWT& gbwt_index = *parsed.first;

  // Nothing below is meaningful unless the index actually carries metadata.
  ASSERT_TRUE(gbwt_index.hasMetadata()) << "No GBWT metadata was created";

  // Expected metadata: one sample, two haplotypes, the fixture's contig names.
  const std::vector<std::string> sample_names { "sample" };
  gbwt::Metadata expected;
  expected.setSamples(sample_names);
  expected.setHaplotypes(2);
  expected.setContigs(this->names);

  // Four paths; the last argument is 0 for every one of them.
  // NOTE(review): argument order is assumed to be (sample, contig, phase, count).
  expected.addPath(0, 0, 1, 0);
  expected.addPath(0, 0, 2, 0);
  expected.addPath(0, 1, 1, 0);
  expected.addPath(0, 1, 2, 0);

  this->check_metadata(gbwt_index.metadata, expected);
}

TEST_F(GBWTMetadata, WalksAndPaths)
{
auto gfa_parse = gfa_to_gbwt("gfas/example_walks.gfa");
Expand Down
102 changes: 61 additions & 41 deletions tests/test_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -170,14 +170,6 @@ TEST_F(SourceTest, TranslateSegments)

//------------------------------------------------------------------------------

struct StandAlonePathName
{
std::string sample;
std::string contig;
size_t haplotype;
size_t fragment;
};

class MetadataBuilderTest : public ::testing::Test
{
public:
Expand All @@ -192,7 +184,7 @@ class MetadataBuilderTest : public ::testing::Test
void create_example(
std::vector<std::string>& samples,
std::vector<std::string>& contigs,
std::vector<StandAlonePathName>& paths,
std::vector<gbwt::FullPathName>& paths,
bool generic_reference) const
{
std::string reference_sample = (generic_reference ? REFERENCE_PATH_SAMPLE_NAME : "GRCh38");
Expand All @@ -218,7 +210,7 @@ class MetadataBuilderTest : public ::testing::Test

void add_hg004(
std::vector<std::string>& samples,
std::vector<StandAlonePathName>& paths) const
std::vector<gbwt::FullPathName>& paths) const
{
samples.push_back("HG004");
paths.push_back({ "HG004", "chr1", 1, 0 });
Expand All @@ -227,59 +219,76 @@ class MetadataBuilderTest : public ::testing::Test
paths.push_back({ "HG004", "chr2", 2, 0 });
}

size_t get_job(const StandAlonePathName& path) const
size_t get_job(const gbwt::FullPathName& path) const
{
if(path.contig == "chr1") { return 0; }
if(path.contig == "chr2") { return 1; }
if(path.contig_name == "chr1") { return 0; }
if(path.contig_name == "chr2") { return 1; }
return 0;
}

void add_haplotypes(MetadataBuilder& builder, const std::vector<StandAlonePathName>& paths, size_t from, bool assign_job)
void add_haplotypes(MetadataBuilder& builder, const std::vector<gbwt::FullPathName>& paths, size_t from, bool assign_job)
{
for(size_t i = from; i < paths.size(); i++)
{
const StandAlonePathName& path = paths[i];
const gbwt::FullPathName& path = paths[i];
size_t job = (assign_job ? get_job(path) : 0);
if(path.sample == REFERENCE_PATH_SAMPLE_NAME)
if(path.sample_name == REFERENCE_PATH_SAMPLE_NAME)
{
builder.add_generic_path(path.contig_name, job);
}
else
{
builder.add_haplotype(path.sample_name, path.contig_name, path.haplotype, path.offset, job);
}
}
}

void add_walks(MetadataBuilder& builder, const std::vector<gbwt::FullPathName>& paths)
{
for(const gbwt::FullPathName& path : paths)
{
if(path.sample_name == REFERENCE_PATH_SAMPLE_NAME)
{
builder.add_generic_path(path.contig, job);
builder.add_generic_path(path.contig_name);
}
else
{
builder.add_haplotype(path.sample, path.contig, path.haplotype, path.fragment, job);
std::string haplotype = std::to_string(path.haplotype);
std::string start = std::to_string(path.offset);
builder.add_walk(path.sample_name, haplotype, path.contig_name, start);
}
}
}

void add_walks(MetadataBuilder& builder, const std::vector<StandAlonePathName>& paths)
void add_walks_no_interval(MetadataBuilder& builder, const std::vector<gbwt::FullPathName>& paths)
{
for(const StandAlonePathName& path : paths)
std::string no_interval = "*";
for(const gbwt::FullPathName& path : paths)
{
if(path.sample == REFERENCE_PATH_SAMPLE_NAME)
if(path.sample_name == REFERENCE_PATH_SAMPLE_NAME)
{
builder.add_generic_path(path.contig);
builder.add_generic_path(path.contig_name);
}
else
{
std::string haplotype = std::to_string(path.haplotype);
std::string start = std::to_string(path.fragment);
builder.add_walk(path.sample, haplotype, path.contig, start);
builder.add_walk(path.sample_name, haplotype, path.contig_name, no_interval);
}
}
}

void add_named_paths(MetadataBuilder& builder, const std::vector<StandAlonePathName>& paths)
void add_named_paths(MetadataBuilder& builder, const std::vector<gbwt::FullPathName>& paths)
{
for(const StandAlonePathName& path : paths)
for(const gbwt::FullPathName& path : paths)
{
std::string name;
if(path.sample == REFERENCE_PATH_SAMPLE_NAME)
if(path.sample_name == REFERENCE_PATH_SAMPLE_NAME)
{
name = path.contig;
name = path.contig_name;
}
else
{
name = path.sample + "#" + std::to_string(path.haplotype) + "#" + path.contig;
name = path.sample_name + "#" + std::to_string(path.haplotype) + "#" + path.contig_name;
}
builder.add_path(name);
}
Expand All @@ -289,7 +298,7 @@ class MetadataBuilderTest : public ::testing::Test
const gbwt::Metadata& metadata,
const std::vector<std::string>& samples,
const std::vector<std::string>& contigs,
const std::vector<StandAlonePathName>& paths) const
const std::vector<gbwt::FullPathName>& paths) const
{
ASSERT_EQ(metadata.samples(), samples.size()) << "Invalid number of samples";
for(size_t i = 0; i < samples.size(); i++)
Expand All @@ -307,10 +316,10 @@ class MetadataBuilderTest : public ::testing::Test
for(size_t i = 0; i < paths.size(); i++)
{
gbwt::PathName path = metadata.path(i);
EXPECT_EQ(metadata.sample(path.sample), paths[i].sample) << "Invalid sample name for path " << i;
EXPECT_EQ(metadata.contig(path.contig), paths[i].contig) << "Invalid contig name for path " << i;
EXPECT_EQ(metadata.sample(path.sample), paths[i].sample_name) << "Invalid sample name for path " << i;
EXPECT_EQ(metadata.contig(path.contig), paths[i].contig_name) << "Invalid contig name for path " << i;
EXPECT_EQ(path.phase, paths[i].haplotype) << "Invalid haplotype for path " << i;
EXPECT_EQ(path.count, paths[i].fragment) << "Invalid fragment for path " << i;
EXPECT_EQ(path.count, paths[i].offset) << "Invalid offset for path " << i;
}
}
};
Expand All @@ -323,7 +332,7 @@ TEST_F(MetadataBuilderTest, Empty)
TEST_F(MetadataBuilderTest, GenericPathsAndHaplotypes)
{
std::vector<std::string> samples, contigs;
std::vector<StandAlonePathName> paths;
std::vector<gbwt::FullPathName> paths;
this->create_example(samples, contigs, paths, true);

MetadataBuilder builder;
Expand All @@ -334,18 +343,29 @@ TEST_F(MetadataBuilderTest, GenericPathsAndHaplotypes)
TEST_F(MetadataBuilderTest, GFAPathsAndWalks)
{
std::vector<std::string> samples, contigs;
std::vector<StandAlonePathName> paths;
std::vector<gbwt::FullPathName> paths;
this->create_example(samples, contigs, paths, true);

MetadataBuilder builder;
this->add_walks(builder, paths);
this->check_metadata(builder.get_metadata(), samples, contigs, paths);
}

// Feeds the example paths to MetadataBuilder as walks with no interval and
// checks that the resulting metadata still matches the example.
// NOTE(review): check_metadata compares each stored count with the example
// path's offset, so this presumably relies on every example path having
// offset 0 -- confirm in create_example.
TEST_F(MetadataBuilderTest, GFAWalksNoInterval)
{
  // Expected contents, filled in by the fixture helper.
  std::vector<std::string> sample_names;
  std::vector<std::string> contig_names;
  std::vector<gbwt::FullPathName> expected_paths;
  const bool generic_reference = true;
  this->create_example(sample_names, contig_names, expected_paths, generic_reference);

  // Register every example path through the no-interval walk helper.
  MetadataBuilder builder;
  this->add_walks_no_interval(builder, expected_paths);

  this->check_metadata(builder.get_metadata(), sample_names, contig_names, expected_paths);
}

TEST_F(MetadataBuilderTest, PanSN)
{
std::vector<std::string> samples, contigs;
std::vector<StandAlonePathName> paths;
std::vector<gbwt::FullPathName> paths;
this->create_example(samples, contigs, paths, false);

MetadataBuilder builder(
Expand All @@ -360,7 +380,7 @@ TEST_F(MetadataBuilderTest, PanSN)
TEST_F(MetadataBuilderTest, Clear)
{
std::vector<std::string> samples, contigs;
std::vector<StandAlonePathName> paths;
std::vector<gbwt::FullPathName> paths;
this->create_example(samples, contigs, paths, true);

MetadataBuilder builder;
Expand All @@ -372,7 +392,7 @@ TEST_F(MetadataBuilderTest, Clear)
TEST_F(MetadataBuilderTest, MultipleFormats)
{
std::vector<std::string> samples, contigs;
std::vector<StandAlonePathName> paths;
std::vector<gbwt::FullPathName> paths;
this->create_example(samples, contigs, paths, true);

MetadataBuilder builder;
Expand All @@ -390,7 +410,7 @@ TEST_F(MetadataBuilderTest, MultipleFormats)
TEST_F(MetadataBuilderTest, FromMetadata)
{
std::vector<std::string> samples, contigs;
std::vector<StandAlonePathName> paths;
std::vector<gbwt::FullPathName> paths;
this->create_example(samples, contigs, paths, true);
size_t old_paths = paths.size();

Expand All @@ -407,13 +427,13 @@ TEST_F(MetadataBuilderTest, FromMetadata)
TEST_F(MetadataBuilderTest, MultipleJobs)
{
std::vector<std::string> samples, contigs;
std::vector<StandAlonePathName> paths;
std::vector<gbwt::FullPathName> paths;
this->create_example(samples, contigs, paths, true);

MetadataBuilder builder;
this->add_haplotypes(builder, paths, 0, true);

std::vector<StandAlonePathName> reordered_paths;
std::vector<gbwt::FullPathName> reordered_paths;
for(size_t job = 0; job < contigs.size(); job++)
{
for(size_t i = 0; i < paths.size(); i++)
Expand Down

0 comments on commit a0da8ea

Please sign in to comment.