Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Develop → master merge for 1.4.4 #738

Merged
merged 24 commits into from
Feb 13, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
0bec5f2
CompatHelper: bump compat for SBML to 1, (keep existing compat)
Jan 25, 2023
4a30519
be more careful about SBML version
exaexa Jan 25, 2023
07f4e8b
Merge pull request #729 from LCSB-BioCore/compathelper/new_version/20…
exaexa Jan 25, 2023
6712526
remove a config from local CI
exaexa Jan 26, 2023
d54d60e
Merge pull request #730 from LCSB-BioCore/mk-slash-ci-worker
laurentheirendt Jan 26, 2023
f3f6dc4
add an actual GPA/GRR to DNF parser
exaexa Jan 27, 2023
eed3038
fix the tests to match the new state
exaexa Jan 27, 2023
4c03061
fix ordering issue
stelmo Jan 27, 2023
cd589ce
fix ordering issue in GRRs manually
stelmo Jan 27, 2023
4581ec8
change test values (order of input changed, hence this fix)
stelmo Jan 27, 2023
7224f46
automatic formatting
exaexa Jan 27, 2023
795d50e
Merge pull request #732 from LCSB-BioCore/mo-mk.actual-parser
exaexa Jan 27, 2023
478c03a
remove some duct tape
exaexa Jan 27, 2023
ac27ca5
Merge pull request #731 from LCSB-BioCore/mk-actual-grr-parser
exaexa Jan 27, 2023
74830b2
make the GPA parsing completely independent of hashtable order
exaexa Jan 28, 2023
6bb6a86
Merge pull request #733 from LCSB-BioCore/mk-gpa-ordering-fix
exaexa Jan 28, 2023
b2a81cc
small fix 1
exaexa Jan 28, 2023
c47427c
fix gra order in tests
exaexa Jan 28, 2023
dcc348d
never going to use Set for compaction again
exaexa Jan 28, 2023
ab0474d
Merge pull request #734 from LCSB-BioCore/mk-gra-fix-compat
exaexa Jan 28, 2023
aeaf923
version bump for 1.4.4
exaexa Feb 10, 2023
9e014c7
Merge pull request #737 from LCSB-BioCore/mk-144
exaexa Feb 10, 2023
a13f98d
clean up SBML unit-error throwing
exaexa Feb 12, 2023
28ddfbd
Merge pull request #744 from LCSB-BioCore/mk-clean-up-sbml-unit-error
exaexa Feb 12, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 0 additions & 17 deletions .gitlab-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -88,11 +88,6 @@ variables:
- Invoke-Expression $Env:ARTENOLIS_SOFT_PATH"\julia\"$Env:JULIA_VER"\bin\julia --inline=yes --check-bounds=yes --color=yes --project=@. -e 'import Pkg; Pkg.test(; coverage = true)'"
- exit $LASTEXITCODE

.global_env_win8: &global_env_win8
tags:
- windows8
<<: *global_env_win

.global_env_win10: &global_env_win10
tags:
- windows10
Expand Down Expand Up @@ -153,12 +148,6 @@ linux:julia1.6:
# Additional platform&environment compatibility tests
#

windows8:julia1.8:
stage: test-compat
<<: *global_trigger_compat_tests
<<: *global_julia18
<<: *global_env_win8

windows10:julia1.8:
stage: test-compat
<<: *global_trigger_compat_tests
Expand All @@ -171,12 +160,6 @@ mac:julia1.8:
<<: *global_julia18
<<: *global_env_mac

windows8:julia1.6:
stage: test-compat
<<: *global_trigger_compat_tests
<<: *global_julia16
<<: *global_env_win8

windows10:julia1.6:
stage: test-compat
<<: *global_trigger_compat_tests
Expand Down
6 changes: 4 additions & 2 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "COBREXA"
uuid = "babc4406-5200-4a30-9033-bf5ae714c842"
authors = ["The developers of COBREXA.jl"]
version = "1.4.3"
version = "1.4.4"

[deps]
Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
Expand All @@ -14,6 +14,7 @@ LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
MAT = "23992714-dd62-5051-b70f-ba57cb901cac"
MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09"
OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
PikaParser = "3bbf5609-3e7b-44cd-8549-7c69f321e792"
Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
SBML = "e5567a89-2604-4b09-9718-f5f78e97c3bb"
Expand All @@ -32,7 +33,8 @@ JuMP = "1"
MAT = "0.10"
MacroTools = "0.5.6"
OrderedCollections = "1.4"
SBML = "~1.3"
PikaParser = "0.5"
SBML = "~1.3, ~1.4"
StableRNGs = "1.0"
Tulip = "0.7.0, 0.8.0, 0.9.2"
julia = "1.5"
Expand Down
5 changes: 3 additions & 2 deletions src/COBREXA.jl
Original file line number Diff line number Diff line change
Expand Up @@ -27,21 +27,22 @@ module COBREXA

using Distributed
using DistributedData
using DocStringExtensions
using HDF5
using JSON
using JuMP
using LinearAlgebra
using MAT
using MacroTools
using MAT
using OrderedCollections
using Random
using Serialization
using SparseArrays
using StableRNGs
using Statistics
using DocStringExtensions

import Base: findfirst, getindex, show
import PikaParser as PP
import Pkg
import SBML # conflict with Reaction struct name

Expand Down
2 changes: 1 addition & 1 deletion src/base/types/SBMLModel.jl
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ function bounds(model::SBMLModel)::Tuple{Vector{Float64},Vector{Float64}}
if unit != common_unit
throw(
DomainError(
units_in_sbml,
unit,
"The SBML file uses multiple units; loading would need conversion",
),
)
Expand Down
2 changes: 1 addition & 1 deletion src/base/types/StandardModel.jl
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@ Return the gene reaction rule in string format for reaction with `id` in `model`
Return `nothing` if not available.
"""
reaction_gene_association(model::StandardModel, id::String)::Maybe{GeneAssociation} =
_maybemap(identity, model.reactions[id].grr)
model.reactions[id].grr

"""
$(TYPEDSIGNATURES)
Expand Down
225 changes: 150 additions & 75 deletions src/base/utils/gene_associations.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,56 @@
"""
$(TYPEDSIGNATURES)

Parse `SBML.GeneProductAssociation` structure to the simpler GeneAssociation.
The input must be (implicitly) in a positive DNF.
A helper for producing predictable unique sequences: it collects `x`, sorts the
result, and drops adjacent duplicates. Might be faster if the compaction were
done directly in sort().
"""
function _sortunique(x)
    # Work on a sorted private copy, so that equal elements end up adjacent
    # and the input collection is never modified.
    items = sort!(collect(x))
    # `kept` counts the distinct elements already compacted to the front.
    kept = 0
    for item in items
        # Only elements differing from the last kept one are new; writes land
        # at or before the position being read, so nothing unread is clobbered.
        if kept == 0 || !(item == items[kept])
            kept += 1
            items[kept] = item
        end
    end
    return items[1:kept]
end

"""
$(TYPEDSIGNATURES)

Parse `SBML.GeneProductAssociation` structure and convert it to a strictly
positive DNF [`GeneAssociation`](@ref). Negation (`SBML.GPANot`) is not
supported.
"""
function _parse_grr(gpa::SBML.GeneProductAssociation)::GeneAssociation
parse_ref(x) =
typeof(x) == SBML.GPARef ? [x.gene_product] :
begin
@_models_log @warn "Could not parse a part of gene association, ignoring: $x"
String[]

function fold_and(dnfs::Vector{Vector{Vector{String}}})::Vector{Vector{String}}
if isempty(dnfs)
[String[]]
else
_sortunique(
_sortunique(String[l; r]) for l in dnfs[1] for r in fold_and(dnfs[2:end])
)
end
parse_and(x) =
typeof(x) == SBML.GPAAnd ? vcat([parse_and(i) for i in x.terms]...) : parse_ref(x)
parse_or(x) =
typeof(x) == SBML.GPAOr ? vcat([parse_or(i) for i in x.terms]...) : [parse_and(x)]
return parse_or(gpa)
end

dnf(x::SBML.GPARef) = [[x.gene_product]]
dnf(x::SBML.GPAOr) = _sortunique(vcat(dnf.(x.terms)...))
dnf(x::SBML.GPAAnd) = fold_and(dnf.(x.terms))
dnf(x) = throw(
DomainError(
x,
"unsupported gene product association contents of type $(typeof(x))",
),
)
return dnf(gpa)
end

"""
Expand Down Expand Up @@ -49,69 +84,109 @@ julia> _parse_grr("(YIL010W and YLR043C) or (YIL010W and YGR209C)")
_parse_grr(s::String)::Maybe{GeneAssociation} = _maybemap(_parse_grr, _parse_grr_to_sbml(s))

"""
$(TYPEDSIGNATURES)

Internal helper for parsing the string GRRs into SBML data structures. More
general than [`_parse_grr`](@ref).
PikaParser grammar for stringy GRR expressions.
"""
function _parse_grr_to_sbml(str::String)::Maybe{SBML.GeneProductAssociation}
s = str
toks = String[]
m = Nothing
while !isnothing(
begin
m = match(r"( +|[a-zA-Z0-9_-]+|[^ a-zA-Z0-9_()-]+|[(]|[)])(.*)", s)
end,
)
tok = strip(m.captures[1])
!isempty(tok) && push!(toks, tok)
s = m.captures[2]
const _grr_grammar = begin
# characters that typically form the identifiers
isident(x::Char) =
isletter(x) ||
isdigit(x) ||
x == '_' ||
x == '-' ||
x == ':' ||
x == '.' ||
x == '\'' ||
x == '[' ||
x == ']' ||
x == '\x03' # a very ugly exception for badly parsed MAT files

# scanner helpers
eat(p) = m -> begin
last = 0
for i in eachindex(m)
p(m[i]) || break
last = i
end
last
end

fail() = throw(DomainError(str, "Could not parse GRR"))

# shunting yard
ops = Symbol[]
vals = SBML.GeneProductAssociation[]
fold(sym, op) =
while !isempty(ops) && last(ops) == sym
r = pop!(vals)
l = pop!(vals)
pop!(ops)
push!(vals, op([l, r]))
end
for tok in toks
if tok in ["and", "AND", "&", "&&"]
push!(ops, :and)
elseif tok in ["or", "OR", "|", "||"]
fold(:and, SBML.GPAAnd)
push!(ops, :or)
elseif tok == "("
push!(ops, :paren)
elseif tok == ")"
fold(:and, SBML.GPAAnd)
fold(:or, SBML.GPAOr)
if isempty(ops) || last(ops) != :paren
fail()
else
pop!(ops)
end
else
push!(vals, SBML.GPARef(tok))
end
# eat one of keywords
kws(w...) = m -> begin
last = eat(isident)(m)
m[begin:last] in w ? last : 0
end

fold(:and, SBML.GPAAnd)
fold(:or, SBML.GPAOr)
PP.make_grammar(
[:expr],
PP.flatten(
Dict(
:space => PP.first(PP.scan(eat(isspace)), PP.epsilon),
:id => PP.scan(eat(isident)),
:orop =>
PP.first(PP.tokens("||"), PP.token('|'), PP.scan(kws("OR", "or"))),
:andop => PP.first(
PP.tokens("&&"),
PP.token('&'),
PP.scan(kws("AND", "and")),
),
:expr => PP.seq(:space, :orexpr, :space, PP.end_of_input),
:orexpr => PP.first(
:or => PP.seq(:andexpr, :space, :orop, :space, :orexpr),
:andexpr,
),
:andexpr => PP.first(
:and => PP.seq(:baseexpr, :space, :andop, :space, :andexpr),
:baseexpr,
),
:baseexpr => PP.first(
:id,
:parenexpr => PP.seq(
PP.token('('),
:space,
:orexpr,
:space,
PP.token(')'),
),
),
),
Char,
),
)
end

if !isempty(ops) || length(vals) > 1
fail()
end
# Traversal filter for the GRR grammar: for a match `m`, return one flag per
# submatch saying whether that submatch should be descended into.
function _grr_grammar_open(m, _)
    rule = m.rule
    if rule == :expr
        # space, orexpr, space, end-of-input: only the inner expression matters
        return Bool[0, 1, 0, 0]
    elseif rule == :parenexpr
        # '(', space, orexpr, space, ')': only the bracketed expression matters
        return Bool[0, 0, 1, 0, 0]
    elseif rule == :or || rule == :and
        # operand, space, operator, space, operand: keep both operands
        return Bool[1, 0, 0, 0, 1]
    elseif rule in (:andexpr, :orexpr, :notexpr, :baseexpr)
        # pass-through wrappers around a single alternative
        return Bool[1]
    else
        # everything else (identifiers, operators, whitespace) is a leaf
        return (false for _ in m.submatches)
    end
end

# Fold step for the GRR grammar: build the value of match `m` from the values
# already computed for its submatches (`subvals`).
function _grr_grammar_fold(m, _, subvals)
    rule = m.rule
    # an identifier becomes a reference to the matched text
    rule == :id && return SBML.GPARef(m.view)
    # binary operators combine their first and fifth submatch (the operands)
    rule == :and && return SBML.GPAAnd([subvals[1], subvals[5]])
    rule == :or && return SBML.GPAOr([subvals[1], subvals[5]])
    # wrappers forward the value of the expression they enclose
    rule == :parenexpr && return subvals[3]
    rule == :expr && return subvals[2]
    # any other rule forwards its first subvalue, or `nothing` if it has none
    return isempty(subvals) ? nothing : subvals[1]
end

if isempty(vals)
nothing
"""
$(TYPEDSIGNATURES)

Internal helper for parsing the string GRRs into SBML data structures. More
general than [`_parse_grr`](@ref).
"""
function _parse_grr_to_sbml(str::String)::Maybe{SBML.GeneProductAssociation}
    # An empty or whitespace-only rule carries no association at all.
    all(isspace, str) && return nothing

    tree = PP.parse_lex(_grr_grammar, str)

    # Look for a match of the top-level `:expr` rule at the first input
    # position; a non-positive result is treated as a parse failure. The
    # failure path must not touch any other state, so that callers reliably
    # get a `DomainError` carrying the offending string.
    match = PP.find_match_at!(tree, :expr, 1)
    match > 0 || throw(DomainError(str, "cannot parse GRR"))

    # Convert the parse tree to SBML structures, descending only into the
    # submatches selected by `_grr_grammar_open`.
    return PP.traverse_match(
        tree,
        match,
        open = _grr_grammar_open,
        fold = _grr_grammar_fold,
    )
end

Expand All @@ -124,14 +199,14 @@ string.
# Example
```
julia> _unparse_grr(String, [["YIL010W", "YLR043C"], ["YIL010W", "YGR209C"]])
"(YIL010W and YLR043C) or (YIL010W and YGR209C)"
"(YIL010W && YLR043C) || (YIL010W && YGR209C)"
```
"""
function _unparse_grr(::Type{String}, grr::GeneAssociation)::String
grr_strings = String[]
for gr in grr
push!(grr_strings, "(" * join([g for g in gr], " and ") * ")")
end
grr_string = join(grr_strings, " or ")
return grr_string
function _unparse_grr(
::Type{String},
grr::GeneAssociation;
and = " && ",
or = " || ",
)::String
return join(("(" * join(gr, and) * ")" for gr in grr), or)
end
4 changes: 2 additions & 2 deletions test/analysis/gecko.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
haskey(ecoli_core_reaction_kcats, rid) ?
collect(
Isozyme(
Dict(grr .=> ecoli_core_protein_stoichiometry[rid][i]),
Dict(grr .=> fill(1.0, size(grr))),
ecoli_core_reaction_kcats[rid][i]...,
) for (i, grr) in enumerate(reaction_gene_association(model, rid))
) : Isozyme[]
Expand Down Expand Up @@ -41,7 +41,7 @@

@test isapprox(
rxn_fluxes["BIOMASS_Ecoli_core_w_GAM"],
0.812827846796761,
0.8129179015245396,
atol = TEST_TOLERANCE,
)

Expand Down
Loading