Skip to content

Commit

Permalink
Initial version of working biomodels loader
Browse files Browse the repository at this point in the history
  • Loading branch information
pgleeson committed Jan 27, 2025
1 parent 542e9cd commit 2b8d663
Show file tree
Hide file tree
Showing 7 changed files with 756 additions and 41 deletions.
25 changes: 20 additions & 5 deletions libraries/client/biomodels_info.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,39 @@
"""
Script to get Biomodels project info
Script to get BioModels project info
"""

import requests
import json
import pprint
from loadbiomodels import get_model_identifiers, get_model_info

verbose = True #
verbose = False

info_all = {}

API_URL: str = "https://www.ebi.ac.uk/biomodels"
out_format = "json"


def get_model_identifiers(timeout=30):
    """Fetch the listing of model identifiers from the BioModels REST API.

    Sends a GET request to ``API_URL + "/model/identifiers"`` asking for the
    format named by the module-level ``out_format``.

    Args:
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits forever on a stalled connection.

    Returns:
        The decoded JSON body of the response (the caller in this script
        reads its ``"models"`` entry).

    Raises:
        requests.HTTPError: if the server answers with an error status.
        requests.Timeout: if no response arrives within ``timeout`` seconds.
    """
    response = requests.get(
        API_URL + "/model/identifiers",
        # Let requests build the query string instead of concatenating it.
        params={"format": out_format},
        timeout=timeout,
    )
    response.raise_for_status()
    return response.json()


def get_model_info(model_id, timeout=30):
    """Fetch the metadata record of a single model from the BioModels REST API.

    Sends a GET request to ``API_URL + "/" + model_id`` asking for the format
    named by the module-level ``out_format``.

    Args:
        model_id: Identifier of the model, appended to the API URL as-is.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits forever on a stalled connection.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        requests.Timeout: if no response arrives within ``timeout`` seconds.
    """
    response = requests.get(
        API_URL + "/" + model_id,
        # Let requests build the query string instead of concatenating it.
        params={"format": out_format},
        timeout=timeout,
    )
    response.raise_for_status()
    return response.json()


if __name__ == "__main__":
min_index = 0
max_index = 10000
max_index = 10
index = 1

from loadbiomodels import get_model_identifiers

model_ids = get_model_identifiers()["models"]

selection = model_ids[min_index:max_index]
Expand Down
4 changes: 2 additions & 2 deletions libraries/client/cached_info/osb_gh.json
Original file line number Diff line number Diff line change
Expand Up @@ -213011,9 +213011,9 @@
},
"private": false,
"pulls_url": "https://api.github.com/repos/OpenSourceBrain/OSBv2/pulls{/number}",
"pushed_at": "2025-01-27T14:55:01Z",
"pushed_at": "2025-01-27T18:21:56Z",
"releases_url": "https://api.github.com/repos/OpenSourceBrain/OSBv2/releases{/id}",
"size": 56350,
"size": 46654,
"ssh_url": "git@github.com:OpenSourceBrain/OSBv2.git",
"stargazers_count": 12,
"stargazers_url": "https://api.github.com/repos/OpenSourceBrain/OSBv2/stargazers",
Expand Down
482 changes: 482 additions & 0 deletions libraries/client/cached_info/repos_v2dev.json

Large diffs are not rendered by default.

263 changes: 233 additions & 30 deletions libraries/client/loadbiomodels.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,17 @@
import requests

import pprint
import workspaces_cli
from pprint import pprint
from workspaces_cli.api import rest_api, k8s_api
import logging
import json
import sys

API_URL: str = "https://www.ebi.ac.uk/biomodels"
out_format = "json"
from utils import get_tags_info
from utils import known_users, lookup_user

from workspaces_cli.models import (
OSBRepository,
RepositoryContentType,
)

# Take from the accessToken cookie after login
TOKEN = "EDITME"
Expand All @@ -25,38 +32,234 @@
else:
dry_run = False # dry_run = True

BIOMODELS_URL: str = "https://www.ebi.ac.uk/biomodels"

index = 0
min_index = 0
max_index = 10

verbose = True #
verbose = False

configuration = workspaces_cli.Configuration(
host="https://workspaces.%s.opensourcebrain.org/api" % v2_or_v2dev,
access_token=TOKEN,
)

owner_user_id = known_users["OSBAdmin_v2"]
if v2_or_v2dev == "v2dev":
owner_user_id = known_users["OSBAdmin_v2dev"]

# Enter a context with an instance of the API client
with workspaces_cli.ApiClient(configuration) as api_client:
# Create an instance of the API class
api_instance = k8s_api.K8sApi(api_client)

try:
# Test if application is healthy
api_response = api_instance.live()
pprint(api_response)
except workspaces_cli.ApiException as e:
print("Exception when calling K8sApi->live: %s\n" % e)

filename = "cached_info/biomodels.json"
biomodels_info = json.load(open(filename))
biomodels_info.pop("0", None)

print("Loaded info on %s biomodels models" % len(biomodels_info))

all_updated = []
all_added = []
multi_matches = []
all_errors = []


with workspaces_cli.ApiClient(configuration) as api_client:
api_instance = rest_api.RestApi(api_client)

def add_biomodels_model(biomodels_model, index):
biomodels_model_id = biomodels_model["publicationId"]
name = biomodels_model["name"]

print(
"\n================ %i: %s, %s ================\n"
% (index, biomodels_model_id, name)
)

if biomodels_model["curationStatus"] != "CURATED":
print(
" Not adding, as curationStatus = %s"
% biomodels_model["curationStatus"]
)
return False

biomodels_uri = f"{BIOMODELS_URL}/{biomodels_model_id}"
search = f"uri__like={biomodels_uri}"

found = api_instance.osbrepository_get(q=search)

def get_model_identifiers():
response = requests.get(API_URL + "/model/identifiers?format=" + out_format)
response.raise_for_status()
output = response.json()
return output
if found.osbrepositories:
matching_repos = []
matches = []

for r in found.osbrepositories:
if r.uri == biomodels_uri:
matching_repos.append(
"M: URL to OSBv2 repo: https://%s.opensourcebrain.org/repositories/%i (%s) - %s\n"
% (v2_or_v2dev, r.id, r.uri, biomodels_uri)
)
matches.append(r)

def get_model_info(model_id):
response = requests.get(API_URL + "/" + model_id + "?format=" + out_format)
response.raise_for_status()
output = response.json()
return output
if len(matching_repos) > 1:
print("Matching: %s" % matching_repos)
err_info = " More than one match for [%s] (search: %s):\n" % (
biomodels_uri,
search,
)
for r in found.osbrepositories:
err_info += (
" - URL to OSBv2 repo: https://%s.opensourcebrain.org/repositories/%i (%s)\n"
% (v2_or_v2dev, r.id, r.uri)
)
err_info += " - Owner %s\n" % (lookup_user(r.user_id, ""))

print(err_info)
exit()
if verbose:
print(
"\n ------------ Current OSB %s repo info: ---------"
% v2_or_v2dev
)
print(" %s" % found)
print(" ------------ OSB API info: ---------")
print(" %s" % biomodels_model)

def main():
model_ids = get_model_identifiers()["models"]
max_count = 10
count = 0
multi_matches.append(err_info)
return False

for model_id in model_ids:
if count < max_count:
if "BIOMD" in model_id:
model_link = f"[{model_id}](https://www.ebi.ac.uk/biomodels/{model_id})"
info = get_model_info(model_id)
# model_name = info["name"]
print("\n===============================================")
existing_repo = matches[0]
url_info = (
" URL to OSBv2 repo: https://%s.opensourcebrain.org/repositories/%i"
% (v2_or_v2dev, existing_repo.id)
)
try:
print(
f" {model_id} ({model_link}): \n{pprint.pformat(info['name'])}--"
" %s already exists (owner: %s); updating..."
% (biomodels_model_id, lookup_user(existing_repo.user_id, url_info))
)
except Exception:
exit(-1)
print(url_info)
all_updated.append(url_info)

if verbose:
print(
"\n ------------ Current OSB %s repo info: ---------"
% v2_or_v2dev
)
print(" %s" % found)
print(" ------------ OSB API info: ---------")
print(" %s" % biomodels_model)

tags = get_tags_info(biomodels_info=biomodels_model)

if not dry_run:
desc = biomodels_model["description"]
print(" Description: %s..." % desc[:150])
latest_version = biomodels_model["history"]["revisions"][-1]
print(" Latest version: %s" % latest_version)

return api_instance.osbrepository_id_put(
existing_repo.id,
OSBRepository(
uri=biomodels_uri,
name=name,
summary=desc,
tags=tags,
default_context=existing_repo.default_context,
content_types_list=[RepositoryContentType(value="modeling")],
content_types="modeling",
user_id=owner_user_id,
repository_type="biomodels",
auto_sync=True,
),
)
else:
print(f"Search failed: {search}")
print(
" **********************************************************************"
)
print(" **** Adding %s: %s" % (biomodels_model_id, name))
print(
" **********************************************************************"
)

tags = get_tags_info(biomodels_info=biomodels_model)

all_added.append(" %s, index %i" % (name, index))
desc = biomodels_model["description"]
print(" Description: %s..." % desc[:150])
latest_version = biomodels_model["history"]["revisions"][-1]
print(" Latest version: %s" % latest_version)

if not dry_run:
return api_instance.osbrepository_post(
OSBRepository(
uri=biomodels_uri,
name=name,
summary=desc,
tags=tags,
default_context=str(latest_version["version"]),
content_types_list=[RepositoryContentType(value="modeling")],
content_types="modeling",
user_id=owner_user_id,
repository_type="biomodels",
auto_sync=True,
)
)
count += 1

url_info = (
" URL to OSBv2 repo: https://%s.opensourcebrain.org/repositories/%s"
% (v2_or_v2dev, "???")
) # found.osbrepositories[0].id)
print(url_info)

# Walk the cached BioModels entries in file order and add/update each one
# whose running position falls inside [min_index, max_index).
for biomodels_model_id, biomodels_model in biomodels_info.items():
    if min_index <= index < max_index:
        try:
            added = add_biomodels_model(biomodels_model, index)
        except Exception as e:
            print("----------")
            logging.exception("Error adding %s" % biomodels_model)
            print("----------")
            print("Error: %s" % str(e))
            print("----------")
            if "context_resources" not in str(e):
                # Anything other than the tolerated "context_resources"
                # failure stops the whole run.
                print("Exiting due to unknown error...")
                exit()
            else:
                print("Known error...")
                # Record the tolerated failure so the "Errors found" summary
                # printed at the end of the script is not always empty.
                all_errors.append(
                    "    %s, index %i: %s" % (biomodels_model_id, index, e)
                )

    # Counts every entry, including those outside the selected window.
    index += 1

# print(added)

# Final report: a banner with the updated repositories, followed by one
# section per remaining result list.
banner = (
    "\n+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++"
)
done_line = "\n\nDone! All updated (%i total; dry_run: %s):" % (
    len(all_updated),
    dry_run,
)
print(banner + done_line)
for m in all_updated:
    print(m)

for heading, entries in (
    ("\nAll added (%i total):", all_added),
    ("\nMultiple matches found (%i total):", multi_matches),
):
    print(heading % len(entries))
    for m in entries:
        print(m)

if __name__ == "__main__":
main()
print("\nErrors found (%i total):" % len(all_errors))
for de in all_errors:
print(de)
2 changes: 1 addition & 1 deletion libraries/client/loadmodeldb.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@

index = 0
min_index = 0
max_index = 920
max_index = 10000

verbose = True #
verbose = False
Expand Down
4 changes: 2 additions & 2 deletions libraries/client/modeldb_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@
print("\nStill to be forked (%i total):" % len(to_be_forked))
for m in to_be_forked:
print(m)
info[0]["to_be_forked"].append(m.trim())
info[0]["to_be_forked"].append(m.strip())

print("\nMany forks (%i total):" % len(many_forks))
for m in many_forks:
Expand All @@ -167,7 +167,7 @@
print("\nErrors (%i total):" % len(errors))
for m in errors:
print(m)
info[0]["errors"].append(m.trim())
info[0]["errors"].append(m.strip())

strj = json.dumps(info, indent=" ", sort_keys=True)
with open(filename, "w") as fp:
Expand Down
17 changes: 16 additions & 1 deletion libraries/client/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,25 @@ def lookup_user(uid, url):


def get_tags_info(
dandi_api_info=None, dandishowcase_info=None, osbv1_info=None, modeldb_info=None
dandi_api_info=None,
dandishowcase_info=None,
osbv1_info=None,
modeldb_info=None,
biomodels_info=None,
):
tags = []

if biomodels_info is not None:
tags.append("BioModels")
tags.append("BioModels:%s" % biomodels_info["publicationId"])
tags.append(biomodels_info["format"]["identifier"])
for mla in biomodels_info["modelLevelAnnotations"]:
if mla["qualifier"] == "bqbiol:hasTaxon":
tags.append(mla["name"])
if mla["qualifier"] == "bqbiol:isVersionOf":
n = mla["name"]
tags.append(n[0].upper() + n[1:])

if modeldb_info is not None:
tags.append("ModelDB")
tags.append("ModelDB:%s" % modeldb_info["id"])
Expand Down

0 comments on commit 2b8d663

Please sign in to comment.