Initial version of working biomodels loader

OpenSourceBrain · Jan 27, 2025 · 2b8d663 · 2b8d663
1 parent 542e9cd
commit 2b8d663
Show file tree

Hide file tree

Showing 7 changed files with 756 additions and 41 deletions.
diff --git a/libraries/client/biomodels_info.py b/libraries/client/biomodels_info.py
@@ -1,24 +1,39 @@
 """
-Script to get Biomodels project info
+Script to get BioModels project info
 """
 
+import requests
 import json
 import pprint
-from loadbiomodels import get_model_identifiers, get_model_info
 
 verbose = True  #
 verbose = False
 
 info_all = {}
 
+API_URL: str = "https://www.ebi.ac.uk/biomodels"
+out_format = "json"
+
+
+def get_model_identifiers(timeout=30):
+    """Fetch the identifiers of all models from the BioModels REST API.
+
+    Args:
+        timeout: Seconds to wait for the server before giving up. Requests
+            applies no timeout unless one is passed, so without it a stalled
+            connection would hang the script indefinitely.
+
+    Returns:
+        The decoded JSON body of the ``/model/identifiers`` endpoint (the
+        ``__main__`` section of this script reads its ``"models"`` entry).
+
+    Raises:
+        requests.HTTPError: If the server answers with an error status.
+        requests.Timeout: If no response arrives within ``timeout`` seconds.
+    """
+    response = requests.get(
+        API_URL + "/model/identifiers?format=" + out_format,
+        timeout=timeout,
+    )
+    response.raise_for_status()
+    return response.json()
+
+
+def get_model_info(model_id, timeout=30):
+    """Fetch the metadata record of a single BioModels model.
+
+    Args:
+        model_id: Identifier of the model; it is appended to the API base
+            URL as a path segment.
+        timeout: Seconds to wait for the server before giving up. Requests
+            applies no timeout unless one is passed, so without it a stalled
+            connection would hang the script indefinitely.
+
+    Returns:
+        The decoded JSON body returned for ``model_id``.
+
+    Raises:
+        requests.HTTPError: If the server answers with an error status.
+        requests.Timeout: If no response arrives within ``timeout`` seconds.
+    """
+    response = requests.get(
+        API_URL + "/" + model_id + "?format=" + out_format,
+        timeout=timeout,
+    )
+    response.raise_for_status()
+    return response.json()
+
 
 if __name__ == "__main__":
     min_index = 0
-    max_index = 10000
+    max_index = 10
     index = 1
 
-    from loadbiomodels import get_model_identifiers
-
     model_ids = get_model_identifiers()["models"]
 
     selection = model_ids[min_index:max_index]

diff --git a/libraries/client/cached_info/osb_gh.json b/libraries/client/cached_info/osb_gh.json
@@ -213011,9 +213011,9 @@
         },
         "private": false,
         "pulls_url": "https://api.github.com/repos/OpenSourceBrain/OSBv2/pulls{/number}",
-        "pushed_at": "2025-01-27T14:55:01Z",
+        "pushed_at": "2025-01-27T18:21:56Z",
         "releases_url": "https://api.github.com/repos/OpenSourceBrain/OSBv2/releases{/id}",
-        "size": 56350,
+        "size": 46654,
         "ssh_url": "git@github.com:OpenSourceBrain/OSBv2.git",
         "stargazers_count": 12,
         "stargazers_url": "https://api.github.com/repos/OpenSourceBrain/OSBv2/stargazers",

diff --git a/libraries/client/cached_info/repos_v2dev.json b/libraries/client/cached_info/repos_v2dev.json
diff --git a/libraries/client/loadbiomodels.py b/libraries/client/loadbiomodels.py
@@ -1,10 +1,17 @@
-import requests
-
-import pprint
+import workspaces_cli
+from pprint import pprint
+from workspaces_cli.api import rest_api, k8s_api
+import logging
+import json
 import sys
 
-API_URL: str = "https://www.ebi.ac.uk/biomodels"
-out_format = "json"
+from utils import get_tags_info
+from utils import known_users, lookup_user
+
+from workspaces_cli.models import (
+    OSBRepository,
+    RepositoryContentType,
+)
 
 # Take from the accessToken cookie after login
 TOKEN = "EDITME"
@@ -25,38 +32,234 @@
 else:
     dry_run = False  # dry_run = True
 
+BIOMODELS_URL: str = "https://www.ebi.ac.uk/biomodels"
+
+index = 0
+min_index = 0
+max_index = 10
+
+verbose = True  #
+verbose = False
+
+configuration = workspaces_cli.Configuration(
+    host="https://workspaces.%s.opensourcebrain.org/api" % v2_or_v2dev,
+    access_token=TOKEN,
+)
+
+owner_user_id = known_users["OSBAdmin_v2"]
+if v2_or_v2dev == "v2dev":
+    owner_user_id = known_users["OSBAdmin_v2dev"]
+
+# Enter a context with an instance of the API client
+with workspaces_cli.ApiClient(configuration) as api_client:
+    # Create an instance of the API class
+    api_instance = k8s_api.K8sApi(api_client)
+
+    try:
+        # Test if application is healthy
+        api_response = api_instance.live()
+        pprint(api_response)
+    except workspaces_cli.ApiException as e:
+        print("Exception when calling K8sApi->live: %s\n" % e)
+
+# Cached BioModels metadata, read from a local JSON file.
+filename = "cached_info/biomodels.json"
+# Use a context manager so the file handle is closed as soon as the data is
+# parsed instead of being left open until garbage collection.
+with open(filename) as fp:
+    biomodels_info = json.load(fp)
+# Drop the entry stored under key "0" if present (presumably a non-model
+# bookkeeping record -- confirm against the script that writes this file).
+biomodels_info.pop("0", None)
+
+print("Loaded info on %s biomodels models" % len(biomodels_info))
+
+all_updated = []
+all_added = []
+multi_matches = []
+all_errors = []
+
+
+with workspaces_cli.ApiClient(configuration) as api_client:
+    api_instance = rest_api.RestApi(api_client)
+
+    def add_biomodels_model(biomodels_model, index):
+        biomodels_model_id = biomodels_model["publicationId"]
+        name = biomodels_model["name"]
+
+        print(
+            "\n================ %i: %s, %s ================\n"
+            % (index, biomodels_model_id, name)
+        )
+
+        if biomodels_model["curationStatus"] != "CURATED":
+            print(
+                "  Not adding, as curationStatus = %s"
+                % biomodels_model["curationStatus"]
+            )
+            return False
+
+        biomodels_uri = f"{BIOMODELS_URL}/{biomodels_model_id}"
+        search = f"uri__like={biomodels_uri}"
+
+        found = api_instance.osbrepository_get(q=search)
 
-def get_model_identifiers():
-    response = requests.get(API_URL + "/model/identifiers?format=" + out_format)
-    response.raise_for_status()
-    output = response.json()
-    return output
+        if found.osbrepositories:
+            matching_repos = []
+            matches = []
 
+            for r in found.osbrepositories:
+                if r.uri == biomodels_uri:
+                    matching_repos.append(
+                        "M: URL to OSBv2 repo: https://%s.opensourcebrain.org/repositories/%i (%s) - %s\n"
+                        % (v2_or_v2dev, r.id, r.uri, biomodels_uri)
+                    )
+                    matches.append(r)
 
-def get_model_info(model_id):
-    response = requests.get(API_URL + "/" + model_id + "?format=" + out_format)
-    response.raise_for_status()
-    output = response.json()
-    return output
+            if len(matching_repos) > 1:
+                print("Matching: %s" % matching_repos)
+                err_info = "    More than one match for [%s] (search: %s):\n" % (
+                    biomodels_uri,
+                    search,
+                )
+                for r in found.osbrepositories:
+                    err_info += (
+                        "         - URL to OSBv2 repo: https://%s.opensourcebrain.org/repositories/%i (%s)\n"
+                        % (v2_or_v2dev, r.id, r.uri)
+                    )
+                    err_info += "         - Owner %s\n" % (lookup_user(r.user_id, ""))
 
+                print(err_info)
+                # Do not abort the whole run here: an exit() at this point made
+                # the rest of this branch unreachable, so the conflict was never
+                # stored in multi_matches and never reached the final summary.
+                # Fall through instead; the model is skipped (return False below).
+                if verbose:
+                    print(
+                        "\n    ------------ Current OSB %s repo info: ---------"
+                        % v2_or_v2dev
+                    )
+                    print("    %s" % found)
+                    print("    ------------ OSB API info: ---------")
+                    print("    %s" % biomodels_model)
 
-def main():
-    model_ids = get_model_identifiers()["models"]
-    max_count = 10
-    count = 0
+                multi_matches.append(err_info)
+                return False
 
-    for model_id in model_ids:
-        if count < max_count:
-            if "BIOMD" in model_id:
-                model_link = f"[{model_id}](https://www.ebi.ac.uk/biomodels/{model_id})"
-                info = get_model_info(model_id)
-                # model_name = info["name"]
-                print("\n===============================================")
+            existing_repo = matches[0]
+            url_info = (
+                "    URL to OSBv2 repo: https://%s.opensourcebrain.org/repositories/%i"
+                % (v2_or_v2dev, existing_repo.id)
+            )
+            try:
                 print(
-                    f"  {model_id} ({model_link}): \n{pprint.pformat(info['name'])}--"
+                    "    %s already exists (owner: %s); updating..."
+                    % (biomodels_model_id, lookup_user(existing_repo.user_id, url_info))
+                )
+            except Exception:
+                exit(-1)
+            print(url_info)
+            all_updated.append(url_info)
+
+            if verbose:
+                print(
+                    "\n    ------------ Current OSB %s repo info: ---------"
+                    % v2_or_v2dev
+                )
+                print("    %s" % found)
+                print("    ------------ OSB API info: ---------")
+                print("    %s" % biomodels_model)
+
+            tags = get_tags_info(biomodels_info=biomodels_model)
+
+            if not dry_run:
+                desc = biomodels_model["description"]
+                print("    Description: %s..." % desc[:150])
+                latest_version = biomodels_model["history"]["revisions"][-1]
+                print("    Latest version: %s" % latest_version)
+
+                return api_instance.osbrepository_id_put(
+                    existing_repo.id,
+                    OSBRepository(
+                        uri=biomodels_uri,
+                        name=name,
+                        summary=desc,
+                        tags=tags,
+                        default_context=existing_repo.default_context,
+                        content_types_list=[RepositoryContentType(value="modeling")],
+                        content_types="modeling",
+                        user_id=owner_user_id,
+                        repository_type="biomodels",
+                        auto_sync=True,
+                    ),
+                )
+        else:
+            print(f"Search failed: {search}")
+            print(
+                "    **********************************************************************"
+            )
+            print("    ****  Adding %s: %s" % (biomodels_model_id, name))
+            print(
+                "    **********************************************************************"
+            )
+
+            tags = get_tags_info(biomodels_info=biomodels_model)
+
+            all_added.append("  %s, index %i" % (name, index))
+            desc = biomodels_model["description"]
+            print("    Description: %s..." % desc[:150])
+            latest_version = biomodels_model["history"]["revisions"][-1]
+            print("    Latest version: %s" % latest_version)
+
+            if not dry_run:
+                return api_instance.osbrepository_post(
+                    OSBRepository(
+                        uri=biomodels_uri,
+                        name=name,
+                        summary=desc,
+                        tags=tags,
+                        default_context=str(latest_version["version"]),
+                        content_types_list=[RepositoryContentType(value="modeling")],
+                        content_types="modeling",
+                        user_id=owner_user_id,
+                        repository_type="biomodels",
+                        auto_sync=True,
+                    )
                 )
-                count += 1
 
+            url_info = (
+                "    URL to OSBv2 repo: https://%s.opensourcebrain.org/repositories/%s"
+                % (v2_or_v2dev, "???")
+            )  # found.osbrepositories[0].id)
+            print(url_info)
+
+    # Walk the cached models and add/update those whose running position
+    # falls in [min_index, max_index); `index` is the module-level counter.
+    for biomodels_model_id, biomodels_model in biomodels_info.items():
+        if min_index <= index < max_index:
+            try:
+                added = add_biomodels_model(biomodels_model, index)
+            except Exception as e:
+                print("----------")
+                logging.exception("Error adding %s", biomodels_model)
+                print("----------")
+                print("Error: %s" % str(e))
+                print("----------")
+                if "context_resources" not in str(e):
+                    print("Exiting due to unknown error...")
+                    # Non-zero status so a calling shell can tell the run
+                    # was aborted rather than completed.
+                    sys.exit(1)
+                # Errors mentioning "context_resources" are tolerated; record
+                # them so the "Errors found" summary is no longer always empty.
+                print("Known error...")
+                all_errors.append("  %s: %s" % (biomodels_model_id, e))
+
+        index += 1
+
+        # print(added)
+
+print(
+    "\n+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++"
+    + "\n\nDone! All updated (%i total; dry_run: %s):" % (len(all_updated), dry_run)
+)
+for m in all_updated:
+    print(m)
+
+print("\nAll added (%i total):" % len(all_added))
+for m in all_added:
+    print(m)
+
+print("\nMultiple matches found (%i total):" % len(multi_matches))
+for m in multi_matches:
+    print(m)
 
-if __name__ == "__main__":
-    main()
+print("\nErrors found (%i total):" % len(all_errors))
+for de in all_errors:
+    print(de)
diff --git a/libraries/client/loadmodeldb.py b/libraries/client/loadmodeldb.py
@@ -36,7 +36,7 @@
 
 index = 0
 min_index = 0
-max_index = 920
+max_index = 10000
 
 verbose = True  #
 verbose = False

diff --git a/libraries/client/modeldb_info.py b/libraries/client/modeldb_info.py
@@ -158,7 +158,7 @@
 print("\nStill to be forked (%i total):" % len(to_be_forked))
 for m in to_be_forked:
     print(m)
-    info[0]["to_be_forked"].append(m.trim())
+    info[0]["to_be_forked"].append(m.strip())
 
 print("\nMany forks (%i total):" % len(many_forks))
 for m in many_forks:
@@ -167,7 +167,7 @@
 print("\nErrors (%i total):" % len(errors))
 for m in errors:
     print(m)
-    info[0]["errors"].append(m.trim())
+    info[0]["errors"].append(m.strip())
 
 strj = json.dumps(info, indent="    ", sort_keys=True)
 with open(filename, "w") as fp:

diff --git a/libraries/client/utils.py b/libraries/client/utils.py
@@ -21,10 +21,25 @@ def lookup_user(uid, url):
 
 
 def get_tags_info(
-    dandi_api_info=None, dandishowcase_info=None, osbv1_info=None, modeldb_info=None
+    dandi_api_info=None,
+    dandishowcase_info=None,
+    osbv1_info=None,
+    modeldb_info=None,
+    biomodels_info=None,
 ):
     tags = []
 
+    if biomodels_info is not None:
+        tags.append("BioModels")
+        tags.append("BioModels:%s" % biomodels_info["publicationId"])
+        tags.append(biomodels_info["format"]["identifier"])
+        for mla in biomodels_info["modelLevelAnnotations"]:
+            if mla["qualifier"] == "bqbiol:hasTaxon":
+                tags.append(mla["name"])
+            if mla["qualifier"] == "bqbiol:isVersionOf":
+                n = mla["name"]
+                tags.append(n[0].upper() + n[1:])
+
     if modeldb_info is not None:
         tags.append("ModelDB")
         tags.append("ModelDB:%s" % modeldb_info["id"])