Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: update in_clinvar flag in smallvariants (#2222) #2224

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
106 changes: 106 additions & 0 deletions backend/clinvar/management/commands/in_clinvar.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
"""Setting in_clinvar field in SmallVariant aligned with Clinvar database."""

from django.core.management.base import BaseCommand
from django.db import transaction
from projectroles.models import Project

from clinvar.models import Clinvar
from variants.models import SmallVariant


def update_in_clinvar_full(case):
"""Update the in_clinvar field in SmallVariant with information from Clinvar database."""
variants = SmallVariant.objects.filter(case_id=case.id)
result = {
"true": 0,
"false": 0,
"total": 0,
}
for var in variants:
coords = {
"release": var.release,
"chromosome": var.chromosome,
"start": var.start,
"end": var.end,
"reference": var.reference,
"alternative": var.alternative,
}
var.in_clinvar = Clinvar.objects.filter(**coords).exists()
var.save()
result["true" if var.in_clinvar else "false"] += 1
result["total"] += 1
return result


def update_in_clinvar_light(project, printer=lambda x: None):
result = 0
for cv in Clinvar.objects.all():
coords = {
"release": cv.release,
"chromosome": cv.chromosome,
"start": cv.start,
"end": cv.end,
"reference": cv.reference,
"alternative": cv.alternative,
}
for case in project.case_set.all():
for var in SmallVariant.objects.filter(case_id=case.id, **coords):
if var.in_clinvar:
continue
var.in_clinvar = True
var.save()
result += 1
return result

Comment on lines +35 to +54
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Optimize database queries and remove unused parameter.

The function has performance and code quality issues:

  1. Triple nested loops with database queries could be inefficient.
  2. The printer parameter is unused.

Consider this optimized implementation:

-def update_in_clinvar_light(project, printer=lambda x: None):
+def update_in_clinvar_light(project):
     result = 0
+    batch_size = 1000
+    to_update = []
     for cv in Clinvar.objects.all():
         coords = {
             "release": cv.release,
             "chromosome": cv.chromosome,
             "start": cv.start,
             "end": cv.end,
             "reference": cv.reference,
             "alternative": cv.alternative,
         }
-        for case in project.case_set.all():
-            for var in SmallVariant.objects.filter(case_id=case.id, **coords):
-                if var.in_clinvar:
-                    continue
-                var.in_clinvar = True
-                var.save()
-                result += 1
+        # Get all cases in one query
+        case_ids = project.case_set.values_list('id', flat=True)
+        # Update variants in bulk
+        variants = SmallVariant.objects.filter(
+            case_id__in=case_ids,
+            in_clinvar=False,
+            **coords
+        )
+        to_update.extend(variants)
+        result += variants.count()
+        
+        # Perform bulk update when batch size is reached
+        if len(to_update) >= batch_size:
+            SmallVariant.objects.bulk_update(to_update, ['in_clinvar'])
+            to_update = []
+    
+    # Update remaining variants
+    if to_update:
+        SmallVariant.objects.bulk_update(to_update, ['in_clinvar'])
     return result

Committable suggestion skipped: line range outside the PR's diff.


@transaction.atomic
def run(*args, **options):
"""Run the command."""

project = Project.objects.get(sodar_uuid=options["project_uuid"])
updates = {
"true": 0,
"false": 0,
"total": 0,
}

if options["full"]:
for case in project.case_set.all():
result = update_in_clinvar_full(case)
updates["true"] += result["true"]
updates["false"] += result["false"]
updates["total"] += result["total"]
else:
updates["true"] += update_in_clinvar_light(project)

return updates


class Command(BaseCommand):
"""Command to set the in_clinvar field in SmallVariant with information from Clinvar database.

Background: When updating the Clinvar database, the in_clinvar field in SmallVariant
should be updated to reflect the changes in the Clinvar database.
"""

#: Help message displayed on the command line.
help = "Update in_clinvar field in SmallVariant with information from Clinvar database."

def add_arguments(self, parser):
"""Add the command's argument to the ``parser``."""
parser.add_argument("--project-uuid", help="Project UUID to apply on")
parser.add_argument(
"--full",
help="Reset ALL variants instead of only variants in Clinvar.",
action="store_true",
)
# parser.add_argument("--all", help="Apply on all projects", action="store_true")

def handle(self, *args, **options):
self.stdout.write("Call options:")
self.stdout.write(f"- {options['project_uuid']=}")
self.stdout.write(f"- {options['full']=}")
self.stdout.write(f"Performing ...")
updates = run(*args, **options)
self.stdout.write(f"{updates=}")
self.stdout.write(f"Done.")
Loading