release (#127)

* Changed api/gene/symbols output format and updated documentation * Removed relevant print() * fix: for Russian locale return lang_en if lang_ru column is empty * OG-936: fix empty response for measurementMethod field * Add updated datasets with missing experiments (#107) * New datasets with studies that were not added * Correct dataset for blue form * Correct dataset for pink form * Rename new datasets * Change the names of the datasets * Update dataset with missing experiments * Noissue. Do not change timestamp when gene was created * Noissue. Don't perform delete in longevity associations script since now * Noissue. Update confidence_level.tsv dataset * Updated first query assignment to avoid duplicates * OG-943: Change logic and refactor code * Fix query causing duplicated items in response (#108) * Noissue. Fix query causing duplicated items in response * Added join in _form, changed filter behavior, deleted a test router * Update README.md * #115. Remove GROUP BY * OG-943. Revert and fix script, update dataset, fix output (#112) * Revert OG-943 * Bring back changes from OG-391, add check for existing mechanism * OG-943. Fix script output, add comments * Update Hallmarks of Aging and Go-terms in a dataset * OG-943. Fix query where value were instead of id * OG-943. Fix. Only select the result with an identical GO-term name * OG-943. Fix. Consider GO-term children in SQL query for endpoint * Noissue. Update hallmarks dataset * Add info about DB dump * OG-949. 
Output manually bound aging mechanisms (#119) * Altered AgingMechanisms filter in GeneCommon, GeneSearchInput * Altered fields for AgingMechanism model, added filter for additional UUIDs (in str format) * Altered AgingMechanisms filter in GeneCommon, GeneSearchInput * Altered fields for AgingMechanism model, added filter for additional UUIDs (in str format) * Updated agingMechanisms SQL request, added @lang@ in proteinClasses * Updated gene/search endpoint: avoid duplicates in agingMechanisms response * Removed duplicates from aginMechanism response in gene/{id_or_symbol}, added sorting --------- Co-authored-by: psnewbee <[email protected]> Co-authored-by: Pavel <[email protected]> * OG-957. Fix filter byConservativeIn (#121) * Created presenters/taxon.py, added TaxonDAO * Added TaxonDAO * Created gene/taxon endpoint * OG-957. Fix pandas import and add check if agingMechanism exists for gene (#123) --------- Co-authored-by: psnewbee <[email protected]> Co-authored-by: anthony <[email protected]> Co-authored-by: Pavel <[email protected]>
open-genes · Mar 31, 2023 · ab5f853 · ab5f853
1 parent 58e053b
commit ab5f853
Show file tree

Hide file tree

Showing 7 changed files with 182 additions and 183 deletions.
diff --git a/README.md b/README.md
@@ -21,6 +21,8 @@ Open Genes API is based on [FastAPI](https://fastapi.tiangolo.com/) framework an
   - *uniprot* -
 
 ## Development
+Before you start working on the project, you need a database dump. You can [download it from our website](https://open-genes.com/open_genes_sql_dump.zip), or you can spin up a test database in a Docker container from [this repository](https://github.com/open-genes/open-genes-cms).
+
 
 
 

diff --git a/api/db/dao.py b/api/db/dao.py
@@ -1179,3 +1179,22 @@ def link_gene(self, gene_id: int, ortholog_id: int):
             self.cnx.commit()
 
         cur.close()
+
+
+class TaxonDAO(BaseDAO):  # data-access object that reads the `taxon` table
+    def get_all(self):  # runs one aggregate query and returns cur.fetchall(); the JSON array is in column `jsonobj`
+        cur = self.cnx.cursor(dictionary=True)  # rows are read by column name by the caller (e.g. row['jsonobj'])
+        cur.execute('SET SESSION group_concat_max_len = 100000;')  # set the session GROUP_CONCAT length limit to 100000 before aggregating
+        cur.execute(
+            '''
+            SELECT CAST(CONCAT('[', GROUP_CONCAT(
+            distinct JSON_OBJECT(
+                'id', taxon.id,
+                'name', taxon.name_en
+            )
+            ORDER BY taxon.id
+            SEPARATOR ","), ']') AS JSON) AS jsonobj
+            FROM taxon
+            '''
+        )
+        return cur.fetchall()  # NOTE(review): the cursor is never closed in this method — confirm whether cur.close() is needed here
diff --git a/api/endpoints/gene.py b/api/endpoints/gene.py
@@ -1,8 +1,8 @@
-import json
+import pandas as pd
 from typing import List
 
 from config import Language
-from db.dao import GeneDAO, GeneSuggestionDAO
+from db.dao import GeneDAO, GeneSuggestionDAO, TaxonDAO
 from fastapi import APIRouter, Depends, HTTPException, Query
 from models.gene import GeneSearchInput, GeneSearchOutput, GeneSingle, GeneSingleInput
 from presenters.gene import (
@@ -13,6 +13,9 @@
     GeneSymbolsOutput,
     GeneWithResearches,
 )
+from presenters.taxon import TaxonOutput
+from json import loads
+
 
 router = APIRouter()
 
@@ -29,7 +32,13 @@ async def gene_search(input: GeneSearchInput = Depends(GeneSearchInput)) -> List
         input.bySuggestions = None
         input.byGeneId = suggfilter
 
-    return GeneDAO().search(input)
+    search_result = GeneDAO().search(input)
+
+    for item in search_result.get("items", []):
+        if item["agingMechanisms"]:
+            item["agingMechanisms"] = pd.DataFrame(item["agingMechanisms"], dtype=object).drop_duplicates().sort_values(by=['id', 'uuid'], ascending=True).to_dict('records')
+
+    return search_result
 
 
 @router.get(
@@ -125,6 +134,11 @@ async def get_gene_by_expression_change(expression_change: str, lang: Language =
     return GeneDAO().get()
 
 
+@router.get('/gene/taxon', response_model=List[TaxonOutput])  # registered before '/gene/{id_or_symbol}' — presumably so 'taxon' is not captured as id_or_symbol; confirm
+async def get_taxon():  # returns the taxon list decoded from the single JSON document built by TaxonDAO.get_all()
+    return loads(TaxonDAO().get_all()[0]['jsonobj'])  # parse the first row's 'jsonobj' text; NOTE(review): loads() raises TypeError if that column is NULL — confirm the taxon table is never empty
+
+
 @router.get('/gene/{id_or_symbol}', response_model=GeneSingle)
 async def gene_search(
     id_or_symbol: int | str, input: GeneSingleInput = Depends(GeneSingleInput)
@@ -133,13 +147,14 @@ async def gene_search(
         input.byGeneId = id_or_symbol
     if isinstance(id_or_symbol, str):
         input.bySymbol = id_or_symbol
-    re = GeneDAO().single(input)
-    if not re:
+    search_result = GeneDAO().single(input)
+    if not search_result:
         raise HTTPException(
             status_code=404,
             detail='Gene not found',
         )
-    return re
+    search_result["agingMechanisms"] = pd.DataFrame(search_result["agingMechanisms"], dtype=object).drop_duplicates().sort_values(by=['id', 'uuid'], ascending=True).to_dict('records')
+    return search_result
 
 
 @router.get(
@@ -197,3 +212,6 @@ async def get_gene_by_id(ncbi_id: int, lang: Language = Language.en):
 )
 async def get_gene_by_id(ncbi_id: int, lang: Language = Language.en):
     return 'dummy'
+
+
+
diff --git a/api/models/gene.py b/api/models/gene.py
@@ -1,3 +1,4 @@
+from typing import Any, Optional, Type, Union
 from models import *
 from models.calorie_experiment import CalorieRestrictionExperiment
 from models.location import *
@@ -36,8 +37,9 @@ class ProteinClass(BaseModel):
 
 
 class AgingMechanism(BaseModel):
-    id: int
+    id: Optional[int] = None
     name: str
+    uuid: Optional[str] = None
 
 
 class FunctionalCluster(BaseModel):
@@ -140,7 +142,7 @@ class GeneCommon(BaseModel):
             ProteinClass,
             _select={
                 'id': "protein_class.id",
-                'name': "COALESCE(protein_class.name_en,protein_class.name_en)",
+                'name': "COALESCE(protein_class.name_@LANG@, protein_class.name_en)",
             },
             _from="from gene join gene_to_protein_class on gene_to_protein_class.gene_id=gene.id join  protein_class on protein_class.id=gene_to_protein_class.protein_class_id",
         )
@@ -151,13 +153,20 @@ class GeneCommon(BaseModel):
             AgingMechanism,
             _select={
                 'id': 'aging_mechanism.id',
-                'name': 'coalesce(aging_mechanism.name_@LANG@,aging_mechanism.name_en)',
+                'name': 'coalesce(aging_mechanism.name_@LANG@, aging_mechanism.name_ru)',
+                'uuid': 'NULL'
             },
             _from="""
 FROM gene 
-LEFT JOIN gene_to_ontology ON gene.id = gene_to_ontology.gene_id
+LEFT JOIN gene_to_ontology ON gene_to_ontology.gene_id = gene.id
+LEFT JOIN gene_ontology_relation ON gene_to_ontology.gene_ontology_id = gene_ontology_parent_id
 LEFT JOIN gene_ontology_to_aging_mechanism_visible ON gene_to_ontology.gene_ontology_id = gene_ontology_to_aging_mechanism_visible.gene_ontology_id
 INNER JOIN aging_mechanism ON gene_ontology_to_aging_mechanism_visible.aging_mechanism_id = aging_mechanism.id AND aging_mechanism.name_en != ''
+UNION 
+SELECT 'dummy_constant', NULL, COALESCE(aging_mechanism.name_@LANG@, aging_mechanism.name_en), aging_mechanism_to_gene.uuid
+FROM aging_mechanism
+JOIN aging_mechanism_to_gene ON aging_mechanism.id = aging_mechanism_to_gene.aging_mechanism_id
+JOIN gene ON aging_mechanism_to_gene.gene_id = gene.id
 """,
         )
     ]
@@ -220,6 +229,7 @@ class GeneSearchInput(PaginationInput, LanguageInput, SortInput):
     byExpressionChange: str = None
     bySelectionCriteria: str = None
     byAgingMechanism: str = None
+    byAgingMechanismUUID: str = None
     byProteinClass: str = None
     bySpecies: str = None
     byOrigin: str = None
@@ -278,7 +288,23 @@ class GeneSearchInput(PaginationInput, LanguageInput, SortInput):
             lambda value: value.split(',') + [len(value.split(','))],
         ],
         'byAgingMechanism': [
-            lambda value: '(select count(distinct aging_mechanism_id) from gene_to_ontology o join gene_ontology_to_aging_mechanism_visible a on a.gene_ontology_id=o.gene_ontology_id where o.gene_id=gene.id and aging_mechanism_id in ('
+            lambda value: '''
+            (SELECT COUNT(distinct aging_mechanism_id)
+            FROM gene_to_ontology AS o
+            JOIN gene_ontology_to_aging_mechanism_visible AS a
+            ON a.gene_ontology_id=o.gene_ontology_id
+            WHERE o.gene_id=gene.id AND aging_mechanism_id in (
+            '''
+            + ','.join(['%s' for v in value.split(',')])
+            + '))=%s',
+            lambda value: value.split(',') + [len(value.split(','))],
+        ],
+        'byAgingMechanismUUID': [
+            lambda value: '''
+            (SELECT COUNT(distinct uuid)
+            FROM aging_mechanism_to_gene AS amtg
+            WHERE amtg.gene_id=gene.id AND uuid in (
+            '''
             + ','.join(['%s' for v in value.split(',')])
             + '))=%s',
             lambda value: value.split(',') + [len(value.split(','))],

diff --git a/api/presenters/taxon.py b/api/presenters/taxon.py
@@ -0,0 +1,7 @@
+from pydantic.dataclasses import dataclass
+
+
+@dataclass
+class TaxonOutput:  # pydantic dataclass describing one taxon item (two fields: id and name)
+    id: int  # presumably taxon.id — verify against the query in TaxonDAO.get_all
+    name: str  # presumably taxon.name_en — verify against the query in TaxonDAO.get_all
Original file line number	Diff line number	Diff line change
Expand Up		@@ -21,6 +21,8 @@ Open Genes API is based on [FastAPI](https://fastapi.tiangolo.com/) framework an
		- uniprot -

		## Development
		Before you start working on the project, you need a database dump. You can [download it from our website](https://open-genes.com/open_genes_sql_dump.zip), or you can spin up a test database in a Docker container from [this repository](https://github.com/open-genes/open-genes-cms).




Expand Down