Skip to content

Commit

Permalink
Handle the Wikidata SPARQL endpoint split
Browse files Browse the repository at this point in the history
  • Loading branch information
egonw committed Sep 21, 2024
1 parent 495d995 commit dd99554
Showing 1 changed file with 89 additions and 31 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -143,14 +143,18 @@ public String getEntityID(String doi) throws BioclipseException {
+ " ?work wdt:P356 \"" + doi + "\" ."
+ "}";
byte[] resultRaw = bioclipse.sparqlRemote(
"https://query.wikidata.org/sparql", hasWorkByDOI
"https://query-scholarly.wikidata.org/sparql", hasWorkByDOI
);
IStringMatrix results = rdf.processSPARQLXML(resultRaw, hasWorkByDOI);
if (results.getRowCount() == 0)
byte[] resultRaw2 = bioclipse.sparqlRemote(
"https://query-main.wikidata.org/sparql", hasWorkByDOI
);
IStringMatrix results2 = rdf.processSPARQLXML(resultRaw2, hasWorkByDOI);
if (results.getRowCount() + results2.getRowCount() == 0)
throw new BioclipseException("No work in Wikidata with the DOI: " + doi);
if (results.getRowCount() > 1)
if (results.getRowCount() + results2.getRowCount() > 1)
throw new BioclipseException("Too many works in Wikidata with the DOI: " + doi);
return results.get(1, "work");
return results.getRowCount() > 0 ? results.get(1, "work") : results2.get(1, "work");
}

/**
Expand Down Expand Up @@ -179,13 +183,22 @@ public Map<String,String> getEntityIDsForDOIs(List<String> dois) throws Bioclips
+ values
+ " ?work wdt:P356 ?doi ."
+ "}";
// handle the split Wikidata SPARQL endpoints, as a DOI can be for a scholarly article (first call)
// and for other types, like datasets (second call)
byte[] resultRaw = bioclipse.sparqlRemote(
"https://query.wikidata.org/sparql", query
);
"https://query-scholarly.wikidata.org/sparql", query
);
IStringMatrix results = rdf.processSPARQLXML(resultRaw, query);
for (int i=1; i<=results.getRowCount(); i++) {
mappings.put(doiMappings.get(results.get(i, "doi")), results.get(i, "work"));
}
resultRaw = bioclipse.sparqlRemote(
"https://query-main.wikidata.org/sparql", query
);
results = rdf.processSPARQLXML(resultRaw, query);
for (int i=1; i<=results.getRowCount(); i++) {
mappings.put(doiMappings.get(results.get(i, "doi")), results.get(i, "work"));
}
}
return mappings;
}
Expand All @@ -202,11 +215,20 @@ public List<String> getEntityIDsForType(String type) throws BioclipseException {
+ "SELECT DISTINCT ?entity WHERE {"
+ " ?entity wdt:P31 wd:" + type + " ."
+ "}";
byte[] resultRaw = bioclipse.sparqlRemote(
"https://query.wikidata.org/sparql", query
);
IStringMatrix results = rdf.processSPARQLXML(resultRaw, query);
return results.getColumn("entity");
// handle the split Wikidata SPARQL endpoints: entities of the requested type are looked up on the
// scholarly endpoint (first call) and on the main endpoint (second call), and both results are combined
List<String> entities = new ArrayList<>();
byte[] resultRaw = bioclipse.sparqlRemote(
"https://query-scholarly.wikidata.org/sparql", query
);
IStringMatrix results = rdf.processSPARQLXML(resultRaw, query);
if (results.getRowCount() > 0) entities.addAll(results.getColumn("entity"));
resultRaw = bioclipse.sparqlRemote(
"https://query-main.wikidata.org/sparql", query
);
results = rdf.processSPARQLXML(resultRaw, query);
if (results.getRowCount() > 0) entities.addAll(results.getColumn("entity"));
return entities;
}

/**
Expand All @@ -222,11 +244,20 @@ public List<String> getEntityIDsForWorksOfAuthor(String author) throws Bioclipse
+ "SELECT DISTINCT ?entity WHERE {"
+ " ?entity wdt:P50 wd:" + author + " ."
+ "}";
byte[] resultRaw = bioclipse.sparqlRemote(
"https://query.wikidata.org/sparql", query
);
IStringMatrix results = rdf.processSPARQLXML(resultRaw, query);
return results.getColumn("entity");
// handle the split Wikidata SPARQL endpoints, as a work of the author can be a scholarly article (first call)
// or of another type, like a dataset (second call); the results of both calls are combined
List<String> entities = new ArrayList<>();
byte[] resultRaw = bioclipse.sparqlRemote(
"https://query-scholarly.wikidata.org/sparql", query
);
IStringMatrix results = rdf.processSPARQLXML(resultRaw, query);
entities = results.getColumn("entity");
resultRaw = bioclipse.sparqlRemote(
"https://query-main.wikidata.org/sparql", query
);
results = rdf.processSPARQLXML(resultRaw, query);
entities.addAll(results.getColumn("entity"));
return entities;
}

/**
Expand All @@ -243,11 +274,20 @@ public List<String> getDOIsForWorksOfAuthor(String author) throws BioclipseExcep
+ " ?entity wdt:P50 wd:" + author + " ;"
+ " wdt:P356 ?doi ."
+ "}";
byte[] resultRaw = bioclipse.sparqlRemote(
"https://query.wikidata.org/sparql", query
);
IStringMatrix results = rdf.processSPARQLXML(resultRaw, query);
return results.getColumn("doi");
// handle the split Wikidata SPARQL endpoints, as a DOI can be for a scholarly article (first call)
// and for other types, like datasets (second call)
List<String> dois = new ArrayList<>();
byte[] resultRaw = bioclipse.sparqlRemote(
"https://query-scholarly.wikidata.org/sparql", query
);
IStringMatrix results = rdf.processSPARQLXML(resultRaw, query);
dois = results.getColumn("doi");
resultRaw = bioclipse.sparqlRemote(
"https://query-main.wikidata.org/sparql", query
);
results = rdf.processSPARQLXML(resultRaw, query);
dois.addAll(results.getColumn("doi"));
return dois;
}

/**
Expand All @@ -263,11 +303,20 @@ public List<String> getEntityIDsForWorksOfVenue(String venue) throws BioclipseEx
+ "SELECT DISTINCT ?entity WHERE {"
+ " ?entity wdt:P1433 wd:" + venue + " ."
+ "}";
byte[] resultRaw = bioclipse.sparqlRemote(
"https://query.wikidata.org/sparql", query
);
IStringMatrix results = rdf.processSPARQLXML(resultRaw, query);
return results.getColumn("entity");
// handle the split Wikidata SPARQL endpoints, as a work of the venue can be a scholarly article (first call)
// or of another type, like a dataset (second call); the results of both calls are combined
List<String> entities = new ArrayList<>();
byte[] resultRaw = bioclipse.sparqlRemote(
"https://query-scholarly.wikidata.org/sparql", query
);
IStringMatrix results = rdf.processSPARQLXML(resultRaw, query);
entities = results.getColumn("entity");
resultRaw = bioclipse.sparqlRemote(
"https://query-main.wikidata.org/sparql", query
);
results = rdf.processSPARQLXML(resultRaw, query);
entities.addAll(results.getColumn("entity"));
return entities;
}

/**
Expand All @@ -284,11 +333,20 @@ public List<String> getDOIsForWorksOfVenue(String venue) throws BioclipseExcepti
+ " ?entity wdt:P1433 wd:" + venue + " ;"
+ " wdt:P356 ?doi ."
+ "}";
byte[] resultRaw = bioclipse.sparqlRemote(
"https://query.wikidata.org/sparql", query
);
IStringMatrix results = rdf.processSPARQLXML(resultRaw, query);
return results.getColumn("doi");
// handle the split Wikidata SPARQL endpoints, as a DOI can be for a scholarly article (first call)
// and for other types, like datasets (second call)
List<String> dois = new ArrayList<>();
byte[] resultRaw = bioclipse.sparqlRemote(
"https://query-scholarly.wikidata.org/sparql", query
);
IStringMatrix results = rdf.processSPARQLXML(resultRaw, query);
dois = results.getColumn("doi");
resultRaw = bioclipse.sparqlRemote(
"https://query-main.wikidata.org/sparql", query
);
results = rdf.processSPARQLXML(resultRaw, query);
dois.addAll(results.getColumn("doi"));
return dois;
}

/**
Expand Down

0 comments on commit dd99554

Please sign in to comment.