Skip to content

Commit

Permalink
Merge pull request #15 from geonetwork/dataset-link-simplification
Browse files Browse the repository at this point in the history
Dataset link simplification
  • Loading branch information
josegar74 authored Dec 2, 2024
2 parents b80d724 + 494ff22 commit d52ce80
Show file tree
Hide file tree
Showing 3 changed files with 159 additions and 21 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,11 @@
import javax.persistence.Column;
import javax.persistence.MappedSuperclass;
import javax.persistence.Transient;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;

//Represents a link in a document
@MappedSuperclass
Expand Down Expand Up @@ -83,25 +86,48 @@ public DocumentLink() {

//--

public static List<String> validProtocols = Arrays.asList(new String[]{
public static List<String> validViewProtocols = Arrays.asList(new String[]{
"wms",
"http://www.opengis.net/def/serviceType/ogc/wms".toLowerCase(),
"http://www.opengis.net/def/serviceType/ogc/wmts".toLowerCase(),
"http://www.opengis.net/def/serviceType/ogc/wfs".toLowerCase(),
"https://tools.ietf.org/html/rfc4287".toLowerCase(),
"ATOM Syndication Format".toLowerCase(),
"OGC Web Feature Service".toLowerCase(),
"OGC Web Map Service".toLowerCase(),
"OGC Web Map Tile Service".toLowerCase(),
"wms",
"Web Map Service (WMS)".toLowerCase(),
"OGC:WMS".toLowerCase(),
"http://www.opengeospatial.org/standards/wms",
"wmts",
"http://www.opengis.net/def/serviceType/ogc/wmts".toLowerCase(),
"OGC Web Map Tile Service".toLowerCase(),
"OGC:WMTS".toLowerCase(),
"http://www.opengeospatial.org/standards/wmts"
});

public static List<String> validDownloadProtocols = Arrays.asList(new String[]{
"wfs",
"atom",
"http://www.opengeospatial.org/standards/wms",
"http://www.opengeospatial.org/standards/wmts",
"http://www.opengis.net/def/serviceType/ogc/wfs".toLowerCase(),
"OGC Web Feature Service".toLowerCase(),
"Web Feature Service (WFS)".toLowerCase(),
"OGC:WFS".toLowerCase(),
"http://www.opengeospatial.org/standards/wfs",
"INSPIRE Atom".toLowerCase()
"atom",
"https://tools.ietf.org/html/rfc4287".toLowerCase(),
"ATOM Syndication Format".toLowerCase(),
"INSPIRE Atom".toLowerCase(),
"wcs",
"OGC:WCS".toLowerCase(),
"http://www.opengis.net/def/serviceType/ogc/wcs".toLowerCase(),
"api features",
"OGC - API Features".toLowerCase(),
"OGC:OGC-API-Features-items".toLowerCase(),
"HTTP:OGC:API-Features".toLowerCase(),
"http://www.opengis.net/def/interface/ogcapi-features".toLowerCase(),
"SensorThings".toLowerCase(),
"sos",
"OGC:SOS".toLowerCase(),
"http://www.opengis.net/def/serviceType/ogc/sos".toLowerCase()
});

public static List<String> validProtocols = Stream.concat(validViewProtocols.stream(),
validDownloadProtocols.stream()).collect(Collectors.toList());

public static List<String> validAtomProtocols = Arrays.asList(new String[]{
"https://tools.ietf.org/html/rfc4287".toLowerCase(),
"ATOM Syndication Format".toLowerCase(),
Expand All @@ -116,20 +142,48 @@ public DocumentLink() {
"http://inspire.ec.europa.eu/metadata-codelist/SpatialDataServiceType/view".toLowerCase()
});

// Regex for view-type protocols: matches any single-line text containing
// "wms", "wmts" or "web map service". All alternatives are lowercase, so the
// value being tested must be lowercased before calling String.matches().
public static final String VALID_PROTOCOLS_VIEW_REGEX = "(.*wms.*|.*wmts.*|.*web map service.*)";

// Regex for download-type protocols: matches any single-line text containing
// "wfs", "atom", "wcs", "sos", "sensorthings", "web feature service", or
// "api" followed later by "feature". Lowercase input is expected here as well.
public static final String VALID_PROTOCOLS_DOWNLOAD_REGEX = "(.*wfs.*|.*atom.*|.*wcs.*|.*sos.*|.*api.*feature.*|.*sensorthings.*|.*web feature service.*)";

// Union of the view and download alternatives above; keep it in sync when
// either of the two more specific patterns changes.
public static final String VALID_PROTOCOLS_REGEX = "(.*wfs.*|.*atom.*|.*wcs.*|.*sos.*|.*api.*feature.*|.*sensorthings.*|.*wms.*|.*wmts.*|.*web map service.*|.*web feature service.*)";

public boolean isInspireSimplifiedLink() {
if ((rawURL == null) || (protocol == null) || (applicationProfile == null))
// Relax the check so that links without applicationProfile information are also processed
if ((rawURL == null) || (protocol == null))
return false;
if (rawURL.isEmpty() || protocol.isEmpty() || applicationProfile.isEmpty())
if (rawURL.isEmpty() || protocol.isEmpty())
return false;

if (!validProtocols.contains(protocol.toLowerCase())) {
// Check that the protocol matches one of the "simple" patterns instead of requiring an exact match
if (!protocol.toLowerCase().matches(VALID_PROTOCOLS_REGEX)) {
return false;
}
}


return true;
}


/*public boolean isInspireSimplifiedLink() {
if ((rawURL == null) || (protocol == null) || (applicationProfile == null))
if ((rawURL == null) || (protocol == null))
return false;
if (rawURL.isEmpty() || protocol.isEmpty() || applicationProfile.isEmpty())
if (rawURL.isEmpty() || protocol.isEmpty())
return false;
if (!validProtocols.contains(protocol.toLowerCase()))
return false;
if (!validAppProfiles.contains(applicationProfile.toLowerCase()))
return false;
return true;
}
}*/



//--
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,11 +72,68 @@ public DatasetDocumentLink create(DatasetMetadataRecord datasetMetadataRecord, O
result.setFunction(onlineResource.getFunction());
result.setOperationName(onlineResource.getOperationName());
result.setRawURL(onlineResource.getRawURL());
result.setProtocol(onlineResource.getProtocol());

String protocolFromUrl = inferProtocolFromUrl(onlineResource.getRawURL());

if ((onlineResource.getProtocol() == null) && (protocolFromUrl != null)) {
// If no protocol defined, try to infer the protocol from the URL
result.setProtocol(protocolFromUrl);
} else {
result.setProtocol(onlineResource.getProtocol());

// If the protocol declared in the XML document is not compatible with the actual URL,
// use the protocol inferred from the URL instead.
// Example:
// the XML protocol is WMS (view),
// but the URL is "...?service=WFS" (the inferred protocol is a download one, not a view one),
// so the protocol is set to the one inferred from the URL (the XML value is ignored).
if (protocolFromUrl != null) {
boolean isDownloadProtocol = ServiceDocumentLink.validDownloadProtocols.contains(onlineResource.getProtocol().toLowerCase());
boolean isDownloadUrlProtocol = ServiceDocumentLink.validDownloadProtocols.contains(protocolFromUrl.toLowerCase());
boolean isViewProtocol = ServiceDocumentLink.validViewProtocols.contains(onlineResource.getProtocol().toLowerCase());
boolean isViewUrlProtocol = ServiceDocumentLink.validViewProtocols.contains(protocolFromUrl);

if (isDownloadProtocol) {
if (!isDownloadUrlProtocol && isViewUrlProtocol) {
result.setProtocol(protocolFromUrl);
}
} else if (isViewProtocol) {
if (!isViewUrlProtocol && isDownloadUrlProtocol) {
result.setProtocol(protocolFromUrl);
}
}
}
}

result.setApplicationProfile(onlineResource.getApplicationProfile());

result.setLinkCheckJobId(datasetMetadataRecord.getLinkCheckJobId());

return result;
}


/**
 * Guesses a service protocol from keywords found in a link URL.
 *
 * <p>The URL is lowercased and searched for each known keyword in a fixed
 * order; the first keyword found is returned as the protocol. The order
 * matters when a URL contains more than one keyword (e.g. both "wms" and
 * "wfs"): the earlier entry wins.
 *
 * @param url the raw link URL; may be {@code null}
 * @return the matching keyword ("wms", "wmts", "wfs", "atom", "wcs", "sos",
 *         "api features" or "sensorthings"), or {@code null} when the URL is
 *         {@code null} or contains none of them
 */
private String inferProtocolFromUrl(String url) {
    // A link may have no URL at all; nothing can be inferred in that case.
    if (url == null) {
        return null;
    }

    // Locale.ROOT keeps the lowercasing independent of the JVM default locale
    // (e.g. the Turkish dotless-i rule would otherwise break "SENSORTHINGS").
    String normalizedUrl = url.toLowerCase(java.util.Locale.ROOT);

    // Checked in this exact order; the first hit is returned.
    String[] protocolKeywords = {
        "wms", "wmts", "wfs", "atom", "wcs", "sos", "api features", "sensorthings"
    };

    for (String keyword : protocolKeywords) {
        if (normalizedUrl.contains(keyword)) {
            return keyword;
        }
    }

    return null;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -323,13 +323,40 @@ private void process() {
.filter(x -> (x.getCapabilitiesDocumentType() == CapabilitiesType.WFS) || (x.getCapabilitiesDocumentType() == CapabilitiesType.Atom))
.collect(Collectors.toList());

if (!viewLinks.isEmpty())

if (!viewLinks.isEmpty()) {
localDatasetMetadataRecord.setINDICATOR_VIEW_LINK_TO_DATA(IndicatorStatus.PASS);
if (!downloadLinks.isEmpty())
localDatasetMetadataRecord.setINDICATOR_DOWNLOAD_LINK_TO_DATA(IndicatorStatus.PASS);
localDatasetMetadataRecord.setNumberOfViewDataLinks(viewLinks.size());
} else {
// Dataset link simplification
if (!localDatasetMetadataRecord.getDocumentLinks().isEmpty()) {
List<DocumentLink> viewLinksMetadataOnlineResources = localDatasetMetadataRecord.getDocumentLinks().stream()
.filter(x -> (x.getLinkState().equals(LinkState.Complete) && x.getLinkHTTPStatusCode() == 200) && (DocumentLink.validViewProtocols.contains(x.getProtocol().toLowerCase()) || x.getProtocol().toLowerCase().matches(DocumentLink.VALID_PROTOCOLS_VIEW_REGEX)))
.collect(Collectors.toList());

if (!viewLinksMetadataOnlineResources.isEmpty()) {
localDatasetMetadataRecord.setINDICATOR_VIEW_LINK_TO_DATA(IndicatorStatus.PASS);
localDatasetMetadataRecord.setNumberOfViewDataLinks(viewLinksMetadataOnlineResources.size());
}
}
}

localDatasetMetadataRecord.setNumberOfViewDataLinks(viewLinks.size());
localDatasetMetadataRecord.setNumberOfDownloadDataLinks(downloadLinks.size());
if (!downloadLinks.isEmpty()) {
localDatasetMetadataRecord.setINDICATOR_DOWNLOAD_LINK_TO_DATA(IndicatorStatus.PASS);
localDatasetMetadataRecord.setNumberOfDownloadDataLinks(downloadLinks.size());
} else {
// Dataset link simplification
if (!localDatasetMetadataRecord.getDocumentLinks().isEmpty()) {
List<DocumentLink> downloadLinksMetadataOnlineResources = localDatasetMetadataRecord.getDocumentLinks().stream()
.filter(x -> (x.getLinkState().equals(LinkState.Complete) && x.getLinkHTTPStatusCode() == 200) && (DocumentLink.validDownloadProtocols.contains(x.getProtocol().toLowerCase()) || x.getProtocol().toLowerCase().matches(DocumentLink.VALID_PROTOCOLS_DOWNLOAD_REGEX)))
.collect(Collectors.toList());

if (!downloadLinksMetadataOnlineResources.isEmpty()) {
localDatasetMetadataRecord.setINDICATOR_DOWNLOAD_LINK_TO_DATA(IndicatorStatus.PASS);
localDatasetMetadataRecord.setNumberOfDownloadDataLinks(downloadLinksMetadataOnlineResources.size());
}
}
}

// List<ServiceDocSearchResult> serviceLinks = new ArrayList<>();
// List<CapabilitiesLinkResult> capLinks = new ArrayList<>();
Expand Down

0 comments on commit d52ce80

Please sign in to comment.