Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Create a simple Overture example #862

Merged
merged 5 commits into from
Jun 4, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions .run/overture-serve.run.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
<!-- IntelliJ IDEA run configuration for the Overture example.
     Serves the tiles described by tileset.json and style.json from the
     examples/overture directory using the Baremaps CLI "map serve" command. -->
<component name="ProjectRunConfigurationManager">
<configuration default="false" name="overture-serve" type="Application" factoryName="Application">
<option name="MAIN_CLASS_NAME" value="org.apache.baremaps.cli.Baremaps" />
<module name="baremaps-cli" />
<option name="PROGRAM_PARAMETERS" value="map serve --tileset tileset.json --style style.json" />
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$/examples/overture" />
<method v="2">
<option name="Make" enabled="true" />
</method>
</configuration>
</component>
11 changes: 11 additions & 0 deletions .run/overture-workflow.run.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
<!-- IntelliJ IDEA run configuration for the Overture example.
     Executes the workflow defined in workflow.json from the examples/overture
     directory using the Baremaps CLI "workflow execute" command. -->
<component name="ProjectRunConfigurationManager">
<configuration default="false" name="overture-workflow" type="Application" factoryName="Application">
<option name="MAIN_CLASS_NAME" value="org.apache.baremaps.cli.Baremaps" />
<module name="baremaps-cli" />
<option name="PROGRAM_PARAMETERS" value="workflow execute --file workflow.json" />
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$/examples/overture" />
<method v="2">
<option name="Make" enabled="true" />
</method>
</configuration>
</component>
8 changes: 8 additions & 0 deletions baremaps-cli/src/main/resources/log4j.properties
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Root logger: log INFO and above, routed to the "stdout" appender below
log4j.rootLogger=INFO, stdout

# Console appender: direct log messages to standard output with a
# "date level logger:line - message" pattern
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
log4j.appender.stdout.Target=System.out
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
log4j.appender.stdout.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss} %-5p %c{1}:%L - %m%n
Original file line number Diff line number Diff line change
Expand Up @@ -101,9 +101,17 @@ public DataSchema schema() {
this.schema = GeoParquetTypeConversion.asSchema(path.toString(), schema);
return this.schema;
} catch (URISyntaxException e) {
throw new GeoParquetException("Fail toe get the schema.", e);
throw new GeoParquetException("Failed to get the schema.", e);
}
}
return schema;
}

/**
 * Returns the SRID of the given geometry column, as declared in the GeoParquet metadata.
 *
 * @param column the name of the geometry column
 * @return the SRID of the column
 * @throws GeoParquetException if the GeoParquet metadata cannot be read
 */
public int srid(String column) {
  try {
    return reader().getGeoParquetMetadata().getSrid(column);
  } catch (Exception e) {
    // Wrap in the domain exception used elsewhere in this class rather than a bare
    // RuntimeException, and preserve the cause.
    throw new GeoParquetException("Failed to get the SRID of column " + column, e);
  }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,10 @@ public static List<Object> asRowValues(GeoParquetGroup group) {
Schema schema = group.getSchema();
List<Field> fields = schema.fields();
for (int i = 0; i < fields.size(); i++) {
if (group.getValues(i).isEmpty()) {
Drabble marked this conversation as resolved.
Show resolved Hide resolved
values.add(null);
continue;
}
Field field = fields.get(i);
switch (field.type()) {
case BINARY -> values.add(group.getBinaryValue(i).getBytes());
Expand All @@ -92,6 +96,9 @@ public static Map<String, Object> asNested(GeoParquetGroup group) {
Schema schema = group.getSchema();
List<Field> fields = schema.fields();
for (int i = 0; i < fields.size(); i++) {
if (group.getValues(i).isEmpty()) {
Drabble marked this conversation as resolved.
Show resolved Hide resolved
continue;
}
Field field = fields.get(i);
nested.put(field.name(), switch (field.type()) {
case BINARY -> group.getBinaryValue(i).getBytes();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
@Type(value = ImportDaylightFeatures.class, name = "ImportDaylightFeatures"),
@Type(value = ImportDaylightTranslations.class, name = "ImportDaylightTranslations"),
@Type(value = ImportGeoPackage.class, name = "ImportGeoPackage"),
@Type(value = ImportGeoParquet.class, name = "ImportGeoParquet"),
@Type(value = ImportOsmOsc.class, name = "ImportOsmOsc"),
@Type(value = ImportOsmPbf.class, name = "ImportOsmPbf"),
@Type(value = ImportShapefile.class, name = "ImportShapefile"),
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to you under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.baremaps.workflow.tasks;

import java.net.URI;
import java.util.StringJoiner;
import org.apache.baremaps.data.storage.DataTableGeometryMapper;
import org.apache.baremaps.data.storage.DataTableMapper;
import org.apache.baremaps.openstreetmap.function.ProjectionTransformer;
import org.apache.baremaps.storage.geoparquet.GeoParquetDataStore;
import org.apache.baremaps.storage.geoparquet.GeoParquetDataTable;
import org.apache.baremaps.storage.postgres.PostgresDataStore;
import org.apache.baremaps.workflow.Task;
import org.apache.baremaps.workflow.WorkflowContext;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
* Import a GeoParquet into a database.
*/
public class ImportGeoParquet implements Task {

private static final Logger logger = LoggerFactory.getLogger(ImportGeoParquet.class);

private URI uri;
private String tableName;
private Object database;
private Integer databaseSrid;

/**
* Constructs a {@code ImportGeoParquet}.
*/
public ImportGeoParquet() {

}

/**
* Constructs an {@code ImportGeoParquet}.
*
* @param uri the GeoParquet uri
* @param database the database
* @param databaseSrid the target SRID
*/
public ImportGeoParquet(URI uri, String tableName, Object database, Integer databaseSrid) {
this.uri = uri;
this.tableName = tableName;
this.database = database;
this.databaseSrid = databaseSrid;
}

/**
* {@inheritDoc}
*/
@Override
public void execute(WorkflowContext context) throws Exception {
var geoParquetDataStore = new GeoParquetDataStore(uri);
var dataSource = context.getDataSource(database);
var postgresDataStore = new PostgresDataStore(dataSource);
for (var name : geoParquetDataStore.list()) {
var geoParquetTable = (GeoParquetDataTable) geoParquetDataStore.get(name);
// TODO : How can we apply a different SRID for each column based on the geometry
Drabble marked this conversation as resolved.
Show resolved Hide resolved
var projectionTransformer =
new ProjectionTransformer(geoParquetTable.srid("geometry"), databaseSrid);
var rowTransformer =
new DataTableGeometryMapper(geoParquetTable, projectionTransformer);
var transformedDataTable =
new DataTableMapper(geoParquetDataStore.get(name), rowTransformer);
postgresDataStore.add(tableName, transformedDataTable);
}
}

/**
* {@inheritDoc}
*/
@Override
public String toString() {
return new StringJoiner(", ", ImportGeoParquet.class.getSimpleName() + "[", "]")
.add("uri=" + uri)
.add("database=" + database)
.add("databaseSrid=" + databaseSrid)
.toString();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.s3a.AnonymousAWSCredentialsProvider;
import org.apache.hadoop.fs.s3a.S3AFileSystem;
import org.apache.parquet.hadoop.ParquetFileReader;
import org.apache.parquet.hadoop.ParquetReader;
import org.apache.parquet.schema.MessageType;
Expand Down Expand Up @@ -95,13 +97,27 @@
try {
if (files == null) {
files = new HashMap<>();
Path globPath = new Path(uri.getPath());
URI rootUri = getRootUri(uri);
FileSystem fileSystem = FileSystem.get(rootUri, configuration);

// Iterate over all the files in the path
for (FileStatus file : fileSystem.globStatus(globPath)) {
files.put(file, buildFileInfo(file));
Copy link
Member

@bchapuis bchapuis Jun 3, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Any reason why buildFileInfo got removed? You may need to rebase the branch.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think I was just using an old version and didn't merge correctly. I had to do a few fixes for S3 files to work. I will try to roll back tomorrow.

FileSystem fs = FileSystem.get(uri, configuration);
FileStatus[] fileStatuses = fs.globStatus(new Path(uri));

for (FileStatus fileStatus : fileStatuses) {
Path filePath = fileStatus.getPath();
ParquetFileReader reader = ParquetFileReader.open(configuration, filePath);
Fixed Show fixed Hide fixed
Long recordCount = reader.getRecordCount();
MessageType messageType = reader.getFileMetaData().getSchema();
Map<String, String> keyValueMetadata = reader.getFileMetaData().getKeyValueMetaData();
GeoParquetMetadata geoParquetMetadata = null;
GeoParquetGroup.Schema geoParquetSchema = null;
if (keyValueMetadata.containsKey("geo")) {
ObjectMapper objectMapper = new ObjectMapper();
objectMapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
geoParquetMetadata =
objectMapper.readValue(keyValueMetadata.get("geo"), GeoParquetMetadata.class);
geoParquetSchema =
GeoParquetGroupFactory.createGeoParquetSchema(messageType, geoParquetMetadata);
}
files.put(fileStatus, new FileInfo(fileStatus, recordCount, keyValueMetadata, messageType,
geoParquetMetadata, geoParquetSchema));
}

// Verify that the files all have the same schema
Expand All @@ -110,12 +126,12 @@
if (commonMessageType == null) {
commonMessageType = entry.messageType;
} else if (!commonMessageType.equals(entry.messageType)) {
throw new GeoParquetException("The files do not have the same schema");
throw new RuntimeException("The files do not have the same schema");
}
}
}
} catch (IOException e) {
throw new GeoParquetException("IOException while attempting to list files.", e);
throw new RuntimeException("IOException while attempting to list files.", e);
}
return files;
}
Expand Down Expand Up @@ -254,31 +270,11 @@
}

private static Configuration createConfiguration() {
Configuration configuration = new Configuration();
configuration.set("fs.s3a.aws.credentials.provider",
"org.apache.hadoop.fs.s3a.AnonymousAWSCredentialsProvider");
configuration.setBoolean("fs.s3a.path.style.access", true);
return configuration;
}

private static URI getRootUri(URI uri) throws URISyntaxException {
// TODO:
// This is a quick and dirty way to get the root uri of the path.
// We take everything before the first wildcard in the path.
// This is not a perfect solution, and we should probably look for a better way to do this.
String path = uri.getPath();
int index = path.indexOf("*");
if (index != -1) {
path = path.substring(0, path.lastIndexOf("/", index) + 1);
}
return new URI(
uri.getScheme(),
uri.getUserInfo(),
uri.getHost(),
uri.getPort(),
path,
null,
null);
Configuration conf = new Configuration();
conf.set("fs.s3a.endpoint", "s3.us-west-2.amazonaws.com");
conf.set("fs.s3a.aws.credentials.provider", AnonymousAWSCredentialsProvider.class.getName());
conf.set("fs.s3a.impl", S3AFileSystem.class.getName());
conf.set("fs.s3a.path.style.access", "true");
return conf;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@ public interface GeoParquetGroup {
*/
GeoParquetGroup createGroup(int fieldIndex);

List<Primitive> getValues(int fieldIndex);

Binary getBinaryValue(int fieldIndex);

List<Binary> getBinaryValues(int fieldIndex);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ else if (!field.isPrimitive()) {
GeoParquetGroup.Schema geoParquetSchema = createGeoParquetSchema(groupType, metadata);
return (Field) new GeoParquetGroup.GroupField(
groupType.getName(),
GeoParquetGroup.Cardinality.REQUIRED,
cardinality,
geoParquetSchema);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,7 @@
}
}

private List<Primitive> getValues(int fieldIndex) {
public List<Primitive> getValues(int fieldIndex) {
Fixed Show fixed Hide fixed
return (List<Primitive>) data[fieldIndex];
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ public void setColumns(Map<String, GeoParquetColumnMetadata> columns) {
this.columns = columns;
}


public int getSrid(String column) {
return Optional.ofNullable(getColumns().get(column).getCrs()).map(crs -> {
JsonNode id = crs.get("id");
Expand All @@ -72,6 +73,7 @@ public int getSrid(String column) {
}).orElse(4326);
}


/**
 * Returns {@code true} if the given column is registered in this metadata's columns map.
 *
 * @param column the column name
 * @return whether the column is a geometry column
 */
public boolean isGeometryColumn(String column) {
return columns.containsKey(column);
}
Expand Down
15 changes: 15 additions & 0 deletions examples/overture/indexes.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
-- Licensed to the Apache Software Foundation (ASF) under one or more
-- contributor license agreements. See the NOTICE file distributed with
-- this work for additional information regarding copyright ownership.
-- The ASF licenses this file to you under the Apache License, Version 2.0
-- (the "License"); you may not use this file except in compliance with
-- the License. You may obtain a copy of the License at
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing, software
-- distributed under the License is distributed on an "AS IS" BASIS,
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- See the License for the specific language governing permissions and
-- limitations under the License.

-- Spatial (GiST) index on the geometry column of the administrative boundary
-- materialized view, so that spatial queries against it can use an index scan.
CREATE INDEX IF NOT EXISTS overture_admins_administrative_boundary_materialized_view_gist ON overture_admins_administrative_boundary_materialized_view USING GIST(geom);
24 changes: 24 additions & 0 deletions examples/overture/style.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
{
"version" : 8,
"sources" : {
"baremaps" : {
"type" : "vector",
"url" : "http://localhost:9000/tiles.json"
}
},
"layers" : [ {
"id" : "administrative_boundary",
"type" : "line",
"source" : "baremaps",
"source-layer" : "administrative_boundary",
"layout" : {
"visibility" : "visible"
},
"paint" : {
"line-color": "black",
"line-width": 1
}
}],
"center" : [ 0, 0 ],
"zoom" : 2
}
24 changes: 24 additions & 0 deletions examples/overture/tiles.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
{
"tilejson": "2.2.0",
"tiles": [
"http://localhost:9000/tiles/{z}/{x}/{y}.mvt"
],
"minzoom": 0.0,
"maxzoom": 14.0,
"center": [0, 0],
"bounds": [-180, -85, 180, 85],
"zoom": 2,
"database": "jdbc:postgresql://localhost:5432/baremaps?&user=baremaps&password=baremaps",
"vector_layers": [
{
"id": "administrative_boundary",
"queries": [
{
"minzoom": 0,
"maxzoom": 14,
"sql": "SELECT id, tags, geom FROM overture_admins_administrative_boundary_materialized_view"
}
]
}
]
}
Loading
Loading