From fafef657a509b00642fb87343968e9e2fe91f276 Mon Sep 17 00:00:00 2001 From: AlePalu Date: Sun, 10 Nov 2024 20:47:08 +0100 Subject: [PATCH] shapefile reader bug fix, multipolygons support (correctly materialize shapefile's polygons with more than one ring) --- fdaPDE/geoframe/shp.h | 116 +++++++++++++++++++----------- fdaPDE/geometry/dcel.h | 5 +- fdaPDE/geometry/polygon.h | 122 ++++++++++++++++++++++++++++---- fdaPDE/geometry/primitives.h | 32 +++++++-- fdaPDE/geometry/triangulation.h | 30 +++++++- 5 files changed, 244 insertions(+), 61 deletions(-) diff --git a/fdaPDE/geoframe/shp.h b/fdaPDE/geoframe/shp.h index a884d62..f514357 100644 --- a/fdaPDE/geoframe/shp.h +++ b/fdaPDE/geoframe/shp.h @@ -52,7 +52,7 @@ template T read(char* buff, int& head, tag_little_endian) { class shp_reader { void skip(int n_bytes) { head_ += n_bytes; } // move head_ n_bytes forward - struct sf_header_t { + struct sf_header_t_ { static constexpr int size = 100; // the size (in number of bytes) of the header int file_code; // file code: 9994 int file_length; // total length of the file in 8-bit words @@ -60,14 +60,14 @@ class shp_reader { std::array bbox; // x_min, y_min, x_max, y_max, z_min, z_max, m_min, m_max int shape_type; }; - struct sf_point_t { + struct sf_point_t_ { int shape_type; int record_number; double x, y, z, m; // point coordinates (z and m optional) - sf_point_t() = default; - sf_point_t(double x_, double y_) : x(x_), y(y_) { } - sf_point_t(int record_number_, shp_reader& file) : + sf_point_t_() = default; + sf_point_t_(double x_, double y_) : x(x_), y(y_) { } + sf_point_t_(int record_number_, shp_reader& file) : shape_type(file.shape_type()), record_number(record_number_) { // point specific content x = read(file.buff_, file.head_, LittleEndian); @@ -78,8 +78,16 @@ class shp_reader { m = read(file.buff_, file.head_, LittleEndian); } } + friend bool operator==(const sf_point_t_& lhs, const sf_point_t_& rhs) { + if (lhs.shape_type != rhs.shape_type) { return false; } + bool equal = (lhs.x == rhs.x) && (lhs.y == rhs.y); + if (lhs.shape_type == shape_t::PointM) { equal &= (lhs.m == rhs.m); } + if (lhs.shape_type == shape_t::PointZ) { equal &= ((lhs.m == rhs.m) && (lhs.z == rhs.z)); } + return equal; + } + friend bool operator!=(const sf_point_t_& lhs, const sf_point_t_& rhs) { return !(lhs == rhs); } }; - struct sf_multipoint_t { + struct sf_multipoint_t_ { private: void read_zm_block(int n_points, std::array& range, std::vector points, shp_reader& file) { points.resize(n_points); @@ -95,8 +103,8 @@ class shp_reader { int n_points; // number of points in the record std::vector x, y, z, m; // points coordinates (z and m optional) - sf_multipoint_t() = default; - sf_multipoint_t(int record_number_, shp_reader& file) : + sf_multipoint_t_() = default; + sf_multipoint_t_(int record_number_, shp_reader& file) : shape_type(file.shape_type()), record_number(record_number_) { // multipoint specific content for (int i = 0; i < 4; ++i) { bbox[i] = read(file.buff_, file.head_, LittleEndian); } @@ -114,10 +122,10 @@ class shp_reader { } } }; - struct sf_polygon_t { + struct sf_polygon_t_ { private: void - read_zm_block(int n_points, std::array& range, std::vector& points, shp_reader& file) { + read_zm_block(int n_points, std::array& range, std::vector& points, shp_reader& file) { range[0] = read(file.buff_, file.head_, LittleEndian); range[1] = read(file.buff_, file.head_, LittleEndian); for (int i = 0; i < n_points; ++i) { @@ -137,10 +145,10 @@ class shp_reader { int n_rings; // number of closed polygons in the record int n_points; // overall number of points std::vector ring_begin, ring_end; // first and last points in each ring, as offsets in points vector - std::vector points; // points coordinates (z and m optional) + std::vector points; // points coordinates (z and m optional) - sf_polygon_t() = default; - sf_polygon_t(int record_number_, shp_reader& file) : + sf_polygon_t_() = default; + sf_polygon_t_(int record_number_, shp_reader& file) : shape_type(file.shape_type()), record_number(record_number_) { // polygon specific content for (int i = 0; i < 4; ++i) { bbox[i] = read(file.buff_, file.head_, LittleEndian); } @@ -175,17 +183,17 @@ class shp_reader { } // iterator over rings class ring_iterator { - const sf_polygon_t* polygon_; + const sf_polygon_t_* polygon_; int index_; - std::vector::const_iterator it; + std::vector::const_iterator it; public: - ring_iterator(const sf_polygon_t* polygon, int index) : polygon_(polygon), index_(index) { + ring_iterator(const sf_polygon_t_* polygon, int index) : polygon_(polygon), index_(index) { it = polygon_->points.begin(); } - std::vector::const_iterator begin() const { return it + polygon_->ring_begin[index_]; } - std::vector::const_iterator end() const { return it + polygon_->ring_end[index_]; } - const sf_point_t* operator->() { return it.operator->(); } - const sf_point_t& operator*() { return *it; }; + std::vector::const_iterator begin() const { return it + polygon_->ring_begin[index_]; } + std::vector::const_iterator end() const { return it + polygon_->ring_end[index_]; } + const sf_point_t_* operator->() { return it.operator->(); } + const sf_point_t_& operator*() { return *it; }; ring_iterator& operator++() { index_++; return *this; @@ -194,31 +202,41 @@ class shp_reader { return it1.index_ != it2.index_; } int n_nodes() const { return polygon_->ring_end[index_] - polygon_->ring_begin[index_]; } - // RowMajor expansion of polygon coordinates - Eigen::Matrix nodes() { - int n_rows = n_nodes(); + // RowMajor expansion of ring coordinates (already discards last point, as it coincides with end point) + Eigen::Matrix nodes() const { + int n_rows = n_nodes() - (end() != polygon_->points.end() ? 0 : 1); int n_cols = 2 + (polygon_->shape_type == shape_t::PointM ? 1 : (polygon_->shape_type == shape_t::PointZ ? 2 : 0)); Eigen::Matrix coords(n_rows, n_cols); int row = 0; - for (auto jt = begin(), ht = end(); jt != ht; ++jt) { + for (auto jt = begin(), ht = end(); jt != ht && row < n_rows; ++jt) { coords(row, 0) = jt->x; coords(row, 1) = jt->y; if (polygon_->shape_type == shape_t::PointM) { coords(row, 2) = jt->m; } - if (polygon_->shape_type == shape_t::PointZ) { coords(row, 3) = jt->z; } - row++; + if (polygon_->shape_type == shape_t::PointZ) { + coords(row, 2) = jt->m; + coords(row, 3) = jt->z; + } + row++; } return coords; } }; ring_iterator begin() const { return ring_iterator(this, 0); } ring_iterator end() const { return ring_iterator(this, n_rings); } + // provides all nodes coordinates, orgnanized by rings + std::vector> nodes() const { + std::vector> nodes_; + nodes_.reserve(n_rings); + for (ring_iterator it = begin(); it != end(); ++it) { nodes_.push_back(it.nodes()); } + return nodes_; + } }; // a polyline has the same layout of a polygon, where rings are not necessarily closed (and can self-intersect) - struct sf_polyline_t : public sf_polygon_t { - sf_polyline_t() = default; - sf_polyline_t(int record_number_, shp_reader& file) : sf_polygon_t(record_number_, file) { } + struct sf_polyline_t_ : public sf_polygon_t_ { + sf_polyline_t_() = default; + sf_polyline_t_(int record_number_, shp_reader& file) : sf_polygon_t(record_number_, file) { } }; template void read_into(T& container) { @@ -232,15 +250,19 @@ class shp_reader { std::string file_name_; int n_records_; - sf_header_t header_; // shapefile header + sf_header_t_ header_; // shapefile header int head_ = 0; // currently pointed byte in buff_ char* buff_ = nullptr; // loaded binary data // only one of the following containers is active (by shapefile specification) - std::vector points_; - std::vector polylines_; - std::vector polygons_; - std::vector multipoints_; + std::vector points_; + std::vector polylines_; + std::vector polygons_; + std::vector multipoints_; public: + using sf_point_t = sf_point_t_; + using sf_polyline_t = sf_polyline_t_; + using sf_polygon_t = sf_polygon_t_; + using sf_multipoint_t = sf_multipoint_t_; // supported shapefile format // all the non-null shapes in a shapefile are required to be of the same shape type (cit. Shapefile standard) enum shape_t { @@ -287,27 +309,24 @@ class shp_reader { int shape_type() const { return header_.shape_type; } std::array bbox() const { return {header_.bbox[0], header_.bbox[1], header_.bbox[2], header_.bbox[3]}; } int n_records() const { return n_records_; } - const sf_header_t& header() const { return header_; } + const sf_header_t_& header() const { return header_; } int n_points() const { return points_.size(); } const sf_point_t& point(int index) const { fdapde_assert( shape_type() == shape_t::Point || shape_type() == shape_t::PointZ || shape_type() == shape_t::PointM); return points_[index]; } - int n_polylines() const { return polylines_.size(); } const sf_polyline_t& polyline(int index) const { fdapde_assert( shape_type() == shape_t::PolyLine || shape_type() == shape_t::PolyLineZ || shape_type() == shape_t::PolyLineM); return polylines_[index]; } - int n_polygons() const { return polygons_.size(); } const sf_polygon_t& polygon(int index) const { fdapde_assert( shape_type() == shape_t::Polygon || shape_type() == shape_t::PolygonZ || shape_type() == shape_t::PolygonM); return polygons_[index]; } - int n_multipoints() const { return multipoints_.size(); } const sf_multipoint_t& multipoint(int index) const { fdapde_assert( shape_type() == shape_t::MultiPoint || shape_type() == shape_t::MultiPointZ || @@ -417,7 +436,14 @@ class ShapeFile { std::string gcs_ = "UNDEFINED"; // geographic coordinate system (GCS) std::string filename_; public: - ShapeFile(std::string filename) : filename_(filename) { + ShapeFile(std::string filename) : filename_() { + std::filesystem::path filepath(filename); + if (!std::filesystem::exists(filepath)) { throw std::runtime_error("File " + filename + " not found."); } + if (filepath.extension() == ".shp") { + filename_ = filepath.parent_path() / filepath.stem(); + } else { + throw std::runtime_error(filename + "is not a .shp file."); + } // load geometric features and associated data shp_ = shp_reader(filename_ + ".shp"); dbf_ = dbf_reader(filename_ + ".dbf"); @@ -433,10 +459,20 @@ class ShapeFile { gcs_ = line.substr(i, j - i); } } - // getters + // observers const shp_reader& shp() const { return shp_; } template std::vector get_as(std::string colname) const { return dbf_.get_as(colname); } std::vector> field_descriptors() const { return dbf_.field_descriptors(); } + int shape_type() const { return shp_.shape_type(); } + int n_records() const { return shp_.n_records(); } + // geometry + const auto& point(int index) const { return shp_.point(index); } + const auto& polyline(int index) const { return shp_.polyline(index); } + const auto& polygon(int index) const { return shp_.polygon(index); } + const auto& multipoint(int index) const { return shp_.multipoint(index); } + // accessor + template std::vector get(std::string colname) const { return dbf_.get_as(colname); } + // output stream friend std::ostream& operator<<(std::ostream& os, const ShapeFile& sf) { os << "file: " << sf.filename_ << std::endl; std::string shape_type; diff --git a/fdaPDE/geometry/dcel.h b/fdaPDE/geometry/dcel.h index c05e368..3826a80 100644 --- a/fdaPDE/geometry/dcel.h +++ b/fdaPDE/geometry/dcel.h @@ -17,8 +17,11 @@ #ifndef __DCEL_H__ #define __DCEL_H__ -namespace fdapde { +#include "../utils/traits.h" +#include "../utils/symbols.h" +namespace fdapde { + // implementation of the Double Connected Edge List data structure (also known as DCEL or half-edge) template class DCEL { public: diff --git a/fdaPDE/geometry/polygon.h b/fdaPDE/geometry/polygon.h index 7b9ee82..178de82 100644 --- a/fdaPDE/geometry/polygon.h +++ b/fdaPDE/geometry/polygon.h @@ -17,14 +17,16 @@ #ifndef __POLYGON_H__ #define __POLYGON_H__ -#include "primitives.h" -#include "dcel.h" #include #include +#include "dcel.h" +#include "primitives.h" +#include "triangulation.h" + namespace fdapde { -// a polygon is a connected list of vertices +// a polygon is a connected list of vertices, possibly formed by more than one ring template class Polygon { public: static constexpr int local_dim = LocalDim; @@ -34,26 +36,22 @@ template class Polygon { Polygon() noexcept = default; Polygon(const DMatrix& nodes) noexcept : triangulation_() { fdapde_assert(nodes.rows() > 0 && nodes.cols() == embed_dim); - // check if nodes are sorted counter-clocwise - double area = 0; - int n_nodes = nodes.rows(); - for (int i = 0; i < n_nodes - 1; ++i) { - area += (nodes(i, 0) + nodes(i + 1, 0)) * (nodes(i + 1, 1) - nodes(i, 1)); - } - area += (nodes(n_nodes - 1, 0) + nodes(0, 0)) * (nodes(0, 1) - nodes(n_nodes - 1, 1)); // last edge - if (area > 0) { + if (internals::are_2d_counterclockwise_sorted(nodes)) { triangulate_(nodes); } else { // nodes are in clocwise order, reverse node ordering + int n_nodes = nodes.rows(); DMatrix reversed_nodes(n_nodes, embed_dim); for (int i = 0; i < n_nodes; ++i) { reversed_nodes.row(i) = nodes.row(n_nodes - 1 - i); } triangulate_(reversed_nodes); } } + Polygon(const Polygon&) noexcept = default; Polygon(Polygon&&) noexcept = default; // observers const DMatrix& nodes() const { return triangulation_.nodes(); } const DMatrix& cells() const { return triangulation_.cells(); } + Eigen::Map> edges() const { return triangulation_.edges(); } const Triangulation& triangulation() const { return triangulation_; } double measure() const { return triangulation_.measure(); } int n_nodes() const { return triangulation_.n_nodes(); } @@ -65,6 +63,10 @@ template class Polygon { bool contains(const Eigen::Matrix& p) const { return triangulation_.locate(p) != -1; } + // random sample points in polygon + DMatrix sample(int n_samples, int seed = fdapde::random_seed) { + return triangulation_.sample(n_samples, seed); + } private: // perform polygon triangulation void triangulate_(const DMatrix& nodes) { @@ -86,7 +88,7 @@ template class Polygon { // partition an arbitrary polygon P into a set of monotone polygons (plane sweep approach, section 3.2 of De Berg, // M. (2000). Computational geometry: algorithms and applications. Springer Science & Business Media.) - std::vector> monotone_partition_(const DMatrix& coords) { + std::vector> monotone_partition_(const DMatrix& coords) { using poly_t = DCEL; using halfedge_t = typename poly_t::halfedge_t; using halfedge_ptr_t = std::add_pointer_t; @@ -133,7 +135,7 @@ template class Polygon { int n_nodes = dcel.n_nodes(); int n_edges = dcel.n_edges(); std::set sweep_line; // edges pierced by sweep line, sorted by x-coord - std::vector helper(n_nodes, nullptr); + std::vector helper(n_edges, nullptr); std::vector nodes(n_nodes); // maps node id to one of its halfedges // O(n) node type detection @@ -353,6 +355,100 @@ template class Polygon { Triangulation triangulation_; }; +// a multipolygon is a collection of polygons, backed by a single face-based triangulation +template class MultiPolygon { + public: + static constexpr int local_dim = LocalDim; + static constexpr int embed_dim = EmbedDim; + using polygon_t = Polygon; + + MultiPolygon() : n_polygons_(0) { } + // rings is a vector of matrix of coordinates, where, each inner matrix defines a closed non self-intersecting loop + MultiPolygon(const std::vector>& rings) : n_polygons_(0) { + // ESRI shapefile format specification: loops in clockwise order define the outer border of a polygon, + // loops in counterclockwise order defines a hole inside the last found outer polygonal ring + int n_rings = rings.size(); + if (n_rings == 1) { + triangulation_ = Polygon(rings[0]).triangulation(); + } else { + // detect polygons with holes + using iterator = typename std::vector>::const_iterator; + constexpr int n_nodes_per_cell = 3; + std::vector> polygons; + iterator begin = rings.begin(); + iterator end = begin; + int n_nodes = 0; + while (end != rings.end()) { + n_nodes += end->rows(); + ++end; + while (end != rings.end() && internals::are_2d_counterclockwise_sorted(*end)) { // detect holes + n_nodes += end->rows(); + ++end; + } + polygons.emplace_back(begin, end); + begin = end; + } + Eigen::Matrix nodes(n_nodes, embed_dim); + std::vector cells; + int nodes_off_ = 0; + for (const Eigen::Matrix& coords : rings) { + // triangulate polygon + Triangulation poly_tri = Polygon(coords).triangulation(); + nodes.middleRows(nodes_off_, poly_tri.n_nodes()) = poly_tri.nodes(); + // copy cells + for (int i = 0, n = poly_tri.cells().rows(); i < n; ++i) { + for (int j = 0; j < n_nodes_per_cell; ++j) { cells.push_back(poly_tri.cells()(i, j) + nodes_off_); } + } + nodes_off_ += poly_tri.n_nodes(); + n_polygons_++; + n_nodes_per_polygon_.push_back(poly_tri.n_nodes()); + } + // set up face-based storage + triangulation_ = Triangulation( + nodes, + Eigen::Map>( + cells.data(), cells.size() / n_nodes_per_cell, n_nodes_per_cell), + DVector::Ones(nodes.rows())); + } + } + // observers + const DMatrix& nodes() const { return triangulation_.nodes(); } + const DMatrix& cells() const { return triangulation_.cells(); } + Eigen::Map> edges() const { return triangulation_.edges(); } + // computes only the boundary edges of the multipoligon (discards triangulation's edges) + Eigen::Matrix boundary_edges() const { + int n_edges = triangulation_.n_boundary_edges(); + Eigen::Matrix m(n_edges, 2); + int j = 0; + for (int i = 0, k = triangulation_.n_edges(); i < k; ++i) { + if (triangulation_.is_edge_on_boundary(i)) { m.row(j++) = triangulation_.edges().row(i); } + } + return m; + } + const Triangulation& triangulation() const { return triangulation_; } + double measure() const { return triangulation_.measure(); } + int n_nodes() const { return triangulation_.n_nodes(); } + int n_cells() const { return triangulation_.n_cells(); } + int n_edges() const { return triangulation_.n_edges(); } + int n_boundary_edges() const { return triangulation_.n_boundary_nodes(); } + int n_polygons() const { return n_polygons_; } + // test whether point p is contained in polygon + template + requires((Rows == embed_dim && Cols == 1) || (Cols == embed_dim && Rows == 1)) + bool contains(const Eigen::Matrix& p) const { + return triangulation_.locate(p) != -1; + } + // random sample points in polygon + DMatrix sample(int n_samples, int seed = fdapde::random_seed) { + return triangulation_.sample(n_samples, seed); + } + private: + // internal face-based storage + Triangulation triangulation_; + std::vector n_nodes_per_polygon_; + int n_polygons_; +}; + } // namespace fdapde #endif // __POLYGON_H__ diff --git a/fdaPDE/geometry/primitives.h b/fdaPDE/geometry/primitives.h index fbba108..2e8ef4f 100644 --- a/fdaPDE/geometry/primitives.h +++ b/fdaPDE/geometry/primitives.h @@ -18,6 +18,7 @@ #define __GEOMETRIC_PRIMITIVES_H__ #include "../utils/symbols.h" +#include "../utils/traits.h" namespace fdapde { namespace internals { @@ -47,13 +48,34 @@ template constexpr bool are_2d_counterclockwise_sorted(const PointT& a, const PointT& b, const PointT& c) { return signed_measure_2d_tri(a, b, c) > 0; } -// area of 2D polygon given counterclockwise sorted vertices v_0, v_1, \ldots, v_{n-1} (lemma 1.3.3 of (1)) -template constexpr double area_2d_polygon(const PointList& points) { +// area of 2D polygon given counterclockwise sorted vertices v_0, v_1, \ldots, v_{n - 1} (lemma 1.3.3 of (1)) +template + requires(fdapde::is_eigen_dense_v || fdapde::is_subscriptable) +constexpr double signed_measure_2d_polygon(const PointList& points) { double area = 0; - for (std::size_t i = 1, n = points.size() - 1; i < n; ++i) { - area += signed_mesure_2d_tri(points[0], points[i], points[i + 1]); + if constexpr (fdapde::is_eigen_dense_v) { + fdapde_assert(points.rows() > 0 && points.cols() == 2); + int n_points = points.rows(); + for (int i = 0; i < n_points - 1; ++i) { + area += (points(i, 0) + points(i + 1, 0)) * (points(i + 1, 1) - points(i, 1)); + } + area += (points(n_points - 1, 0) + points(0, 0)) * (points(0, 1) - points(n_points - 1, 1)); + } else if (fdapde::is_subscriptable) { + // assume points to be a RowMajor expansion of the polygon nodes' coordinates + fdapde_assert(points.size() % 2 == 0); + int n_points = points.size() / 2; + for (int i = 0; i < n_points - 1; ++i) { + area += (points[i] + points[i + 2]) * (points[i + 3] - points[i + 1]); + } + area += (points[n_points - 2] + points[0]) * (points[1] - points[n_points - 1]); } - return 0.5 * area; + return area; +} +template constexpr bool are_2d_counterclockwise_sorted(const PointList& points) { + return signed_measure_2d_polygon(points) > 0; +} +template constexpr bool are_2d_clockwise_sorted(const PointList& points) { + return signed_measure_2d_polygon(points) < 0; } // 2D point-line orientation test diff --git a/fdaPDE/geometry/triangulation.h b/fdaPDE/geometry/triangulation.h index 5d48e45..bb5a548 100644 --- a/fdaPDE/geometry/triangulation.h +++ b/fdaPDE/geometry/triangulation.h @@ -20,6 +20,7 @@ #include #include #include +#include #include "../linear_algebra/binary_matrix.h" #include "../utils/constexpr.h" @@ -191,6 +192,31 @@ template class TriangulationBase { boundary_node_iterator boundary_nodes_end() const { return boundary_node_iterator(n_nodes_, static_cast(this)); } + // random sample points in triangulation + DMatrix sample(int n_samples, int seed = fdapde::random_seed) { + // set up random number generation + int seed_ = (seed == fdapde::random_seed) ? std::random_device()() : seed; + std::mt19937 rng(seed_); + // probability of random sampling a cell equals (measure of cell)/(measure of polygon) + std::vector weights(n_cells_); + for (cell_iterator it = cells_begin(); it != cells_end(); ++it) { weights[it->id()] = it->measure(); } + std::discrete_distribution rand_cell(weights.begin(), weights.end()); + std::uniform_real_distribution rand_real(0, 1); + DMatrix coords(n_samples, embed_dim); + for (int i = 0; i < n_samples; ++i) { + // generate random element + int cell_id = rand_cell(rng); + auto e = static_cast(*this).cell(cell_id); + // generate random point in cell + double t = rand_real(rng); + coords.row(i) = t * e.node(0) + (1 - t) * e.node(1); + for (int j = 1; j < local_dim; ++j) { + t = rand_real(rng); + coords.row(i) = (1 - t) * e.node(1 + j) + t * coords.row(i); + } + } + return coords; + } protected: DMatrix nodes_ {}; // physical coordinates of mesh's vertices DMatrix cells_ {}; // nodes (as row indexes in nodes_ matrix) composing each cell @@ -219,8 +245,8 @@ template class Triangulation<2, N> : public TriangulationBase<2, N, Tria Triangulation() = default; Triangulation( - const DMatrix& nodes, const DMatrix& faces, const DMatrix& boundary, int flags = 0) : - Base(nodes, faces, boundary, flags) { + const DMatrix& nodes, const DMatrix& cells, const DMatrix& boundary, int flags = 0) : + Base(nodes, cells, boundary, flags) { if (Base::flags_ & cache_cells) { // populate cache if cell caching is active cell_cache_.reserve(n_cells_); for (int i = 0; i < n_cells_; ++i) { cell_cache_.emplace_back(i, this); }