Skip to content

Commit

Permalink
Performance improvements
Browse files Browse the repository at this point in the history
Horned-OWL indexes other than the set index are now optional/build on demand
  • Loading branch information
b-gehrke authored Jun 26, 2024
2 parents e963510 + 00d9886 commit 02d9c1e
Show file tree
Hide file tree
Showing 11 changed files with 616 additions and 251 deletions.
6 changes: 3 additions & 3 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "py-horned-owl"
version = "1.0.0"
version = "1.0.0-dev.1"
authors = ["Janna Hastings <j.hastings@ucl.ac.uk>"]
edition = "2018"

Expand All @@ -15,8 +15,8 @@ version = "0.21"
features = ["abi3-py37", "extension-module", "experimental-inspect", "multiple-pymethods"]

[dependencies]
horned-owl = "1.0.0"
horned-bin = "1.0.0"
horned-owl = {git = "https://github.com/b-gehrke/horned-owl.git", branch = "py-horned-owl-performance"}
horned-bin = {git = "https://github.com/b-gehrke/horned-owl.git", branch = "py-horned-owl-performance"}
curie = "0.1.2"
failure = "0.1.8"
quote = "1.0"
Expand Down
1 change: 1 addition & 0 deletions docs/source/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ Contents
quickstart
installation
usage
performance
details
api

15 changes: 15 additions & 0 deletions docs/source/performance.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
Performance
===========

The underlying Rust library Horned-OWL offers an `index system <https://docs.rs/horned-owl/latest/horned_owl/ontology/index.html>`_ to allow fast access to different kinds of parts of an ontology.

By default, Py-Horned-OWL loads an ontology into a simple hash set (using a `set index <https://docs.rs/horned-owl/latest/horned_owl/ontology/set/index.html>`_). While this allows for quick loading and iteration over all components in an ontology, queries like "give me all classes" or "give me all axioms for this IRI" are slower or not supported at all.

In particular, queries by IRI are not supported without an `IRI mapped index <https://docs.rs/horned-owl/latest/horned_owl/ontology/iri_mapped/index.html>`_. By default, this index is created implicitly when a function requiring it is called.

The `component mapped index <https://docs.rs/horned-owl/latest/horned_owl/ontology/component_mapped/index.html>`_ improves the performance of lookups of entities or of specific kinds of axioms. For example, the functions :func:`~pyhornedowl.PyIndexedOntology.get_classes`, :func:`~pyhornedowl.PyIndexedOntology.get_object_properties`, etc., as well as :func:`~pyhornedowl.PyIndexedOntology.get_iri` and :func:`~pyhornedowl.PyIndexedOntology.get_iri_for_label`, benefit from a component index.

The indexes can be built manually using the :func:`~pyhornedowl.PyIndexedOntology.build_iri_index` or :func:`~pyhornedowl.PyIndexedOntology.build_component_index` methods, or both together using the :func:`~pyhornedowl.PyIndexedOntology.build_indexes` method.

You can change when the indexes are created using the ``index_strategy`` parameter of the :func:`~pyhornedowl.open_ontology` functions or of the constructor of :func:`~pyhornedowl.PyIndexedOntology`.

58 changes: 48 additions & 10 deletions gen_pyi.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,39 +28,76 @@
f.write("]\n")

with open("pyhornedowl/__init__.pyi", "w") as f:
f.write("import typing\nfrom typing import *\n\n")
f.write("import typing\n")
f.write("from typing import *\n")
f.write("from typing_extensions import deprecated\n\n")
f.write("import model\n")
f.write("\n")

for name, entry in pho.__dict__.items():
if isinstance(entry, type):
f.write(f"class {name}:\n")
# There appears to be a bug with pyo3. Documentation on enum
# variants is not attached to their mapped python types. Hence we
# use a workaround of adding their documentation to the enum in
# the style: "<MemberName>: <doc string>".
member_docs = {}
if hasattr(entry, "__doc__"):
entry_doc = entry.__doc__
if entry_doc is not None:
f.write(" \"\"\"\n")
for line in entry_doc.splitlines():
member_doc_m = re.match(r"^(\w+): (.*)$", line)
if member_doc_m:
member_docs[member_doc_m.group(1)]=member_doc_m.group(2)
else:
f.write(f" {line}\n")

f.write(" \"\"\"\n")

for member_name, member in entry.__dict__.items():
if member_name.startswith("_"):
continue

if hasattr(member, "__doc__"):

# E.g. for enums
if isinstance(member, entry):
f.write(f" {member_name}: typing.Self\n")
if member_name in member_docs or hasattr(member, "__doc__") and member.__doc__ is not None:
doc = member_docs.get(member_name, getattr(member, "__doc__"))
f.write(" \"\"\"\n")
for line in doc.splitlines():
f.write(f" {line}\n")
f.write(" \"\"\"\n")
elif hasattr(member, "__doc__"):
doc = member.__doc__
if doc is not None:
lines = doc.splitlines()
if len(lines) > 2:
sign = lines[0]
annotations_end = lines.index(next(x for x in lines if not x.startswith("@")), 0)
annotations = lines[:annotations_end]
sign = lines[annotations_end]

for ann in annotations:
f.write(f" {ann}\n")

f.write(f" def {sign}:\n")
doc = "\n".join([f" {l}" for l in lines[2:]])
doc = "\n".join([f" {l}" for l in lines[annotations_end+2:]])
f.write(f' """\n{doc}\n """\n ...\n\n')

if callable(entry):
f.write("\n")
elif callable(entry):
if hasattr(entry, "__doc__"):
doc = entry.__doc__
if doc is not None:
lines = doc.splitlines()
if len(lines) > 2:
sign = lines[0]
annotations_end = lines.index(next(x for x in lines if not x.startswith("@")), 0)
annotations = lines[:annotations_end]
sign = lines[annotations_end]

for ann in annotations:
f.write(f"{ann}\n")
f.write(f"def {sign}:\n")
doc = "\n".join([f" {l}" for l in lines[2:]])
doc = "\n".join([f" {l}" for l in lines[annotations_end+2:]])
f.write(f' """\n{doc}\n """\n ...\n\n')

f.write("\n")
Expand Down Expand Up @@ -90,7 +127,8 @@ def handle_module(module: str):

with open(f"pyhornedowl/{module}/__init__.pyi", "w") as f:
f.write("import typing\n")
f.write("from typing import *\n\n")
f.write("from typing import *\n")
f.write("from typing_extensions import deprecated\n\n")

for name, entry in getattr(pho, module).__dict__.items():
if isinstance(entry, type):
Expand Down
4 changes: 2 additions & 2 deletions pyhornedowl/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from __future__ import annotations
from .pyhornedowl import PyIndexedOntology, open_ontology, open_ontology_from_file, open_ontology_from_string, get_descendants, get_ancestors
from .pyhornedowl import PyIndexedOntology, IndexCreationStrategy, open_ontology, open_ontology_from_file, open_ontology_from_string, get_descendants, get_ancestors

__all__ = ["PyIndexedOntology", "open_ontology", "open_ontology_from_file", "open_ontology_from_string", "get_descendants", "get_ancestors"]
__all__ = ["PyIndexedOntology", "IndexCreationStrategy", "open_ontology", "open_ontology_from_file", "open_ontology_from_string", "get_descendants", "get_ancestors"]
54 changes: 47 additions & 7 deletions pyhornedowl/__init__.pyi
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
import typing
from typing import *
from typing_extensions import deprecated

import model

class PyIndexedOntology:
"""
Represents a loaded ontology.
"""
def add_default_prefix_names(self) -> None:
"""
Adds the prefix for rdf, rdfs, xsd, and owl
Expand Down Expand Up @@ -218,7 +222,7 @@ class PyIndexedOntology:
"""
...

def annotation_property(self, iri: str, *, absolute: Optional[bool]=None) -> model.annotationProperty:
def annotation_property(self, iri: str, *, absolute: Optional[bool]=None) -> model.AnnotationProperty:
"""
Convenience method to create an annotationProperty from an IRI.
Expand Down Expand Up @@ -258,14 +262,50 @@ class PyIndexedOntology:
"""
...

def get_ancestors(onto: PyIndexedOntology, child: str, iri_is_absolute: Optional[bool] = None) -> Set[str]:
def get_ancestors(self, onto: PyIndexedOntology, child: str, iri_is_absolute: Optional[bool] = None) -> Set[str]:
"""
Gets all direct and indirect super classes of a class.
"""
...

def build_iri_index(self) -> None:
"""
Builds an index by iri (IRIMappedIndex).
"""
...

def component_index(self) -> None:
"""
Builds an index by component kind (ComponentMappedIndex).
"""
...

def build_indexes(self) -> None:
"""
Builds indexes to allow (a quicker) access to axioms and entities.
"""
...


class IndexCreationStrategy:
"""
Values to indicate when to build the additional indexes.
"""
OnLoad: typing.Self
"""
Create the additional indexes when the ontology is loaded
"""
OnQuery: typing.Self
"""
Create the additional indexes only when they are needed
"""
Explicit: typing.Self
"""
Only create the additional indexes when explicitly requested
"""

def open_ontology(ontology: str, serialization: Optional[typing.Literal['owl', 'rdf','ofn', 'owx']]=None) -> PyIndexedOntology:
def open_ontology(ontology: str, serialization: Optional[typing.Literal['owl', 'rdf','ofn', 'owx']]=None, index_strategy = IndexCreationStrategy.OnQuery) -> PyIndexedOntology:
"""
Opens an ontology from a path or plain text.
Expand All @@ -277,7 +317,7 @@ def open_ontology(ontology: str, serialization: Optional[typing.Literal['owl', '
...


def open_ontology_from_file(path: str, serialization: Optional[typing.Literal['owl', 'rdf','ofn', 'owx']]=None) -> PyIndexedOntology:
def open_ontology_from_file(path: str, serialization: Optional[typing.Literal['owl', 'rdf','ofn', 'owx']]=None, index_strategy = IndexCreationStrategy.OnQuery) -> PyIndexedOntology:
"""
Opens an ontology from a file
Expand All @@ -286,7 +326,7 @@ def open_ontology_from_file(path: str, serialization: Optional[typing.Literal['o
...


def open_ontology_from_string(ontology: str, serialization: Optional[typing.Literal['owl', 'rdf','ofn', 'owx']]=None) -> PyIndexedOntology:
def open_ontology_from_string(ontology: str, serialization: Optional[typing.Literal['owl', 'rdf','ofn', 'owx']]=None, index_strategy = IndexCreationStrategy.OnQuery) -> PyIndexedOntology:
"""
Opens an ontology from plain text.
Expand All @@ -295,17 +335,17 @@ def open_ontology_from_string(ontology: str, serialization: Optional[typing.Lite
...


@deprecated("please use `PyIndexedOntology.get_descendants` instead")
def get_descendants(onto: PyIndexedOntology, parent: str) -> Set[str]:
"""
DEPRECATED: please use `PyIndexedOntology::get_descendants` instead
Gets all direct and indirect subclasses of a class.
"""
...


@deprecated(please use `PyIndexedOntology.get_ancestors` instead)
def get_ancestors(onto: PyIndexedOntology, child: str) -> Set[str]:
"""
DEPRECATED: please use `PyIndexedOntology::get_ancestors` instead
Gets all direct and indirect super classes of a class.
"""
...
Expand Down
1 change: 1 addition & 0 deletions pyhornedowl/model/__init__.pyi
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import typing
from typing import *
from typing_extensions import deprecated

class Class:
first: IRI
Expand Down
Loading

0 comments on commit 02d9c1e

Please sign in to comment.