Skip to content

Commit

Permalink
Initial support for OWL-awareness
Browse files Browse the repository at this point in the history
1. Support URL nodes with no "name"
2. Support URL nodes with search query string
3. Understand native rdfs:domain and rdfs:range directives
4. Do not treat OWL Class/Property & other ontology declarations as enum
   membership
5. Context support for unnamed URLs ("schema:" is totally valid)
6. Allow type=Class to be transitive
7. Support 1-char long strings

This begins building support for google#169
  • Loading branch information
Eyas committed Feb 23, 2022
1 parent 7a919f6 commit d9d05b8
Show file tree
Hide file tree
Showing 20 changed files with 692 additions and 154 deletions.
3 changes: 2 additions & 1 deletion .eslintignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
node_modules/
dist
coverage
jest.config.js
schema-dts/lib/
schema-dts/dist/
41 changes: 32 additions & 9 deletions packages/schema-dts-gen/src/transform/toClass.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,11 @@ import {Log} from '../logging/index.js';
import {ObjectPredicate, Topic, TypedTopic} from '../triples/triple.js';
import {UrlNode} from '../triples/types.js';
import {
IsNamedClass,
IsDirectlyNamedClass,
IsDataType,
ClassIsDataType,
IsNamedUrl,
IsSubclass,
} from '../triples/wellKnown.js';
import {
AliasBuiltin,
Expand All @@ -29,7 +31,7 @@ import {
DataTypeUnion,
RoleBuiltin,
} from '../ts/class.js';
import {assert} from '../util/assert.js';
import {assert, asserted, assertIs} from '../util/assert.js';

function toClass(cls: Class, topic: Topic, map: ClassMap): Class {
const rest: ObjectPredicate[] = [];
Expand Down Expand Up @@ -61,11 +63,21 @@ const wellKnownTypes = [
new AliasBuiltin('http://schema.org/Date', AliasBuiltin.Alias('string')),
new AliasBuiltin('http://schema.org/DateTime', AliasBuiltin.Alias('string')),
new AliasBuiltin('http://schema.org/Boolean', AliasBuiltin.Alias('boolean')),
new RoleBuiltin(UrlNode.Parse('http://schema.org/Role')),
new RoleBuiltin(UrlNode.Parse('http://schema.org/OrganizationRole')),
new RoleBuiltin(UrlNode.Parse('http://schema.org/EmployeeRole')),
new RoleBuiltin(UrlNode.Parse('http://schema.org/LinkRole')),
new RoleBuiltin(UrlNode.Parse('http://schema.org/PerformanceRole')),
new RoleBuiltin(
asserted(UrlNode.Parse('http://schema.org/Role'), IsNamedUrl)
),
new RoleBuiltin(
asserted(UrlNode.Parse('http://schema.org/OrganizationRole'), IsNamedUrl)
),
new RoleBuiltin(
asserted(UrlNode.Parse('http://schema.org/EmployeeRole'), IsNamedUrl)
),
new RoleBuiltin(
asserted(UrlNode.Parse('http://schema.org/LinkRole'), IsNamedUrl)
),
new RoleBuiltin(
asserted(UrlNode.Parse('http://schema.org/PerformanceRole'), IsNamedUrl)
),
];

// Should we allow 'string' to be a valid type for all values of this type?
Expand All @@ -90,7 +102,13 @@ function ForwardDeclareClasses(topics: readonly TypedTopic[]): ClassMap {
if (IsDataType(topic.Subject)) {
classes.set(topic.Subject.toString(), dataType);
continue;
} else if (!IsNamedClass(topic)) continue;
} else if (!IsDirectlyNamedClass(topic) && !IsSubclass(topic)) continue;

if (!IsNamedUrl(topic.Subject)) {
throw new Error(
`Unexpected unnamed URL ${topic.Subject.toString()} as a class.`
);
}

const wk = wellKnownTypes.find(wk => wk.subject.equivTo(topic.Subject));
if (ClassIsDataType(topic)) {
Expand All @@ -108,6 +126,7 @@ function ForwardDeclareClasses(topics: readonly TypedTopic[]): ClassMap {
wks.equivTo(topic.Subject)
);
if (allowString) cls.addTypedef(AliasBuiltin.Alias('string'));
if (IsDirectlyNamedClass(topic)) cls.markAsExplicitClass();

classes.set(topic.Subject.toString(), cls);
}
Expand All @@ -117,12 +136,16 @@ function ForwardDeclareClasses(topics: readonly TypedTopic[]): ClassMap {

function BuildClasses(topics: readonly TypedTopic[], classes: ClassMap) {
for (const topic of topics) {
if (!IsNamedClass(topic)) continue;
if (!IsDirectlyNamedClass(topic) && !IsSubclass(topic)) continue;

const cls = classes.get(topic.Subject.toString());
assert(cls);
toClass(cls, topic, classes);
}

for (const cls of classes.values()) {
cls.validateClass();
}
}

/**
Expand Down
6 changes: 4 additions & 2 deletions packages/schema-dts-gen/src/triples/reader.ts
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ function object(content: string) {
}

const totalRegex =
/\s*<([^<>]+)>\s*<([^<>]+)>\s*((?:<[^<>"]+>)|(?:"(?:[^"]|(?:\\"))+(?:[^\"]|\\")"(?:@[a-zA-Z]+)?))\s*\./;
/\s*<([^<>]+)>\s*<([^<>]+)>\s*((?:<[^<>"]+>)|(?:"(?:[^"]|(?:\\"))*(?:[^\"]|\\")"(?:@[a-zA-Z]+)?))\s*\./;
export function toTripleStrings(data: string[]) {
const linearTriples = data
.join('')
Expand Down Expand Up @@ -215,7 +215,9 @@ export function* process(triples: string[][]): Iterable<Triple> {
} catch (parseError) {
const e = parseError as Error;
throw new Error(
`ParseError: ${e.name}: ${e.message} while parsing line ${match}.\nOriginal Stack:\n${e.stack}\nRethrown from:`
`ParseError: ${e.name}: ${e.message} while parsing line ${match
.map(t => `\{${t}\}`)
.join(', ')}.\nOriginal Stack:\n${e.stack}\nRethrown from:`
);
}
}
Expand Down
4 changes: 2 additions & 2 deletions packages/schema-dts-gen/src/triples/triple.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import {Rdfs, SchemaString, UrlNode} from './types.js';
import {NamedUrlNode, Rdfs, SchemaString, UrlNode} from './types.js';

/** Represents a parsed Subject-Predicate-Object statement. */
export interface Triple {
Expand Down Expand Up @@ -41,7 +41,7 @@ export interface ObjectPredicate {
* A Node that can correspond to a "concept" in the ontology (class, property,
* etc.).
*/
export type TTypeName = UrlNode;
export type TTypeName = NamedUrlNode;

/** A set of statements applying to the same Subject. */
export interface Topic {
Expand Down
24 changes: 15 additions & 9 deletions packages/schema-dts-gen/src/triples/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,7 @@ export interface ReadonlyUrl {
readonly path: readonly string[];
readonly search: string;
}
function fromString(urlString: string): ReadonlyUrl {
const url = new URL(urlString);
function fromUrl(url: URL): ReadonlyUrl {
return {
href: url.href,
protocol: url.protocol,
Expand All @@ -34,6 +33,9 @@ function fromString(urlString: string): ReadonlyUrl {
search: url.search,
};
}
/** Parses `urlString` as a URL and converts the result into a ReadonlyUrl. */
function fromString(urlString: string): ReadonlyUrl {
  const parsed = new URL(urlString);
  return fromUrl(parsed);
}
function pathEqual(first: readonly string[], second: readonly string[]) {
if (first.length !== second.length) return false;
for (let i = 0; i < first.length; ++i) {
Expand All @@ -50,7 +52,7 @@ function pathEqual(first: readonly string[], second: readonly string[]) {
export class UrlNode {
readonly type = 'UrlNode';
constructor(
readonly name: string,
readonly name: string | undefined,
readonly context: ReadonlyUrl,
readonly href: string
) {}
Expand Down Expand Up @@ -101,21 +103,25 @@ export class UrlNode {
}

if (url.search) {
throw new Error(
`Can't handle Search string in ${url.search} in ${url.href}`
// A URL with no hash but some "?..." search params
// should be treated the same as an unnamed URL.
return new UrlNode(
/*name=*/ undefined,
/*context=*/ fromUrl(url),
/*href=*/ url.href
);
}

const split = url.pathname.split('/');
const name = split.pop();
if (!name) {
throw new Error(`Unexpected URL ${url.href} with no room for 'name'.`);
}
let name = split.pop();
if (name === '') name = undefined;

const context = url.origin + split.join('/');

return new UrlNode(name, fromString(context), url.href);
}
}
/** A UrlNode whose `name` is known to be a string rather than undefined. */
export type NamedUrlNode = UrlNode & {name: string};

/**
* In-memory representation of a node in a Triple corresponding to a string
Expand Down
88 changes: 67 additions & 21 deletions packages/schema-dts-gen/src/triples/wellKnown.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ import {
TTypeName,
TypedTopic,
} from './triple.js';
import {UrlNode} from './types.js';
import {NamedUrlNode, UrlNode} from './types.js';

/** Whether the context corresponds to rdf-schema. */
export function IsRdfSchema(value: UrlNode): boolean {
Expand All @@ -43,6 +43,13 @@ export function IsRdfSyntax(value: UrlNode): boolean {
export function IsSchemaObject(value: UrlNode): boolean {
  // Only the hostname of the node's context is inspected.
  const {hostname} = value.context;
  return hostname === 'schema.org';
}
/** Whether the context corresponds to OWL. */
export function IsOWL(value: UrlNode): boolean {
  const {hostname, path} = value.context;
  if (hostname !== 'www.w3.org') return false;
  // The final path segment of the context must be "owl".
  return path[path.length - 1] === 'owl';
}

/**
* If an ObjectPredicate represents a comment, returns the comment. Otherwise
Expand All @@ -66,20 +73,35 @@ export function GetComment(value: ObjectPredicate): {comment: string} | null {
*/
export function GetSubClassOf(
value: ObjectPredicate
): {subClassOf: TSubject} | null {
): {subClassOf: TTypeName} | null {
if (IsRdfSchema(value.Predicate) && value.Predicate.name === 'subClassOf') {
if (value.Object.type === 'SchemaString' || value.Object.type === 'Rdfs') {
throw new Error(
`Unexpected object for predicate 'subClassOf': ${value.Object}.`
);
}
if (!IsNamedUrl(value.Object)) {
throw new Error(
`Unexpected "unnamed" URL used as a super-class: ${value.Object}`
);
}
return {subClassOf: value.Object};
}
return null;
}

/**
 * Returns true iff at least one of the topic's values is a subClassOf
 * statement, i.e. GetSubClassOf yields a non-null result for it.
 */
export function IsSubclass(topic: TypedTopic) {
  for (const value of topic.values) {
    if (GetSubClassOf(value) !== null) return true;
  }
  return false;
}

/**
 * Type guard: true iff the UrlNode's `name` is defined, narrowing it to a
 * NamedUrlNode.
 */
export function IsNamedUrl(t: UrlNode): t is NamedUrlNode {
  return typeof t.name !== 'undefined';
}

/** Returns true iff a node corresponds to http://schema.org/DataType */
export function IsDataType(t: TTypeName): boolean {
export function IsDataType(t: TSubject): boolean {
return IsSchemaObject(t) && t.name === 'DataType';
}

Expand All @@ -89,8 +111,17 @@ export function ClassIsDataType(topic: TypedTopic): boolean {
return false;
}

/** Returns true iff a Topic represents a named class. */
export function IsNamedClass(topic: TypedTopic): boolean {
/**
* Returns true iff a Topic is directly declared as a class, i.e. at least one
* of its types is rdfs:Class.
*
* Note that some schemas define subclasses without explicitly redefining them
* as classes. So just because a topic isn't directly named as a class doesn't
* mean that it isn't a named class.
*
* A topic is a named class if it *OR ANY OF ITS PARENTS* is a directly named
* class.
*/
export function IsDirectlyNamedClass(topic: TypedTopic): boolean {
// Skip anything that isn't a class.
return topic.types.some(IsClassType);
}
Expand All @@ -99,13 +130,19 @@ export function IsNamedClass(topic: TypedTopic): boolean {
* Returns true iff a Predicate corresponds to http://schema.org/domainIncludes
* or to rdfs:domain.
*/
export function IsDomainIncludes(value: TPredicate): boolean {
return IsSchemaObject(value) && value.name === 'domainIncludes';
return (
(IsSchemaObject(value) && value.name === 'domainIncludes') ||
(IsRdfSchema(value) && value.name === 'domain')
);
}
/**
* Returns true iff a Predicate corresponds to http://schema.org/rangeIncludes
* or to rdfs:range.
*/
export function IsRangeIncludes(value: TPredicate): boolean {
return IsSchemaObject(value) && value.name === 'rangeIncludes';
return (
(IsSchemaObject(value) && value.name === 'rangeIncludes') ||
(IsRdfSchema(value) && value.name === 'range')
);
}
/**
* Returns true iff a Predicate corresponds to http://schema.org/supersededBy.
Expand Down Expand Up @@ -150,27 +187,17 @@ export function GetTypes(
): readonly TTypeName[] {
const types = values.map(GetType).filter((t): t is TTypeName => !!t);

if (types.length === 0) {
throw new Error(
`No type found for Subject ${key.toString()}. Triples include:\n${values
.map(
v =>
`${v.Predicate.toString()}: ${JSON.stringify(
v.Predicate
)}\n\t=> ${v.Object.toString()}`
)
.join('\n')}`
);
}

// Allow empty types. Some custom schemas assume "transitive" typing, e.g.
// gs1 has a TypeCode class which is an rdfs:Class, but its subclasses are
// not explicitly described as an rdfs:Class.
return types;
}

/**
* Returns true iff a Type corresponds to
* http://www.w3.org/2000/01/rdf-schema#Class
*/
export function IsClassType(type: TTypeName): boolean {
export function IsClassType(type: UrlNode): boolean {
return IsRdfSchema(type) && type.name === 'Class';
}

Expand All @@ -193,6 +220,25 @@ export function HasEnumType(types: readonly TTypeName[]): boolean {
// Skip well-known types.
if (IsClassType(type) || IsPropertyType(type) || IsDataType(type)) continue;

// Skip OWL "meta" types:
if (IsOWL(type)) {
if (
[
'Ontology',
'Class',
'DatatypeProperty',
'ObjectProperty',
'FunctionalProperty',
'InverseFunctionalProperty',
'AnnotationProperty',
'SymmetricProperty',
'TransitiveProperty',
].includes(type.name)
) {
continue;
}
}

// If we're here, this is a 'Type' that is not well known.
return true;
}
Expand Down
Loading

0 comments on commit d9d05b8

Please sign in to comment.