-
Notifications
You must be signed in to change notification settings - Fork 11
/
MetadataParsing.ns
159 lines (146 loc) · 5.73 KB
/
MetadataParsing.ns
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
Newspeak3
'Root'
class MetadataParsing usingPlatform: platform scannerClass: S <NewspeakPredictiveParsing Scanner class>
(* :exemplar:
[
|
asts = NewspeakASTs usingPlatform: platform.
Parser = NewspeakPredictiveParsing usingPlatform: platform asts: asts.
|
MetadataParsing usingPlatform: platform scannerClass: Parser Scanner
] value
*)
= (
(*
Module for parsing Newspeak language metadata.
*)
|
List = platform collections List.
Map = platform collections Map.
Scanner = S.
|
) (
public class MetadataParser onSource: s <String>
(* :exemplar: MetadataParser onSource: '(* :exemplar: foo *) =' *) = (
(*
A MetadataParser identifies the metadata comments in a string s that lie either between the start of the string and the next token, or between the end of the string and the previous token. These are stored in a map, metadataMap, that maps tags to payloads (the definitions of these are given below). Non-metadata comments are stored in a list, comments.
Metadata is a comment that begins with special syntax that gives the name of the metadata processor that should handle it, followed by the actual metadata, which we call the 'payload'. In the proper context, we will refer to this name concisely as the 'tag'. If no tag is given, we have a regular comment.
One may access the metadata via the method metadata, which will lazily compute it via a forward scan of the source. Or you can initiate a backward scan to find metadata earlier in the string. In either case, the scan ends when a token is encountered.
*)
|
input <String> = s.
position <Integer> ::= 1.
metadataMap <Map[String, String]> = Map new.
public comments <List[String]> = List new.
|
) (
skipWhitespace = (
(* Advance position forward past any run of whitespace, where whitespace means any character whose code is 32 or less. The scan stops at the first non-whitespace character or as soon as position moves past the last character of input, so an empty input or an input that ends in whitespace never causes an out-of-range access. The lower bound check is kept so that a position of 0, as left behind by a backward scan, is not indexed either. *)
	[(position > 0) and: [position <= input size and: [(input at: position) <= 32]]]
		whileTrue: [position:: position + 1].
)
skipWhitespaceBackwards = (
(* Move position backward over any run of whitespace, where whitespace means any character whose code is 32 or less. The scan stops at the first non-whitespace character, or when position reaches 0, i.e. has moved before the first character of input. *)
	[(position > 0) and: [(input at: position) <= 32]]
		whileTrue: [position:: position - 1].
)
getTagDataFrom: mc <String> ^ <{String. Integer}> = (
(* Find the tag in a comment (if any). Return a pair of the tag (including the delimiting ':' characters) and the first index of the payload. If the comment has no tag, the pair is an empty string and 0. A comment has no tag when it is empty, when its first non-whitespace character is not a colon, when no second colon follows, or when nothing stands between the two colons. *)
	| pos ::= 1. tagStart tagEnd |
	(* Guard: indexing into an empty comment would be out of range. *)
	mc isEmpty ifTrue: [^{''. 0}].
	(* Skip leading whitespace, never moving past the last character. *)
	[pos < mc size and: [(mc at: pos) <= 32]]
		whileTrue: [pos:: pos + 1].
	(mc at: pos) = 58 (* : *) ifFalse: [^{''. 0}].
	tagStart:: pos.
	tagEnd:: mc indexOf: ':' startingAt: tagStart + 1.
	tagEnd = 0 ifTrue: [^{''. 0}].
	(* Two adjacent colons carry no tag name; treating them as a tag would register an empty key. *)
	tagEnd = (tagStart + 1) ifTrue: [^{''. 0}].
	#BOGUS yourself. (* should also check that the text between the colons is a well-formed id *)
	^{mc copyFrom: tagStart to: tagEnd. tagEnd + 1}
)
public metadata ^ <Map[String, String]> = (
(* Answer the map from tags to payloads. As long as the map is still empty, a forward scan (gatherMetadata) is run before answering; once it holds at least one entry it is answered as is. *)
	metadataMap isEmpty ifFalse: [^metadataMap].
	gatherMetadata.
	^metadataMap
)
public gatherMetadata = (
(* Scan forward from the current position and process every leading comment, i.e. every comment that precedes the first token. Each non-empty comment is handed to noteMetadataComment:, which files it either in the metadata map or in the comments list. The loop ends on the first pass that does not move position: neither whitespace nor a comment was consumed. *)
	| lastSeenPosition <Integer> ::= 0. commentText <String> |
	[lastSeenPosition = position] whileFalse: [
		lastSeenPosition:: position.
		skipWhitespace.
		commentText:: gatherComment.
		commentText isEmpty
			ifFalse: [noteMetadataComment: commentText]].
)
gatherComment ^ <String> = (
(* If position is on the opening paren of a comment, consume the whole comment, leaving position just past its closing paren, and return the text between the delimiters. Otherwise return an empty string and leave position untouched. Nested comments are consumed by a recursive call and remain part of the returned text. Running out of input before the closing delimiter reports 'Unterminated comment' through scanError:.
NOTE(review): no definition of scanError: appears in the part of this file shown here; confirm where that send resolves. *)
	| openingAt <Integer> = position. |
	(* At least two characters are required for the opening delimiter. *)
	position < input size ifFalse: [^''].
	((input at: position) = 40 (* ( *) and: [(input at: position + 1) = 42 (* * *)])
		ifFalse: [^''].
	position:: position + 2.
	[position < input size] whileTrue: [
		(* Closing delimiter: step past it and answer the enclosed text. *)
		((input at: position) = 42 (* * *) and: [(input at: position + 1) = 41 (* ) *)])
			ifTrue: [
				position:: position + 2.
				^input copyFrom: openingAt + 2 to: position - 3].
		(* Nested opening delimiter: the recursive call already leaves position just past the nested comment, so no further step is taken on this pass. *)
		((input at: position) = 40 (* ( *) and: [(input at: position + 1) = 42 (* * *)])
			ifTrue: [gatherComment]
			ifFalse: [position:: position + 1]].
	scanError: 'Unterminated comment'
)
gatherCommentBackwards ^ <String> = (
(* Assuming position is on the closing paren of a comment, scan backward and return the comment's contents, leaving position just before the comment's opening paren. If position is not on the end of a comment, or fewer than four characters are available, return an empty string and leave position unchanged. Nested comments are skipped by a recursive call and remain part of the returned text. Reaching the start of input without finding the opening delimiter reports 'Unterminated comment' through scanError:.
NOTE(review): no definition of scanError: appears in the part of this file shown here; confirm where that send resolves. *)
	| start <Integer> = position. before <Integer> ::= 0. |
	position - 3 > 0 ifFalse: [^''].
	(input at: position) = 41 (* ) *) ifFalse: [^''].
	(input at: position - 1) = 42 (* * *) ifFalse: [^''].
	position:: position - 2.
	[position - 1 > 0] whileTrue:
		[ (* Start of comment. *)
		(input at: position) = 42 (* * *) ifTrue:
			[(input at: position - 1) = 40 (* ( *) ifTrue:
				[position:: position - 2. ^input copyFrom: position + 3 to: start - 2]].
		(* Nested comment. *)
		(input at: position) = 41 (* ) *) ifTrue:
			[(input at: position - 1) = 42 (* * *) ifTrue:
				[before:: position.
				gatherCommentBackwards.
				(* The recursive call leaves position on the character just before the nested comment, which still has to be examined. Step forward by one to cancel the decrement below, mirroring the forward scanner. This is done only when the nested scan actually moved, so that the loop always makes progress. *)
				position = before ifFalse: [position:: position + 1]]].
		position:: position - 1].
	scanError: 'Unterminated comment'
)
public gatherMetadataBackwards = (
(* Scan backward from the last character of the input and process every trailing comment, i.e. every comment that follows the previous token. Each non-empty comment is handed to noteMetadataComment:, which files it either in the metadata map or in the comments list. The loop ends on the first pass that does not move position: neither whitespace nor a comment was consumed. *)
	| lastSeenPosition <Integer> ::= input size + 1. commentText <String> |
	position:: input size.
	[lastSeenPosition = position] whileFalse: [
		lastSeenPosition:: position.
		skipWhitespaceBackwards.
		commentText:: gatherCommentBackwards.
		commentText isEmpty
			ifFalse: [noteMetadataComment: commentText]].
)
noteMetadataComment: mc <String> = (
(* File the contents of one comment. When getTagDataFrom: finds a tag, the tag name (without its delimiting colons) is mapped in metadataMap to the rest of the comment with surrounding blanks trimmed. When there is no tag, the whole comment text is appended to comments. *)
	|
	tagAndIndex = getTagDataFrom: mc.
	tag <String> = tagAndIndex at: 1.
	payloadStart <Integer> = tagAndIndex at: 2.
	|
	(* Plain comment: no tag was found. *)
	tag isEmpty ifTrue: [comments add: mc. ^self].
	metadataMap
		at: (tag copyFrom: 2 to: tag size - 1)
		put: (mc copyFrom: payloadStart to: mc size) withBlanksTrimmed.
)
) : (
)
) : (
public usingPlatform: platform = (
(* Convenience factory: instantiate the module for the given platform, passing nil as the scanner class. *)
	^usingPlatform: platform scannerClass: nil
)
)