diff --git a/.github/workflows/TagBot.yml b/.github/workflows/TagBot.yml new file mode 100644 index 0000000..623860f --- /dev/null +++ b/.github/workflows/TagBot.yml @@ -0,0 +1,15 @@ +name: TagBot +on: + issue_comment: + types: + - created + workflow_dispatch: +jobs: + TagBot: + if: github.event_name == 'workflow_dispatch' || github.actor == 'JuliaTagBot' + runs-on: ubuntu-latest + steps: + - uses: JuliaRegistries/TagBot@v1 + with: + token: ${{ secrets.GITHUB_TOKEN }} + ssh: ${{ secrets.DOCUMENTER_KEY }} \ No newline at end of file diff --git a/Project.toml b/Project.toml index 362df18..ced3268 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "GreekScientificOrthography" uuid = "049fe880-56b1-44e6-a5bf-3b4decaa7e2d" authors = ["Neel Smith "] -version = "0.1.0" +version = "0.2.0" [deps] DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" diff --git a/quarto/_quarto.yml b/quarto/_quarto.yml index 4462a2c..4552731 100644 --- a/quarto/_quarto.yml +++ b/quarto/_quarto.yml @@ -5,7 +5,7 @@ theme: spacelab execute: freeze: auto date: last-modified -margin-header: "Package version: `0.1.0`" +margin-header: "Package version: `0.2.0`" format: html: toc: true @@ -42,6 +42,7 @@ website: - title: "Guides" contents: - guides/index.qmd + - guides/numbers.qmd - title: "Reference" contents: diff --git a/quarto/concepts/index.qmd b/quarto/concepts/index.qmd index 8500766..a15dd48 100644 --- a/quarto/concepts/index.qmd +++ b/quarto/concepts/index.qmd @@ -49,11 +49,7 @@ Astronomical symbols are represented by single Unicode codepoints such as `🜚 In contrast to the other token types, the syntax of numeric values and mathematical figure labels is case-sensitive. -Integer values in the Milesian notational system are written using 27 alphabetic or archaic alphabetic characters, followed by the numeric marker 'ʹ' (Unicode x0374). (See a [tutorial on writing numbers](../tutorials/numbers.qmd).) 
A comma separates digits for ones, tens and hundreds (to the right of the comma) from digits for thousands (left of the commma). `Μʹ` is the value for "myriad," 10,000. `μ` is the digit for 40. +Integer values in the Milesian notational system are written using 27 alphabetic or archaic alphabetic characters, followed by the numeric marker 'ʹ' (Unicode x0374). (See full documentation in a [guide to writing numbers](../guides/numbers.qmd).) A comma separates digits for ones, tens and hundreds (to the right of the comma) from digits for thousands (left of the comma). `Μʹ` is the value for "myriad," 10,000. `μ` is the digit for 40. Mathematical figure labels are defined as strings of upper-case purely alphabetic characters, without any breathing or accents. The string `Ἡ` is a lexical token (the nominative feminine singular of the article, in upper case), while `Η` is a figure label, "eta". - -## Work in progress - -Support for tokenizing fractions and integer values greater than 9,999 is planned for the next release. diff --git a/quarto/guides/arenarius.qmd b/quarto/guides/arenarius.qmd new file mode 100644 index 0000000..c87cd19 --- /dev/null +++ b/quarto/guides/arenarius.qmd @@ -0,0 +1,15 @@ +# Large values + + +## Using integer values in sexagesimal units + + +::: {.callout-note title="TBA"} + +An alternative to tedious sums of fractions: + +- divide whole units into sixtieths, seconds (3600ths), thirds (216000ths)... each written with an integer value up to 59 +- typically used in tables where columns determine meaning of integer +- but manually written columns make null values dangerous if they're left blank, so adds special character `Ο` (οὐδέν, "nothing") for 0. 
+ +::: \ No newline at end of file diff --git a/quarto/guides/index.qmd b/quarto/guides/index.qmd index 8c57150..8ef4837 100644 --- a/quarto/guides/index.qmd +++ b/quarto/guides/index.qmd @@ -2,14 +2,18 @@ title: "Guides" --- -Recipe book for more complex tasks using `GreekScientificOrthography.jl` can be added here. +Recipe book for more complex tasks using `GreekScientificOrthography.jl`: -::: {.callout-note title="TBA"} -Add guides here for +- [Formatting numbers in Milesian notation](./numbers.qmd) + + + +::: {.callout-note title="Planned additions"} + 1. the complicated interactions that can happen with Unicode equivalence using modifying and non-modifying characters for prime, double prime, and Greek numeric marker -2. implementing Archimedes' system of writing arbitrarily large numbers +2. a note on Archimedes' system of writing arbitrarily large numbers, and on the normal practice of using sexagesimal digits for fractional values (firsts, seconds, thirds... ) ::: \ No newline at end of file diff --git a/quarto/guides/numbers.qmd b/quarto/guides/numbers.qmd new file mode 100644 index 0000000..1a57021 --- /dev/null +++ b/quarto/guides/numbers.qmd @@ -0,0 +1,179 @@ +# Formatting numeric tokens +```{julia} +#| warning: false +#| echo: false +#| output: false +using GreekScientificOrthography, Orthography +``` + +Manuscripts of Greek scientific and mathematical texts use the "Milesian" system of numeric notation. + +## Integers in the "Milesian" system of notation + +### Values from 1-999 + +The Milesian system is essentially a place-value system. The 27 values for ones (1-9), tens (10-90), and hundreds (100-900) are noted with specific alphabetic characters in normal alphabetic order, with the additions of `ϛ` for 6, `ϙ` for 90 and `ϡ` for 900. + +::: {.callout-note} +Note that in `GreekScientificOrthography`, characters used to write the basic integer values *must be in lower-case*. 
+::: + + +::: {.column width="50%"} + +| Ones | Tens | Hundreds | +| --- | --- | --- | +| α | ι | ρ | +| β | κ | σ | +| γ | λ | τ | +| δ | μ | υ | +| ε| ν | φ | +| ϛ | ξ | χ | +| ζ| ο | ψ | +| η| π | ω | +| θ | ϙ | ϡ | + +::: + + +Since most of the characters used to write integers in the Milesian system can also be read as alphabetic characters, integer tokens are flagged with a special marker, the numeric tick mark, `ʹ` (Unicode x0374). The integer value `1` is written like this: + +```{julia} +#| output: false +one = "αʹ" +``` + + + +Like our place value notation, the left-to-right sequence of digits is always largest to smallest. Note that unlike our numeric notation, there is no need for a zero character to represent the absence of a value in the hundreds, tens or ones column, since the characters for each column are distinct. + + +```{julia} +#| output: false +eleven = "ιαʹ" +one_hundred_one = "ραʹ" +one_hundred_eleven = "ριαʹ" +``` + +All of these are valid strings in a `GreekSciOrthography`. + +```{julia} +ortho = stemortho() +validstring(one, ortho) == validstring(eleven, ortho) == validstring(one_hundred_one, ortho) == validstring(one_hundred_eleven, ortho) +``` + + +A comma separates a thousands column (with values from 1,000 - 9,000) to the left of the comma, from hundreds, tens and ones values to the right of the comma, again just like our practice. The thousands column reuses the same characters as the ones column. + +```{julia} +one_thousand_one = "α,αʹ" +validstring(one_thousand_one, ortho) +``` + +All of these strings represent a single integer token (type `MilesianIntegerToken`). + +```{julia} +tokenize(one, ortho) +``` + + +```{julia} +tokenize(one_thousand_one, ortho) +``` + +## Integers from 999-19,999 + +The value for 10,000 is written with the upper-case mu, `Μ` (for μυριάς, "myriad"). 
+ +```{julia} +myriad = "Μʹ" +validstring(myriad, ortho) +``` + +Conventionally, the myriads value is written as a separate token separated by white space from the smaller columns. That means that the string for a value like 10,001 will be represented by two tokens. + +```{julia} +tenthousand_one = "Μʹ αʹ" +tokenize(tenthousand_one, ortho) +``` + +::: {.callout-note} +Greek manuscripts do not normally repeat the tick mark on the myriad marker; `GreekScientificOrthography` requires this to guarantee context-independent parsing of tokens. +::: + + +You can express whole numbers up to 19,999 in this way. + +```{julia} +nineteen_999 = "Μʹ θ,ϡϙθʹ" +validstring(nineteen_999, ortho) +``` + +```{julia} +tokenize(nineteen_999, ortho) +``` + + +## Integers greater than 19,999 + +To write values larger than 19,999, Milesian notation begins by multiplying the myriad character. In manuscripts, the multiplier is normally written above the `Μ`; in `GreekScientificOrthography`, we use the markdown convention for superscript that is supported by pandoc, among others, of bracketing the superscript value with carets. In this context, neither multiplicand requires the numeric tick. + +20,000, for example, is written as Μ multiplied by β, like this example: + +```{julia} +twentyk = "Μ^β^" +validstring(twentyk, ortho) +``` + + +In an environment that supports pandoc's markdown extension, the multiplier will display as a superscript. + +```{julia} +using Markdown +Markdown.parse(twentyk) +``` + + +Archimedes uses this notation as he derives limiting values for *pi* in his treatise *Measurement of a Circle*. 
The value 349,450, for example, is written with these two tokens: + +```{julia} +threefortynine450 = "Μ^λδ^ θ,υνʹ" +Markdown.parse(threefortynine450) +``` + +```{julia} +tokenize(threefortynine450, ortho) +``` + + + +## Fractional values + +`GreekScientificOrthography` includes characters for three fractional values that are often written in manuscripts with special symbols: `𐅵` for one half (Unicode x10175), `𐅷` for two-thirds (Unicode x10177), and `𐅸` for three quarters (Unicode x10178). (The package also makes these characters available with the constant names `ONE_HALF`, `TWO_THIRDS` and `THREE_FOURTHS`.) + +Apart from these special cases, the only notation for fractional values available to Greek mathematicians was to use normal integer notation, but flagged with a special double-prime marker indicating that this is a *reciprocal* value. + +```{julia} +sixth = "ϛ″" +thirtysixth = "λϛ″" +validstring(sixth, ortho) == validstring(thirtysixth, ortho) +``` + +Other fractional values would be written as sums of these. The fraction 2/3 can, for example, appear as 1/2 + 1/6, written simply as a succession of individual fractional tokens. As with integer values, the left-to-right sequence is from greatest to least, and the value is the sum of the tokens. + +```{julia} +twothirds = "β″ ϛ″" +tokenize(twothirds, ortho) +``` + +Fractions can of course be mixed with integer numbers. + +```{julia} +six_and_twothirds = "ϛʹ β″ ϛ″" +validstring(six_and_twothirds, stemortho()) +``` + + +```{julia} +tokenize(six_and_twothirds, stemortho()) +``` \ No newline at end of file diff --git a/quarto/index.qmd b/quarto/index.qmd index bf63a62..147a7e9 100644 --- a/quarto/index.qmd +++ b/quarto/index.qmd @@ -4,8 +4,31 @@ title: "GreekScientificOrthography.jl" > An orthographic system for ancient Greek mathematical and scientific texts. 
+ +## Quick start: tokenization + +Validate the orthography of a string: + +```{julia} +using GreekScientificOrthography, Orthography +ortho = stemortho() + +archimedes = "ἡ ΓΕ πρὸς ΓΗ μείζονα λόγον ἔχει ἤπερ φοαʹ πρὸς ρνγʹ." +validstring(archimedes, ortho) +``` + + +Tokenize a string: + +```{julia} +tkns = tokenize(archimedes, ortho) +``` + + + +## Documentation + | | Learning the package | Using the package | | --- | --- | --- | | **Hands-on** | Try a [tutorial](./tutorials/) to start using the package | Find [recipes](./guides/) for specific tasks | | **Cognition** | The [orthography of Greek math and science](./concepts/) | See [reference information](./reference/) (including API documentation for all exported functions) | - diff --git a/quarto/tutorials/index.qmd b/quarto/tutorials/index.qmd index b71ef11..397ef1f 100644 --- a/quarto/tutorials/index.qmd +++ b/quarto/tutorials/index.qmd @@ -1,31 +1,10 @@ --- -title: Tutorials +title: Hands-on tutorials --- -## Quick start -- Validate the orthography of a string: -```{julia} -using GreekScientificOrthography, Orthography -ortho = stemortho() - -archimedes = "ἡ ΓΕ πρὸς ΓΗ μείζονα λόγον ἔχει ἤπερ φοαʹ πρὸς ρνγʹ." -validstring(archimedes, ortho) -``` - - -- Tokenize a string: - -```{julia} -tkns = tokenize(archimedes, ortho) -``` - - -## Hands-on tutorials - - -- [validating and tokenizing strings](./tokens.qmd) -- [formatting numeric tokens](./numbers.qmd) +- [Validating and tokenizing strings](./tokens.qmd) +- [Evaluating numeric values](./numbers.qmd) diff --git a/quarto/tutorials/numbers.qmd b/quarto/tutorials/numbers.qmd index 2eecfa3..acf7dab 100644 --- a/quarto/tutorials/numbers.qmd +++ b/quarto/tutorials/numbers.qmd @@ -1,42 +1,105 @@ -# Formatting numeric tokens +# Evaluating numeric values +`GreekScientificOrthography` includes the `milesian` function for parsing Greek numbers in Milesian notation into numeric values. 
-Manuscripts of Greek scientific and mathematical texts use the "Milesian" system of numeric notation. +::: {.callout-tip title="Milesian notation"} +See [this guide](../guides/numbers.qmd) for full documentation of the Milesian notation +recognized by `GreekScientificOrthography`. +::: -In version `0.1.0` of `GreekScientificOrthography`, integer values from 1 to 9,999 in this notation are correctly tokenized. -## Integers in the "Milesian" system of notation +## Integers -The Milesian system is essentially a place-value system. The 27 values for ones (1-9), tens (10-90), and hundreds (100-900) are noted with specific alphabetic characters in normal alphabetic order, with the additions of `ϛ` for 6, `ϙ` for 90 and `ϡ` for 900. +Integer values are identified with the numeric tick, `ʹ`. +```{julia} +#| warning: false +using GreekScientificOrthography +milesian("δʹ") +``` +```{julia} +milesian("κδʹ") +``` -- ones, tens, hundreds: alphabetic characters followed by the Greek numeric marker 'ʹ' (Unicode `x0374`). -- comma separates thousands -- myriad (10,000) is upper-case mu, Μ - +Thousands are separated by a comma from lower-value digits. +```{julia} +milesian("α,αʹ") +``` -Summary of the Milesian system: +## Fractions -::: {.column width="40%"} +Fractional values are written as unit fractions, given by the reciprocal value and tagged with a double-prime marker. -| Ones | Tens | Hundreds | -| --- | --- | --- | -| α | ι | ρ | -| β | κ | σ | -| γ | λ | τ | -| δ | μ | υ | -| ε| ν | φ | -| ϛ | ξ | χ | -| ζ| ο | ψ | -| η| π | ω | -| θ | ϙ | ϡ | -::: +```{julia} +milesian("δ″") +``` + +```{julia} +milesian("κδ″") +``` + +Other values have to be expressed as the sum of a series of unit fractions. Two-thirds can be written as 1/2 + 1/6, for example. + +```{julia} +milesian("β″ ϛ″") +``` + + +You can of course mix integers and fractions. 
+ +```{julia} +milesian("βʹ β″") +``` + +`GreekScientificOrthography` includes three special characters often used in Greek manuscripts for the values 1/2, 2/3 and 3/4. + +```{julia} +milesian("𐅵″") +``` + +## Myriads + +The basic notation lets you write integers up to 9,999. + +```{julia} +milesian("θ,ϡϙθʹ") +``` + +10,000 is written with upper-case mu, for "myriad". + + +```{julia} +milesian("Μʹ") +``` + +It's treated as a distinct token. As with fractions, the value of the string expression is the sum of the tokens. + +```{julia} +milesian("Μʹ α,αʹ") +``` + +Myriads can be multiplied! In `GreekScientificOrthography` this is indicated with a Markdown superscript expression (enclosed in carets). + +```{julia} +milesian("Μ^β^") +``` + + +A myriad myriads is 10^8^! + +```{julia} +milesian("Μ^Μ^") +``` + +The biggest integer value we can write in this system is 100009999. + +```{julia} +milesian("Μ^Μ^ θ,ϡϙθʹ") +``` + -## Integers greater than 9,999 -## Fractional values -## Using integer values in hexadecimal units \ No newline at end of file diff --git a/src/GreekScientificOrthography.jl b/src/GreekScientificOrthography.jl index 86c49b5..835d801 100644 --- a/src/GreekScientificOrthography.jl +++ b/src/GreekScientificOrthography.jl @@ -22,11 +22,32 @@ export codepoints, tokentypes, tokenize const NUMERIC_TICK = 'ʹ' # unicode x0374 export NUMERIC_TICK +const PRIME = '′' # unicode x2032 +export PRIME +const FRACTION_TICK = '″' # unicode x2033, "double prime" +export FRACTION_TICK + +const ONE_HALF = '𐅵' # unicode x10175 +export ONE_HALF + +const TWO_THIRDS = '𐅷' # unicode x10177 +export TWO_THIRDS + +const THREE_FOURTHS = '𐅸' # unicode x10178 +export THREE_FOURTHS + +const EVIL_PRIME = Char(0x2b9) # "modifier letter prime" +const EVIL_DOUBLE_PRIME = Char(0x2ba) # "modifier letter double prime" + + +export milesian include("types.jl") include("tokens.jl") include("ortho.jl") include("lexical.jl") +include("digits.jl") +include("milesian.jl") end # module 
GreekSciOrthography
diff --git a/src/digits.jl b/src/digits.jl
new file mode 100644
index 0000000..82089a7
--- /dev/null
+++ b/src/digits.jl
@@ -0,0 +1,34 @@
+# Numeric value of each Milesian digit character; 'Ο' (οὐδέν, "nothing") is zero.
+const digitvalues = Dict(
+    'α' => 1,
+    'β' => 2,
+    'γ' => 3,
+    'δ' => 4,
+    'ε' => 5,
+    'ϛ' => 6,
+    'ζ' => 7,
+    'η' => 8,
+    'θ' => 9,
+
+    'ι' => 10,
+    'κ' => 20,
+    'λ' => 30,
+    'μ' => 40,
+    'ν' => 50,
+    'ξ' => 60,
+    'ο' => 70,
+    'π' => 80,
+    'ϙ' => 90,
+
+    'ρ' => 100,
+    'σ' => 200,
+    'τ' => 300,
+    'υ' => 400,
+    'φ' => 500,
+    'χ' => 600,
+    'ψ' => 700,
+    'ω' => 800,
+    'ϡ' => 900,
+
+    'Ο' => 0
+)
\ No newline at end of file
diff --git a/src/milesian.jl b/src/milesian.jl
new file mode 100644
index 0000000..37a14a3
--- /dev/null
+++ b/src/milesian.jl
@@ -0,0 +1,194 @@
+"""Compute numeric value of string in Milesian notation.
+
+The string is tokenized with `stemortho()`; each token is evaluated
+separately and the results are summed, so `"Μʹ αʹ"` evaluates to 10001.
+Throws a `DomainError` if any token is not a Milesian numeric token.
+$(SIGNATURES)
+"""
+function milesian(s::AbstractString)
+    tkns = tokenize(s, stemortho())
+    milesian.(tkns) |> sum
+end
+
+"""Compute numeric value of an orthographic token in Milesian notation.
+
+Throws a `DomainError` for tokens that are neither integers nor fractions.
+$(SIGNATURES)
+"""
+function milesian(tkn::OrthographicToken)
+    if tokencategory(tkn) isa MilesianFractionToken
+        fractionvalue(tkn)
+
+    elseif tokencategory(tkn) isa MilesianIntegerToken
+        @debug("Its an int: $(tkn)")
+        intvalue(tkn)
+    else
+        throw(DomainError("Not a Milesian token: $(tkn)."))
+    end
+end
+
+"""Compute numeric value of a token for a Milesian integer.
+$(SIGNATURES)
+"""
+function intvalue(tkn::OrthographicToken)
+    @assert tokencategory(tkn) isa MilesianIntegerToken
+    intvalue(tokentext(tkn))
+end
+
+
+
+"""Compute numeric value of a token expressing numbers of myriads.
+
+A bare myriad (e.g. `Μʹ`) is 10,000; in `Μ^β^` the text between the
+carets is evaluated as an integer and multiplies the myriad.
+$(SIGNATURES)
+"""
+function myriadvalue(s::AbstractString)
+    myriad = 10000
+    parts = split(s, "^")
+    if length(parts) == 1
+        myriad
+    else
+        factor = intvalue(parts[2])
+        myriad * factor
+    end
+end
+
+
+"""Compute numeric value of a string expressing an integer value below 1,000.
+
+Tick characters are ignored.  Throws a `DomainError` if the string has no
+digits, has a character that is not a Milesian digit, or does not run
+strictly from higher to lower decimal columns (hundreds, tens, ones).
+$(SIGNATURES)
+"""
+function intdigits(s::AbstractString)
+    # Typed vector: an untyped `[]` cannot be summed when it is empty.
+    pieces = Int[]
+    for c in s
+        if istick(c)
+            # ignore
+
+        else
+            if ! haskey(digitvalues, c)
+                throw(DomainError("Invalid digit: $(c) $(codepoint(c))"))
+            else
+                push!(pieces, digitvalues[c])
+            end
+        end
+    end
+    if isempty(pieces)
+        throw(DomainError("No numeric digits in string: $(s)"))
+    end
+    for i in 1:(length(pieces) - 1)
+        # Compare decimal columns, not just values: a value-only check
+        # would accept two digits from one column ("βα" read as 3).
+        if ndigits(pieces[i]) <= ndigits(pieces[i + 1])
+            throw(DomainError("Invalid sequence of numeric digits: $(s)"))
+        end
+    end
+    sum(pieces)
+end
+
+"""Compute numeric value of a single string for a token expressing a Milesian integer.
+
+Handles myriads (`Μ…`), plain digit strings, and a thousands digit
+separated by one comma from the lower-value digits (`α,αʹ` is 1001).
+Throws a `DomainError` if the string has more than one comma.
+$(SIGNATURES)
+"""
+function intvalue(s::AbstractString)
+    @debug("Int of $(s)")
+    if startswith(s, "Μ")
+        myriadvalue(s)
+    else
+        pieces = split(s, ",")
+        if length(pieces) == 1
+            @debug("Get intdigits for $(pieces[1])")
+            intdigits(pieces[1])
+
+        elseif length(pieces) == 2
+            @debug("Here are the pieces: $(pieces)")
+            thousands = intdigits(pieces[1]) * 1000
+            # A bare thousands value such as `α,ʹ` has only the tick after the comma.
+            lowervals = filter(c -> ! istick(c), pieces[2])
+            isempty(lowervals) ? thousands : thousands + intdigits(lowervals)
+
+        else
+            throw(DomainError("Too many commas in numeric string: $(s)"))
+        end
+
+    end
+end
+
+
+
+
+"""Compute numeric value of a token for a Milesian integer.
+
+Equivalent to `intvalue(tkn)`; kept so existing callers keep working.
+$(SIGNATURES)
+"""
+function intpieces(tkn::OrthographicToken)
+    @assert tokencategory(tkn) isa MilesianIntegerToken
+    # No `intpieces` method exists for strings; evaluate the text with `intvalue`.
+    intvalue(tokentext(tkn))
+end
+
+
+"""True if `c` is one of the "tick" characters that
+Unicode equivalence can botch.
+$(SIGNATURES)
+"""
+function istick(c::Char)
+    c in (NUMERIC_TICK, FRACTION_TICK, PRIME, EVIL_PRIME, EVIL_DOUBLE_PRIME)
+end
+
+
+"""Compute numeric value of an orthographic token expressing a fractional value in Milesian notation.
+$(SIGNATURES)
+"""
+function fractionvalue(tkn::OrthographicToken)
+    fractionpieces(tkn) |> sum
+end
+
+"""Compute numeric value of an orthographic token expressing a fractional value in Milesian notation.
+$(SIGNATURES)
+"""
+function fractionpieces(tkn::OrthographicToken)
+    @assert tokencategory(tkn) isa MilesianFractionToken
+    fractionpieces(tokentext(tkn))
+end
+
+"""Compute numeric value of a string for a single token expressing a fractional value in Milesian notation.
+
+The special symbols for 1/2, 2/3 and 3/4 are returned directly; any other
+string is read as an integer and its reciprocal is returned.  Throws a
+`DomainError` if the string is empty, does not end in a tick character,
+or has an integer value of zero.
+$(SIGNATURES)
+"""
+function fractionpieces(s::AbstractString)
+    if isempty(s) || !(istick(last(s)))
+        throw(DomainError("String not marked with fraction tick: $(s)"))
+    end
+
+    if first(s) == ONE_HALF
+        1 /2
+
+    elseif first(s) == TWO_THIRDS
+        2 / 3
+
+    elseif first(s) == THREE_FOURTHS
+        3 / 4
+
+    else
+        @debug("Get int value of $(s)")
+        divisor = intvalue(s)
+        if divisor == 0
+            throw(DomainError("Reciprocal of zero is undefined: $(s)"))
+        end
+        1 / divisor
+
+    end
+end
diff --git a/src/ortho.jl b/src/ortho.jl
index 87cd56f..ce0b944 100644
--- a/src/ortho.jl
+++ b/src/ortho.jl
@@ -69,7 +69,7 @@ function splitPunctuation(s::AbstractString)
 end
 
 function numeric()
-    "$(NUMERIC_TICK)ϛϙϡΜ𐅵𐅷𐅸"
+    "$(NUMERIC_TICK)$(FRACTION_TICK)ϛϙϡΜ𐅵𐅷𐅸^Ο"
 end
 
 
@@ -118,8 +118,10 @@ function tokenforstring(s::AbstractString)
         OrthographicToken(normed, AstronomicalSymbol())
     elseif islabel(s)
         OrthographicToken(normed, FigureLabelToken())
-    elseif isnum(s)
+    elseif isinteger(s)
         OrthographicToken(normed, MilesianIntegerToken())
+    elseif isfraction(s)
+        OrthographicToken(normed, MilesianFractionToken())
     elseif isPunctuation(s)
         OrthographicToken(normed, PunctuationToken())
     elseif isAlphabetic(normed)
diff --git a/src/tokens.jl b/src/tokens.jl
index 4aca5d2..612a1af 100644
--- a/src/tokens.jl
+++ b/src/tokens.jl
@@ -16,11 +16,38 @@ function islabel(s)
     validlabel
 end
 
-"""True if final character of `s` is numeric marker.
+"""True if string is in fraction format in Milesian notation.
+
+$(SIGNATURES)
+"""
+function isfraction(s)
+    # Guard first: `s[end]` throws on an empty string.
+    !isempty(s) && s[end] == FRACTION_TICK # unicode x2033
+end
+
+"""True if string is in integer format in Milesian notation.
+
 $(SIGNATURES)
 """
-function isnum(s)
-    s[end] == NUMERIC_TICK # unicode x0374
+function isinteger(s)
+    # Guard first: `s[1]` and `s[end]` throw on an empty string.
+    isempty(s) && return false
+    @debug("Look at integer string $(s). Ends with tick? $(s[end])")
+    if endswith(s, "$(NUMERIC_TICK)$(NUMERIC_TICK)")
+        false
+    elseif s == "Μ" # upper case Mu, unicode x039c
+        true
+
+    elseif s[1] == 'Μ' && s[end] == '^'
+        true
+
+    elseif s[end] == NUMERIC_TICK || s[end] == EVIL_PRIME # unicode x0374
+        true
+
+    else
+        @debug("Not an int.")
+        false
+    end
 end
 
 
diff --git a/src/types.jl b/src/types.jl
index 07b7f02..463ab18 100644
--- a/src/types.jl
+++ b/src/types.jl
@@ -13,4 +13,3 @@ struct MilesianFractionToken <: GreekNumericToken end
 
 
 
-
diff --git a/test/runtests.jl b/test/runtests.jl
index 9bc78d7..802f709 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -4,4 +4,5 @@ using Orthography, PolytonicGreek
 using Unicode
 
 
-include("test_tokenization.jl")
\ No newline at end of file
+include("test_tokenization.jl")
+include("test_milesian.jl")
\ No newline at end of file
diff --git a/test/test_milesian.jl b/test/test_milesian.jl
new file mode 100644
index 0000000..7aa9172
--- /dev/null
+++ b/test/test_milesian.jl
@@ -0,0 +1,30 @@
+
+@testset "Test parsing integer values" begin
+    @test milesian("Ο$(NUMERIC_TICK)") == 0
+    @test milesian("α$(NUMERIC_TICK)") == 1
+    @test milesian("ρα$(NUMERIC_TICK)") == 101
+    @test milesian("ρια$(NUMERIC_TICK)") == 111
+    @test milesian("α,$(NUMERIC_TICK)") == 1000
+    @test milesian("α,α$(NUMERIC_TICK)") == 1001
+
+    @test milesian("θ,ϡϙθ$(NUMERIC_TICK)") == 9999
+    @test milesian("Μ$(NUMERIC_TICK)") == 10000
+
+    @test milesian("Μ$(NUMERIC_TICK) α$(NUMERIC_TICK)") == 10001
+    @test milesian("Μ$(NUMERIC_TICK) θ,ϡϙθ$(NUMERIC_TICK)") == 19999
+    @test milesian("Μ^β^") == 20000
+    @test milesian("Μ^β^ α,$(NUMERIC_TICK)") == 21000
+    @test milesian("Μ^β^ α,α$(NUMERIC_TICK)") == 21001
+end
+
+
+@testset "Test parsing fractional values" begin
+    @test milesian("𐅵$(FRACTION_TICK)") == 0.5
+    @test milesian("𐅷$(FRACTION_TICK)") == (2 / 3)
+    @test milesian("𐅸$(FRACTION_TICK)") == 0.75
+
+
+    @test milesian("β$(FRACTION_TICK)") == 0.5
+    @test milesian("ιβ$(FRACTION_TICK)") == (1 / 12)
+
+end
diff --git 
a/test/test_tokenization.jl b/test/test_tokenization.jl index 30ec63c..2c74e03 100644 --- a/test/test_tokenization.jl +++ b/test/test_tokenization.jl @@ -56,16 +56,23 @@ end @test tokencategory(hundreds_tkns[1]) isa MilesianIntegerToken myriads = "Μ^β^" - @test_broken validstring(myriads, ortho) # == 20,000 + @test validstring(myriads, ortho) # == 20,000 myriads_tkns = tokenize(myriads, ortho) @test length(myriads_tkns) == 1 - @test_broken tokencategory(myriads_tkns[1]) isa MilesianIntegerToken + @test tokencategory(myriads_tkns[1]) isa MilesianIntegerToken fraction = "η″" - @test_broken validstring(fraction, ortho) # == 1/8 + @test validstring(fraction, ortho) # == 1/8 fraction_tkns = tokenize(fraction, ortho) @test length(fraction_tkns) == 1 - @test_broken tokencategory(fraction_tkns[1]) isa MilesianFractionToken + @test tokencategory(fraction_tkns[1]) isa MilesianFractionToken + + + ouden = "Οʹ" + @test validstring(ouden, ortho) + ouden_tkns = tokenize(ouden, ortho) + @test length(ouden_tkns) == 1 + @test tokencategory(ouden_tkns[1]) isa MilesianIntegerToken end