From fc935f866b87888022c14d283adafcc1164bdf1f Mon Sep 17 00:00:00 2001 From: Neel Smith Date: Fri, 26 Jul 2024 09:29:18 -0400 Subject: [PATCH 01/11] formatting --- quarto/index.qmd | 1 - 1 file changed, 1 deletion(-) diff --git a/quarto/index.qmd b/quarto/index.qmd index bf63a62..4d24b73 100644 --- a/quarto/index.qmd +++ b/quarto/index.qmd @@ -8,4 +8,3 @@ title: "GreekScientificOrthography.jl" | --- | --- | --- | | **Hands-on** | Try a [tutorial](./tutorials/) to start using the package | Find [recipes](./guides/) for specific tasks | | **Cognition** | The [orthography of Greek math and science](./concepts/) | See [reference information](./reference/) (including API documentation for all exported functions) | - From 693f143ec498c17c553ff306cdde678daf2fab0f Mon Sep 17 00:00:00 2001 From: Neel Smith Date: Fri, 26 Jul 2024 14:07:48 -0400 Subject: [PATCH 02/11] Correctly parse fractions and large numbers --- Project.toml | 2 +- src/GreekScientificOrthography.jl | 3 +++ src/ortho.jl | 6 ++++-- src/tokens.jl | 26 +++++++++++++++++++++++--- test/test_tokenization.jl | 8 ++++---- 5 files changed, 35 insertions(+), 10 deletions(-) diff --git a/Project.toml b/Project.toml index 362df18..ced3268 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "GreekScientificOrthography" uuid = "049fe880-56b1-44e6-a5bf-3b4decaa7e2d" authors = ["Neel Smith "] -version = "0.1.0" +version = "0.2.0" [deps] DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" diff --git a/src/GreekScientificOrthography.jl b/src/GreekScientificOrthography.jl index 86c49b5..1100229 100644 --- a/src/GreekScientificOrthography.jl +++ b/src/GreekScientificOrthography.jl @@ -22,6 +22,9 @@ export codepoints, tokentypes, tokenize const NUMERIC_TICK = 'ʹ' # unicode x0374 export NUMERIC_TICK +const FRACTION_TICK = '″' # unicode x2033, "double prime" +export FRACTION_TICK + include("types.jl") diff --git a/src/ortho.jl b/src/ortho.jl index 87cd56f..1ab4cfb 100644 --- a/src/ortho.jl +++ 
b/src/ortho.jl @@ -69,7 +69,7 @@ function splitPunctuation(s::AbstractString) end function numeric() - "$(NUMERIC_TICK)ϛϙϡΜ𐅵𐅷𐅸" + "$(NUMERIC_TICK)$(FRACTION_TICK)ϛϙϡΜ𐅵𐅷𐅸^" end @@ -118,8 +118,10 @@ function tokenforstring(s::AbstractString) OrthographicToken(normed, AstronomicalSymbol()) elseif islabel(s) OrthographicToken(normed, FigureLabelToken()) - elseif isnum(s) + elseif isinteger(s) OrthographicToken(normed, MilesianIntegerToken()) + elseif isfraction(s) + OrthographicToken(normed, MilesianFractionToken()) elseif isPunctuation(s) OrthographicToken(normed, PunctuationToken()) elseif isAlphabetic(normed) diff --git a/src/tokens.jl b/src/tokens.jl index 4aca5d2..3041ae4 100644 --- a/src/tokens.jl +++ b/src/tokens.jl @@ -16,11 +16,31 @@ function islabel(s) validlabel end -"""True if final character of `s` is numeric marker. +"""True if string is in fraction format in Milesian notation. + +$(SIGNATURES) +""" +function isfraction(s) + s[end] == FRACTION_TICK # unicode x2033 +end + +"""True if string is in integer format in Milesian notation. 
+ $(SIGNATURES) """ -function isnum(s) - s[end] == NUMERIC_TICK # unicode x0374 +function isinteger(s) + if s == "Μ" # upper case Mu, unicode x039c + true + + elseif s[1] == 'Μ' && s[end] == '^' + true + + elseif s[end] == NUMERIC_TICK # unicode x0374 + true + + else + false + end end diff --git a/test/test_tokenization.jl b/test/test_tokenization.jl index 30ec63c..e1fc461 100644 --- a/test/test_tokenization.jl +++ b/test/test_tokenization.jl @@ -56,16 +56,16 @@ end @test tokencategory(hundreds_tkns[1]) isa MilesianIntegerToken myriads = "Μ^β^" - @test_broken validstring(myriads, ortho) # == 20,000 + @test validstring(myriads, ortho) # == 20,000 myriads_tkns = tokenize(myriads, ortho) @test length(myriads_tkns) == 1 - @test_broken tokencategory(myriads_tkns[1]) isa MilesianIntegerToken + @test tokencategory(myriads_tkns[1]) isa MilesianIntegerToken fraction = "η″" - @test_broken validstring(fraction, ortho) # == 1/8 + @test validstring(fraction, ortho) # == 1/8 fraction_tkns = tokenize(fraction, ortho) @test length(fraction_tkns) == 1 - @test_broken tokencategory(fraction_tkns[1]) isa MilesianFractionToken + @test tokencategory(fraction_tkns[1]) isa MilesianFractionToken end From 7c2d376b9d520032637350edbaded764075bbed1 Mon Sep 17 00:00:00 2001 From: Neel Smith Date: Fri, 26 Jul 2024 14:45:54 -0400 Subject: [PATCH 03/11] Document numeric notation --- quarto/tutorials/numbers.qmd | 108 ++++++++++++++++++++++++++++++----- src/types.jl | 1 - 2 files changed, 95 insertions(+), 14 deletions(-) diff --git a/quarto/tutorials/numbers.qmd b/quarto/tutorials/numbers.qmd index 2eecfa3..8882c3b 100644 --- a/quarto/tutorials/numbers.qmd +++ b/quarto/tutorials/numbers.qmd @@ -1,22 +1,22 @@ # Formatting numeric tokens - +```{julia} +#| warning: false +#| echo: false +#| output: false +using GreekScientificOrthography, Orthography +``` Manuscripts of Greek scientific and mathematical texts use the "Milesian" system of numeric notation. 
-In version `0.1.0` of `GreekScientificOrthography`, integer values from 1 to 9,999 in this notation are correctly tokenized. - ## Integers in the "Milesian" system of notation -The Milesian system is essentially a place-value system. The 27 values for ones (1-9), tens (10-90), and hundreds (100-900) are noted with specific alphabetic characters in normal alphabetic order, with the additions of `ϛ` for 6, `ϙ` for 90 and `ϡ` for 900. - +### Values from 1-999 -- ones, tens, hundreds: alphabetic characters followed by the Greek numeric marker 'ʹ' (Unicode `x0374`). -- comma separates thousands -- myriad (10,000) is upper-case mu, Μ - +The Milesian system is essentially a place-value system. The 27 values for ones (1-9), tens (10-90), and hundreds (100-900) are noted with specific alphabetic characters in normal alphabetic order, with the additions of `ϛ` for 6, `ϙ` for 90 and `ϡ` for 900. - -Summary of the Milesian system: +::: {.callout-note} +Note that in `GreekScientificOrthography`, characters used to write the basic integer values *must be in lower-case*. +::: ::: {.column width="40%"} @@ -35,8 +35,90 @@ Summary of the Milesian system: ::: -## Integers greater than 9,999 + +Since most of the characters used to write integers in the Milesian system can also be alphabetic characters, integer tokens are flagged with a special marker, the numeric tick mark, `ʹ` (Unicode x0374). The integer value `1` is written like this: + +```{julia} +one = "αʹ" +``` + + +The sequence of digits is always smallest to largest from right to left. Note that their is no need for a zero character to represent the absence of a value is hundreds, tens or ones column, since the characters are distinct. + + + +```{julia} +eleven = "ιαʹ" +one_hundred_one = "ραʹ" +one_hundred_eleven = "ριαʹ" +``` + +All of these are valid strings in a `GreekSciOrthography`. 
+ +```{julia} +ortho = stemortho() +validstring(one, ortho) == validstring(eleven, ortho) == validstring(one_hundred_one, ortho) == validstring(one_hundred_eleven, ortho) +``` + + +A comma separates thousands values from 1,000 - 9,000 (left of the comma) from hundreds, tens and ones. The thousands values reuse the same characters as the ones values. + +```{julia} +one_thousand_one = "α,αʹ" +validstring(one_thousand_one, ortho) +``` + +All of these strings represent a single integer token (type `MilesianIntegerToken`). + +```{julia} +tokenize(one, ortho) +``` + + +```{julia} +tokenize(one_thousand_one, ortho) +``` + +## Integers from 999-19,999 + +The value for 10,000 is written with the upper-case mu, `Μ` (for μυριάς, "myriad"). + +```{julia} +myriad = "Μʹ" +validstring(myriad, ortho) +``` + +Conventionally, the myriads value is written as a separate token separated by white space from the smaller columns. That means that a value like 10,001 will be parsed as two tokens. + +```{julia} +tenthousand_one = "Μʹ αʹ" +tokenize(tenthousand_one, ortho) +``` + +This is capable of expressing whole numbers up to 19,999. + +```{julia} +nineteen_999 = "Μʹ θ,ϡϙθʹ" +validstring(nineteen_999, ortho) +``` + +```{julia} +tokenize(nineteen_999, ortho) +``` + + +## Integers greater than 19,999 + + + ## Fractional values -## Using integer values in hexadecimal units \ No newline at end of file + + +## Using integer values in hexadecimal units + + + +- values 0-59. Adds special character `Ο` (οὐδέν, "nothing") for 0. 
+- tokenize \ No newline at end of file diff --git a/src/types.jl b/src/types.jl index 07b7f02..463ab18 100644 --- a/src/types.jl +++ b/src/types.jl @@ -13,4 +13,3 @@ struct MilesianFractionToken <: GreekNumericToken end - From 02c6f6fd4d0abccd495cd384e255594f21a35e48 Mon Sep 17 00:00:00 2001 From: Neel Smith Date: Fri, 26 Jul 2024 15:43:33 -0400 Subject: [PATCH 04/11] test ouden --- quarto/tutorials/numbers.qmd | 67 ++++++++++++++++++++++++++++++++++-- src/ortho.jl | 2 +- test/test_tokenization.jl | 7 ++++ 3 files changed, 72 insertions(+), 4 deletions(-) diff --git a/quarto/tutorials/numbers.qmd b/quarto/tutorials/numbers.qmd index 8882c3b..bc9eb9d 100644 --- a/quarto/tutorials/numbers.qmd +++ b/quarto/tutorials/numbers.qmd @@ -39,6 +39,7 @@ Note that in `GreekScientificOrthography`, characters used to write the basic in Since most of the characters used to write integers in the Milesian system can also be alphabetic characters, integer tokens are flagged with a special marker, the numeric tick mark, `ʹ` (Unicode x0374). The integer value `1` is written like this: ```{julia} +#| output: false one = "αʹ" ``` @@ -48,6 +49,7 @@ The sequence of digits is always smallest to largest from right to left. Note th ```{julia} +#| output: false eleven = "ιαʹ" one_hundred_one = "ραʹ" one_hundred_eleven = "ριαʹ" @@ -95,7 +97,12 @@ tenthousand_one = "Μʹ αʹ" tokenize(tenthousand_one, ortho) ``` -This is capable of expressing whole numbers up to 19,999. +::: {.callout-note} +Greek manuscripts do not normally repeat the tick mark on the myriad marker; `GreekScientificOrthography` requires this to guarantee context-independent parsing of tokens. +::: + + +You can express whole numbers up to 19,999 in this way. ```{julia} nineteen_999 = "Μʹ θ,ϡϙθʹ" @@ -109,16 +116,70 @@ tokenize(nineteen_999, ortho) ## Integers greater than 19,999 +To write values larger than 19,999, Milesian notation begins by multiplying the myriad character. 
In manuscripts, the multiplier is normally written above the `Μ`; in `GreekScientificOrthography`, we use the markdown convention for superscript that is supported by pandoc, among others, of bracketing the superscript value with carets. In this context, neither numeric character requires the numeric tick. + +20,000, for example, is written as Μ multiplied by β, like this example: + +```{julia} +twentyk = "Μ^β^" +validstring(twentyk, ortho) +``` + +When you display the value in an environment that supports pandoc's markdown extension, the multiplier will display as a superscript. + +```{julia} +using Markdown +Markdown.parse(twentyk) +``` + + +Archimedes uses this notation as he derives limiting values for *pi* in his treatise *Measurement of a Circle*. The value 349,450, for example, is written with these two tokens: + +```{julia} +threefortynine450 = "Μ^λδ^ ,θυνʹ" +Markdown.parse(threefortynine450) +``` + +```{julia} +tokenize(threefortynine450, ortho) +``` ## Fractional values +`GreekScientificOrthography` includes characters for three fractional values that are often written in manuscripts with special symbols: `𐅵` for one half, `𐅷` for two-thirds, and `𐅸` for three quarters. +Apart from these special cases, the only notation for fractional values available to Greek mathematicians was to use normal integer notation, but flagged with a special double-prime marker indicating that this is a *reciprocal* value. + +```{julia} +sixth = "ϛ″" +thirtysixth = "λϛ″" +validstring(sixth, ortho) == validstring(thirtysixth, ortho) +``` + +Other fractional values would be written as sums of these. The fraction 2/3 can, for example, appear as 1/2 + 1/6, written simply as a succession of individual fractional tokens. As with integer values, the sequence is least to greatest from right to left. + +```{julia} +twothirds = "β″ ϛ″" +tokenize(twothirds, ortho) +``` + +Fractions can of course be mixed with integer numbers. 
+ +```{julia} +six_and_twothirds = "ϛʹ β″ ϛ″" +``` ## Using integer values in hexadecimal units +::: {.callout-note title="TBA"} + +An alternative to tedious sums of fractions: + +- divide whole units into sixtieths, seconds (3600ths), thirds (216000ths)... each written with an integer value up to 59 +- typically used in tables where columns determine meaning of integer +- but manually written columns make null values dangerous if they're left blank, so adds special character `Ο` (οὐδέν, "nothing") for 0. -- values 0-59. Adds special character `Ο` (οὐδέν, "nothing") for 0. -- tokenize \ No newline at end of file +::: \ No newline at end of file diff --git a/src/ortho.jl b/src/ortho.jl index 1ab4cfb..ce0b944 100644 --- a/src/ortho.jl +++ b/src/ortho.jl @@ -69,7 +69,7 @@ function splitPunctuation(s::AbstractString) end function numeric() - "$(NUMERIC_TICK)$(FRACTION_TICK)ϛϙϡΜ𐅵𐅷𐅸^" + "$(NUMERIC_TICK)$(FRACTION_TICK)ϛϙϡΜ𐅵𐅷𐅸^Ο" end diff --git a/test/test_tokenization.jl b/test/test_tokenization.jl index e1fc461..2c74e03 100644 --- a/test/test_tokenization.jl +++ b/test/test_tokenization.jl @@ -68,4 +68,11 @@ end @test length(fraction_tkns) == 1 @test tokencategory(fraction_tkns[1]) isa MilesianFractionToken + + ouden = "Οʹ" + @test validstring(ouden, ortho) + ouden_tkns = tokenize(ouden, ortho) + @test length(ouden_tkns) == 1 + @test tokencategory(ouden_tkns[1]) isa MilesianIntegerToken + end From 928311a5e90cc19bb567b1ac2f00926ea22a0394 Mon Sep 17 00:00:00 2001 From: Neel Smith Date: Fri, 26 Jul 2024 15:44:14 -0400 Subject: [PATCH 05/11] Change version number --- quarto/_quarto.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/quarto/_quarto.yml b/quarto/_quarto.yml index 4462a2c..b958efa 100644 --- a/quarto/_quarto.yml +++ b/quarto/_quarto.yml @@ -5,7 +5,7 @@ theme: spacelab execute: freeze: auto date: last-modified -margin-header: "Package version: `0.1.0`" +margin-header: "Package version: `0.2.0`" format: html: toc: true From 
dfb2e272c13fff231f0330f0050f20fcc9f3361f Mon Sep 17 00:00:00 2001 From: Neel Smith Date: Sat, 27 Jul 2024 07:09:54 -0400 Subject: [PATCH 06/11] Start on parsing numbers --- src/GreekScientificOrthography.jl | 14 ++++++ src/digits.jl | 31 ++++++++++++ src/milesian.jl | 80 +++++++++++++++++++++++++++++++ test/runtests.jl | 3 +- test/test_milesian.jl | 4 ++ 5 files changed, 131 insertions(+), 1 deletion(-) create mode 100644 src/digits.jl create mode 100644 src/milesian.jl create mode 100644 test/test_milesian.jl diff --git a/src/GreekScientificOrthography.jl b/src/GreekScientificOrthography.jl index 1100229..af2934e 100644 --- a/src/GreekScientificOrthography.jl +++ b/src/GreekScientificOrthography.jl @@ -22,14 +22,28 @@ export codepoints, tokentypes, tokenize const NUMERIC_TICK = 'ʹ' # unicode x0374 export NUMERIC_TICK +const PRIME = '′' # unicode x2032 +export PRIME + const FRACTION_TICK = '″' # unicode x2033, "double prime" export FRACTION_TICK +const ONE_HALF = '𐅵' # unicode x10175 +export ONE_HALF + +const TWO_THIRDS = '𐅷' # unicode x10177 +export TWO_THIRDS + +const THREE_FOURTHS = '𐅸' # unicode x10178 +export THREE_FOURTHS +export milesian include("types.jl") include("tokens.jl") include("ortho.jl") include("lexical.jl") +include("digits.jl") +include("milesian.jl") end # module GreekSciOrthography diff --git a/src/digits.jl b/src/digits.jl new file mode 100644 index 0000000..bbbdec4 --- /dev/null +++ b/src/digits.jl @@ -0,0 +1,31 @@ +const digitvalues = Dict( + 'α' => 1, + 'β' => 2, + 'γ' => 3, + 'δ' => 4, + 'ε' => 5, + 'ϛ' => 6, + 'ζ' => 7, + 'η' => 8, + 'θ' => 9, + + 'ι' => 10, + 'κ' => 20, + 'λ' => 30, + 'μ' => 40, + 'ν' => 50, + 'ξ' => 60, + 'ο' => 70, + 'π' => 80, + 'ϙ' => 90, + + 'ρ' => 100, + 'σ' => 200, + 'τ' => 300, + 'υ' => 400, + 'φ' => 500, + 'χ' => 600, + 'ψ' => 700, + 'ω' => 800, + 'ϡ' => 900 +) \ No newline at end of file diff --git a/src/milesian.jl b/src/milesian.jl new file mode 100644 index 0000000..436d121 --- /dev/null +++ 
b/src/milesian.jl @@ -0,0 +1,80 @@ +"""Compute numeric value of string in Milesian notation. +$(SIGNATURES) +""" +function milesian(s::AbstractString) + tkns = tokenize(s, stemortho()) + milesian.(tkns) |> sum +end + +"""Compute numeric value of an orthographic tkoen in Milesian notation. +$(SIGNATURES) +""" +function milesian(tkn::OrthographicToken) + if tokencategory(tkn) isa MilesianFractionToken + fractionvalue(tkn) + + elseif tokencategory(tkn) isa MilesianIntegerToken + + else + throw(DomainError("Not a Milesian token: $(tkn).")) + end +end + + +function intvalue(tkn::OrthographicToken) +end + + + + + +function istick(c::Char) + c in [NUMERIC_TICK, FRACTION_TICK, PRIME] +end + +function fractionvalue(tkn::OrthographicToken) + fractionpieces(tkn) |> sum +end + +"""Compute numeric value of a orthographic token in Milesian notation. +$(SIGNATURES) +""" +function fractionpieces(tkn::OrthographicToken) + @assert tokencategory(tkn) isa MilesianFractionToken + fractionpieces(tokentext(tkn)) +end + +function fractionpieces(s::AbstractString) + @assert(istick(s[end])) + + pieces = [] + for gr in graphemes(s) + @debug("Eval grapheme $(gr)") + if gr == "$(ONE_HALF)" + grval = 1 /2 + push!(pieces, grval) + elseif gr == "$(TWO_THIRDS)" + grval = 2 / 3 + push!(pieces, grval) + elseif gr == "$(THREE_FOURTHS)" + grval = 3 / 4 + push!(pieces, grval) + + elseif istick(gr[1]) + else + @debug("Eval char $(gr[1])") + @assert gr[1] in keys(digitvalues) + grval = 1 / digitvalues[gr[1]] + push!(pieces, grval) + end + end + pieces +end + +#= +if s == "Μʹ" + 10000 +elseif if s == "Οʹ" + 0 +else +=# \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index 9bc78d7..802f709 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -4,4 +4,5 @@ using Orthography, PolytonicGreek using Unicode -include("test_tokenization.jl") \ No newline at end of file +include("test_tokenization.jl") +include("test_milesian.jl") \ No newline at end of file diff --git 
a/test/test_milesian.jl b/test/test_milesian.jl new file mode 100644 index 0000000..d4534e0 --- /dev/null +++ b/test/test_milesian.jl @@ -0,0 +1,4 @@ + +@testset "Test conversion of numeric values" begin + +end \ No newline at end of file From be5e09e79cc4075e3be68acb6e5ee34574c9ec6d Mon Sep 17 00:00:00 2001 From: Neel Smith Date: Sat, 27 Jul 2024 08:43:19 -0400 Subject: [PATCH 07/11] Fail tests on integer values --- quarto/tutorials/numbers.qmd | 2 +- test/test_milesian.jl | 15 ++++++++++++++- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/quarto/tutorials/numbers.qmd b/quarto/tutorials/numbers.qmd index bc9eb9d..49d1fad 100644 --- a/quarto/tutorials/numbers.qmd +++ b/quarto/tutorials/numbers.qmd @@ -19,7 +19,7 @@ Note that in `GreekScientificOrthography`, characters used to write the basic in ::: -::: {.column width="40%"} +::: {.column width="50%"} | Ones | Tens | Hundreds | | --- | --- | --- | diff --git a/test/test_milesian.jl b/test/test_milesian.jl index d4534e0..2992421 100644 --- a/test/test_milesian.jl +++ b/test/test_milesian.jl @@ -1,4 +1,17 @@ -@testset "Test conversion of numeric values" begin +@testset "Test parsing integer values" begin + @test_broken milesian("Ο$(NUMERIC_TICK)") == 0 + @test_broken milesian("α$(NUMERIC_TICK)") == 1 + @test_broken milesian("ρα$(NUMERIC_TICK)") == 101 + @test_broken milesian("ρια$(NUMERIC_TICK)") == 111 + @test_broken milesian("α,$(NUMERIC_TICK)") == 1000 + @test_broken milesian("α,α$(NUMERIC_TICK)") == 1001 + + @test_broken milesian("θ,ϡϙθ$(NUMERIC_TICK)") == 9999 + @test_broken milesian("Μ$(NUMERIC_TICK)") == 10000 + @test_broken milesian("Μ$(NUMERIC_TICK) α$(NUMERIC_TICK)") == 10001 + @test_broken milesian("Μ$(NUMERIC_TICK) θ,ϡϙθ$(NUMERIC_TICK)") == 19999 + @test_broken milesian("Μ^β^") == 20000 + @test_broken milesian("Μ^β^ α,$(NUMERIC_TICK)") == 21001 end \ No newline at end of file From 303fa588a86d70e4cf2bcdac2241a8a9f3eb854e Mon Sep 17 00:00:00 2001 From: Neel Smith Date: Sat, 27 Jul 2024 
09:18:14 -0400 Subject: [PATCH 08/11] Progress parsing ints --- src/GreekScientificOrthography.jl | 4 ++ src/milesian.jl | 82 +++++++++++++++++++++++++++++-- test/test_milesian.jl | 24 ++++----- 3 files changed, 95 insertions(+), 15 deletions(-) diff --git a/src/GreekScientificOrthography.jl b/src/GreekScientificOrthography.jl index af2934e..835d801 100644 --- a/src/GreekScientificOrthography.jl +++ b/src/GreekScientificOrthography.jl @@ -37,6 +37,10 @@ export TWO_THIRDS const THREE_FOURTHS = '𐅸' # unicode x10178 export THREE_FOURTHS +const EVIL_PRIME = Char(0x2b9) # "modifier letter prime" +const EVIL_DOUBLE_PRIME = Char(0x2ba) # "modifier letter double prime" + + export milesian include("types.jl") diff --git a/src/milesian.jl b/src/milesian.jl index 436d121..359123a 100644 --- a/src/milesian.jl +++ b/src/milesian.jl @@ -14,22 +14,98 @@ function milesian(tkn::OrthographicToken) fractionvalue(tkn) elseif tokencategory(tkn) isa MilesianIntegerToken - + intvalue(tkn) else throw(DomainError("Not a Milesian token: $(tkn).")) end end - +"""Compute numeric value of a token for a Milesian integer. +$(SIGNATURES) +""" function intvalue(tkn::OrthographicToken) + @assert tokencategory(tkn) isa MilesianIntegerToken + intvalue(tokentext(tkn)) #|> sum +end + +function myriadvalue(s::AbstractString) + myriad = 10000 + parts = split(s, "^") + if length(parts) == 1 + myriad + else + factor = intvalue(parts[2]) + myriad * factor + end +end + +function intdigits(s) + pieces = [] + for c in s + if istick(c) + # ignore + else + if ! (c in keys(digitvalues)) + throw(DomainError("Invalid digit: $(c) $(codepoint(c))")) + else + push!(pieces, digitvalues[c]) + end + end + end + if length(pieces) > 1 + max = length(pieces) - 1 + for i in 1:max + if pieces[i] <= pieces[i + 1] + throw(DomainError("Invalid sequence of numeric digits: $(s)")) + end + end + end + pieces |> sum end +"""Compute numeric value of a single string for token a Milesian integer. 
+$(SIGNATURES) +""" +function intvalue(s::AbstractString) + @info("Int of $(s)") + if startswith(s, "Μ") + myriadvalue(s) + else + pieces = split(s, ",") + if length(pieces) == 1 + #intdigits(pieces[1]) + @info("Get intdigits for $(pieces[1])") + intdigits(pieces[1]) + + else + thousands = intdigits(pieces[1]) * 1000 + isempty(pieces[2]) ? thousands : thousands + intdigits(pieces[2]) + + end + + end +end +"""Compute numeric value of a token for a Milesian integer. +$(SIGNATURES) +""" +function intpieces(tkn::OrthographicToken) + @assert tokencategory(tkn) isa MilesianIntegerToken + intpieces(tokentext(tkn)) +end + +function intpieces(s::AbstractString) +end + +"""True if `c` is one of the "tick" characters that +Unicode equivalence can botch. +$(SIGNATURES) +""" function istick(c::Char) - c in [NUMERIC_TICK, FRACTION_TICK, PRIME] + c in [NUMERIC_TICK, FRACTION_TICK, PRIME, EVIL_PRIME, EVIL_DOUBLE_PRIME] end function fractionvalue(tkn::OrthographicToken) diff --git a/test/test_milesian.jl b/test/test_milesian.jl index 2992421..888db2e 100644 --- a/test/test_milesian.jl +++ b/test/test_milesian.jl @@ -1,17 +1,17 @@ @testset "Test parsing integer values" begin - @test_broken milesian("Ο$(NUMERIC_TICK)") == 0 - @test_broken milesian("α$(NUMERIC_TICK)") == 1 - @test_broken milesian("ρα$(NUMERIC_TICK)") == 101 - @test_broken milesian("ρια$(NUMERIC_TICK)") == 111 - @test_broken milesian("α,$(NUMERIC_TICK)") == 1000 - @test_broken milesian("α,α$(NUMERIC_TICK)") == 1001 + @test milesian("Ο$(NUMERIC_TICK)") == 0 + @test milesian("α$(NUMERIC_TICK)") == 1 + @test milesian("ρα$(NUMERIC_TICK)") == 101 + @test milesian("ρια$(NUMERIC_TICK)") == 111 + @test milesian("α,$(NUMERIC_TICK)") == 1000 + @test milesian("α,α$(NUMERIC_TICK)") == 1001 - @test_broken milesian("θ,ϡϙθ$(NUMERIC_TICK)") == 9999 - @test_broken milesian("Μ$(NUMERIC_TICK)") == 10000 + @test milesian("θ,ϡϙθ$(NUMERIC_TICK)") == 9999 + @test milesian("Μ$(NUMERIC_TICK)") == 10000 - @test_broken milesian("Μ$(NUMERIC_TICK) 
α$(NUMERIC_TICK)") == 10001 - @test_broken milesian("Μ$(NUMERIC_TICK) θ,ϡϙθ$(NUMERIC_TICK)") == 19999 - @test_broken milesian("Μ^β^") == 20000 - @test_broken milesian("Μ^β^ α,$(NUMERIC_TICK)") == 21001 + @test milesian("Μ$(NUMERIC_TICK) α$(NUMERIC_TICK)") == 10001 + @test milesian("Μ$(NUMERIC_TICK) θ,ϡϙθ$(NUMERIC_TICK)") == 19999 + @test milesian("Μ^β^") == 20000 + @test milesian("Μ^β^ α,$(NUMERIC_TICK)") == 21001 end \ No newline at end of file From 1f76871c95fd7aa45940eefe1bc8ad50ceb79cf2 Mon Sep 17 00:00:00 2001 From: Neel Smith Date: Sat, 27 Jul 2024 20:57:04 -0400 Subject: [PATCH 09/11] Work on numeric parsing --- src/digits.jl | 4 +++- src/milesian.jl | 31 ++++++++++++++++++------------- src/tokens.jl | 4 +++- test/test_milesian.jl | 17 +++++++++++++++-- 4 files changed, 39 insertions(+), 17 deletions(-) diff --git a/src/digits.jl b/src/digits.jl index bbbdec4..82089a7 100644 --- a/src/digits.jl +++ b/src/digits.jl @@ -27,5 +27,7 @@ const digitvalues = Dict( 'χ' => 600, 'ψ' => 700, 'ω' => 800, - 'ϡ' => 900 + 'ϡ' => 900, + + 'Ο' => 0 ) \ No newline at end of file diff --git a/src/milesian.jl b/src/milesian.jl index 359123a..747c9d4 100644 --- a/src/milesian.jl +++ b/src/milesian.jl @@ -14,6 +14,7 @@ function milesian(tkn::OrthographicToken) fractionvalue(tkn) elseif tokencategory(tkn) isa MilesianIntegerToken + @info("Its an int: $(tkn)") intvalue(tkn) else throw(DomainError("Not a Milesian token: $(tkn).")) @@ -28,6 +29,11 @@ function intvalue(tkn::OrthographicToken) intvalue(tokentext(tkn)) #|> sum end + + +"""Compute numeric value of a token expressing numbers of myriads. +$(SIGNATURES) +""" function myriadvalue(s::AbstractString) myriad = 10000 parts = split(s, "^") @@ -39,11 +45,16 @@ function myriadvalue(s::AbstractString) end end -function intdigits(s) + +"""Compute numeric value of a string expressing an integer value below 1,000. +$(SIGNATURES) +""" +function intdigits(s::AbstractString) pieces = [] for c in s if istick(c) # ignore + else if ! 
(c in keys(digitvalues)) throw(DomainError("Invalid digit: $(c) $(codepoint(c))")) @@ -78,8 +89,10 @@ function intvalue(s::AbstractString) intdigits(pieces[1]) else + @info("Here are the pieces: $(pieces)") thousands = intdigits(pieces[1]) * 1000 - isempty(pieces[2]) ? thousands : thousands + intdigits(pieces[2]) + lowervals = filter(s -> ! istick(s), pieces[2]) + isempty(lowervals) ? thousands : thousands + intdigits(lowervals) end @@ -97,8 +110,6 @@ function intpieces(tkn::OrthographicToken) intpieces(tokentext(tkn)) end -function intpieces(s::AbstractString) -end """True if `c` is one of the "tick" characters that Unicode equivalence can botch. @@ -108,11 +119,13 @@ function istick(c::Char) c in [NUMERIC_TICK, FRACTION_TICK, PRIME, EVIL_PRIME, EVIL_DOUBLE_PRIME] end + + function fractionvalue(tkn::OrthographicToken) fractionpieces(tkn) |> sum end -"""Compute numeric value of a orthographic token in Milesian notation. +"""Compute numeric value of an orthographic token expressing a fractional value in Milesian notation. $(SIGNATURES) """ function fractionpieces(tkn::OrthographicToken) @@ -146,11 +159,3 @@ function fractionpieces(s::AbstractString) end pieces end - -#= -if s == "Μʹ" - 10000 -elseif if s == "Οʹ" - 0 -else -=# \ No newline at end of file diff --git a/src/tokens.jl b/src/tokens.jl index 3041ae4..4f59187 100644 --- a/src/tokens.jl +++ b/src/tokens.jl @@ -29,13 +29,15 @@ end $(SIGNATURES) """ function isinteger(s) + + @info("Look at $(s). Ends with tick? 
$(s[end])") if s == "Μ" # upper case Mu, unicode x039c true elseif s[1] == 'Μ' && s[end] == '^' true - elseif s[end] == NUMERIC_TICK # unicode x0374 + elseif s[end] == NUMERIC_TICK || s[end] == EVIL_PRIME # unicode x0374 true else diff --git a/test/test_milesian.jl b/test/test_milesian.jl index 888db2e..7aa9172 100644 --- a/test/test_milesian.jl +++ b/test/test_milesian.jl @@ -13,5 +13,18 @@ @test milesian("Μ$(NUMERIC_TICK) α$(NUMERIC_TICK)") == 10001 @test milesian("Μ$(NUMERIC_TICK) θ,ϡϙθ$(NUMERIC_TICK)") == 19999 @test milesian("Μ^β^") == 20000 - @test milesian("Μ^β^ α,$(NUMERIC_TICK)") == 21001 -end \ No newline at end of file + @test milesian("Μ^β^ α,$(NUMERIC_TICK)") == 21000 + @test milesian("Μ^β^ α,α$(NUMERIC_TICK)") == 21001 +end + + +@testset "Test parsing fractional values" begin + @test milesian("𐅵$(FRACTION_TICK)") == 0.5 + @test milesian("𐅷$(FRACTION_TICK)") == (2 / 3) + @test milesian("𐅸$(FRACTION_TICK)") == 0.75 + + + @test milesian("β$(FRACTION_TICK)") == 0.5 + @test milesian("ιβ$(FRACTION_TICK)") == (1 / 12) + +end From c24b4e4bdd6fe5baf08fba0a13273ce0b3f5b07f Mon Sep 17 00:00:00 2001 From: Neel Smith Date: Mon, 29 Jul 2024 09:16:20 -0400 Subject: [PATCH 10/11] Add TagBot gh action --- .github/workflows/TagBot.yml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 .github/workflows/TagBot.yml diff --git a/.github/workflows/TagBot.yml b/.github/workflows/TagBot.yml new file mode 100644 index 0000000..623860f --- /dev/null +++ b/.github/workflows/TagBot.yml @@ -0,0 +1,15 @@ +name: TagBot +on: + issue_comment: + types: + - created + workflow_dispatch: +jobs: + TagBot: + if: github.event_name == 'workflow_dispatch' || github.actor == 'JuliaTagBot' + runs-on: ubuntu-latest + steps: + - uses: JuliaRegistries/TagBot@v1 + with: + token: ${{ secrets.GITHUB_TOKEN }} + ssh: ${{ secrets.DOCUMENTER_KEY }} \ No newline at end of file From f1511e313c0615e240b19028197b20267431cce0 Mon Sep 17 00:00:00 2001 From: Neel Smith Date: Mon, 29 Jul 
2024 10:42:14 -0400 Subject: [PATCH 11/11] docs --- quarto/_quarto.yml | 1 + quarto/concepts/index.qmd | 6 +- quarto/guides/arenarius.qmd | 15 +++ quarto/guides/index.qmd | 12 ++- quarto/guides/numbers.qmd | 179 +++++++++++++++++++++++++++++++++++ quarto/index.qmd | 24 +++++ quarto/tutorials/index.qmd | 27 +----- quarto/tutorials/numbers.qmd | 156 ++++++++---------------------- src/milesian.jl | 56 ++++++----- src/tokens.jl | 7 +- 10 files changed, 301 insertions(+), 182 deletions(-) create mode 100644 quarto/guides/arenarius.qmd create mode 100644 quarto/guides/numbers.qmd diff --git a/quarto/_quarto.yml b/quarto/_quarto.yml index b958efa..4552731 100644 --- a/quarto/_quarto.yml +++ b/quarto/_quarto.yml @@ -42,6 +42,7 @@ website: - title: "Guides" contents: - guides/index.qmd + - guides/numbers.qmd - title: "Reference" contents: diff --git a/quarto/concepts/index.qmd b/quarto/concepts/index.qmd index 8500766..a15dd48 100644 --- a/quarto/concepts/index.qmd +++ b/quarto/concepts/index.qmd @@ -49,11 +49,7 @@ Astronomical symbols are represented by single Unicode codepoints such as `🜚 In contrast to the other token types, the syntax of numeric values and mathematical figure labels is case-sensitive. -Integer values in the Milesian notational system are written using 27 alphabetic or archaic alphabetic characters, followed by the numeric marker 'ʹ' (Unicode x0374). (See a [tutorial on writing numbers](../tutorials/numbers.qmd).) A comma separates digits for ones, tens and hundreds (to the right of the comma) from digits for thousands (left of the commma). `Μʹ` is the value for "myriad," 10,000. `μ` is the digit for 40. +Integer values in the Milesian notational system are written using 27 alphabetic or archaic alphabetic characters, followed by the numeric marker 'ʹ' (Unicode x0374). (See full documentation in a [guide to writing numbers](../guides/numbers.qmd).) 
A comma separates digits for ones, tens and hundreds (to the right of the comma) from digits for thousands (left of the comma). `Μʹ` is the value for "myriad," 10,000. `μ` is the digit for 40. Mathematical figure labels are defined as strings of upper-case purely alphabetic characters, without any breathing or accents. The string `Ἡ` is a lexical token (the nominative feminine singular of the article, in upper case), while `Η` is a figure label, "eta". - -## Work in progress - -Support for tokenizing fractions and integer values greater than 9,999 is planned for the next release. diff --git a/quarto/guides/arenarius.qmd b/quarto/guides/arenarius.qmd new file mode 100644 index 0000000..c87cd19 --- /dev/null +++ b/quarto/guides/arenarius.qmd @@ -0,0 +1,15 @@ +# Large values + + +## Using integer values in sexagesimal units + + +::: {.callout-note title="TBA"} + +An alternative to tedious sums of fractions: + +- divide whole units into sixtieths, seconds (3600ths), thirds (216000ths)... each written with an integer value up to 59 +- typically used in tables where columns determine meaning of integer +- but manually written columns make null values dangerous if they're left blank, so adds special character `Ο` (οὐδέν, "nothing") for 0. + +::: \ No newline at end of file diff --git a/quarto/guides/index.qmd b/quarto/guides/index.qmd index 8c57150..8ef4837 100644 --- a/quarto/guides/index.qmd +++ b/quarto/guides/index.qmd @@ -2,14 +2,18 @@ title: "Guides" --- -Recipe book for more complex tasks using `GreekScientificOrthography.jl` can be added here. +Recipe book for more complex tasks using `GreekScientificOrthography.jl`: -::: {.callout-note title="TBA"} -Add guides here for +- [Formatting numbers in Milesian notation](./numbers.qmd) + + + +::: {.callout-note title="Planned additions"} + 1. the complicated interactions that can happen with Unicode equivalence using modifying and non-modifying characters for prime, double prime, and Greek numeric marker -2. 
implementing Archimedes' system of writing arbitrarily large numbers +2. a note on Archimedes' system of writing arbitrarily large numbers, and on the normal practice of using sexagesimal digits for fractional values (firsts, seconds, thirds... ) ::: \ No newline at end of file diff --git a/quarto/guides/numbers.qmd b/quarto/guides/numbers.qmd new file mode 100644 index 0000000..1a57021 --- /dev/null +++ b/quarto/guides/numbers.qmd @@ -0,0 +1,179 @@ +# Formatting numeric tokens +```{julia} +#| warning: false +#| echo: false +#| output: false +using GreekScientificOrthography, Orthography +``` + +Manuscripts of Greek scientific and mathematical texts use the "Milesian" system of numeric notation. + +## Integers in the "Milesian" system of notation + +### Values from 1-999 + +The Milesian system is essentially a place-value system. The 27 values for ones (1-9), tens (10-90), and hundreds (100-900) are noted with specific alphabetic characters in normal alphabetic order, with the additions of `ϛ` for 6, `ϙ` for 90 and `ϡ` for 900. + +::: {.callout-note} +Note that in `GreekScientificOrthography`, characters used to write the basic integer values *must be in lower-case*. +::: + + +::: {.column width="50%"} + +| Ones | Tens | Hundreds | +| --- | --- | --- | +| α | ι | ρ | +| β | κ | σ | +| γ | λ | τ | +| δ | μ | υ | +| ε| ν | φ | +| ϛ | ξ | χ | +| ζ| ο | ψ | +| η| π | ω | +| θ | ϙ | ϡ | + +::: + + +Since most of the characters used to write integers in the Milesian system can also be read as alphabetic characters, integer tokens are flagged with a special marker, the numeric tick mark, `ʹ` (Unicode x0374). The integer value `1` is written like this: + +```{julia} +#| output: false +one = "αʹ" +``` + + + +Like our place value notation, the left-to-right sequence of digits is always largest to smallest.
Note that unlike our numeric notation, there is no need for a zero character to represent the absence of a value in the hundreds, tens or ones column, since the characters for each column are distinct. + + +```{julia} +#| output: false +eleven = "ιαʹ" +one_hundred_one = "ραʹ" +one_hundred_eleven = "ριαʹ" +``` + +All of these are valid strings in a `GreekSciOrthography`. + +```{julia} +ortho = stemortho() +validstring(one, ortho) == validstring(eleven, ortho) == validstring(one_hundred_one, ortho) == validstring(one_hundred_eleven, ortho) +``` + + +A comma separates a thousands column (with values from 1,000 - 9,000) to the left of the comma, from hundreds, tens and ones values to the right of the comma, again just like our practice. The thousands column reuses the same characters as the ones column. + +```{julia} +one_thousand_one = "α,αʹ" +validstring(one_thousand_one, ortho) +``` + +All of these strings represent a single integer token (type `MilesianIntegerToken`). + +```{julia} +tokenize(one, ortho) +``` + + +```{julia} +tokenize(one_thousand_one, ortho) +``` + +## Integers from 999-19,999 + +The value for 10,000 is written with the upper-case mu, `Μ` (for μυριάς, "myriad"). + +```{julia} +myriad = "Μʹ" +validstring(myriad, ortho) +``` + +Conventionally, the myriads value is written as a separate token separated by white space from the smaller columns. That means that the string for a value like 10,001 will be represented by two tokens. + +```{julia} +tenthousand_one = "Μʹ αʹ" +tokenize(tenthousand_one, ortho) +``` + +::: {.callout-note} +Greek manuscripts do not normally repeat the tick mark on the myriad marker; `GreekScientificOrthography` requires this to guarantee context-independent parsing of tokens. +::: + + +You can express whole numbers up to 19,999 in this way. 
+ ```{julia} +nineteen_999 = "Μʹ θ,ϡϙθʹ" +validstring(nineteen_999, ortho) +``` + +```{julia} +tokenize(nineteen_999, ortho) +``` + + +## Integers greater than 19,999 + +To write values larger than 19,999, Milesian notation begins by multiplying the myriad character. In manuscripts, the multiplier is normally written above the `Μ`; in `GreekScientificOrthography`, we use the markdown convention for superscript that is supported by pandoc, among others, of bracketing the superscript value with carets. In this context, neither multiplicand requires the numeric tick. + +20,000, for example, is written as Μ multiplied by β, like this example: + +```{julia} +twentyk = "Μ^β^" +validstring(twentyk, ortho) +``` + + +In an environment that supports pandoc's markdown extension, the multiplier will display as a superscript. + +```{julia} +using Markdown +Markdown.parse(twentyk) +``` + + +Archimedes uses this notation as he derives limiting values for *pi* in his treatise *Measurement of a Circle*. The value 349,450, for example, is written with these two tokens: + +```{julia} +threefortynine450 = "Μ^λδ^ ,θυνʹ" +Markdown.parse(threefortynine450) +``` + +```{julia} +tokenize(threefortynine450, ortho) +``` + + + +## Fractional values + +`GreekScientificOrthography` includes characters for three fractional values that are often written in manuscripts with special symbols: `𐅵` for one half (Unicode x10175), `𐅷` for two-thirds (Unicode x10177), and `𐅸` for three quarters (Unicode x10178). (The package also makes these characters available with the constant names `ONE_HALF`, `TWO_THIRDS` and `THREE_FOURTHS`.) + +Apart from these special cases, the only notation for fractional values available to Greek mathematicians was to use normal integer notation, but flagged with a special double-prime marker indicating that this is a *reciprocal* value.
+ ```{julia} +sixth = "ϛ″" +thirtysixth = "λϛ″" +validstring(sixth, ortho) == validstring(thirtysixth, ortho) +``` + +Other fractional values would be written as sums of these. The fraction 2/3 can, for example, appear as 1/2 + 1/6, written simply as a succession of individual fractional tokens. As with integer values, the left-to-right sequence is from greatest to least, and the value is the sum of the tokens. + +```{julia} +twothirds = "β″ ϛ″" +tokenize(twothirds, ortho) +``` + +Fractions can of course be mixed with integer numbers. + +```{julia} +six_and_twothirds = "ϛʹ β″ ϛ″" +validstring(six_and_twothirds, stemortho()) +``` + + +```{julia} +tokenize(six_and_twothirds, stemortho()) +``` \ No newline at end of file diff --git a/quarto/index.qmd b/quarto/index.qmd index 4d24b73..147a7e9 100644 --- a/quarto/index.qmd +++ b/quarto/index.qmd @@ -4,6 +4,30 @@ title: "GreekScientificOrthography.jl" > An orthographic system for ancient Greek mathematical and scientific texts. + +## Quick start: tokenization + +Validate the orthography of a string: + +```{julia} +using GreekScientificOrthography, Orthography +ortho = stemortho() + +archimedes = "ἡ ΓΕ πρὸς ΓΗ μείζονα λόγον ἔχει ἤπερ φοαʹ πρὸς ρνγʹ." +validstring(archimedes, ortho) +``` + + +Tokenize a string: + +```{julia} +tkns = tokenize(archimedes, ortho) +``` + + + +## Documentation + | | Learning the package | Using the package | | --- | --- | --- | | **Hands-on** | Try a [tutorial](./tutorials/) to start using the package | Find [recipes](./guides/) for specific tasks | diff --git a/quarto/tutorials/index.qmd b/quarto/tutorials/index.qmd index b71ef11..397ef1f 100644 --- a/quarto/tutorials/index.qmd +++ b/quarto/tutorials/index.qmd @@ -1,31 +1,10 @@ --- -title: Tutorials +title: Hands-on tutorials --- -## Quick start -- Validate the orthography of a string: -```{julia} -using GreekScientificOrthography, Orthography -ortho = stemortho() - -archimedes = "ἡ ΓΕ πρὸς ΓΗ μείζονα λόγον ἔχει ἤπερ φοαʹ πρὸς ρνγʹ."
-validstring(archimedes, ortho) -``` - - -- Tokenize a string: - -```{julia} -tkns = tokenize(archimedes, ortho) -``` - - -## Hands-on tutorials - - -- [validating and tokenizing strings](./tokens.qmd) -- [formatting numeric tokens](./numbers.qmd) +- [Validating and tokenizing strings](./tokens.qmd) +- [Evaluating numeric values](./numbers.qmd) diff --git a/quarto/tutorials/numbers.qmd b/quarto/tutorials/numbers.qmd index 49d1fad..acf7dab 100644 --- a/quarto/tutorials/numbers.qmd +++ b/quarto/tutorials/numbers.qmd @@ -1,185 +1,105 @@ -# Formatting numeric tokens -```{julia} -#| warning: false -#| echo: false -#| output: false -using GreekScientificOrthography, Orthography -``` - -Manuscripts of Greek scientific and mathematical texts use the "Milesian" system of numeric notation. - -## Integers in the "Milesian" system of notation - -### Values from 1-999 - -The Milesian system is essentially a place-value system. The 27 values for ones (1-9), tens (10-90), and hundreds (100-900) are noted with specific alphabetic characters in normal alphabetic order, with the additions of `ϛ` for 6, `ϙ` for 90 and `ϡ` for 900. - -::: {.callout-note} -Note that in `GreekScientificOrthography`, characters used to write the basic integer values *must be in lower-case*. -::: - +# Evaluating numeric values -::: {.column width="50%"} - -| Ones | Tens | Hundreds | -| --- | --- | --- | -| α | ι | ρ | -| β | κ | σ | -| γ | λ | τ | -| δ | μ | υ | -| ε| ν | φ | -| ϛ | ξ | χ | -| ζ| ο | ψ | -| η| π | ω | -| θ | ϙ | ϡ | +`GreekScientificOrthography` includes the `milesian` function for parsing Greek numbers in Milesian notation into numeric values. +::: {.callout-tip title="Milesian notation"} +See [this guide](../guides/numbers.qmd) for full documentation of the Milesian notation +recognized by `GreekScientificOrthography`.
::: -Since most of the characters used to write integers in the Milesian system can also be alphabetic characters, integer tokens are flagged with a special marker, the numeric tick mark, `ʹ` (Unicode x0374). The integer value `1` is written like this: +## Integers +Integer values are identified with the numeric tick, `ʹ`. ```{julia} -#| output: false -one = "αʹ" +#| warning: false +using GreekScientificOrthography +milesian("δʹ") ``` - -The sequence of digits is always smallest to largest from right to left. Note that their is no need for a zero character to represent the absence of a value is hundreds, tens or ones column, since the characters are distinct. - - - ```{julia} -#| output: false -eleven = "ιαʹ" -one_hundred_one = "ραʹ" -one_hundred_eleven = "ριαʹ" +milesian("κδʹ") ``` -All of these are valid strings in a `GreekSciOrthography`. +Thousands are separated by a comma from lower-value digits. ```{julia} -ortho = stemortho() -validstring(one, ortho) == validstring(eleven, ortho) == validstring(one_hundred_one, ortho) == validstring(one_hundred_eleven, ortho) +milesian("α,αʹ") ``` -A comma separates thousands values from 1,000 - 9,000 (left of the comma) from hundreds, tens and ones. The thousands values reuse the same characters as the ones values. +## Fractions + +Fractional values are written as unit fractions, given by the reciprocal value and tagged with a double-prime marker. + ```{julia} -one_thousand_one = "α,αʹ" -validstring(one_thousand_one, ortho) +milesian("δ″") ``` -All of these strings represent a single integer token (type `MilesianIntegerToken`). - ```{julia} -tokenize(one, ortho) +milesian("κδ″") ``` +Other values have to be expressed as the sum of a series of unit fractions. Two-thirds can be written as 1/2 + 1/6, for example. ```{julia} -tokenize(one_thousand_one, ortho) +milesian("β″ ϛ″") ``` -## Integers from 999-19,999 -The value for 10,000 is written with the upper-case mu, `Μ` (for μυριάς, "myriad").
+You can of course mix integers and fractions. ```{julia} -myriad = "Μʹ" -validstring(myriad, ortho) +milesian("βʹ β″") ``` -Conventionally, the myriads value is written as a separate token separated by white space from the smaller columns. That means that a value like 10,001 will be parsed as two tokens. +`GreekScientificOrthography` includes three special characters often used in Greek manuscripts for the values 1/2, 2/3 and 3/4. ```{julia} -tenthousand_one = "Μʹ αʹ" -tokenize(tenthousand_one, ortho) +milesian("𐅵″") ``` -::: {.callout-note} -Greek manuscripts do not normally repeat the tick mark on the myriad marker; `GreekScientificOrthography` requires this to guarantee context-independent parsing of tokens. -::: +## Myriads - -You can express whole numbers up to 19,999 in this way. +The basic notation lets you write integers up to 9,999. ```{julia} -nineteen_999 = "Μʹ θ,ϡϙθʹ" -validstring(nineteen_999, ortho) +milesian("θ,ϡϙθʹ") ``` -```{julia} -tokenize(nineteen_999, ortho) -``` - - -## Integers greater than 19,999 +10,000 is written with upper-case mu, for "myriad". -To write values larger than 19,999, Milesian notation begins by multiplying the myriad character. In manuscripts, the multiplier is normally written above the `Μ`; in `GreekScientificOrthography`, we use the markdown convention for superscript that is supported by pandoc, among others, of bracketing the superscript value with carets. In this context, neither numeric character requires the numeric tick. - -20,000, for example, is written as Μ multiplied by β, like this example: ```{julia} -twentyk = "Μ^β^" -validstring(twentyk, ortho) +milesian("Μʹ") ``` -When you display the value in an environment that supports pandoc's markdown extension, the multiplier will display as a superscript. +It's treated as a distinct token. As with fractions, the value of the string expression is the sum of the tokens. 
```{julia} -using Markdown -Markdown.parse(twentyk) +milesian("Μʹ α,αʹ") ``` - -Archimedes uses this notation as he derives limiting values for *pi* in his treatise *Measurement of a Circle*. The value 349,450, for example, is written with these two tokens: - -```{julia} -threefortynine450 = "Μ^λδ^ ,θυνʹ" -Markdown.parse(threefortynine450) -``` +Myriads can be multiplied! In `GreekScientificOrthography` this is indicated with a Markdown superscript expression (enclosed in carets). ```{julia} -tokenize(threefortynine450, ortho) +milesian("Μ^β^") ``` - -## Fractional values - -`GreekScientificOrthography` includes characters for three fractional values that are often written in manuscripts with special symbols: `𐅵` for one half, `𐅷` for two-thirds, and `𐅸` for three quarters. - -Apart from these special cases, the only notation for fractional values available to Greek mathematicians was to use normal integer notation, but flagged with a special double-prime marker indicating that this is a *reciprocal* value. +A myriad myriads is 10^8^! ```{julia} -sixth = "ϛ″" -thirtysixth = "λϛ″" -validstring(sixth, ortho) == validstring(thirtysixth, ortho) +milesian("Μ^Μ^") ``` -Other fractional values would be written as sums of these. The fraction 2/3 can, for example, appear as 1/2 + 1/6, written simply as a succession of individual fractional tokens. As with integer values, the sequence is least to greatest from right to left. +The biggest integer value we can write in this system is 100009999. ```{julia} -twothirds = "β″ ϛ″" -tokenize(twothirds, ortho) +milesian("Μ^Μ^ θ,ϡϙθʹ") ``` -Fractions can of course be mixed with integer numbers. - -```{julia} -six_and_twothirds = "ϛʹ β″ ϛ″" -``` - -## Using integer values in hexadecimal units - - -::: {.callout-note title="TBA"} -An alternative to tedious sums of fractions: -- divide whole units into sixtieths, seconds (3600ths), thirds (216000ths)... 
each written with an integer value up to 59 -- typically used in tables where columns determine meaning of integer -- but manually written columns make null values dangerous if they're left blank, so adds special character `Ο` (οὐδέν, "nothing") for 0. -::: \ No newline at end of file diff --git a/src/milesian.jl b/src/milesian.jl index 747c9d4..37a14a3 100644 --- a/src/milesian.jl +++ b/src/milesian.jl @@ -14,7 +14,7 @@ function milesian(tkn::OrthographicToken) fractionvalue(tkn) elseif tokencategory(tkn) isa MilesianIntegerToken - @info("Its an int: $(tkn)") + @debug("Its an int: $(tkn)") intvalue(tkn) else throw(DomainError("Not a Milesian token: $(tkn).")) @@ -78,18 +78,17 @@ end $(SIGNATURES) """ function intvalue(s::AbstractString) - @info("Int of $(s)") + @debug("Int of $(s)") if startswith(s, "Μ") myriadvalue(s) else pieces = split(s, ",") if length(pieces) == 1 - #intdigits(pieces[1]) - @info("Get intdigits for $(pieces[1])") + @debug("Get intdigits for $(pieces[1])") intdigits(pieces[1]) else - @info("Here are the pieces: $(pieces)") + @debug("Here are the pieces: $(pieces)") thousands = intdigits(pieces[1]) * 1000 lowervals = filter(s -> ! istick(s), pieces[2]) isempty(lowervals) ? thousands : thousands + intdigits(lowervals) @@ -120,7 +119,9 @@ function istick(c::Char) end - +"""Compute numeric value of an orthographic token expressing a fractional value in Milesian notation. +$(SIGNATURES) +""" function fractionvalue(tkn::OrthographicToken) fractionpieces(tkn) |> sum end @@ -133,29 +134,26 @@ function fractionpieces(tkn::OrthographicToken) fractionpieces(tokentext(tkn)) end +"""Compute numeric value of a string for a single token expressing a fractional value in Milesian notation. 
+$(SIGNATURES) +""" function fractionpieces(s::AbstractString) - @assert(istick(s[end])) - - pieces = [] - for gr in graphemes(s) - @debug("Eval grapheme $(gr)") - if gr == "$(ONE_HALF)" - grval = 1 /2 - push!(pieces, grval) - elseif gr == "$(TWO_THIRDS)" - grval = 2 / 3 - push!(pieces, grval) - elseif gr == "$(THREE_FOURTHS)" - grval = 3 / 4 - push!(pieces, grval) - - elseif istick(gr[1]) - else - @debug("Eval char $(gr[1])") - @assert gr[1] in keys(digitvalues) - grval = 1 / digitvalues[gr[1]] - push!(pieces, grval) - end + if !(istick(s[end])) + throw(DomainError("String not marked with fraction tick: $(s)")) + end + + if s[1] == ONE_HALF + 1 /2 + + elseif s[1] == TWO_THIRDS + 2 / 3 + + elseif s[1] == THREE_FOURTHS + 3 / 4 + + else + @debug("Get int value of $(s)") + 1 / intvalue(s) + end - pieces end diff --git a/src/tokens.jl b/src/tokens.jl index 4f59187..612a1af 100644 --- a/src/tokens.jl +++ b/src/tokens.jl @@ -30,8 +30,10 @@ $(SIGNATURES) """ function isinteger(s) - @info("Look at $(s). Ends with tick? $(s[end])") - if s == "Μ" # upper case Mu, unicode x039c + @debug("Look at integer string $(s). Ends with tick? $(s[end])") + if endswith(s, "$(NUMERIC_TICK)$(NUMERIC_TICK)") + false + elseif s == "Μ" # upper case Mu, unicode x039c true elseif s[1] == 'Μ' && s[end] == '^' @@ -41,6 +43,7 @@ function isinteger(s) true else + @debug("Not an int.") false end end