From e455ad29e11d1dcb722c61816c64db88b8724848 Mon Sep 17 00:00:00 2001
From: vnmabus <vnmabus@gmail.com>
Date: Thu, 23 Jun 2022 12:40:19 +0200
Subject: [PATCH 01/10] Some (unsuccessful) parse tests for functions.

---
 rdata/conversion/__init__.py                  |   1 +
 rdata/conversion/_conversion.py               |  12 ++
 rdata/parser/_parser.py                       | 117 +++++++++++++++---
 rdata/tests/data/test_builtin.rda             | Bin 0 -> 79 bytes
 rdata/tests/data/test_empty_function.rda      | Bin 0 -> 323 bytes
 .../data/test_empty_function_uncompiled.rda   | Bin 0 -> 271 bytes
 rdata/tests/data/test_function.rda            | Bin 0 -> 382 bytes
 rdata/tests/data/test_function_arg.rda        | Bin 0 -> 298 bytes
 rdata/tests/test_rdata.py                     |  57 +++++++++
 9 files changed, 170 insertions(+), 17 deletions(-)
 create mode 100644 rdata/tests/data/test_builtin.rda
 create mode 100644 rdata/tests/data/test_empty_function.rda
 create mode 100644 rdata/tests/data/test_empty_function_uncompiled.rda
 create mode 100644 rdata/tests/data/test_function.rda
 create mode 100644 rdata/tests/data/test_function_arg.rda

diff --git a/rdata/conversion/__init__.py b/rdata/conversion/__init__.py
index b0391e6..c8e5535 100644
--- a/rdata/conversion/__init__.py
+++ b/rdata/conversion/__init__.py
@@ -1,6 +1,7 @@
 from ._conversion import (
     DEFAULT_CLASS_MAP,
     Converter,
+    RBuiltin,
     RExpression,
     RLanguage,
     SimpleConverter,
diff --git a/rdata/conversion/_conversion.py b/rdata/conversion/_conversion.py
index 8c9c217..1408508 100644
--- a/rdata/conversion/_conversion.py
+++ b/rdata/conversion/_conversion.py
@@ -1,5 +1,6 @@
 import abc
 import warnings
+from dataclasses import dataclass
 from fractions import Fraction
 from types import MappingProxyType, SimpleNamespace
 from typing import (
@@ -38,6 +39,13 @@ class RExpression(NamedTuple):
     elements: List[RLanguage]
 
 
+@dataclass
+class RBuiltin():
+    """R builtin."""
+
+    name: str
+
+
 def convert_list(
     r_list: parser.RObject,
     conversion_function: ConversionFunction,
@@ -576,6 +584,10 @@ def _convert_next(self, data: Union[parser.RData, parser.RObject]) -> Any:
 
             value = RLanguage(rlanguage_list)
 
+        elif obj.info.type in {parser.RObjectType.SPECIAL, parser.RObjectType.BUILTIN}:
+
+            value = RBuiltin(name=obj.value.decode("ascii"))
+
         elif obj.info.type == parser.RObjectType.CHAR:
 
             # Return the internal string
diff --git a/rdata/parser/_parser.py b/rdata/parser/_parser.py
index 805fd04..7cc51a7 100644
--- a/rdata/parser/_parser.py
+++ b/rdata/parser/_parser.py
@@ -19,6 +19,7 @@
     List,
     Mapping,
     Optional,
+    Sequence,
     Set,
     TextIO,
     Tuple,
@@ -106,7 +107,12 @@ class RObjectType(enum.Enum):
     RAW = 24  # raw vector
     S4 = 25  # S4 classes not of simple type
     ALTREP = 238  # Alternative representations
+    ATTRLIST = 239  # Bytecode attribute
+    ATTRLANG = 240  # Bytecode attribute
     EMPTYENV = 242  # Empty environment
+    BCREPREF = 243  # Bytecode repetition reference
+    BCREPDEF = 244  # Bytecode repetition definition
+    MISSINGARG = 251  # Missing argument
     GLOBALENV = 253  # Global environment
     NILVALUE = 254  # NIL value
     REF = 255  # Reference
@@ -488,9 +494,28 @@ def expand_altrep_to_object(
         constructor = self.altrep_constructor_dict[altrep_name]
         return constructor(state)
 
+    def _parse_bytecode(
+        self,
+        reference_list: Optional[List[RObject]],
+        bytecode_rep_list: List[RObject | None] | None = None,
+    ) -> Tuple[RObject, Sequence[RObject]]:
+        """Parse R bytecode."""
+        n_repeated = self.parse_int()
+
+        code = self.parse_R_object(reference_list, bytecode_rep_list)
+
+        n_constants = self.parse_int()
+        constants = [
+            self.parse_R_object(reference_list, [None] * n_repeated)
+            for _ in range(n_constants)
+        ]
+
+        return (code, constants)
+
     def parse_R_object(
         self,
-        reference_list: Optional[List[RObject]] = None,
+        reference_list: List[RObject] | None = None,
+        bytecode_rep_list: List[RObject | None] | None = None,
     ) -> RObject:
         """Parse a R object."""
         if reference_list is None:
@@ -505,6 +530,7 @@ def parse_R_object(
         attributes = None
         referenced_object = None
 
+        bytecode_rep_position = -1
         tag_read = False
         attributes_read = False
         add_reference = False
@@ -513,27 +539,47 @@ def parse_R_object(
 
         value: Any
 
+        if info.type == RObjectType.BCREPDEF:
+            assert bytecode_rep_list
+            bytecode_rep_position = self.parse_int()
+            info.type = RObjectType(self.parse_int())
+
         if info.type == RObjectType.NIL:
             value = None
 
         elif info.type == RObjectType.SYM:
             # Read Char
-            value = self.parse_R_object(reference_list)
+            value = self.parse_R_object(reference_list, bytecode_rep_list)
             # Symbols can be referenced
             add_reference = True
 
-        elif info.type in {RObjectType.LIST, RObjectType.LANG}:
+        elif info.type in {
+            RObjectType.LIST,
+            RObjectType.LANG,
+            RObjectType.CLO,
+            RObjectType.PROM,
+            RObjectType.DOT,
+            RObjectType.ATTRLANG,
+        }:
+            if info.type is RObjectType.ATTRLANG:
+                info.type = RObjectType.LANG
+                info.attributes = True
+
             tag = None
             if info.attributes:
-                attributes = self.parse_R_object(reference_list)
+                attributes = self.parse_R_object(
+                    reference_list,
+                    bytecode_rep_list,
+                )
                 attributes_read = True
-            elif info.tag:
-                tag = self.parse_R_object(reference_list)
+
+            if info.tag:
+                tag = self.parse_R_object(reference_list, bytecode_rep_list)
                 tag_read = True
 
             # Read CAR and CDR
-            car = self.parse_R_object(reference_list)
-            cdr = self.parse_R_object(reference_list)
+            car = self.parse_R_object(reference_list, bytecode_rep_list)
+            cdr = self.parse_R_object(reference_list, bytecode_rep_list)
             value = (car, cdr)
 
         elif info.type == RObjectType.ENV:
@@ -548,10 +594,10 @@ def parse_R_object(
             reference_list.append(result)
 
             locked = self.parse_bool()
-            enclosure = self.parse_R_object(reference_list)
-            frame = self.parse_R_object(reference_list)
-            hash_table = self.parse_R_object(reference_list)
-            attributes = self.parse_R_object(reference_list)
+            enclosure = self.parse_R_object(reference_list, bytecode_rep_list)
+            frame = self.parse_R_object(reference_list, bytecode_rep_list)
+            hash_table = self.parse_R_object(reference_list, bytecode_rep_list)
+            attributes = self.parse_R_object(reference_list, bytecode_rep_list)
 
             value = EnvironmentValue(
                 locked=locked,
@@ -560,6 +606,11 @@ def parse_R_object(
                 hash_table=hash_table,
             )
 
+        elif info.type in {RObjectType.SPECIAL, RObjectType.BUILTIN}:
+            length = self.parse_int()
+            if length > 0:
+                value = self.parse_string(length=length)
+
         elif info.type == RObjectType.CHAR:
             length = self.parse_int()
             if length > 0:
@@ -615,15 +666,28 @@ def parse_R_object(
             value = [None] * length
 
             for i in range(length):
-                value[i] = self.parse_R_object(reference_list)
+                value[i] = self.parse_R_object(
+                    reference_list, bytecode_rep_list)
+
+        elif info.type == RObjectType.BCODE:
+            value = self._parse_bytecode(reference_list, bytecode_rep_list)
 
         elif info.type == RObjectType.S4:
             value = None
 
         elif info.type == RObjectType.ALTREP:
-            altrep_info = self.parse_R_object(reference_list)
-            altrep_state = self.parse_R_object(reference_list)
-            altrep_attr = self.parse_R_object(reference_list)
+            altrep_info = self.parse_R_object(
+                reference_list,
+                bytecode_rep_list,
+            )
+            altrep_state = self.parse_R_object(
+                reference_list,
+                bytecode_rep_list,
+            )
+            altrep_attr = self.parse_R_object(
+                reference_list,
+                bytecode_rep_list,
+            )
 
             if self.expand_altrep:
                 info, value = self.expand_altrep_to_object(
@@ -637,6 +701,16 @@ def parse_R_object(
         elif info.type == RObjectType.EMPTYENV:
             value = None
 
+        elif info.type == RObjectType.BCREPREF:
+            assert bytecode_rep_list
+            position = self.parse_int()
+            result = bytecode_rep_list[position]
+            assert result
+            return result
+
+        elif info.type == RObjectType.MISSINGARG:
+            value = None
+
         elif info.type == RObjectType.GLOBALENV:
             value = None
 
@@ -657,7 +731,7 @@ def parse_R_object(
                 "and ignored",
             )
         if info.attributes and not attributes_read:
-            attributes = self.parse_R_object(reference_list)
+            attributes = self.parse_R_object(reference_list, bytecode_rep_list)
 
         if result is None:
             result = RObject(
@@ -676,6 +750,10 @@ def parse_R_object(
         if add_reference:
             reference_list.append(result)
 
+        if bytecode_rep_position >= 0:
+            assert bytecode_rep_list
+            bytecode_rep_list[bytecode_rep_position] = result
+
         return result
 
 
@@ -717,6 +795,11 @@ def parse_string(self, length: int) -> bytes:  # noqa: D102
         self.position += length
         return bytes(result)
 
+    def parse_all(self) -> RData:
+        rdata = super().parse_all()
+        assert self.position == len(self.data)
+        return rdata
+
 
 def parse_file(
     file_or_path: Union[BinaryIO, TextIO, 'os.PathLike[Any]', str],
diff --git a/rdata/tests/data/test_builtin.rda b/rdata/tests/data/test_builtin.rda
new file mode 100644
index 0000000000000000000000000000000000000000..48279c6732fbbf5cbdf3d196c98b3fade39b2c98
GIT binary patch
literal 79
zcmb2|=3oE=X6~X+gJ)e2k`fXU(h?GxCarN$W6sX#n7xjbIijU;Vq<Hgu*c^qOT?xh
gQJ<RjY)Z;A#-B0@zMY;&9bU;Z^nBga;|??j06vNx^#A|>

literal 0
HcmV?d00001

diff --git a/rdata/tests/data/test_empty_function.rda b/rdata/tests/data/test_empty_function.rda
new file mode 100644
index 0000000000000000000000000000000000000000..d8dd79fc8e596032d87880642748250701648db4
GIT binary patch
literal 323
zcmV-J0lfYniwFP!000001GQ31Yr;Sfp1jmX0~Qf_3}~S}c<QBx9!tS%AK*<S-OxZx
zLN=v}6#A1(@6}GSlek$Uc<I7q9+~;R*_nO3pWWCq8vqP2b^Karg7xzJFu8>y0vecb
zhI)`Op3T`Z$<}lKJ@T?3jsRMQlGfs>mogur9P~QHAAk$eW3tEOYf5z+tNkEkXqq6-
zkCI}ALBu#3eJOa^(42cWaZCH^+TDEG3Q{L$iWW<zfaPZ^q|WSx3tlqo6zo#RUYx8C
zFF_FPokCGfsU(75(nFezQ|ttwhjLD_d^z6&QO^+vB3_&HNYW$8r%4-FXYNkvO3kja
zx*u<G$ex<k4APq6aLuQaR{~Oowvm=oYUjW1Um+V``-^EuD*J4eq>OWXYJ3{Kvegj`
VT}UHFPq5eOw|@gmme8RC0083anKS?Z

literal 0
HcmV?d00001

diff --git a/rdata/tests/data/test_empty_function_uncompiled.rda b/rdata/tests/data/test_empty_function_uncompiled.rda
new file mode 100644
index 0000000000000000000000000000000000000000..205628f55c626eb9e4e29e5e1b4636326cda20bd
GIT binary patch
literal 271
zcmV+q0r37GiwFP!000000}FDAFy@NjVqjokW?*3glB_@`18ZoAo2~@|ScHL*frWt+
zNXwO^7MH}Q<`$Gx#;29$C6{F8=fwl*{M>@foYWKs1~z6i9c;x#$wjGYKvQ{vJPxo7
z1A{V@)`zn7p?pR(-RwZ!X+V>KOcn+R_=Ki}H77GKwHU|{KoT^-?@k39T?Lr1hNePw
zEf<=h93V^c5_410%msme5RB@G<ebD}m?M~=g4|HYC+8PbV)_Sa1p9w9E8qbJvxwm@
zgl1xc=wJfr5QYR6N}%c?(RxUHXrM9!^|AaX##VIIa8=yp8TmP>!PsJy0VzfqFk_T>
V%fT2X!&nUy1OPwF#`C2D008-ubvOV3

literal 0
HcmV?d00001

diff --git a/rdata/tests/data/test_function.rda b/rdata/tests/data/test_function.rda
new file mode 100644
index 0000000000000000000000000000000000000000..3e0940f4a2252dd53ef5c796fb0a29437387544e
GIT binary patch
literal 382
zcmV-^0fGJ>iwFP!000001GQ64OT$1Ao#wNq4HQK@RYDKa(&psFgGUj)`T@O3iCYZB
zq+}yjiqM}d>ebdsG6|b)3Pmt5o!Ob$_g*KHn~U+OHMRgi1x>+U9YfIX?=Ial$TH9X
z4ffF63FFDb^d<BXKMDa1RcbJJ?8R(~AU2+wSp{I9{D|Taog30^6K_xbfMKXX{(MR;
zdf<nQW6%+jZbg=J?m9)>A2^Fy?1xFecg2Dr>J8p439*?nhLdL|fu_$Yx2gg!nD9b@
zLdg~>_oCU`=AR)5b1hMn(<+GIhx}oNjRU7Cw$YAgYCRgy3H78o6YV2sQBszJyjFP&
zUBP#SGra{B?`i8@lvYrW$qy+W{uNd3F1uQ<Pf@^b>lO=y#e&Vn+MT>K5IPzQns8e9
zrlL{PsAnrCG=s#YXUiunRpC*nl?wZ!oc&+Z%__B$O3Gy}LD_D#G#xgd#f)?O+W2+&
c$mT>S<Eq=O7Pp<tuVAiz0if2HhO7ht0CfkoY5)KL

literal 0
HcmV?d00001

diff --git a/rdata/tests/data/test_function_arg.rda b/rdata/tests/data/test_function_arg.rda
new file mode 100644
index 0000000000000000000000000000000000000000..13cdae917fbc323a9f60d17bb8f77de5847ee52d
GIT binary patch
literal 298
zcmV+_0oDE=iwFP!000001D#S!OT$1Ao;;hT2ui_os2AJXlNS#j#UG#*dMiuVXdrG%
z5)mo>X|H1AWOoua8xO^W-FbiCWHJwTo9l2B0)PXqjjx9)cu(v5)eT4wSm44K<ypnc
zYMZ=?xJq-eWyLdq?-)sbS;PfT5a<N0Kt=%0NiIoWl5H8)Bdku+jHBv;I$uVLm!*Q2
zsGMrft3hkg?J8>ROE!=8yCM}8YM-G27&L@}y>bJyIR|z74a6BM+XifMPN?xX-+c_f
zg&;<?LD3DXC4wJ=%wqU_tDx<xpxwYsC3%0!GfmDko2Kkwom(H%N)Ppd<bw3Ye^Jfs
wE^G4sl4tz!uf+jvabUPO$CJz3O?9uP2fqGa<%Xz0P-|6(UtJN$xFiDr0OEs*K>z>%

literal 0
HcmV?d00001

diff --git a/rdata/tests/test_rdata.py b/rdata/tests/test_rdata.py
index 38f4c65..1dd312d 100644
--- a/rdata/tests/test_rdata.py
+++ b/rdata/tests/test_rdata.py
@@ -211,6 +211,63 @@ def test_expression(self) -> None:
             ]),
         })
 
+    def test_builtin(self) -> None:
+        """Test that builtin functions can be parsed."""
+        parsed = rdata.parser.parse_file(TESTDATA_PATH / "test_builtin.rda")
+        converted = rdata.conversion.convert(parsed)
+
+        np.testing.assert_equal(converted, {
+            "test_builtin": rdata.conversion.RBuiltin(name="abs"),
+        })
+
+    def test_empty_function_uncompiled(self) -> None:
+        """Test that a simple function can be parsed."""
+        parsed = rdata.parser.parse_file(
+            TESTDATA_PATH / "test_empty_function_uncompiled.rda")
+        converted = rdata.conversion.convert(parsed)
+
+        np.testing.assert_equal(converted, {
+            "test_empty_function": rdata.conversion.RExpression([
+                rdata.conversion.RLanguage(['^', 'base', 'exponent']),
+            ]),
+        })
+
+    def test_empty_function(self) -> None:
+        """Test that a simple function (compiled) can be parsed."""
+        parsed = rdata.parser.parse_file(
+            TESTDATA_PATH / "test_empty_function.rda")
+        converted = rdata.conversion.convert(parsed)
+
+        np.testing.assert_equal(converted, {
+            "test_empty_function": rdata.conversion.RExpression([
+                rdata.conversion.RLanguage(['^', 'base', 'exponent']),
+            ]),
+        })
+
+    def test_function(self) -> None:
+        """Test that functions can be parsed."""
+        parsed = rdata.parser.parse_file(
+            TESTDATA_PATH / "test_function.rda")
+        converted = rdata.conversion.convert(parsed)
+
+        np.testing.assert_equal(converted, {
+            "test_function": rdata.conversion.RExpression([
+                rdata.conversion.RLanguage(['^', 'base', 'exponent']),
+            ]),
+        })
+
+    def test_function_arg(self) -> None:
+        """Test that functions can be parsed."""
+        parsed = rdata.parser.parse_file(
+            TESTDATA_PATH / "test_function_arg.rda")
+        converted = rdata.conversion.convert(parsed)
+
+        np.testing.assert_equal(converted, {
+            "test_function_arg": rdata.conversion.RExpression([
+                rdata.conversion.RLanguage(['^', 'base', 'exponent']),
+            ]),
+        })
+
     def test_encodings(self) -> None:
         """Test of differents encodings."""
         with self.assertWarns(

From e00a978ff6722d6c9331d33d08e3e901e53d4a61 Mon Sep 17 00:00:00 2001
From: vnmabus <vnmabus@gmail.com>
Date: Mon, 15 Aug 2022 17:26:49 +0200
Subject: [PATCH 02/10] Support minimal functions.

---
 rdata/conversion/__init__.py                  |   2 +
 rdata/conversion/_conversion.py               |  77 ++++++++++--
 rdata/parser/_parser.py                       |  71 +++++++++--
 rdata/tests/data/test_minimal_function.rda    | Bin 0 -> 275 bytes
 .../data/test_minimal_function_uncompiled.rda | Bin 0 -> 217 bytes
 rdata/tests/test_rdata.py                     | 114 ++++++++++++++++--
 6 files changed, 237 insertions(+), 27 deletions(-)
 create mode 100644 rdata/tests/data/test_minimal_function.rda
 create mode 100644 rdata/tests/data/test_minimal_function_uncompiled.rda

diff --git a/rdata/conversion/__init__.py b/rdata/conversion/__init__.py
index c8e5535..2d601d7 100644
--- a/rdata/conversion/__init__.py
+++ b/rdata/conversion/__init__.py
@@ -2,7 +2,9 @@
     DEFAULT_CLASS_MAP,
     Converter,
     RBuiltin,
+    RBytecode,
     RExpression,
+    RFunction,
     RLanguage,
     SimpleConverter,
     convert,
diff --git a/rdata/conversion/_conversion.py b/rdata/conversion/_conversion.py
index 1408508..841fa6e 100644
--- a/rdata/conversion/_conversion.py
+++ b/rdata/conversion/_conversion.py
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 import abc
 import warnings
 from dataclasses import dataclass
@@ -31,6 +33,7 @@ class RLanguage(NamedTuple):
     """R language construct."""
 
     elements: List[Any]
+    attributes: Mapping[str, Any]
 
 
 class RExpression(NamedTuple):
@@ -40,12 +43,42 @@ class RExpression(NamedTuple):
 
 
 @dataclass
-class RBuiltin():
+class RBuiltin:
     """R builtin."""
 
     name: str
 
 
+@dataclass
+class RFunction:
+    """R function."""
+
+    environment: Mapping[str, Any]
+    formals: Optional[Mapping[str, Any]]
+    body: RLanguage
+    attributes: StrMap
+
+
+@dataclass
+class RBytecode:
+    """R bytecode."""
+
+    code: xarray.DataArray
+    attributes: StrMap
+
+
+class REnvironment(ChainMap[Union[str, bytes], Any]):
+    """R environment."""
+
+    def __init__(
+        self,
+        *maps: MutableMapping[str | bytes, Any],
+        frame: StrMap | None = None,
+    ) -> None:
+        super().__init__(*maps)
+        self.frame = frame
+
+
 def convert_list(
     r_list: parser.RObject,
     conversion_function: ConversionFunction,
@@ -102,7 +135,7 @@ def convert_list(
 def convert_env(
     r_env: parser.RObject,
     conversion_function: ConversionFunction,
-) -> ChainMap[Union[str, bytes], Any]:
+) -> REnvironment:
     """Convert environment objects."""
     if r_env.info.type is not parser.RObjectType.ENV:
         raise TypeError("Must receive a ENV object")
@@ -112,11 +145,12 @@ def convert_env(
     hash_table = conversion_function(r_env.value.hash_table)
 
     dictionary = {}
-    for d in hash_table:
-        if d is not None:
-            dictionary.update(d)
+    if hash_table is not None:
+        for d in hash_table:
+            if d is not None:
+                dictionary.update(d)
 
-    return ChainMap(dictionary, enclosure)
+    return REnvironment(dictionary, enclosure, frame=frame)
 
 
 def convert_attrs(
@@ -516,17 +550,17 @@ def __init__(
         constructor_dict: ConstructorDict = DEFAULT_CLASS_MAP,
         default_encoding: Optional[str] = None,
         force_default_encoding: bool = False,
-        global_environment: Optional[StrMap] = None,
+        global_environment: MutableMapping[str | bytes, Any] | None = None,
     ) -> None:
 
         self.constructor_dict = constructor_dict
         self.default_encoding = default_encoding
         self.force_default_encoding = force_default_encoding
-        self.global_environment = ChainMap(
+        self.global_environment = REnvironment(
             {} if global_environment is None
             else global_environment,
         )
-        self.empty_environment: StrMap = ChainMap({})
+        self.empty_environment: StrMap = REnvironment({})
 
         self._reset()
 
@@ -570,6 +604,20 @@ def _convert_next(self, data: Union[parser.RData, parser.RObject]) -> Any:
             # Expand the list and process the elements
             value = convert_list(obj, self._convert_next)
 
+        elif obj.info.type == parser.RObjectType.CLO:
+            assert obj.tag is not None
+            environment = self._convert_next(obj.tag)
+            formals = self._convert_next(obj.value[0])
+            body = self._convert_next(obj.value[1])
+            attributes = self._convert_next(obj.attributes)
+
+            value = RFunction(
+                environment=environment,
+                formals=formals,
+                body=body,
+                attributes=attributes,
+            )
+
         elif obj.info.type == parser.RObjectType.ENV:
 
             # Return a ChainMap of the environments
@@ -581,8 +629,10 @@ def _convert_next(self, data: Union[parser.RData, parser.RObject]) -> Any:
             # special object
             rlanguage_list = convert_list(obj, self._convert_next)
             assert isinstance(rlanguage_list, list)
+            assert obj.attributes
+            attributes = self._convert_next(obj.attributes)
 
-            value = RLanguage(rlanguage_list)
+            value = RLanguage(rlanguage_list, attributes)
 
         elif obj.info.type in {parser.RObjectType.SPECIAL, parser.RObjectType.BUILTIN}:
 
@@ -628,6 +678,13 @@ def _convert_next(self, data: Union[parser.RData, parser.RObject]) -> Any:
             # Convert the internal objects returning a special object
             value = RExpression(rexpression_list)
 
+        elif obj.info.type == parser.RObjectType.BCODE:
+
+            value = RBytecode(
+                code=self._convert_next(obj.value[0]),
+                attributes=attrs,
+            )
+
         elif obj.info.type == parser.RObjectType.S4:
             value = SimpleNamespace(**attrs)
 
diff --git a/rdata/parser/_parser.py b/rdata/parser/_parser.py
index 7cc51a7..b3836b8 100644
--- a/rdata/parser/_parser.py
+++ b/rdata/parser/_parser.py
@@ -118,6 +118,15 @@ class RObjectType(enum.Enum):
     REF = 255  # Reference
 
 
+BYTECODE_SPECIAL_SET = {
+    RObjectType.BCREPDEF,
+    RObjectType.LANG,
+    RObjectType.LIST,
+    RObjectType.ATTRLANG,
+    RObjectType.ATTRLIST,
+}
+
+
 class CharFlags(enum.IntFlag):
     """Flags for R objects of type char."""
 
@@ -257,6 +266,15 @@ class RData():
     extra: RExtraInfo
     object: RObject
 
+    def __str__(self) -> str:
+        return (
+            "RData(\n"
+            f"  versions: {self.versions}\n"
+            f"  extra: {self.extra}\n"
+            f"  object: \n{self.object._str_internal(indent=4)}\n"
+            ")\n"
+        )
+
 
 @dataclass
 class EnvironmentValue():
@@ -494,6 +512,20 @@ def expand_altrep_to_object(
         constructor = self.altrep_constructor_dict[altrep_name]
         return constructor(state)
 
+    def _parse_bytecode_constant(
+        self,
+        reference_list: Optional[List[RObject]],
+        bytecode_rep_list: List[RObject | None] | None = None,
+    ) -> RObject:
+
+        obj_type = self.parse_int()
+
+        return self.parse_R_object(
+            reference_list,
+            bytecode_rep_list,
+            info_int=obj_type,
+        )
+
     def _parse_bytecode(
         self,
         reference_list: Optional[List[RObject]],
@@ -506,7 +538,10 @@ def _parse_bytecode(
 
         n_constants = self.parse_int()
         constants = [
-            self.parse_R_object(reference_list, [None] * n_repeated)
+            self._parse_bytecode_constant(
+                reference_list,
+                [None] * n_repeated,
+            )
             for _ in range(n_constants)
         ]
 
@@ -516,15 +551,23 @@ def parse_R_object(
         self,
         reference_list: List[RObject] | None = None,
         bytecode_rep_list: List[RObject | None] | None = None,
+        info_int: int | None = None,
     ) -> RObject:
         """Parse a R object."""
         if reference_list is None:
             # Index is 1-based, so we insert a dummy object
             reference_list = []
 
-        info_int = self.parse_int()
-
-        info = parse_r_object_info(info_int)
+        original_info_int = info_int
+        if (
+            info_int is not None
+            and RObjectType(info_int) in BYTECODE_SPECIAL_SET
+        ):
+            info = parse_r_object_info(info_int)
+            info.tag = True
+        else:
+            info_int = self.parse_int()
+            info = parse_r_object_info(info_int)
 
         tag = None
         attributes = None
@@ -563,7 +606,6 @@ def parse_R_object(
         }:
             if info.type is RObjectType.ATTRLANG:
                 info.type = RObjectType.LANG
-                info.attributes = True
 
             tag = None
             if info.attributes:
@@ -578,8 +620,22 @@ def parse_R_object(
                 tag_read = True
 
             # Read CAR and CDR
-            car = self.parse_R_object(reference_list, bytecode_rep_list)
-            cdr = self.parse_R_object(reference_list, bytecode_rep_list)
+            car = self.parse_R_object(
+                reference_list,
+                bytecode_rep_list,
+                info_int=(
+                    None if original_info_int is None
+                    else self.parse_int()
+                ),
+            )
+            cdr = self.parse_R_object(
+                reference_list,
+                bytecode_rep_list,
+                info_int=(
+                    None if original_info_int is None
+                    else self.parse_int()
+                ),
+            )
             value = (car, cdr)
 
         elif info.type == RObjectType.ENV:
@@ -671,6 +727,7 @@ def parse_R_object(
 
         elif info.type == RObjectType.BCODE:
             value = self._parse_bytecode(reference_list, bytecode_rep_list)
+            tag_read = True
 
         elif info.type == RObjectType.S4:
             value = None
diff --git a/rdata/tests/data/test_minimal_function.rda b/rdata/tests/data/test_minimal_function.rda
new file mode 100644
index 0000000000000000000000000000000000000000..0c39c802adf7d0c2b62d135401719a309a2556a7
GIT binary patch
literal 275
zcmV+u0qp)CiwFP!000000}FDAFy@NjVqjokW?*3glB_@`18ZoAo2~@|ScHL*frWt+
zNQ;)F7MH~5X69w)Cg#MamF6XvWaj5FFt9PBDP$`yN-j!G0~*E)<Z*yy7#O6Wv<8%|
z0p&BI>1GG&PRq<m1u|I}Am9_464so|ywqYKLjXxo6RYzSY;+Z1Yz<8Xzfd0^E;Q3P
zK(^&2=BA<<2m=2g7}fUVoWx?7?MzTXZm6r1^9w35Jpwg?{Xd!&@Q{L8#PAnNi$X((
z7aTJT3_L(A2o+;NavU#dq09_)0n2})Tmv(ixq$&0VAUy*T2WAxT3iea<YLdfl++3w
ZW+1r)!wepX?I`;H0RVZSuri+l006?gZp{Dy

literal 0
HcmV?d00001

diff --git a/rdata/tests/data/test_minimal_function_uncompiled.rda b/rdata/tests/data/test_minimal_function_uncompiled.rda
new file mode 100644
index 0000000000000000000000000000000000000000..df8d2a681093f41f1b41d6b2ac6105dd509678ba
GIT binary patch
literal 217
zcmV;~04Dz*iwFP!000000}FDAFy@NjVqjokW?*3glB_@`18ZoAo2~@|ScHL*frWt+
zNGp`27MH~5X69w)Cg#MamF6XvWaj6^1L^$Sg3O%M6b1%1W;8u)#YM?QscArSd4W6*
zunYr(DwH;cvW=m9Ml{{*K;3CTlYvYY1_=0sri3*oGcUCm$PhphG$G_t1sh!jn6QSX
zf?ueQ4;Pxj93YGH5_410%m#sf5RB@O<ebD}m_wMLg4|FKB<B}YV)_Yc1p9w9E8u|!
Tvxwm@68j$j*lgGJvjG4ASO8qx

literal 0
HcmV?d00001

diff --git a/rdata/tests/test_rdata.py b/rdata/tests/test_rdata.py
index 1dd312d..8db4698 100644
--- a/rdata/tests/test_rdata.py
+++ b/rdata/tests/test_rdata.py
@@ -220,17 +220,90 @@ def test_builtin(self) -> None:
             "test_builtin": rdata.conversion.RBuiltin(name="abs"),
         })
 
+    def test_minimal_function_uncompiled(self) -> None:
+        """Test that a minimal function can be parsed."""
+        parsed = rdata.parser.parse_file(
+            TESTDATA_PATH / "test_minimal_function_uncompiled.rda")
+        converted = rdata.conversion.convert(parsed)
+
+        converted_fun = converted["test_minimal_function_uncompiled"]
+
+        self.assertIsInstance(
+            converted_fun,
+            rdata.conversion.RFunction,
+        )
+
+        np.testing.assert_equal(converted_fun.environment, ChainMap({}))
+        np.testing.assert_equal(converted_fun.formals, None)
+        np.testing.assert_equal(
+            converted_fun.attributes,
+            {'srcref': np.array([1, 37, 1, 51, 37, 51, 1, 1])},
+        )
+
+        np.testing.assert_equal(converted_fun.body, None)
+
+    def test_minimal_function(self) -> None:
+        """Test that a minimal function (compiled) can be parsed."""
+        parsed = rdata.parser.parse_file(
+            TESTDATA_PATH / "test_minimal_function.rda")
+        converted = rdata.conversion.convert(parsed)
+
+        converted_fun = converted["test_minimal_function"]
+
+        self.assertIsInstance(
+            converted_fun,
+            rdata.conversion.RFunction,
+        )
+
+        np.testing.assert_equal(converted_fun.environment, ChainMap({}))
+        np.testing.assert_equal(converted_fun.formals, None)
+
+        converted_body = converted_fun.body
+
+        self.assertIsInstance(
+            converted_body,
+            rdata.conversion.RBytecode,
+        )
+
+        np.testing.assert_equal(converted_body.code, np.array([12, 17, 1]))
+        np.testing.assert_equal(converted_body.attributes, {})
+
     def test_empty_function_uncompiled(self) -> None:
         """Test that a simple function can be parsed."""
         parsed = rdata.parser.parse_file(
             TESTDATA_PATH / "test_empty_function_uncompiled.rda")
         converted = rdata.conversion.convert(parsed)
 
-        np.testing.assert_equal(converted, {
-            "test_empty_function": rdata.conversion.RExpression([
-                rdata.conversion.RLanguage(['^', 'base', 'exponent']),
-            ]),
-        })
+        converted_fun = converted["test_empty_function_uncompiled"]
+
+        self.assertIsInstance(
+            converted_fun,
+            rdata.conversion.RFunction,
+        )
+
+        np.testing.assert_equal(converted_fun.environment, ChainMap({}))
+        np.testing.assert_equal(converted_fun.formals, None)
+        np.testing.assert_equal(
+            converted_fun.attributes,
+            {'srcref': np.array([1, 35, 1, 47, 35, 47, 1, 1])},
+        )
+
+        converted_body = converted_fun.body
+
+        self.assertIsInstance(
+            converted_body,
+            rdata.conversion.RLanguage,
+        )
+
+        np.testing.assert_equal(converted_body.elements, ['{'])
+        np.testing.assert_equal(
+            converted_body.attributes,
+            {
+                'srcref': [np.array([1, 46, 1, 46, 46, 46, 1, 1])],
+                'srcfile': ChainMap({}, ChainMap({})),
+                'wholeSrcref': np.array([1, 0, 1, 47, 0, 47, 1, 1]),
+            },
+        )
 
     def test_empty_function(self) -> None:
         """Test that a simple function (compiled) can be parsed."""
@@ -238,11 +311,32 @@ def test_empty_function(self) -> None:
             TESTDATA_PATH / "test_empty_function.rda")
         converted = rdata.conversion.convert(parsed)
 
-        np.testing.assert_equal(converted, {
-            "test_empty_function": rdata.conversion.RExpression([
-                rdata.conversion.RLanguage(['^', 'base', 'exponent']),
-            ]),
-        })
+        converted_fun = converted["test_empty_function"]
+
+        self.assertIsInstance(
+            converted_fun,
+            rdata.conversion.RFunction,
+        )
+
+        np.testing.assert_equal(converted_fun.environment, ChainMap({}))
+        np.testing.assert_equal(converted_fun.formals, None)
+
+        converted_body = converted_fun.body
+
+        self.assertIsInstance(
+            converted_body,
+            rdata.conversion.RLanguage,
+        )
+
+        np.testing.assert_equal(converted_body.elements, ['{'])
+        np.testing.assert_equal(
+            converted_body.attributes,
+            {
+                'srcref': [np.array([1, 46, 1, 46, 46, 46, 1, 1])],
+                'srcfile': ChainMap({}, ChainMap({})),
+                'wholeSrcref': np.array([1, 0, 1, 47, 0, 47, 1, 1]),
+            },
+        )
 
     def test_function(self) -> None:
         """Test that functions can be parsed."""

From ae204b7d0d69b1ac2340e3b14a35e10c5cd1091d Mon Sep 17 00:00:00 2001
From: vnmabus <vnmabus@gmail.com>
Date: Tue, 16 Aug 2022 13:33:58 +0200
Subject: [PATCH 03/10] Functions without arguments working.

---
 rdata/conversion/_conversion.py |  91 ++++++++++++++++++-
 rdata/parser/_parser.py         | 156 ++++++++++++++++++--------------
 rdata/tests/test_rdata.py       | 114 ++++++++++++++---------
 3 files changed, 247 insertions(+), 114 deletions(-)

diff --git a/rdata/conversion/_conversion.py b/rdata/conversion/_conversion.py
index 841fa6e..901fd48 100644
--- a/rdata/conversion/_conversion.py
+++ b/rdata/conversion/_conversion.py
@@ -58,6 +58,10 @@ class RFunction:
     body: RLanguage
     attributes: StrMap
 
+    @property
+    def source(self) -> str:
+        return self.attributes["srcref"].srcfile.lines
+
 
 @dataclass
 class RBytecode:
@@ -394,6 +398,9 @@ def convert_array(
         # R matrix order is like FORTRAN
         value = np.reshape(value, shape, order='F')
 
+    dimension_names = None
+    coords = None
+
     dimnames = attrs.get('dimnames')
     if dimnames:
         if isinstance(dimnames, Mapping):
@@ -407,7 +414,11 @@ def convert_array(
                 if d is not None
             }
 
-        value = xarray.DataArray(value, dims=dimension_names, coords=coords)
+        value = xarray.DataArray(
+            value,
+            dims=dimension_names,
+            coords=coords,
+        )
 
     return value
 
@@ -480,6 +491,72 @@ def ts_constructor(
     return pandas.Series(obj, index=index)
 
 
+@dataclass
+class SrcRef:
+    first_line: int
+    first_byte: int
+    last_line: int
+    last_byte: int
+    first_column: int
+    last_column: int
+    first_parsed: int
+    last_parsed: int
+    srcfile: SrcFile
+
+
+def srcref_constructor(
+    obj: Any,
+    attrs: StrMap,
+) -> SrcRef:
+    return SrcRef(*obj, srcfile=attrs["srcfile"])
+
+
+@dataclass
+class SrcFile:
+    filename: str
+    file_encoding: str | None
+    string_encoding: str | None
+
+
+def srcfile_constructor(
+    obj: Any,
+    attrs: StrMap,
+) -> SrcFile:
+
+    filename = obj.frame["filename"][0]
+    file_encoding = obj.frame.get("encoding")
+    string_encoding = obj.frame.get("Enc")
+
+    return SrcFile(
+        filename=filename,
+        file_encoding=file_encoding,
+        string_encoding=string_encoding,
+    )
+
+
+@dataclass
+class SrcFileCopy(SrcFile):
+    lines: str
+
+
+def srcfilecopy_constructor(
+    obj: Any,
+    attrs: StrMap,
+) -> SrcFile:
+
+    filename = obj.frame["filename"][0]
+    file_encoding = obj.frame.get("encoding", (None,))[0]
+    string_encoding = obj.frame.get("Enc", (None,))[0]
+    lines = obj.frame["lines"][0]
+
+    return SrcFileCopy(
+        filename=filename,
+        file_encoding=file_encoding,
+        string_encoding=string_encoding,
+        lines=lines,
+    )
+
+
 Constructor = Callable[[Any, Mapping], Any]
 ConstructorDict = Mapping[
     Union[str, bytes],
@@ -491,6 +568,9 @@ def ts_constructor(
     "factor": factor_constructor,
     "ordered": ordered_constructor,
     "ts": ts_constructor,
+    "srcref": srcref_constructor,
+    "srcfile": srcfile_constructor,
+    "srcfilecopy": srcfilecopy_constructor,
 }
 
 DEFAULT_CLASS_MAP = MappingProxyType(default_class_map_dict)
@@ -629,8 +709,9 @@ def _convert_next(self, data: Union[parser.RData, parser.RObject]) -> Any:
             # special object
             rlanguage_list = convert_list(obj, self._convert_next)
             assert isinstance(rlanguage_list, list)
-            assert obj.attributes
-            attributes = self._convert_next(obj.attributes)
+            attributes = self._convert_next(
+                obj.attributes,
+            ) if obj.attributes else {}
 
             value = RLanguage(rlanguage_list, attributes)
 
@@ -710,8 +791,8 @@ def _convert_next(self, data: Union[parser.RData, parser.RObject]) -> Any:
         else:
             raise NotImplementedError(f"Type {obj.info.type} not implemented")
 
-        if obj.info.object:
-            classname = attrs["class"]
+        if obj.info.object and attrs is not None:
+            classname = attrs.get("class", ())
             for i, c in enumerate(classname):
 
                 constructor = self.constructor_dict.get(c, None)
diff --git a/rdata/parser/_parser.py b/rdata/parser/_parser.py
index b3836b8..dfd75b8 100644
--- a/rdata/parser/_parser.py
+++ b/rdata/parser/_parser.py
@@ -171,91 +171,110 @@ class RObjectInfo():
     reference: int
 
 
-@dataclass
-class RObject():
-    """Representation of a R object."""
+def _str_internal(
+    obj: RObject | Sequence[RObject],
+    indent: int = 0,
+    used_references: Optional[Set[int]] = None,
+) -> str:
 
-    info: RObjectInfo
-    value: Any
-    attributes: Optional[RObject]
-    tag: Optional[RObject] = None
-    referenced_object: Optional[RObject] = None
+    if used_references is None:
+        used_references = set()
 
-    def _str_internal(
-        self,
-        indent: int = 0,
-        used_references: Optional[Set[int]] = None,
-    ) -> str:
+    small_indent = indent + 2
+    big_indent = indent + 4
 
-        if used_references is None:
-            used_references = set()
+    indent_spaces = ' ' * indent
+    small_indent_spaces = ' ' * small_indent
+    big_indent_spaces = ' ' * big_indent
 
-        small_indent = indent + 2
-        big_indent = indent + 4
+    string = ""
 
-        indent_spaces = ' ' * indent
-        small_indent_spaces = ' ' * small_indent
-        big_indent_spaces = ' ' * big_indent
+    if isinstance(obj, Sequence):
+        string += f"{indent_spaces}[\n"
+        for elem in obj:
+            string += _str_internal(
+                elem,
+                big_indent,
+                used_references.copy(),
+            )
+        string += f"{indent_spaces}]\n"
+
+        return string
 
-        string = ""
+    string += f"{indent_spaces}{obj.info.type}\n"
+
+    if obj.tag:
+        tag_string = _str_internal(
+            obj.tag,
+            big_indent,
+            used_references.copy(),
+        )
+        string += f"{small_indent_spaces}tag:\n{tag_string}\n"
+
+    if obj.info.reference:
+        assert obj.referenced_object
+        reference_string = (
+            f"{big_indent_spaces}..."
+            if obj.info.reference in used_references
+            else _str_internal(
+                obj.referenced_object,
+                indent + 4, used_references.copy())
+        )
+        string += (
+            f"{small_indent_spaces}reference: "
+            f"{obj.info.reference}\n{reference_string}\n"
+        )
 
-        string += f"{indent_spaces}{self.info.type}\n"
+    string += f"{small_indent_spaces}value:\n"
 
-        if self.tag:
-            tag_string = self.tag._str_internal(
+    if isinstance(obj.value, RObject):
+        string += _str_internal(
+            obj.value,
+            big_indent,
+            used_references.copy(),
+        )
+    elif isinstance(obj.value, (tuple, list)):
+        for elem in obj.value:
+            string += _str_internal(
+                elem,
                 big_indent,
                 used_references.copy(),
             )
-            string += f"{small_indent_spaces}tag:\n{tag_string}\n"
-
-        if self.info.reference:
-            assert self.referenced_object
-            reference_string = (
-                f"{big_indent_spaces}..."
-                if self.info.reference in used_references
-                else self.referenced_object._str_internal(
-                    indent + 4, used_references.copy())
-            )
+    elif isinstance(obj.value, np.ndarray):
+        string += big_indent_spaces
+        if len(obj.value) > 4:
             string += (
-                f"{small_indent_spaces}reference: "
-                f"{self.info.reference}\n{reference_string}\n"
+                f"[{obj.value[0]}, {obj.value[1]} ... "
+                f"{obj.value[-2]}, {obj.value[-1]}]\n"
             )
+        else:
+            string += f"{obj.value}\n"
+    else:
+        string += f"{big_indent_spaces}{obj.value}\n"
 
-        string += f"{small_indent_spaces}value:\n"
+    if obj.attributes:
+        attr_string = _str_internal(
+            obj.attributes,
+            big_indent,
+            used_references.copy(),
+        )
+        string += f"{small_indent_spaces}attributes:\n{attr_string}\n"
 
-        if isinstance(self.value, RObject):
-            string += self.value._str_internal(
-                big_indent,
-                used_references.copy(),
-            )
-        elif isinstance(self.value, (tuple, list)):
-            for elem in self.value:
-                string += elem._str_internal(
-                    big_indent,
-                    used_references.copy(),
-                )
-        elif isinstance(self.value, np.ndarray):
-            string += big_indent_spaces
-            if len(self.value) > 4:
-                string += (
-                    f"[{self.value[0]}, {self.value[1]} ... "
-                    f"{self.value[-2]}, {self.value[-1]}]\n"
-                )
-            else:
-                string += f"{self.value}\n"
-        else:
-            string += f"{big_indent_spaces}{self.value}\n"
+    return string
 
-        if self.attributes:
-            attr_string = self.attributes._str_internal(
-                big_indent,
-                used_references.copy())
-            string += f"{small_indent_spaces}attributes:\n{attr_string}\n"
 
-        return string
+@dataclass
+class RObject():
+    """Representation of a R object."""
+
+    info: RObjectInfo
+    value: Any
+    attributes: Optional[RObject]
+    tag: Optional[RObject] = None
+    referenced_object: Optional[RObject] = None
 
     def __str__(self) -> str:
-        return self._str_internal()
+        return _str_internal(self)
 
 
 @dataclass
@@ -271,7 +290,7 @@ def __str__(self) -> str:
             "RData(\n"
             f"  versions: {self.versions}\n"
             f"  extra: {self.extra}\n"
-            f"  object: \n{self.object._str_internal(indent=4)}\n"
+            f"  object: \n{_str_internal(self.object, indent=4)}\n"
             ")\n"
         )
 
@@ -606,6 +625,7 @@ def parse_R_object(
         }:
             if info.type is RObjectType.ATTRLANG:
                 info.type = RObjectType.LANG
+                info.attributes = True
 
             tag = None
             if info.attributes:
@@ -639,6 +659,8 @@ def parse_R_object(
             value = (car, cdr)
 
         elif info.type == RObjectType.ENV:
+            info.object = True
+
             result = RObject(
                 info=info,
                 tag=tag,
diff --git a/rdata/tests/test_rdata.py b/rdata/tests/test_rdata.py
index 8db4698..9a3e102 100644
--- a/rdata/tests/test_rdata.py
+++ b/rdata/tests/test_rdata.py
@@ -207,7 +207,10 @@ def test_expression(self) -> None:
 
         np.testing.assert_equal(converted, {
             "test_expression": rdata.conversion.RExpression([
-                rdata.conversion.RLanguage(['^', 'base', 'exponent']),
+                rdata.conversion.RLanguage(
+                    ['^', 'base', 'exponent'],
+                    attributes={},
+                ),
             ]),
         })
 
@@ -235,13 +238,12 @@ def test_minimal_function_uncompiled(self) -> None:
 
         np.testing.assert_equal(converted_fun.environment, ChainMap({}))
         np.testing.assert_equal(converted_fun.formals, None)
+        np.testing.assert_equal(converted_fun.body, None)
         np.testing.assert_equal(
-            converted_fun.attributes,
-            {'srcref': np.array([1, 37, 1, 51, 37, 51, 1, 1])},
+            converted_fun.source,
+            "test_minimal_function_uncompiled <- function() NULL\n",
         )
 
-        np.testing.assert_equal(converted_fun.body, None)
-
     def test_minimal_function(self) -> None:
         """Test that a minimal function (compiled) can be parsed."""
         parsed = rdata.parser.parse_file(
@@ -268,6 +270,11 @@ def test_minimal_function(self) -> None:
         np.testing.assert_equal(converted_body.code, np.array([12, 17, 1]))
         np.testing.assert_equal(converted_body.attributes, {})
 
+        np.testing.assert_equal(
+            converted_fun.source,
+            "test_minimal_function <- function() NULL\n",
+        )
+
     def test_empty_function_uncompiled(self) -> None:
         """Test that a simple function can be parsed."""
         parsed = rdata.parser.parse_file(
@@ -283,26 +290,10 @@ def test_empty_function_uncompiled(self) -> None:
 
         np.testing.assert_equal(converted_fun.environment, ChainMap({}))
         np.testing.assert_equal(converted_fun.formals, None)
+        self.assertIsInstance(converted_fun.body, rdata.conversion.RLanguage)
         np.testing.assert_equal(
-            converted_fun.attributes,
-            {'srcref': np.array([1, 35, 1, 47, 35, 47, 1, 1])},
-        )
-
-        converted_body = converted_fun.body
-
-        self.assertIsInstance(
-            converted_body,
-            rdata.conversion.RLanguage,
-        )
-
-        np.testing.assert_equal(converted_body.elements, ['{'])
-        np.testing.assert_equal(
-            converted_body.attributes,
-            {
-                'srcref': [np.array([1, 46, 1, 46, 46, 46, 1, 1])],
-                'srcfile': ChainMap({}, ChainMap({})),
-                'wholeSrcref': np.array([1, 0, 1, 47, 0, 47, 1, 1]),
-            },
+            converted_fun.source,
+            "test_empty_function_uncompiled <- function() {}\n",
         )
 
     def test_empty_function(self) -> None:
@@ -325,17 +316,15 @@ def test_empty_function(self) -> None:
 
         self.assertIsInstance(
             converted_body,
-            rdata.conversion.RLanguage,
+            rdata.conversion.RBytecode,
         )
 
-        np.testing.assert_equal(converted_body.elements, ['{'])
+        np.testing.assert_equal(converted_body.code, np.array([12, 17, 1]))
+        np.testing.assert_equal(converted_body.attributes, {})
+
         np.testing.assert_equal(
-            converted_body.attributes,
-            {
-                'srcref': [np.array([1, 46, 1, 46, 46, 46, 1, 1])],
-                'srcfile': ChainMap({}, ChainMap({})),
-                'wholeSrcref': np.array([1, 0, 1, 47, 0, 47, 1, 1]),
-            },
+            converted_fun.source,
+            "test_empty_function <- function() {}\n",
         )
 
     def test_function(self) -> None:
@@ -344,11 +333,33 @@ def test_function(self) -> None:
             TESTDATA_PATH / "test_function.rda")
         converted = rdata.conversion.convert(parsed)
 
-        np.testing.assert_equal(converted, {
-            "test_function": rdata.conversion.RExpression([
-                rdata.conversion.RLanguage(['^', 'base', 'exponent']),
-            ]),
-        })
+        converted_fun = converted["test_function"]
+
+        self.assertIsInstance(
+            converted_fun,
+            rdata.conversion.RFunction,
+        )
+
+        np.testing.assert_equal(converted_fun.environment, ChainMap({}))
+        np.testing.assert_equal(converted_fun.formals, None)
+
+        converted_body = converted_fun.body
+
+        self.assertIsInstance(
+            converted_body,
+            rdata.conversion.RBytecode,
+        )
+
+        np.testing.assert_equal(
+            converted_body.code,
+            np.array([12, 23, 1, 34, 4, 38, 2, 1]),
+        )
+        np.testing.assert_equal(converted_body.attributes, {})
+
+        np.testing.assert_equal(
+            converted_fun.source,
+            "test_function <- function() {print(\"Hello\")}\n",
+        )
 
     def test_function_arg(self) -> None:
         """Test that functions can be parsed."""
@@ -356,11 +367,30 @@ def test_function_arg(self) -> None:
             TESTDATA_PATH / "test_function_arg.rda")
         converted = rdata.conversion.convert(parsed)
 
-        np.testing.assert_equal(converted, {
-            "test_function_arg": rdata.conversion.RExpression([
-                rdata.conversion.RLanguage(['^', 'base', 'exponent']),
-            ]),
-        })
+        converted_fun = converted["test_function_arg"]
+
+        self.assertIsInstance(
+            converted_fun,
+            rdata.conversion.RFunction,
+        )
+
+        np.testing.assert_equal(converted_fun.environment, ChainMap({}))
+        np.testing.assert_equal(converted_fun.formals, None)
+
+        converted_body = converted_fun.body
+
+        self.assertIsInstance(
+            converted_body,
+            rdata.conversion.RBytecode,
+        )
+
+        np.testing.assert_equal(converted_body.code, np.array([12, 17, 1]))
+        np.testing.assert_equal(converted_body.attributes, {})
+
+        np.testing.assert_equal(
+            converted_fun.source,
+            "test_function <- function() {}\n",
+        )
 
     def test_encodings(self) -> None:
         """Test of differents encodings."""

From 096819635fc184b1eaaebe7fae263a157694ca92 Mon Sep 17 00:00:00 2001
From: vnmabus <vnmabus@gmail.com>
Date: Thu, 18 Aug 2022 12:27:27 +0200
Subject: [PATCH 04/10] Improve support for functions.

---
 rdata/conversion/_conversion.py        |   3 +++
 rdata/parser/_parser.py                |  15 ++++++++++++---
 rdata/tests/data/test_function_arg.rda | Bin 298 -> 409 bytes
 rdata/tests/test_rdata.py              |   9 ++++++---
 4 files changed, 21 insertions(+), 6 deletions(-)

diff --git a/rdata/conversion/_conversion.py b/rdata/conversion/_conversion.py
index 901fd48..e844972 100644
--- a/rdata/conversion/_conversion.py
+++ b/rdata/conversion/_conversion.py
@@ -772,6 +772,9 @@ def _convert_next(self, data: Union[parser.RData, parser.RObject]) -> Any:
         elif obj.info.type == parser.RObjectType.EMPTYENV:
             value = self.empty_environment
 
+        elif obj.info.type == parser.RObjectType.MISSINGARG:
+            value = NotImplemented
+
         elif obj.info.type == parser.RObjectType.GLOBALENV:
             value = self.global_environment
 
diff --git a/rdata/parser/_parser.py b/rdata/parser/_parser.py
index dfd75b8..3d3b5ff 100644
--- a/rdata/parser/_parser.py
+++ b/rdata/parser/_parser.py
@@ -119,6 +119,8 @@ class RObjectType(enum.Enum):
 
 
 BYTECODE_SPECIAL_SET = {
+    RObjectType.BCODE,
+    RObjectType.BCREPREF,
     RObjectType.BCREPDEF,
     RObjectType.LANG,
     RObjectType.LIST,
@@ -551,15 +553,19 @@ def _parse_bytecode(
         bytecode_rep_list: List[RObject | None] | None = None,
     ) -> Tuple[RObject, Sequence[RObject]]:
         """Parse R bytecode."""
-        n_repeated = self.parse_int()
+        if bytecode_rep_list is None:
+            n_repeated = self.parse_int()
 
         code = self.parse_R_object(reference_list, bytecode_rep_list)
 
+        if bytecode_rep_list is None:
+            bytecode_rep_list = [None] * n_repeated
+
         n_constants = self.parse_int()
         constants = [
             self._parse_bytecode_constant(
                 reference_list,
-                [None] * n_repeated,
+                bytecode_rep_list,
             )
             for _ in range(n_constants)
         ]
@@ -583,7 +589,10 @@ def parse_R_object(
             and RObjectType(info_int) in BYTECODE_SPECIAL_SET
         ):
             info = parse_r_object_info(info_int)
-            info.tag = True
+            info.tag = info.type not in {
+                RObjectType.BCREPREF,
+                RObjectType.BCODE,
+            }
         else:
             info_int = self.parse_int()
             info = parse_r_object_info(info_int)
diff --git a/rdata/tests/data/test_function_arg.rda b/rdata/tests/data/test_function_arg.rda
index 13cdae917fbc323a9f60d17bb8f77de5847ee52d..c97c3ce17e72020f448018e2e056ac4a9d42bed7 100644
GIT binary patch
literal 409
zcmV;K0cQRmiwFP!000001HDsAOT$1Ap61cUKA__3B$NuGtvz}1;86sxKA^V};#LDO
zE!l`l5&X%b-o)0~WVYRIQYZyon4Q^~*>AqgPA<>7$K`Gr08Fq9+-)?$zPUc{oIv7%
z0$8wydOc=5?)4u-FZKsv&yDT?il&xS<dGM#J_6P7<RlS*ZPIPB+jK5yw@Y}t?*|M`
z3)1zWrPzTVGLA-FaUNx;HBLJXc|LFt8>3<5hcQ~OWev~~1w;3NX_#V-Q<*2=1umBa
z402Y8xHlNSWWR+VN)xTbv{|wc{Ak4rE^glx+)4@p$j(+#cBqrP)Utib5xx@2dkScX
z(i>@P?q!;aZYpPb858eWlV#}7q3)4xk=<GqRnMMgRi5q#0lUf@Hc*BQvctA~@u`6N
zA|vXs2z*hnn3HHH6Q?8#iW6n9q()v9;r}FG?XQ}d3{Z^*MGViT(k(mH&i^ox$kyA8
zfr6B|U1!h3h;fd8GrtIL+4BM%V<x#c)s#gphLgq6WhHJB*IX=1a~=Nz3&2qcumu1B
DfP=md

literal 298
zcmV+_0oDE=iwFP!000001D#S!OT$1Ao;;hT2ui_os2AJXlNS#j#UG#*dMiuVXdrG%
z5)mo>X|H1AWOoua8xO^W-FbiCWHJwTo9l2B0)PXqjjx9)cu(v5)eT4wSm44K<ypnc
zYMZ=?xJq-eWyLdq?-)sbS;PfT5a<N0Kt=%0NiIoWl5H8)Bdku+jHBv;I$uVLm!*Q2
zsGMrft3hkg?J8>ROE!=8yCM}8YM-G27&L@}y>bJyIR|z74a6BM+XifMPN?xX-+c_f
zg&;<?LD3DXC4wJ=%wqU_tDx<xpxwYsC3%0!GfmDko2Kkwom(H%N)Ppd<bw3Ye^Jfs
wE^G4sl4tz!uf+jvabUPO$CJz3O?9uP2fqGa<%Xz0P-|6(UtJN$xFiDr0OEs*K>z>%

diff --git a/rdata/tests/test_rdata.py b/rdata/tests/test_rdata.py
index 9a3e102..9b9e479 100644
--- a/rdata/tests/test_rdata.py
+++ b/rdata/tests/test_rdata.py
@@ -375,7 +375,7 @@ def test_function_arg(self) -> None:
         )
 
         np.testing.assert_equal(converted_fun.environment, ChainMap({}))
-        np.testing.assert_equal(converted_fun.formals, None)
+        np.testing.assert_equal(converted_fun.formals, {"a": NotImplemented})
 
         converted_body = converted_fun.body
 
@@ -384,12 +384,15 @@ def test_function_arg(self) -> None:
             rdata.conversion.RBytecode,
         )
 
-        np.testing.assert_equal(converted_body.code, np.array([12, 17, 1]))
+        np.testing.assert_equal(
+            converted_body.code,
+            np.array([12, 23, 1, 29, 4, 38, 2, 1]),
+        )
         np.testing.assert_equal(converted_body.attributes, {})
 
         np.testing.assert_equal(
             converted_fun.source,
-            "test_function <- function() {}\n",
+            "test_function_arg <- function(a) {print(a)}\n",
         )
 
     def test_encodings(self) -> None:

From 7ad8338d335580660d793c5c34314f91ccf21469 Mon Sep 17 00:00:00 2001
From: vnmabus <vnmabus@gmail.com>
Date: Thu, 18 Aug 2022 12:39:31 +0200
Subject: [PATCH 05/10] Store constants.

---
 rdata/conversion/_conversion.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/rdata/conversion/_conversion.py b/rdata/conversion/_conversion.py
index e844972..b5ad3a4 100644
--- a/rdata/conversion/_conversion.py
+++ b/rdata/conversion/_conversion.py
@@ -14,6 +14,7 @@
     MutableMapping,
     NamedTuple,
     Optional,
+    Sequence,
     Union,
     cast,
 )
@@ -68,6 +69,7 @@ class RBytecode:
     """R bytecode."""
 
     code: xarray.DataArray
+    constants: Sequence[Any]
     attributes: StrMap
 
 
@@ -763,6 +765,7 @@ def _convert_next(self, data: Union[parser.RData, parser.RObject]) -> Any:
 
             value = RBytecode(
                 code=self._convert_next(obj.value[0]),
+                constants=[self._convert_next(c) for c in obj.value[1]],
                 attributes=attrs,
             )
 

From 552d71934f1c46ee5f7de1fc04120e597d0db1c0 Mon Sep 17 00:00:00 2001
From: vnmabus <vnmabus@gmail.com>
Date: Thu, 18 Aug 2022 12:49:07 +0200
Subject: [PATCH 06/10] Fix for multiline functions.

---
 rdata/conversion/_conversion.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/rdata/conversion/_conversion.py b/rdata/conversion/_conversion.py
index b5ad3a4..033372c 100644
--- a/rdata/conversion/_conversion.py
+++ b/rdata/conversion/_conversion.py
@@ -61,7 +61,7 @@ class RFunction:
 
     @property
     def source(self) -> str:
-        return self.attributes["srcref"].srcfile.lines
+        return "\n".join(self.attributes["srcref"].srcfile.lines)
 
 
 @dataclass
@@ -538,7 +538,7 @@ def srcfile_constructor(
 
 @dataclass
 class SrcFileCopy(SrcFile):
-    lines: str
+    lines: Sequence[str]
 
 
 def srcfilecopy_constructor(
@@ -549,7 +549,7 @@ def srcfilecopy_constructor(
     filename = obj.frame["filename"][0]
     file_encoding = obj.frame.get("encoding", (None,))[0]
     string_encoding = obj.frame.get("Enc", (None,))[0]
-    lines = obj.frame["lines"][0]
+    lines = obj.frame["lines"]
 
     return SrcFileCopy(
         filename=filename,

From 02ad781990139b5235b0d318e19d5300cd918259 Mon Sep 17 00:00:00 2001
From: vnmabus <vnmabus@gmail.com>
Date: Thu, 18 Aug 2022 14:24:13 +0200
Subject: [PATCH 07/10] Try parsing external pointers (just to allow loading
 the remaining part of the data).

---
 rdata/conversion/_conversion.py |  15 +++++++++++++++
 rdata/parser/_parser.py         |  22 ++++++++++++++++++++++
 rdata/tests/data/test_file.rda  | Bin 0 -> 124 bytes
 rdata/tests/test_rdata.py       |  15 +++++++++++++++
 4 files changed, 52 insertions(+)
 create mode 100644 rdata/tests/data/test_file.rda

diff --git a/rdata/conversion/_conversion.py b/rdata/conversion/_conversion.py
index 033372c..3374acb 100644
--- a/rdata/conversion/_conversion.py
+++ b/rdata/conversion/_conversion.py
@@ -64,6 +64,14 @@ def source(self) -> str:
         return "\n".join(self.attributes["srcref"].srcfile.lines)
 
 
+@dataclass
+class RExternalPointer:
+    """R external pointer."""
+
+    protected: Any
+    tag: Any
+
+
 @dataclass
 class RBytecode:
     """R bytecode."""
@@ -769,6 +777,13 @@ def _convert_next(self, data: Union[parser.RData, parser.RObject]) -> Any:
                 attributes=attrs,
             )
 
+        elif obj.info.type == parser.RObjectType.EXTPTR:
+
+            value = RExternalPointer(
+                protected=self._convert_next(obj.value[0]),
+                tag=self._convert_next(obj.value[1]),
+            )
+
         elif obj.info.type == parser.RObjectType.S4:
             value = SimpleNamespace(**attrs)
 
diff --git a/rdata/parser/_parser.py b/rdata/parser/_parser.py
index 3d3b5ff..dec98b4 100644
--- a/rdata/parser/_parser.py
+++ b/rdata/parser/_parser.py
@@ -760,6 +760,28 @@ def parse_R_object(
             value = self._parse_bytecode(reference_list, bytecode_rep_list)
             tag_read = True
 
+        elif info.type == RObjectType.EXTPTR:
+
+            result = RObject(
+                info=info,
+                tag=tag,
+                attributes=attributes,
+                value=None,
+                referenced_object=referenced_object,
+            )
+
+            reference_list.append(result)
+            protected = self.parse_R_object(
+                reference_list,
+                bytecode_rep_list,
+            )
+            extptr_tag = self.parse_R_object(
+                reference_list,
+                bytecode_rep_list,
+            )
+
+            value = (protected, extptr_tag)
+
         elif info.type == RObjectType.S4:
             value = None
 
diff --git a/rdata/tests/data/test_file.rda b/rdata/tests/data/test_file.rda
new file mode 100644
index 0000000000000000000000000000000000000000..5cee314a85e63c559b3a9225f838895d976b8f97
GIT binary patch
literal 124
zcmb2|=3oE=X6~X+gJ)e2k`fXU(h?GxCarN$W6sX#n7xjbIijU;Vq<Hgut)K<C1TT$
zEKhy5glX!>Ce;S5iy|A<o~ww7GfOf#oM917Va#sYuyfMq&!2)OdmDZJd|}I`=9EXq
b!3{o!3jf+wS{HHZeCB639HS7h6KE*_O0F?1

literal 0
HcmV?d00001

diff --git a/rdata/tests/test_rdata.py b/rdata/tests/test_rdata.py
index 9b9e479..6be9474 100644
--- a/rdata/tests/test_rdata.py
+++ b/rdata/tests/test_rdata.py
@@ -200,6 +200,21 @@ def test_list(self) -> None:
                 ],
         })
 
+    def test_file(self) -> None:
+        """Test that external pointers can be parsed."""
+        parsed = rdata.parser.parse_file(TESTDATA_PATH / "test_file.rda")
+        converted = rdata.conversion.convert(parsed)
+
+        np.testing.assert_equal(converted, {
+            "test_file":
+                [
+                    np.array([1.0]),
+                    ['a', 'b', 'c'],
+                    np.array([2.0, 3.0]),
+                    ['hi'],
+                ],
+        })
+
     def test_expression(self) -> None:
         """Test that expressions can be parsed."""
         parsed = rdata.parser.parse_file(TESTDATA_PATH / "test_expression.rda")

From fda1ca528a1792f7a02b5e24d44e88c5ac4e574e Mon Sep 17 00:00:00 2001
From: vnmabus <vnmabus@gmail.com>
Date: Thu, 18 Aug 2022 14:26:11 +0200
Subject: [PATCH 08/10] Fix tests.

---
 rdata/tests/test_rdata.py | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/rdata/tests/test_rdata.py b/rdata/tests/test_rdata.py
index 6be9474..cb35604 100644
--- a/rdata/tests/test_rdata.py
+++ b/rdata/tests/test_rdata.py
@@ -206,13 +206,7 @@ def test_file(self) -> None:
         converted = rdata.conversion.convert(parsed)
 
         np.testing.assert_equal(converted, {
-            "test_file":
-                [
-                    np.array([1.0]),
-                    ['a', 'b', 'c'],
-                    np.array([2.0, 3.0]),
-                    ['hi'],
-                ],
+            "test_file": [5],
         })
 
     def test_expression(self) -> None:

From ee654f9524f2b8ca7b3c15ed9bc5f21f34194929 Mon Sep 17 00:00:00 2001
From: vnmabus <vnmabus@gmail.com>
Date: Fri, 2 Sep 2022 17:11:46 +0200
Subject: [PATCH 09/10] Add explicit reexports.

---
 rdata/conversion/__init__.py | 36 ++++++++++++++++++------------------
 rdata/parser/__init__.py     | 16 ++++++++--------
 setup.cfg                    |  2 ++
 3 files changed, 28 insertions(+), 26 deletions(-)

diff --git a/rdata/conversion/__init__.py b/rdata/conversion/__init__.py
index 2d601d7..8f8926c 100644
--- a/rdata/conversion/__init__.py
+++ b/rdata/conversion/__init__.py
@@ -1,20 +1,20 @@
 from ._conversion import (
-    DEFAULT_CLASS_MAP,
-    Converter,
-    RBuiltin,
-    RBytecode,
-    RExpression,
-    RFunction,
-    RLanguage,
-    SimpleConverter,
-    convert,
-    convert_array,
-    convert_attrs,
-    convert_char,
-    convert_list,
-    convert_symbol,
-    convert_vector,
-    dataframe_constructor,
-    factor_constructor,
-    ts_constructor,
+    DEFAULT_CLASS_MAP as DEFAULT_CLASS_MAP,
+    Converter as Converter,
+    RBuiltin as RBuiltin,
+    RBytecode as RBytecode,
+    RExpression as RExpression,
+    RFunction as RFunction,
+    RLanguage as RLanguage,
+    SimpleConverter as SimpleConverter,
+    convert as convert,
+    convert_array as convert_array,
+    convert_attrs as convert_attrs,
+    convert_char as convert_char,
+    convert_list as convert_list,
+    convert_symbol as convert_symbol,
+    convert_vector as convert_vector,
+    dataframe_constructor as dataframe_constructor,
+    factor_constructor as factor_constructor,
+    ts_constructor as ts_constructor,
 )
diff --git a/rdata/parser/__init__.py b/rdata/parser/__init__.py
index 1810e4b..8af47f3 100644
--- a/rdata/parser/__init__.py
+++ b/rdata/parser/__init__.py
@@ -1,12 +1,12 @@
 """Utilities for parsing a rdata file."""
 
 from ._parser import (
-    DEFAULT_ALTREP_MAP,
-    CharFlags,
-    RData,
-    RObject,
-    RObjectInfo,
-    RObjectType,
-    parse_data,
-    parse_file,
+    DEFAULT_ALTREP_MAP as DEFAULT_ALTREP_MAP,
+    CharFlags as CharFlags,
+    RData as RData,
+    RObject as RObject,
+    RObjectInfo as RObjectInfo,
+    RObjectType as RObjectType,
+    parse_data as parse_data,
+    parse_file as parse_file,
 )
diff --git a/setup.cfg b/setup.cfg
index 5794b78..a53dbc5 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -90,6 +90,8 @@ per-file-ignores =
 	__init__.py:
 		# Unused modules are allowed in `__init__.py`, to reduce imports
 		F401,
+		# Explicit re-exports allowed in __init__
+		WPS113,
 		# Import multiple names is allowed in `__init__.py`
 		WPS235,
 		# Logic is allowed in `__init__.py`

From fae6ad4b37021a4207e19dfaa423834d1a8abf68 Mon Sep 17 00:00:00 2001
From: vnmabus <vnmabus@gmail.com>
Date: Fri, 2 Sep 2022 17:18:25 +0200
Subject: [PATCH 10/10] Bump version.

---
 rdata/VERSION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rdata/VERSION b/rdata/VERSION
index ce609ca..9a7d84f 100644
--- a/rdata/VERSION
+++ b/rdata/VERSION
@@ -1 +1 @@
-0.8
\ No newline at end of file
+0.9
\ No newline at end of file