From e455ad29e11d1dcb722c61816c64db88b8724848 Mon Sep 17 00:00:00 2001 From: vnmabus Date: Thu, 23 Jun 2022 12:40:19 +0200 Subject: [PATCH 01/10] Some (unsuccessful) parse tests for functions. --- rdata/conversion/__init__.py | 1 + rdata/conversion/_conversion.py | 12 ++ rdata/parser/_parser.py | 117 +++++++++++++++--- rdata/tests/data/test_builtin.rda | Bin 0 -> 79 bytes rdata/tests/data/test_empty_function.rda | Bin 0 -> 323 bytes .../data/test_empty_function_uncompiled.rda | Bin 0 -> 271 bytes rdata/tests/data/test_function.rda | Bin 0 -> 382 bytes rdata/tests/data/test_function_arg.rda | Bin 0 -> 298 bytes rdata/tests/test_rdata.py | 57 +++++++++ 9 files changed, 170 insertions(+), 17 deletions(-) create mode 100644 rdata/tests/data/test_builtin.rda create mode 100644 rdata/tests/data/test_empty_function.rda create mode 100644 rdata/tests/data/test_empty_function_uncompiled.rda create mode 100644 rdata/tests/data/test_function.rda create mode 100644 rdata/tests/data/test_function_arg.rda diff --git a/rdata/conversion/__init__.py b/rdata/conversion/__init__.py index b0391e6..c8e5535 100644 --- a/rdata/conversion/__init__.py +++ b/rdata/conversion/__init__.py @@ -1,6 +1,7 @@ from ._conversion import ( DEFAULT_CLASS_MAP, Converter, + RBuiltin, RExpression, RLanguage, SimpleConverter, diff --git a/rdata/conversion/_conversion.py b/rdata/conversion/_conversion.py index 8c9c217..1408508 100644 --- a/rdata/conversion/_conversion.py +++ b/rdata/conversion/_conversion.py @@ -1,5 +1,6 @@ import abc import warnings +from dataclasses import dataclass from fractions import Fraction from types import MappingProxyType, SimpleNamespace from typing import ( @@ -38,6 +39,13 @@ class RExpression(NamedTuple): elements: List[RLanguage] +@dataclass +class RBuiltin(): + """R builtin.""" + + name: str + + def convert_list( r_list: parser.RObject, conversion_function: ConversionFunction, @@ -576,6 +584,10 @@ def _convert_next(self, data: Union[parser.RData, parser.RObject]) -> Any: 
value = RLanguage(rlanguage_list) + elif obj.info.type in {parser.RObjectType.SPECIAL, parser.RObjectType.BUILTIN}: + + value = RBuiltin(name=obj.value.decode("ascii")) + elif obj.info.type == parser.RObjectType.CHAR: # Return the internal string diff --git a/rdata/parser/_parser.py b/rdata/parser/_parser.py index 805fd04..7cc51a7 100644 --- a/rdata/parser/_parser.py +++ b/rdata/parser/_parser.py @@ -19,6 +19,7 @@ List, Mapping, Optional, + Sequence, Set, TextIO, Tuple, @@ -106,7 +107,12 @@ class RObjectType(enum.Enum): RAW = 24 # raw vector S4 = 25 # S4 classes not of simple type ALTREP = 238 # Alternative representations + ATTRLIST = 239 # Bytecode attribute + ATTRLANG = 240 # Bytecode attribute EMPTYENV = 242 # Empty environment + BCREPREF = 243 # Bytecode repetition reference + BCREPDEF = 244 # Bytecode repetition definition + MISSINGARG = 251 # Missing argument GLOBALENV = 253 # Global environment NILVALUE = 254 # NIL value REF = 255 # Reference @@ -488,9 +494,28 @@ def expand_altrep_to_object( constructor = self.altrep_constructor_dict[altrep_name] return constructor(state) + def _parse_bytecode( + self, + reference_list: Optional[List[RObject]], + bytecode_rep_list: List[RObject | None] | None = None, + ) -> Tuple[RObject, Sequence[RObject]]: + """Parse R bytecode.""" + n_repeated = self.parse_int() + + code = self.parse_R_object(reference_list, bytecode_rep_list) + + n_constants = self.parse_int() + constants = [ + self.parse_R_object(reference_list, [None] * n_repeated) + for _ in range(n_constants) + ] + + return (code, constants) + def parse_R_object( self, - reference_list: Optional[List[RObject]] = None, + reference_list: List[RObject] | None = None, + bytecode_rep_list: List[RObject | None] | None = None, ) -> RObject: """Parse a R object.""" if reference_list is None: @@ -505,6 +530,7 @@ def parse_R_object( attributes = None referenced_object = None + bytecode_rep_position = -1 tag_read = False attributes_read = False add_reference = False @@ -513,27 
+539,47 @@ def parse_R_object( value: Any + if info.type == RObjectType.BCREPDEF: + assert bytecode_rep_list + bytecode_rep_position = self.parse_int() + info.type = RObjectType(self.parse_int()) + if info.type == RObjectType.NIL: value = None elif info.type == RObjectType.SYM: # Read Char - value = self.parse_R_object(reference_list) + value = self.parse_R_object(reference_list, bytecode_rep_list) # Symbols can be referenced add_reference = True - elif info.type in {RObjectType.LIST, RObjectType.LANG}: + elif info.type in { + RObjectType.LIST, + RObjectType.LANG, + RObjectType.CLO, + RObjectType.PROM, + RObjectType.DOT, + RObjectType.ATTRLANG, + }: + if info.type is RObjectType.ATTRLANG: + info.type = RObjectType.LANG + info.attributes = True + tag = None if info.attributes: - attributes = self.parse_R_object(reference_list) + attributes = self.parse_R_object( + reference_list, + bytecode_rep_list, + ) attributes_read = True - elif info.tag: - tag = self.parse_R_object(reference_list) + + if info.tag: + tag = self.parse_R_object(reference_list, bytecode_rep_list) tag_read = True # Read CAR and CDR - car = self.parse_R_object(reference_list) - cdr = self.parse_R_object(reference_list) + car = self.parse_R_object(reference_list, bytecode_rep_list) + cdr = self.parse_R_object(reference_list, bytecode_rep_list) value = (car, cdr) elif info.type == RObjectType.ENV: @@ -548,10 +594,10 @@ def parse_R_object( reference_list.append(result) locked = self.parse_bool() - enclosure = self.parse_R_object(reference_list) - frame = self.parse_R_object(reference_list) - hash_table = self.parse_R_object(reference_list) - attributes = self.parse_R_object(reference_list) + enclosure = self.parse_R_object(reference_list, bytecode_rep_list) + frame = self.parse_R_object(reference_list, bytecode_rep_list) + hash_table = self.parse_R_object(reference_list, bytecode_rep_list) + attributes = self.parse_R_object(reference_list, bytecode_rep_list) value = EnvironmentValue( locked=locked, @@ 
-560,6 +606,11 @@ def parse_R_object( hash_table=hash_table, ) + elif info.type in {RObjectType.SPECIAL, RObjectType.BUILTIN}: + length = self.parse_int() + if length > 0: + value = self.parse_string(length=length) + elif info.type == RObjectType.CHAR: length = self.parse_int() if length > 0: @@ -615,15 +666,28 @@ def parse_R_object( value = [None] * length for i in range(length): - value[i] = self.parse_R_object(reference_list) + value[i] = self.parse_R_object( + reference_list, bytecode_rep_list) + + elif info.type == RObjectType.BCODE: + value = self._parse_bytecode(reference_list, bytecode_rep_list) elif info.type == RObjectType.S4: value = None elif info.type == RObjectType.ALTREP: - altrep_info = self.parse_R_object(reference_list) - altrep_state = self.parse_R_object(reference_list) - altrep_attr = self.parse_R_object(reference_list) + altrep_info = self.parse_R_object( + reference_list, + bytecode_rep_list, + ) + altrep_state = self.parse_R_object( + reference_list, + bytecode_rep_list, + ) + altrep_attr = self.parse_R_object( + reference_list, + bytecode_rep_list, + ) if self.expand_altrep: info, value = self.expand_altrep_to_object( @@ -637,6 +701,16 @@ def parse_R_object( elif info.type == RObjectType.EMPTYENV: value = None + elif info.type == RObjectType.BCREPREF: + assert bytecode_rep_list + position = self.parse_int() + result = bytecode_rep_list[position] + assert result + return result + + elif info.type == RObjectType.MISSINGARG: + value = None + elif info.type == RObjectType.GLOBALENV: value = None @@ -657,7 +731,7 @@ def parse_R_object( "and ignored", ) if info.attributes and not attributes_read: - attributes = self.parse_R_object(reference_list) + attributes = self.parse_R_object(reference_list, bytecode_rep_list) if result is None: result = RObject( @@ -676,6 +750,10 @@ def parse_R_object( if add_reference: reference_list.append(result) + if bytecode_rep_position >= 0: + assert bytecode_rep_list + bytecode_rep_list[bytecode_rep_position] = 
result + return result @@ -717,6 +795,11 @@ def parse_string(self, length: int) -> bytes: # noqa: D102 self.position += length return bytes(result) + def parse_all(self) -> RData: + rdata = super().parse_all() + assert self.position == len(self.data) + return rdata + def parse_file( file_or_path: Union[BinaryIO, TextIO, 'os.PathLike[Any]', str], diff --git a/rdata/tests/data/test_builtin.rda b/rdata/tests/data/test_builtin.rda new file mode 100644 index 0000000000000000000000000000000000000000..48279c6732fbbf5cbdf3d196c98b3fade39b2c98 GIT binary patch literal 79 zcmb2|=3oE=X6~X+gJ)e2k`fXU(h?GxCarN$W6sX#n7xjbIijU;Vq literal 0 HcmV?d00001 diff --git a/rdata/tests/data/test_empty_function.rda b/rdata/tests/data/test_empty_function.rda new file mode 100644 index 0000000000000000000000000000000000000000..d8dd79fc8e596032d87880642748250701648db4 GIT binary patch literal 323 zcmV-J0lfYniwFP!000001GQ31Yr;Sfp1jmX0~Qf_3}~S}cy0vecb zhI)`Op3T`Z$<}lKJ@T?3jsRMQlGfs>mogur9P~QHAAk$eW3tEOYf5z+tNkEkXqq6- zkCI}ALBu#3eJOa^(42cWaZCH^+TDEG3Q{L$iWWOWXYJ3{Kvegj` VT}UHFPq5eOw|@gmme8RC0083anKS?Z literal 0 HcmV?d00001 diff --git a/rdata/tests/data/test_empty_function_uncompiled.rda b/rdata/tests/data/test_empty_function_uncompiled.rda new file mode 100644 index 0000000000000000000000000000000000000000..205628f55c626eb9e4e29e5e1b4636326cda20bd GIT binary patch literal 271 zcmV+q0r37GiwFP!000000}FDAFy@NjVqjokW?*3glB_@`18ZoAo2~@|ScHL*frWt+ zNXwO^7MH}Q<`$Gx#;29$C6{F8=fwl*{M>@foYWKs1~z6i9c;x#$wjGYKvQ{vJPxo7 z1A{V@)`zn7p?pR(-RwZ!X+V>KOcn+R_=Ki}H77GKwHU|{KoT^-?@k39T?Lr1hNePw zEf<=h93V^c5_410%msme5RB@G!PsJy0VzfqFk_T> V%fT2X!&nUy1OPwF#`C2D008-ubvOV3 literal 0 HcmV?d00001 diff --git a/rdata/tests/data/test_function.rda b/rdata/tests/data/test_function.rda new file mode 100644 index 0000000000000000000000000000000000000000..3e0940f4a2252dd53ef5c796fb0a29437387544e GIT binary patch literal 382 zcmV-^0fGJ>iwFP!000001GQ64OT$1Ao#wNq4HQK@RYDKa(&psFgGUj)`T@O3iCYZB 
zq+}yjiqM}d>ebdsG6|b)3Pmt5o!Ob$_g*KHn~U+OHMRgi1x>+U9YfIX?=Ial$TH9X z4ffF63FFDb^dA2^Fy?1xFecg2Dr>J8p439*?nhLdL|fu_$Yx2gg!nD9b@ zLdg~>_oCU`=AR)5b1hMn(<+GIhx}oNjRU7Cw$YAgYCRgy3H78o6YV2sQBszJyjFP& zUBP#SGra{B?`i8@lvYrW$qy+W{uNd3F1uQlO=y#e&Vn+MT>K5IPzQns8e9 zrlL{PsAnrCG=s#YXUiunRpC*nl?wZ!oc&+Z%__B$O3Gy}LD_D#G#xgd#f)?O+W2+& c$mT>SX|H1AWOoua8xO^W-FbiCWHJwTo9l2B0)PXqjjx9)cu(v5)eT4wSm44KROE!=8yCM}8YM-G27&L@}y>bJyIR|z74a6BM+XifMPN?xX-+c_f zg&;z>% literal 0 HcmV?d00001 diff --git a/rdata/tests/test_rdata.py b/rdata/tests/test_rdata.py index 38f4c65..1dd312d 100644 --- a/rdata/tests/test_rdata.py +++ b/rdata/tests/test_rdata.py @@ -211,6 +211,63 @@ def test_expression(self) -> None: ]), }) + def test_builtin(self) -> None: + """Test that builtin functions can be parsed.""" + parsed = rdata.parser.parse_file(TESTDATA_PATH / "test_builtin.rda") + converted = rdata.conversion.convert(parsed) + + np.testing.assert_equal(converted, { + "test_builtin": rdata.conversion.RBuiltin(name="abs"), + }) + + def test_empty_function_uncompiled(self) -> None: + """Test that a simple function can be parsed.""" + parsed = rdata.parser.parse_file( + TESTDATA_PATH / "test_empty_function_uncompiled.rda") + converted = rdata.conversion.convert(parsed) + + np.testing.assert_equal(converted, { + "test_empty_function": rdata.conversion.RExpression([ + rdata.conversion.RLanguage(['^', 'base', 'exponent']), + ]), + }) + + def test_empty_function(self) -> None: + """Test that a simple function (compiled) can be parsed.""" + parsed = rdata.parser.parse_file( + TESTDATA_PATH / "test_empty_function.rda") + converted = rdata.conversion.convert(parsed) + + np.testing.assert_equal(converted, { + "test_empty_function": rdata.conversion.RExpression([ + rdata.conversion.RLanguage(['^', 'base', 'exponent']), + ]), + }) + + def test_function(self) -> None: + """Test that functions can be parsed.""" + parsed = rdata.parser.parse_file( + TESTDATA_PATH / "test_function.rda") + converted = 
rdata.conversion.convert(parsed) + + np.testing.assert_equal(converted, { + "test_function": rdata.conversion.RExpression([ + rdata.conversion.RLanguage(['^', 'base', 'exponent']), + ]), + }) + + def test_function_arg(self) -> None: + """Test that functions can be parsed.""" + parsed = rdata.parser.parse_file( + TESTDATA_PATH / "test_function_arg.rda") + converted = rdata.conversion.convert(parsed) + + np.testing.assert_equal(converted, { + "test_function_arg": rdata.conversion.RExpression([ + rdata.conversion.RLanguage(['^', 'base', 'exponent']), + ]), + }) + def test_encodings(self) -> None: """Test of differents encodings.""" with self.assertWarns( From e00a978ff6722d6c9331d33d08e3e901e53d4a61 Mon Sep 17 00:00:00 2001 From: vnmabus Date: Mon, 15 Aug 2022 17:26:49 +0200 Subject: [PATCH 02/10] Support minimal functions. --- rdata/conversion/__init__.py | 2 + rdata/conversion/_conversion.py | 77 ++++++++++-- rdata/parser/_parser.py | 71 +++++++++-- rdata/tests/data/test_minimal_function.rda | Bin 0 -> 275 bytes .../data/test_minimal_function_uncompiled.rda | Bin 0 -> 217 bytes rdata/tests/test_rdata.py | 114 ++++++++++++++++-- 6 files changed, 237 insertions(+), 27 deletions(-) create mode 100644 rdata/tests/data/test_minimal_function.rda create mode 100644 rdata/tests/data/test_minimal_function_uncompiled.rda diff --git a/rdata/conversion/__init__.py b/rdata/conversion/__init__.py index c8e5535..2d601d7 100644 --- a/rdata/conversion/__init__.py +++ b/rdata/conversion/__init__.py @@ -2,7 +2,9 @@ DEFAULT_CLASS_MAP, Converter, RBuiltin, + RBytecode, RExpression, + RFunction, RLanguage, SimpleConverter, convert, diff --git a/rdata/conversion/_conversion.py b/rdata/conversion/_conversion.py index 1408508..841fa6e 100644 --- a/rdata/conversion/_conversion.py +++ b/rdata/conversion/_conversion.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import abc import warnings from dataclasses import dataclass @@ -31,6 +33,7 @@ class RLanguage(NamedTuple): """R language 
construct.""" elements: List[Any] + attributes: Mapping[str, Any] class RExpression(NamedTuple): @@ -40,12 +43,42 @@ class RExpression(NamedTuple): @dataclass -class RBuiltin(): +class RBuiltin: """R builtin.""" name: str +@dataclass +class RFunction: + """R function.""" + + environment: Mapping[str, Any] + formals: Optional[Mapping[str, Any]] + body: RLanguage + attributes: StrMap + + +@dataclass +class RBytecode: + """R bytecode.""" + + code: xarray.DataArray + attributes: StrMap + + +class REnvironment(ChainMap[Union[str, bytes], Any]): + """R environment.""" + + def __init__( + self, + *maps: MutableMapping[str | bytes, Any], + frame: StrMap | None = None, + ) -> None: + super().__init__(*maps) + self.frame = frame + + def convert_list( r_list: parser.RObject, conversion_function: ConversionFunction, @@ -102,7 +135,7 @@ def convert_list( def convert_env( r_env: parser.RObject, conversion_function: ConversionFunction, -) -> ChainMap[Union[str, bytes], Any]: +) -> REnvironment: """Convert environment objects.""" if r_env.info.type is not parser.RObjectType.ENV: raise TypeError("Must receive a ENV object") @@ -112,11 +145,12 @@ def convert_env( hash_table = conversion_function(r_env.value.hash_table) dictionary = {} - for d in hash_table: - if d is not None: - dictionary.update(d) + if hash_table is not None: + for d in hash_table: + if d is not None: + dictionary.update(d) - return ChainMap(dictionary, enclosure) + return REnvironment(dictionary, enclosure, frame=frame) def convert_attrs( @@ -516,17 +550,17 @@ def __init__( constructor_dict: ConstructorDict = DEFAULT_CLASS_MAP, default_encoding: Optional[str] = None, force_default_encoding: bool = False, - global_environment: Optional[StrMap] = None, + global_environment: MutableMapping[str | bytes, Any] | None = None, ) -> None: self.constructor_dict = constructor_dict self.default_encoding = default_encoding self.force_default_encoding = force_default_encoding - self.global_environment = ChainMap( + 
self.global_environment = REnvironment( {} if global_environment is None else global_environment, ) - self.empty_environment: StrMap = ChainMap({}) + self.empty_environment: StrMap = REnvironment({}) self._reset() @@ -570,6 +604,20 @@ def _convert_next(self, data: Union[parser.RData, parser.RObject]) -> Any: # Expand the list and process the elements value = convert_list(obj, self._convert_next) + elif obj.info.type == parser.RObjectType.CLO: + assert obj.tag is not None + environment = self._convert_next(obj.tag) + formals = self._convert_next(obj.value[0]) + body = self._convert_next(obj.value[1]) + attributes = self._convert_next(obj.attributes) + + value = RFunction( + environment=environment, + formals=formals, + body=body, + attributes=attributes, + ) + elif obj.info.type == parser.RObjectType.ENV: # Return a ChainMap of the environments @@ -581,8 +629,10 @@ def _convert_next(self, data: Union[parser.RData, parser.RObject]) -> Any: # special object rlanguage_list = convert_list(obj, self._convert_next) assert isinstance(rlanguage_list, list) + assert obj.attributes + attributes = self._convert_next(obj.attributes) - value = RLanguage(rlanguage_list) + value = RLanguage(rlanguage_list, attributes) elif obj.info.type in {parser.RObjectType.SPECIAL, parser.RObjectType.BUILTIN}: @@ -628,6 +678,13 @@ def _convert_next(self, data: Union[parser.RData, parser.RObject]) -> Any: # Convert the internal objects returning a special object value = RExpression(rexpression_list) + elif obj.info.type == parser.RObjectType.BCODE: + + value = RBytecode( + code=self._convert_next(obj.value[0]), + attributes=attrs, + ) + elif obj.info.type == parser.RObjectType.S4: value = SimpleNamespace(**attrs) diff --git a/rdata/parser/_parser.py b/rdata/parser/_parser.py index 7cc51a7..b3836b8 100644 --- a/rdata/parser/_parser.py +++ b/rdata/parser/_parser.py @@ -118,6 +118,15 @@ class RObjectType(enum.Enum): REF = 255 # Reference +BYTECODE_SPECIAL_SET = { + RObjectType.BCREPDEF, + 
RObjectType.LANG, + RObjectType.LIST, + RObjectType.ATTRLANG, + RObjectType.ATTRLIST, +} + + class CharFlags(enum.IntFlag): """Flags for R objects of type char.""" @@ -257,6 +266,15 @@ class RData(): extra: RExtraInfo object: RObject + def __str__(self) -> str: + return ( + "RData(\n" + f" versions: {self.versions}\n" + f" extra: {self.extra}\n" + f" object: \n{self.object._str_internal(indent=4)}\n" + ")\n" + ) + @dataclass class EnvironmentValue(): @@ -494,6 +512,20 @@ def expand_altrep_to_object( constructor = self.altrep_constructor_dict[altrep_name] return constructor(state) + def _parse_bytecode_constant( + self, + reference_list: Optional[List[RObject]], + bytecode_rep_list: List[RObject | None] | None = None, + ) -> RObject: + + obj_type = self.parse_int() + + return self.parse_R_object( + reference_list, + bytecode_rep_list, + info_int=obj_type, + ) + def _parse_bytecode( self, reference_list: Optional[List[RObject]], @@ -506,7 +538,10 @@ def _parse_bytecode( n_constants = self.parse_int() constants = [ - self.parse_R_object(reference_list, [None] * n_repeated) + self._parse_bytecode_constant( + reference_list, + [None] * n_repeated, + ) for _ in range(n_constants) ] @@ -516,15 +551,23 @@ def parse_R_object( self, reference_list: List[RObject] | None = None, bytecode_rep_list: List[RObject | None] | None = None, + info_int: int | None = None, ) -> RObject: """Parse a R object.""" if reference_list is None: # Index is 1-based, so we insert a dummy object reference_list = [] - info_int = self.parse_int() - - info = parse_r_object_info(info_int) + original_info_int = info_int + if ( + info_int is not None + and RObjectType(info_int) in BYTECODE_SPECIAL_SET + ): + info = parse_r_object_info(info_int) + info.tag = True + else: + info_int = self.parse_int() + info = parse_r_object_info(info_int) tag = None attributes = None @@ -563,7 +606,6 @@ def parse_R_object( }: if info.type is RObjectType.ATTRLANG: info.type = RObjectType.LANG - info.attributes = True tag = 
None if info.attributes: @@ -578,8 +620,22 @@ def parse_R_object( tag_read = True # Read CAR and CDR - car = self.parse_R_object(reference_list, bytecode_rep_list) - cdr = self.parse_R_object(reference_list, bytecode_rep_list) + car = self.parse_R_object( + reference_list, + bytecode_rep_list, + info_int=( + None if original_info_int is None + else self.parse_int() + ), + ) + cdr = self.parse_R_object( + reference_list, + bytecode_rep_list, + info_int=( + None if original_info_int is None + else self.parse_int() + ), + ) value = (car, cdr) elif info.type == RObjectType.ENV: @@ -671,6 +727,7 @@ def parse_R_object( elif info.type == RObjectType.BCODE: value = self._parse_bytecode(reference_list, bytecode_rep_list) + tag_read = True elif info.type == RObjectType.S4: value = None diff --git a/rdata/tests/data/test_minimal_function.rda b/rdata/tests/data/test_minimal_function.rda new file mode 100644 index 0000000000000000000000000000000000000000..0c39c802adf7d0c2b62d135401719a309a2556a7 GIT binary patch literal 275 zcmV+u0qp)CiwFP!000000}FDAFy@NjVqjokW?*3glB_@`18ZoAo2~@|ScHL*frWt+ zNQ;)F7MH~5X69w)Cg#MamF6XvWaj5FFt9PBDP$`yN-j!G0~*E)1GG&PRq None: "test_builtin": rdata.conversion.RBuiltin(name="abs"), }) + def test_minimal_function_uncompiled(self) -> None: + """Test that a minimal function can be parsed.""" + parsed = rdata.parser.parse_file( + TESTDATA_PATH / "test_minimal_function_uncompiled.rda") + converted = rdata.conversion.convert(parsed) + + converted_fun = converted["test_minimal_function_uncompiled"] + + self.assertIsInstance( + converted_fun, + rdata.conversion.RFunction, + ) + + np.testing.assert_equal(converted_fun.environment, ChainMap({})) + np.testing.assert_equal(converted_fun.formals, None) + np.testing.assert_equal( + converted_fun.attributes, + {'srcref': np.array([1, 37, 1, 51, 37, 51, 1, 1])}, + ) + + np.testing.assert_equal(converted_fun.body, None) + + def test_minimal_function(self) -> None: + """Test that a minimal function (compiled) can be 
parsed.""" + parsed = rdata.parser.parse_file( + TESTDATA_PATH / "test_minimal_function.rda") + converted = rdata.conversion.convert(parsed) + + converted_fun = converted["test_minimal_function"] + + self.assertIsInstance( + converted_fun, + rdata.conversion.RFunction, + ) + + np.testing.assert_equal(converted_fun.environment, ChainMap({})) + np.testing.assert_equal(converted_fun.formals, None) + + converted_body = converted_fun.body + + self.assertIsInstance( + converted_body, + rdata.conversion.RBytecode, + ) + + np.testing.assert_equal(converted_body.code, np.array([12, 17, 1])) + np.testing.assert_equal(converted_body.attributes, {}) + def test_empty_function_uncompiled(self) -> None: """Test that a simple function can be parsed.""" parsed = rdata.parser.parse_file( TESTDATA_PATH / "test_empty_function_uncompiled.rda") converted = rdata.conversion.convert(parsed) - np.testing.assert_equal(converted, { - "test_empty_function": rdata.conversion.RExpression([ - rdata.conversion.RLanguage(['^', 'base', 'exponent']), - ]), - }) + converted_fun = converted["test_empty_function_uncompiled"] + + self.assertIsInstance( + converted_fun, + rdata.conversion.RFunction, + ) + + np.testing.assert_equal(converted_fun.environment, ChainMap({})) + np.testing.assert_equal(converted_fun.formals, None) + np.testing.assert_equal( + converted_fun.attributes, + {'srcref': np.array([1, 35, 1, 47, 35, 47, 1, 1])}, + ) + + converted_body = converted_fun.body + + self.assertIsInstance( + converted_body, + rdata.conversion.RLanguage, + ) + + np.testing.assert_equal(converted_body.elements, ['{']) + np.testing.assert_equal( + converted_body.attributes, + { + 'srcref': [np.array([1, 46, 1, 46, 46, 46, 1, 1])], + 'srcfile': ChainMap({}, ChainMap({})), + 'wholeSrcref': np.array([1, 0, 1, 47, 0, 47, 1, 1]), + }, + ) def test_empty_function(self) -> None: """Test that a simple function (compiled) can be parsed.""" @@ -238,11 +311,32 @@ def test_empty_function(self) -> None: TESTDATA_PATH / 
"test_empty_function.rda") converted = rdata.conversion.convert(parsed) - np.testing.assert_equal(converted, { - "test_empty_function": rdata.conversion.RExpression([ - rdata.conversion.RLanguage(['^', 'base', 'exponent']), - ]), - }) + converted_fun = converted["test_empty_function"] + + self.assertIsInstance( + converted_fun, + rdata.conversion.RFunction, + ) + + np.testing.assert_equal(converted_fun.environment, ChainMap({})) + np.testing.assert_equal(converted_fun.formals, None) + + converted_body = converted_fun.body + + self.assertIsInstance( + converted_body, + rdata.conversion.RLanguage, + ) + + np.testing.assert_equal(converted_body.elements, ['{']) + np.testing.assert_equal( + converted_body.attributes, + { + 'srcref': [np.array([1, 46, 1, 46, 46, 46, 1, 1])], + 'srcfile': ChainMap({}, ChainMap({})), + 'wholeSrcref': np.array([1, 0, 1, 47, 0, 47, 1, 1]), + }, + ) def test_function(self) -> None: """Test that functions can be parsed.""" From ae204b7d0d69b1ac2340e3b14a35e10c5cd1091d Mon Sep 17 00:00:00 2001 From: vnmabus Date: Tue, 16 Aug 2022 13:33:58 +0200 Subject: [PATCH 03/10] Functions without arguments working. 
--- rdata/conversion/_conversion.py | 91 ++++++++++++++++++- rdata/parser/_parser.py | 156 ++++++++++++++++++-------------- rdata/tests/test_rdata.py | 114 ++++++++++++++--------- 3 files changed, 247 insertions(+), 114 deletions(-) diff --git a/rdata/conversion/_conversion.py b/rdata/conversion/_conversion.py index 841fa6e..901fd48 100644 --- a/rdata/conversion/_conversion.py +++ b/rdata/conversion/_conversion.py @@ -58,6 +58,10 @@ class RFunction: body: RLanguage attributes: StrMap + @property + def source(self) -> str: + return self.attributes["srcref"].srcfile.lines + @dataclass class RBytecode: @@ -394,6 +398,9 @@ def convert_array( # R matrix order is like FORTRAN value = np.reshape(value, shape, order='F') + dimension_names = None + coords = None + dimnames = attrs.get('dimnames') if dimnames: if isinstance(dimnames, Mapping): @@ -407,7 +414,11 @@ def convert_array( if d is not None } - value = xarray.DataArray(value, dims=dimension_names, coords=coords) + value = xarray.DataArray( + value, + dims=dimension_names, + coords=coords, + ) return value @@ -480,6 +491,72 @@ def ts_constructor( return pandas.Series(obj, index=index) +@dataclass +class SrcRef: + first_line: int + first_byte: int + last_line: int + last_byte: int + first_column: int + last_column: int + first_parsed: int + last_parsed: int + srcfile: SrcFile + + +def srcref_constructor( + obj: Any, + attrs: StrMap, +) -> SrcRef: + return SrcRef(*obj, srcfile=attrs["srcfile"]) + + +@dataclass +class SrcFile: + filename: str + file_encoding: str | None + string_encoding: str | None + + +def srcfile_constructor( + obj: Any, + attrs: StrMap, +) -> SrcFile: + + filename = obj.frame["filename"][0] + file_encoding = obj.frame.get("encoding") + string_encoding = obj.frame.get("Enc") + + return SrcFile( + filename=filename, + file_encoding=file_encoding, + string_encoding=string_encoding, + ) + + +@dataclass +class SrcFileCopy(SrcFile): + lines: str + + +def srcfilecopy_constructor( + obj: Any, + attrs: 
StrMap, +) -> SrcFile: + + filename = obj.frame["filename"][0] + file_encoding = obj.frame.get("encoding", (None,))[0] + string_encoding = obj.frame.get("Enc", (None,))[0] + lines = obj.frame["lines"][0] + + return SrcFileCopy( + filename=filename, + file_encoding=file_encoding, + string_encoding=string_encoding, + lines=lines, + ) + + Constructor = Callable[[Any, Mapping], Any] ConstructorDict = Mapping[ Union[str, bytes], @@ -491,6 +568,9 @@ def ts_constructor( "factor": factor_constructor, "ordered": ordered_constructor, "ts": ts_constructor, + "srcref": srcref_constructor, + "srcfile": srcfile_constructor, + "srcfilecopy": srcfilecopy_constructor, } DEFAULT_CLASS_MAP = MappingProxyType(default_class_map_dict) @@ -629,8 +709,9 @@ def _convert_next(self, data: Union[parser.RData, parser.RObject]) -> Any: # special object rlanguage_list = convert_list(obj, self._convert_next) assert isinstance(rlanguage_list, list) - assert obj.attributes - attributes = self._convert_next(obj.attributes) + attributes = self._convert_next( + obj.attributes, + ) if obj.attributes else {} value = RLanguage(rlanguage_list, attributes) @@ -710,8 +791,8 @@ def _convert_next(self, data: Union[parser.RData, parser.RObject]) -> Any: else: raise NotImplementedError(f"Type {obj.info.type} not implemented") - if obj.info.object: - classname = attrs["class"] + if obj.info.object and attrs is not None: + classname = attrs.get("class", ()) for i, c in enumerate(classname): constructor = self.constructor_dict.get(c, None) diff --git a/rdata/parser/_parser.py b/rdata/parser/_parser.py index b3836b8..dfd75b8 100644 --- a/rdata/parser/_parser.py +++ b/rdata/parser/_parser.py @@ -171,91 +171,110 @@ class RObjectInfo(): reference: int -@dataclass -class RObject(): - """Representation of a R object.""" +def _str_internal( + obj: RObject | Sequence[RObject], + indent: int = 0, + used_references: Optional[Set[int]] = None, +) -> str: - info: RObjectInfo - value: Any - attributes: Optional[RObject] - tag: 
Optional[RObject] = None - referenced_object: Optional[RObject] = None + if used_references is None: + used_references = set() - def _str_internal( - self, - indent: int = 0, - used_references: Optional[Set[int]] = None, - ) -> str: + small_indent = indent + 2 + big_indent = indent + 4 - if used_references is None: - used_references = set() + indent_spaces = ' ' * indent + small_indent_spaces = ' ' * small_indent + big_indent_spaces = ' ' * big_indent - small_indent = indent + 2 - big_indent = indent + 4 + string = "" - indent_spaces = ' ' * indent - small_indent_spaces = ' ' * small_indent - big_indent_spaces = ' ' * big_indent + if isinstance(obj, Sequence): + string += f"{indent_spaces}[\n" + for elem in obj: + string += _str_internal( + elem, + big_indent, + used_references.copy(), + ) + string += f"{indent_spaces}]\n" + + return string - string = "" + string += f"{indent_spaces}{obj.info.type}\n" + + if obj.tag: + tag_string = _str_internal( + obj.tag, + big_indent, + used_references.copy(), + ) + string += f"{small_indent_spaces}tag:\n{tag_string}\n" + + if obj.info.reference: + assert obj.referenced_object + reference_string = ( + f"{big_indent_spaces}..." + if obj.info.reference in used_references + else _str_internal( + obj.referenced_object, + indent + 4, used_references.copy()) + ) + string += ( + f"{small_indent_spaces}reference: " + f"{obj.info.reference}\n{reference_string}\n" + ) - string += f"{indent_spaces}{self.info.type}\n" + string += f"{small_indent_spaces}value:\n" - if self.tag: - tag_string = self.tag._str_internal( + if isinstance(obj.value, RObject): + string += _str_internal( + obj.value, + big_indent, + used_references.copy(), + ) + elif isinstance(obj.value, (tuple, list)): + for elem in obj.value: + string += _str_internal( + elem, big_indent, used_references.copy(), ) - string += f"{small_indent_spaces}tag:\n{tag_string}\n" - - if self.info.reference: - assert self.referenced_object - reference_string = ( - f"{big_indent_spaces}..." 
- if self.info.reference in used_references - else self.referenced_object._str_internal( - indent + 4, used_references.copy()) - ) + elif isinstance(obj.value, np.ndarray): + string += big_indent_spaces + if len(obj.value) > 4: string += ( - f"{small_indent_spaces}reference: " - f"{self.info.reference}\n{reference_string}\n" + f"[{obj.value[0]}, {obj.value[1]} ... " + f"{obj.value[-2]}, {obj.value[-1]}]\n" ) + else: + string += f"{obj.value}\n" + else: + string += f"{big_indent_spaces}{obj.value}\n" - string += f"{small_indent_spaces}value:\n" + if obj.attributes: + attr_string = _str_internal( + obj.attributes, + big_indent, + used_references.copy(), + ) + string += f"{small_indent_spaces}attributes:\n{attr_string}\n" - if isinstance(self.value, RObject): - string += self.value._str_internal( - big_indent, - used_references.copy(), - ) - elif isinstance(self.value, (tuple, list)): - for elem in self.value: - string += elem._str_internal( - big_indent, - used_references.copy(), - ) - elif isinstance(self.value, np.ndarray): - string += big_indent_spaces - if len(self.value) > 4: - string += ( - f"[{self.value[0]}, {self.value[1]} ... 
" - f"{self.value[-2]}, {self.value[-1]}]\n" - ) - else: - string += f"{self.value}\n" - else: - string += f"{big_indent_spaces}{self.value}\n" + return string - if self.attributes: - attr_string = self.attributes._str_internal( - big_indent, - used_references.copy()) - string += f"{small_indent_spaces}attributes:\n{attr_string}\n" - return string +@dataclass +class RObject(): + """Representation of a R object.""" + + info: RObjectInfo + value: Any + attributes: Optional[RObject] + tag: Optional[RObject] = None + referenced_object: Optional[RObject] = None def __str__(self) -> str: - return self._str_internal() + return _str_internal(self) @dataclass @@ -271,7 +290,7 @@ def __str__(self) -> str: "RData(\n" f" versions: {self.versions}\n" f" extra: {self.extra}\n" - f" object: \n{self.object._str_internal(indent=4)}\n" + f" object: \n{_str_internal(self.object, indent=4)}\n" ")\n" ) @@ -606,6 +625,7 @@ def parse_R_object( }: if info.type is RObjectType.ATTRLANG: info.type = RObjectType.LANG + info.attributes = True tag = None if info.attributes: @@ -639,6 +659,8 @@ def parse_R_object( value = (car, cdr) elif info.type == RObjectType.ENV: + info.object = True + result = RObject( info=info, tag=tag, diff --git a/rdata/tests/test_rdata.py b/rdata/tests/test_rdata.py index 8db4698..9a3e102 100644 --- a/rdata/tests/test_rdata.py +++ b/rdata/tests/test_rdata.py @@ -207,7 +207,10 @@ def test_expression(self) -> None: np.testing.assert_equal(converted, { "test_expression": rdata.conversion.RExpression([ - rdata.conversion.RLanguage(['^', 'base', 'exponent']), + rdata.conversion.RLanguage( + ['^', 'base', 'exponent'], + attributes={}, + ), ]), }) @@ -235,13 +238,12 @@ def test_minimal_function_uncompiled(self) -> None: np.testing.assert_equal(converted_fun.environment, ChainMap({})) np.testing.assert_equal(converted_fun.formals, None) + np.testing.assert_equal(converted_fun.body, None) np.testing.assert_equal( - converted_fun.attributes, - {'srcref': np.array([1, 37, 1, 51, 
37, 51, 1, 1])}, + converted_fun.source, + "test_minimal_function_uncompiled <- function() NULL\n", ) - np.testing.assert_equal(converted_fun.body, None) - def test_minimal_function(self) -> None: """Test that a minimal function (compiled) can be parsed.""" parsed = rdata.parser.parse_file( @@ -268,6 +270,11 @@ def test_minimal_function(self) -> None: np.testing.assert_equal(converted_body.code, np.array([12, 17, 1])) np.testing.assert_equal(converted_body.attributes, {}) + np.testing.assert_equal( + converted_fun.source, + "test_minimal_function <- function() NULL\n", + ) + def test_empty_function_uncompiled(self) -> None: """Test that a simple function can be parsed.""" parsed = rdata.parser.parse_file( @@ -283,26 +290,10 @@ def test_empty_function_uncompiled(self) -> None: np.testing.assert_equal(converted_fun.environment, ChainMap({})) np.testing.assert_equal(converted_fun.formals, None) + self.assertIsInstance(converted_fun.body, rdata.conversion.RLanguage) np.testing.assert_equal( - converted_fun.attributes, - {'srcref': np.array([1, 35, 1, 47, 35, 47, 1, 1])}, - ) - - converted_body = converted_fun.body - - self.assertIsInstance( - converted_body, - rdata.conversion.RLanguage, - ) - - np.testing.assert_equal(converted_body.elements, ['{']) - np.testing.assert_equal( - converted_body.attributes, - { - 'srcref': [np.array([1, 46, 1, 46, 46, 46, 1, 1])], - 'srcfile': ChainMap({}, ChainMap({})), - 'wholeSrcref': np.array([1, 0, 1, 47, 0, 47, 1, 1]), - }, + converted_fun.source, + "test_empty_function_uncompiled <- function() {}\n", ) def test_empty_function(self) -> None: @@ -325,17 +316,15 @@ def test_empty_function(self) -> None: self.assertIsInstance( converted_body, - rdata.conversion.RLanguage, + rdata.conversion.RBytecode, ) - np.testing.assert_equal(converted_body.elements, ['{']) + np.testing.assert_equal(converted_body.code, np.array([12, 17, 1])) + np.testing.assert_equal(converted_body.attributes, {}) + np.testing.assert_equal( - 
converted_body.attributes, - { - 'srcref': [np.array([1, 46, 1, 46, 46, 46, 1, 1])], - 'srcfile': ChainMap({}, ChainMap({})), - 'wholeSrcref': np.array([1, 0, 1, 47, 0, 47, 1, 1]), - }, + converted_fun.source, + "test_empty_function <- function() {}\n", ) def test_function(self) -> None: @@ -344,11 +333,33 @@ def test_function(self) -> None: TESTDATA_PATH / "test_function.rda") converted = rdata.conversion.convert(parsed) - np.testing.assert_equal(converted, { - "test_function": rdata.conversion.RExpression([ - rdata.conversion.RLanguage(['^', 'base', 'exponent']), - ]), - }) + converted_fun = converted["test_function"] + + self.assertIsInstance( + converted_fun, + rdata.conversion.RFunction, + ) + + np.testing.assert_equal(converted_fun.environment, ChainMap({})) + np.testing.assert_equal(converted_fun.formals, None) + + converted_body = converted_fun.body + + self.assertIsInstance( + converted_body, + rdata.conversion.RBytecode, + ) + + np.testing.assert_equal( + converted_body.code, + np.array([12, 23, 1, 34, 4, 38, 2, 1]), + ) + np.testing.assert_equal(converted_body.attributes, {}) + + np.testing.assert_equal( + converted_fun.source, + "test_function <- function() {print(\"Hello\")}\n", + ) def test_function_arg(self) -> None: """Test that functions can be parsed.""" @@ -356,11 +367,30 @@ def test_function_arg(self) -> None: TESTDATA_PATH / "test_function_arg.rda") converted = rdata.conversion.convert(parsed) - np.testing.assert_equal(converted, { - "test_function_arg": rdata.conversion.RExpression([ - rdata.conversion.RLanguage(['^', 'base', 'exponent']), - ]), - }) + converted_fun = converted["test_function_arg"] + + self.assertIsInstance( + converted_fun, + rdata.conversion.RFunction, + ) + + np.testing.assert_equal(converted_fun.environment, ChainMap({})) + np.testing.assert_equal(converted_fun.formals, None) + + converted_body = converted_fun.body + + self.assertIsInstance( + converted_body, + rdata.conversion.RBytecode, + ) + + 
np.testing.assert_equal(converted_body.code, np.array([12, 17, 1])) + np.testing.assert_equal(converted_body.attributes, {}) + + np.testing.assert_equal( + converted_fun.source, + "test_function <- function() {}\n", + ) def test_encodings(self) -> None: """Test of differents encodings.""" From 096819635fc184b1eaaebe7fae263a157694ca92 Mon Sep 17 00:00:00 2001 From: vnmabus Date: Thu, 18 Aug 2022 12:27:27 +0200 Subject: [PATCH 04/10] Improve support for functions. --- rdata/conversion/_conversion.py | 3 +++ rdata/parser/_parser.py | 15 ++++++++++++--- rdata/tests/data/test_function_arg.rda | Bin 298 -> 409 bytes rdata/tests/test_rdata.py | 9 ++++++--- 4 files changed, 21 insertions(+), 6 deletions(-) diff --git a/rdata/conversion/_conversion.py b/rdata/conversion/_conversion.py index 901fd48..e844972 100644 --- a/rdata/conversion/_conversion.py +++ b/rdata/conversion/_conversion.py @@ -772,6 +772,9 @@ def _convert_next(self, data: Union[parser.RData, parser.RObject]) -> Any: elif obj.info.type == parser.RObjectType.EMPTYENV: value = self.empty_environment + elif obj.info.type == parser.RObjectType.MISSINGARG: + value = NotImplemented + elif obj.info.type == parser.RObjectType.GLOBALENV: value = self.global_environment diff --git a/rdata/parser/_parser.py b/rdata/parser/_parser.py index dfd75b8..3d3b5ff 100644 --- a/rdata/parser/_parser.py +++ b/rdata/parser/_parser.py @@ -119,6 +119,8 @@ class RObjectType(enum.Enum): BYTECODE_SPECIAL_SET = { + RObjectType.BCODE, + RObjectType.BCREPREF, RObjectType.BCREPDEF, RObjectType.LANG, RObjectType.LIST, @@ -551,15 +553,19 @@ def _parse_bytecode( bytecode_rep_list: List[RObject | None] | None = None, ) -> Tuple[RObject, Sequence[RObject]]: """Parse R bytecode.""" - n_repeated = self.parse_int() + if bytecode_rep_list is None: + n_repeated = self.parse_int() code = self.parse_R_object(reference_list, bytecode_rep_list) + if bytecode_rep_list is None: + bytecode_rep_list = [None] * n_repeated + n_constants = self.parse_int() 
constants = [ self._parse_bytecode_constant( reference_list, - [None] * n_repeated, + bytecode_rep_list, ) for _ in range(n_constants) ] @@ -583,7 +589,10 @@ def parse_R_object( and RObjectType(info_int) in BYTECODE_SPECIAL_SET ): info = parse_r_object_info(info_int) - info.tag = True + info.tag = info.type not in { + RObjectType.BCREPREF, + RObjectType.BCODE, + } else: info_int = self.parse_int() info = parse_r_object_info(info_int) diff --git a/rdata/tests/data/test_function_arg.rda b/rdata/tests/data/test_function_arg.rda index 13cdae917fbc323a9f60d17bb8f77de5847ee52d..c97c3ce17e72020f448018e2e056ac4a9d42bed7 100644 GIT binary patch literal 409 zcmV;K0cQRmiwFP!000001HDsAOT$1Ap61cUKA__3B$NuGtvz}1;86sxKA^V};#LDO zE!l`l5&X%b-o)0~WVYRIQYZyon4Q^~*>AqgPA<>7$K`Gr08Fq9+-)?$zPUc{oIv7% z0$8wydOc=5?)4u-FZKsv&yDT?il&xS3<5hcQ~OWev~~1w;3NX_#V-Q<*2=1umBa z402Y8xHlNSWWR+VN)xTbv{|wc{Ak4rE^glx+)4@p$j(+#cBqrP)Utib5xx@2dkScX z(i>@P?q!;aZYpPb858eWlV#}7q3)4xk=X|H1AWOoua8xO^W-FbiCWHJwTo9l2B0)PXqjjx9)cu(v5)eT4wSm44KROE!=8yCM}8YM-G27&L@}y>bJyIR|z74a6BM+XifMPN?xX-+c_f zg&;z>% diff --git a/rdata/tests/test_rdata.py b/rdata/tests/test_rdata.py index 9a3e102..9b9e479 100644 --- a/rdata/tests/test_rdata.py +++ b/rdata/tests/test_rdata.py @@ -375,7 +375,7 @@ def test_function_arg(self) -> None: ) np.testing.assert_equal(converted_fun.environment, ChainMap({})) - np.testing.assert_equal(converted_fun.formals, None) + np.testing.assert_equal(converted_fun.formals, {"a": NotImplemented}) converted_body = converted_fun.body @@ -384,12 +384,15 @@ def test_function_arg(self) -> None: rdata.conversion.RBytecode, ) - np.testing.assert_equal(converted_body.code, np.array([12, 17, 1])) + np.testing.assert_equal( + converted_body.code, + np.array([12, 23, 1, 29, 4, 38, 2, 1]), + ) np.testing.assert_equal(converted_body.attributes, {}) np.testing.assert_equal( converted_fun.source, - "test_function <- function() {}\n", + "test_function_arg <- function(a) {print(a)}\n", ) def test_encodings(self) -> None: 
From 7ad8338d335580660d793c5c34314f91ccf21469 Mon Sep 17 00:00:00 2001 From: vnmabus Date: Thu, 18 Aug 2022 12:39:31 +0200 Subject: [PATCH 05/10] Store constants. --- rdata/conversion/_conversion.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/rdata/conversion/_conversion.py b/rdata/conversion/_conversion.py index e844972..b5ad3a4 100644 --- a/rdata/conversion/_conversion.py +++ b/rdata/conversion/_conversion.py @@ -14,6 +14,7 @@ MutableMapping, NamedTuple, Optional, + Sequence, Union, cast, ) @@ -68,6 +69,7 @@ class RBytecode: """R bytecode.""" code: xarray.DataArray + constants: Sequence[Any] attributes: StrMap @@ -763,6 +765,7 @@ def _convert_next(self, data: Union[parser.RData, parser.RObject]) -> Any: value = RBytecode( code=self._convert_next(obj.value[0]), + constants=[self._convert_next(c) for c in obj.value[1]], attributes=attrs, ) From 552d71934f1c46ee5f7de1fc04120e597d0db1c0 Mon Sep 17 00:00:00 2001 From: vnmabus Date: Thu, 18 Aug 2022 12:49:07 +0200 Subject: [PATCH 06/10] Fix for multiline functions. 
--- rdata/conversion/_conversion.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/rdata/conversion/_conversion.py b/rdata/conversion/_conversion.py index b5ad3a4..033372c 100644 --- a/rdata/conversion/_conversion.py +++ b/rdata/conversion/_conversion.py @@ -61,7 +61,7 @@ class RFunction: @property def source(self) -> str: - return self.attributes["srcref"].srcfile.lines + return "\n".join(self.attributes["srcref"].srcfile.lines) @dataclass @@ -538,7 +538,7 @@ def srcfile_constructor( @dataclass class SrcFileCopy(SrcFile): - lines: str + lines: Sequence[str] def srcfilecopy_constructor( @@ -549,7 +549,7 @@ def srcfilecopy_constructor( filename = obj.frame["filename"][0] file_encoding = obj.frame.get("encoding", (None,))[0] string_encoding = obj.frame.get("Enc", (None,))[0] - lines = obj.frame["lines"][0] + lines = obj.frame["lines"] return SrcFileCopy( filename=filename, From 02ad781990139b5235b0d318e19d5300cd918259 Mon Sep 17 00:00:00 2001 From: vnmabus Date: Thu, 18 Aug 2022 14:24:13 +0200 Subject: [PATCH 07/10] Try parsing external pointers (just to allow loading the remaining part of the data). 
--- rdata/conversion/_conversion.py | 15 +++++++++++++++ rdata/parser/_parser.py | 22 ++++++++++++++++++++++ rdata/tests/data/test_file.rda | Bin 0 -> 124 bytes rdata/tests/test_rdata.py | 15 +++++++++++++++ 4 files changed, 52 insertions(+) create mode 100644 rdata/tests/data/test_file.rda diff --git a/rdata/conversion/_conversion.py b/rdata/conversion/_conversion.py index 033372c..3374acb 100644 --- a/rdata/conversion/_conversion.py +++ b/rdata/conversion/_conversion.py @@ -64,6 +64,14 @@ def source(self) -> str: return "\n".join(self.attributes["srcref"].srcfile.lines) +@dataclass +class RExternalPointer: + """R external pointer.""" + + protected: Any + tag: Any + + @dataclass class RBytecode: """R bytecode.""" @@ -769,6 +777,13 @@ def _convert_next(self, data: Union[parser.RData, parser.RObject]) -> Any: attributes=attrs, ) + elif obj.info.type == parser.RObjectType.EXTPTR: + + value = RExternalPointer( + protected=self._convert_next(obj.value[0]), + tag=self._convert_next(obj.value[1]), + ) + elif obj.info.type == parser.RObjectType.S4: value = SimpleNamespace(**attrs) diff --git a/rdata/parser/_parser.py b/rdata/parser/_parser.py index 3d3b5ff..dec98b4 100644 --- a/rdata/parser/_parser.py +++ b/rdata/parser/_parser.py @@ -760,6 +760,28 @@ def parse_R_object( value = self._parse_bytecode(reference_list, bytecode_rep_list) tag_read = True + elif info.type == RObjectType.EXTPTR: + + result = RObject( + info=info, + tag=tag, + attributes=attributes, + value=None, + referenced_object=referenced_object, + ) + + reference_list.append(result) + protected = self.parse_R_object( + reference_list, + bytecode_rep_list, + ) + extptr_tag = self.parse_R_object( + reference_list, + bytecode_rep_list, + ) + + value = (protected, extptr_tag) + elif info.type == RObjectType.S4: value = None diff --git a/rdata/tests/data/test_file.rda b/rdata/tests/data/test_file.rda new file mode 100644 index 0000000000000000000000000000000000000000..5cee314a85e63c559b3a9225f838895d976b8f97 GIT binary 
patch literal 124 zcmb2|=3oE=X6~X+gJ)e2k`fXU(h?GxCarN$W6sX#n7xjbIijU;VqCe;S5iy|A None: ], }) + def test_file(self) -> None: + """Test that external pointers can be parsed.""" + parsed = rdata.parser.parse_file(TESTDATA_PATH / "test_file.rda") + converted = rdata.conversion.convert(parsed) + + np.testing.assert_equal(converted, { + "test_file": + [ + np.array([1.0]), + ['a', 'b', 'c'], + np.array([2.0, 3.0]), + ['hi'], + ], + }) + def test_expression(self) -> None: """Test that expressions can be parsed.""" parsed = rdata.parser.parse_file(TESTDATA_PATH / "test_expression.rda") From fda1ca528a1792f7a02b5e24d44e88c5ac4e574e Mon Sep 17 00:00:00 2001 From: vnmabus Date: Thu, 18 Aug 2022 14:26:11 +0200 Subject: [PATCH 08/10] Fix tests. --- rdata/tests/test_rdata.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/rdata/tests/test_rdata.py b/rdata/tests/test_rdata.py index 6be9474..cb35604 100644 --- a/rdata/tests/test_rdata.py +++ b/rdata/tests/test_rdata.py @@ -206,13 +206,7 @@ def test_file(self) -> None: converted = rdata.conversion.convert(parsed) np.testing.assert_equal(converted, { - "test_file": - [ - np.array([1.0]), - ['a', 'b', 'c'], - np.array([2.0, 3.0]), - ['hi'], - ], + "test_file": [5], }) def test_expression(self) -> None: From ee654f9524f2b8ca7b3c15ed9bc5f21f34194929 Mon Sep 17 00:00:00 2001 From: vnmabus Date: Fri, 2 Sep 2022 17:11:46 +0200 Subject: [PATCH 09/10] Add explicit reexports. 
--- rdata/conversion/__init__.py | 36 ++++++++++++++++++------------------ rdata/parser/__init__.py | 16 ++++++++-------- setup.cfg | 2 ++ 3 files changed, 28 insertions(+), 26 deletions(-) diff --git a/rdata/conversion/__init__.py b/rdata/conversion/__init__.py index 2d601d7..8f8926c 100644 --- a/rdata/conversion/__init__.py +++ b/rdata/conversion/__init__.py @@ -1,20 +1,20 @@ from ._conversion import ( - DEFAULT_CLASS_MAP, - Converter, - RBuiltin, - RBytecode, - RExpression, - RFunction, - RLanguage, - SimpleConverter, - convert, - convert_array, - convert_attrs, - convert_char, - convert_list, - convert_symbol, - convert_vector, - dataframe_constructor, - factor_constructor, - ts_constructor, + DEFAULT_CLASS_MAP as DEFAULT_CLASS_MAP, + Converter as Converter, + RBuiltin as RBuiltin, + RBytecode as RBytecode, + RExpression as RExpression, + RFunction as RFunction, + RLanguage as RLanguage, + SimpleConverter as SimpleConverter, + convert as convert, + convert_array as convert_array, + convert_attrs as convert_attrs, + convert_char as convert_char, + convert_list as convert_list, + convert_symbol as convert_symbol, + convert_vector as convert_vector, + dataframe_constructor as dataframe_constructor, + factor_constructor as factor_constructor, + ts_constructor as ts_constructor, ) diff --git a/rdata/parser/__init__.py b/rdata/parser/__init__.py index 1810e4b..8af47f3 100644 --- a/rdata/parser/__init__.py +++ b/rdata/parser/__init__.py @@ -1,12 +1,12 @@ """Utilities for parsing a rdata file.""" from ._parser import ( - DEFAULT_ALTREP_MAP, - CharFlags, - RData, - RObject, - RObjectInfo, - RObjectType, - parse_data, - parse_file, + DEFAULT_ALTREP_MAP as DEFAULT_ALTREP_MAP, + CharFlags as CharFlags, + RData as RData, + RObject as RObject, + RObjectInfo as RObjectInfo, + RObjectType as RObjectType, + parse_data as parse_data, + parse_file as parse_file, ) diff --git a/setup.cfg b/setup.cfg index 5794b78..a53dbc5 100644 --- a/setup.cfg +++ b/setup.cfg @@ -90,6 +90,8 @@ 
per-file-ignores = __init__.py: # Unused modules are allowed in `__init__.py`, to reduce imports F401, + # Explicit re-exports allowed in __init__ + WPS113, # Import multiple names is allowed in `__init__.py` WPS235, # Logic is allowed in `__init__.py` From fae6ad4b37021a4207e19dfaa423834d1a8abf68 Mon Sep 17 00:00:00 2001 From: vnmabus Date: Fri, 2 Sep 2022 17:18:25 +0200 Subject: [PATCH 10/10] Bump version. --- rdata/VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rdata/VERSION b/rdata/VERSION index ce609ca..9a7d84f 100644 --- a/rdata/VERSION +++ b/rdata/VERSION @@ -1 +1 @@ -0.8 \ No newline at end of file +0.9 \ No newline at end of file