Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update new transformation for DAME integration #92

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -114,4 +114,5 @@ examples/demo/data
*node_modules*

data
tmp
tmp
env.env
2 changes: 2 additions & 0 deletions dtran/argtype.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ class ArgType(object):
Callable[[Any, bool, str, str], 'ArgType']] = dataset
OrderedDict: 'ArgType' = None
ListString: 'ArgType' = None
ListOrOneString: 'ArgType' = None
String: 'ArgType' = None
Number: 'ArgType' = None
Boolean: 'ArgType' = None
Expand Down Expand Up @@ -75,6 +76,7 @@ def type_cast(self, val: str):
from_str=lambda val: str(Path(val)))
ArgType.OrderedDict = ArgType("ordered_dict", validate=lambda val: isinstance(val, dict))
ArgType.ListString = ArgType("list_string", validate=lambda val: isinstance(val, list) and all(isinstance(x, str) for x in val))
ArgType.ListOrOneString = ArgType("list_or_one_string", validate=lambda val: isinstance(val, str) or (isinstance(val, list) and all(isinstance(x, str) for x in val)))
ArgType.String = ArgType("string", validate=lambda val: isinstance(val, str))
ArgType.Number = ArgType("number", validate=lambda val: isinstance(val, (int, float)),
from_str=lambda val: ('.' in val and float(val)) or int(val))
Expand Down
40 changes: 25 additions & 15 deletions dtran/config_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,6 @@ class InputSchema(Schema):


class PipelineSchema(Schema):
def __init__(self, cli_inputs, **kwargs):
super().__init__(**kwargs)
self.cli_inputs = cli_inputs

version = fields.Str(required=True)
description = fields.Str()
inputs = OrderedDictField(validate=validate.Length(min=1),
Expand All @@ -53,6 +49,10 @@ def __init__(self, cli_inputs, **kwargs):
class Meta:
ordered = True

def __init__(self, cli_inputs, **kwargs):
super().__init__(**kwargs)
self.cli_inputs: Dict[Union[Tuple[str, str], str], str] = cli_inputs

@staticmethod
def process_input(val, data):
# processing for root-level pipeline inputs recursively
Expand Down Expand Up @@ -96,17 +96,27 @@ def construct_pipeline(self, data, **kwargs):
mappings[name] = (cls, adapter_count[adapter['adapter']])

# validating cli_inputs
for name, input in self.cli_inputs:
if name not in mappings:
raise ValidationError(
['cli_inputs exception', f"invalid adapter name {name}. not found in config file"])
if input not in mappings[name][0].inputs:
raise ValidationError(['cli_inputs exception',
f"invalid input {input} in {data['adapters'][name]['adapter']} for {name}"])
# cli_inputs has higher priority and overwrites config_file data
if 'inputs' not in data['adapters'][name]:
data['adapters'][name]['inputs'] = OrderedDict()
data['adapters'][name]['inputs'][input] = self.cli_inputs[(name, input)]
for cli_input in self.cli_inputs:
if isinstance(cli_input, (tuple,list)):
name, arg = cli_input

if name not in mappings:
raise ValidationError(
['cli_inputs exception', f"invalid adapter name {name}. not found in config file"])
if arg not in mappings[name][0].inputs:
raise ValidationError(['cli_inputs exception',
f"invalid input {arg} in {data['adapters'][name]['adapter']} for {name}"])
# cli_inputs has higher priority and overwrites config_file data
if 'inputs' not in data['adapters'][name]:
data['adapters'][name]['inputs'] = OrderedDict()
data['adapters'][name]['inputs'][arg] = self.cli_inputs[(name, arg)]
else:
name = cli_input
if name in data['inputs']:
if isinstance(data['inputs'][name], (dict, OrderedDict)):
data['inputs'][name]['value'] = self.cli_inputs[name]
else:
data['inputs'][name] = self.cli_inputs[name]

inputs = {}
wired = []
Expand Down
1 change: 0 additions & 1 deletion dtran/dcat/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -280,7 +280,6 @@ def handle_api_response(response: requests.Response):
"""
This is a convenience method to handle api responses
:param response:
:param print_response:
:return:
"""
parsed_response = response.json()
Expand Down
2 changes: 1 addition & 1 deletion dtran/dcat/scripts/delete.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
[]
["b3e79dc2-8fa1-4203-ac82-b5267925191f"]
10 changes: 5 additions & 5 deletions dtran/dcat/scripts/register_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,20 +14,20 @@ def cli():
ignore_unknown_options=True,
allow_extra_args=False,
))
@click.option("--name", help="DCAT dataset name", default="test-dataset")
@click.option("--description", help="DCAT dataset description", default="test-description")
@click.option("--name", help="DCAT dataset name", prompt="Dataset name")
@click.option("--description", help="DCAT dataset description", prompt="Dataset description")
@click.option("--metadata_path", help="DCAT dataset metadata file path", default=None)
@click.option("--resource_path", help="DCAT dataset resources json path, should be a file name-url dict", default=None)
@click.option("--resource_type", help="DCAT dataset resource type", default="zip")
@click.option("--variable_path", help="DCAT dataset variable json path", default=None)
def register_dataset(name, description, metadata_path, resource_path, variable_path, resource_type):
"""
Registers DCAT dataset with multiple resources.
Example: PYTHONPATH=$(pwd):$(pwd):$PYTHONPATH python dtran/dcat/scripts/register_datasets.py register_dataset
Example: PYTHONPATH=$(pwd):$PYTHONPATH python dtran/dcat/scripts/register_datasets.py register_dataset
--resource_path=./uploaded.json --variable_path=variables.json
"""

dcat = DCatAPI.get_instance("https://api.mint-data-catalog.org")
dcat = DCatAPI.get_instance()

if metadata_path is None:
metadata = {}
Expand Down Expand Up @@ -85,7 +85,7 @@ def delete_dataset(dcatid, json_path):
Delete specified datasets.
Example: PYTHONPATH=$(pwd):$(pwd):$PYTHONPATH python dtran/dcat/scripts/register_datasets.py delete_dataset --dcatid=c4fedf48-f888-4de1-b60f-c6ac5cb1615b
"""
dcat = DCatAPI.get_instance("https://api.mint-data-catalog.org")
dcat = DCatAPI.get_instance()

if dcatid is None and json_path is None:
raise ValueError("Please enter dataset ids to delete!")
Expand Down
3 changes: 3 additions & 0 deletions dtran/dcat/scripts/resource.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"awash.tif": "https://data.mint.isi.edu/files/hand-dem/GIS-Oromia/Awash/Awash-border_DEM_buffer.tif"
}
12 changes: 9 additions & 3 deletions dtran/dcat/scripts/upload_files_in_batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,20 @@

def setup_owncloud(upload_dir):
    """Create an authenticated ownCloud client and ensure ``upload_dir`` exists.

    Credentials are read from the ``USERNAME`` and ``PASSWORD`` environment
    variables — they must never be hard-coded in source control.

    :param upload_dir: remote directory to create on the ownCloud server
    :return: a logged-in ``owncloud.Client`` instance
    :raises KeyError: if USERNAME or PASSWORD is not set in the environment
    """
    oc = owncloud.Client('https://files.mint.isi.edu/')
    oc.login(os.environ['USERNAME'], os.environ['PASSWORD'])
    try:
        # pyocclient (https://github.com/owncloud/pyocclient) exposes no way
        # to check whether a directory already exists, so we optimistically
        # create it and treat a failure as "already present" (best-effort).
        oc.mkdir(upload_dir)
    except Exception:
        # Deliberately swallowed: mkdir failing almost always means the
        # directory exists from a previous run. Narrowed from a bare
        # `except:` so KeyboardInterrupt/SystemExit still propagate.
        pass
    return oc


def upload_to_mint_server(target_dir, target_filename, upload_url):
upload_output = subprocess.check_output(
f"curl -sD - --user upload:HVmyqAPWDNuk5SmkLOK2 --upload-file {target_dir}/{target_filename} {upload_url}",
f"curl -sD - --user {os.environ['USERNAME']}:{os.environ['PASSWORD']} --upload-file {target_dir}/{target_filename} {upload_url}",
shell=True,
)
uploaded_url = f'https://{upload_output.decode("utf-8").split("https://")[-1]}'
Expand Down
8 changes: 4 additions & 4 deletions dtran/dcat/scripts/variables.json
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
{
"standard_variables": [
{
"ontology": "ScientificVariablesOntology",
"name": "precipitation_leg_volume_flux",
"uri": "http://www.geoscienceontology.org/svo/svl/variable/1.0.0/#atmosphere%40role%7Esource_water%40role%7Emain_precipitation__precipitation_leq_volume_flux"
"ontology": "MINT Ontology",
"name": "var_0",
"uri": "http://mint.isi.edu/var_0"
}
],
"variable_names": ["test-var"]
"variable_names": ["var_0"]
}
11 changes: 7 additions & 4 deletions dtran/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,18 +19,21 @@ def exec_pipeline(ctx, config=None):
Creates a pipeline and execute it based on given config and input(optional).
To specify the input to pipeline, use (listed in ascending priority):
1) config file option: --config path_to_file
2) arg params: e.g. --FuncName.Attr=value
2) arg params: e.g. --FuncName.Attr=value, --InputName=value
"""

# Accept user-specified inputs: expect format of --key=value
user_inputs = {}
for arg in ctx.args:
try:
key, value = arg[2:].split("=")
func_name, attr_name = key.split(".")
user_inputs[(func_name, attr_name)] = value
if key.find(".") != -1:
func_name, attr_name = key.split(".")
user_inputs[(func_name, attr_name)] = value
else:
user_inputs[key] = value
except ValueError:
print(f"user input: '{arg}' should have format '--FuncName.Attr=value'")
print(f"user input: '{arg}' should have format '--FuncName.Attr=value' or --InputName=value")
return

parser = ConfigParser(user_inputs)
Expand Down
Loading