diff --git a/mindsdb_sql/__about__.py b/mindsdb_sql/__about__.py index c13e57b3..72d76d8f 100644 --- a/mindsdb_sql/__about__.py +++ b/mindsdb_sql/__about__.py @@ -1,6 +1,6 @@ __title__ = 'mindsdb_sql' __package_name__ = 'mindsdb_sql' -__version__ = '0.10.3' +__version__ = '0.10.4' __description__ = "Pure python SQL parser" __email__ = "jorge@mindsdb.com" __author__ = 'MindsDB Inc' diff --git a/mindsdb_sql/parser/dialects/mindsdb/create_job.py b/mindsdb_sql/parser/dialects/mindsdb/create_job.py index b7cfd5e2..eb06a8df 100644 --- a/mindsdb_sql/parser/dialects/mindsdb/create_job.py +++ b/mindsdb_sql/parser/dialects/mindsdb/create_job.py @@ -12,6 +12,7 @@ def __init__(self, start_str=None, end_str=None, repeat_str=None, + if_query_str=None, if_not_exists=False, *args, **kwargs): super().__init__(*args, **kwargs) @@ -22,6 +23,7 @@ def __init__(self, self.repeat_str = repeat_str self.date_format = '%Y-%m-%d %H:%M:%S' self.if_not_exists = if_not_exists + self.if_query_str = if_query_str def to_tree(self, *args, level=0, **kwargs): ind = indent(level) @@ -46,6 +48,10 @@ def to_tree(self, *args, level=0, **kwargs): if self.if_not_exists: if_not_exists_str = f'\n{ind1}if_not_exists=True,' + if_query_str = '' + if self.if_query_str is not None: + if_query_str = f"\n{ind1}if_query='{self.if_query_str}'" + out_str = f'{ind}CreateJob(' \ f'{if_not_exists_str}' \ f'{name_str}' \ @@ -53,6 +59,7 @@ def to_tree(self, *args, level=0, **kwargs): f'{start_str}' \ f'{end_str}' \ f'{repeat_str}' \ + f'{if_query_str}' \ f'\n{ind})' return out_str @@ -70,5 +77,9 @@ def get_string(self, *args, **kwargs): if self.repeat_str is not None: repeat_str = f" EVERY '{self.repeat_str}'" - out_str = f'CREATE JOB {"IF NOT EXISTS" if self.if_not_exists else ""} {self.name.to_string()} ({self.query_str}){start_str}{end_str}{repeat_str}' + if_query_str = '' + if self.if_query_str is not None: + if_query_str = f" IF '{self.if_query_str}'" + + out_str = f'CREATE JOB {"IF NOT EXISTS" if self.if_not_exists else ""} {self.name.to_string()} ({self.query_str}){start_str}{end_str}{repeat_str}{if_query_str}' return out_str diff --git a/mindsdb_sql/parser/dialects/mindsdb/create_predictor.py b/mindsdb_sql/parser/dialects/mindsdb/create_predictor.py index f98d48f1..0349774f 100644 --- a/mindsdb_sql/parser/dialects/mindsdb/create_predictor.py +++ b/mindsdb_sql/parser/dialects/mindsdb/create_predictor.py @@ -35,6 +35,7 @@ def __init__(self, self.is_replace = is_replace self.if_not_exists = if_not_exists self.task = task + self._action = 'CREATE' def to_tree(self, *args, level=0, **kwargs): ind = indent(level) @@ -74,8 +75,10 @@ def to_tree(self, *args, level=0, **kwargs): using_str = f'\n{ind1}using={repr(self.using)},' if_not_exists_str = f'\n{ind1}if_not_exists={self.if_not_exists},' if self.if_not_exists else '' + or_replace_str = f'\n{ind1}is_replace={self.is_replace},' if self.is_replace else '' out_str = f'{ind}{self.__class__.__name__}(' \ + f'{or_replace_str}' \ f'{if_not_exists_str}' \ f'{name_str}' \ f'{integration_name_str}' \ @@ -126,9 +129,11 @@ def get_string(self, *args, **kwargs): if self.integration_name is not None: integration_name_str = f'FROM {self.integration_name.to_string()} ' + or_replace_str = ' OR REPLACE' if self.is_replace else '' if_not_exists_str = 'IF NOT EXISTS ' if self.if_not_exists else '' + object_str = self._object + ' ' if self._object else '' - out_str = f'{self._command} {if_not_exists_str}{self.name.to_string()} {integration_name_str}{query_str}' \ + out_str = f'{self._action}{or_replace_str} {object_str}{if_not_exists_str}{self.name.to_string()} {integration_name_str}{query_str}' \ f'{datasource_name_str}' \ f'{targets_str} ' \ f'{order_by_str}' \ @@ -143,12 +148,12 @@ def get_string(self, *args, **kwargs): class CreatePredictor(CreatePredictorBase): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - self._command = 'CREATE PREDICTOR' + self._object = 'PREDICTOR' # Models by task type class CreateAnomalyDetectionModel(CreatePredictorBase): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - self._command = 'CREATE ANOMALY DETECTION MODEL' + self._object = 'ANOMALY DETECTION MODEL' self.task = Identifier('AnomalyDetection') diff --git a/mindsdb_sql/parser/dialects/mindsdb/finetune_predictor.py b/mindsdb_sql/parser/dialects/mindsdb/finetune_predictor.py index 232c4721..bc468229 100644 --- a/mindsdb_sql/parser/dialects/mindsdb/finetune_predictor.py +++ b/mindsdb_sql/parser/dialects/mindsdb/finetune_predictor.py @@ -4,4 +4,5 @@ class FinetunePredictor(CreatePredictorBase): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - self._command = 'FINETUNE' + self._action = 'FINETUNE' + self._object = '' diff --git a/mindsdb_sql/parser/dialects/mindsdb/lexer.py b/mindsdb_sql/parser/dialects/mindsdb/lexer.py index 405fde10..d9c1c0b7 100644 --- a/mindsdb_sql/parser/dialects/mindsdb/lexer.py +++ b/mindsdb_sql/parser/dialects/mindsdb/lexer.py @@ -43,7 +43,7 @@ class MindsDBLexer(Lexer): GLOBAL, PROCEDURE, FUNCTION, INDEX, WARNINGS, ENGINES, CHARSET, COLLATION, PLUGINS, CHARACTER, PERSIST, PERSIST_ONLY, - IF_EXISTS, IF_NOT_EXISTS, COLUMNS, FIELDS, COLLATE, SEARCH_PATH, + IF_EXISTS, IF_NOT_EXISTS, IF, COLUMNS, FIELDS, COLLATE, SEARCH_PATH, VARIABLE, SYSTEM_VARIABLE, # SELECT Keywords @@ -71,7 +71,7 @@ class MindsDBLexer(Lexer): PLUS, MINUS, DIVIDE, MODULO, EQUALS, NEQUALS, GREATER, GEQ, LESS, LEQ, AND, OR, NOT, IS, IS_NOT, - IN, LIKE, CONCAT, BETWEEN, WINDOW, OVER, PARTITION_BY, + IN, LIKE, NOT_LIKE, CONCAT, BETWEEN, WINDOW, OVER, PARTITION_BY, # Data types CAST, ID, INTEGER, FLOAT, QUOTE_STRING, DQUOTE_STRING, NULL, TRUE, FALSE, @@ -174,6 +174,7 @@ class MindsDBLexer(Lexer): PERSIST_ONLY = r'\bPERSIST_ONLY\b' IF_EXISTS = r'\bIF[\s]+EXISTS\b' IF_NOT_EXISTS = r'\bIF[\s]+NOT[\s]+EXISTS\b' + IF = r'\bIF\b' COLUMNS = r'\bCOLUMNS\b' FIELDS = r'\bFIELDS\b' EXTENDED = r'\bEXTENDED\b' @@ -275,6 +276,7 @@ class MindsDBLexer(Lexer): AND = r'\bAND\b' OR = r'\bOR\b' IS_NOT = r'\bIS[\s]+NOT\b' + NOT_LIKE = r'\bNOT[\s]+LIKE\b' NOT = r'\bNOT\b' IS = r'\bIS\b' LIKE = r'\bLIKE\b' diff --git a/mindsdb_sql/parser/dialects/mindsdb/parser.py b/mindsdb_sql/parser/dialects/mindsdb/parser.py index a25d764e..ada46cf3 100644 --- a/mindsdb_sql/parser/dialects/mindsdb/parser.py +++ b/mindsdb_sql/parser/dialects/mindsdb/parser.py @@ -47,7 +47,7 @@ class MindsDBParser(Parser): ('left', PLUS, MINUS), ('left', STAR, DIVIDE), ('right', UMINUS), # Unary minus operator, unary not - ('nonassoc', LESS, LEQ, GREATER, GEQ, IN, BETWEEN, IS, IS_NOT, LIKE), + ('nonassoc', LESS, LEQ, GREATER, GEQ, IN, BETWEEN, IS, IS_NOT, NOT_LIKE, LIKE), ) # Top-level statements @@ -229,10 +229,20 @@ def drop_trigger(self, p): # -- Jobs -- @_('CREATE JOB if_not_exists_or_empty identifier LPAREN raw_query RPAREN job_schedule', 'CREATE JOB if_not_exists_or_empty identifier AS LPAREN raw_query RPAREN job_schedule', + 'CREATE JOB if_not_exists_or_empty identifier LPAREN raw_query RPAREN job_schedule IF LPAREN raw_query RPAREN', + 'CREATE JOB if_not_exists_or_empty identifier AS LPAREN raw_query RPAREN job_schedule IF LPAREN raw_query RPAREN', 'CREATE JOB if_not_exists_or_empty identifier LPAREN raw_query RPAREN', - 'CREATE JOB if_not_exists_or_empty identifier AS LPAREN raw_query RPAREN') + 'CREATE JOB if_not_exists_or_empty identifier AS LPAREN raw_query RPAREN', + 'CREATE JOB if_not_exists_or_empty identifier LPAREN raw_query RPAREN IF LPAREN raw_query RPAREN', + 'CREATE JOB if_not_exists_or_empty identifier AS LPAREN raw_query RPAREN IF LPAREN raw_query RPAREN' + ) def create_job(self, p): - query_str = tokens_to_string(p.raw_query) + if hasattr(p, 'raw_query0'): + query_str = tokens_to_string(p.raw_query0) + if_query_str = tokens_to_string(p.raw_query1) + else: + query_str = tokens_to_string(p.raw_query) + if_query_str = None job_schedule = getattr(p, 'job_schedule', {}) @@ -254,6 +264,7 @@ def create_job(self, p): return CreateJob( name=p.identifier, query_str=query_str, + if_query_str=if_query_str, start_str=start_str, end_str=end_str, repeat_str=repeat_str, @@ -776,10 +787,11 @@ def create_predictor(self, p): p.create_predictor.order_by = p.ordering_terms return p.create_predictor - @_('CREATE PREDICTOR if_not_exists_or_empty identifier FROM identifier LPAREN raw_query RPAREN PREDICT result_columns', - 'CREATE PREDICTOR if_not_exists_or_empty identifier PREDICT result_columns', - 'CREATE MODEL if_not_exists_or_empty identifier FROM identifier LPAREN raw_query RPAREN PREDICT result_columns', - 'CREATE MODEL if_not_exists_or_empty identifier PREDICT result_columns') + @_('CREATE replace_or_empty PREDICTOR if_not_exists_or_empty identifier FROM identifier LPAREN raw_query RPAREN PREDICT result_columns', + 'CREATE replace_or_empty PREDICTOR if_not_exists_or_empty identifier PREDICT result_columns', + 'CREATE replace_or_empty MODEL if_not_exists_or_empty identifier FROM identifier LPAREN raw_query RPAREN PREDICT result_columns', + 'CREATE replace_or_empty MODEL if_not_exists_or_empty identifier PREDICT result_columns' + ) def create_predictor(self, p): query_str = None if hasattr(p, 'raw_query'): @@ -796,7 +808,8 @@ def create_predictor(self, p): integration_name=getattr(p, 'identifier1', None), query_str=query_str, targets=p.result_columns, - if_not_exists=p.if_not_exists_or_empty + if_not_exists=p.if_not_exists_or_empty, + is_replace=p.replace_or_empty ) # Typed models @@ -1422,6 +1435,7 @@ def expr(self, p): 'expr NOT expr', 'expr IS expr', 'expr LIKE expr', + 'expr NOT_LIKE expr', 'expr CONCAT expr', 'expr IN expr') def expr(self, p): @@ -1429,7 +1443,11 @@ def expr(self, p): arg1 = Last() else: arg1 = p.expr1 - return BinaryOperation(op=p[1], args=(p[0], arg1)) + if len(p) > 3: + op = ' '.join([p[i] for i in range(1, len(p)-1)]) + else: + op = p[1] + return BinaryOperation(op=op, args=(p[0], arg1)) @_('MINUS expr %prec UMINUS', 'NOT expr %prec UNOT', ) @@ -1729,6 +1747,15 @@ def variable(self, p): def variable(self, p): return Variable(value=p.VARIABLE) + @_( + 'OR REPLACE', + 'empty' + ) + def replace_or_empty(self, p): + if hasattr(p, 'REPLACE'): + return True + return False + @_( 'IF_NOT_EXISTS', 'empty' diff --git a/mindsdb_sql/parser/dialects/mindsdb/retrain_predictor.py b/mindsdb_sql/parser/dialects/mindsdb/retrain_predictor.py index 229c7cef..410a94c8 100644 --- a/mindsdb_sql/parser/dialects/mindsdb/retrain_predictor.py +++ b/mindsdb_sql/parser/dialects/mindsdb/retrain_predictor.py @@ -1,8 +1,8 @@ -from mindsdb_sql.parser.ast.base import ASTNode -from mindsdb_sql.parser.utils import indent from .create_predictor import CreatePredictorBase + class RetrainPredictor(CreatePredictorBase): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - self._command = 'RETRAIN' \ No newline at end of file + self._action = 'RETRAIN' + self._object = '' diff --git a/mindsdb_sql/planner/plan_join.py b/mindsdb_sql/planner/plan_join.py index bcbeaa64..acd1003c 100644 --- a/mindsdb_sql/planner/plan_join.py +++ b/mindsdb_sql/planner/plan_join.py @@ -407,16 +407,27 @@ def process_predictor(self, item, query_in): raise NotImplementedError("TS predictor is not supported here yet") data_step = self.step_stack[-1] row_dict = None + + predict_target = item.predictor_info.get('to_predict') + if isinstance(predict_target, list) and len(predict_target) > 0: + predict_target = predict_target[0] + if predict_target is not None: + predict_target = predict_target.lower() + if item.conditions: row_dict = {} - for el in item.conditions: + for i, el in enumerate(item.conditions): if isinstance(el.args[0], Identifier) and el.op == '=': + col_name = el.args[0].parts[-1] + if col_name.lower() == predict_target: + # don't add predict target to parameters + continue if isinstance(el.args[1], (Constant, Parameter)): row_dict[el.args[0].parts[-1]] = el.args[1].value # exclude condition - item.conditions[0]._orig_node.args = [Constant(0), Constant(0)] + el._orig_node.args = [Constant(0), Constant(0)] predictor_step = self.planner.plan.add_step(ApplyPredictorStep( namespace=item.integration, diff --git a/mindsdb_sql/planner/query_planner.py b/mindsdb_sql/planner/query_planner.py index 685a6df8..f8a08b1c 100644 --- a/mindsdb_sql/planner/query_planner.py +++ b/mindsdb_sql/planner/query_planner.py @@ -289,9 +289,10 @@ def plan_api_db_select(self, query): # keep only limit and where # the rest goes to outer select query2 = Select( - targets=[Star()], + targets=query.targets, from_table=query.from_table, where=query.where, + order_by=query.order_by, limit=query.limit, ) prev_step = self.plan_integration_select(query2) diff --git a/mindsdb_sql/render/sqlalchemy_render.py b/mindsdb_sql/render/sqlalchemy_render.py index 23801cc9..c138b113 100644 --- a/mindsdb_sql/render/sqlalchemy_render.py +++ b/mindsdb_sql/render/sqlalchemy_render.py @@ -133,6 +133,7 @@ def to_expression(self, t): "is": "is_", "is not": "is_not", "like": "like", + "not like": "notlike", "in": "in_", "not in": "notin_", "||": "concat", diff --git a/tests/test_parser/test_base_sql/test_select_operations.py b/tests/test_parser/test_base_sql/test_select_operations.py index ae1d77e3..e7e0da06 100644 --- a/tests/test_parser/test_base_sql/test_select_operations.py +++ b/tests/test_parser/test_base_sql/test_select_operations.py @@ -564,3 +564,21 @@ def test_function_with_from(self, dialect): assert str(ast) == str(expected_ast) assert ast.to_tree() == expected_ast.to_tree() +class TestOperationsMindsdb: + def test_select_binary_operations(self): + for op in ['not like',]: + sql = f'SELECT column1 {op.upper()} column2 FROM tab' + ast = parse_sql(sql) + + expected_ast = Select( + targets=[BinaryOperation(op=op, + args=( + Identifier.from_path_str('column1'), Identifier.from_path_str('column2') + )), + ], + from_table=Identifier.from_path_str('tab') + ) + + assert str(ast).lower() == sql.lower() + assert str(ast) == str(expected_ast) + assert ast.to_tree() == expected_ast.to_tree() diff --git a/tests/test_parser/test_mindsdb/test_create_predictor.py b/tests/test_parser/test_mindsdb/test_create_predictor.py index b035ab7a..439d882e 100644 --- a/tests/test_parser/test_mindsdb/test_create_predictor.py +++ b/tests/test_parser/test_mindsdb/test_create_predictor.py @@ -104,6 +104,17 @@ def test_create_predictor_no_with(self): assert ast.to_tree() == expected_ast.to_tree() + # test or replace + sql = """CREATE or replace model pred + FROM integration_name + (select * FROM table_name) + PREDICT f1 as f1_alias, f2 + """ + ast = parse_sql(sql, dialect='mindsdb') + expected_ast.is_replace = True + + assert ast.to_tree() == expected_ast.to_tree() + def test_create_predictor_quotes(self): sql = """CREATE PREDICTOR xxx FROM `yyy` diff --git a/tests/test_parser/test_mindsdb/test_jobs.py b/tests/test_parser/test_mindsdb/test_jobs.py index 47f8ddc7..36353130 100644 --- a/tests/test_parser/test_mindsdb/test_jobs.py +++ b/tests/test_parser/test_mindsdb/test_jobs.py @@ -56,6 +56,23 @@ def test_create_job(self): assert str(ast) == str(expected_ast) assert ast.to_tree() == expected_ast.to_tree() + # 4 + + sql = ''' + create job j1 ( retrain p1 ) + every 2 hours + if (select a from table) + ''' + ast = parse_sql(sql, dialect='mindsdb') + expected_ast = CreateJob( + name=Identifier('j1'), + query_str="retrain p1", + if_query_str="select a from table", + repeat_str="2 hours" + ) + assert str(ast) == str(expected_ast) + assert ast.to_tree() == expected_ast.to_tree() + def test_create_job_minimal_with_if_not_exists(self): sql = ''' create job if not exists proj2.j1 ( diff --git a/tests/test_planner/test_integration_select.py b/tests/test_planner/test_integration_select.py index e4c3adf1..6cd4e5bf 100644 --- a/tests/test_planner/test_integration_select.py +++ b/tests/test_planner/test_integration_select.py @@ -524,7 +524,7 @@ def test_select_from_table_subselect_api_integration(self): steps=[ FetchDataframeStep( integration='int1', - query=parse_sql('select * from tab1'), + query=parse_sql('select tab1.`id` AS `id` from tab1'), ), SubSelectStep( dataframe=Result(0), @@ -534,7 +534,7 @@ def test_select_from_table_subselect_api_integration(self): FetchDataframeStep( integration='int1', query=Select( - targets=[Star()], + targets=[Identifier('tab2.x', alias=Identifier('x'))], from_table=Identifier('tab2'), where=BinaryOperation( op='in', @@ -597,7 +597,7 @@ def test_delete_from_table_subselect_api_integration(self): steps=[ FetchDataframeStep( integration='int1', - query=parse_sql('select * from tab1'), + query=parse_sql('select tab1.`id` AS `id` from tab1'), ), SubSelectStep( dataframe=Result(0), diff --git a/tests/test_planner/test_join_predictor.py b/tests/test_planner/test_join_predictor.py index 8999b784..d2d80a4a 100644 --- a/tests/test_planner/test_join_predictor.py +++ b/tests/test_planner/test_join_predictor.py @@ -532,12 +532,12 @@ def test_where_using(self): sql = ''' select * from int.tab1 a join proj.pred.1 p - where a.x=1 and p.x=1 and a.y=3 and p.y='' + where a.x=1 and p.x=1 and p.ttt=2 and a.y=3 and p.y='' ''' subquery = parse_sql(""" select * from x - where a.x=1 and 0=0 and a.y=3 and p.y='' + where a.x=1 and 0=0 and p.ttt=2 and a.y=3 and 0=0 """) subquery.from_table = None @@ -560,7 +560,7 @@ def test_where_using(self): ) plan = plan_query(query, integrations=['int'], predictor_namespace='mindsdb', - predictor_metadata=[{'name': 'pred', 'integration_name': 'proj'}]) + predictor_metadata=[{'name': 'pred', 'integration_name': 'proj', 'to_predict': ['ttt']}]) assert plan.steps == expected_plan.steps diff --git a/tests/test_render/test_sqlalchemyrender.py b/tests/test_render/test_sqlalchemyrender.py index 77ff7ba4..a1bc6b7c 100644 --- a/tests/test_render/test_sqlalchemyrender.py +++ b/tests/test_render/test_sqlalchemyrender.py @@ -62,7 +62,7 @@ def parse_sql2(sql, dialect='mindsdb'): return query sql2 = sql2.replace(' FLOAT', ' float') - query2 = parse_sql(sql2, 'mysql') + query2 = parse_sql(sql2, 'mindsdb') # exclude cases when sqlalchemy replaces some parts of sql if not (