Skip to content

Commit

Permalink
Add support for like() function (#130)
Browse files Browse the repository at this point in the history
* add like support

* Update README.md

* Update SplToCatalystTest.scala
  • Loading branch information
chirag-s-db authored Mar 18, 2024
1 parent 14789b6 commit b3586ef
Show file tree
Hide file tree
Showing 3 changed files with 58 additions and 1 deletion.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ There's basic support for the most used commands like `addtotals`, `bin`, `colle
`streamstats`, `table`, `where`.

There's also basic support for functions like `auto()`, `cidr_match()`, `coalesce()`, `count()`,
`ctime()`, `earliest()`, `if()`, `isnotnull()`, `latest()`, `len()`, `lower()`, `max()`,
`ctime()`, `earliest()`, `if()`, `isnotnull()`, `latest()`, `len()`, `like()`, `lower()`, `max()`,
`memk()`, `min()`, `mvappend()`, `mvcount()`, `mvfilter()`, `mvindex()`, `none()`,
`null()`, `num()`, `replace()`, `rmcomma()`, `rmunit()`, `round()`, `strftime()`,
`substr()`, `sum()`, `term()`, `values()`.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,30 @@ object SplToCatalyst extends Logging {
determineMax(ctx, call)
case "len" =>
Length(attrOrExpr(ctx, call.args.head))
case "like" =>
  // Translates SPL like(TEXT, PATTERN) into a Catalyst predicate: either a Contains on the
  // first argument or a Like that uses '\\' as its escape character.
  val field = attrOrExpr(ctx, call.args.head)
  val pattern = attrOrExpr(ctx, call.args(1))
  pattern match {
    case Literal(patternLiteral: UTF8String, StringType) =>
      val patternString = patternLiteral.toString
      // A pattern of the form %needle% can be rewritten as a CONTAINS expression, but only
      // when `needle` is plain literal text. The text between the leading and trailing %
      // must therefore contain none of:
      //   %  - another multi-character wildcard,
      //   _  - the single-character wildcard (CONTAINS would match it literally),
      //   \  - the escape character (CONTAINS would keep the backslash in the needle; this
      //        also covers an escaped trailing %, as in %foo\%).
      // This is deliberately conservative: anything that is not obviously literal falls
      // back to Like.
      val needle =
        if (patternString.length > 2) patternString.substring(1, patternString.length - 1)
        else ""
      val isSimpleContains =
        needle.nonEmpty &&
          patternString.startsWith("%") &&
          patternString.endsWith("%") &&
          !needle.exists(c => c == '%' || c == '_' || c == '\\')
      if (isSimpleContains) {
        Contains(field, Literal(needle))
      } else {
        Like(field, pattern, '\\')
      }
    // Non-literal (or non-string) patterns cannot be inspected here; keep them as Like.
    case _ => Like(field, pattern, '\\')
  }
case "substr" =>
val str = attrOrExpr(ctx, call.args.head)
val pos = expression(ctx, call.args(1))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1096,6 +1096,39 @@ class SplToCatalystTest extends AnyFunSuite with PlanTestBase {
)
}

test("simple LIKE converted to CONTAINS") {
  // like(a, "%foo%") has one literal needle between two wildcards, so the expected plan
  // filters with Contains on "foo" rather than with Like.
  val likeCall = ast.Call("like", Seq(ast.Field("a"), ast.StrValue("%foo%")))
  check(ast.SearchCommand(likeCall),
    (_, tree) => {
      val condition = Contains(UnresolvedAttribute("a"), Literal.create("foo"))
      Filter(condition, tree)
    })
}

test("complex LIKE not converted to CONTAINS") {
  // Two patterns that must stay as Like (with '\\' as the escape character):
  // one with a wildcard in the middle, and one whose trailing % is escaped.
  Seq("%foo%bar%", "%foo\\%").foreach { rawPattern =>
    val likeCall = ast.Call("like", Seq(ast.Field("a"), ast.StrValue(rawPattern)))
    check(ast.SearchCommand(likeCall),
      (_, tree) => {
        val condition = Like(UnresolvedAttribute("a"), Literal.create(rawPattern), '\\')
        Filter(condition, tree)
      })
  }
}

test("eventstats max(colA) AS maxA by colC") {
check(ast.EventStatsCommand(
allNum = false,
Expand Down

0 comments on commit b3586ef

Please sign in to comment.