sqlglot.dialects.presto
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    binary_from_function,
    bool_xor_sql,
    build_replace_with_optional_replacement,
    date_trunc_to_time,
    datestrtodate_sql,
    encode_decode_sql,
    build_formatted_time,
    if_sql,
    left_to_substring_sql,
    no_ilike_sql,
    no_pivot_sql,
    no_timestamp_sql,
    regexp_extract_sql,
    rename_func,
    right_to_substring_sql,
    sha256_sql,
    strposition_sql,
    struct_extract_sql,
    timestamptrunc_sql,
    timestrtotime_sql,
    ts_or_ds_add_cast,
    unit_to_str,
    sequence_sql,
    build_regexp_extract,
    explode_to_unnest_sql,
)
from sqlglot.dialects.hive import Hive
from sqlglot.dialects.mysql import MySQL
from sqlglot.helper import apply_index_offset, seq_get
from sqlglot.optimizer.scope import find_all_in_scope
from sqlglot.tokens import TokenType
from sqlglot.transforms import unqualify_columns
from sqlglot.generator import unsupported_args

DATE_ADD_OR_SUB = t.Union[exp.DateAdd, exp.TimestampAdd, exp.DateSub]


def _initcap_sql(self: Presto.Generator, expression: exp.Initcap) -> str:
    regex = r"(\w)(\w*)"
    return f"REGEXP_REPLACE({self.sql(expression, 'this')}, '{regex}', x -> UPPER(x[1]) || LOWER(x[2]))"


def _no_sort_array(self: Presto.Generator, expression: exp.SortArray) -> str:
    if expression.args.get("asc") == exp.false():
        comparator = "(a, b) -> CASE WHEN a < b THEN 1 WHEN a > b THEN -1 ELSE 0 END"
    else:
        comparator = None
    return self.func("ARRAY_SORT", expression.this, comparator)


def _schema_sql(self: Presto.Generator, expression: exp.Schema) -> str:
    if isinstance(expression.parent, exp.PartitionedByProperty):
        # Any columns in the ARRAY[] string literals should not be quoted
        expression.transform(lambda n: n.name if isinstance(n, exp.Identifier) else n, copy=False)

        partition_exprs = [
            self.sql(c) if isinstance(c, (exp.Func, exp.Property)) else self.sql(c, "this")
            for c in expression.expressions
        ]
        return self.sql(exp.Array(expressions=[exp.Literal.string(c) for c in partition_exprs]))

    if expression.parent:
        for schema in expression.parent.find_all(exp.Schema):
            if schema is expression:
                continue

            column_defs = schema.find_all(exp.ColumnDef)
            if column_defs and isinstance(schema.parent, exp.Property):
                expression.expressions.extend(column_defs)

    return self.schema_sql(expression)


def _quantile_sql(self: Presto.Generator, expression: exp.Quantile) -> str:
    self.unsupported("Presto does not support exact quantiles")
    return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile"))


def _str_to_time_sql(
    self: Presto.Generator, expression: exp.StrToDate | exp.StrToTime | exp.TsOrDsToDate
) -> str:
    return self.func("DATE_PARSE", expression.this, self.format_time(expression))


def _ts_or_ds_to_date_sql(self: Presto.Generator, expression: exp.TsOrDsToDate) -> str:
    time_format = self.format_time(expression)
    if time_format and time_format not in (Presto.TIME_FORMAT, Presto.DATE_FORMAT):
        return self.sql(exp.cast(_str_to_time_sql(self, expression), exp.DataType.Type.DATE))
    return self.sql(
        exp.cast(exp.cast(expression.this, exp.DataType.Type.TIMESTAMP), exp.DataType.Type.DATE)
    )


def _ts_or_ds_add_sql(self: Presto.Generator, expression: exp.TsOrDsAdd) -> str:
    expression = ts_or_ds_add_cast(expression)
    unit = unit_to_str(expression)
    return self.func("DATE_ADD", unit, expression.expression, expression.this)


def _ts_or_ds_diff_sql(self: Presto.Generator, expression: exp.TsOrDsDiff) -> str:
    this = exp.cast(expression.this, exp.DataType.Type.TIMESTAMP)
    expr = exp.cast(expression.expression, exp.DataType.Type.TIMESTAMP)
    unit = unit_to_str(expression)
    return self.func("DATE_DIFF", unit, expr, this)


def _build_approx_percentile(args: t.List) -> exp.Expression:
    if len(args) == 4:
        return exp.ApproxQuantile(
            this=seq_get(args, 0),
            weight=seq_get(args, 1),
            quantile=seq_get(args, 2),
            accuracy=seq_get(args, 3),
        )
    if len(args) == 3:
        return exp.ApproxQuantile(
            this=seq_get(args, 0), quantile=seq_get(args, 1), accuracy=seq_get(args, 2)
        )
    return exp.ApproxQuantile.from_arg_list(args)


def _build_from_unixtime(args: t.List) -> exp.Expression:
    if len(args) == 3:
        return exp.UnixToTime(
            this=seq_get(args, 0),
            hours=seq_get(args, 1),
            minutes=seq_get(args, 2),
        )
    if len(args) == 2:
        return exp.UnixToTime(this=seq_get(args, 0), zone=seq_get(args, 1))

    return exp.UnixToTime.from_arg_list(args)


def _first_last_sql(self: Presto.Generator, expression: exp.Func) -> str:
    """
    Trino doesn't support FIRST / LAST as functions, but they're valid in the context
    of MATCH_RECOGNIZE, so we need to preserve them in that case. In all other cases
    they're converted into an ARBITRARY call.

    Reference: https://trino.io/docs/current/sql/match-recognize.html#logical-navigation-functions
    """
    if isinstance(expression.find_ancestor(exp.MatchRecognize, exp.Select), exp.MatchRecognize):
        return self.function_fallback_sql(expression)

    return rename_func("ARBITRARY")(self, expression)


def _unix_to_time_sql(self: Presto.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = self.sql(expression, "this")
    if scale in (None, exp.UnixToTime.SECONDS):
        return rename_func("FROM_UNIXTIME")(self, expression)

    return f"FROM_UNIXTIME(CAST({timestamp} AS DOUBLE) / POW(10, {scale}))"


def _to_int(self: Presto.Generator, expression: exp.Expression) -> exp.Expression:
    if not expression.type:
        from sqlglot.optimizer.annotate_types import annotate_types

        annotate_types(expression, dialect=self.dialect)
    if expression.type and expression.type.this not in exp.DataType.INTEGER_TYPES:
        return exp.cast(expression, to=exp.DataType.Type.BIGINT)
    return expression


def _build_to_char(args: t.List) -> exp.TimeToStr:
    fmt = seq_get(args, 1)
    if isinstance(fmt, exp.Literal):
        # We uppercase this to match Teradata's format mapping keys
        fmt.set("this", fmt.this.upper())

    # We use "teradata" on purpose here, because the time formats are different in Presto.
    # See https://prestodb.io/docs/current/functions/teradata.html?highlight=to_char#to_char
    return build_formatted_time(exp.TimeToStr, "teradata")(args)


def _date_delta_sql(
    name: str, negate_interval: bool = False
) -> t.Callable[[Presto.Generator, DATE_ADD_OR_SUB], str]:
    def _delta_sql(self: Presto.Generator, expression: DATE_ADD_OR_SUB) -> str:
        interval = _to_int(self, expression.expression)
        return self.func(
            name,
            unit_to_str(expression),
            interval * (-1) if negate_interval else interval,
            expression.this,
        )

    return _delta_sql


def _explode_to_unnest_sql(self: Presto.Generator, expression: exp.Lateral) -> str:
    explode = expression.this
    if isinstance(explode, exp.Explode):
        exploded_type = explode.this.type
        alias = expression.args.get("alias")

        # This attempts a best-effort transpilation of LATERAL VIEW EXPLODE on a struct array
        if (
            isinstance(alias, exp.TableAlias)
            and isinstance(exploded_type, exp.DataType)
            and exploded_type.is_type(exp.DataType.Type.ARRAY)
            and exploded_type.expressions
            and exploded_type.expressions[0].is_type(exp.DataType.Type.STRUCT)
        ):
            # When unnesting a ROW in Presto, it produces N columns, so we need to fix the alias
            alias.set("columns", [c.this.copy() for c in exploded_type.expressions[0].expressions])
    elif isinstance(explode, exp.Inline):
        explode.replace(exp.Explode(this=explode.this.copy()))

    return explode_to_unnest_sql(self, expression)


def amend_exploded_column_table(expression: exp.Expression) -> exp.Expression:
    # We check for expression.type because the columns can be amended only if types were inferred
    if isinstance(expression, exp.Select) and expression.type:
        for lateral in expression.args.get("laterals") or []:
            alias = lateral.args.get("alias")
            if (
                not isinstance(lateral.this, exp.Explode)
                or not isinstance(alias, exp.TableAlias)
                or len(alias.columns) != 1
            ):
                continue

            new_table = alias.this
            old_table = alias.columns[0].name.lower()

            # When transpiling a LATERAL VIEW EXPLODE Spark query, the exploded fields may be
            # qualified with the struct column, resulting in invalid Presto references that
            # need to be amended
            for column in find_all_in_scope(expression, exp.Column):
                if column.db.lower() == old_table:
                    column.set("table", column.args["db"].pop())
                elif column.table.lower() == old_table:
                    column.set("table", new_table.copy())
                elif column.name.lower() == old_table and isinstance(column.parent, exp.Dot):
                    column.parent.replace(exp.column(column.parent.expression, table=new_table))

    return expression


class Presto(Dialect):
    INDEX_OFFSET = 1
    NULL_ORDERING = "nulls_are_last"
    TIME_FORMAT = MySQL.TIME_FORMAT
    STRICT_STRING_CONCAT = True
    SUPPORTS_SEMI_ANTI_JOIN = False
    TYPED_DIVISION = True
    TABLESAMPLE_SIZE_IS_PERCENT = True
    LOG_BASE_FIRST: t.Optional[bool] = None
    SUPPORTS_VALUES_DEFAULT = False

    TIME_MAPPING = MySQL.TIME_MAPPING

    # https://github.com/trinodb/trino/issues/17
    # https://github.com/trinodb/trino/issues/12289
    # https://github.com/prestodb/presto/issues/2863
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    # The result of certain math functions in Presto/Trino is of the type
    # equal to the input type, e.g.: FLOOR(5.5/2) -> DECIMAL, FLOOR(5/2) -> BIGINT
    ANNOTATORS = {
        **Dialect.ANNOTATORS,
        exp.Floor: lambda self, e: self._annotate_by_args(e, "this"),
        exp.Ceil: lambda self, e: self._annotate_by_args(e, "this"),
        exp.Mod: lambda self, e: self._annotate_by_args(e, "this", "expression"),
        exp.Round: lambda self, e: self._annotate_by_args(e, "this"),
        exp.Sign: lambda self, e: self._annotate_by_args(e, "this"),
        exp.Abs: lambda self, e: self._annotate_by_args(e, "this"),
        exp.Rand: lambda self, e: self._annotate_by_args(e, "this")
        if e.this
        else self._set_type(e, exp.DataType.Type.DOUBLE),
    }

    SUPPORTED_SETTINGS = {
        *Dialect.SUPPORTED_SETTINGS,
        "variant_extract_is_json_extract",
    }

    class Tokenizer(tokens.Tokenizer):
        HEX_STRINGS = [("x'", "'"), ("X'", "'")]
        UNICODE_STRINGS = [
            (prefix + q, q)
            for q in t.cast(t.List[str], tokens.Tokenizer.QUOTES)
            for prefix in ("U&", "u&")
        ]

        NESTED_COMMENTS = False

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "DEALLOCATE PREPARE": TokenType.COMMAND,
            "DESCRIBE INPUT": TokenType.COMMAND,
            "DESCRIBE OUTPUT": TokenType.COMMAND,
            "RESET SESSION": TokenType.COMMAND,
            "START": TokenType.BEGIN,
            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
            "ROW": TokenType.STRUCT,
            "IPADDRESS": TokenType.IPADDRESS,
            "IPPREFIX": TokenType.IPPREFIX,
            "TDIGEST": TokenType.TDIGEST,
            "HYPERLOGLOG": TokenType.HLLSKETCH,
        }
        KEYWORDS.pop("/*+")
        KEYWORDS.pop("QUALIFY")

    class Parser(parser.Parser):
        VALUES_FOLLOWED_BY_PAREN = False
        ZONE_AWARE_TIMESTAMP_CONSTRUCTOR = True

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ARBITRARY": exp.AnyValue.from_arg_list,
            "APPROX_DISTINCT": exp.ApproxDistinct.from_arg_list,
            "APPROX_PERCENTILE": _build_approx_percentile,
            "BITWISE_AND": binary_from_function(exp.BitwiseAnd),
            "BITWISE_NOT": lambda args: exp.BitwiseNot(this=seq_get(args, 0)),
            "BITWISE_OR": binary_from_function(exp.BitwiseOr),
            "BITWISE_XOR": binary_from_function(exp.BitwiseXor),
            "CARDINALITY": exp.ArraySize.from_arg_list,
            "CONTAINS": exp.ArrayContains.from_arg_list,
            "DATE_ADD": lambda args: exp.DateAdd(
                this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0)
            ),
            "DATE_DIFF": lambda args: exp.DateDiff(
                this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0)
            ),
            "DATE_FORMAT": build_formatted_time(exp.TimeToStr, "presto"),
            "DATE_PARSE": build_formatted_time(exp.StrToTime, "presto"),
            "DATE_TRUNC": date_trunc_to_time,
            "DAY_OF_WEEK": exp.DayOfWeekIso.from_arg_list,
            "DOW": exp.DayOfWeekIso.from_arg_list,
            "DOY": exp.DayOfYear.from_arg_list,
            "ELEMENT_AT": lambda args: exp.Bracket(
                this=seq_get(args, 0), expressions=[seq_get(args, 1)], offset=1, safe=True
            ),
            "FROM_HEX": exp.Unhex.from_arg_list,
            "FROM_UNIXTIME": _build_from_unixtime,
            "FROM_UTF8": lambda args: exp.Decode(
                this=seq_get(args, 0), replace=seq_get(args, 1), charset=exp.Literal.string("utf-8")
            ),
            "JSON_FORMAT": lambda args: exp.JSONFormat(
                this=seq_get(args, 0), options=seq_get(args, 1), is_json=True
            ),
            "LEVENSHTEIN_DISTANCE": exp.Levenshtein.from_arg_list,
            "NOW": exp.CurrentTimestamp.from_arg_list,
            "REGEXP_EXTRACT": build_regexp_extract(exp.RegexpExtract),
            "REGEXP_EXTRACT_ALL": build_regexp_extract(exp.RegexpExtractAll),
            "REGEXP_REPLACE": lambda args: exp.RegexpReplace(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                replacement=seq_get(args, 2) or exp.Literal.string(""),
            ),
            "REPLACE": build_replace_with_optional_replacement,
            "ROW": exp.Struct.from_arg_list,
            "SEQUENCE": exp.GenerateSeries.from_arg_list,
            "SET_AGG": exp.ArrayUniqueAgg.from_arg_list,
            "SPLIT_TO_MAP": exp.StrToMap.from_arg_list,
            "STRPOS": lambda args: exp.StrPosition(
                this=seq_get(args, 0), substr=seq_get(args, 1), occurrence=seq_get(args, 2)
            ),
            "TO_CHAR": _build_to_char,
            "TO_UNIXTIME": exp.TimeToUnix.from_arg_list,
            "TO_UTF8": lambda args: exp.Encode(
                this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
            ),
            "MD5": exp.MD5Digest.from_arg_list,
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
        }

        FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy()
        FUNCTION_PARSERS.pop("TRIM")

    class Generator(generator.Generator):
        INTERVAL_ALLOWS_PLURAL_FORM = False
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        IS_BOOL_ALLOWED = False
        TZ_TO_WITH_TIME_ZONE = True
        NVL2_SUPPORTED = False
        STRUCT_DELIMITER = ("(", ")")
        LIMIT_ONLY_LITERALS = True
        SUPPORTS_SINGLE_ARG_CONCAT = False
        LIKE_PROPERTY_INSIDE_SCHEMA = True
        MULTI_ARG_DISTINCT = False
        SUPPORTS_TO_NUMBER = False
        HEX_FUNC = "TO_HEX"
        PARSE_JSON_NAME = "JSON_PARSE"
        PAD_FILL_PATTERN_IS_REQUIRED = True
        EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False
        SUPPORTS_MEDIAN = False
        ARRAY_SIZE_NAME = "CARDINALITY"

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.LocationProperty: exp.Properties.Location.UNSUPPORTED,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BINARY: "VARBINARY",
            exp.DataType.Type.BIT: "BOOLEAN",
            exp.DataType.Type.DATETIME: "TIMESTAMP",
            exp.DataType.Type.DATETIME64: "TIMESTAMP",
            exp.DataType.Type.FLOAT: "REAL",
            exp.DataType.Type.HLLSKETCH: "HYPERLOGLOG",
            exp.DataType.Type.INT: "INTEGER",
            exp.DataType.Type.STRUCT: "ROW",
            exp.DataType.Type.TEXT: "VARCHAR",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP",
            exp.DataType.Type.TIMETZ: "TIME",
        }

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.AnyValue: rename_func("ARBITRARY"),
            exp.ApproxQuantile: rename_func("APPROX_PERCENTILE"),
            exp.ArgMax: rename_func("MAX_BY"),
            exp.ArgMin: rename_func("MIN_BY"),
            exp.Array: lambda self, e: f"ARRAY[{self.expressions(e, flat=True)}]",
            exp.ArrayAny: rename_func("ANY_MATCH"),
            exp.ArrayConcat: rename_func("CONCAT"),
            exp.ArrayContains: rename_func("CONTAINS"),
            exp.ArrayToString: rename_func("ARRAY_JOIN"),
            exp.ArrayUniqueAgg: rename_func("SET_AGG"),
            exp.AtTimeZone: rename_func("AT_TIMEZONE"),
            exp.BitwiseAnd: lambda self, e: self.func("BITWISE_AND", e.this, e.expression),
            exp.BitwiseLeftShift: lambda self, e: self.func(
                "BITWISE_ARITHMETIC_SHIFT_LEFT", e.this, e.expression
            ),
            exp.BitwiseNot: lambda self, e: self.func("BITWISE_NOT", e.this),
            exp.BitwiseOr: lambda self, e: self.func("BITWISE_OR", e.this, e.expression),
            exp.BitwiseRightShift: lambda self, e: self.func(
                "BITWISE_ARITHMETIC_SHIFT_RIGHT", e.this, e.expression
            ),
            exp.BitwiseXor: lambda self, e: self.func("BITWISE_XOR", e.this, e.expression),
            exp.Cast: transforms.preprocess([transforms.epoch_cast_to_ts]),
            exp.CurrentTime: lambda *_: "CURRENT_TIME",
            exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
            exp.CurrentUser: lambda *_: "CURRENT_USER",
            exp.DateAdd: _date_delta_sql("DATE_ADD"),
            exp.DateDiff: lambda self, e: self.func(
                "DATE_DIFF", unit_to_str(e), e.expression, e.this
            ),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DateToDi: lambda self,
            e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Presto.DATEINT_FORMAT}) AS INT)",
            exp.DateSub: _date_delta_sql("DATE_ADD", negate_interval=True),
            exp.DayOfWeek: lambda self, e: f"(({self.func('DAY_OF_WEEK', e.this)} % 7) + 1)",
            exp.DayOfWeekIso: rename_func("DAY_OF_WEEK"),
            exp.Decode: lambda self, e: encode_decode_sql(self, e, "FROM_UTF8"),
            exp.DiToDate: lambda self,
            e: f"CAST(DATE_PARSE(CAST({self.sql(e, 'this')} AS VARCHAR), {Presto.DATEINT_FORMAT}) AS DATE)",
            exp.Encode: lambda self, e: encode_decode_sql(self, e, "TO_UTF8"),
            exp.FileFormatProperty: lambda self,
            e: f"format={self.sql(exp.Literal.string(e.name))}",
            exp.First: _first_last_sql,
            exp.FromTimeZone: lambda self,
            e: f"WITH_TIMEZONE({self.sql(e, 'this')}, {self.sql(e, 'zone')}) AT TIME ZONE 'UTC'",
            exp.GenerateSeries: sequence_sql,
            exp.GenerateDateArray: sequence_sql,
            exp.Group: transforms.preprocess([transforms.unalias_group]),
            exp.If: if_sql(),
            exp.ILike: no_ilike_sql,
            exp.Initcap: _initcap_sql,
            exp.Last: _first_last_sql,
            exp.LastDay: lambda self, e: self.func("LAST_DAY_OF_MONTH", e.this),
            exp.Lateral: _explode_to_unnest_sql,
            exp.Left: left_to_substring_sql,
            exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")(
                rename_func("LEVENSHTEIN_DISTANCE")
            ),
            exp.LogicalAnd: rename_func("BOOL_AND"),
            exp.LogicalOr: rename_func("BOOL_OR"),
            exp.Pivot: no_pivot_sql,
            exp.Quantile: _quantile_sql,
            exp.RegexpExtract: regexp_extract_sql,
            exp.RegexpExtractAll: regexp_extract_sql,
            exp.Right: right_to_substring_sql,
            exp.Schema: _schema_sql,
            exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_window_clause,
                    transforms.eliminate_qualify,
                    transforms.eliminate_distinct_on,
                    transforms.explode_projection_to_unnest(1),
                    transforms.eliminate_semi_and_anti_joins,
                    amend_exploded_column_table,
                ]
            ),
            exp.SortArray: _no_sort_array,
            exp.StrPosition: lambda self, e: strposition_sql(self, e, supports_occurrence=True),
            exp.StrToDate: lambda self, e: f"CAST({_str_to_time_sql(self, e)} AS DATE)",
            exp.StrToMap: rename_func("SPLIT_TO_MAP"),
            exp.StrToTime: _str_to_time_sql,
            exp.StructExtract: struct_extract_sql,
            exp.Table: transforms.preprocess([transforms.unnest_generate_series]),
            exp.Timestamp: no_timestamp_sql,
            exp.TimestampAdd: _date_delta_sql("DATE_ADD"),
            exp.TimestampTrunc: timestamptrunc_sql(),
            exp.TimeStrToDate: timestrtotime_sql,
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeStrToUnix: lambda self, e: self.func(
                "TO_UNIXTIME", self.func("DATE_PARSE", e.this, Presto.TIME_FORMAT)
            ),
            exp.TimeToStr: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)),
            exp.TimeToUnix: rename_func("TO_UNIXTIME"),
            exp.ToChar: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)),
            exp.TryCast: transforms.preprocess([transforms.epoch_cast_to_ts]),
            exp.TsOrDiToDi: lambda self,
            e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS VARCHAR), '-', ''), 1, 8) AS INT)",
            exp.TsOrDsAdd: _ts_or_ds_add_sql,
            exp.TsOrDsDiff: _ts_or_ds_diff_sql,
            exp.TsOrDsToDate: _ts_or_ds_to_date_sql,
            exp.Unhex: rename_func("FROM_HEX"),
            exp.UnixToStr: lambda self,
            e: f"DATE_FORMAT(FROM_UNIXTIME({self.sql(e, 'this')}), {self.format_time(e)})",
            exp.UnixToTime: _unix_to_time_sql,
            exp.UnixToTimeStr: lambda self,
            e: f"CAST(FROM_UNIXTIME({self.sql(e, 'this')}) AS VARCHAR)",
            exp.VariancePop: rename_func("VAR_POP"),
            exp.With: transforms.preprocess([transforms.add_recursive_cte_column_names]),
            exp.WithinGroup: transforms.preprocess(
                [transforms.remove_within_group_for_percentiles]
            ),
            exp.Xor: bool_xor_sql,
            exp.MD5Digest: rename_func("MD5"),
            exp.SHA: rename_func("SHA1"),
            exp.SHA2: sha256_sql,
        }

        RESERVED_KEYWORDS = {
            "alter",
            "and",
            "as",
            "between",
            "by",
            "case",
            "cast",
            "constraint",
            "create",
            "cross",
            "current_time",
            "current_timestamp",
            "deallocate",
            "delete",
            "describe",
            "distinct",
            "drop",
            "else",
            "end",
            "escape",
            "except",
            "execute",
            "exists",
            "extract",
            "false",
            "for",
            "from",
            "full",
            "group",
            "having",
            "in",
            "inner",
            "insert",
            "intersect",
            "into",
            "is",
            "join",
            "left",
            "like",
            "natural",
            "not",
            "null",
            "on",
            "or",
            "order",
            "outer",
            "prepare",
            "right",
            "select",
            "table",
            "then",
            "true",
            "union",
            "using",
            "values",
            "when",
            "where",
            "with",
        }

        def jsonformat_sql(self, expression: exp.JSONFormat) -> str:
            this = expression.this
            is_json = expression.args.get("is_json")

            if this and not (is_json or this.type):
                from sqlglot.optimizer.annotate_types import annotate_types

                this = annotate_types(this, dialect=self.dialect)

            if not (is_json or this.is_type(exp.DataType.Type.JSON)):
                this.replace(exp.cast(this, exp.DataType.Type.JSON))

            return self.function_fallback_sql(expression)

        def md5_sql(self, expression: exp.MD5) -> str:
            this = expression.this

            if not this.type:
                from sqlglot.optimizer.annotate_types import annotate_types

                this = annotate_types(this, dialect=self.dialect)

            if this.is_type(*exp.DataType.TEXT_TYPES):
                this = exp.Encode(this=this, charset=exp.Literal.string("utf-8"))

            return self.func("LOWER", self.func("TO_HEX", self.func("MD5", self.sql(this))))

        def strtounix_sql(self, expression: exp.StrToUnix) -> str:
            # Since `TO_UNIXTIME` requires a `TIMESTAMP`, we need to parse the argument into one.
            # To do this, we first try to `DATE_PARSE` it, but since this can fail when there's a
            # timezone involved, we wrap it in a `TRY` call and use `PARSE_DATETIME` as a fallback,
            # which seems to be using the same time mapping as Hive, as per:
            # https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html
            this = expression.this
            value_as_text = exp.cast(this, exp.DataType.Type.TEXT)
            value_as_timestamp = (
                exp.cast(this, exp.DataType.Type.TIMESTAMP) if this.is_string else this
            )

            parse_without_tz = self.func("DATE_PARSE", value_as_text, self.format_time(expression))

            formatted_value = self.func(
                "DATE_FORMAT", value_as_timestamp, self.format_time(expression)
            )
            parse_with_tz = self.func(
                "PARSE_DATETIME",
                formatted_value,
                self.format_time(expression, Hive.INVERSE_TIME_MAPPING, Hive.INVERSE_TIME_TRIE),
            )
            coalesced = self.func("COALESCE", self.func("TRY", parse_without_tz), parse_with_tz)
            return self.func("TO_UNIXTIME", coalesced)

        def bracket_sql(self, expression: exp.Bracket) -> str:
            if expression.args.get("safe"):
                return self.func(
                    "ELEMENT_AT",
                    expression.this,
                    seq_get(
                        apply_index_offset(
                            expression.this,
                            expression.expressions,
                            1 - expression.args.get("offset", 0),
                            dialect=self.dialect,
                        ),
                        0,
                    ),
                )
            return super().bracket_sql(expression)

        def struct_sql(self, expression: exp.Struct) -> str:
            from sqlglot.optimizer.annotate_types import annotate_types

            expression = annotate_types(expression, dialect=self.dialect)
            values: t.List[str] = []
            schema: t.List[str] = []
            unknown_type = False

            for e in expression.expressions:
                if isinstance(e, exp.PropertyEQ):
                    if e.type and e.type.is_type(exp.DataType.Type.UNKNOWN):
                        unknown_type = True
                    else:
                        schema.append(f"{self.sql(e, 'this')} {self.sql(e.type)}")
                    values.append(self.sql(e, "expression"))
                else:
                    values.append(self.sql(e))

            size = len(expression.expressions)

            if not size or len(schema) != size:
                if unknown_type:
                    self.unsupported(
                        "Cannot convert untyped key-value definitions (try annotate_types)."
                    )
                return self.func("ROW", *values)
            return f"CAST(ROW({', '.join(values)}) AS ROW({', '.join(schema)}))"

        def interval_sql(self, expression: exp.Interval) -> str:
            if expression.this and expression.text("unit").upper().startswith("WEEK"):
                return f"({expression.this.name} * INTERVAL '7' DAY)"
            return super().interval_sql(expression)

        def transaction_sql(self, expression: exp.Transaction) -> str:
            modes = expression.args.get("modes")
            modes = f" {', '.join(modes)}" if modes else ""
            return f"START TRANSACTION{modes}"

        def offset_limit_modifiers(
            self, expression: exp.Expression, fetch: bool, limit: t.Optional[exp.Fetch | exp.Limit]
        ) -> t.List[str]:
            return [
                self.sql(expression, "offset"),
                self.sql(limit),
            ]

        def create_sql(self, expression: exp.Create) -> str:
            """
            Presto doesn't support CREATE VIEW with column expressions (e.g. in
            `CREATE VIEW x (cola)`, `(cola)` is the expression), so we need to remove them.
            """
            kind = expression.args["kind"]
            schema = expression.this
            if kind == "VIEW" and schema.expressions:
                expression.this.set("expressions", None)
            return super().create_sql(expression)

        def delete_sql(self, expression: exp.Delete) -> str:
            """
            Presto only supports DELETE FROM for a single table without an alias, so we need
            to remove the unnecessary parts. If the original DELETE statement contains more
            than one table to be deleted, we can't safely map it 1-1 to a Presto statement.
            """
            tables = expression.args.get("tables") or [expression.this]
            if len(tables) > 1:
                return super().delete_sql(expression)

            table = tables[0]
            expression.set("this", table)
            expression.set("tables", None)

            if isinstance(table, exp.Table):
                table_alias = table.args.get("alias")
                if table_alias:
                    table_alias.pop()
                    expression = t.cast(exp.Delete, expression.transform(unqualify_columns))

            return super().delete_sql(expression)

        def jsonextract_sql(self, expression: exp.JSONExtract) -> str:
            is_json_extract = self.dialect.settings.get("variant_extract_is_json_extract", True)

            # Generate JSON_EXTRACT unless the user has configured that a Snowflake / Databricks
            # VARIANT extract (e.g. col:x.y) should map to dot notation (i.e. ROW access) in
            # Presto/Trino
            if not expression.args.get("variant_extract") or is_json_extract:
                return self.func(
                    "JSON_EXTRACT", expression.this, expression.expression, *expression.expressions
                )

            this = self.sql(expression, "this")

            # Convert the JSONPath extraction `JSON_EXTRACT(col, '$.x.y')` to a ROW access col.x.y
            segments = []
            for path_key in expression.expression.expressions[1:]:
                if not isinstance(path_key, exp.JSONPathKey):
                    # Cannot transpile subscripts, wildcards etc. to dot notation
                    self.unsupported(
                        f"Cannot transpile JSONPath segment '{path_key}' to ROW access"
                    )
                    continue
                key = path_key.this
                if not exp.SAFE_IDENTIFIER_RE.match(key):
                    key = f'"{key}"'
                segments.append(f".{key}")

            expr = "".join(segments)

            return f"{this}{expr}"

        def groupconcat_sql(self, expression: exp.GroupConcat) -> str:
            return self.func(
                "ARRAY_JOIN",
                self.func("ARRAY_AGG", expression.this),
                expression.args.get("separator"),
            )
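For orientation, here is a small usage sketch (not part of the module): it exercises two of the mappings defined above via sqlglot's public transpile API. The table and column names are made up, and the commented outputs are indicative rather than guaranteed for every sqlglot version.

    import sqlglot

    # ARBITRARY <-> exp.AnyValue: ANY_VALUE from another dialect is rendered as
    # ARBITRARY when writing Presto SQL (see TRANSFORMS above).
    print(sqlglot.transpile("SELECT ANY_VALUE(x) FROM t", read="spark", write="presto")[0])
    # roughly: SELECT ARBITRARY(x) FROM t

    # SHA256 parses into exp.SHA2 with length=256 (see Parser.FUNCTIONS above),
    # so it can round-trip to dialects that spell it SHA2(col, 256).
    print(sqlglot.transpile("SELECT SHA256(col) FROM t", read="presto", write="spark")[0])
    # roughly: SELECT SHA2(col, 256) FROM t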
Dialect-level settings documented for Presto:

- NULL_ORDERING = "nulls_are_last": the default NULL ordering method to use if not explicitly set. Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last".
- TYPED_DIVISION = True: whether the behavior of a / b depends on the types of a and b. False means a / b is always float division; True means a / b is integer division if both a and b are integers.
- LOG_BASE_FIRST = None: whether the base comes first in the LOG function. Possible values: True, False, None (two arguments are not supported by LOG).
- TIME_MAPPING: associates this dialect's time formats with their equivalent Python strftime formats.
- NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE: specifies the strategy according to which identifiers should be normalized.
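A brief sketch of how two of these settings surface in practice (the queries are made up, and the commented outputs are indicative, not version-guaranteed):

    import sqlglot
    from sqlglot import parse_one
    from sqlglot.optimizer.normalize_identifiers import normalize_identifiers

    # TIME_MAPPING: Presto's MySQL-style format tokens are translated when transpiling,
    # e.g. '%Y-%m-%d' should come out as Hive's 'yyyy-MM-dd'.
    print(sqlglot.transpile("SELECT DATE_FORMAT(x, '%Y-%m-%d')", read="presto", write="hive")[0])

    # NORMALIZATION_STRATEGY: Presto resolves identifiers case-insensitively, so the
    # optimizer's identifier normalization lowercases unquoted identifiers.
    print(normalize_identifiers(parse_one("SELECT FOO FROM BAR"), dialect="presto").sql())
    # roughly: SELECT foo FROM bar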
Presto.Parser, like all sqlglot parsers, consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
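As a hedged illustration of these arguments, they can be supplied through sqlglot's top-level parsing helpers, which forward extra options to the dialect's Parser (the query is made up):

    from sqlglot import parse_one
    from sqlglot.errors import ErrorLevel, ParseError

    try:
        # Raise on syntax errors instead of the default error handling; ELEMENT_AT is
        # parsed into a safe exp.Bracket access per Parser.FUNCTIONS above.
        ast = parse_one("SELECT ELEMENT_AT(arr, 1)", read="presto", error_level=ErrorLevel.RAISE)
    except ParseError as e:
        print(e.errors)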
class Generator(generator.Generator):
    INTERVAL_ALLOWS_PLURAL_FORM = False
    JOIN_HINTS = False
    TABLE_HINTS = False
    QUERY_HINTS = False
    IS_BOOL_ALLOWED = False
    TZ_TO_WITH_TIME_ZONE = True
    NVL2_SUPPORTED = False
    STRUCT_DELIMITER = ("(", ")")
    LIMIT_ONLY_LITERALS = True
    SUPPORTS_SINGLE_ARG_CONCAT = False
    LIKE_PROPERTY_INSIDE_SCHEMA = True
    MULTI_ARG_DISTINCT = False
    SUPPORTS_TO_NUMBER = False
    HEX_FUNC = "TO_HEX"
    PARSE_JSON_NAME = "JSON_PARSE"
    PAD_FILL_PATTERN_IS_REQUIRED = True
    EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False
    SUPPORTS_MEDIAN = False
    ARRAY_SIZE_NAME = "CARDINALITY"

    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.LocationProperty: exp.Properties.Location.UNSUPPORTED,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
    }

    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        exp.DataType.Type.BINARY: "VARBINARY",
        exp.DataType.Type.BIT: "BOOLEAN",
        exp.DataType.Type.DATETIME: "TIMESTAMP",
        exp.DataType.Type.DATETIME64: "TIMESTAMP",
        exp.DataType.Type.FLOAT: "REAL",
        exp.DataType.Type.HLLSKETCH: "HYPERLOGLOG",
        exp.DataType.Type.INT: "INTEGER",
        exp.DataType.Type.STRUCT: "ROW",
        exp.DataType.Type.TEXT: "VARCHAR",
        exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
        exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP",
        exp.DataType.Type.TIMETZ: "TIME",
    }

    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.AnyValue: rename_func("ARBITRARY"),
        exp.ApproxQuantile: rename_func("APPROX_PERCENTILE"),
        exp.ArgMax: rename_func("MAX_BY"),
        exp.ArgMin: rename_func("MIN_BY"),
        exp.Array: lambda self, e: f"ARRAY[{self.expressions(e, flat=True)}]",
        exp.ArrayAny: rename_func("ANY_MATCH"),
        exp.ArrayConcat: rename_func("CONCAT"),
        exp.ArrayContains: rename_func("CONTAINS"),
        exp.ArrayToString: rename_func("ARRAY_JOIN"),
        exp.ArrayUniqueAgg: rename_func("SET_AGG"),
        exp.AtTimeZone: rename_func("AT_TIMEZONE"),
        exp.BitwiseAnd: lambda self, e: self.func("BITWISE_AND", e.this, e.expression),
        exp.BitwiseLeftShift: lambda self, e: self.func(
            "BITWISE_ARITHMETIC_SHIFT_LEFT", e.this, e.expression
        ),
        exp.BitwiseNot: lambda self, e: self.func("BITWISE_NOT", e.this),
        exp.BitwiseOr: lambda self, e: self.func("BITWISE_OR", e.this, e.expression),
        exp.BitwiseRightShift: lambda self, e: self.func(
            "BITWISE_ARITHMETIC_SHIFT_RIGHT", e.this, e.expression
        ),
        exp.BitwiseXor: lambda self, e: self.func("BITWISE_XOR", e.this, e.expression),
        exp.Cast: transforms.preprocess([transforms.epoch_cast_to_ts]),
        exp.CurrentTime: lambda *_: "CURRENT_TIME",
        exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
        exp.CurrentUser: lambda *_: "CURRENT_USER",
        exp.DateAdd: _date_delta_sql("DATE_ADD"),
        exp.DateDiff: lambda self, e: self.func(
            "DATE_DIFF", unit_to_str(e), e.expression, e.this
        ),
        exp.DateStrToDate: datestrtodate_sql,
        exp.DateToDi: lambda self,
        e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Presto.DATEINT_FORMAT}) AS INT)",
        exp.DateSub: _date_delta_sql("DATE_ADD", negate_interval=True),
        exp.DayOfWeek: lambda self, e: f"(({self.func('DAY_OF_WEEK', e.this)} % 7) + 1)",
        exp.DayOfWeekIso: rename_func("DAY_OF_WEEK"),
        exp.Decode: lambda self, e: encode_decode_sql(self, e, "FROM_UTF8"),
        exp.DiToDate: lambda self,
        e: f"CAST(DATE_PARSE(CAST({self.sql(e, 'this')} AS VARCHAR), {Presto.DATEINT_FORMAT}) AS DATE)",
        exp.Encode: lambda self, e: encode_decode_sql(self, e, "TO_UTF8"),
        exp.FileFormatProperty: lambda self,
        e: f"format={self.sql(exp.Literal.string(e.name))}",
        exp.First: _first_last_sql,
        exp.FromTimeZone: lambda self,
        e: f"WITH_TIMEZONE({self.sql(e, 'this')}, {self.sql(e, 'zone')}) AT TIME ZONE 'UTC'",
        exp.GenerateSeries: sequence_sql,
        exp.GenerateDateArray: sequence_sql,
        exp.Group: transforms.preprocess([transforms.unalias_group]),
        exp.If: if_sql(),
        exp.ILike: no_ilike_sql,
        exp.Initcap: _initcap_sql,
        exp.Last: _first_last_sql,
        exp.LastDay: lambda self, e: self.func("LAST_DAY_OF_MONTH", e.this),
        exp.Lateral: _explode_to_unnest_sql,
        exp.Left: left_to_substring_sql,
        exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")(
            rename_func("LEVENSHTEIN_DISTANCE")
        ),
        exp.LogicalAnd: rename_func("BOOL_AND"),
        exp.LogicalOr: rename_func("BOOL_OR"),
        exp.Pivot: no_pivot_sql,
        exp.Quantile: _quantile_sql,
        exp.RegexpExtract: regexp_extract_sql,
        exp.RegexpExtractAll: regexp_extract_sql,
        exp.Right: right_to_substring_sql,
        exp.Schema: _schema_sql,
        exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
        exp.Select: transforms.preprocess(
            [
                transforms.eliminate_window_clause,
                transforms.eliminate_qualify,
                transforms.eliminate_distinct_on,
                transforms.explode_projection_to_unnest(1),
                transforms.eliminate_semi_and_anti_joins,
                amend_exploded_column_table,
            ]
        ),
        exp.SortArray: _no_sort_array,
        exp.StrPosition: lambda self, e: strposition_sql(self, e, supports_occurrence=True),
        exp.StrToDate: lambda self, e: f"CAST({_str_to_time_sql(self, e)} AS DATE)",
        exp.StrToMap: rename_func("SPLIT_TO_MAP"),
        exp.StrToTime: _str_to_time_sql,
        exp.StructExtract: struct_extract_sql,
        exp.Table: transforms.preprocess([transforms.unnest_generate_series]),
        exp.Timestamp: no_timestamp_sql,
        exp.TimestampAdd: _date_delta_sql("DATE_ADD"),
        exp.TimestampTrunc: timestamptrunc_sql(),
        exp.TimeStrToDate: timestrtotime_sql,
        exp.TimeStrToTime: timestrtotime_sql,
        exp.TimeStrToUnix: lambda self, e: self.func(
            "TO_UNIXTIME", self.func("DATE_PARSE", e.this, Presto.TIME_FORMAT)
        ),
        exp.TimeToStr: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)),
        exp.TimeToUnix: rename_func("TO_UNIXTIME"),
        exp.ToChar: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)),
        exp.TryCast: transforms.preprocess([transforms.epoch_cast_to_ts]),
        exp.TsOrDiToDi: lambda self,
        e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS VARCHAR), '-', ''), 1, 8) AS INT)",
        exp.TsOrDsAdd: _ts_or_ds_add_sql,
        exp.TsOrDsDiff: _ts_or_ds_diff_sql,
        exp.TsOrDsToDate: _ts_or_ds_to_date_sql,
        exp.Unhex: rename_func("FROM_HEX"),
        exp.UnixToStr: lambda self,
        e: f"DATE_FORMAT(FROM_UNIXTIME({self.sql(e, 'this')}), {self.format_time(e)})",
        exp.UnixToTime: _unix_to_time_sql,
        exp.UnixToTimeStr: lambda self,
        e: f"CAST(FROM_UNIXTIME({self.sql(e, 'this')}) AS VARCHAR)",
        exp.VariancePop: rename_func("VAR_POP"),
        exp.With: transforms.preprocess([transforms.add_recursive_cte_column_names]),
        exp.WithinGroup: transforms.preprocess(
            [transforms.remove_within_group_for_percentiles]
        ),
        exp.Xor: bool_xor_sql,
        exp.MD5Digest: rename_func("MD5"),
        exp.SHA: rename_func("SHA1"),
        exp.SHA2: sha256_sql,
    }

    RESERVED_KEYWORDS = {
        "alter", "and", "as", "between", "by", "case", "cast", "constraint",
        "create", "cross", "current_time", "current_timestamp", "deallocate",
        "delete", "describe", "distinct", "drop", "else", "end", "escape",
        "except", "execute", "exists", "extract", "false", "for", "from", "full",
        "group", "having", "in", "inner", "insert", "intersect", "into", "is",
        "join", "left", "like", "natural", "not", "null", "on", "or", "order",
        "outer", "prepare", "right", "select", "table", "then", "true", "union",
        "using", "values", "when", "where", "with",
    }

    def jsonformat_sql(self, expression: exp.JSONFormat) -> str:
        this = expression.this
        is_json = expression.args.get("is_json")

        if this and not (is_json or this.type):
            from sqlglot.optimizer.annotate_types import annotate_types

            this = annotate_types(this, dialect=self.dialect)

        if not (is_json or this.is_type(exp.DataType.Type.JSON)):
            this.replace(exp.cast(this, exp.DataType.Type.JSON))

        return self.function_fallback_sql(expression)

    def md5_sql(self, expression: exp.MD5) -> str:
        this = expression.this

        if not this.type:
            from sqlglot.optimizer.annotate_types import annotate_types

            this = annotate_types(this, dialect=self.dialect)

        if this.is_type(*exp.DataType.TEXT_TYPES):
            this = exp.Encode(this=this, charset=exp.Literal.string("utf-8"))

        return self.func("LOWER", self.func("TO_HEX", self.func("MD5", self.sql(this))))

    def strtounix_sql(self, expression: exp.StrToUnix) -> str:
        # Since `TO_UNIXTIME` requires a `TIMESTAMP`, we need to parse the argument into one.
        # To do this, we first try to `DATE_PARSE` it, but since this can fail when there's a
        # timezone involved, we wrap it in a `TRY` call and use `PARSE_DATETIME` as a fallback,
        # which seems to be using the same time mapping as Hive, as per:
        # https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html
        this = expression.this
        value_as_text = exp.cast(this, exp.DataType.Type.TEXT)
        value_as_timestamp = (
            exp.cast(this, exp.DataType.Type.TIMESTAMP) if this.is_string else this
        )

        parse_without_tz = self.func("DATE_PARSE", value_as_text, self.format_time(expression))

        formatted_value = self.func(
            "DATE_FORMAT", value_as_timestamp, self.format_time(expression)
        )
        parse_with_tz = self.func(
            "PARSE_DATETIME",
            formatted_value,
            self.format_time(expression, Hive.INVERSE_TIME_MAPPING, Hive.INVERSE_TIME_TRIE),
        )
        coalesced = self.func("COALESCE", self.func("TRY", parse_without_tz), parse_with_tz)
        return self.func("TO_UNIXTIME", coalesced)

    def bracket_sql(self, expression: exp.Bracket) -> str:
        if expression.args.get("safe"):
            return self.func(
                "ELEMENT_AT",
                expression.this,
                seq_get(
                    apply_index_offset(
                        expression.this,
                        expression.expressions,
                        1 - expression.args.get("offset", 0),
                        dialect=self.dialect,
                    ),
                    0,
                ),
            )
        return super().bracket_sql(expression)

    def struct_sql(self, expression: exp.Struct) -> str:
        from sqlglot.optimizer.annotate_types import annotate_types

        expression = annotate_types(expression, dialect=self.dialect)
        values: t.List[str] = []
        schema: t.List[str] = []
        unknown_type = False

        for e in expression.expressions:
            if isinstance(e, exp.PropertyEQ):
                if e.type and e.type.is_type(exp.DataType.Type.UNKNOWN):
                    unknown_type = True
                else:
                    schema.append(f"{self.sql(e, 'this')} {self.sql(e.type)}")
                values.append(self.sql(e, "expression"))
            else:
                values.append(self.sql(e))

        size = len(expression.expressions)

        if not size or len(schema) != size:
            if unknown_type:
                self.unsupported(
                    "Cannot convert untyped key-value definitions (try annotate_types)."
                )
            return self.func("ROW", *values)
        return f"CAST(ROW({', '.join(values)}) AS ROW({', '.join(schema)}))"

    def interval_sql(self, expression: exp.Interval) -> str:
        if expression.this and expression.text("unit").upper().startswith("WEEK"):
            return f"({expression.this.name} * INTERVAL '7' DAY)"
        return super().interval_sql(expression)

    def transaction_sql(self, expression: exp.Transaction) -> str:
        modes = expression.args.get("modes")
        modes = f" {', '.join(modes)}" if modes else ""
        return f"START TRANSACTION{modes}"

    def offset_limit_modifiers(
        self, expression: exp.Expression, fetch: bool, limit: t.Optional[exp.Fetch | exp.Limit]
    ) -> t.List[str]:
        return [
            self.sql(expression, "offset"),
            self.sql(limit),
        ]

    def create_sql(self, expression: exp.Create) -> str:
        """
        Presto doesn't support CREATE VIEW with expressions (ex: `CREATE VIEW x (cola)` then
        `(cola)` is the expression), so we need to remove them
        """
        kind = expression.args["kind"]
        schema = expression.this
        if kind == "VIEW" and schema.expressions:
            expression.this.set("expressions", None)
        return super().create_sql(expression)

    def delete_sql(self, expression: exp.Delete) -> str:
        """
        Presto only supports DELETE FROM for a single table without an alias, so we need
        to remove the unnecessary parts. If the original DELETE statement contains more
        than one table to be deleted, we can't safely map it 1-1 to a Presto statement.
        """
        tables = expression.args.get("tables") or [expression.this]
        if len(tables) > 1:
            return super().delete_sql(expression)

        table = tables[0]
        expression.set("this", table)
        expression.set("tables", None)

        if isinstance(table, exp.Table):
            table_alias = table.args.get("alias")
            if table_alias:
                table_alias.pop()
                expression = t.cast(exp.Delete, expression.transform(unqualify_columns))

        return super().delete_sql(expression)

    def jsonextract_sql(self, expression: exp.JSONExtract) -> str:
        is_json_extract = self.dialect.settings.get("variant_extract_is_json_extract", True)

        # Generate JSON_EXTRACT unless the user has configured that a Snowflake / Databricks
        # VARIANT extract (e.g. col:x.y) should map to dot notation (i.e. ROW access) in Presto/Trino
        if not expression.args.get("variant_extract") or is_json_extract:
            return self.func(
                "JSON_EXTRACT", expression.this, expression.expression, *expression.expressions
            )

        this = self.sql(expression, "this")

        # Convert the JSONPath extraction `JSON_EXTRACT(col, '$.x.y')` to a ROW access col.x.y
        segments = []
        for path_key in expression.expression.expressions[1:]:
            if not isinstance(path_key, exp.JSONPathKey):
                # Cannot transpile subscripts, wildcards etc to dot notation
                self.unsupported(
                    f"Cannot transpile JSONPath segment '{path_key}' to ROW access"
                )
                continue
            key = path_key.this
            if not exp.SAFE_IDENTIFIER_RE.match(key):
                key = f'"{key}"'
            segments.append(f".{key}")

        expr = "".join(segments)

        return f"{this}{expr}"

    def groupconcat_sql(self, expression: exp.GroupConcat) -> str:
        return self.func(
            "ARRAY_JOIN",
            self.func("ARRAY_AGG", expression.this),
            expression.args.get("separator"),
        )
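As a hedged sketch of the TRANSFORMS table in action, reached through the top-level transpile API (outputs in the comments are indicative):

import sqlglot

# exp.AnyValue is renamed to ARBITRARY when generating Presto SQL.
print(sqlglot.transpile("SELECT ANY_VALUE(x) FROM t", write="presto")[0])
# SELECT ARBITRARY(x) FROM t

# GROUP_CONCAT has no direct Presto equivalent; groupconcat_sql above
# rewrites it to ARRAY_JOIN over ARRAY_AGG.
print(
    sqlglot.transpile(
        "SELECT GROUP_CONCAT(x SEPARATOR ',') FROM t", read="mysql", write="presto"
    )[0]
)
# SELECT ARRAY_JOIN(ARRAY_AGG(x), ',') FROM t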
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
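A short sketch of how these constructor options are usually reached through sqlglot.transpile when targeting Presto (the exact pretty-printed layout is indicative):

import sqlglot

# identify=True forces identifier quoting, pretty=True enables formatting;
# note that INT maps to INTEGER via Presto.Generator.TYPE_MAPPING.
sql = "select cast(col as int) from tbl where col > 0"
print(sqlglot.transpile(sql, write="presto", identify=True, pretty=True)[0])
# SELECT
#   CAST("col" AS INTEGER)
# FROM "tbl"
# WHERE
#   "col" > 0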
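Two of the method-level behaviors above, sketched end to end (outputs are indicative):

import sqlglot

# delete_sql: the alias is dropped and column references are unqualified,
# since Presto only supports DELETE FROM on a single, unaliased table.
print(sqlglot.transpile("DELETE FROM t AS a WHERE a.x = 1", write="presto")[0])
# DELETE FROM t WHERE x = 1

# jsonextract_sql: a Snowflake VARIANT path extraction becomes JSON_EXTRACT
# by default (i.e. unless variant_extract_is_json_extract is disabled).
print(sqlglot.transpile("SELECT col:x.y FROM t", read="snowflake", write="presto")[0])
# SELECT JSON_EXTRACT(col, '$.x.y') FROM t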
Inherited Members
- sqlglot.generator.Generator: the remaining feature flags, generation entry points (generate, sql, func, ...), and per-expression *_sql methods are inherited unchanged from the base class.