2023-06-20 05:55:12 +00:00
package logstorage
import (
"fmt"
"math"
"strconv"
"strings"
"time"
"unicode"
"unicode/utf8"
2024-05-20 02:08:30 +00:00
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
2023-06-20 05:55:12 +00:00
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutils"
2024-05-23 19:47:21 +00:00
"github.com/VictoriaMetrics/VictoriaMetrics/lib/regexutil"
2023-06-20 05:55:12 +00:00
)
// lexer splits a query string into tokens.
//
// The current token is stored in the token field; nextToken advances to the next one.
type lexer struct {
	// s contains unparsed tail of sOrig
	s string

	// sOrig contains the original string
	sOrig string

	// token contains the current token (after unquoting)
	//
	// an empty token means the end of s
	token string

	// rawToken contains raw token before unquoting, i.e. as it is written in sOrig
	rawToken string

	// prevToken contains the previously parsed token
	prevToken string

	// isSkippedSpace is set to true if there was a whitespace before the token in s
	isSkippedSpace bool

	// currentTimestamp is the current timestamp in nanoseconds
	//
	// newLexer initializes it from time.Now().
	currentTimestamp int64
}
2024-05-20 02:08:30 +00:00
// lexerState holds a snapshot of lexer made by backupState.
type lexerState struct {
	// lex is a by-value copy of the lexer at the moment of the snapshot.
	lex lexer
}

// backupState returns a snapshot of lex, which can be passed to restoreState later.
func (lex *lexer) backupState() *lexerState {
	snapshot := *lex
	return &lexerState{lex: snapshot}
}

// restoreState resets lex to the snapshot stored in ls.
func (lex *lexer) restoreState(ls *lexerState) {
	saved := ls.lex
	*lex = saved
}
2024-05-12 14:33:29 +00:00
// newLexer returns new lexer for the given s.
//
// The lex.token points to the first token in s.
2023-06-20 05:55:12 +00:00
func newLexer ( s string ) * lexer {
2024-05-12 14:33:29 +00:00
lex := & lexer {
2023-06-20 05:55:12 +00:00
s : s ,
sOrig : s ,
currentTimestamp : time . Now ( ) . UnixNano ( ) ,
}
2024-05-12 14:33:29 +00:00
lex . nextToken ( )
return lex
2023-06-20 05:55:12 +00:00
}
// isEnd returns true if there is no unparsed input left and the current token is empty.
func (lex *lexer) isEnd() bool {
	if lex.s != "" {
		return false
	}
	return lex.token == "" && lex.rawToken == ""
}
// isQuotedToken returns true if the current token differs from its raw representation,
// i.e. unquoting has changed the token.
func (lex *lexer) isQuotedToken() bool {
	isSame := lex.rawToken == lex.token
	return !isSame
}
// isPrevToken returns true if the previously parsed token equals any of the given tokens.
//
// The comparison is exact (case-sensitive).
func (lex *lexer) isPrevToken(tokens ...string) bool {
	prev := lex.prevToken
	for i := range tokens {
		if tokens[i] == prev {
			return true
		}
	}
	return false
}
// isKeyword returns true if the current token case-insensitively matches any of the given keywords.
//
// The keywords must be passed in lower case. Quoted tokens never match.
func (lex *lexer) isKeyword(keywords ...string) bool {
	if lex.isQuotedToken() {
		// A quoted token is always treated as an ordinary word.
		return false
	}

	lowered := strings.ToLower(lex.token)
	for i := range keywords {
		if keywords[i] == lowered {
			return true
		}
	}
	return false
}
// context returns up to the last 50 bytes of the already consumed part of the original string.
//
// It is used for giving a hint about the location in error messages.
func (lex *lexer) context() string {
	const maxContextLen = 50

	consumedLen := len(lex.sOrig) - len(lex.s)
	start := 0
	if consumedLen > maxContextLen {
		start = consumedLen - maxContextLen
	}
	return lex.sOrig[start:consumedLen]
}
// mustNextToken advances to the next token and returns false if the end of input is reached.
func (lex *lexer) mustNextToken() bool {
	lex.nextToken()
	if lex.isEnd() {
		return false
	}
	return true
}
// nextCharToken makes the first size bytes of s the current (unquoted) token
// and stores the rest of s as the unparsed tail.
func (lex *lexer) nextCharToken(s string, size int) {
	tok, tail := s[:size], s[size:]
	lex.token = tok
	lex.rawToken = tok
	lex.s = tail
}
// nextToken updates lex.token to the next token.
func ( lex * lexer ) nextToken ( ) {
s := lex . s
lex . prevToken = lex . token
lex . token = ""
lex . rawToken = ""
lex . isSkippedSpace = false
2024-05-25 23:54:39 +00:00
2023-06-20 05:55:12 +00:00
if len ( s ) == 0 {
return
}
2024-05-25 23:54:39 +00:00
again :
2023-06-20 05:55:12 +00:00
r , size := utf8 . DecodeRuneInString ( s )
if r == utf8 . RuneError {
lex . nextCharToken ( s , size )
return
}
// Skip whitespace
for unicode . IsSpace ( r ) {
lex . isSkippedSpace = true
s = s [ size : ]
r , size = utf8 . DecodeRuneInString ( s )
}
2024-05-25 23:54:39 +00:00
if r == '#' {
// skip comment till \n
n := strings . IndexByte ( s , '\n' )
if n < 0 {
s = ""
} else {
s = s [ n + 1 : ]
}
goto again
}
2023-06-20 05:55:12 +00:00
// Try decoding simple token
tokenLen := 0
for isTokenRune ( r ) || r == '.' {
tokenLen += size
r , size = utf8 . DecodeRuneInString ( s [ tokenLen : ] )
}
if tokenLen > 0 {
lex . nextCharToken ( s , tokenLen )
return
}
switch r {
case '"' , '`' :
prefix , err := strconv . QuotedPrefix ( s )
if err != nil {
lex . nextCharToken ( s , 1 )
return
}
token , err := strconv . Unquote ( prefix )
if err != nil {
lex . nextCharToken ( s , 1 )
return
}
lex . token = token
lex . rawToken = prefix
lex . s = s [ len ( prefix ) : ]
return
case '\'' :
var b [ ] byte
for ! strings . HasPrefix ( s [ size : ] , "'" ) {
ch , _ , newTail , err := strconv . UnquoteChar ( s [ size : ] , '\'' )
if err != nil {
lex . nextCharToken ( s , 1 )
return
}
b = utf8 . AppendRune ( b , ch )
size += len ( s [ size : ] ) - len ( newTail )
}
size ++
lex . token = string ( b )
lex . rawToken = string ( s [ : size ] )
lex . s = s [ size : ]
return
case '=' :
if strings . HasPrefix ( s [ size : ] , "~" ) {
lex . nextCharToken ( s , 2 )
return
}
lex . nextCharToken ( s , 1 )
return
case '!' :
if strings . HasPrefix ( s [ size : ] , "~" ) || strings . HasPrefix ( s [ size : ] , "=" ) {
lex . nextCharToken ( s , 2 )
return
}
lex . nextCharToken ( s , 1 )
return
default :
lex . nextCharToken ( s , size )
return
}
}
// Query represents LogsQL query.
type Query struct {
f filter
2024-05-12 14:33:29 +00:00
pipes [ ] pipe
2023-06-20 05:55:12 +00:00
}
// String returns string representation for q.
func ( q * Query ) String ( ) string {
2024-05-12 14:33:29 +00:00
s := q . f . String ( )
2023-06-20 05:55:12 +00:00
2024-05-12 14:33:29 +00:00
for _ , p := range q . pipes {
s += " | " + p . String ( )
2023-06-20 05:55:12 +00:00
}
2024-05-12 14:33:29 +00:00
return s
}
2024-05-20 02:08:30 +00:00
// AddCountByTimePipe adds '| stats by (_time:step offset off, field1, ..., fieldN) count() hits' to the end of q.
func ( q * Query ) AddCountByTimePipe ( step , off int64 , fields [ ] string ) {
{
// add 'stats by (_time:step offset off, fields) count() hits'
2024-06-03 12:01:05 +00:00
stepStr := string ( marshalDurationString ( nil , step ) )
offsetStr := string ( marshalDurationString ( nil , off ) )
2024-05-20 02:08:30 +00:00
byFieldsStr := "_time:" + stepStr + " offset " + offsetStr
for _ , f := range fields {
byFieldsStr += ", " + quoteTokenIfNeeded ( f )
}
s := fmt . Sprintf ( "stats by (%s) count() hits" , byFieldsStr )
lex := newLexer ( s )
2024-05-22 19:01:20 +00:00
2024-05-27 14:48:34 +00:00
ps , err := parsePipeStats ( lex , true )
2024-05-20 02:08:30 +00:00
if err != nil {
2024-05-22 19:01:20 +00:00
logger . Panicf ( "BUG: unexpected error when parsing [%s]: %s" , s , err )
}
if ! lex . isEnd ( ) {
logger . Panicf ( "BUG: unexpected tail left after parsing [%s]: %q" , s , lex . s )
2024-05-20 02:08:30 +00:00
}
2024-05-22 19:01:20 +00:00
2024-05-20 02:08:30 +00:00
q . pipes = append ( q . pipes , ps )
}
{
// Add 'sort by (_time, fields)' in order to get consistent order of the results.
sortFieldsStr := "_time"
for _ , f := range fields {
sortFieldsStr += ", " + quoteTokenIfNeeded ( f )
}
s := fmt . Sprintf ( "sort by (%s)" , sortFieldsStr )
lex := newLexer ( s )
ps , err := parsePipeSort ( lex )
if err != nil {
logger . Panicf ( "BUG: unexpected error when parsing %q: %s" , s , err )
}
q . pipes = append ( q . pipes , ps )
}
}
2024-06-03 14:58:47 +00:00
// Clone returns a copy of q.
//
// The copy is obtained by marshaling q to string and parsing the result.
// The function panics if the string representation of q cannot be parsed.
func (q *Query) Clone() *Query {
	s := q.String()
	clone, err := ParseQuery(s)
	if err != nil {
		logger.Panicf("BUG: cannot parse %q: %s", s, err)
	}
	return clone
}
// CanReturnLastNResults returns true if time range filter at q can be adjusted for returning the last N results.
//
// This is the case when q contains none of the pipes listed in the switch below.
func (q *Query) CanReturnLastNResults() bool {
	for i := range q.pipes {
		switch q.pipes[i].(type) {
		case *pipeFieldNames, *pipeFieldValues, *pipeLimit, *pipeOffset, *pipeSort, *pipeStats, *pipeUniq:
			return false
		}
	}
	return true
}
// GetFilterTimeRange returns filter time range for the given q.
//
// The range is calculated by getFilterTimeRange from the filter part of q.
func (q *Query) GetFilterTimeRange() (int64, int64) {
	start, end := getFilterTimeRange(q.f)
	return start, end
}
2024-05-15 02:55:44 +00:00
// AddTimeFilter adds global filter _time:[start ... end] to q.
func ( q * Query ) AddTimeFilter ( start , end int64 ) {
2024-05-20 02:08:30 +00:00
startStr := marshalTimestampRFC3339NanoString ( nil , start )
endStr := marshalTimestampRFC3339NanoString ( nil , end )
2024-05-15 02:55:44 +00:00
ft := & filterTime {
minTimestamp : start ,
maxTimestamp : end ,
stringRepr : fmt . Sprintf ( "[%s, %s]" , startStr , endStr ) ,
}
fa , ok := q . f . ( * filterAnd )
if ok {
filters := make ( [ ] filter , len ( fa . filters ) + 1 )
filters [ 0 ] = ft
copy ( filters [ 1 : ] , fa . filters )
fa . filters = filters
} else {
q . f = & filterAnd {
filters : [ ] filter { ft , q . f } ,
}
}
}
2024-05-14 01:05:03 +00:00
// AddPipeLimit adds `| limit n` pipe to q.
//
// See https://docs.victoriametrics.com/victorialogs/logsql/#limit-pipe
func ( q * Query ) AddPipeLimit ( n uint64 ) {
q . pipes = append ( q . pipes , & pipeLimit {
2024-05-20 02:08:30 +00:00
limit : n ,
2024-05-14 01:05:03 +00:00
} )
}
2024-05-15 02:55:44 +00:00
// Optimize tries optimizing the query.
func ( q * Query ) Optimize ( ) {
q . pipes = optimizeSortOffsetPipes ( q . pipes )
q . pipes = optimizeSortLimitPipes ( q . pipes )
2024-05-20 02:08:30 +00:00
q . pipes = optimizeUniqLimitPipes ( q . pipes )
q . pipes = optimizeFilterPipes ( q . pipes )
// Merge `q | filter ...` into q.
if len ( q . pipes ) > 0 {
pf , ok := q . pipes [ 0 ] . ( * pipeFilter )
if ok {
q . f = mergeFiltersAnd ( q . f , pf . f )
q . pipes = append ( q . pipes [ : 0 ] , q . pipes [ 1 : ] ... )
}
}
// Optimize `q | field_names ...` by marking pipeFieldNames as first pipe.
if len ( q . pipes ) > 0 {
pf , ok := q . pipes [ 0 ] . ( * pipeFieldNames )
if ok {
pf . isFirstPipe = true
}
}
// Substitute '*' prefixFilter with filterNoop in order to avoid reading _msg data.
q . f = removeStarFilters ( q . f )
// Call Optimize for queries from 'in(query)' filters.
optimizeFilterIn ( q . f )
2024-05-25 12:37:26 +00:00
// Optimize individual pipes.
2024-05-20 02:08:30 +00:00
for _ , p := range q . pipes {
2024-05-25 12:37:26 +00:00
p . optimize ( )
2024-05-20 02:08:30 +00:00
}
}
// removeStarFilters returns a copy of f where every filterPrefix with an empty prefix
// on the message field is substituted with filterNoop.
func removeStarFilters(f filter) filter {
	isStarFilter := func(f filter) bool {
		fp, ok := f.(*filterPrefix)
		if !ok {
			return false
		}
		return isMsgFieldName(fp.fieldName) && fp.prefix == ""
	}
	toNoop := func(_ filter) (filter, error) {
		return &filterNoop{}, nil
	}

	result, err := copyFilter(f, isStarFilter, toNoop)
	if err != nil {
		// toNoop never returns errors, so this must not happen.
		logger.Fatalf("BUG: unexpected error: %s", err)
	}
	return result
}
2024-05-15 02:55:44 +00:00
func optimizeSortOffsetPipes ( pipes [ ] pipe ) [ ] pipe {
// Merge 'sort ... | offset ...' into 'sort ... offset ...'
i := 1
for i < len ( pipes ) {
po , ok := pipes [ i ] . ( * pipeOffset )
if ! ok {
i ++
continue
}
ps , ok := pipes [ i - 1 ] . ( * pipeSort )
if ! ok {
i ++
continue
}
if ps . offset == 0 && ps . limit == 0 {
2024-05-20 02:08:30 +00:00
ps . offset = po . offset
2024-05-15 02:55:44 +00:00
}
pipes = append ( pipes [ : i ] , pipes [ i + 1 : ] ... )
}
return pipes
}
func optimizeSortLimitPipes ( pipes [ ] pipe ) [ ] pipe {
// Merge 'sort ... | limit ...' into 'sort ... limit ...'
i := 1
for i < len ( pipes ) {
pl , ok := pipes [ i ] . ( * pipeLimit )
if ! ok {
i ++
continue
}
ps , ok := pipes [ i - 1 ] . ( * pipeSort )
if ! ok {
i ++
continue
}
2024-05-20 02:08:30 +00:00
if ps . limit == 0 || pl . limit < ps . limit {
ps . limit = pl . limit
2024-05-15 02:55:44 +00:00
}
pipes = append ( pipes [ : i ] , pipes [ i + 1 : ] ... )
}
return pipes
}
2024-05-20 02:08:30 +00:00
// optimizeUniqLimitPipes merges 'uniq ... | limit ...' into 'uniq ... limit ...'.
//
// The smaller of the two limits wins if the `uniq` pipe already has a limit.
//
// Zero limit at `uniq` pipe is treated as a missing limit by this function,
// so `| limit 0` pipe isn't merged - it is left in place in order to preserve its effect.
//
// The returned slice shares the underlying array with pipes.
func optimizeUniqLimitPipes(pipes []pipe) []pipe {
	i := 1
	for i < len(pipes) {
		pl, ok := pipes[i].(*pipeLimit)
		if !ok || pl.limit == 0 {
			i++
			continue
		}
		pu, ok := pipes[i-1].(*pipeUniq)
		if !ok {
			i++
			continue
		}

		if pu.limit == 0 || pl.limit < pu.limit {
			pu.limit = pl.limit
		}

		// Remove the merged `limit` pipe. Do not increment i,
		// since the next pipe is located at position i now.
		pipes = append(pipes[:i], pipes[i+1:]...)
	}
	return pipes
}
// optimizeFilterPipes merges multiple adjacent `| filter ...` pipes into a single `filter ...` pipe.
//
// The returned slice shares the underlying array with pipes.
func optimizeFilterPipes(pipes []pipe) []pipe {
	for i := 1; i < len(pipes); {
		prev, isPrevFilter := pipes[i-1].(*pipeFilter)
		if !isPrevFilter {
			i++
			continue
		}
		curr, isCurrFilter := pipes[i].(*pipeFilter)
		if !isCurrFilter {
			i++
			continue
		}

		// Merge the current filter into the previous one and drop the current pipe.
		// i isn't incremented, since the next pipe is located at position i now.
		prev.f = mergeFiltersAnd(prev.f, curr.f)
		pipes = append(pipes[:i], pipes[i+1:]...)
	}
	return pipes
}
// mergeFiltersAnd returns a filter equivalent to `f1 and f2`.
//
// If f1 or f2 is already a filterAnd, then it is extended in place and returned
// instead of creating a nested filterAnd.
func mergeFiltersAnd(f1, f2 filter) filter {
	if fa, ok := f1.(*filterAnd); ok {
		// Append f2 to the end of f1 filters.
		fa.filters = append(fa.filters, f2)
		return fa
	}

	if fa, ok := f2.(*filterAnd); ok {
		// Put f1 in front of f2 filters.
		merged := make([]filter, 0, len(fa.filters)+1)
		merged = append(merged, f1)
		merged = append(merged, fa.filters...)
		fa.filters = merged
		return fa
	}

	return &filterAnd{
		filters: []filter{f1, f2},
	}
}
2024-05-12 14:33:29 +00:00
func ( q * Query ) getNeededColumns ( ) ( [ ] string , [ ] string ) {
neededFields := newFieldsSet ( )
neededFields . add ( "*" )
unneededFields := newFieldsSet ( )
2023-06-20 05:55:12 +00:00
2024-05-12 14:33:29 +00:00
pipes := q . pipes
for i := len ( pipes ) - 1 ; i >= 0 ; i -- {
pipes [ i ] . updateNeededFields ( neededFields , unneededFields )
2023-06-20 05:55:12 +00:00
}
2024-05-12 14:33:29 +00:00
return neededFields . getAll ( ) , unneededFields . getAll ( )
2023-06-20 05:55:12 +00:00
}
// ParseQuery parses s.
func ParseQuery ( s string ) ( * Query , error ) {
lex := newLexer ( s )
2024-06-11 13:13:55 +00:00
// Verify the first token doesn't match pipe names.
firstToken := strings . ToLower ( lex . rawToken )
if _ , ok := pipeNames [ firstToken ] ; ok {
return nil , fmt . Errorf ( "the query [%s] cannot start with pipe - it must start with madatory filter; see https://docs.victoriametrics.com/victorialogs/logsql/#query-syntax; " +
"if the filter isn't missing, then please put the first word of the filter into quotes: %q" , s , firstToken )
}
2024-05-20 02:08:30 +00:00
q , err := parseQuery ( lex )
if err != nil {
return nil , err
}
if ! lex . isEnd ( ) {
return nil , fmt . Errorf ( "unexpected unparsed tail after [%s]; context: [%s]; tail: [%s]" , q , lex . context ( ) , lex . s )
}
return q , nil
}
2023-06-20 05:55:12 +00:00
2024-05-20 02:08:30 +00:00
func parseQuery ( lex * lexer ) ( * Query , error ) {
2023-06-20 05:55:12 +00:00
f , err := parseFilter ( lex )
if err != nil {
2024-05-12 14:33:29 +00:00
return nil , fmt . Errorf ( "%w; context: [%s]" , err , lex . context ( ) )
2023-06-20 05:55:12 +00:00
}
q := & Query {
f : f ,
}
2024-05-12 14:33:29 +00:00
2024-05-22 19:01:20 +00:00
if lex . isKeyword ( "|" ) {
lex . nextToken ( )
pipes , err := parsePipes ( lex )
if err != nil {
return nil , fmt . Errorf ( "%w; context: [%s]" , err , lex . context ( ) )
}
q . pipes = pipes
2024-05-12 14:33:29 +00:00
}
2023-06-20 05:55:12 +00:00
return q , nil
}
func parseFilter ( lex * lexer ) ( filter , error ) {
2024-05-12 14:33:29 +00:00
if lex . isKeyword ( "|" , "" ) {
2023-06-20 05:55:12 +00:00
return nil , fmt . Errorf ( "missing query" )
}
2024-05-12 14:33:29 +00:00
fo , err := parseFilterOr ( lex , "" )
2023-06-20 05:55:12 +00:00
if err != nil {
return nil , err
}
2024-05-12 14:33:29 +00:00
return fo , nil
2023-06-20 05:55:12 +00:00
}
2024-05-12 14:33:29 +00:00
func parseFilterOr ( lex * lexer , fieldName string ) ( filter , error ) {
2023-06-20 05:55:12 +00:00
var filters [ ] filter
for {
2024-05-12 14:33:29 +00:00
f , err := parseFilterAnd ( lex , fieldName )
2023-06-20 05:55:12 +00:00
if err != nil {
return nil , err
}
filters = append ( filters , f )
switch {
case lex . isKeyword ( "|" , ")" , "" ) :
if len ( filters ) == 1 {
return filters [ 0 ] , nil
}
2024-05-12 14:33:29 +00:00
fo := & filterOr {
2023-06-20 05:55:12 +00:00
filters : filters ,
}
2024-05-12 14:33:29 +00:00
return fo , nil
2023-06-20 05:55:12 +00:00
case lex . isKeyword ( "or" ) :
if ! lex . mustNextToken ( ) {
return nil , fmt . Errorf ( "missing filter after 'or'" )
}
}
}
}
2024-05-12 14:33:29 +00:00
func parseFilterAnd ( lex * lexer , fieldName string ) ( filter , error ) {
2023-06-20 05:55:12 +00:00
var filters [ ] filter
for {
f , err := parseGenericFilter ( lex , fieldName )
if err != nil {
return nil , err
}
filters = append ( filters , f )
switch {
case lex . isKeyword ( "or" , "|" , ")" , "" ) :
if len ( filters ) == 1 {
return filters [ 0 ] , nil
}
2024-05-12 14:33:29 +00:00
fa := & filterAnd {
2023-06-20 05:55:12 +00:00
filters : filters ,
}
2024-05-12 14:33:29 +00:00
return fa , nil
2023-06-20 05:55:12 +00:00
case lex . isKeyword ( "and" ) :
if ! lex . mustNextToken ( ) {
return nil , fmt . Errorf ( "missing filter after 'and'" )
}
}
}
}
func parseGenericFilter ( lex * lexer , fieldName string ) ( filter , error ) {
// Check for special keywords
switch {
case lex . isKeyword ( ":" ) :
if ! lex . mustNextToken ( ) {
return nil , fmt . Errorf ( "missing filter after ':'" )
}
return parseGenericFilter ( lex , fieldName )
case lex . isKeyword ( "*" ) :
lex . nextToken ( )
2024-05-12 14:33:29 +00:00
f := & filterPrefix {
2023-06-20 05:55:12 +00:00
fieldName : fieldName ,
prefix : "" ,
}
return f , nil
case lex . isKeyword ( "(" ) :
if ! lex . isSkippedSpace && ! lex . isPrevToken ( "" , ":" , "(" , "!" , "not" ) {
return nil , fmt . Errorf ( "missing whitespace before the search word %q" , lex . prevToken )
}
return parseParensFilter ( lex , fieldName )
2024-05-20 02:08:30 +00:00
case lex . isKeyword ( ">" ) :
return parseFilterGT ( lex , fieldName )
case lex . isKeyword ( "<" ) :
return parseFilterLT ( lex , fieldName )
2024-05-22 19:01:20 +00:00
case lex . isKeyword ( "=" ) :
return parseFilterEQ ( lex , fieldName )
2024-05-23 10:01:15 +00:00
case lex . isKeyword ( "!=" ) :
return parseFilterNEQ ( lex , fieldName )
2024-05-22 19:01:20 +00:00
case lex . isKeyword ( "~" ) :
return parseFilterTilda ( lex , fieldName )
2024-05-23 10:01:15 +00:00
case lex . isKeyword ( "!~" ) :
return parseFilterNotTilda ( lex , fieldName )
2023-06-20 05:55:12 +00:00
case lex . isKeyword ( "not" , "!" ) :
2024-05-12 14:33:29 +00:00
return parseFilterNot ( lex , fieldName )
2023-06-20 05:55:12 +00:00
case lex . isKeyword ( "exact" ) :
2024-05-12 14:33:29 +00:00
return parseFilterExact ( lex , fieldName )
2023-06-20 05:55:12 +00:00
case lex . isKeyword ( "i" ) :
return parseAnyCaseFilter ( lex , fieldName )
case lex . isKeyword ( "in" ) :
2024-05-12 14:33:29 +00:00
return parseFilterIn ( lex , fieldName )
2023-06-20 05:55:12 +00:00
case lex . isKeyword ( "ipv4_range" ) :
2024-05-12 14:33:29 +00:00
return parseFilterIPv4Range ( lex , fieldName )
2023-06-20 05:55:12 +00:00
case lex . isKeyword ( "len_range" ) :
2024-05-12 14:33:29 +00:00
return parseFilterLenRange ( lex , fieldName )
2023-06-20 05:55:12 +00:00
case lex . isKeyword ( "range" ) :
2024-05-12 14:33:29 +00:00
return parseFilterRange ( lex , fieldName )
2023-06-20 05:55:12 +00:00
case lex . isKeyword ( "re" ) :
2024-05-12 14:33:29 +00:00
return parseFilterRegexp ( lex , fieldName )
2023-06-20 05:55:12 +00:00
case lex . isKeyword ( "seq" ) :
2024-05-12 14:33:29 +00:00
return parseFilterSequence ( lex , fieldName )
2023-06-20 05:55:12 +00:00
case lex . isKeyword ( "string_range" ) :
2024-05-12 14:33:29 +00:00
return parseFilterStringRange ( lex , fieldName )
2023-06-20 05:55:12 +00:00
case lex . isKeyword ( ` " ` , "'" , "`" ) :
return nil , fmt . Errorf ( "improperly quoted string" )
case lex . isKeyword ( "," , ")" , "[" , "]" ) :
return nil , fmt . Errorf ( "unexpected token %q" , lex . token )
}
2024-05-20 02:08:30 +00:00
phrase , err := getCompoundPhrase ( lex , fieldName != "" )
if err != nil {
return nil , err
}
2023-06-20 05:55:12 +00:00
return parseFilterForPhrase ( lex , phrase , fieldName )
}
2024-05-20 02:08:30 +00:00
func getCompoundPhrase ( lex * lexer , allowColon bool ) ( string , error ) {
2024-06-07 23:32:25 +00:00
stopTokens := [ ] string { "*" , "," , "(" , ")" , "[" , "]" , "|" , "!" , "" }
2024-05-20 02:08:30 +00:00
if lex . isKeyword ( stopTokens ... ) {
return "" , fmt . Errorf ( "compound phrase cannot start with '%s'" , lex . token )
}
2023-06-20 05:55:12 +00:00
phrase := lex . token
rawPhrase := lex . rawToken
lex . nextToken ( )
2024-05-12 14:33:29 +00:00
suffix := getCompoundSuffix ( lex , allowColon )
2023-06-20 05:55:12 +00:00
if suffix == "" {
2024-05-20 02:08:30 +00:00
return phrase , nil
2023-06-20 05:55:12 +00:00
}
2024-05-20 02:08:30 +00:00
return rawPhrase + suffix , nil
2023-06-20 05:55:12 +00:00
}
2024-05-12 14:33:29 +00:00
func getCompoundSuffix ( lex * lexer , allowColon bool ) string {
2023-06-20 05:55:12 +00:00
s := ""
2024-06-07 23:32:25 +00:00
stopTokens := [ ] string { "*" , "," , "(" , ")" , "[" , "]" , "|" , "!" , "" }
2024-05-12 14:33:29 +00:00
if ! allowColon {
2023-06-20 05:55:12 +00:00
stopTokens = append ( stopTokens , ":" )
}
for ! lex . isSkippedSpace && ! lex . isKeyword ( stopTokens ... ) {
s += lex . rawToken
lex . nextToken ( )
}
return s
}
2024-05-20 02:08:30 +00:00
func getCompoundToken ( lex * lexer ) ( string , error ) {
2024-06-07 23:32:25 +00:00
stopTokens := [ ] string { "," , "(" , ")" , "[" , "]" , "|" , "!" , "" }
2024-05-20 02:08:30 +00:00
if lex . isKeyword ( stopTokens ... ) {
return "" , fmt . Errorf ( "compound token cannot start with '%s'" , lex . token )
}
2023-06-20 05:55:12 +00:00
s := lex . token
rawS := lex . rawToken
lex . nextToken ( )
suffix := ""
2024-05-20 02:08:30 +00:00
for ! lex . isSkippedSpace && ! lex . isKeyword ( stopTokens ... ) {
2023-06-20 05:55:12 +00:00
s += lex . token
lex . nextToken ( )
}
if suffix == "" {
2024-05-20 02:08:30 +00:00
return s , nil
2023-06-20 05:55:12 +00:00
}
2024-05-20 02:08:30 +00:00
return rawS + suffix , nil
2023-06-20 05:55:12 +00:00
}
func getCompoundFuncArg ( lex * lexer ) string {
if lex . isKeyword ( "*" ) {
return ""
}
arg := lex . token
rawArg := lex . rawToken
lex . nextToken ( )
suffix := ""
2024-05-20 02:08:30 +00:00
for ! lex . isSkippedSpace && ! lex . isKeyword ( "*" , "," , "(" , ")" , "|" , "" ) {
2023-06-20 05:55:12 +00:00
suffix += lex . rawToken
lex . nextToken ( )
}
if suffix == "" {
return arg
}
return rawArg + suffix
}
func parseFilterForPhrase ( lex * lexer , phrase , fieldName string ) ( filter , error ) {
if fieldName != "" || ! lex . isKeyword ( ":" ) {
// The phrase is either a search phrase or a search prefix.
if lex . isKeyword ( "*" ) && ! lex . isSkippedSpace {
// The phrase is a search prefix in the form `foo*`.
lex . nextToken ( )
2024-05-12 14:33:29 +00:00
f := & filterPrefix {
2023-06-20 05:55:12 +00:00
fieldName : fieldName ,
prefix : phrase ,
}
return f , nil
}
// The phrase is a search phrase.
2024-05-12 14:33:29 +00:00
f := & filterPhrase {
2023-06-20 05:55:12 +00:00
fieldName : fieldName ,
phrase : phrase ,
}
return f , nil
}
// The phrase contains the field name.
fieldName = phrase
if ! lex . mustNextToken ( ) {
return nil , fmt . Errorf ( "missing filter after field name %s" , quoteTokenIfNeeded ( fieldName ) )
}
switch fieldName {
case "_time" :
2024-06-06 10:10:14 +00:00
return parseFilterTimeGeneric ( lex )
2023-06-20 05:55:12 +00:00
case "_stream" :
2024-05-12 14:33:29 +00:00
return parseFilterStream ( lex )
2023-06-20 05:55:12 +00:00
default :
return parseGenericFilter ( lex , fieldName )
}
}
func parseParensFilter ( lex * lexer , fieldName string ) ( filter , error ) {
if ! lex . mustNextToken ( ) {
return nil , fmt . Errorf ( "missing filter after '('" )
}
2024-05-12 14:33:29 +00:00
f , err := parseFilterOr ( lex , fieldName )
2023-06-20 05:55:12 +00:00
if err != nil {
return nil , err
}
if ! lex . isKeyword ( ")" ) {
return nil , fmt . Errorf ( "unexpected token %q instead of ')'" , lex . token )
}
lex . nextToken ( )
return f , nil
}
2024-05-12 14:33:29 +00:00
func parseFilterNot ( lex * lexer , fieldName string ) ( filter , error ) {
2023-06-20 05:55:12 +00:00
notKeyword := lex . token
if ! lex . mustNextToken ( ) {
return nil , fmt . Errorf ( "missing filters after '%s'" , notKeyword )
}
f , err := parseGenericFilter ( lex , fieldName )
if err != nil {
return nil , err
}
2024-05-12 14:33:29 +00:00
fn , ok := f . ( * filterNot )
2023-06-20 05:55:12 +00:00
if ok {
2024-05-12 14:33:29 +00:00
return fn . f , nil
2023-06-20 05:55:12 +00:00
}
2024-05-12 14:33:29 +00:00
fn = & filterNot {
2023-06-20 05:55:12 +00:00
f : f ,
}
2024-05-12 14:33:29 +00:00
return fn , nil
2023-06-20 05:55:12 +00:00
}
func parseAnyCaseFilter ( lex * lexer , fieldName string ) ( filter , error ) {
2024-05-12 14:33:29 +00:00
return parseFuncArgMaybePrefix ( lex , "i" , fieldName , func ( phrase string , isFilterPrefix bool ) ( filter , error ) {
if isFilterPrefix {
f := & filterAnyCasePrefix {
2023-07-18 00:19:41 +00:00
fieldName : fieldName ,
prefix : phrase ,
}
return f , nil
}
2024-05-12 14:33:29 +00:00
f := & filterAnyCasePhrase {
2023-07-18 00:19:41 +00:00
fieldName : fieldName ,
phrase : phrase ,
}
return f , nil
} )
}
2023-07-18 01:17:43 +00:00
func parseFuncArgMaybePrefix ( lex * lexer , funcName , fieldName string , callback func ( arg string , isPrefiFilter bool ) ( filter , error ) ) ( filter , error ) {
2023-06-20 05:55:12 +00:00
phrase := lex . token
lex . nextToken ( )
if ! lex . isKeyword ( "(" ) {
2024-05-12 14:33:29 +00:00
phrase += getCompoundSuffix ( lex , fieldName != "" )
2023-06-20 05:55:12 +00:00
return parseFilterForPhrase ( lex , phrase , fieldName )
}
if ! lex . mustNextToken ( ) {
2023-07-18 00:19:41 +00:00
return nil , fmt . Errorf ( "missing arg for %s()" , funcName )
2023-06-20 05:55:12 +00:00
}
phrase = getCompoundFuncArg ( lex )
2024-05-12 14:33:29 +00:00
isFilterPrefix := false
2023-06-20 05:55:12 +00:00
if lex . isKeyword ( "*" ) && ! lex . isSkippedSpace {
2024-05-12 14:33:29 +00:00
isFilterPrefix = true
2023-06-20 05:55:12 +00:00
if ! lex . mustNextToken ( ) {
2023-07-18 00:19:41 +00:00
return nil , fmt . Errorf ( "missing ')' after %s()" , funcName )
2023-06-20 05:55:12 +00:00
}
}
if ! lex . isKeyword ( ")" ) {
2023-07-18 00:19:41 +00:00
return nil , fmt . Errorf ( "unexpected token %q instead of ')' in %s()" , lex . token , funcName )
2023-06-20 05:55:12 +00:00
}
lex . nextToken ( )
2024-05-12 14:33:29 +00:00
return callback ( phrase , isFilterPrefix )
2023-06-20 05:55:12 +00:00
}
2024-05-12 14:33:29 +00:00
func parseFilterLenRange ( lex * lexer , fieldName string ) ( filter , error ) {
2023-06-20 05:55:12 +00:00
funcName := lex . token
return parseFuncArgs ( lex , fieldName , func ( args [ ] string ) ( filter , error ) {
if len ( args ) != 2 {
return nil , fmt . Errorf ( "unexpected number of args for %s(); got %d; want 2" , funcName , len ( args ) )
}
2024-05-12 14:33:29 +00:00
minLen , err := parseUint ( args [ 0 ] )
2023-06-20 05:55:12 +00:00
if err != nil {
return nil , fmt . Errorf ( "cannot parse minLen at %s(): %w" , funcName , err )
}
2024-05-12 14:33:29 +00:00
maxLen , err := parseUint ( args [ 1 ] )
2023-06-20 05:55:12 +00:00
if err != nil {
return nil , fmt . Errorf ( "cannot parse maxLen at %s(): %w" , funcName , err )
}
2024-05-12 14:33:29 +00:00
stringRepr := "(" + args [ 0 ] + ", " + args [ 1 ] + ")"
fr := & filterLenRange {
2023-06-20 05:55:12 +00:00
fieldName : fieldName ,
minLen : minLen ,
maxLen : maxLen ,
2024-05-12 14:33:29 +00:00
stringRepr : stringRepr ,
2023-06-20 05:55:12 +00:00
}
2024-05-12 14:33:29 +00:00
return fr , nil
2023-06-20 05:55:12 +00:00
} )
}
2024-05-12 14:33:29 +00:00
func parseFilterStringRange ( lex * lexer , fieldName string ) ( filter , error ) {
2023-06-20 05:55:12 +00:00
funcName := lex . token
return parseFuncArgs ( lex , fieldName , func ( args [ ] string ) ( filter , error ) {
if len ( args ) != 2 {
return nil , fmt . Errorf ( "unexpected number of args for %s(); got %d; want 2" , funcName , len ( args ) )
}
2024-05-12 14:33:29 +00:00
fr := & filterStringRange {
2023-06-20 05:55:12 +00:00
fieldName : fieldName ,
minValue : args [ 0 ] ,
maxValue : args [ 1 ] ,
2024-05-27 14:18:53 +00:00
stringRepr : fmt . Sprintf ( "string_range(%s, %s)" , quoteTokenIfNeeded ( args [ 0 ] ) , quoteTokenIfNeeded ( args [ 1 ] ) ) ,
2023-06-20 05:55:12 +00:00
}
2024-05-12 14:33:29 +00:00
return fr , nil
2023-06-20 05:55:12 +00:00
} )
}
2024-05-12 14:33:29 +00:00
func parseFilterIPv4Range ( lex * lexer , fieldName string ) ( filter , error ) {
2023-06-20 05:55:12 +00:00
funcName := lex . token
return parseFuncArgs ( lex , fieldName , func ( args [ ] string ) ( filter , error ) {
if len ( args ) == 1 {
minValue , maxValue , ok := tryParseIPv4CIDR ( args [ 0 ] )
if ! ok {
return nil , fmt . Errorf ( "cannot parse IPv4 address or IPv4 CIDR %q at %s()" , args [ 0 ] , funcName )
}
2024-05-12 14:33:29 +00:00
fr := & filterIPv4Range {
2023-06-20 05:55:12 +00:00
fieldName : fieldName ,
minValue : minValue ,
maxValue : maxValue ,
}
2024-05-12 14:33:29 +00:00
return fr , nil
2023-06-20 05:55:12 +00:00
}
if len ( args ) != 2 {
return nil , fmt . Errorf ( "unexpected number of args for %s(); got %d; want 2" , funcName , len ( args ) )
}
minValue , ok := tryParseIPv4 ( args [ 0 ] )
if ! ok {
return nil , fmt . Errorf ( "cannot parse lower bound ip %q in %s()" , funcName , args [ 0 ] )
}
maxValue , ok := tryParseIPv4 ( args [ 1 ] )
if ! ok {
return nil , fmt . Errorf ( "cannot parse upper bound ip %q in %s()" , funcName , args [ 1 ] )
}
2024-05-12 14:33:29 +00:00
fr := & filterIPv4Range {
2023-06-20 05:55:12 +00:00
fieldName : fieldName ,
minValue : minValue ,
maxValue : maxValue ,
}
2024-05-12 14:33:29 +00:00
return fr , nil
2023-06-20 05:55:12 +00:00
} )
}
// tryParseIPv4CIDR parses s, which is either `ip` or `ip/maskBits`,
// and returns the minimum and the maximum IPv4 address for it.
//
// The same address is returned as the minimum and the maximum if s doesn't contain '/'.
// false is returned if s cannot be parsed or if maskBits exceeds 32.
func tryParseIPv4CIDR(s string) (uint32, uint32, bool) {
	slashIdx := strings.IndexByte(s, '/')
	if slashIdx < 0 {
		// A plain IP address without the mask.
		addr, ok := tryParseIPv4(s)
		return addr, addr, ok
	}

	ip, ok := tryParseIPv4(s[:slashIdx])
	if !ok {
		return 0, 0, false
	}
	maskBits, ok := tryParseUint64(s[slashIdx+1:])
	if !ok || maskBits > 32 {
		return 0, 0, false
	}

	// The mask covers the host bits of the address.
	// For maskBits == 0 the shift overflows uint32 to 0 and the subtraction wraps around to all ones.
	hostBits := 32 - maskBits
	mask := uint32(1<<hostBits - 1)

	return ip &^ mask, ip | mask, true
}
2024-05-12 14:33:29 +00:00
func parseFilterIn ( lex * lexer , fieldName string ) ( filter , error ) {
2024-05-20 02:08:30 +00:00
if ! lex . isKeyword ( "in" ) {
return nil , fmt . Errorf ( "expecting 'in' keyword" )
}
// Try parsing in(arg1, ..., argN) at first
lexState := lex . backupState ( )
fi , err := parseFuncArgs ( lex , fieldName , func ( args [ ] string ) ( filter , error ) {
fi := & filterIn {
2023-06-20 05:55:12 +00:00
fieldName : fieldName ,
values : args ,
}
2024-05-20 02:08:30 +00:00
return fi , nil
2023-06-20 05:55:12 +00:00
} )
2024-05-20 02:08:30 +00:00
if err == nil {
return fi , nil
}
// Parse in(query | fields someField) then
lex . restoreState ( lexState )
lex . nextToken ( )
if ! lex . isKeyword ( "(" ) {
return nil , fmt . Errorf ( "missing '(' after 'in'" )
}
lex . nextToken ( )
q , err := parseQuery ( lex )
if err != nil {
return nil , fmt . Errorf ( "cannot parse query inside 'in(...)': %w" , err )
}
if ! lex . isKeyword ( ")" ) {
return nil , fmt . Errorf ( "missing ')' after 'in(%s)'" , q )
}
lex . nextToken ( )
qFieldName , err := getFieldNameFromPipes ( q . pipes )
if err != nil {
return nil , fmt . Errorf ( "cannot determine field name for values in 'in(%s)': %w" , q , err )
}
fi = & filterIn {
fieldName : fieldName ,
needExecuteQuery : true ,
q : q ,
qFieldName : qFieldName ,
}
return fi , nil
}
func getFieldNameFromPipes ( pipes [ ] pipe ) ( string , error ) {
if len ( pipes ) == 0 {
return "" , fmt . Errorf ( "missing 'fields' or 'uniq' pipes at the end of query" )
}
switch t := pipes [ len ( pipes ) - 1 ] . ( type ) {
case * pipeFields :
if t . containsStar || len ( t . fields ) != 1 {
return "" , fmt . Errorf ( "'%s' pipe must contain only a single non-star field name" , t )
}
return t . fields [ 0 ] , nil
case * pipeUniq :
if len ( t . byFields ) != 1 {
return "" , fmt . Errorf ( "'%s' pipe must contain only a single non-star field name" , t )
}
return t . byFields [ 0 ] , nil
default :
return "" , fmt . Errorf ( "missing 'fields' or 'uniq' pipe at the end of query" )
}
2023-06-20 05:55:12 +00:00
}
2024-05-12 14:33:29 +00:00
func parseFilterSequence ( lex * lexer , fieldName string ) ( filter , error ) {
2023-06-20 05:55:12 +00:00
return parseFuncArgs ( lex , fieldName , func ( args [ ] string ) ( filter , error ) {
2024-05-12 14:33:29 +00:00
fs := & filterSequence {
2023-06-20 05:55:12 +00:00
fieldName : fieldName ,
phrases : args ,
}
2024-05-12 14:33:29 +00:00
return fs , nil
2023-06-20 05:55:12 +00:00
} )
}
2024-05-12 14:33:29 +00:00
func parseFilterExact ( lex * lexer , fieldName string ) ( filter , error ) {
return parseFuncArgMaybePrefix ( lex , "exact" , fieldName , func ( phrase string , isFilterPrefix bool ) ( filter , error ) {
if isFilterPrefix {
f := & filterExactPrefix {
2023-07-18 00:19:41 +00:00
fieldName : fieldName ,
prefix : phrase ,
}
return f , nil
2023-06-20 05:55:12 +00:00
}
2024-05-12 14:33:29 +00:00
f := & filterExact {
2023-06-20 05:55:12 +00:00
fieldName : fieldName ,
2023-07-18 00:19:41 +00:00
value : phrase ,
2023-06-20 05:55:12 +00:00
}
2023-07-18 00:19:41 +00:00
return f , nil
2023-06-20 05:55:12 +00:00
} )
}
2024-05-12 14:33:29 +00:00
func parseFilterRegexp ( lex * lexer , fieldName string ) ( filter , error ) {
2023-06-20 05:55:12 +00:00
funcName := lex . token
return parseFuncArg ( lex , fieldName , func ( arg string ) ( filter , error ) {
2024-05-23 19:47:21 +00:00
re , err := regexutil . NewRegex ( arg )
2023-06-20 05:55:12 +00:00
if err != nil {
return nil , fmt . Errorf ( "invalid regexp %q for %s(): %w" , arg , funcName , err )
}
2024-05-12 14:33:29 +00:00
fr := & filterRegexp {
2023-06-20 05:55:12 +00:00
fieldName : fieldName ,
re : re ,
}
2024-05-12 14:33:29 +00:00
return fr , nil
2023-06-20 05:55:12 +00:00
} )
}
2024-05-22 19:01:20 +00:00
func parseFilterTilda ( lex * lexer , fieldName string ) ( filter , error ) {
lex . nextToken ( )
arg := getCompoundFuncArg ( lex )
2024-05-23 19:47:21 +00:00
re , err := regexutil . NewRegex ( arg )
2024-05-22 19:01:20 +00:00
if err != nil {
return nil , fmt . Errorf ( "invalid regexp %q: %w" , arg , err )
2024-05-20 02:08:30 +00:00
}
2024-05-22 19:01:20 +00:00
fr := & filterRegexp {
fieldName : fieldName ,
re : re ,
}
return fr , nil
}
2024-05-23 10:01:15 +00:00
// parseFilterNotTilda returns the negation of the filter parsed by parseFilterTilda.
func parseFilterNotTilda(lex *lexer, fieldName string) (filter, error) {
	inner, err := parseFilterTilda(lex, fieldName)
	if err != nil {
		return nil, err
	}
	return &filterNot{f: inner}, nil
}
2024-05-22 19:01:20 +00:00
// parseFilterEQ skips the current token and reads the compound argument that follows it.
//
// If the argument is followed by `*` with no whitespace skipped before it,
// then the `*` is consumed and filterExactPrefix is returned.
// Otherwise filterExact is returned.
func parseFilterEQ(lex *lexer, fieldName string) (filter, error) {
	lex.nextToken()

	value := getCompoundFuncArg(lex)
	if !lex.isKeyword("*") || lex.isSkippedSpace {
		return &filterExact{
			fieldName: fieldName,
			value:     value,
		}, nil
	}

	lex.nextToken()
	return &filterExactPrefix{
		fieldName: fieldName,
		prefix:    value,
	}, nil
}
2024-05-23 10:01:15 +00:00
// parseFilterNEQ returns the negation of the filter parsed by parseFilterEQ.
func parseFilterNEQ(lex *lexer, fieldName string) (filter, error) {
	inner, err := parseFilterEQ(lex, fieldName)
	if err != nil {
		return nil, err
	}
	return &filterNot{f: inner}, nil
}
2024-05-22 19:01:20 +00:00
func parseFilterGT ( lex * lexer , fieldName string ) ( filter , error ) {
2024-05-20 02:08:30 +00:00
lex . nextToken ( )
includeMinValue := false
op := ">"
if lex . isKeyword ( "=" ) {
lex . nextToken ( )
includeMinValue = true
op = ">="
}
2024-06-05 00:09:56 +00:00
lexState := lex . backupState ( )
minValue , fStr , err := parseNumber ( lex )
if err != nil {
lex . restoreState ( lexState )
2024-05-27 14:18:53 +00:00
fr := tryParseFilterGTString ( lex , fieldName , op , includeMinValue )
2024-06-05 00:09:56 +00:00
if fr == nil {
return nil , fmt . Errorf ( "cannot parse [%s] as number: %w" , fStr , err )
2024-05-27 14:18:53 +00:00
}
2024-06-05 00:09:56 +00:00
return fr , nil
2024-05-20 02:08:30 +00:00
}
if ! includeMinValue {
minValue = nextafter ( minValue , inf )
}
fr := & filterRange {
fieldName : fieldName ,
minValue : minValue ,
maxValue : inf ,
stringRepr : op + fStr ,
}
return fr , nil
}
func parseFilterLT ( lex * lexer , fieldName string ) ( filter , error ) {
lex . nextToken ( )
includeMaxValue := false
op := "<"
if lex . isKeyword ( "=" ) {
lex . nextToken ( )
includeMaxValue = true
op = "<="
}
2024-06-05 00:09:56 +00:00
lexState := lex . backupState ( )
maxValue , fStr , err := parseNumber ( lex )
if err != nil {
lex . restoreState ( lexState )
2024-05-27 14:18:53 +00:00
fr := tryParseFilterLTString ( lex , fieldName , op , includeMaxValue )
2024-06-05 00:09:56 +00:00
if fr == nil {
return nil , fmt . Errorf ( "cannot parse [%s] as number: %w" , fStr , err )
2024-05-27 14:18:53 +00:00
}
2024-06-05 00:09:56 +00:00
return fr , nil
2024-05-27 14:18:53 +00:00
}
2024-05-20 02:08:30 +00:00
if err != nil {
return nil , fmt . Errorf ( "cannot parse number after '%s': %w" , op , err )
}
if ! includeMaxValue {
maxValue = nextafter ( maxValue , - inf )
}
fr := & filterRange {
fieldName : fieldName ,
minValue : - inf ,
maxValue : maxValue ,
stringRepr : op + fStr ,
}
return fr , nil
}
2024-05-27 14:18:53 +00:00
// tryParseFilterGTString reads a compound token and builds filterStringRange
// with this token as the lower bound and maxStringRangeValue as the upper bound.
//
// A zero byte is appended to the lower bound when includeMinValue is false.
// nil is returned if the compound token cannot be read.
func tryParseFilterGTString(lex *lexer, fieldName, op string, includeMinValue bool) filter {
	token, err := getCompoundToken(lex)
	if err != nil {
		return nil
	}

	lowerBound := token
	if !includeMinValue {
		lowerBound += "\x00"
	}
	return &filterStringRange{
		fieldName: fieldName,
		minValue:  lowerBound,
		maxValue:  maxStringRangeValue,

		stringRepr: op + quoteStringTokenIfNeeded(token),
	}
}
// tryParseFilterLTString reads a compound token and builds filterStringRange
// with this token as the upper bound; minValue is left at its zero value.
//
// A zero byte is appended to the upper bound when includeMaxValue is true.
// nil is returned if the compound token cannot be read.
func tryParseFilterLTString(lex *lexer, fieldName, op string, includeMaxValue bool) filter {
	token, err := getCompoundToken(lex)
	if err != nil {
		return nil
	}

	upperBound := token
	if includeMaxValue {
		upperBound += "\x00"
	}
	return &filterStringRange{
		fieldName: fieldName,
		maxValue:  upperBound,

		stringRepr: op + quoteStringTokenIfNeeded(token),
	}
}
2024-05-12 14:33:29 +00:00
func parseFilterRange ( lex * lexer , fieldName string ) ( filter , error ) {
2023-06-20 05:55:12 +00:00
funcName := lex . token
lex . nextToken ( )
// Parse minValue
includeMinValue := false
switch {
case lex . isKeyword ( "(" ) :
includeMinValue = false
case lex . isKeyword ( "[" ) :
includeMinValue = true
default :
2024-05-12 14:33:29 +00:00
phrase := funcName + getCompoundSuffix ( lex , fieldName != "" )
2023-06-20 05:55:12 +00:00
return parseFilterForPhrase ( lex , phrase , fieldName )
}
if ! lex . mustNextToken ( ) {
return nil , fmt . Errorf ( "missing args for %s()" , funcName )
}
2024-06-05 00:09:56 +00:00
minValue , minValueStr , err := parseNumber ( lex )
2023-06-20 05:55:12 +00:00
if err != nil {
return nil , fmt . Errorf ( "cannot parse minValue in %s(): %w" , funcName , err )
}
// Parse comma
if ! lex . isKeyword ( "," ) {
return nil , fmt . Errorf ( "unexpected token %q ater %q in %s(); want ','" , lex . token , minValueStr , funcName )
}
if ! lex . mustNextToken ( ) {
return nil , fmt . Errorf ( "missing maxValue in %s()" , funcName )
}
// Parse maxValue
2024-06-05 00:09:56 +00:00
maxValue , maxValueStr , err := parseNumber ( lex )
2023-06-20 05:55:12 +00:00
if err != nil {
return nil , fmt . Errorf ( "cannot parse maxValue in %s(): %w" , funcName , err )
}
includeMaxValue := false
switch {
case lex . isKeyword ( ")" ) :
includeMaxValue = false
case lex . isKeyword ( "]" ) :
includeMaxValue = true
default :
return nil , fmt . Errorf ( "unexpected closing token %q in %s(); want ')' or ']'" , lex . token , funcName )
}
lex . nextToken ( )
2024-05-20 02:08:30 +00:00
stringRepr := "range"
2023-06-20 05:55:12 +00:00
if includeMinValue {
stringRepr += "["
} else {
stringRepr += "("
2024-05-20 02:08:30 +00:00
minValue = nextafter ( minValue , inf )
2023-06-20 05:55:12 +00:00
}
2024-05-12 14:33:29 +00:00
stringRepr += minValueStr + ", " + maxValueStr
2023-06-20 05:55:12 +00:00
if includeMaxValue {
stringRepr += "]"
} else {
stringRepr += ")"
2024-05-20 02:08:30 +00:00
maxValue = nextafter ( maxValue , - inf )
2023-06-20 05:55:12 +00:00
}
2024-05-12 14:33:29 +00:00
fr := & filterRange {
2023-06-20 05:55:12 +00:00
fieldName : fieldName ,
minValue : minValue ,
maxValue : maxValue ,
stringRepr : stringRepr ,
}
2024-05-12 14:33:29 +00:00
return fr , nil
2023-06-20 05:55:12 +00:00
}
2024-06-05 00:09:56 +00:00
func parseNumber ( lex * lexer ) ( float64 , string , error ) {
2024-05-20 02:08:30 +00:00
s , err := getCompoundToken ( lex )
if err != nil {
2024-05-22 19:01:20 +00:00
return 0 , "" , fmt . Errorf ( "cannot parse float64 from %q: %w" , s , err )
2024-05-20 02:08:30 +00:00
}
2024-06-04 16:07:45 +00:00
2024-06-04 23:18:26 +00:00
f := parseMathNumber ( s )
if ! math . IsNaN ( f ) || strings . EqualFold ( s , "nan" ) {
return f , s , nil
2024-06-04 16:07:45 +00:00
}
2024-06-13 16:49:15 +00:00
return 0 , s , fmt . Errorf ( "cannot parse %q as float64" , s )
2023-06-20 05:55:12 +00:00
}
// parseFuncArg parses a function call, which must have exactly one argument,
// and passes this argument to callback.
//
// The function name used in error messages is the current lexer token.
func parseFuncArg(lex *lexer, fieldName string, callback func(args string) (filter, error)) (filter, error) {
	name := lex.token
	singleArg := func(args []string) (filter, error) {
		if n := len(args); n != 1 {
			return nil, fmt.Errorf("unexpected number of args for %s(); got %d; want 1", name, n)
		}
		return callback(args[0])
	}
	return parseFuncArgs(lex, fieldName, singleArg)
}
func parseFuncArgs ( lex * lexer , fieldName string , callback func ( args [ ] string ) ( filter , error ) ) ( filter , error ) {
funcName := lex . token
lex . nextToken ( )
if ! lex . isKeyword ( "(" ) {
2024-05-12 14:33:29 +00:00
phrase := funcName + getCompoundSuffix ( lex , fieldName != "" )
2023-06-20 05:55:12 +00:00
return parseFilterForPhrase ( lex , phrase , fieldName )
}
if ! lex . mustNextToken ( ) {
return nil , fmt . Errorf ( "missing args for %s()" , funcName )
}
var args [ ] string
for ! lex . isKeyword ( ")" ) {
if lex . isKeyword ( "," ) {
return nil , fmt . Errorf ( "unexpected ',' - missing arg in %s()" , funcName )
}
2024-05-20 02:08:30 +00:00
if lex . isKeyword ( "(" ) {
return nil , fmt . Errorf ( "unexpected '(' - missing arg in %s()" , funcName )
}
2023-06-20 05:55:12 +00:00
arg := getCompoundFuncArg ( lex )
args = append ( args , arg )
if lex . isKeyword ( ")" ) {
break
}
if ! lex . isKeyword ( "," ) {
return nil , fmt . Errorf ( "missing ',' after %q in %s()" , arg , funcName )
}
if ! lex . mustNextToken ( ) {
return nil , fmt . Errorf ( "missing the next arg after %q in %s()" , arg , funcName )
}
}
lex . nextToken ( )
return callback ( args )
}
2023-07-18 01:17:43 +00:00
// startsWithYear reports whether s starts with four decimal digits (YYYY),
// which are either the whole string or are followed by '-', '+', 'Z' or 'z'.
func startsWithYear(s string) bool {
	const yearLen = len("YYYY")
	if len(s) < yearLen {
		return false
	}
	for _, c := range []byte(s[:yearLen]) {
		if c < '0' || c > '9' {
			return false
		}
	}
	if len(s) == yearLen {
		return true
	}
	switch s[yearLen] {
	case '-', '+', 'Z', 'z':
		return true
	default:
		return false
	}
}
2024-06-06 10:10:14 +00:00
// parseFilterTimeGeneric parses a time filter at the current lexer position.
//
// The `day_range` and `week_range` keywords are dispatched to parseFilterDayRange
// and parseFilterWeekRange; everything else goes to parseFilterTimeRange.
//
// The returned filter is a literal nil on error. The parsers return typed pointers,
// and a nil typed pointer stored in the filter interface would compare non-nil.
func parseFilterTimeGeneric(lex *lexer) (filter, error) {
	switch {
	case lex.isKeyword("day_range"):
		fr, err := parseFilterDayRange(lex)
		if err != nil {
			return nil, err
		}
		return fr, nil
	case lex.isKeyword("week_range"):
		fr, err := parseFilterWeekRange(lex)
		if err != nil {
			return nil, err
		}
		return fr, nil
	default:
		ft, err := parseFilterTimeRange(lex)
		if err != nil {
			return nil, err
		}
		return ft, nil
	}
}
// parseFilterDayRange parses `day_range[start, end]` with optional `offset <duration>` suffix.
//
// Either side may use a round bracket instead of a square one; in this case
// the corresponding bound is moved by one inside the range.
func parseFilterDayRange(lex *lexer) (*filterDayRange, error) {
	if !lex.isKeyword("day_range") {
		return nil, fmt.Errorf("unexpected token %q; want 'day_range'", lex.token)
	}
	lex.nextToken()

	// Opening bracket
	var openBrace string
	switch {
	case lex.isKeyword("["):
		openBrace = "["
	case lex.isKeyword("("):
		openBrace = "("
	default:
		return nil, fmt.Errorf("missing '[' or '(' at day_range filter")
	}
	lex.nextToken()

	// start, end
	start, startStr, err := getDayRangeArg(lex)
	if err != nil {
		return nil, fmt.Errorf("cannot read `start` arg at day_range filter: %w", err)
	}
	if !lex.isKeyword(",") {
		return nil, fmt.Errorf("unexpected token %q; want ','", lex.token)
	}
	lex.nextToken()
	end, endStr, err := getDayRangeArg(lex)
	if err != nil {
		return nil, fmt.Errorf("cannot read `end` arg at day_range filter: %w", err)
	}

	// Closing bracket
	var closeBrace string
	switch {
	case lex.isKeyword("]"):
		closeBrace = "]"
	case lex.isKeyword(")"):
		closeBrace = ")"
	default:
		return nil, fmt.Errorf("missing ']' or ')' after day_range filter")
	}
	lex.nextToken()

	// Optional offset
	var offset int64
	var offsetSuffix string
	if lex.isKeyword("offset") {
		lex.nextToken()
		s, err := getCompoundToken(lex)
		if err != nil {
			return nil, fmt.Errorf("cannot parse offset in day_range filter: %w", err)
		}
		d, ok := tryParseDuration(s)
		if !ok {
			return nil, fmt.Errorf("cannot parse offset %q for day_range filter", s)
		}
		offset = int64(d)
		offsetSuffix = " offset " + s
	}

	// Round brackets exclude the corresponding bound.
	if openBrace == "(" {
		start++
	}
	if closeBrace == ")" {
		end--
	}

	return &filterDayRange{
		start:  start,
		end:    end,
		offset: offset,

		stringRepr: openBrace + startStr + ", " + endStr + closeBrace + offsetSuffix,
	}, nil
}
// parseFilterWeekRange parses `week_range[start, end]` with optional `offset <duration>` suffix.
//
// Either side may use a round bracket instead of a square one; in this case
// the corresponding weekday is moved by one inside the range.
func parseFilterWeekRange(lex *lexer) (*filterWeekRange, error) {
	if !lex.isKeyword("week_range") {
		return nil, fmt.Errorf("unexpected token %q; want 'week_range'", lex.token)
	}
	lex.nextToken()

	// Opening bracket
	var openBrace string
	switch {
	case lex.isKeyword("["):
		openBrace = "["
	case lex.isKeyword("("):
		openBrace = "("
	default:
		return nil, fmt.Errorf("missing '[' or '(' at week_range filter")
	}
	lex.nextToken()

	// start, end
	startDay, startStr, err := getWeekRangeArg(lex)
	if err != nil {
		return nil, fmt.Errorf("cannot read `start` arg at week_range filter: %w", err)
	}
	if !lex.isKeyword(",") {
		return nil, fmt.Errorf("unexpected token %q; want ','", lex.token)
	}
	lex.nextToken()
	endDay, endStr, err := getWeekRangeArg(lex)
	if err != nil {
		return nil, fmt.Errorf("cannot read `end` arg at week_range filter: %w", err)
	}

	// Closing bracket
	var closeBrace string
	switch {
	case lex.isKeyword("]"):
		closeBrace = "]"
	case lex.isKeyword(")"):
		closeBrace = ")"
	default:
		return nil, fmt.Errorf("missing ']' or ')' after week_range filter")
	}
	lex.nextToken()

	// Optional offset
	var offset int64
	var offsetSuffix string
	if lex.isKeyword("offset") {
		lex.nextToken()
		s, err := getCompoundToken(lex)
		if err != nil {
			return nil, fmt.Errorf("cannot parse offset in week_range filter: %w", err)
		}
		d, ok := tryParseDuration(s)
		if !ok {
			return nil, fmt.Errorf("cannot parse offset %q for week_range filter", s)
		}
		offset = int64(d)
		offsetSuffix = " offset " + s
	}

	// Round brackets exclude the corresponding bound.
	if openBrace == "(" {
		startDay++
	}
	if closeBrace == ")" {
		endDay--
	}

	return &filterWeekRange{
		startDay: startDay,
		endDay:   endDay,
		offset:   offset,

		stringRepr: openBrace + startStr + ", " + endStr + closeBrace + offsetSuffix,
	}, nil
}
// getDayRangeArg reads an `hh:mm` compound token and converts it to the offset
// in nanoseconds from the day start.
//
// It returns the offset clamped to [0, nsecPerDay-1] together with the original token.
func getDayRangeArg(lex *lexer) (int64, string, error) {
	argStr, err := getCompoundToken(lex)
	if err != nil {
		return 0, "", err
	}
	n := strings.IndexByte(argStr, ':')
	if n < 0 {
		return 0, "", fmt.Errorf("invalid format for day_range arg; want 'hh:mm'; got %q", argStr)
	}
	hoursStr := argStr[:n]
	minutesStr := argStr[n+1:]

	hours, ok := tryParseUint64(hoursStr)
	if !ok {
		return 0, "", fmt.Errorf("cannot parse hh from %q; expected format: 'hh:mm'", hoursStr)
	}
	minutes, ok := tryParseUint64(minutesStr)
	if !ok {
		return 0, "", fmt.Errorf("cannot parse mm from %q; expected format: 'hh:mm'", minutesStr)
	}

	// Clamp the operands before the multiplication, so huge values cannot wrap around
	// and land on a small in-range offset. Values at or above one day are clamped
	// to nsecPerDay-1 below anyway, so the result doesn't change for inputs,
	// which didn't overflow.
	// NOTE(review): this assumes 24*nsecsPerHour and 24*60*nsecsPerMinute
	// are not smaller than nsecPerDay, as their names suggest - confirm.
	if hours > 24 {
		hours = 24
	}
	if minutes > 24*60 {
		minutes = 24 * 60
	}

	offset := int64(hours*nsecsPerHour + minutes*nsecsPerMinute)
	if offset < 0 {
		// Defensive guard kept from the original code.
		offset = 0
	}
	if offset >= nsecPerDay {
		offset = nsecPerDay - 1
	}
	return offset, argStr, nil
}
// getWeekRangeArg reads a compound token and converts it to time.Weekday.
//
// Both three-letter abbreviations (`Mon`) and full weekday names (`Monday`)
// are accepted in any letter case. An error is returned for any other token,
// so typos aren't silently treated as Sunday.
//
// It returns the weekday together with the original token.
func getWeekRangeArg(lex *lexer) (time.Weekday, string, error) {
	argStr, err := getCompoundToken(lex)
	if err != nil {
		return 0, "", err
	}

	var day time.Weekday
	switch strings.ToLower(argStr) {
	case "sun", "sunday":
		day = time.Sunday
	case "mon", "monday":
		day = time.Monday
	case "tue", "tuesday":
		day = time.Tuesday
	case "wed", "wednesday":
		day = time.Wednesday
	case "thu", "thursday":
		day = time.Thursday
	case "fri", "friday":
		day = time.Friday
	case "sat", "saturday":
		day = time.Saturday
	default:
		return 0, "", fmt.Errorf("unsupported weekday %q; want one of Sun, Mon, Tue, Wed, Thu, Fri, Sat", argStr)
	}
	return day, argStr, nil
}
func parseFilterTimeRange ( lex * lexer ) ( * filterTime , error ) {
2024-05-12 14:33:29 +00:00
ft , err := parseFilterTime ( lex )
2023-07-18 02:05:43 +00:00
if err != nil {
return nil , err
}
if ! lex . isKeyword ( "offset" ) {
2024-05-12 14:33:29 +00:00
return ft , nil
2023-07-18 02:05:43 +00:00
}
2024-05-20 02:08:30 +00:00
lex . nextToken ( )
s , err := getCompoundToken ( lex )
if err != nil {
return nil , fmt . Errorf ( "cannot parse offset in _time filter: %w" , err )
2023-07-18 02:05:43 +00:00
}
2024-05-12 14:33:29 +00:00
d , ok := tryParseDuration ( s )
if ! ok {
2024-05-20 02:08:30 +00:00
return nil , fmt . Errorf ( "cannot parse offset %q for _time filter %s" , s , ft )
2023-07-18 02:05:43 +00:00
}
offset := int64 ( d )
2024-05-12 14:33:29 +00:00
ft . minTimestamp -= offset
ft . maxTimestamp -= offset
ft . stringRepr += " offset " + s
return ft , nil
2023-07-18 02:05:43 +00:00
}
2024-05-12 14:33:29 +00:00
func parseFilterTime ( lex * lexer ) ( * filterTime , error ) {
2023-06-20 05:55:12 +00:00
startTimeInclude := false
switch {
case lex . isKeyword ( "[" ) :
startTimeInclude = true
case lex . isKeyword ( "(" ) :
startTimeInclude = false
default :
2024-05-20 02:08:30 +00:00
s , err := getCompoundToken ( lex )
if err != nil {
return nil , fmt . Errorf ( "cannot parse _time filter: %w" , err )
}
2023-07-18 02:05:43 +00:00
sLower := strings . ToLower ( s )
if sLower == "now" || startsWithYear ( s ) {
2023-07-18 01:17:43 +00:00
// Parse '_time:YYYY-MM-DD', which transforms to '_time:[YYYY-MM-DD, YYYY-MM-DD+1)'
2024-06-03 14:58:47 +00:00
nsecs , err := promutils . ParseTimeAt ( s , lex . currentTimestamp )
2023-07-18 01:17:43 +00:00
if err != nil {
return nil , fmt . Errorf ( "cannot parse _time filter: %w" , err )
}
2024-05-22 19:46:50 +00:00
// Round to milliseconds
2024-06-03 14:58:47 +00:00
startTime := nsecs
2023-07-18 01:17:43 +00:00
endTime := getMatchingEndTime ( startTime , s )
2024-05-12 14:33:29 +00:00
ft := & filterTime {
2023-07-18 01:17:43 +00:00
minTimestamp : startTime ,
maxTimestamp : endTime ,
stringRepr : s ,
}
2024-05-12 14:33:29 +00:00
return ft , nil
2023-07-18 01:17:43 +00:00
}
// Parse _time:duration, which transforms to '_time:(now-duration, now]'
2024-05-12 14:33:29 +00:00
d , ok := tryParseDuration ( s )
if ! ok {
return nil , fmt . Errorf ( "cannot parse duration %q in _time filter" , s )
2023-07-18 01:17:43 +00:00
}
if d < 0 {
d = - d
2023-06-20 05:55:12 +00:00
}
2024-05-12 14:33:29 +00:00
ft := & filterTime {
2023-07-18 01:17:43 +00:00
minTimestamp : lex . currentTimestamp - int64 ( d ) ,
maxTimestamp : lex . currentTimestamp ,
2023-06-20 05:55:12 +00:00
2023-07-18 01:17:43 +00:00
stringRepr : s ,
2023-06-20 05:55:12 +00:00
}
2024-05-12 14:33:29 +00:00
return ft , nil
2023-06-20 05:55:12 +00:00
}
if ! lex . mustNextToken ( ) {
return nil , fmt . Errorf ( "missing start time in _time filter" )
}
// Parse start time
startTime , startTimeString , err := parseTime ( lex )
if err != nil {
return nil , fmt . Errorf ( "cannot parse start time in _time filter: %w" , err )
}
if ! lex . isKeyword ( "," ) {
return nil , fmt . Errorf ( "unexpected token after start time in _time filter: %q; want ','" , lex . token )
}
if ! lex . mustNextToken ( ) {
return nil , fmt . Errorf ( "missing end time in _time filter" )
}
// Parse end time
endTime , endTimeString , err := parseTime ( lex )
if err != nil {
return nil , fmt . Errorf ( "cannot parse end time in _time filter: %w" , err )
}
endTimeInclude := false
switch {
case lex . isKeyword ( "]" ) :
endTimeInclude = true
case lex . isKeyword ( ")" ) :
endTimeInclude = false
default :
return nil , fmt . Errorf ( "_time filter ends with unexpected token %q; it must end with ']' or ')'" , lex . token )
}
lex . nextToken ( )
stringRepr := ""
if startTimeInclude {
stringRepr += "["
} else {
stringRepr += "("
startTime ++
}
stringRepr += startTimeString + "," + endTimeString
if endTimeInclude {
stringRepr += "]"
endTime = getMatchingEndTime ( endTime , endTimeString )
} else {
stringRepr += ")"
endTime --
}
2024-05-12 14:33:29 +00:00
ft := & filterTime {
2023-06-20 05:55:12 +00:00
minTimestamp : startTime ,
maxTimestamp : endTime ,
stringRepr : stringRepr ,
}
2024-05-12 14:33:29 +00:00
return ft , nil
2023-06-20 05:55:12 +00:00
}
// getMatchingEndTime returns the inclusive end timestamp in Unix nanoseconds
// for the period, which starts at startTime and is described by stringRepr.
//
// The period length is derived from the precision of stringRepr after stripping
// its timezone suffix: YYYY spans a calendar year, YYYY-MM spans a calendar month,
// YYYY-MM-DD spans a day and so on down to milliseconds. Any other format
// is treated as a single nanosecond, so startTime itself is returned.
func getMatchingEndTime(startTime int64, stringRepr string) int64 {
	tStart := time.Unix(0, startTime).UTC()
	timeStr := stripTimezoneSuffix(stringRepr)
	hasDashAfterYear := len(timeStr) > len("YYYY") && timeStr[len("YYYY")] == '-'

	var tEnd time.Time
	switch {
	case len(timeStr) == len("YYYY"):
		// Keep the time of day of tStart, since it carries the timezone offset.
		// It is taken from the clock instead of `startTime % day`,
		// since the remainder is negative for timestamps before 1970.
		y, m, d := tStart.Date()
		hh, mm, ss := tStart.Clock()
		tEnd = time.Date(y+1, m, d, hh, mm, ss, tStart.Nanosecond(), time.UTC)
	case hasDashAfterYear && len(timeStr) == len("YYYY-MM"):
		y, m, d := tStart.Date()
		hh, mm, ss := tStart.Clock()
		if d != 1 {
			// tStart is on the last day of the previous month in UTC because of
			// the timezone offset. Day 0 of the month m+1 is normalized by time.Date
			// to the last day of the month m.
			d = 0
			m++
		}
		tEnd = time.Date(y, m+1, d, hh, mm, ss, tStart.Nanosecond(), time.UTC)
	case hasDashAfterYear && len(timeStr) == len("YYYY-MM-DD"):
		tEnd = tStart.Add(24 * time.Hour)
	case hasDashAfterYear && len(timeStr) == len("YYYY-MM-DDThh"):
		tEnd = tStart.Add(time.Hour)
	case hasDashAfterYear && len(timeStr) == len("YYYY-MM-DDThh:mm"):
		tEnd = tStart.Add(time.Minute)
	case hasDashAfterYear && len(timeStr) == len("YYYY-MM-DDThh:mm:ss"):
		tEnd = tStart.Add(time.Second)
	case hasDashAfterYear && len(timeStr) == len("YYYY-MM-DDThh:mm:ss.SSS"):
		tEnd = tStart.Add(time.Millisecond)
	default:
		tEnd = tStart.Add(time.Nanosecond)
	}
	return tEnd.UnixNano() - 1
}

// stripTimezoneSuffix removes the trailing timezone designator from s.
//
// The designator is either a single trailing "Z" or a six-byte suffix,
// which starts with '+' or '-' and has ':' at its fourth byte, e.g. "+02:00".
// s is returned unchanged if it has no such suffix.
func stripTimezoneSuffix(s string) string {
	if strings.HasSuffix(s, "Z") {
		return s[:len(s)-1]
	}
	if len(s) < 6 {
		return s
	}
	tz := s[len(s)-6:]
	if tz[0] != '-' && tz[0] != '+' {
		return s
	}
	if tz[3] != ':' {
		return s
	}
	return s[:len(s)-len(tz)]
}
2024-05-12 14:33:29 +00:00
func parseFilterStream ( lex * lexer ) ( * filterStream , error ) {
2024-05-20 02:08:30 +00:00
sf , err := parseStreamFilter ( lex )
2023-06-20 05:55:12 +00:00
if err != nil {
return nil , err
}
2024-05-20 02:08:30 +00:00
fs := & filterStream {
f : sf ,
2023-06-20 05:55:12 +00:00
}
2024-05-20 02:08:30 +00:00
return fs , nil
2023-06-20 05:55:12 +00:00
}
func parseTime ( lex * lexer ) ( int64 , string , error ) {
2024-05-20 02:08:30 +00:00
s , err := getCompoundToken ( lex )
if err != nil {
return 0 , "" , err
}
2024-06-03 14:58:47 +00:00
nsecs , err := promutils . ParseTimeAt ( s , lex . currentTimestamp )
2023-06-20 05:55:12 +00:00
if err != nil {
return 0 , "" , err
}
2024-06-03 14:58:47 +00:00
return nsecs , s , nil
2023-06-20 05:55:12 +00:00
}
2024-05-27 14:18:53 +00:00
// quoteStringTokenIfNeeded returns s quoted with strconv.Quote
// if needQuoteStringToken(s) is true. Otherwise s is returned as is.
func quoteStringTokenIfNeeded(s string) string {
	if needQuoteStringToken(s) {
		return strconv.Quote(s)
	}
	return s
}
2023-06-20 05:55:12 +00:00
// quoteTokenIfNeeded returns s quoted with strconv.Quote
// if needQuoteToken(s) is true. Otherwise s is returned as is.
func quoteTokenIfNeeded(s string) string {
	if needQuoteToken(s) {
		return strconv.Quote(s)
	}
	return s
}
2024-05-27 14:18:53 +00:00
// needQuoteStringToken reports whether s must be quoted when it is used as a string value:
// either it looks like a number according to isNumberPrefix or needQuoteToken requires quoting.
func needQuoteStringToken(s string) bool {
	if isNumberPrefix(s) {
		return true
	}
	return needQuoteToken(s)
}
// isNumberPrefix reports whether s looks like the beginning of a number.
//
// After skipping a single optional leading '-' or '+', s must either be
// a case-insensitive "inf" or start with a decimal digit, which may be
// preceded by a single '.'.
func isNumberPrefix(s string) bool {
	// Skip the optional sign.
	if len(s) > 0 && (s[0] == '-' || s[0] == '+') {
		s = s[1:]
	}
	if len(s) == 0 {
		return false
	}

	// strings.EqualFold matches the whole string, so no length check is needed.
	if strings.EqualFold(s, "inf") {
		return true
	}

	// Skip the optional leading dot, as in ".5".
	if s[0] == '.' {
		s = s[1:]
		if len(s) == 0 {
			return false
		}
	}
	return s[0] >= '0' && s[0] <= '9'
}
2023-06-20 05:55:12 +00:00
func needQuoteToken ( s string ) bool {
sLower := strings . ToLower ( s )
if _ , ok := reservedKeywords [ sLower ] ; ok {
return true
}
2024-06-11 13:13:55 +00:00
if _ , ok := pipeNames [ sLower ] ; ok {
return true
}
2023-06-20 05:55:12 +00:00
for _ , r := range s {
if ! isTokenRune ( r ) && r != '.' && r != '-' {
return true
}
}
return false
}
// reservedKeywords is the set of tokens, which have special meaning in the query language.
//
// needQuoteToken uses it for detecting tokens, which must be quoted.
var reservedKeywords = func() map[string]struct{} {
	m := make(map[string]struct{})
	add := func(kws ...string) {
		for _, kw := range kws {
			m[kw] = struct{}{}
		}
	}

	// An empty keyword means end of parsed string
	add("")

	// boolean operator tokens for 'foo and bar or baz not xxx'
	// ("!" is a synonym for "not")
	add("and", "or", "not", "!")

	// parens for '(foo or bar) and baz'
	add("(", ")")

	// stream filter tokens for '_stream:{foo=~"bar", baz="a"}'
	add("{", "}", "=", "!=", "=~", "!~", ",")

	// delimiter between query parts:
	// 'foo and bar | extract "<*> foo <time>" | filter x:y | ...'
	add("|")

	// delimiter between field name and query in filter: 'foo:bar'
	add(":")

	// prefix search: 'foo*'
	add("*")

	// keywords for _time filter: '_time:(now-1h, now]'
	add("[", "]", "now", "offset", "-")

	// functions
	add(
		"exact",
		"i",
		"in",
		"ipv4_range",
		"len_range",
		"range",
		"re",
		"seq",
		"string_range",
	)

	return m
}()
2024-05-12 14:33:29 +00:00
// parseUint parses s as an unsigned 64-bit integer.
//
// The following formats are tried in order:
//
//   - case-insensitive "inf" or "+inf", which is parsed as math.MaxUint64;
//   - an integer accepted by strconv.ParseUint with base auto-detection;
//   - a value accepted by tryParseBytes;
//   - a value accepted by tryParseDuration.
//
// Negative values are rejected regardless of the helper, which parsed them,
// since converting them to uint64 would silently wrap around to a huge number.
func parseUint(s string) (uint64, error) {
	if strings.EqualFold(s, "inf") || strings.EqualFold(s, "+inf") {
		return math.MaxUint64, nil
	}

	n, err := strconv.ParseUint(s, 0, 64)
	if err == nil {
		return n, nil
	}

	nn, ok := tryParseBytes(s)
	if !ok {
		nn, ok = tryParseDuration(s)
		if !ok {
			return 0, fmt.Errorf("cannot parse %q as unsigned integer: %w", s, err)
		}
	}
	if nn < 0 {
		return 0, fmt.Errorf("cannot parse negative value %q as unsigned integer", s)
	}
	return uint64(nn), nil
}
2024-05-20 02:08:30 +00:00
// nextafter returns the next representable float64 value after f towards xInf.
//
// Infinite f is returned as is.
func nextafter(f, xInf float64) float64 {
	if !math.IsInf(f, 0) {
		return math.Nextafter(f, xInf)
	}
	return f
}