victorialogs: add more checks for stats query APIs (#7254)

1. Verify that every field referenced in the [fields
pipe](https://docs.victoriametrics.com/victorialogs/logsql/#fields-pipe)
exists. Otherwise a metric with the illegal float value "" is generated
for the Prometheus metrics protocol.
2. Check whether multiple time range filters produce a conflicting query
time range, for instance:
```
query: _time: 5m | stats count(), 
start:2024-10-08T10:00:00.806Z, 
end: 2024-10-08T12:00:00.806Z, 
time: 2024-10-10T10:02:59.806Z
```
must give no result due to invalid final time range.

---------

Co-authored-by: Aliaksandr Valialkin <valyala@victoriametrics.com>
This commit is contained in:
Hui Wang 2024-10-17 01:25:43 +08:00 committed by GitHub
parent 202eb429a7
commit 72941eac36
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 45 additions and 4 deletions

View file

@ -22,6 +22,7 @@ according to [these docs](https://docs.victoriametrics.com/victorialogs/quicksta
* BUGFIX: avoid possible panic when logs for a new day are ingested during execution of concurrent queries.
* BUGFIX: avoid panic at `lib/logstorage.(*blockResultColumn).forEachDictValue()` when [stats with additional filters](https://docs.victoriametrics.com/victorialogs/logsql/#stats-with-additional-filters). The panic has been introduced in [v0.33.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v0.33.0-victorialogs) in [this commit](https://github.com/VictoriaMetrics/VictoriaMetrics/commit/a350be48b68330ee1a487e1fb09b002d3be45163).
* BUGFIX: add more checks for [stats query APIs](https://docs.victoriametrics.com/victorialogs/querying/#querying-log-stats) to avoid invalid results.
* BUGFIX: [vmui](https://docs.victoriametrics.com/#vmui): fix error messages rendering from overflowing the screen with long messages. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7207).

View file

@ -484,6 +484,11 @@ func (q *Query) GetStatsByFieldsAddGroupingByTime(step int64) ([]string, error)
fields[i] = f.name
}
resultNames := make([]string, len(ps.funcs))
for i, f := range ps.funcs {
resultNames[i] = f.resultName
}
// verify that all the pipes after the idx do not add new fields
for i := idx + 1; i < len(pipes); i++ {
p := pipes[i]
@ -492,6 +497,9 @@ func (q *Query) GetStatsByFieldsAddGroupingByTime(step int64) ([]string, error)
// These pipes do not change the set of fields.
case *pipeMath:
// Allow pipeMath, since it adds additional metrics to the given set of fields.
for _, f := range t.entries {
resultNames = append(resultNames, f.resultField)
}
case *pipeFields:
// `| fields ...` pipe must contain all the by(...) fields, otherwise it breaks output.
for _, f := range fields {
@ -499,12 +507,24 @@ func (q *Query) GetStatsByFieldsAddGroupingByTime(step int64) ([]string, error)
return nil, fmt.Errorf("missing %q field at %q pipe in the query [%s]", f, p, q)
}
}
// field in `| fields ...` pipe must exist, otherwise it breaks output.
for _, f := range t.fields {
if !slices.Contains(fields, f) && !slices.Contains(resultNames, f) {
return nil, fmt.Errorf("unknown %q field at %q pipe in the query [%s]", f, p, q)
}
}
case *pipeDelete:
// Disallow deleting by(...) fields, since this breaks output.
for _, f := range t.fields {
if slices.Contains(fields, f) {
return nil, fmt.Errorf("the %q field cannot be deleted via %q in the query [%s]", f, p, q)
}
for i := range resultNames {
if resultNames[i] == f {
resultNames = append(resultNames[:i], resultNames[i+1:]...)
break
}
}
}
case *pipeCopy:
// Disallow copying by(...) fields, since this breaks output.
@ -513,12 +533,16 @@ func (q *Query) GetStatsByFieldsAddGroupingByTime(step int64) ([]string, error)
return nil, fmt.Errorf("the %q field cannot be copied via %q in the query [%s]", f, p, q)
}
}
resultNames = append(resultNames, t.dstFields...)
case *pipeRename:
// Update by(...) fields with dst fields
for i, f := range t.srcFields {
if n := slices.Index(fields, f); n >= 0 {
fields[n] = t.dstFields[i]
}
if n := slices.Index(resultNames, f); n >= 0 {
resultNames[n] = t.dstFields[i]
}
}
default:
return nil, fmt.Errorf("the %q pipe cannot be put after %q pipe in the query [%s]", p, ps, q)
@ -708,9 +732,20 @@ func ParseQuery(s string) (*Query, error) {
return ParseQueryAtTimestamp(s, timestamp)
}
// ParseStatsQuery parses s as a LogsQL query and additionally verifies that
// it is a valid stats query (see
// https://docs.victoriametrics.com/victorialogs/querying/#querying-log-stats).
//
// The query is parsed in the context of the current timestamp, so relative
// _time filters (e.g. `_time:5m`) are resolved against time.Now().
//
// It returns a non-nil error if s cannot be parsed or if the parsed query
// does not pass the stats-query checks performed by GetStatsByFields.
func ParseStatsQuery(s string) (*Query, error) {
	timestamp := time.Now().UnixNano()
	query, err := ParseQueryAtTimestamp(s, timestamp)
	if err != nil {
		return nil, err
	}
	// GetStatsByFields performs validation only here; its result fields
	// are not needed by the caller.
	if _, err := query.GetStatsByFields(); err != nil {
		// Do not return a partially-validated query together with the error —
		// the (*Query, error) contract implies the query is meaningless on error.
		return nil, err
	}
	return query, nil
}
// ParseQueryAtTimestamp parses s in the context of the given timestamp.
//
// E.g. _time:duration filters are ajusted according to the provided timestamp as _time:[timestamp-duration, duration].
// E.g. _time:duration filters are adjusted according to the provided timestamp as _time:[timestamp-duration, duration].
func ParseQueryAtTimestamp(s string, timestamp int64) (*Query, error) {
lex := newLexerAtTimestamp(s, timestamp)

View file

@ -2240,6 +2240,10 @@ func TestQueryGetStatsByFieldsAddGroupingByTime_Failure(t *testing.T) {
f(`*`)
f(`_time:5m | count() | drop _time`)
f(`* | by (x) count() | keep x`)
f(`* | stats by (host) count() total | fields total`)
f(`* | stats by (host) count() total | delete host`)
f(`* | stats by (host) count() total | copy host as server`)
f(`* | stats by (host) count() total | rename host as server | fields host, total`)
}
func TestQueryGetStatsByFields_Success(t *testing.T) {
@ -2276,9 +2280,6 @@ func TestQueryGetStatsByFields_Success(t *testing.T) {
// math pipe is allowed after stats
f(`foo | stats by (x) count() total, count() if (error) errors | math errors / total`, []string{"x"})
// keep containing all the by(...) fields
f(`foo | stats by (x) count() total | keep x, y`, []string{"x"})
// drop which doesn't contain by(...) fields
f(`foo | stats by (x) count() total | drop y`, []string{"x"})
@ -2332,4 +2333,5 @@ func TestQueryGetStatsByFields_Failure(t *testing.T) {
f(`foo | count() | unroll by (x)`)
f(`* | by (x) count() as rows | math rows * 10, rows / 10 | drop x`)
f(`* | by (x) count() total | keep x, y`)
}

View file

@ -119,6 +119,9 @@ func (s *Storage) runQuery(ctx context.Context, tenantIDs []TenantID, q *Query,
})
minTimestamp, maxTimestamp := q.GetFilterTimeRange()
if minTimestamp > maxTimestamp {
return fmt.Errorf("invalid query time range: minTimestamp=%d cannot be bigger than maxTimestamp=%d", minTimestamp, maxTimestamp)
}
neededColumnNames, unneededColumnNames := q.getNeededColumns()
so := &genericSearchOptions{