This commit is contained in:
Aliaksandr Valialkin 2024-05-23 12:24:09 +02:00
parent a4337149a2
commit e3cbf97bdd
No known key found for this signature in database
GPG key ID: 52C003EE2BCDB9EB
7 changed files with 51 additions and 14 deletions

View file

@ -19,6 +19,8 @@ according to [these docs](https://docs.victoriametrics.com/VictoriaLogs/QuickSta
## tip ## tip
* FEATURE: allow disabling automatic unquoting of the matched placeholders in [`extract` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#extract-pipe). See [these docs](https://docs.victoriametrics.com/victorialogs/logsql/#format-for-extract-pipe-pattern).
* BUGFIX: properly parse `!` in front of [exact filter](https://docs.victoriametrics.com/victorialogs/logsql/#exact-filter), [exact-prefix filter](https://docs.victoriametrics.com/victorialogs/logsql/#exact-prefix-filter) and [regexp filter](https://docs.victoriametrics.com/victorialogs/logsql/#regexp-filter). For example, `!~"some regexp"` is properly parsed as `not ~"some regexp"`. Previously it was incorrectly parsed as `'~="some regexp"'` [phrase filter](https://docs.victoriametrics.com/victorialogs/logsql/#phrase-filter). * BUGFIX: properly parse `!` in front of [exact filter](https://docs.victoriametrics.com/victorialogs/logsql/#exact-filter), [exact-prefix filter](https://docs.victoriametrics.com/victorialogs/logsql/#exact-prefix-filter) and [regexp filter](https://docs.victoriametrics.com/victorialogs/logsql/#regexp-filter). For example, `!~"some regexp"` is properly parsed as `not ~"some regexp"`. Previously it was incorrectly parsed as `'~="some regexp"'` [phrase filter](https://docs.victoriametrics.com/victorialogs/logsql/#phrase-filter).
## [v0.9.1](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v0.9.1-victorialogs) ## [v0.9.1](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v0.9.1-victorialogs)

View file

@ -1184,7 +1184,7 @@ Placeholders can be anonymous and named. Anonymous placeholders are written as `
must be skipped until the next `textX`. Named placeholders are written as `<some_name>`, where `some_name` is the name of the log field to store must be skipped until the next `textX`. Named placeholders are written as `<some_name>`, where `some_name` is the name of the log field to store
the corresponding matching substring to. the corresponding matching substring to.
The matching starts from the first occurrence of the `text1` in the input text. If the `pattern` starts with `<field1>` and doesn't contain `text1`, Matching starts from the first occurrence of the `text1` in the input text. If the `pattern` starts with `<field1>` and doesn't contain `text1`,
then the matching starts from the beginning of the input text. Matching is performed sequentially according to the `pattern`. If some `textX` isn't found then the matching starts from the beginning of the input text. Matching is performed sequentially according to the `pattern`. If some `textX` isn't found
in the remaining input text, then the remaining named placeholders receive empty string values and the matching finishes prematurely. in the remaining input text, then the remaining named placeholders receive empty string values and the matching finishes prematurely.
@ -1219,6 +1219,13 @@ This is useful for extracting JSON strings. For example, the following `pattern`
"message":<msg> "message":<msg>
``` ```
The automatic string unquoting can be disabled if needed by adding the `plain:` prefix in front of the field name. For example, if some JSON array of string values must be captured
into the `json_array` field, then the following `pattern` can be used:
```
some json string array: [<plain:json_array>]
```
If some special chars such as `<` must be matched by the `pattern`, then they can be [html-escaped](https://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references). If some special chars such as `<` must be matched by the `pattern`, then they can be [html-escaped](https://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references).
For example, the following `pattern` properly matches `a < b` text by extracting `a` into `left` field and `b` into `right` field: For example, the following `pattern` properly matches `a < b` text by extracting `a` into `left` field and `b` into `right` field:

View file

@ -38,7 +38,7 @@ func (p *logfmtParser) parse(s string) {
} }
// Search for field value // Search for field value
value, nOffset := tryUnquoteString(s) value, nOffset := tryUnquoteString(s, "")
if nOffset >= 0 { if nOffset >= 0 {
p.addField(name, value) p.addField(name, value)
s = s[nOffset:] s = s[nOffset:]

View file

@ -109,7 +109,7 @@ func (ptn *pattern) apply(s string) {
nextPrefix = steps[i+1].prefix nextPrefix = steps[i+1].prefix
} }
us, nOffset := tryUnquoteString(s) us, nOffset := tryUnquoteString(s, steps[i].opt)
if nOffset >= 0 { if nOffset >= 0 {
// Matched quoted string // Matched quoted string
matches[i] = us matches[i] = us
@ -136,20 +136,23 @@ func (ptn *pattern) apply(s string) {
} }
} }
// tryUnquoteString tries to unquote the double-quoted or backquoted string literal located at the start of s.
//
// On success it returns the unquoted string and the length in bytes of the quoted prefix consumed from s.
// On failure the second result is -1. This happens when s is empty, when s doesn't start with `"` or a backquote,
// or when strconv cannot parse or unquote the leading literal.
//
// NOTE: every line below shows the old version (left) and the updated version (right) side by side.
// The updated version accepts the additional opt argument and returns "" instead of s on failure.
func tryUnquoteString(s string) (string, int) { func tryUnquoteString(s, opt string) (string, int) {
// The "plain" option disables unquoting, so report "no quoted string" without inspecting s.
if opt == "plain" {
return "", -1
}
if len(s) == 0 { if len(s) == 0 {
return s, -1 return "", -1
} }
// Only literals starting with a double quote or a backquote are treated as quoted strings.
if s[0] != '"' && s[0] != '`' { if s[0] != '"' && s[0] != '`' {
return s, -1 return "", -1
} }
// qp is the quoted literal at the start of s; its length is reported to the caller as the consumed offset.
qp, err := strconv.QuotedPrefix(s) qp, err := strconv.QuotedPrefix(s)
if err != nil { if err != nil {
return s, -1 return "", -1
} }
us, err := strconv.Unquote(qp) us, err := strconv.Unquote(qp)
if err != nil { if err != nil {
return s, -1 return "", -1
} }
return us, len(qp) return us, len(qp)
} }
@ -171,9 +174,10 @@ func parsePatternSteps(s string) ([]patternStep, error) {
step := &steps[i] step := &steps[i]
field := step.field field := step.field
if n := strings.IndexByte(field, ':'); n >= 0 { if n := strings.IndexByte(field, ':'); n >= 0 {
step.opt = field[:n] step.opt = strings.TrimSpace(field[:n])
step.field = field[n+1:] field = field[n+1:]
} }
step.field = strings.TrimSpace(field)
} }
return steps, nil return steps, nil

View file

@ -196,7 +196,7 @@ func TestParsePatternStepsSuccess(t *testing.T) {
prefix: "<&>", prefix: "<&>",
}, },
}) })
f("&lt;<foo>&amp;gt;", []patternStep{ f("&lt;< foo >&amp;gt;", []patternStep{
{ {
prefix: "<", prefix: "<",
field: "foo", field: "foo",
@ -205,7 +205,7 @@ func TestParsePatternStepsSuccess(t *testing.T) {
prefix: "&gt;", prefix: "&gt;",
}, },
}) })
f("<q:foo>bar<abc:baz:c:y>f<:foo:bar:baz>", []patternStep{ f("< q : foo >bar<plain : baz:c:y>f<:foo:bar:baz>", []patternStep{
{ {
field: "foo", field: "foo",
opt: "q", opt: "q",
@ -213,7 +213,7 @@ func TestParsePatternStepsSuccess(t *testing.T) {
{ {
prefix: "bar", prefix: "bar",
field: "baz:c:y", field: "baz:c:y",
opt: "abc", opt: "plain",
}, },
{ {
prefix: "f", prefix: "f",

View file

@ -99,6 +99,30 @@ func TestPipeExtract(t *testing.T) {
}, },
}) })
// single row, disable unquoting
f(`extract 'foo=[< plain : bar >]' from x`, [][]Field{
{
{"x", `a foo=["bc","de"]`},
},
}, [][]Field{
{
{"x", `a foo=["bc","de"]`},
{"bar", `"bc","de"`},
},
})
// single row, default unquoting
f(`extract 'foo=[< bar >]' from x`, [][]Field{
{
{"x", `a foo=["bc","de"]`},
},
}, [][]Field{
{
{"x", `a foo=["bc","de"]`},
{"bar", `bc`},
},
})
// single row, overwrite existing column // single row, overwrite existing column
f(`extract "foo=<bar> baz=<xx>" from x`, [][]Field{ f(`extract "foo=<bar> baz=<xx>" from x`, [][]Field{
{ {

View file

@ -1042,7 +1042,7 @@ func parseStreamLabels(dst []Field, s string) ([]Field, error) {
name := s[:n] name := s[:n]
s = s[n+1:] s = s[n+1:]
value, nOffset := tryUnquoteString(s) value, nOffset := tryUnquoteString(s, "")
if nOffset < 0 { if nOffset < 0 {
return dst, fmt.Errorf("cannot find parse label value in double quotes at [%s]", s) return dst, fmt.Errorf("cannot find parse label value in double quotes at [%s]", s)
} }