This commit is contained in:
Aliaksandr Valialkin 2024-05-23 12:24:09 +02:00
parent a4337149a2
commit e3cbf97bdd
No known key found for this signature in database
GPG key ID: 52C003EE2BCDB9EB
7 changed files with 51 additions and 14 deletions

View file

@ -19,6 +19,8 @@ according to [these docs](https://docs.victoriametrics.com/VictoriaLogs/QuickSta
## tip
* FEATURE: allow disabling automatic unquoting of the matched placeholders in [`extract` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#extract-pipe). See [these docs](https://docs.victoriametrics.com/victorialogs/logsql/#format-for-extract-pipe-pattern).
* BUGFIX: properly parse `!` in front of [exact filter](https://docs.victoriametrics.com/victorialogs/logsql/#exact-filter), [exact-prefix filter](https://docs.victoriametrics.com/victorialogs/logsql/#exact-prefix-filter) and [regexp filter](https://docs.victoriametrics.com/victorialogs/logsql/#regexp-filter). For example, `!~"some regexp"` is properly parsed as `not ~"some regexp"`. Previously it was incorrectly parsed as `'~="some regexp"'` [phrase filter](https://docs.victoriametrics.com/victorialogs/logsql/#phrase-filter).
## [v0.9.1](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v0.9.1-victorialogs)

View file

@ -1184,7 +1184,7 @@ Placeholders can be anonymous and named. Anonymous placeholders are written as `
must be skipped until the next `textX`. Named placeholders are written as `<some_name>`, where `some_name` is the name of the log field to store
the corresponding matching substring to.
The matching starts from the first occurrence of the `text1` in the input text. If the `pattern` starts with `<field1>` and doesn't contain `text1`,
Matching starts from the first occurrence of the `text1` in the input text. If the `pattern` starts with `<field1>` and doesn't contain `text1`,
then the matching starts from the beginning of the input text. Matching is performed sequentially according to the `pattern`. If some `textX` isn't found
in the remaining input text, then the remaining named placeholders receive empty string values and the matching finishes prematurely.
@ -1219,6 +1219,13 @@ This is useful for extracting JSON strings. For example, the following `pattern`
"message":<msg>
```
The automatic string unquoting can be disabled if needed by adding `plain:` prefix in front of the field name. For example, if some JSON array of string values must be captured
into `json_array` field, then the following `pattern` can be used:
```
some json string array: [<plain:json_array>]
```
If some special chars such as `<` must be matched by the `pattern`, then they can be [html-escaped](https://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references).
For example, the following `pattern` properly matches `a < b` text by extracting `a` into `left` field and `b` into `right` field:

View file

@ -38,7 +38,7 @@ func (p *logfmtParser) parse(s string) {
}
// Search for field value
value, nOffset := tryUnquoteString(s)
value, nOffset := tryUnquoteString(s, "")
if nOffset >= 0 {
p.addField(name, value)
s = s[nOffset:]

View file

@ -109,7 +109,7 @@ func (ptn *pattern) apply(s string) {
nextPrefix = steps[i+1].prefix
}
us, nOffset := tryUnquoteString(s)
us, nOffset := tryUnquoteString(s, steps[i].opt)
if nOffset >= 0 {
// Matched quoted string
matches[i] = us
@ -136,20 +136,23 @@ func (ptn *pattern) apply(s string) {
}
}
// tryUnquoteString tries to unquote a quoted string located at the start of s.
//
// The string is considered quoted when it starts with a double quote or a backtick.
// On success it returns the unquoted value together with the length in bytes
// of the quoted prefix consumed from s, so the caller can continue at s[offset:].
// On failure the returned offset is -1.
//
// NOTE(review): this listing appears to interleave the pre-change and post-change
// lines of a diff (two signatures, paired return statements) - confirm against the
// actual file before relying on it.
func tryUnquoteString(s string) (string, int) {
func tryUnquoteString(s, opt string) (string, int) {
// The "plain" option disables unquoting: report "no quoted string found".
if opt == "plain" {
return "", -1
}
// Nothing to unquote in an empty string.
if len(s) == 0 {
return s, -1
return "", -1
}
// Only double-quoted and backtick-quoted strings are recognized.
if s[0] != '"' && s[0] != '`' {
return s, -1
return "", -1
}
// Extract the quoted literal at the start of s; the text after it is left untouched.
qp, err := strconv.QuotedPrefix(s)
if err != nil {
return s, -1
return "", -1
}
// Decode the quoted literal into its unquoted value.
us, err := strconv.Unquote(qp)
if err != nil {
return s, -1
return "", -1
}
// len(qp) is the number of bytes of s occupied by the quoted literal, quotes included.
return us, len(qp)
}
@ -171,9 +174,10 @@ func parsePatternSteps(s string) ([]patternStep, error) {
step := &steps[i]
field := step.field
if n := strings.IndexByte(field, ':'); n >= 0 {
step.opt = field[:n]
step.field = field[n+1:]
step.opt = strings.TrimSpace(field[:n])
field = field[n+1:]
}
step.field = strings.TrimSpace(field)
}
return steps, nil

View file

@ -196,7 +196,7 @@ func TestParsePatternStepsSuccess(t *testing.T) {
prefix: "<&>",
},
})
f("&lt;<foo>&amp;gt;", []patternStep{
f("&lt;< foo >&amp;gt;", []patternStep{
{
prefix: "<",
field: "foo",
@ -205,7 +205,7 @@ func TestParsePatternStepsSuccess(t *testing.T) {
prefix: "&gt;",
},
})
f("<q:foo>bar<abc:baz:c:y>f<:foo:bar:baz>", []patternStep{
f("< q : foo >bar<plain : baz:c:y>f<:foo:bar:baz>", []patternStep{
{
field: "foo",
opt: "q",
@ -213,7 +213,7 @@ func TestParsePatternStepsSuccess(t *testing.T) {
{
prefix: "bar",
field: "baz:c:y",
opt: "abc",
opt: "plain",
},
{
prefix: "f",

View file

@ -99,6 +99,30 @@ func TestPipeExtract(t *testing.T) {
},
})
// single row, disable unquoting
f(`extract 'foo=[< plain : bar >]' from x`, [][]Field{
{
{"x", `a foo=["bc","de"]`},
},
}, [][]Field{
{
{"x", `a foo=["bc","de"]`},
{"bar", `"bc","de"`},
},
})
// single row, default unquoting
f(`extract 'foo=[< bar >]' from x`, [][]Field{
{
{"x", `a foo=["bc","de"]`},
},
}, [][]Field{
{
{"x", `a foo=["bc","de"]`},
{"bar", `bc`},
},
})
// single row, overwrite existing column
f(`extract "foo=<bar> baz=<xx>" from x`, [][]Field{
{

View file

@ -1042,7 +1042,7 @@ func parseStreamLabels(dst []Field, s string) ([]Field, error) {
name := s[:n]
s = s[n+1:]
value, nOffset := tryUnquoteString(s)
value, nOffset := tryUnquoteString(s, "")
if nOffset < 0 {
return dst, fmt.Errorf("cannot find parse label value in double quotes at [%s]", s)
}