mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2024-11-21 14:44:00 +00:00
lib/logstorage: add an ability to add prefix to resulting query field names in join
pipe
See https://docs.victoriametrics.com/victorialogs/logsql/#join-pipe
This commit is contained in:
parent
30dd4cdc0d
commit
5a6531b329
6 changed files with 90 additions and 16 deletions
|
@ -15,6 +15,8 @@ according to [these docs](https://docs.victoriametrics.com/victorialogs/quicksta
|
|||
|
||||
## tip
|
||||
|
||||
* FEATURE: [`join` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#join-pipe): add the ability to add a prefix to all the log field names from the joined query by using the `| join by (<by_fields>) (<query>) prefix "some_prefix"` syntax.
|
||||
|
||||
## [v0.41.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v0.41.0-victorialogs)
|
||||
|
||||
Released at 2024-11-06
|
||||
|
|
|
@ -1817,6 +1817,16 @@ _time:1d {app="app1"} | stats by (user) count() app1_hits
|
|||
| filter app2_hits:*
|
||||
```
|
||||
|
||||
It is possible to add a prefix to all the field names returned by the `<query>` by specifying the needed prefix after the `<query>`.
|
||||
For example, the following query adds `app2.` prefix to all `<query>` log fields:
|
||||
|
||||
```logsql
|
||||
_time:1d {app="app1"} | stats by (user) count() app1_hits
|
||||
| join by (user) (
|
||||
_time:1d {app="app2"} | stats by (user) count() app2_hits
|
||||
) prefix "app2."
|
||||
```
|
||||
|
||||
**Performance tips**:
|
||||
|
||||
- Make sure that the `<query>` in the `join` pipe returns a relatively small number of results, since they are kept in RAM during the execution of the `join` pipe.
|
||||
|
|
|
@ -18,12 +18,19 @@ type pipeJoin struct {
|
|||
// q is a query for obtaining results for joining
|
||||
q *Query
|
||||
|
||||
// prefix is the prefix to add to log fields from q query
|
||||
prefix string
|
||||
|
||||
// m contains results for joining. They are automatically initialized during query execution
|
||||
m map[string][][]Field
|
||||
}
|
||||
|
||||
func (pj *pipeJoin) String() string {
|
||||
return fmt.Sprintf("join by (%s) (%s)", fieldNamesString(pj.byFields), pj.q.String())
|
||||
s := fmt.Sprintf("join by (%s) (%s)", fieldNamesString(pj.byFields), pj.q.String())
|
||||
if pj.prefix != "" {
|
||||
s += " prefix " + quoteTokenIfNeeded(pj.prefix)
|
||||
}
|
||||
return s
|
||||
}
|
||||
|
||||
func (pj *pipeJoin) canLiveTail() bool {
|
||||
|
@ -43,7 +50,7 @@ func (pj *pipeJoin) initFilterInValues(_ map[string][]string, _ getFieldValuesFu
|
|||
}
|
||||
|
||||
func (pj *pipeJoin) initJoinMap(getJoinMapFunc getJoinMapFunc) (pipe, error) {
|
||||
m, err := getJoinMapFunc(pj.q, pj.byFields)
|
||||
m, err := getJoinMapFunc(pj.q, pj.byFields, pj.prefix)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot execute query at pipe [%s]: %w", pj, err)
|
||||
}
|
||||
|
@ -88,8 +95,9 @@ type pipeJoinProcessorShard struct {
|
|||
type pipeJoinProcessorShardNopad struct {
|
||||
wctx pipeUnpackWriteContext
|
||||
|
||||
byValues []string
|
||||
tmpBuf []byte
|
||||
byValues []string
|
||||
byValuesIdxs []int
|
||||
tmpBuf []byte
|
||||
}
|
||||
|
||||
func (pjp *pipeJoinProcessor) writeBlock(workerID uint, br *blockResult) {
|
||||
|
@ -105,12 +113,19 @@ func (pjp *pipeJoinProcessor) writeBlock(workerID uint, br *blockResult) {
|
|||
byValues := shard.byValues
|
||||
|
||||
cs := br.getColumns()
|
||||
shard.byValuesIdxs = slicesutil.SetLength(shard.byValuesIdxs, len(cs))
|
||||
byValuesIdxs := shard.byValuesIdxs
|
||||
for i := range cs {
|
||||
name := cs[i].name
|
||||
byValuesIdxs[i] = slices.Index(pj.byFields, name)
|
||||
|
||||
}
|
||||
|
||||
for rowIdx := 0; rowIdx < br.rowsLen; rowIdx++ {
|
||||
clear(byValues)
|
||||
for i := range cs {
|
||||
name := cs[i].name
|
||||
if cIdx := slices.Index(pj.byFields, name); cIdx >= 0 {
|
||||
byValues[cIdx] = cs[i].getValueAtRow(br, rowIdx)
|
||||
for j := range cs {
|
||||
if cIdx := byValuesIdxs[j]; cIdx >= 0 {
|
||||
byValues[cIdx] = cs[j].getValueAtRow(br, rowIdx)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -180,5 +195,14 @@ func parsePipeJoin(lex *lexer) (*pipeJoin, error) {
|
|||
q: q,
|
||||
}
|
||||
|
||||
if lex.isKeyword("prefix") {
|
||||
lex.nextToken()
|
||||
prefix, err := getCompoundToken(lex)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot read prefix for [%s]: %w", pj, err)
|
||||
}
|
||||
pj.prefix = prefix
|
||||
}
|
||||
|
||||
return pj, nil
|
||||
}
|
||||
|
|
|
@ -12,6 +12,7 @@ func TestParsePipeJoinSuccess(t *testing.T) {
|
|||
|
||||
f(`join by (foo) (error)`)
|
||||
f(`join by (foo, bar) (a:b | fields x, y)`)
|
||||
f(`join by (foo) (a:b) prefix c`)
|
||||
}
|
||||
|
||||
func TestParsePipeJoinFailure(t *testing.T) {
|
||||
|
@ -31,6 +32,8 @@ func TestParsePipeJoinFailure(t *testing.T) {
|
|||
f(`join by (x) ()`)
|
||||
f(`join by (x) (`)
|
||||
f(`join by (x) (abc`)
|
||||
f(`join (x) (y) prefix`)
|
||||
f(`join (x) (y) prefix |`)
|
||||
}
|
||||
|
||||
func TestPipeJoinUpdateNeededFields(t *testing.T) {
|
||||
|
|
|
@ -217,9 +217,11 @@ func (s *Storage) GetFieldNames(ctx context.Context, tenantIDs []TenantID, q *Qu
|
|||
return s.runValuesWithHitsQuery(ctx, tenantIDs, q)
|
||||
}
|
||||
|
||||
func (s *Storage) getJoinMap(ctx context.Context, tenantIDs []TenantID, q *Query, byFields []string) (map[string][][]Field, error) {
|
||||
func (s *Storage) getJoinMap(ctx context.Context, tenantIDs []TenantID, q *Query, byFields []string, prefix string) (map[string][][]Field, error) {
|
||||
// TODO: track memory usage
|
||||
|
||||
logger.Infof("DEBUG: byFields=%q, prefix=%q", byFields, prefix)
|
||||
|
||||
m := make(map[string][][]Field)
|
||||
var mLock sync.Mutex
|
||||
writeBlockResult := func(_ uint, br *blockResult) {
|
||||
|
@ -229,8 +231,15 @@ func (s *Storage) getJoinMap(ctx context.Context, tenantIDs []TenantID, q *Query
|
|||
|
||||
cs := br.getColumns()
|
||||
columnNames := make([]string, len(cs))
|
||||
byValuesIdxs := make([]int, len(cs))
|
||||
for i := range cs {
|
||||
columnNames[i] = strings.Clone(cs[i].name)
|
||||
name := strings.Clone(cs[i].name)
|
||||
idx := slices.Index(byFields, name)
|
||||
if prefix != "" && idx < 0 {
|
||||
name = prefix + name
|
||||
}
|
||||
columnNames[i] = name
|
||||
byValuesIdxs[i] = idx
|
||||
}
|
||||
|
||||
byValues := make([]string, len(byFields))
|
||||
|
@ -242,16 +251,17 @@ func (s *Storage) getJoinMap(ctx context.Context, tenantIDs []TenantID, q *Query
|
|||
for j := range cs {
|
||||
name := columnNames[j]
|
||||
v := cs[j].getValueAtRow(br, rowIdx)
|
||||
if cIdx := slices.Index(byFields, name); cIdx >= 0 {
|
||||
if cIdx := byValuesIdxs[j]; cIdx >= 0 {
|
||||
byValues[cIdx] = v
|
||||
continue
|
||||
}
|
||||
if v == "" {
|
||||
continue
|
||||
}
|
||||
value := strings.Clone(v)
|
||||
fields = append(fields, Field{
|
||||
Name: name,
|
||||
Value: strings.Clone(v),
|
||||
Value: value,
|
||||
})
|
||||
}
|
||||
|
||||
|
@ -526,15 +536,15 @@ func (s *Storage) initFilterInValues(ctx context.Context, tenantIDs []TenantID,
|
|||
return qNew, nil
|
||||
}
|
||||
|
||||
type getJoinMapFunc func(q *Query, byFields []string) (map[string][][]Field, error)
|
||||
type getJoinMapFunc func(q *Query, byFields []string, prefix string) (map[string][][]Field, error)
|
||||
|
||||
func (s *Storage) initJoinMaps(ctx context.Context, tenantIDs []TenantID, q *Query) (*Query, error) {
|
||||
if !hasJoinPipes(q.pipes) {
|
||||
return q, nil
|
||||
}
|
||||
|
||||
getJoinMap := func(q *Query, byFields []string) (map[string][][]Field, error) {
|
||||
return s.getJoinMap(ctx, tenantIDs, q, byFields)
|
||||
getJoinMap := func(q *Query, byFields []string, prefix string) (map[string][][]Field, error) {
|
||||
return s.getJoinMap(ctx, tenantIDs, q, byFields, prefix)
|
||||
}
|
||||
|
||||
pipesNew := make([]pipe, len(q.pipes))
|
||||
|
|
|
@ -729,7 +729,7 @@ func TestStorageRunQuery(t *testing.T) {
|
|||
},
|
||||
})
|
||||
})
|
||||
t.Run("pipe-join-single", func(t *testing.T) {
|
||||
t.Run("pipe-join", func(t *testing.T) {
|
||||
f(t, `'message 5' | stats by (instance) count() x
|
||||
| join on (instance) (
|
||||
'block 0' instance:host-1 | stats by (instance)
|
||||
|
@ -753,6 +753,31 @@ func TestStorageRunQuery(t *testing.T) {
|
|||
},
|
||||
})
|
||||
})
|
||||
t.Run("pipe-join-prefix", func(t *testing.T) {
|
||||
f(t, `'message 5' | stats by (instance) count() x
|
||||
| join on (instance) (
|
||||
'block 0' instance:host-1 | stats by (instance)
|
||||
count() total,
|
||||
count_uniq(stream-id) streams,
|
||||
count_uniq(stream-id) x
|
||||
) prefix "abc."`, [][]Field{
|
||||
{
|
||||
{"instance", "host-0:234"},
|
||||
{"x", "55"},
|
||||
},
|
||||
{
|
||||
{"instance", "host-2:234"},
|
||||
{"x", "55"},
|
||||
},
|
||||
{
|
||||
{"instance", "host-1:234"},
|
||||
{"x", "55"},
|
||||
{"abc.total", "77"},
|
||||
{"abc.streams", "1"},
|
||||
{"abc.x", "1"},
|
||||
},
|
||||
})
|
||||
})
|
||||
|
||||
// Close the storage and delete its data
|
||||
s.MustClose()
|
||||
|
|
Loading…
Reference in a new issue