package doublestar import ( "errors" "io/fs" "path" ) // Glob returns the names of all files matching pattern or nil if there is no // matching file. The syntax of pattern is the same as in Match(). The pattern // may describe hierarchical names such as usr/*/bin/ed. // // Glob ignores file system errors such as I/O errors reading directories by // default. The only possible returned error is ErrBadPattern, reporting that // the pattern is malformed. // // To enable aborting on I/O errors, the WithFailOnIOErrors option can be // passed. // // Note: this is meant as a drop-in replacement for io/fs.Glob(). Like // io/fs.Glob(), this function assumes that your pattern uses `/` as the path // separator even if that's not correct for your OS (like Windows). If you // aren't sure if that's the case, you can use filepath.ToSlash() on your // pattern before calling Glob(). // // Like `io/fs.Glob()`, patterns containing `/./`, `/../`, or starting with `/` // will return no results and no errors. You can use SplitPattern to divide a // pattern into a base path (to initialize an `FS` object) and pattern. // // Note: users should _not_ count on the returned error, // doublestar.ErrBadPattern, being equal to path.ErrBadPattern. // func Glob(fsys fs.FS, pattern string, opts ...GlobOption) ([]string, error) { if !ValidatePattern(pattern) { return nil, ErrBadPattern } g := newGlob(opts...) if hasMidDoubleStar(pattern) { // If the pattern has a `**` anywhere but the very end, GlobWalk is more // performant because it can get away with less allocations. If the pattern // ends in a `**`, both methods are pretty much the same, but Glob has a // _very_ slight advantage because of lower function call overhead. var matches []string err := g.doGlobWalk(fsys, pattern, true, true, func(p string, d fs.DirEntry) error { matches = append(matches, p) return nil }) return matches, err } return g.doGlob(fsys, pattern, nil, true, true) } // Does the actual globbin' // - firstSegment is true if we're in the first segment of the pattern, ie, // the right-most part where we can match files. If it's false, we're // somewhere in the middle (or at the beginning) and can only match // directories since there are path segments above us. // - beforeMeta is true if we're exploring segments before any meta // characters, ie, in a pattern such as `path/to/file*.txt`, the `path/to/` // bit does not contain any meta characters. func (g *glob) doGlob(fsys fs.FS, pattern string, m []string, firstSegment, beforeMeta bool) (matches []string, err error) { matches = m patternStart := indexMeta(pattern) if patternStart == -1 { // pattern doesn't contain any meta characters - does a file matching the // pattern exist? // The pattern may contain escaped wildcard characters for an exact path match. path := unescapeMeta(pattern) pathInfo, pathExists, pathErr := g.exists(fsys, path, beforeMeta) if pathErr != nil { return nil, pathErr } if pathExists && (!firstSegment || !g.filesOnly || !pathInfo.IsDir()) { matches = append(matches, path) } return } dir := "." splitIdx := lastIndexSlashOrAlt(pattern) if splitIdx != -1 { if pattern[splitIdx] == '}' { openingIdx := indexMatchedOpeningAlt(pattern[:splitIdx]) if openingIdx == -1 { // if there's no matching opening index, technically Match() will treat // an unmatched `}` as nothing special, so... we will, too! splitIdx = lastIndexSlash(pattern[:splitIdx]) if splitIdx != -1 { dir = pattern[:splitIdx] pattern = pattern[splitIdx+1:] } } else { // otherwise, we have to handle the alts: return g.globAlts(fsys, pattern, openingIdx, splitIdx, matches, firstSegment, beforeMeta) } } else { dir = pattern[:splitIdx] pattern = pattern[splitIdx+1:] } } // if `splitIdx` is less than `patternStart`, we know `dir` has no meta // characters. They would be equal if they are both -1, which means `dir` // will be ".", and we know that doesn't have meta characters either. if splitIdx <= patternStart { return g.globDir(fsys, dir, pattern, matches, firstSegment, beforeMeta) } var dirs []string dirs, err = g.doGlob(fsys, dir, matches, false, beforeMeta) if err != nil { return } for _, d := range dirs { matches, err = g.globDir(fsys, d, pattern, matches, firstSegment, false) if err != nil { return } } return } // handle alts in the glob pattern - `openingIdx` and `closingIdx` are the // indexes of `{` and `}`, respectively func (g *glob) globAlts(fsys fs.FS, pattern string, openingIdx, closingIdx int, m []string, firstSegment, beforeMeta bool) (matches []string, err error) { matches = m var dirs []string startIdx := 0 afterIdx := closingIdx + 1 splitIdx := lastIndexSlashOrAlt(pattern[:openingIdx]) if splitIdx == -1 || pattern[splitIdx] == '}' { // no common prefix dirs = []string{""} } else { // our alts have a common prefix that we can process first dirs, err = g.doGlob(fsys, pattern[:splitIdx], matches, false, beforeMeta) if err != nil { return } startIdx = splitIdx + 1 } for _, d := range dirs { patIdx := openingIdx + 1 altResultsStartIdx := len(matches) thisResultStartIdx := altResultsStartIdx for patIdx < closingIdx { nextIdx := indexNextAlt(pattern[patIdx:closingIdx], true) if nextIdx == -1 { nextIdx = closingIdx } else { nextIdx += patIdx } alt := buildAlt(d, pattern, startIdx, openingIdx, patIdx, nextIdx, afterIdx) matches, err = g.doGlob(fsys, alt, matches, firstSegment, beforeMeta) if err != nil { return } matchesLen := len(matches) if altResultsStartIdx != thisResultStartIdx && thisResultStartIdx != matchesLen { // Alts can result in matches that aren't sorted, or, worse, duplicates // (consider the trivial pattern `path/to/{a,*}`). Since doGlob returns // sorted results, we can do a sort of in-place merge and remove // duplicates. But, we only need to do this if this isn't the first alt // (ie, `altResultsStartIdx != thisResultsStartIdx`) and if the latest // alt actually added some matches (`thisResultStartIdx != // len(matches)`) matches = sortAndRemoveDups(matches, altResultsStartIdx, thisResultStartIdx, matchesLen) // length of matches may have changed thisResultStartIdx = len(matches) } else { thisResultStartIdx = matchesLen } patIdx = nextIdx + 1 } } return } // find files/subdirectories in the given `dir` that match `pattern` func (g *glob) globDir(fsys fs.FS, dir, pattern string, matches []string, canMatchFiles, beforeMeta bool) (m []string, e error) { m = matches if pattern == "" { if !canMatchFiles || !g.filesOnly { // pattern can be an empty string if the original pattern ended in a // slash, in which case, we should just return dir, but only if it // actually exists and it's a directory (or a symlink to a directory) _, isDir, err := g.isPathDir(fsys, dir, beforeMeta) if err != nil { return nil, err } if isDir { m = append(m, dir) } } return } if pattern == "**" { return g.globDoubleStar(fsys, dir, m, canMatchFiles, beforeMeta) } dirs, err := fs.ReadDir(fsys, dir) if err != nil { if errors.Is(err, fs.ErrNotExist) { e = g.handlePatternNotExist(beforeMeta) } else { e = g.forwardErrIfFailOnIOErrors(err) } return } var matched bool for _, info := range dirs { name := info.Name() matched, e = matchWithSeparator(pattern, name, '/', false) if e != nil { return } if matched { matched = canMatchFiles if !matched || g.filesOnly { matched, e = g.isDir(fsys, dir, name, info) if e != nil { return } if canMatchFiles { // if we're here, it's because g.filesOnly // is set and we don't want directories matched = !matched } } if matched { m = append(m, path.Join(dir, name)) } } } return } func (g *glob) globDoubleStar(fsys fs.FS, dir string, matches []string, canMatchFiles, beforeMeta bool) ([]string, error) { dirs, err := fs.ReadDir(fsys, dir) if err != nil { if errors.Is(err, fs.ErrNotExist) { return matches, g.handlePatternNotExist(beforeMeta) } else { return matches, g.forwardErrIfFailOnIOErrors(err) } } if !g.filesOnly { // `**` can match *this* dir, so add it matches = append(matches, dir) } for _, info := range dirs { name := info.Name() isDir, err := g.isDir(fsys, dir, name, info) if err != nil { return nil, err } if isDir { matches, err = g.globDoubleStar(fsys, path.Join(dir, name), matches, canMatchFiles, false) if err != nil { return nil, err } } else if canMatchFiles { matches = append(matches, path.Join(dir, name)) } } return matches, nil } // Returns true if the pattern has a doublestar in the middle of the pattern. // In this case, GlobWalk is faster because it can get away with less // allocations. However, Glob has a _very_ slight edge if the pattern ends in // `**`. func hasMidDoubleStar(p string) bool { // subtract 3: 2 because we want to return false if the pattern ends in `**` // (Glob is _very_ slightly faster in that case), and the extra 1 because our // loop checks p[i] and p[i+1]. l := len(p) - 3 for i := 0; i < l; i++ { if p[i] == '\\' { // escape next byte i++ } else if p[i] == '*' && p[i+1] == '*' { return true } } return false } // Returns the index of the first unescaped meta character, or negative 1. func indexMeta(s string) int { var c byte l := len(s) for i := 0; i < l; i++ { c = s[i] if c == '*' || c == '?' || c == '[' || c == '{' { return i } else if c == '\\' { // skip next byte i++ } } return -1 } // Returns the index of the last unescaped slash or closing alt (`}`) in the // string, or negative 1. func lastIndexSlashOrAlt(s string) int { for i := len(s) - 1; i >= 0; i-- { if (s[i] == '/' || s[i] == '}') && (i == 0 || s[i-1] != '\\') { return i } } return -1 } // Returns the index of the last unescaped slash in the string, or negative 1. func lastIndexSlash(s string) int { for i := len(s) - 1; i >= 0; i-- { if s[i] == '/' && (i == 0 || s[i-1] != '\\') { return i } } return -1 } // Assuming the byte after the end of `s` is a closing `}`, this function will // find the index of the matching `{`. That is, it'll skip over any nested `{}` // and account for escaping. func indexMatchedOpeningAlt(s string) int { alts := 1 for i := len(s) - 1; i >= 0; i-- { if s[i] == '}' && (i == 0 || s[i-1] != '\\') { alts++ } else if s[i] == '{' && (i == 0 || s[i-1] != '\\') { if alts--; alts == 0 { return i } } } return -1 } // Returns true if the path exists func (g *glob) exists(fsys fs.FS, name string, beforeMeta bool) (fs.FileInfo, bool, error) { // name might end in a slash, but Stat doesn't like that namelen := len(name) if namelen > 1 && name[namelen-1] == '/' { name = name[:namelen-1] } info, err := fs.Stat(fsys, name) if errors.Is(err, fs.ErrNotExist) { return nil, false, g.handlePatternNotExist(beforeMeta) } return info, err == nil, g.forwardErrIfFailOnIOErrors(err) } // Returns true if the path exists and is a directory or a symlink to a // directory func (g *glob) isPathDir(fsys fs.FS, name string, beforeMeta bool) (fs.FileInfo, bool, error) { info, err := fs.Stat(fsys, name) if errors.Is(err, fs.ErrNotExist) { return nil, false, g.handlePatternNotExist(beforeMeta) } return info, err == nil && info.IsDir(), g.forwardErrIfFailOnIOErrors(err) } // Returns whether or not the given DirEntry is a directory. If the DirEntry // represents a symbolic link, the link is followed by running fs.Stat() on // `path.Join(dir, name)` (if dir is "", name will be used without joining) func (g *glob) isDir(fsys fs.FS, dir, name string, info fs.DirEntry) (bool, error) { if !g.noFollow && (info.Type()&fs.ModeSymlink) > 0 { p := name if dir != "" { p = path.Join(dir, name) } finfo, err := fs.Stat(fsys, p) if err != nil { if errors.Is(err, fs.ErrNotExist) { // this function is only ever called while expanding a glob, so it can // never return ErrPatternNotExist return false, nil } return false, g.forwardErrIfFailOnIOErrors(err) } return finfo.IsDir(), nil } return info.IsDir(), nil } // Builds a string from an alt func buildAlt(prefix, pattern string, startIdx, openingIdx, currentIdx, nextIdx, afterIdx int) string { // pattern: // ignored/start{alts,go,here}remaining - len = 36 // | | | | ^--- afterIdx = 27 // | | | \--------- nextIdx = 21 // | | \----------- currentIdx = 19 // | \----------------- openingIdx = 13 // \---------------------- startIdx = 8 // // result: // prefix/startgoremaining - len = 7 + 5 + 2 + 9 = 23 var buf []byte patLen := len(pattern) size := (openingIdx - startIdx) + (nextIdx - currentIdx) + (patLen - afterIdx) if prefix != "" && prefix != "." { buf = make([]byte, 0, size+len(prefix)+1) buf = append(buf, prefix...) buf = append(buf, '/') } else { buf = make([]byte, 0, size) } buf = append(buf, pattern[startIdx:openingIdx]...) buf = append(buf, pattern[currentIdx:nextIdx]...) if afterIdx < patLen { buf = append(buf, pattern[afterIdx:]...) } return string(buf) } // Running alts can produce results that are not sorted, and, worse, can cause // duplicates (consider the trivial pattern `path/to/{a,*}`). Since we know // each run of doGlob is sorted, we can basically do the "merge" step of a // merge sort in-place. func sortAndRemoveDups(matches []string, idx1, idx2, l int) []string { var tmp string for ; idx1 < idx2; idx1++ { if matches[idx1] < matches[idx2] { // order is correct continue } else if matches[idx1] > matches[idx2] { // need to swap and then re-sort matches above idx2 tmp = matches[idx1] matches[idx1] = matches[idx2] shft := idx2 + 1 for ; shft < l && matches[shft] < tmp; shft++ { matches[shft-1] = matches[shft] } matches[shft-1] = tmp } else { // duplicate - shift matches above idx2 down one and decrement l for shft := idx2 + 1; shft < l; shft++ { matches[shft-1] = matches[shft] } if l--; idx2 == l { // nothing left to do... matches[idx2:] must have been full of dups break } } } return matches[:l] }