package doublestar import ( "path/filepath" "unicode/utf8" ) // Match reports whether name matches the shell pattern. // The pattern syntax is: // // pattern: // { term } // term: // '*' matches any sequence of non-path-separators // '/**/' matches zero or more directories // '?' matches any single non-path-separator character // '[' [ '^' '!' ] { character-range } ']' // character class (must be non-empty) // starting with `^` or `!` negates the class // '{' { term } [ ',' { term } ... ] '}' // alternatives // c matches character c (c != '*', '?', '\\', '[') // '\\' c matches character c // // character-range: // c matches character c (c != '\\', '-', ']') // '\\' c matches character c // lo '-' hi matches character c for lo <= c <= hi // // Match returns true if `name` matches the file name `pattern`. `name` and // `pattern` are split on forward slash (`/`) characters and may be relative or // absolute. // // Match requires pattern to match all of name, not just a substring. // The only possible returned error is ErrBadPattern, when pattern // is malformed. // // A doublestar (`**`) should appear surrounded by path separators such as // `/**/`. A mid-pattern doublestar (`**`) behaves like bash's globstar // option: a pattern such as `path/to/**.txt` would return the same results as // `path/to/*.txt`. The pattern you're looking for is `path/to/**/*.txt`. // // Note: this is meant as a drop-in replacement for path.Match() which // always uses '/' as the path separator. If you want to support systems // which use a different path separator (such as Windows), what you want // is PathMatch(). Alternatively, you can run filepath.ToSlash() on both // pattern and name and then use this function. // // Note: users should _not_ count on the returned error, // doublestar.ErrBadPattern, being equal to path.ErrBadPattern. // func Match(pattern, name string) (bool, error) { return matchWithSeparator(pattern, name, '/', true) } // PathMatch returns true if `name` matches the file name `pattern`. The // difference between Match and PathMatch is that PathMatch will automatically // use your system's path separator to split `name` and `pattern`. On systems // where the path separator is `'\'`, escaping will be disabled. // // Note: this is meant as a drop-in replacement for filepath.Match(). It // assumes that both `pattern` and `name` are using the system's path // separator. If you can't be sure of that, use filepath.ToSlash() on both // `pattern` and `name`, and then use the Match() function instead. // func PathMatch(pattern, name string) (bool, error) { return matchWithSeparator(pattern, name, filepath.Separator, true) } func matchWithSeparator(pattern, name string, separator rune, validate bool) (matched bool, err error) { return doMatchWithSeparator(pattern, name, separator, validate, -1, -1, -1, -1, 0, 0) } func doMatchWithSeparator(pattern, name string, separator rune, validate bool, doublestarPatternBacktrack, doublestarNameBacktrack, starPatternBacktrack, starNameBacktrack, patIdx, nameIdx int) (matched bool, err error) { patLen := len(pattern) nameLen := len(name) startOfSegment := true MATCH: for nameIdx < nameLen { if patIdx < patLen { switch pattern[patIdx] { case '*': if patIdx++; patIdx < patLen && pattern[patIdx] == '*' { // doublestar - must begin with a path separator, otherwise we'll // treat it like a single star like bash patIdx++ if startOfSegment { if patIdx >= patLen { // pattern ends in `/**`: return true return true, nil } // doublestar must also end with a path separator, otherwise we're // just going to treat the doublestar as a single star like bash patRune, patRuneLen := utf8.DecodeRuneInString(pattern[patIdx:]) if patRune == separator { patIdx += patRuneLen doublestarPatternBacktrack = patIdx doublestarNameBacktrack = nameIdx starPatternBacktrack = -1 starNameBacktrack = -1 continue } } } startOfSegment = false starPatternBacktrack = patIdx starNameBacktrack = nameIdx continue case '?': startOfSegment = false nameRune, nameRuneLen := utf8.DecodeRuneInString(name[nameIdx:]) if nameRune == separator { // `?` cannot match the separator break } patIdx++ nameIdx += nameRuneLen continue case '[': startOfSegment = false if patIdx++; patIdx >= patLen { // class didn't end return false, ErrBadPattern } nameRune, nameRuneLen := utf8.DecodeRuneInString(name[nameIdx:]) matched := false negate := pattern[patIdx] == '!' || pattern[patIdx] == '^' if negate { patIdx++ } if patIdx >= patLen || pattern[patIdx] == ']' { // class didn't end or empty character class return false, ErrBadPattern } last := utf8.MaxRune for patIdx < patLen && pattern[patIdx] != ']' { patRune, patRuneLen := utf8.DecodeRuneInString(pattern[patIdx:]) patIdx += patRuneLen // match a range if last < utf8.MaxRune && patRune == '-' && patIdx < patLen && pattern[patIdx] != ']' { if pattern[patIdx] == '\\' { // next character is escaped patIdx++ } patRune, patRuneLen = utf8.DecodeRuneInString(pattern[patIdx:]) patIdx += patRuneLen if last <= nameRune && nameRune <= patRune { matched = true break } // didn't match range - reset `last` last = utf8.MaxRune continue } // not a range - check if the next rune is escaped if patRune == '\\' { patRune, patRuneLen = utf8.DecodeRuneInString(pattern[patIdx:]) patIdx += patRuneLen } // check if the rune matches if patRune == nameRune { matched = true break } // no matches yet last = patRune } if matched == negate { // failed to match - if we reached the end of the pattern, that means // we never found a closing `]` if patIdx >= patLen { return false, ErrBadPattern } break } closingIdx := indexUnescapedByte(pattern[patIdx:], ']', true) if closingIdx == -1 { // no closing `]` return false, ErrBadPattern } patIdx += closingIdx + 1 nameIdx += nameRuneLen continue case '{': startOfSegment = false beforeIdx := patIdx patIdx++ closingIdx := indexMatchedClosingAlt(pattern[patIdx:], separator != '\\') if closingIdx == -1 { // no closing `}` return false, ErrBadPattern } closingIdx += patIdx for { commaIdx := indexNextAlt(pattern[patIdx:closingIdx], separator != '\\') if commaIdx == -1 { break } commaIdx += patIdx result, err := doMatchWithSeparator(pattern[:beforeIdx]+pattern[patIdx:commaIdx]+pattern[closingIdx+1:], name, separator, validate, doublestarPatternBacktrack, doublestarNameBacktrack, starPatternBacktrack, starNameBacktrack, beforeIdx, nameIdx) if result || err != nil { return result, err } patIdx = commaIdx + 1 } return doMatchWithSeparator(pattern[:beforeIdx]+pattern[patIdx:closingIdx]+pattern[closingIdx+1:], name, separator, validate, doublestarPatternBacktrack, doublestarNameBacktrack, starPatternBacktrack, starNameBacktrack, beforeIdx, nameIdx) case '\\': if separator != '\\' { // next rune is "escaped" in the pattern - literal match if patIdx++; patIdx >= patLen { // pattern ended return false, ErrBadPattern } } fallthrough default: patRune, patRuneLen := utf8.DecodeRuneInString(pattern[patIdx:]) nameRune, nameRuneLen := utf8.DecodeRuneInString(name[nameIdx:]) if patRune != nameRune { if separator != '\\' && patIdx > 0 && pattern[patIdx-1] == '\\' { // if this rune was meant to be escaped, we need to move patIdx // back to the backslash before backtracking or validating below patIdx-- } break } patIdx += patRuneLen nameIdx += nameRuneLen startOfSegment = patRune == separator continue } } if starPatternBacktrack >= 0 { // `*` backtrack, but only if the `name` rune isn't the separator nameRune, nameRuneLen := utf8.DecodeRuneInString(name[starNameBacktrack:]) if nameRune != separator { starNameBacktrack += nameRuneLen patIdx = starPatternBacktrack nameIdx = starNameBacktrack startOfSegment = false continue } } if doublestarPatternBacktrack >= 0 { // `**` backtrack, advance `name` past next separator nameIdx = doublestarNameBacktrack for nameIdx < nameLen { nameRune, nameRuneLen := utf8.DecodeRuneInString(name[nameIdx:]) nameIdx += nameRuneLen if nameRune == separator { doublestarNameBacktrack = nameIdx patIdx = doublestarPatternBacktrack startOfSegment = true continue MATCH } } } if validate && patIdx < patLen && !doValidatePattern(pattern[patIdx:], separator) { return false, ErrBadPattern } return false, nil } if nameIdx < nameLen { // we reached the end of `pattern` before the end of `name` return false, nil } // we've reached the end of `name`; we've successfully matched if we've also // reached the end of `pattern`, or if the rest of `pattern` can match a // zero-length string return isZeroLengthPattern(pattern[patIdx:], separator) } func isZeroLengthPattern(pattern string, separator rune) (ret bool, err error) { // `/**`, `**/`, and `/**/` are special cases - a pattern such as `path/to/a/**` or `path/to/a/**/` // *should* match `path/to/a` because `a` might be a directory if pattern == "" || pattern == "*" || pattern == "**" || pattern == string(separator)+"**" || pattern == "**"+string(separator) || pattern == string(separator)+"**"+string(separator) { return true, nil } if pattern[0] == '{' { closingIdx := indexMatchedClosingAlt(pattern[1:], separator != '\\') if closingIdx == -1 { // no closing '}' return false, ErrBadPattern } closingIdx += 1 patIdx := 1 for { commaIdx := indexNextAlt(pattern[patIdx:closingIdx], separator != '\\') if commaIdx == -1 { break } commaIdx += patIdx ret, err = isZeroLengthPattern(pattern[patIdx:commaIdx]+pattern[closingIdx+1:], separator) if ret || err != nil { return } patIdx = commaIdx + 1 } return isZeroLengthPattern(pattern[patIdx:closingIdx]+pattern[closingIdx+1:], separator) } // no luck - validate the rest of the pattern if !doValidatePattern(pattern, separator) { return false, ErrBadPattern } return false, nil } // Finds the index of the first unescaped byte `c`, or negative 1. func indexUnescapedByte(s string, c byte, allowEscaping bool) int { l := len(s) for i := 0; i < l; i++ { if allowEscaping && s[i] == '\\' { // skip next byte i++ } else if s[i] == c { return i } } return -1 } // Assuming the byte before the beginning of `s` is an opening `{`, this // function will find the index of the matching `}`. That is, it'll skip over // any nested `{}` and account for escaping func indexMatchedClosingAlt(s string, allowEscaping bool) int { alts := 1 l := len(s) for i := 0; i < l; i++ { if allowEscaping && s[i] == '\\' { // skip next byte i++ } else if s[i] == '{' { alts++ } else if s[i] == '}' { if alts--; alts == 0 { return i } } } return -1 }