package regexutil

import (
	"fmt"
	"regexp/syntax"
	"sort"
	"strings"
)

// RemoveStartEndAnchors removes '^' at the start of expr and '$' at the end of the expr.
func RemoveStartEndAnchors(expr string) string {
	for strings.HasPrefix(expr, "^") {
		expr = expr[1:]
	}
	for strings.HasSuffix(expr, "$") && !strings.HasSuffix(expr, "\\$") {
		expr = expr[:len(expr)-1]
	}
	return expr
}

// GetOrValues returns "or" values from the given regexp expr.
//
// It ignores start and end anchors ('^') and ('$') at the start and the end of expr.
// It returns ["foo", "bar"] for "foo|bar" regexp.
// It returns ["foo"] for "foo" regexp.
// It returns [""] for "" regexp.
// It returns an empty list if it is impossible to extract "or" values from the regexp.
func GetOrValues(expr string) []string {
	expr = RemoveStartEndAnchors(expr)
	prefix, tailExpr := Simplify(expr)
	if tailExpr == "" {
		return []string{prefix}
	}
	sre, err := syntax.Parse(tailExpr, syntax.Perl)
	if err != nil {
		panic(fmt.Errorf("BUG: unexpected error when parsing verified tailExpr=%q: %w", tailExpr, err))
	}
	orValues := getOrValuesExt(sre)

	// Sort orValues for faster index seek later
	sort.Strings(orValues)

	if len(prefix) > 0 {
		// Add prefix to orValues
		for i, orValue := range orValues {
			orValues[i] = prefix + orValue
		}
	}

	return orValues
}

func getOrValuesExt(sre *syntax.Regexp) []string {
	switch sre.Op {
	case syntax.OpCapture:
		return getOrValuesExt(sre.Sub[0])
	case syntax.OpLiteral:
		if !isLiteral(sre) {
			return nil
		}
		return []string{string(sre.Rune)}
	case syntax.OpEmptyMatch:
		return []string{""}
	case syntax.OpAlternate:
		a := make([]string, 0, len(sre.Sub))
		for _, reSub := range sre.Sub {
			ca := getOrValuesExt(reSub)
			if len(ca) == 0 {
				return nil
			}
			a = append(a, ca...)
			if len(a) > maxOrValues {
				// It is cheaper to use regexp here.
				return nil
			}
		}
		return a
	case syntax.OpCharClass:
		a := make([]string, 0, len(sre.Rune)/2)
		for i := 0; i < len(sre.Rune); i += 2 {
			start := sre.Rune[i]
			end := sre.Rune[i+1]
			for start <= end {
				a = append(a, string(start))
				start++
				if len(a) > maxOrValues {
					// It is cheaper to use regexp here.
					return nil
				}
			}
		}
		return a
	case syntax.OpConcat:
		if len(sre.Sub) < 1 {
			return []string{""}
		}
		prefixes := getOrValuesExt(sre.Sub[0])
		if len(prefixes) == 0 {
			return nil
		}
		if len(sre.Sub) == 1 {
			return prefixes
		}
		sre.Sub = sre.Sub[1:]
		suffixes := getOrValuesExt(sre)
		if len(suffixes) == 0 {
			return nil
		}
		if len(prefixes)*len(suffixes) > maxOrValues {
			// It is cheaper to use regexp here.
			return nil
		}
		a := make([]string, 0, len(prefixes)*len(suffixes))
		for _, prefix := range prefixes {
			for _, suffix := range suffixes {
				s := prefix + suffix
				a = append(a, s)
			}
		}
		return a
	default:
		return nil
	}
}

func isLiteral(sre *syntax.Regexp) bool {
	if sre.Op == syntax.OpCapture {
		return isLiteral(sre.Sub[0])
	}
	return sre.Op == syntax.OpLiteral && sre.Flags&syntax.FoldCase == 0
}

const maxOrValues = 100

// Simplify simplifies the given expr.
//
// It returns plaintext prefix and the remaining regular expression
// with dropped '^' and '$' anchors at the beginning and the end
// of the regular expression.
//
// The function removes capturing parens from the expr,
// so it cannot be used when capturing parens are necessary.
func Simplify(expr string) (string, string) {
	sre, err := syntax.Parse(expr, syntax.Perl)
	if err != nil {
		// Cannot parse the regexp. Return it all as prefix.
		return expr, ""
	}
	sre = simplifyRegexp(sre, false)
	if sre == emptyRegexp {
		return "", ""
	}
	if isLiteral(sre) {
		return string(sre.Rune), ""
	}
	var prefix string
	if sre.Op == syntax.OpConcat {
		sub0 := sre.Sub[0]
		if isLiteral(sub0) {
			prefix = string(sub0.Rune)
			sre.Sub = sre.Sub[1:]
			if len(sre.Sub) == 0 {
				return prefix, ""
			}
			sre = simplifyRegexp(sre, true)
		}
	}
	if _, err := syntax.Compile(sre); err != nil {
		// Cannot compile the regexp. Return it all as prefix.
		return expr, ""
	}
	s := sre.String()
	s = strings.ReplaceAll(s, "(?:)", "")
	s = strings.ReplaceAll(s, "(?-s:.)", ".")
	s = strings.ReplaceAll(s, "(?-m:$)", "$")
	return prefix, s
}

func simplifyRegexp(sre *syntax.Regexp, hasPrefix bool) *syntax.Regexp {
	s := sre.String()
	for {
		sre = simplifyRegexpExt(sre, hasPrefix, false)
		sre = sre.Simplify()
		if sre.Op == syntax.OpBeginText || sre.Op == syntax.OpEndText {
			sre = emptyRegexp
		}
		sNew := sre.String()
		if sNew == s {
			return sre
		}
		var err error
		sre, err = syntax.Parse(sNew, syntax.Perl)
		if err != nil {
			panic(fmt.Errorf("BUG: cannot parse simplified regexp %q: %w", sNew, err))
		}
		s = sNew
	}
}

func simplifyRegexpExt(sre *syntax.Regexp, hasPrefix, hasSuffix bool) *syntax.Regexp {
	switch sre.Op {
	case syntax.OpCapture:
		// Substitute all the capture regexps with non-capture regexps.
		sre.Op = syntax.OpAlternate
		sre.Sub[0] = simplifyRegexpExt(sre.Sub[0], hasPrefix, hasSuffix)
		if sre.Sub[0] == emptyRegexp {
			return emptyRegexp
		}
		return sre
	case syntax.OpStar, syntax.OpPlus, syntax.OpQuest, syntax.OpRepeat:
		sre.Sub[0] = simplifyRegexpExt(sre.Sub[0], hasPrefix, hasSuffix)
		if sre.Sub[0] == emptyRegexp {
			return emptyRegexp
		}
		return sre
	case syntax.OpAlternate:
		// Do not remove empty captures from OpAlternate, since this may break regexp.
		for i, sub := range sre.Sub {
			sre.Sub[i] = simplifyRegexpExt(sub, hasPrefix, hasSuffix)
		}
		return sre
	case syntax.OpConcat:
		subs := sre.Sub[:0]
		for i, sub := range sre.Sub {
			sub = simplifyRegexpExt(sub, hasPrefix || len(subs) > 0, hasSuffix || i+1 < len(sre.Sub))
			if sub != emptyRegexp {
				subs = append(subs, sub)
			}
		}
		sre.Sub = subs
		// Remove anchros from the beginning and the end of regexp, since they
		// will be added later.
		if !hasPrefix {
			for len(sre.Sub) > 0 && sre.Sub[0].Op == syntax.OpBeginText {
				sre.Sub = sre.Sub[1:]
			}
		}
		if !hasSuffix {
			for len(sre.Sub) > 0 && sre.Sub[len(sre.Sub)-1].Op == syntax.OpEndText {
				sre.Sub = sre.Sub[:len(sre.Sub)-1]
			}
		}
		if len(sre.Sub) == 0 {
			return emptyRegexp
		}
		if len(sre.Sub) == 1 {
			return sre.Sub[0]
		}
		return sre
	case syntax.OpEmptyMatch:
		return emptyRegexp
	default:
		return sre
	}
}

var emptyRegexp = &syntax.Regexp{
	Op: syntax.OpEmptyMatch,
}