diff --git a/pkg/common/scan.go b/pkg/common/scan.go index c8951ed..390254b 100644 --- a/pkg/common/scan.go +++ b/pkg/common/scan.go @@ -9,27 +9,14 @@ func ScanLines(data []byte, atEOF bool) (advance int, token []byte, err error) { if atEOF && len(data) == 0 { return 0, nil, nil } - if i, j := bytes.IndexByte(data, '\r'), bytes.IndexByte(data, '\n'); i >= 0 || j >= 0 { - if i >= 0 && j >= 0 { - if i+1 == j { - // case 1: TOKEN\r\nTOKEN - return i + 2, data[:i+2], nil - } - if i < j { - // case 2: TOKEN\rTOKEN\nTOKEN - return i + 1, data[:i+1], nil - } else { - // case 3: TOKEN\nTOKEN\rTOKEN - return j + 1, data[:j+1], nil - } - } else if i >= 0 { - // case 4: TOKEN\rTOKEN - return i + 1, data[:i+1], nil - } else { - // case 5: TOKEN\nTOKEN - return j + 1, data[:j+1], nil - } + + if i := bytes.IndexByte(data, '\n'); i >= 0 { + return i + 1, data[:i+1], nil } + if i := bytes.IndexByte(data, '\r'); i >= 0 { + return i + 1, data[:i+1], nil + } + // If we're at EOF, we have a final, non-terminated line. Return it. if atEOF { return len(data), data, nil diff --git a/pkg/common/scan_test.go b/pkg/common/scan_test.go new file mode 100644 index 0000000..0ec1373 --- /dev/null +++ b/pkg/common/scan_test.go @@ -0,0 +1,90 @@ +package common + +import ( + "bytes" + "math/rand" + "regexp" + "testing" + "time" + "unsafe" +) + +var ( + n = 100 + lines = make([][]byte, n) + d = []string{"\r", "\n", "\r\n"} +) + +func init() { + rand.Seed(time.Now().UnixNano()) + for i := 0; i < n; i++ { + lines[i] = []byte(RandStr(rand.Intn(50)) + d[rand.Intn(3)] + RandStr(rand.Intn(50))) + } +} + +func BenchmarkIndexByteTwice(b *testing.B) { + for i := 0; i < b.N; i++ { + for _, line := range lines { + _ = bytes.IndexByte(line, '\n') + _ = bytes.IndexByte(line, '\r') + } + } +} + +func BenchmarkLastIndexByteTwice(b *testing.B) { + for i := 0; i < b.N; i++ { + for _, line := range lines { + _ = bytes.LastIndexByte(line, '\n') + _ = bytes.LastIndexByte(line, '\r') + } + } +} + +func BenchmarkIndexAny(b *testing.B) { + for i := 0; i < b.N; i++ { + for _, line := range lines { + _ = bytes.IndexAny(line, "\r\n") + } + } +} + +var newlineReg = regexp.MustCompile(`\r?\n|\r\n?`) + +func BenchmarkRegexFindIndex(b *testing.B) { + for i := 0; i < b.N; i++ { + for _, line := range lines { + _ = newlineReg.FindIndex(line) + } + } +} + +////////////////////////////////////////////// + +const letters = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" + +var src = rand.NewSource(time.Now().UnixNano()) + +const ( + // 6 bits to represent a letter index + letterIdBits = 6 + // All 1-bits as many as letterIdBits + letterIdMask = 1<= 0; { + if remain == 0 { + cache, remain = src.Int63(), letterIdMax + } + if idx := int(cache & letterIdMask); idx < len(letters) { + b[i] = letters[idx] + i-- + } + cache >>= letterIdBits + remain-- + } + return *(*string)(unsafe.Pointer(&b)) +}