1
0
mirror of https://github.com/zu1k/nali.git synced 2025-01-22 13:19:02 +08:00

refactor: Simplified ScanLines

Signed-off-by: zu1k <i@zu1k.com>
This commit is contained in:
zu1k 2022-08-15 09:11:10 +08:00
parent 8e68a770a7
commit 8307d14075
No known key found for this signature in database
GPG Key ID: AE381A8FB1EF2CC8
2 changed files with 97 additions and 20 deletions

View File

@ -9,27 +9,14 @@ func ScanLines(data []byte, atEOF bool) (advance int, token []byte, err error) {
if atEOF && len(data) == 0 {
return 0, nil, nil
}
if i, j := bytes.IndexByte(data, '\r'), bytes.IndexByte(data, '\n'); i >= 0 || j >= 0 {
if i >= 0 && j >= 0 {
if i+1 == j {
// case 1: TOKEN\r\nTOKEN
return i + 2, data[:i+2], nil
}
if i < j {
// case 2: TOKEN\rTOKEN\nTOKEN
return i + 1, data[:i+1], nil
} else {
// case 3: TOKEN\nTOKEN\rTOKEN
return j + 1, data[:j+1], nil
}
} else if i >= 0 {
// case 4: TOKEN\rTOKEN
return i + 1, data[:i+1], nil
} else {
// case 5: TOKEN\nTOKEN
return j + 1, data[:j+1], nil
}
if i := bytes.IndexByte(data, '\n'); i >= 0 {
return i + 1, data[:i+1], nil
}
if i := bytes.IndexByte(data, '\r'); i >= 0 {
return i + 1, data[:i+1], nil
}
// If we're at EOF, we have a final, non-terminated line. Return it.
if atEOF {
return len(data), data, nil

90
pkg/common/scan_test.go Normal file
View File

@ -0,0 +1,90 @@
package common
import (
"bytes"
"math/rand"
"regexp"
"testing"
"time"
"unsafe"
)
// Shared fixtures for the benchmarks in this file.
var (
	// d lists the line terminators a generated sample may contain:
	// a bare CR, a bare LF, and a CRLF pair.
	d = []string{"\r", "\n", "\r\n"}

	// n is the number of sample lines in the benchmark corpus.
	n = 100

	// lines is the benchmark corpus. It is allocated here with n empty
	// slots; init fills every slot with a random sample.
	lines = make([][]byte, n)
)
// init seeds the global math/rand generator from the current time and fills
// every slot of lines with a sample of the form
//
//	<0-49 random letters><terminator><0-49 random letters>
//
// where the terminator is picked at random from d.
//
// NOTE(review): rand.Seed is deprecated as of Go 1.20; confirm the module's
// minimum Go version before dropping the call.
func init() {
	rand.Seed(time.Now().UnixNano())
	for idx := 0; idx < n; idx++ {
		// Draw the pieces one at a time, left to right, so the sequence of
		// random draws matches a single concatenated expression.
		head := RandStr(rand.Intn(50))
		sep := d[rand.Intn(3)]
		tail := RandStr(rand.Intn(50))
		lines[idx] = []byte(head + sep + tail)
	}
}
// BenchmarkIndexByteTwice measures locating a line terminator with two
// forward bytes.IndexByte scans per sample: one for '\n' and one for '\r'.
// The results are discarded; only the cost of the scans is of interest.
func BenchmarkIndexByteTwice(b *testing.B) {
	for iter := 0; iter < b.N; iter++ {
		for idx := range lines {
			sample := lines[idx]
			_ = bytes.IndexByte(sample, '\n')
			_ = bytes.IndexByte(sample, '\r')
		}
	}
}
// BenchmarkLastIndexByteTwice measures locating a line terminator with two
// backward bytes.LastIndexByte scans per sample: one for '\n' and one for
// '\r'. The results are discarded; only the cost of the scans is of interest.
func BenchmarkLastIndexByteTwice(b *testing.B) {
	for iter := 0; iter < b.N; iter++ {
		for idx := range lines {
			sample := lines[idx]
			_ = bytes.LastIndexByte(sample, '\n')
			_ = bytes.LastIndexByte(sample, '\r')
		}
	}
}
// BenchmarkIndexAny measures locating a line terminator with a single
// bytes.IndexAny call that looks for either '\r' or '\n' in each sample.
// The result is discarded; only the cost of the scan is of interest.
func BenchmarkIndexAny(b *testing.B) {
	for iter := 0; iter < b.N; iter++ {
		for idx := range lines {
			_ = bytes.IndexAny(lines[idx], "\r\n")
		}
	}
}
// newlineReg matches one line terminator: "\r\n", a bare "\n", or a bare
// "\r". It is compiled once at package scope so the benchmark below times
// matching only, not compilation.
var newlineReg = regexp.MustCompile(`\r?\n|\r\n?`)

// BenchmarkRegexFindIndex measures locating a line terminator in each sample
// with the precompiled newlineReg pattern. The result is discarded; only the
// cost of the match is of interest.
func BenchmarkRegexFindIndex(b *testing.B) {
	for iter := 0; iter < b.N; iter++ {
		for idx := range lines {
			_ = newlineReg.FindIndex(lines[idx])
		}
	}
}
//////////////////////////////////////////////
const (
	// letters is the alphabet RandStr draws from: 26 lowercase followed by
	// 26 uppercase ASCII letters.
	letters = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"

	// letterIdBits is the number of bits consumed per candidate letter
	// index; 6 bits cover the values 0-63, enough for the 52 letters.
	letterIdBits = 6
	// letterIdMask has the low letterIdBits bits set and extracts one
	// candidate index from the cached random value.
	letterIdMask = 1<<letterIdBits - 1
	// letterIdMax is how many candidate indices fit into the 63 random
	// bits returned by a single Int63 call.
	letterIdMax = 63 / letterIdBits
)

// src is the time-seeded random source used exclusively by RandStr.
var src = rand.NewSource(time.Now().UnixNano())

// RandStr returns a string of n letters chosen at random from letters.
//
// Each value drawn from src is split into letterIdMax chunks of letterIdBits
// bits. A chunk is used as a letter index when it is below len(letters) and
// is otherwise thrown away, so every letter is equally likely. The result is
// filled from the last byte towards the first.
func RandStr(n int) string {
	buf := make([]byte, n)

	// bits caches the current random value; left counts the chunks that
	// can still be taken from it before a new value must be drawn.
	bits, left := src.Int63(), letterIdMax
	for pos := n - 1; pos >= 0; {
		if left == 0 {
			bits, left = src.Int63(), letterIdMax
		}

		// Take the next chunk off the cache whether or not it is usable.
		idx := int(bits & letterIdMask)
		bits >>= letterIdBits
		left--

		if idx < len(letters) {
			buf[pos] = letters[idx]
			pos--
		}
	}

	// Reinterpret the slice header as a string header instead of copying;
	// buf is not written to again after this point.
	return *(*string)(unsafe.Pointer(&buf))
}