1
0
mirror of https://github.com/zu1k/nali.git synced 2025-01-22 13:19:02 +08:00

refactor: new wry parsing implementation, supports concurrency

This commit is contained in:
zu1k 2022-10-20 15:54:20 +08:00
parent 3efa0d139c
commit b4e783e6ff
No known key found for this signature in database
GPG Key ID: AE381A8FB1EF2CC8
11 changed files with 289 additions and 383 deletions

View File

@ -1,8 +0,0 @@
package common
// Redirect markers that can appear as the first byte of a record.
const (
// RedirectMode1 marks the layout [IP][0x01][absolute offset of the country and area information].
RedirectMode1 = 0x01
// RedirectMode2 marks the layout [IP][0x02][absolute offset of the information][...] or [IP][country][...].
RedirectMode2 = 0x02
)

View File

@ -1,8 +0,0 @@
package common
// ByteToUInt32 assembles the first three bytes of data into a uint32, with
// data[0] as the least significant byte. Despite the name only three bytes are
// consumed; anything after them is ignored. It panics when data holds fewer
// than three bytes.
func ByteToUInt32(data []byte) uint32 {
	low := uint32(data[0])
	mid := uint32(data[1]) << 8
	high := uint32(data[2]) << 16
	return low | mid | high
}

View File

@ -1,90 +0,0 @@
package common
import (
"bytes"
"math/rand"
"regexp"
"testing"
"time"
"unsafe"
)
// Shared fixtures for the line-break search benchmarks.
var (
// n is the number of sample lines generated for the benchmarks.
n = 100
// lines holds the n sample inputs; each slot is filled by init.
lines = make([][]byte, n)
// d lists the line terminators that init picks from at random.
d = []string{"\r", "\n", "\r\n"}
)
// init seeds the global math/rand source from the wall clock and fills lines
// with n samples shaped as <random letters><terminator><random letters>. Each
// letter run is shorter than 50 characters and the terminator is drawn from d.
func init() {
	rand.Seed(time.Now().UnixNano())
	for idx := 0; idx < n; idx++ {
		// The three random draws happen in the same order as they appear in
		// the sample: head, separator, tail.
		head := RandStr(rand.Intn(50))
		sep := d[rand.Intn(3)]
		tail := RandStr(rand.Intn(50))
		lines[idx] = []byte(head + sep + tail)
	}
}
// BenchmarkIndexByteTwice measures finding a line break by scanning every
// sample twice with bytes.IndexByte: once for '\n' and once for '\r'.
func BenchmarkIndexByteTwice(b *testing.B) {
	for iter := 0; iter < b.N; iter++ {
		for idx := range lines {
			sample := lines[idx]
			_ = bytes.IndexByte(sample, '\n')
			_ = bytes.IndexByte(sample, '\r')
		}
	}
}
// BenchmarkLastIndexByteTwice measures finding a line break by scanning every
// sample twice from the end with bytes.LastIndexByte: once for '\n' and once
// for '\r'.
func BenchmarkLastIndexByteTwice(b *testing.B) {
	for iter := 0; iter < b.N; iter++ {
		for idx := range lines {
			sample := lines[idx]
			_ = bytes.LastIndexByte(sample, '\n')
			_ = bytes.LastIndexByte(sample, '\r')
		}
	}
}
// BenchmarkIndexAny measures finding a line break with a single
// bytes.IndexAny call that looks for either '\r' or '\n'.
func BenchmarkIndexAny(b *testing.B) {
	for iter := 0; iter < b.N; iter++ {
		for idx := range lines {
			_ = bytes.IndexAny(lines[idx], "\r\n")
		}
	}
}
// newlineReg matches a single line break: "\n", "\r\n" or a lone "\r".
var newlineReg = regexp.MustCompile(`\r?\n|\r\n?`)

// BenchmarkRegexFindIndex measures finding a line break in every sample with
// the precompiled newlineReg pattern.
func BenchmarkRegexFindIndex(b *testing.B) {
	for iter := 0; iter < b.N; iter++ {
		for idx := range lines {
			_ = newlineReg.FindIndex(lines[idx])
		}
	}
}
//////////////////////////////////////////////
// letters is the alphabet RandStr draws from.
const letters = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"

// src is the random source used by RandStr, seeded from the wall clock.
var src = rand.NewSource(time.Now().UnixNano())

const (
	// letterIdBits is the number of bits consumed per candidate letter index.
	letterIdBits = 6
	// letterIdMask selects the low letterIdBits bits of a value.
	letterIdMask = 1<<letterIdBits - 1
	// letterIdMax is how many candidate indices one 63-bit draw can supply.
	letterIdMax = 63 / letterIdBits
)

// RandStr returns a string of n characters taken from letters.
//
// Each 63-bit draw from src is consumed letterIdBits bits at a time. A
// 6-bit value that falls outside the alphabet is discarded, so a draw may
// yield fewer than letterIdMax characters. The result is filled from the last
// position towards the first.
func RandStr(n int) string {
	out := make([]byte, n)
	bits, left := src.Int63(), letterIdMax
	for pos := n - 1; pos >= 0; {
		if left == 0 {
			// Current draw is exhausted; fetch another 63 bits.
			bits, left = src.Int63(), letterIdMax
		}
		idx := int(bits & letterIdMask)
		if idx < len(letters) {
			out[pos] = letters[idx]
			pos--
		}
		bits >>= letterIdBits
		left--
	}
	// Reinterpret the slice header as a string header to skip the copy that
	// string(out) would make; out is not touched again after this point.
	return *(*string)(unsafe.Pointer(&out))
}

View File

@ -1,99 +0,0 @@
package common
import (
"fmt"
"os"
)
// FileData holds information about a database file.
type FileData struct {
// Data is the raw file content that IPDB.ReadData slices into.
Data []byte
// FilePath is presumably the path the database was loaded from — not set in the code visible here; confirm against callers.
FilePath string
// FileBase is the file handle associated with the database — NOTE(review): loaders appear to open and close it while reading; confirm.
FileBase *os.File
}
// IPDB is a common IP database: backing file data plus a read cursor.
type IPDB struct {
// Data is the backing file data that reads are served from.
Data *FileData
// Offset is the read cursor; ReadData starts here and advances it.
Offset uint32
// IPNum is the number of IP index entries — presumably filled in by the loader; confirm against callers.
IPNum uint32
}
// SetOffset moves the read cursor to offset; the next ReadData call made
// without an explicit offset starts reading there.
func (db *IPDB) SetOffset(offset uint32) {
db.Offset = offset
}
// ReadString returns the bytes starting at offset up to, but not including,
// the next NUL byte, leaving the cursor just past that terminator.
//
// If the data ends before a NUL byte is found, the bytes collected so far are
// returned; the previous implementation indexed into the empty read and
// panicked in that case.
func (db *IPDB) ReadString(offset uint32) []byte {
	db.SetOffset(offset)
	data := make([]byte, 0, 30)
	for {
		buf := db.ReadData(1)
		// ReadData yields an empty (or nil) slice once the cursor reaches or
		// passes the end of the data; treat that as an implicit terminator.
		if len(buf) == 0 || buf[0] == 0 {
			break
		}
		data = append(data, buf[0])
	}
	return data
}
// ReadData returns up to length bytes starting at the cursor and advances the
// cursor past them. When an offset argument is supplied the cursor is first
// moved to offset[0].
//
// The result aliases the database content rather than copying it. Fewer than
// length bytes are returned when the data ends early, and nil is returned when
// the cursor lies beyond the end of the data.
func (db *IPDB) ReadData(length uint32, offset ...uint32) (rs []byte) {
	if len(offset) > 0 {
		db.SetOffset(offset[0])
	}

	dataNum := uint32(len(db.Data.Data))
	if db.Offset > dataNum {
		return nil
	}

	// Clamp the length against what is left instead of computing
	// Offset+length first: that sum can wrap around uint32 and produce an end
	// position below the cursor, which makes the slice expression panic.
	if remaining := dataNum - db.Offset; length > remaining {
		length = remaining
	}

	end := db.Offset + length
	rs = db.Data.Data[db.Offset:end]
	db.Offset = end
	return rs
}
// ReadMode returns the single byte stored at offset, which callers compare
// against the redirect-mode markers. The cursor ends up at offset+1. It panics
// when no byte can be read at offset.
func (db *IPDB) ReadMode(offset uint32) byte {
	return db.ReadData(1, offset)[0]
}
// ReadUInt24 reads three bytes at the cursor and returns them as a uint32 via
// ByteToUInt32, advancing the cursor past the bytes read.
func (db *IPDB) ReadUInt24() uint32 {
	return ByteToUInt32(db.ReadData(3))
}
// ReadArea reads the area string located at offset.
//
// When the byte at offset is a redirect marker, the following three bytes give
// the position of the actual string; a position of 0 yields an empty result.
// Otherwise the string starts at offset itself.
func (db *IPDB) ReadArea(offset uint32) []byte {
	switch db.ReadMode(offset) {
	case RedirectMode1, RedirectMode2:
		target := db.ReadUInt24()
		if target == 0 {
			return []byte("")
		}
		return db.ReadString(target)
	default:
		return db.ReadString(offset)
	}
}
// GetMiddleOffset returns the position of the index entry halfway between
// start and end, where entries are indexLen bytes wide. The result is always
// start plus a whole number of entries, rounded down. It panics when indexLen
// is zero.
func GetMiddleOffset(start uint32, end uint32, indexLen uint32) uint32 {
	half := (end - start) / indexLen / 2
	return start + half*indexLen
}
// Result is a lookup result made of a country part and an area part.
type Result struct {
	Country string
	Area    string
}

// String renders the result as the country and the area separated by a single
// space.
func (r Result) String() string {
	// All operands are strings, so Sprint concatenates them without inserting
	// separators of its own.
	return fmt.Sprint(r.Country, " ", r.Area)
}

View File

@ -8,10 +8,10 @@ import (
"os" "os"
"strings" "strings"
"github.com/lionsoul2014/ip2region/binding/golang/xdb"
"github.com/zu1k/nali/pkg/common"
"github.com/zu1k/nali/pkg/download" "github.com/zu1k/nali/pkg/download"
"github.com/zu1k/nali/pkg/wry"
"github.com/lionsoul2014/ip2region/binding/golang/xdb"
) )
var DownloadUrls = []string{ var DownloadUrls = []string{
@ -59,7 +59,7 @@ func (db Ip2Region) Find(query string, params ...string) (result fmt.Stringer, e
if err != nil { if err != nil {
return nil, err return nil, err
} else { } else {
return common.Result{ return wry.Result{
Country: strings.ReplaceAll(res, "|0", ""), Country: strings.ReplaceAll(res, "|0", ""),
}, nil }, nil
} }

View File

@ -4,15 +4,13 @@ import (
"encoding/binary" "encoding/binary"
"errors" "errors"
"fmt" "fmt"
"io/ioutil" "io"
"log" "log"
"net" "net"
"os" "os"
"strings"
"github.com/zu1k/nali/pkg/common"
"github.com/zu1k/nali/pkg/download" "github.com/zu1k/nali/pkg/download"
"golang.org/x/text/encoding/simplifiedchinese" "github.com/zu1k/nali/pkg/wry"
) )
var DownloadUrls = []string{ var DownloadUrls = []string{
@ -20,13 +18,12 @@ var DownloadUrls = []string{
} }
type QQwry struct { type QQwry struct {
common.IPDB wry.IPDB[uint32]
} }
// NewQQwry new database from path // NewQQwry new database from path
func NewQQwry(filePath string) (*QQwry, error) { func NewQQwry(filePath string) (*QQwry, error) {
var fileData []byte var fileData []byte
var fileInfo common.FileData
_, err := os.Stat(filePath) _, err := os.Stat(filePath)
if err != nil && os.IsNotExist(err) { if err != nil && os.IsNotExist(err) {
@ -36,27 +33,31 @@ func NewQQwry(filePath string) (*QQwry, error) {
return nil, err return nil, err
} }
} else { } else {
fileInfo.FileBase, err = os.OpenFile(filePath, os.O_RDONLY, 0400) fileBase, err := os.OpenFile(filePath, os.O_RDONLY, 0400)
if err != nil { if err != nil {
return nil, err return nil, err
} }
defer fileInfo.FileBase.Close() defer fileBase.Close()
fileData, err = ioutil.ReadAll(fileInfo.FileBase) fileData, err = io.ReadAll(fileBase)
if err != nil { if err != nil {
return nil, err return nil, err
} }
} }
fileInfo.Data = fileData
buf := fileInfo.Data[0:8] header := fileData[0:8]
start := binary.LittleEndian.Uint32(buf[:4]) start := binary.LittleEndian.Uint32(header[:4])
end := binary.LittleEndian.Uint32(buf[4:]) end := binary.LittleEndian.Uint32(header[4:])
return &QQwry{ return &QQwry{
IPDB: common.IPDB{ IPDB: wry.IPDB[uint32]{
Data: &fileInfo, Data: fileData,
IPNum: (end-start)/7 + 1,
OffLen: 3,
IPLen: 4,
IPCnt: (end-start)/7 + 1,
IdxStart: start,
IdxEnd: end,
}, },
}, nil }, nil
} }
@ -64,87 +65,20 @@ func NewQQwry(filePath string) (*QQwry, error) {
func (db QQwry) Find(query string, params ...string) (result fmt.Stringer, err error) { func (db QQwry) Find(query string, params ...string) (result fmt.Stringer, err error) {
ip := net.ParseIP(query) ip := net.ParseIP(query)
if ip == nil { if ip == nil {
return nil, errors.New("Query should be IPv4") return nil, errors.New("query should be IPv4")
} }
ip4 := ip.To4() ip4 := ip.To4()
if ip4 == nil { if ip4 == nil {
return nil, errors.New("Query should be IPv4") return nil, errors.New("query should be IPv4")
} }
ip4uint := binary.BigEndian.Uint32(ip4) ip4uint := binary.BigEndian.Uint32(ip4)
offset := db.searchIndex(ip4uint) offset := db.SearchIndexV4(ip4uint)
if offset <= 0 { if offset <= 0 {
return nil, errors.New("Query not valid") return nil, errors.New("query not valid")
} }
var gbkCountry []byte reader := wry.NewReader(db.Data)
var gbkArea []byte reader.Parse(offset + 4)
return reader.Result.DecodeGBK(), nil
mode := db.ReadMode(offset + 4)
switch mode {
case common.RedirectMode1: // [IP][0x01][国家和地区信息的绝对偏移地址]
countryOffset := db.ReadUInt24()
mode = db.ReadMode(countryOffset)
if mode == common.RedirectMode2 {
c := db.ReadUInt24()
gbkCountry = db.ReadString(c)
countryOffset += 4
} else {
gbkCountry = db.ReadString(countryOffset)
countryOffset += uint32(len(gbkCountry) + 1)
}
gbkArea = db.ReadArea(countryOffset)
case common.RedirectMode2:
countryOffset := db.ReadUInt24()
gbkCountry = db.ReadString(countryOffset)
gbkArea = db.ReadArea(offset + 8)
default:
gbkCountry = db.ReadString(offset + 4)
gbkArea = db.ReadArea(offset + uint32(5+len(gbkCountry)))
}
enc := simplifiedchinese.GBK.NewDecoder()
country, _ := enc.String(string(gbkCountry))
area, _ := enc.String(string(gbkArea))
result = common.Result{
Country: strings.ReplaceAll(country, " CZ88.NET", ""),
Area: strings.ReplaceAll(area, " CZ88.NET", ""),
}
return result, nil
}
// searchIndex 查找索引位置
func (db *QQwry) searchIndex(ip uint32) uint32 {
header := db.ReadData(8, 0)
start := binary.LittleEndian.Uint32(header[:4])
end := binary.LittleEndian.Uint32(header[4:])
buf := make([]byte, 7)
mid := uint32(0)
ipUint := uint32(0)
for {
mid = common.GetMiddleOffset(start, end, 7)
buf = db.ReadData(7, mid)
ipUint = binary.LittleEndian.Uint32(buf[:4])
if end-start == 7 {
offset := common.ByteToUInt32(buf[4:])
buf = db.ReadData(7)
if ip < binary.LittleEndian.Uint32(buf[:4]) {
return offset
}
return 0
}
if ipUint > ip {
end = mid
} else if ipUint < ip {
start = mid
} else if ipUint == ip {
return common.ByteToUInt32(buf[4:])
}
}
} }

63
pkg/wry/index.go Normal file
View File

@ -0,0 +1,63 @@
package wry
import (
"encoding/binary"
)
// SearchIndexV4 binary-searches the index between db.IdxStart and db.IdxEnd
// for the entry covering ip and returns the 3-byte record offset stored in
// that entry. Each index entry is IPLen bytes of IP followed by OffLen bytes of
// offset; the IP is decoded as a little-endian 32-bit value.
//
// The receiver used to be written IPDB[uint32]. In Go that does not restrict
// the method to the uint32 instantiation: it declares a type parameter named
// "uint32" which shadows the builtin inside the method. The parameter is
// spelled T here so the signature reads as what it actually is; the method set
// of every instantiation is unchanged.
//
// The search stops on an exact match, when exactly one entry separates the
// bounds, or when the window is too narrow to shrink further (for example
// IdxStart == IdxEnd). The last case previously looped forever.
func (db *IPDB[T]) SearchIndexV4(ip T) T {
	ipLen := db.IPLen
	entryLen := T(db.OffLen + db.IPLen)

	l, r := db.IdxStart, db.IdxEnd
	for {
		span := r - l
		mid := span/entryLen/2*entryLen + l
		entry := db.Data[mid : mid+entryLen]
		ipc := T(binary.LittleEndian.Uint32(entry[:ipLen]))

		switch {
		case span == entryLen:
			// Two candidates left: take the upper one when ip reaches its
			// start, the lower one (entry, since mid == l here) otherwise.
			if ip >= T(binary.LittleEndian.Uint32(db.Data[r:r+T(ipLen)])) {
				entry = db.Data[r : r+entryLen]
			}
			return T(Bytes3ToUint32(entry[ipLen:entryLen]))
		case ipc == ip, span < 2*entryLen:
			// Exact hit, or a degenerate window where mid == l and neither
			// bound can move any more.
			return T(Bytes3ToUint32(entry[ipLen:entryLen]))
		case ipc > ip:
			r = mid
		default:
			l = mid
		}
	}
}
// SearchIndexV6 binary-searches the index between db.IdxStart and db.IdxEnd
// for the entry covering ip and returns the 3-byte record offset stored in
// that entry. Each index entry is IPLen bytes of IP followed by OffLen bytes of
// offset; the IP is decoded as a little-endian 64-bit value, so IPLen must be
// at least 8.
//
// The receiver used to be written IPDB[uint64]. In Go that does not restrict
// the method to the uint64 instantiation: it declares a type parameter named
// "uint64" which shadows the builtin inside the method. The parameter is
// spelled T here so the signature reads as what it actually is; the method set
// of every instantiation is unchanged.
//
// The search stops on an exact match, when exactly one entry separates the
// bounds, or when the window is too narrow to shrink further (for example
// IdxStart == IdxEnd). The last case previously looped forever.
func (db *IPDB[T]) SearchIndexV6(ip T) uint32 {
	ipLen := db.IPLen
	entryLen := T(db.OffLen + db.IPLen)

	l, r := db.IdxStart, db.IdxEnd
	for {
		span := r - l
		mid := span/entryLen/2*entryLen + l
		entry := db.Data[mid : mid+entryLen]
		ipc := T(binary.LittleEndian.Uint64(entry[:ipLen]))

		switch {
		case span == entryLen:
			// Two candidates left: take the upper one when ip reaches its
			// start, the lower one (entry, since mid == l here) otherwise.
			if ip >= T(binary.LittleEndian.Uint64(db.Data[r:r+T(ipLen)])) {
				entry = db.Data[r : r+entryLen]
			}
			return Bytes3ToUint32(entry[ipLen:entryLen])
		case ipc == ip, span < 2*entryLen:
			// Exact hit, or a degenerate window where mid == l and neither
			// bound can move any more.
			return Bytes3ToUint32(entry[ipLen:entryLen])
		case ipc > ip:
			r = mid
		default:
			l = mid
		}
	}
}

47
pkg/wry/parse.go Normal file
View File

@ -0,0 +1,47 @@
package wry
// Redirect markers that can appear as the first byte of a record.
const (
// RedirectMode1 marks the layout [IP][0x01][absolute offset of the country and area information].
RedirectMode1 = 0x01
// RedirectMode2 marks the layout [IP][0x02][absolute offset of the information][...] or [IP][country][...].
RedirectMode2 = 0x02
)
// Parse decodes the record at offset into r.Result. An offset of 0 means
// "continue from the current position" and performs no seek.
//
// The first byte selects the layout: RedirectMode1 jumps to the 3-byte target
// that follows and parses the whole record there; RedirectMode2 reads the
// country through a redirect and the area from the bytes after it; any other
// value is treated as the first byte of an inline country string.
func (r *Reader) Parse(offset uint32) {
	if offset != 0 {
		r.seekAbs(offset)
	}

	mode := r.readMode()
	if mode == RedirectMode1 {
		r.readOffset(true)
		r.Parse(0)
		return
	}

	if mode == RedirectMode2 {
		r.Result.Country = r.parseRedMode2()
	} else {
		// Not a marker: un-read the byte so the string starts at it.
		r.seekBack()
		r.Result.Country = r.readString(true)
	}
	r.Result.Area = r.readArea()
}
// parseRedMode2 follows the 3-byte redirect at the cursor, reads the string
// found at the target, and then returns the cursor to the byte right after the
// redirect so parsing can continue from there.
func (r *Reader) parseRedMode2() string {
	r.readOffset(true)
	defer r.seekBack()
	return r.readString(false)
}
// readArea reads the area string at the cursor. A leading redirect marker is
// followed to its 3-byte target, with a target of 0 meaning "no area"; any
// other leading byte is taken as the first byte of the string itself.
func (r *Reader) readArea() string {
	switch r.readMode() {
	case RedirectMode1, RedirectMode2:
		if r.readOffset(true) == 0 {
			return ""
		}
	default:
		// Not a marker: step back so the string includes this byte.
		r.seekBack()
	}
	return r.readString(false)
}

116
pkg/wry/wry.go Normal file
View File

@ -0,0 +1,116 @@
package wry
import (
"bytes"
"fmt"
"strings"
"golang.org/x/text/encoding/simplifiedchinese"
)
// IPDB is a common IP database held in memory, generic over the integer type
// T used for index positions and counts.
type IPDB[T ~uint32 | ~uint64] struct {
// Data is the complete database content.
Data []byte
// OffLen is the width in bytes of the record offset stored in each index entry.
OffLen uint8
// IPLen is the width in bytes of the IP stored at the start of each index entry.
IPLen uint8
// IPCnt is the number of index entries as computed by the loader.
IPCnt T
// IdxStart and IdxEnd are the positions in Data that bound the index search.
IdxStart T
IdxEnd T
}
// Reader is a cursor over raw database bytes that collects the parsed Result.
// It remembers one previous position so a single step can be undone with
// seekBack.
type Reader struct {
// s is the data being read.
s []byte
i uint32 // current reading index
l uint32 // last reading index
// Result receives the country and area found by Parse.
Result Result
}
// NewReader returns a Reader over data, positioned at index 0 with an empty
// Result.
func NewReader(data []byte) Reader {
	var r Reader
	r.s = data
	return r
}
// seekAbs moves the cursor to the absolute position offset, remembering the
// previous position for seekBack.
func (r *Reader) seekAbs(offset uint32) {
	r.l, r.i = r.i, offset
}
// seek moves the cursor by offset relative to its current position (negative
// values move backwards), remembering the previous position for seekBack.
func (r *Reader) seek(offset int64) {
	prev := r.i
	r.l, r.i = prev, uint32(int64(prev)+offset)
}
// seekBack returns the cursor to the last remembered index. Only one previous
// index is kept, so it can undo a single step: calling it again without an
// intervening move has no further effect.
func (r *Reader) seekBack() {
r.i = r.l
}
// read returns a fresh copy of the next length bytes and advances the cursor
// by length, remembering the previous position. If fewer than length bytes
// remain, the tail of the result is left zero-filled.
func (r *Reader) read(length uint32) []byte {
	start := r.i
	out := make([]byte, length)
	copy(out, r.s[start:])
	r.l, r.i = start, start+length
	return out
}
// readMode returns the byte at the cursor and advances the cursor by one,
// remembering the previous position so the byte can be un-read with seekBack.
func (r *Reader) readMode() (mode byte) {
	pos := r.i
	mode = r.s[pos]
	r.l, r.i = pos, pos+1
	return mode
}
// readOffset reads the next 3 bytes as a uint32 offset and returns it. When
// follow is true the cursor then jumps to that offset, and the position just
// after the 3 bytes becomes the one seekBack returns to.
func (r *Reader) readOffset(follow bool) uint32 {
	target := Bytes3ToUint32(r.read(3))
	if !follow {
		return target
	}
	r.l, r.i = r.i, target
	return target
}
// readString returns the bytes from the cursor up to, but not including, the
// next NUL byte. When seek is true the cursor is moved past the string and its
// terminator and the previous position is remembered; otherwise the cursor is
// left untouched.
//
// If no NUL byte remains, everything up to the end of the data is returned and
// (with seek) the cursor stops at the end. Previously the -1 returned by
// bytes.IndexByte was converted to uint32 and the resulting slice expression
// panicked.
func (r *Reader) readString(seek bool) string {
	rest := r.s[r.i:]

	length := bytes.IndexByte(rest, 0)
	consumed := length + 1 // string plus its terminator
	if length < 0 {
		length = len(rest)
		consumed = length // no terminator to skip
	}

	str := string(rest[:length])
	if seek {
		r.l = r.i
		r.i += uint32(consumed)
	}
	return str
}
// Result is the country/area pair produced by Reader.Parse.
type Result struct {
// Country is the first string of the record, stored as read until DecodeGBK converts it.
Country string
// Area is the second string of the record, stored as read until DecodeGBK converts it.
Area string
}
// DecodeGBK converts Country and Area in place from GBK using one shared
// decoder and returns r for chaining. Decoding errors are ignored; each field
// is set to whatever the decoder returned.
func (r *Result) DecodeGBK() *Result {
	decoder := simplifiedchinese.GBK.NewDecoder()
	for _, field := range []*string{&r.Country, &r.Area} {
		*field, _ = decoder.String(*field)
	}
	return r
}
// Trim removes every "CZ88.NET" occurrence from Country and Area in place,
// strips the surrounding whitespace that leaves behind, and returns r for
// chaining.
func (r *Result) Trim() *Result {
	clean := func(s string) string {
		return strings.TrimSpace(strings.ReplaceAll(s, "CZ88.NET", ""))
	}
	r.Country, r.Area = clean(r.Country), clean(r.Area)
	return r
}
// String renders the trimmed country and area separated by a space, with
// outer whitespace removed so an empty part leaves no stray space. The
// receiver is a copy, so the caller's Result is not modified by the trimming.
func (r Result) String() string {
	cleaned := r.Trim()
	joined := fmt.Sprintf("%s %s", cleaned.Country, cleaned.Area)
	return strings.TrimSpace(joined)
}
// Bytes3ToUint32 assembles the first three bytes of data into a uint32, with
// data[0] as the least significant byte. Bytes beyond the third are ignored.
// It panics when data holds fewer than three bytes.
func Bytes3ToUint32(data []byte) uint32 {
	low := uint32(data[0])
	mid := uint32(data[1]) << 8
	high := uint32(data[2]) << 16
	return low | mid | high
}

View File

@ -33,12 +33,12 @@ const (
func getData() (data []byte, err error) { func getData() (data []byte, err error) {
data, err = common.GetHttpClient().Get(zx) data, err = common.GetHttpClient().Get(zx)
file7z, err := ioutil.TempFile("", "*") file7z, err := os.CreateTemp("", "*")
if err != nil { if err != nil {
return nil, err return nil, err
} }
defer os.Remove(file7z.Name()) defer os.Remove(file7z.Name())
if err := ioutil.WriteFile(file7z.Name(), data, 0644); err == nil { if err := os.WriteFile(file7z.Name(), data, 0644); err == nil {
return Un7z(file7z.Name()) return Un7z(file7z.Name())
} }
return return
@ -51,11 +51,11 @@ func Un7z(filePath string) (data []byte, err error) {
} }
defer sz.Close() defer sz.Close()
fileNoNeed, err := ioutil.TempFile("", "*") fileNoNeed, err := os.CreateTemp("", "*")
if err != nil { if err != nil {
return nil, err return nil, err
} }
fileNeed, err := ioutil.TempFile("", "*") fileNeed, err := os.CreateTemp("", "*")
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -66,7 +66,7 @@ func Un7z(filePath string) (data []byte, err error) {
for { for {
hdr, err := sz.Next() hdr, err := sz.Next()
if err == io.EOF { if err == io.EOF {
break // End of archive break // End of archive
} }
if err != nil { if err != nil {
return nil, err return nil, err

View File

@ -4,23 +4,20 @@ import (
"encoding/binary" "encoding/binary"
"errors" "errors"
"fmt" "fmt"
"io/ioutil" "io"
"log" "log"
"math/big"
"net" "net"
"os" "os"
"strings"
"github.com/zu1k/nali/pkg/common" "github.com/zu1k/nali/pkg/wry"
) )
type ZXwry struct { type ZXwry struct {
common.IPDB wry.IPDB[uint64]
} }
func NewZXwry(filePath string) (*ZXwry, error) { func NewZXwry(filePath string) (*ZXwry, error) {
var fileData []byte var fileData []byte
var fileInfo common.FileData
_, err := os.Stat(filePath) _, err := os.Stat(filePath)
if err != nil && os.IsNotExist(err) { if err != nil && os.IsNotExist(err) {
@ -30,99 +27,53 @@ func NewZXwry(filePath string) (*ZXwry, error) {
return nil, err return nil, err
} }
} else { } else {
fileInfo.FileBase, err = os.OpenFile(filePath, os.O_RDONLY, 0400) fileBase, err := os.OpenFile(filePath, os.O_RDONLY, 0400)
if err != nil { if err != nil {
return nil, err return nil, err
} }
defer fileInfo.FileBase.Close() defer fileBase.Close()
fileData, err = ioutil.ReadAll(fileInfo.FileBase) fileData, err = io.ReadAll(fileBase)
if err != nil { if err != nil {
return nil, err return nil, err
} }
} }
fileInfo.Data = fileData header := fileData[:24]
offLen := header[6]
ipLen := header[7]
start := binary.LittleEndian.Uint64(header[16:24])
counts := binary.LittleEndian.Uint64(header[8:16])
end := start + counts*11
return &ZXwry{ return &ZXwry{
IPDB: common.IPDB{ IPDB: wry.IPDB[uint64]{
Data: &fileInfo, Data: fileData,
OffLen: offLen,
IPLen: ipLen,
IPCnt: counts,
IdxStart: start,
IdxEnd: end,
}, },
}, nil }, nil
} }
func (db ZXwry) Find(query string, params ...string) (result fmt.Stringer, err error) { func (db *ZXwry) Find(query string, _ ...string) (result fmt.Stringer, err error) {
ip := net.ParseIP(query) ip := net.ParseIP(query)
if ip == nil { if ip == nil {
return nil, errors.New("Query should be IPv6") return nil, errors.New("query should be IPv6")
} }
ip6 := ip.To16() ip6 := ip.To16()
if ip6 == nil { if ip6 == nil {
return nil, errors.New("Query should be IPv6") return nil, errors.New("query should be IPv6")
} }
ip6 = ip6[:8]
ipu64 := binary.BigEndian.Uint64(ip6)
tp := big.NewInt(0) offset := db.SearchIndexV6(ipu64)
op := big.NewInt(0) reader := wry.NewReader(db.Data)
tp.SetBytes(ip6) reader.Parse(offset)
op.SetString("18446744073709551616", 10) return reader.Result, nil
op.Div(tp, op)
tp.SetString("FFFFFFFFFFFFFFFF", 16)
op.And(op, tp)
ipv6 := op.Uint64()
offset := db.searchIndex(ipv6)
country, area := db.getAddr(offset)
result = common.Result{
Country: strings.ReplaceAll(country, " CZ88.NET", ""),
Area: strings.ReplaceAll(area, " CZ88.NET", ""),
}
return result, nil
}
func (db *ZXwry) getAddr(offset uint32) (string, string) {
mode := db.ReadMode(offset)
if mode == common.RedirectMode1 {
offset = db.ReadUInt24()
return db.getAddr(offset)
}
realOffset := db.Offset - 1
c1 := db.ReadArea(realOffset)
if mode == common.RedirectMode2 {
db.Offset = 4 + realOffset
} else {
db.Offset = realOffset + uint32(1+len(c1))
}
c2 := db.ReadArea(db.Offset)
return string(c1), string(c2)
}
func (db *ZXwry) searchIndex(ip uint64) uint32 {
header := db.ReadData(16, 8)
start := binary.LittleEndian.Uint32(header[8:])
counts := binary.LittleEndian.Uint32(header[:8])
end := start + counts*11
buf := make([]byte, 11)
for {
mid := common.GetMiddleOffset(start, end, 11)
buf = db.ReadData(11, mid)
ipBytes := binary.LittleEndian.Uint64(buf[:8])
if end-start == 11 {
if ip >= binary.LittleEndian.Uint64(db.ReadData(8, end)) {
buf = db.ReadData(11, end)
}
return common.ByteToUInt32(buf[8:])
}
if ipBytes > ip {
end = mid
} else if ipBytes < ip {
start = mid
} else if ipBytes == ip {
return common.ByteToUInt32(buf[8:])
}
}
} }