1
0
mirror of https://github.com/zu1k/nali.git synced 2025-01-23 05:39:03 +08:00

Add ParseLine

This commit is contained in:
zu1k 2021-07-30 22:30:27 +08:00
parent cd68efc956
commit f92b91af23
7 changed files with 87 additions and 41 deletions

View File

@ -4,23 +4,18 @@ import (
"fmt" "fmt"
"log" "log"
"path/filepath" "path/filepath"
"regexp"
"strings" "strings"
"github.com/zu1k/nali/constant" "github.com/zu1k/nali/constant"
"github.com/zu1k/nali/internal/re"
"github.com/zu1k/nali/internal/tools" "github.com/zu1k/nali/internal/tools"
"github.com/zu1k/nali/pkg/cdn" "github.com/zu1k/nali/pkg/cdn"
) )
var ( var (
cdnDB cdn.CDN cdnDB cdn.CDN
domainRe *regexp.Regexp
) )
// init compiles the package-level domainRe pattern once, when the package is
// loaded. The pattern is unanchored: it matches a run of dot-separated labels,
// each starting with an alphanumeric character followed by up to 62
// alphanumerics or hyphens, and requires at least two labels.
func init() {
domainRe = regexp.MustCompile(`[a-zA-Z0-9][-a-zA-Z0-9]{0,62}(\.[a-zA-Z0-9][-a-zA-Z0-9]{0,62})+`)
}
// InitCDNDB assigns the package-level cdnDB the value returned by cdn.NewCDN
// for the file "cdn.json" located under constant.HomePath.
func InitCDNDB() { func InitCDNDB() {
cdnDB = cdn.NewCDN(filepath.Join(constant.HomePath, "cdn.json")) cdnDB = cdn.NewCDN(filepath.Join(constant.HomePath, "cdn.json"))
} }
@ -50,7 +45,7 @@ func find(cname string) string {
func ReplaceCDNInString(str string) (result string) { func ReplaceCDNInString(str string) (result string) {
done := make(map[string]bool) done := make(map[string]bool)
cnames := domainRe.FindAllString(str, -1) cnames := re.DomainRe.FindAllString(str, -1)
result = str result = str
for _, cname := range cnames { for _, cname := range cnames {
name := find(cname) name := find(cname)

View File

@ -1,7 +1,6 @@
package entity package entity
import ( import (
"sort"
"strings" "strings"
) )
@ -15,8 +14,7 @@ const (
) )
type Entity struct { type Entity struct {
Index uint Loc []int // s[Loc[0]:Loc[1]]
Length uint
Type EntityType Type EntityType
Text string Text string
@ -34,23 +32,20 @@ func (es Entities) Len() int {
} }
// Less reports whether the entity at position i sorts before the one at
// position j. The first expression on the return line orders by the Index
// field; the second orders by Loc[0], the start offset of the entity in the
// source string. Entities with equal keys compare as not-less in both
// directions.
func (es Entities) Less(i, j int) bool { func (es Entities) Less(i, j int) bool {
return es[i].Index < es[j].Index return es[i].Loc[0] < es[j].Loc[0]
} }
// Swap exchanges the entities at positions i and j in place.
func (es Entities) Swap(i, j int) { func (es Entities) Swap(i, j int) {
es[i],es[j] = es[j],es[i] es[i], es[j] = es[j], es[i]
} }
// String renders the entities as a single string: the Text of each entity in
// slice order, followed by "[Info] " for every entity that is not TypePlain
// and has a non-empty Info.
func (es Entities) String() string { func (es Entities) String() string {
// This sort call stands alone on its line (no second copy), unlike the
// surrounding statements.
sort.Sort(es)
var result strings.Builder var result strings.Builder
for _, entity := range es { for _, entity := range es {
result.WriteString(entity.Text) result.WriteString(entity.Text)
// Plain text and entities without Info get no annotation.
if entity.Type!=TypePlain && len(entity.Info)>0 { if entity.Type != TypePlain && len(entity.Info) > 0 {
result.WriteString("[" + entity.Info + "] ") result.WriteString("[" + entity.Info + "] ")
} }
} }
return result.String() return result.String()
} }

View File

@ -1,10 +1,58 @@
package entity package entity
import (
"sort"
"github.com/zu1k/nali/internal/re"
)
// ParseLine parse a line into entities // ParseLine parse a line into entities
func ParseLine(line string) Entities { func ParseLine(line string) Entities {
entities := make(Entities, 0) ip4sLoc := re.IPv4Re.FindAllStringIndex(line, -1)
ip6sLoc := re.IPv6Re.FindAllStringIndex(line, -1)
domainsLoc := re.DomainRe.FindAllStringIndex(line, -1)
// TODO: parse a line into entities tmp := make(Entities, 0, len(ip4sLoc)+len(ip6sLoc)+len(domainsLoc))
for _, e := range ip4sLoc {
tmp = append(tmp, &Entity{
Loc: e,
Type: TypeIPv4,
Text: line[e[0]:e[1]],
})
}
for _, e := range ip6sLoc {
tmp = append(tmp, &Entity{
Loc: e,
Type: TypeIPv6,
Text: line[e[0]:e[1]],
})
}
for _, e := range domainsLoc {
tmp = append(tmp, &Entity{
Loc: e,
Type: TypeDomain,
Text: line[e[0]:e[1]],
})
}
return entities sort.Sort(tmp)
es := make(Entities, 0, len(tmp))
idx := 0
for _, e := range tmp {
start := e.Loc[0]
if start >= idx {
if start > idx {
es = append(es, &Entity{
Loc: []int{idx, start},
Type: TypePlain,
Text: line[idx:start],
})
}
es = append(es, e)
idx = e.Loc[1]
}
}
return es
} }

View File

@ -0,0 +1,11 @@
package entity
import (
"fmt"
"testing"
)
// TestParse checks that ParseLine does not lose or reorder any input text.
// ParseLine leaves Info empty, so rendering the returned entities yields just
// their concatenated Text, which must equal the original line.
func TestParse(t *testing.T) {
	lines := []string{
		"2001:0db8:85a3:0000:0000:8a2e:0370:7334 baidu.com 1.2.3.4 baidu.com",
		"a.cn.b.com.c.org d.com",
	}
	for _, line := range lines {
		// fmt.Sprint goes through Entities.String, the same path the previous
		// fmt.Println-based version of this test exercised.
		if got := fmt.Sprint(ParseLine(line)); got != line {
			t.Errorf("ParseLine(%q) rendered as %q, want the input unchanged", line, got)
		}
	}
}

10
internal/re/re.go Normal file
View File

@ -0,0 +1,10 @@
package re
import "regexp"
// Shared regular expressions, compiled once at package load. All three are
// unanchored, so they find candidates anywhere inside a longer string.
var (
// DomainRe matches two or more dot-separated labels, each starting with an
// alphanumeric character followed by up to 62 alphanumerics or hyphens.
// A dotted-decimal address such as "1.2.3.4" also satisfies this pattern.
DomainRe = regexp.MustCompile(`[a-zA-Z0-9][-a-zA-Z0-9]{0,62}(\.[a-zA-Z0-9][-a-zA-Z0-9]{0,62})+`)
// IPv4Re matches four dot-separated decimal octets, each in the range 0-255.
IPv4Re = regexp.MustCompile(`(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)(\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)){3}`)
// IPv6Re matches, in order of preference: an "fe80:" prefix followed by up to
// four ":hhhh" groups and an optional "%zone" suffix; a full address of eight
// colon-separated groups of 1-4 hex digits; or a "::"-compressed address with
// up to seven groups on either side of the "::".
IPv6Re = regexp.MustCompile(`fe80:(:[0-9a-fA-F]{1,4}){0,4}(%\w+)?|([0-9a-fA-F]{1,4}:){7}[0-9a-fA-F]{1,4}|(([0-9a-fA-F]{1,4}:){0,6}[0-9a-fA-F]{1,4})?::(([0-9a-fA-F]{1,4}:){0,6}[0-9a-fA-F]{1,4})?`)
)

View File

@ -1,4 +1,4 @@
package app package re
import ( import (
"fmt" "fmt"
@ -14,10 +14,10 @@ var domainList = []string{
// TestDomainRe checks that every entry of domainList is matched by the domain
// regexp and prints the matches found in each entry.
func TestDomainRe(t *testing.T) { func TestDomainRe(t *testing.T) {
for _, domain := range domainList { for _, domain := range domainList {
if !domainRe.MatchString(domain) { if !DomainRe.MatchString(domain) {
// t.Error already marks the test as failed; the t.Fail that follows is
// redundant but harmless.
t.Error(domain) t.Error(domain)
t.Fail() t.Fail()
} }
fmt.Println(domainRe.FindAllString(domain, -1)) fmt.Println(DomainRe.FindAllString(domain, -1))
} }
} }

View File

@ -2,32 +2,19 @@ package tools
import ( import (
"net" "net"
"regexp"
"strings" "strings"
"github.com/zu1k/nali/internal/re"
) )
var (
ipv4re *regexp.Regexp
ipv6re0 *regexp.Regexp
ipv6re *regexp.Regexp
)
// init compiles the package-level IP address patterns once, when the package
// is loaded.
func init() {
// Unanchored dotted quad, each octet 0-255.
ipv4re = regexp.MustCompile(`(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)(\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)){3}`)
// Same alternatives as ipv6re with a leading ^ and trailing $. Because the
// alternation is not grouped, ^ applies only to the first alternative and $
// only to the last one.
ipv6re0 = regexp.MustCompile(`^fe80:(:[0-9a-fA-F]{1,4}){0,4}(%\w+)?|([0-9a-fA-F]{1,4}:){7}[0-9a-fA-F]{1,4}|(([0-9a-fA-F]{1,4}:){0,6}[0-9a-fA-F]{1,4})?::(([0-9a-fA-F]{1,4}:){0,6}[0-9a-fA-F]{1,4})?$`)
// Unanchored IPv6 pattern: fe80-prefixed, full eight-group, or "::"-compressed.
ipv6re = regexp.MustCompile(`fe80:(:[0-9a-fA-F]{1,4}){0,4}(%\w+)?|([0-9a-fA-F]{1,4}:){7}[0-9a-fA-F]{1,4}|(([0-9a-fA-F]{1,4}:){0,6}[0-9a-fA-F]{1,4})?::(([0-9a-fA-F]{1,4}:){0,6}[0-9a-fA-F]{1,4})?`)
}
// GetIP4FromString returns, in order of appearance, every non-overlapping
// substring of str matched by the IPv4 pattern. Leading and trailing spaces
// are trimmed from str before matching.
func GetIP4FromString(str string) []string { func GetIP4FromString(str string) []string {
str = strings.Trim(str, " ") str = strings.Trim(str, " ")
return ipv4re.FindAllString(str, -1) return re.IPv4Re.FindAllString(str, -1)
} }
// GetIP6FromString returns, in order of appearance, every non-overlapping
// substring of str matched by the IPv6 pattern. Leading and trailing spaces
// are trimmed from str before matching.
func GetIP6FromString(str string) []string { func GetIP6FromString(str string) []string {
str = strings.Trim(str, " ") str = strings.Trim(str, " ")
return ipv6re.FindAllString(str, -1) return re.IPv6Re.FindAllString(str, -1)
} }
const ( const (