From 7a03f1589fcecc3986f12aac0d4168cd307b040d Mon Sep 17 00:00:00 2001 From: zu1k Date: Fri, 30 Jul 2021 22:30:27 +0800 Subject: [PATCH] Add ParseLine --- internal/app/cdn.go | 11 ++----- internal/entity/entity.go | 15 +++------ internal/entity/parse.go | 54 +++++++++++++++++++++++++++++++-- internal/entity/parse_test.go | 11 +++++++ internal/re/re.go | 10 ++++++ internal/{app => re}/re_test.go | 6 ++-- internal/tools/ipparser.go | 21 +++---------- 7 files changed, 87 insertions(+), 41 deletions(-) create mode 100644 internal/entity/parse_test.go create mode 100644 internal/re/re.go rename internal/{app => re}/re_test.go (74%) diff --git a/internal/app/cdn.go b/internal/app/cdn.go index c9748e4..aeb4358 100644 --- a/internal/app/cdn.go +++ b/internal/app/cdn.go @@ -4,23 +4,18 @@ import ( "fmt" "log" "path/filepath" - "regexp" "strings" "github.com/zu1k/nali/constant" + "github.com/zu1k/nali/internal/re" "github.com/zu1k/nali/internal/tools" "github.com/zu1k/nali/pkg/cdn" ) var ( - cdnDB cdn.CDN - domainRe *regexp.Regexp + cdnDB cdn.CDN ) -func init() { - domainRe = regexp.MustCompile(`[a-zA-Z0-9][-a-zA-Z0-9]{0,62}(\.[a-zA-Z0-9][-a-zA-Z0-9]{0,62})+`) -} - func InitCDNDB() { cdnDB = cdn.NewCDN(filepath.Join(constant.HomePath, "cdn.json")) } @@ -50,7 +45,7 @@ func find(cname string) string { func ReplaceCDNInString(str string) (result string) { done := make(map[string]bool) - cnames := domainRe.FindAllString(str, -1) + cnames := re.DomainRe.FindAllString(str, -1) result = str for _, cname := range cnames { name := find(cname) diff --git a/internal/entity/entity.go b/internal/entity/entity.go index 7ed2123..5e6320b 100644 --- a/internal/entity/entity.go +++ b/internal/entity/entity.go @@ -1,7 +1,6 @@ package entity import ( - "sort" "strings" ) @@ -15,8 +14,7 @@ const ( ) type Entity struct { - Index uint - Length uint + Loc []int // s[Loc[0]:Loc[1]] Type EntityType Text string @@ -34,23 +32,20 @@ func (es Entities) Len() int { } func (es Entities) Less(i, j int) bool { - return es[i].Index < es[j].Index + return es[i].Loc[0] < es[j].Loc[0] } func (es Entities) Swap(i, j int) { - es[i],es[j] = es[j],es[i] + es[i], es[j] = es[j], es[i] } func (es Entities) String() string { - sort.Sort(es) - var result strings.Builder for _, entity := range es { result.WriteString(entity.Text) - if entity.Type!=TypePlain && len(entity.Info)>0 { + if entity.Type != TypePlain && len(entity.Info) > 0 { result.WriteString("[" + entity.Info + "] ") } } - return result.String() -} \ No newline at end of file +} diff --git a/internal/entity/parse.go b/internal/entity/parse.go index 861b821..76f43fc 100644 --- a/internal/entity/parse.go +++ b/internal/entity/parse.go @@ -1,10 +1,58 @@ package entity +import ( + "sort" + + "github.com/zu1k/nali/internal/re" +) + // ParseLine parse a line into entities func ParseLine(line string) Entities { - entities := make(Entities, 0) + ip4sLoc := re.IPv4Re.FindAllStringIndex(line, -1) + ip6sLoc := re.IPv6Re.FindAllStringIndex(line, -1) + domainsLoc := re.DomainRe.FindAllStringIndex(line, -1) - // TODO: parse a line into entities + tmp := make(Entities, 0, len(ip4sLoc)+len(ip6sLoc)+len(domainsLoc)) + for _, e := range ip4sLoc { + tmp = append(tmp, &Entity{ + Loc: e, + Type: TypeIPv4, + Text: line[e[0]:e[1]], + }) + } + for _, e := range ip6sLoc { + tmp = append(tmp, &Entity{ + Loc: e, + Type: TypeIPv6, + Text: line[e[0]:e[1]], + }) + } + for _, e := range domainsLoc { + tmp = append(tmp, &Entity{ + Loc: e, + Type: TypeDomain, + Text: line[e[0]:e[1]], + }) + } - return entities + sort.Sort(tmp) + es := make(Entities, 0, len(tmp)) + + idx := 0 + for _, e := range tmp { + start := e.Loc[0] + if start >= idx { + if start > idx { + es = append(es, &Entity{ + Loc: []int{idx, start}, + Type: TypePlain, + Text: line[idx:start], + }) + } + es = append(es, e) + idx = e.Loc[1] + } + } + + return es } diff --git a/internal/entity/parse_test.go b/internal/entity/parse_test.go new file mode 100644 index 0000000..4319a5d --- /dev/null +++ b/internal/entity/parse_test.go @@ -0,0 +1,11 @@ +package entity + +import ( + "fmt" + "testing" +) + +func TestParse(t *testing.T) { + fmt.Println(ParseLine("2001:0db8:85a3:0000:0000:8a2e:0370:7334 baidu.com 1.2.3.4 baidu.com")) + fmt.Println(ParseLine("a.cn.b.com.c.org d.com")) +} diff --git a/internal/re/re.go b/internal/re/re.go new file mode 100644 index 0000000..532132e --- /dev/null +++ b/internal/re/re.go @@ -0,0 +1,10 @@ +package re + +import "regexp" + +var ( + DomainRe = regexp.MustCompile(`[a-zA-Z0-9][-a-zA-Z0-9]{0,62}(\.[a-zA-Z0-9][-a-zA-Z0-9]{0,62})+`) + + IPv4Re = regexp.MustCompile(`(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)(\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)){3}`) + IPv6Re = regexp.MustCompile(`fe80:(:[0-9a-fA-F]{1,4}){0,4}(%\w+)?|([0-9a-fA-F]{1,4}:){7}[0-9a-fA-F]{1,4}|(([0-9a-fA-F]{1,4}:){0,6}[0-9a-fA-F]{1,4})?::(([0-9a-fA-F]{1,4}:){0,6}[0-9a-fA-F]{1,4})?`) +) diff --git a/internal/app/re_test.go b/internal/re/re_test.go similarity index 74% rename from internal/app/re_test.go rename to internal/re/re_test.go index 9cb7e36..b3cf43b 100644 --- a/internal/app/re_test.go +++ b/internal/re/re_test.go @@ -1,4 +1,4 @@ -package app +package re import ( "fmt" @@ -14,10 +14,10 @@ var domainList = []string{ func TestDomainRe(t *testing.T) { for _, domain := range domainList { - if !domainRe.MatchString(domain) { + if !DomainRe.MatchString(domain) { t.Error(domain) t.Fail() } - fmt.Println(domainRe.FindAllString(domain, -1)) + fmt.Println(DomainRe.FindAllString(domain, -1)) } } diff --git a/internal/tools/ipparser.go b/internal/tools/ipparser.go index 69c459e..ac9118b 100644 --- a/internal/tools/ipparser.go +++ b/internal/tools/ipparser.go @@ -2,32 +2,19 @@ package tools import ( "net" - "regexp" "strings" + + "github.com/zu1k/nali/internal/re" ) -var ( - ipv4re *regexp.Regexp - - ipv6re0 *regexp.Regexp - ipv6re *regexp.Regexp -) - -func init() { - ipv4re = regexp.MustCompile(`(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)(\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)){3}`) - - ipv6re0 = regexp.MustCompile(`^fe80:(:[0-9a-fA-F]{1,4}){0,4}(%\w+)?|([0-9a-fA-F]{1,4}:){7}[0-9a-fA-F]{1,4}|(([0-9a-fA-F]{1,4}:){0,6}[0-9a-fA-F]{1,4})?::(([0-9a-fA-F]{1,4}:){0,6}[0-9a-fA-F]{1,4})?$`) - ipv6re = regexp.MustCompile(`fe80:(:[0-9a-fA-F]{1,4}){0,4}(%\w+)?|([0-9a-fA-F]{1,4}:){7}[0-9a-fA-F]{1,4}|(([0-9a-fA-F]{1,4}:){0,6}[0-9a-fA-F]{1,4})?::(([0-9a-fA-F]{1,4}:){0,6}[0-9a-fA-F]{1,4})?`) -} - func GetIP4FromString(str string) []string { str = strings.Trim(str, " ") - return ipv4re.FindAllString(str, -1) + return re.IPv4Re.FindAllString(str, -1) } func GetIP6FromString(str string) []string { str = strings.Trim(str, " ") - return ipv6re.FindAllString(str, -1) + return re.IPv6Re.FindAllString(str, -1) } const (