1
0
mirror of https://github.com/zu1k/nali.git synced 2025-01-22 21:29:02 +08:00

Add ParseLine

This commit is contained in:
zu1k 2021-07-30 22:30:27 +08:00
parent e7ea663d27
commit 7a03f1589f
7 changed files with 87 additions and 41 deletions

View File

@ -4,23 +4,18 @@ import (
"fmt"
"log"
"path/filepath"
"regexp"
"strings"
"github.com/zu1k/nali/constant"
"github.com/zu1k/nali/internal/re"
"github.com/zu1k/nali/internal/tools"
"github.com/zu1k/nali/pkg/cdn"
)
var (
cdnDB cdn.CDN
domainRe *regexp.Regexp
cdnDB cdn.CDN
)
func init() {
domainRe = regexp.MustCompile(`[a-zA-Z0-9][-a-zA-Z0-9]{0,62}(\.[a-zA-Z0-9][-a-zA-Z0-9]{0,62})+`)
}
func InitCDNDB() {
cdnDB = cdn.NewCDN(filepath.Join(constant.HomePath, "cdn.json"))
}
@ -50,7 +45,7 @@ func find(cname string) string {
func ReplaceCDNInString(str string) (result string) {
done := make(map[string]bool)
cnames := domainRe.FindAllString(str, -1)
cnames := re.DomainRe.FindAllString(str, -1)
result = str
for _, cname := range cnames {
name := find(cname)

View File

@ -1,7 +1,6 @@
package entity
import (
"sort"
"strings"
)
@ -15,8 +14,7 @@ const (
)
type Entity struct {
Index uint
Length uint
Loc []int // s[Loc[0]:Loc[1]]
Type EntityType
Text string
@ -34,23 +32,20 @@ func (es Entities) Len() int {
}
// Less reports whether entity i starts before entity j, comparing the start
// offsets stored in Loc[0]. It implements part of sort.Interface.
//
// Ordering is based on Loc because that is the position field ParseLine
// populates.
func (es Entities) Less(i, j int) bool {
	return es[i].Loc[0] < es[j].Loc[0]
}
// Swap exchanges the entities at positions i and j. It implements part of
// sort.Interface.
func (es Entities) Swap(i, j int) {
	// Exactly one exchange: performing it twice would restore the original
	// order and make sorting a no-op.
	es[i], es[j] = es[j], es[i]
}
func (es Entities) String() string {
sort.Sort(es)
var result strings.Builder
for _, entity := range es {
result.WriteString(entity.Text)
if entity.Type!=TypePlain && len(entity.Info)>0 {
if entity.Type != TypePlain && len(entity.Info) > 0 {
result.WriteString("[" + entity.Info + "] ")
}
}
return result.String()
}

View File

@ -1,10 +1,58 @@
package entity
import (
"sort"
"github.com/zu1k/nali/internal/re"
)
// ParseLine splits line into an ordered sequence of entities that together
// cover the whole line.
//
// Every match of re.IPv4Re, re.IPv6Re and re.DomainRe becomes an entity of
// type TypeIPv4, TypeIPv6 or TypeDomain respectively; the text between (and
// after) matches becomes TypePlain entities. Each entity records its byte
// range in Loc, so that Text == line[Loc[0]:Loc[1]].
//
// When matches overlap, the one that starts first wins and any match that
// begins before the end of the previously accepted entity is dropped. Matches
// that start at the same offset are resolved in the order IPv4, IPv6, domain.
func ParseLine(line string) Entities {
	ip4sLoc := re.IPv4Re.FindAllStringIndex(line, -1)
	ip6sLoc := re.IPv6Re.FindAllStringIndex(line, -1)
	domainsLoc := re.DomainRe.FindAllStringIndex(line, -1)

	tmp := make(Entities, 0, len(ip4sLoc)+len(ip6sLoc)+len(domainsLoc))
	collect := func(locs [][]int, typ EntityType) {
		for _, loc := range locs {
			tmp = append(tmp, &Entity{
				Loc:  loc,
				Type: typ,
				Text: line[loc[0]:loc[1]],
			})
		}
	}
	collect(ip4sLoc, TypeIPv4)
	collect(ip6sLoc, TypeIPv6)
	collect(domainsLoc, TypeDomain)

	// A stable sort keeps the insertion order (IPv4, IPv6, domain) for matches
	// that start at the same offset, e.g. "1.2.3.4" matches both the IPv4 and
	// the domain pattern and must deterministically be reported as IPv4.
	sort.Stable(tmp)

	es := make(Entities, 0, len(tmp))
	idx := 0 // end offset of the text already emitted
	for _, e := range tmp {
		start := e.Loc[0]
		if start < idx {
			// Overlaps an entity that was already accepted.
			continue
		}
		if start > idx {
			es = append(es, &Entity{
				Loc:  []int{idx, start},
				Type: TypePlain,
				Text: line[idx:start],
			})
		}
		es = append(es, e)
		idx = e.Loc[1]
	}

	// Keep whatever follows the last match; without this the tail of the line
	// (or the entire line when nothing matched) would be lost.
	if idx < len(line) {
		es = append(es, &Entity{
			Loc:  []int{idx, len(line)},
			Type: TypePlain,
			Text: line[idx:],
		})
	}
	return es
}

View File

@ -0,0 +1,11 @@
package entity
import (
"fmt"
"testing"
)
// TestParse checks that ParseLine splits a line into the expected sequence of
// entity texts and that those texts, concatenated, reproduce the input line.
func TestParse(t *testing.T) {
	cases := []struct {
		line string
		want []string
	}{
		{
			line: "2001:0db8:85a3:0000:0000:8a2e:0370:7334 baidu.com 1.2.3.4 baidu.com",
			want: []string{"2001:0db8:85a3:0000:0000:8a2e:0370:7334", " ", "baidu.com", " ", "1.2.3.4", " ", "baidu.com"},
		},
		{
			line: "a.cn.b.com.c.org d.com",
			want: []string{"a.cn.b.com.c.org", " ", "d.com"},
		},
	}

	for _, c := range cases {
		es := ParseLine(c.line)
		fmt.Println(es)

		if len(es) != len(c.want) {
			t.Errorf("ParseLine(%q): got %d entities, want %d", c.line, len(es), len(c.want))
			continue
		}
		joined := ""
		for i, e := range es {
			if e.Text != c.want[i] {
				t.Errorf("ParseLine(%q): entity %d text = %q, want %q", c.line, i, e.Text, c.want[i])
			}
			joined += e.Text
		}
		if joined != c.line {
			t.Errorf("ParseLine(%q): entity texts reassemble to %q", c.line, joined)
		}
	}
}

10
internal/re/re.go Normal file
View File

@ -0,0 +1,10 @@
package re
import "regexp"
// Shared, precompiled patterns. None of them is anchored, so each one finds
// matches embedded anywhere in the input.
var (
// DomainRe matches dotted host names: two or more dot-separated labels, each
// starting with an ASCII letter or digit followed by up to 62 letters, digits
// or hyphens.
DomainRe = regexp.MustCompile(`[a-zA-Z0-9][-a-zA-Z0-9]{0,62}(\.[a-zA-Z0-9][-a-zA-Z0-9]{0,62})+`)
// IPv4Re matches dotted-quad IPv4 addresses: four dot-separated decimal
// groups, each in the range 0-255.
IPv4Re = regexp.MustCompile(`(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)(\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)){3}`)
// IPv6Re matches IPv6 addresses in one of three forms: an "fe80:" prefixed
// address with an optional "%zone" suffix, a full address of eight
// colon-separated hex groups, or a "::"-compressed address.
IPv6Re = regexp.MustCompile(`fe80:(:[0-9a-fA-F]{1,4}){0,4}(%\w+)?|([0-9a-fA-F]{1,4}:){7}[0-9a-fA-F]{1,4}|(([0-9a-fA-F]{1,4}:){0,6}[0-9a-fA-F]{1,4})?::(([0-9a-fA-F]{1,4}:){0,6}[0-9a-fA-F]{1,4})?`)
)

View File

@ -1,4 +1,4 @@
package app
package re
import (
"fmt"
@ -14,10 +14,10 @@ var domainList = []string{
func TestDomainRe(t *testing.T) {
for _, domain := range domainList {
if !domainRe.MatchString(domain) {
if !DomainRe.MatchString(domain) {
t.Error(domain)
t.Fail()
}
fmt.Println(domainRe.FindAllString(domain, -1))
fmt.Println(DomainRe.FindAllString(domain, -1))
}
}

View File

@ -2,32 +2,19 @@ package tools
import (
"net"
"regexp"
"strings"
"github.com/zu1k/nali/internal/re"
)
// Precompiled IP address patterns. They are initialized at package scope so
// that no init function is needed.
var (
	// ipv4re matches dotted-quad IPv4 addresses (four groups in 0-255)
	// anywhere in the input.
	ipv4re = regexp.MustCompile(`(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)(\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)){3}`)

	// ipv6re0 is ipv6re with a leading "^" and a trailing "$". Because
	// alternation binds looser than the anchors, "^" applies only to the
	// "fe80:" alternative and "$" only to the "::"-compressed alternative.
	ipv6re0 = regexp.MustCompile(`^fe80:(:[0-9a-fA-F]{1,4}){0,4}(%\w+)?|([0-9a-fA-F]{1,4}:){7}[0-9a-fA-F]{1,4}|(([0-9a-fA-F]{1,4}:){0,6}[0-9a-fA-F]{1,4})?::(([0-9a-fA-F]{1,4}:){0,6}[0-9a-fA-F]{1,4})?$`)

	// ipv6re matches IPv6 addresses anywhere in the input: an "fe80:"
	// prefixed form with optional "%zone", a full eight-group form, or a
	// "::"-compressed form.
	ipv6re = regexp.MustCompile(`fe80:(:[0-9a-fA-F]{1,4}){0,4}(%\w+)?|([0-9a-fA-F]{1,4}:){7}[0-9a-fA-F]{1,4}|(([0-9a-fA-F]{1,4}:){0,6}[0-9a-fA-F]{1,4})?::(([0-9a-fA-F]{1,4}:){0,6}[0-9a-fA-F]{1,4})?`)
)
func GetIP4FromString(str string) []string {
str = strings.Trim(str, " ")
return ipv4re.FindAllString(str, -1)
return re.IPv4Re.FindAllString(str, -1)
}
func GetIP6FromString(str string) []string {
str = strings.Trim(str, " ")
return ipv6re.FindAllString(str, -1)
return re.IPv6Re.FindAllString(str, -1)
}
const (