1
0
mirror of https://github.com/zu1k/nali.git synced 2025-01-22 21:29:02 +08:00
nali/pkg/entity/parse.go

73 lines
1.4 KiB
Go
Raw Normal View History

2021-07-30 21:46:06 +08:00
package entity
2021-07-30 22:30:27 +08:00
import (
"net/netip"
2021-07-30 22:30:27 +08:00
"sort"
2021-08-02 12:01:25 +08:00
"github.com/zu1k/nali/internal/db"
2021-08-02 12:03:06 +08:00
"github.com/zu1k/nali/pkg/dbif"
"github.com/zu1k/nali/pkg/re"
2021-07-30 22:30:27 +08:00
)
2021-07-30 21:46:06 +08:00
// ParseLine parse a line into entities
func ParseLine(line string) Entities {
2021-07-30 22:30:27 +08:00
ip4sLoc := re.IPv4Re.FindAllStringIndex(line, -1)
ip6sLoc := re.IPv6Re.FindAllStringIndex(line, -1)
domainsLoc := re.DomainRe.FindAllStringIndex(line, -1)
tmp := make(Entities, 0, len(ip4sLoc)+len(ip6sLoc)+len(domainsLoc))
for _, e := range ip4sLoc {
tmp = append(tmp, &Entity{
2023-03-02 16:53:50 +08:00
Loc: *(*[2]int)(e),
2021-07-30 22:30:27 +08:00
Type: TypeIPv4,
Text: line[e[0]:e[1]],
})
}
for _, e := range ip6sLoc {
text := line[e[0]:e[1]]
if ip, _ := netip.ParseAddr(text); !ip.Is4In6() {
tmp = append(tmp, &Entity{
2023-03-02 16:53:50 +08:00
Loc: *(*[2]int)(e),
Type: TypeIPv6,
Text: text,
})
}
2021-07-30 22:30:27 +08:00
}
for _, e := range domainsLoc {
tmp = append(tmp, &Entity{
2023-03-02 16:53:50 +08:00
Loc: *(*[2]int)(e),
2021-07-30 22:30:27 +08:00
Type: TypeDomain,
Text: line[e[0]:e[1]],
})
}
sort.Sort(tmp)
es := make(Entities, 0, len(tmp))
2021-07-30 21:46:06 +08:00
2021-07-30 22:30:27 +08:00
idx := 0
for _, e := range tmp {
start := e.Loc[0]
if start >= idx {
if start > idx {
es = append(es, &Entity{
2023-03-02 16:53:50 +08:00
Loc: [2]int{idx, start},
2021-07-30 22:30:27 +08:00
Type: TypePlain,
Text: line[idx:start],
})
}
2021-08-02 12:01:25 +08:00
e.Info = db.Find(dbif.QueryType(e.Type), e.Text)
2021-07-30 22:30:27 +08:00
es = append(es, e)
idx = e.Loc[1]
}
}
2021-08-03 08:31:27 +08:00
if total := len(line); idx < total {
2021-08-03 07:54:35 +08:00
es = append(es, &Entity{
2023-03-02 16:53:50 +08:00
Loc: [2]int{idx, total},
2021-08-03 07:54:35 +08:00
Type: TypePlain,
2021-08-03 08:33:26 +08:00
Text: line[idx:total],
2021-08-03 07:54:35 +08:00
})
}
2021-07-30 22:30:27 +08:00
return es
2021-07-30 21:46:06 +08:00
}