预处理下载文件,合并,排序,然后去重

This commit is contained in:
gently 2019-12-16 22:17:17 +08:00
parent 35b2a2de76
commit a88344ab1e
10 changed files with 64534 additions and 21187 deletions

File diff suppressed because it is too large Load Diff

View File

@ -73,15 +73,15 @@ class addressMaker {
continue;
}
if(preg_match('/^\|\|([0-9a-z\-\.]+[a-z]+)[\^\$]*(image|third-party|script)?$/', $line, $matchs)){
if(preg_match('/^\|\|([0-9a-z\-\.]+[a-z]+)[\^\$]*(image|third-party|script|subdocument|popup)?(,.+)?$/', $line, $matches)){
if(substr($matchs[1], 0, 4) == 'www.'){
$row = substr($matchs[1], 4);
if(substr($matches[1], 0, 4) == 'www.'){
$row = substr($matches[1], 4);
}else{
$row = $matchs[1];
$row = $matches[1];
}
$arr_domains[self::extract_main_domain($matchs[1])][] = $row;
$arr_domains[self::extract_main_domain($matches[1])][] = $row;
}
}
@ -96,7 +96,7 @@ class addressMaker {
*/
public static function get_domain_list($str_hosts){
$strlen = strlen($str_hosts);
if($strlen < 10){
if($strlen < 3){
return array();
}
@ -123,7 +123,6 @@ class addressMaker {
if(strpos($row[1], '.') === false){
continue;
}
$arr_domains[self::extract_main_domain($row[1])][] = $row[1];
}
@ -134,7 +133,7 @@ class addressMaker {
$fp = fopen($str_file, 'w');
$write_len = fwrite($fp, '#TIME=' . date('YmdHis') . "\n");
$write_len += fwrite($fp, '#URL=https://github.com/gentlyxu/anti-AD' . "\n");
$write_len += fwrite($fp, '#URL=https://github.com/privacy-protection-tools/anti-AD' . "\n");
foreach($arr_result as $rk => $rv){

View File

@ -114,6 +114,7 @@ return array(
'jpush.io' => array('jpush.io'),
'jiguang.cn' => array('jiguang.cn'),
'getui.com' => array('getui.com'),
'getui.net' => array('getui.net'),
'ebjvu.cn' => array('ebjvu.cn'),
'jumei.com' => array('adxapi.jumei.com', 'sd.int.jumei.com', 'sd.jumei.com'),

View File

@ -24,26 +24,11 @@ $arr_whitelist = require('./lib/white_domain_list.php');
$arr_result = array();
$easylist1 = file_get_contents('./origin-files/easylistchina+easylist.txt');
$arr_result = array_merge_recursive($arr_result, addressMaker::get_domain_from_easylist($easylist1));
$easylist = file_get_contents('./origin-files/base-src-easylist.txt');
$arr_result = array_merge_recursive($arr_result, addressMaker::get_domain_from_easylist($easylist));
$easylist2 = file_get_contents('./origin-files/cjx-annoyance.txt');
$arr_result = array_merge_recursive($arr_result, addressMaker::get_domain_from_easylist($easylist2));
$easylist3 = file_get_contents('./origin-files/fanboy-annoyance.txt');
$arr_result = array_merge_recursive($arr_result, addressMaker::get_domain_from_easylist($easylist3));
$host1 = file_get_contents('./origin-files/hosts1');
$arr_result = array_merge_recursive($arr_result, addressMaker::get_domain_list($host1));
$host2 = file_get_contents('./origin-files/hosts2');
$arr_result = array_merge_recursive($arr_result, addressMaker::get_domain_list($host2));
$host3 = file_get_contents('./origin-files/hosts3');
$arr_result = array_merge_recursive($arr_result, addressMaker::get_domain_list($host3));
$arr_result = array_merge_recursive($arr_result, $arr_blacklist);
$hosts = file_get_contents('./origin-files/base-src-hosts.txt');
$arr_result = array_merge_recursive($arr_result, addressMaker::get_domain_list($hosts));
echo 'Written file size:';
echo addressMaker::write_to_conf($arr_result, './adblock-for-dnsmasq.conf', 'q-filter.conf');

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because one or more lines are too long

View File

@ -1,7 +1,7 @@
[Adblock Plus 2.0]
! Version: 201912132030
! Version: 201912160821
! Title: CJX's Annoyance List
! Last modified: 2019/12/13 20:30 +0800
! Last modified: 2019/12/16 08:21 +0800
! Expires: 4 days (update frequency)
! Homepage: http://abpchina.org/forum/forum.php?mod=viewthread&tid=29667
!
@ -315,7 +315,6 @@
||china.cn/js/common/daniel_pop/
||chinaacc.com/lamu/*dl-
||chinaacc.com/lamu/*piao
||zol.com.cn/js/ask/ask.js
||xhd.cn/topic/leyu-piaofu/
||zol.com.cn/index.php?c=Api_JumpApi&
||jjwxc.net/adsmanage.
@ -365,7 +364,8 @@
||baidu.com/xpage/form/getform?id=wk_pc_skin
||bdimg.com^*/popOperationView_*.js
!------------------------Specific element hiding rules------------------------!
baidu.com###page-copyright > div:nth-of-type(4)
tmtpost.com##.open-app
kuman.com##.guide-download-footer-box
baidu.com##.guidetowkOperationwg-root
hamibook.com.tw##.custom-banners-cycle-slideshow
zmz2019.com##.corner > LI > A[href="http://hd.zmz2019.com/"]

View File

@ -1,7 +1,7 @@
[Adblock Plus 2.0]
! Version: 201912150721
! Version: 201912161334
! Title: Fanboy's Annoyance List
! Last modified: 15 Dec 2019 07:21 UTC
! Last modified: 16 Dec 2019 13:34 UTC
! Expires: 4 days (update frequency)
! License: http://creativecommons.org/licenses/by/3.0/
! Please report any unblocked content or problems by email or in our forums
@ -6368,6 +6368,7 @@ _site_cookie_
##.newsletter-call-to-action
##.newsletter-callout
##.newsletter-capture
##.newsletter-card
##.newsletter-center-form
##.newsletter-col
##.newsletter-compact
@ -8480,6 +8481,7 @@ _site_cookie_
###desktop-share-wrapper
###desktop-social-buttons
###detailHeadlineStickySocial
###details-share-icon-link
###dhSocial
###diaspora-button-container
###digg-widget
@ -36586,6 +36588,7 @@ classic-trader.com##.cui-lightbox
classic-trader.com##.cui-lightbox-container
classic-trader.com##.cui-overlay-shape
thenation.com##.current-issue
thewirecutter.com##.d2db3d8f
qz.com##.daily-brief-inline
iol.co.za##.daily-news-signup-form
onlymyhealth.com##.daily-wellness
@ -39196,15 +39199,15 @@ mtn.co.za##.Toast
6abc.com,abc11.com,abc13.com,abc30.com,abc7.com,abc7chicago.com,abc7news.com,abc7ny.com##.Tooltip
hiberworld.com##._1u7AkTjy
tunnelmb.net##._23LkM
thewirecutter.com##._25c8f525
911tabs.com##._29v-t
dreamhack.com##._2IIiex-CsxtuW7QGZ_oO5z
cnn.com##._2dc7d020
reddit.com##._3q-XSJ2vokDQrvdG6mR__k
thewirecutter.com##._5ce4f1c1
instagram.com##._he402
sunweb.co.uk##.absolute-dialog
greenfields.eu##.accept-alert
akeebabackup.com,imunify360.com,luxos.com,v-tac.eu##.activebar-container
akeebabackup.com,imunify360.com,luxos.com,reshade.me,v-tac.eu##.activebar-container
ticketmaster.ca##.agree-terms
adata.com,ahlulbayt.tv,altervista.org,askdifference.com,asklion.co.uk,bankid.com,cfainstitute.org,convert-my-image.com,costaclub.com,efinancialcareers.com,hattrick.org,kenweego.com,lawyersonline.co.uk,lifescience.net,linkedin.com,lonelyplanet.com,m-a.org.uk,nature.com,netweather.tv,norwegian.com,pozyx.io,supercell.com,theonlinesurgery.co.uk,tindie.com,ukpressonline.co.uk,vernemq.com,viewsonic.com,viewsoniceurope.com,youngsseafood.co.uk##.alert
espn.com##.alert--fixed
@ -41896,7 +41899,7 @@ lsb.dk###notificationBar
jobfinder.dk###sliding-popup
tv2.dk##.CookieWarning_container__1Lmss
santashop.dk##.center-notice
cembrit.dk,conferencemanager.dk,denstoredanske.dk,djurssommerland.dk,madsnorgaard.com,ruc.dk,sportmaster.dk##.cookie
cembrit.dk,conferencemanager.dk,denstoredanske.dk,djurssommerland.dk,kitchentime.dk,madsnorgaard.com,ruc.dk,sportmaster.dk##.cookie
kalfor.dk##.cookies
netdyredoktor.dk##.ct-inner
ritzau.dk##.footer__notification
@ -41997,6 +42000,7 @@ satakunnanautotalo.fi##.AVS-evasteseloste-container
kulttuurivihkot.fi##.activebar-container
americanairlines.fi##.alert
vismasign.fi##.banner
happypancake.fi##.bar.jsx-341223085
hs.fi##.cb-container
vertaa.fi##.cg-89.cg-97
jenkki.fi##.container-agree
@ -42009,7 +42013,6 @@ helmet.fi##.notifier_warning
suomi24.fi##.s24_cc_banner-wrapper
dplay.fi##div[class^="notification"]
linnunrata.org##div[style^="position:fixed;right:0;top:0;"]
happypancake.fi##.bar.jsx-341223085
!
! ---------- Greek ----------
!
@ -42272,10 +42275,9 @@ go4pro.lt##.wu_container
seher.no,sol.no,start.no###__next > div[class^="css"]
kube.no###cookie
homenet.no###toast-container
ba.no,ifinnmark.no,nettavisen.no,op.no##.active > div
netonnet.no##.alert
nextgentel.no##.blocker
parat.com##.cookie
kitchentime.no,parat.com##.cookie
lexus.no##.disclaimer
ruter.no##.fixed-messages
sveip.no##.kake_wrap
@ -42631,7 +42633,7 @@ elkedjan.se##.avalanche-message
inexchange.se##.bannerContainer
fz.se##.blck-info
nacka.se##.c-message
olisen.se##.cookie
kitchentime.se,olisen.se##.cookie
havkom.se,lfv.se,oru.se##.cookies
xlbygg.se##.cookievarning-wrap:not(body):not(html)
icebug.se##.css-4uxznl
@ -42844,6 +42846,7 @@ mercopress.com#@##goToTop
encompass.tv#@##goTop
nannicskin.ru,redlink.com.ar#@##gotop
libcom.org#@##main-share
upim.com#@##newsletter-modal
exposurelights.com,tactxflashlights.com,thefirefly.com#@##newsletter-popup
dnevnik.bg#@##newsletterBar
cnn.com#@##outbrain_widget_0
@ -42873,7 +42876,7 @@ battle.net,squareup.com,yandex.by,yandex.com,yandex.com.tr,yandex.kz,yandex.ru,y
toutelanutrition.com#@#.footer-newsletter
designdevostransports.fr#@#.gform_widget
ixbt.com#@#.glyphicon-arrow-up
novojornal.co.ao#@#.newsletter-modal
novojornal.co.ao,upim.com#@#.newsletter-modal
billabong.com,strikerconcepts.com,tactxflashlights.com,urbanedcsupply.com#@#.newsletter-popup
bidorbuy.co.za#@#.newsletterContainer
thefork.it#@#.newsletterSignup
@ -42916,6 +42919,7 @@ dj-extensions.com,thadenschool.org#@#.top-of-page
99bitcoins.com#@#.tve-leads-lightbox
getuikit.com#@#.uk-alert
tactxflashlights.com#@#.widget_subscribe
upim.com#@#[data-target="#newsletter-modal"]
twellow.com#@#a[href="http://www.twellow.com/"]
nasa.gov#@#backtotop
reddit.com#@#img[alt="submit to reddit"]
@ -43077,13 +43081,14 @@ jappy.de#@#.facebookbutton
100pour100foot.fr,6ter.fr,achetezfacile.com,clubic.com,deco.fr,fan2.fr,girondins.com,groupem6.fr,jeuxvideo.fr,jolidressing.com,m6.fr,m6blog.fr,m6bonus.fr,m6info.fr,m6jeux.fr,m6kid.fr,m6mobile.fr,m6pubdigital.fr,minuitsexy.fr,minutefacile.com,mobinaute.com,neteco.com,nouvellestar.fr,ozap.com,paris-premiere.fr,teva.fr,turbo.fr,w9.fr,wideo.fr#@#.fb-box
academia.edu,twitter.com#@#.fb-btn
airbnb.cat,airbnb.com,airbnb.de,dobreprogramy.pl#@#.fb-button
mediaexpert.leszczynscy.pl#@#.fb-container
kizlarsoruyor.com#@#.fb-content
firebase.google.com#@#.fb-dialog
allegro.pl,itweb.co.za#@#.fb-icon
e-biznes.pl,facebook.com,fb.com#@#.fb-like
facebook.com#@#.fb-likebox
facebook.com,fb.com#@#.fb-link
cieplikpodrozuje.pl,fashionvoyager.pl,majkrafci.pl#@#.fb-page
cieplikpodrozuje.pl,fashionvoyager.pl,majkrafci.pl,mediaexpert.leszczynscy.pl#@#.fb-page
facebook.com#@#.fb-quote
facebook.com#@#.fb-recommendations
facebook.com#@#.fb-recommendations-bar
@ -44200,6 +44205,7 @@ unicode.org#@#div[data-nconvert-cookie]
@@||jrjimg.cn/js.do?f=/share/js/
@@||jsfiddle.net^$generichide
@@||junglevibe2.net/min/?$script
@@||justwatch.com^$generichide
@@||kcrw.com^*/images/social-icons-$image,domain=kcrw.com
@@||keepa.com/img/social/Google.svg
@@||koszykowkanawozkach.pl^*/Header-facebook.jpg
@ -44215,6 +44221,7 @@ unicode.org#@#div[data-nconvert-cookie]
@@||letyshops.com/build/core/images/fb-white.
@@||lifetricks.com/wp-content/plugins/nextend-facebook-connect/
@@||liggoo.com/min/?$script
@@||limanowianin.in/wp-content/plugins/soslider-social-slider/js/jquery.soslider.min.js$script
@@||lipis.github.io^$generichide
@@||livescience.com/images/site/social/footer_*.gif
@@||login.kataweb.it^*/sprite-social.png

View File

@ -6,7 +6,9 @@ cd $(cd "$(dirname "$0")";pwd)
git pull
echo '开始下载 easylist1...'
wget -O ./origin-files/easylistchina+easylist.txt --timeout 30 https://easylist-downloads.adblockplus.org/easylistchina+easylist.txt
curl -o ./origin-files/easylist1.txt --connect-timeout 60 \
-s \
https://easylist-downloads.adblockplus.org/easylistchina+easylist.txt
# shellcheck disable=SC2181
if [ $? -ne 0 ];then
@ -15,7 +17,9 @@ if [ $? -ne 0 ];then
fi
echo '开始下载 easylist2...'
wget -O ./origin-files/cjx-annoyance.txt --timeout 30 https://raw.githubusercontent.com/cjx82630/cjxlist/master/cjx-annoyance.txt
curl -o ./origin-files/easylist2.txt --connect-timeout 60 \
-s \
https://raw.githubusercontent.com/cjx82630/cjxlist/master/cjx-annoyance.txt
# shellcheck disable=SC2181
if [ $? -ne 0 ];then
@ -24,7 +28,9 @@ if [ $? -ne 0 ];then
fi
echo '开始下载 easylist3...'
wget -O ./origin-files/fanboy-annoyance.txt --timeout 30 https://easylist.to/easylist/fanboy-annoyance.txt
curl -o ./origin-files/easylist3.txt --connect-timeout 60 \
-s \
https://easylist.to/easylist/fanboy-annoyance.txt
# shellcheck disable=SC2181
if [ $? -ne 0 ];then
@ -34,7 +40,7 @@ fi
echo '开始下载 hosts1...'
wget -O ./origin-files/hosts1 --timeout 30 https://hosts.nfz.moe/full/hosts
wget -O ./origin-files/hosts1 --timeout 60 https://hosts.nfz.moe/full/hosts
# shellcheck disable=SC2181
if [ $? -ne 0 ];then
@ -60,6 +66,16 @@ if [ $? -ne 0 ];then
exit 1
fi
cd origin-files
cat hosts* | grep -v -E "^((#.*)|(\s*))$" \
| grep -v -E "^[0-9\.:]+\s+(ip6\-)?(localhost|loopback)$" \
| sed s/0.0.0.0/127.0.0.1/g | sed s/::/127.0.0.1/g | sort \
| uniq >base-src-hosts.txt
cat easylist*.txt | grep -E "^\|\|[^\^]+\^.*$" | sort | uniq >base-src-easylist.txt
cd ../
PHP_RET=$(/usr/local/php/bin/php make-addr.php)