mirror of
https://github.com/privacy-protection-tools/anti-AD.git
synced 2025-01-22 14:29:16 +08:00
预处理下载文件,合并,排序,然后去重
This commit is contained in:
parent
35b2a2de76
commit
a88344ab1e
42829
adblock-for-dnsmasq.conf
42829
adblock-for-dnsmasq.conf
File diff suppressed because it is too large
Load Diff
@ -73,15 +73,15 @@ class addressMaker {
|
||||
continue;
|
||||
}
|
||||
|
||||
if(preg_match('/^\|\|([0-9a-z\-\.]+[a-z]+)[\^\$]*(image|third-party|script)?$/', $line, $matchs)){
|
||||
if(preg_match('/^\|\|([0-9a-z\-\.]+[a-z]+)[\^\$]*(image|third-party|script|subdocument|popup)?(,.+)?$/', $line, $matches)){
|
||||
|
||||
if(substr($matchs[1], 0, 4) == 'www.'){
|
||||
$row = substr($matchs[1], 4);
|
||||
if(substr($matches[1], 0, 4) == 'www.'){
|
||||
$row = substr($matches[1], 4);
|
||||
}else{
|
||||
$row = $matchs[1];
|
||||
$row = $matches[1];
|
||||
}
|
||||
|
||||
$arr_domains[self::extract_main_domain($matchs[1])][] = $row;
|
||||
$arr_domains[self::extract_main_domain($matches[1])][] = $row;
|
||||
}
|
||||
}
|
||||
|
||||
@ -96,7 +96,7 @@ class addressMaker {
|
||||
*/
|
||||
public static function get_domain_list($str_hosts){
|
||||
$strlen = strlen($str_hosts);
|
||||
if($strlen < 10){
|
||||
if($strlen < 3){
|
||||
return array();
|
||||
}
|
||||
|
||||
@ -123,7 +123,6 @@ class addressMaker {
|
||||
if(strpos($row[1], '.') === false){
|
||||
continue;
|
||||
}
|
||||
|
||||
$arr_domains[self::extract_main_domain($row[1])][] = $row[1];
|
||||
}
|
||||
|
||||
@ -134,7 +133,7 @@ class addressMaker {
|
||||
|
||||
$fp = fopen($str_file, 'w');
|
||||
$write_len = fwrite($fp, '#TIME=' . date('YmdHis') . "\n");
|
||||
$write_len += fwrite($fp, '#URL=https://github.com/gentlyxu/anti-AD' . "\n");
|
||||
$write_len += fwrite($fp, '#URL=https://github.com/privacy-protection-tools/anti-AD' . "\n");
|
||||
|
||||
foreach($arr_result as $rk => $rv){
|
||||
|
||||
@ -143,7 +142,7 @@ class addressMaker {
|
||||
}
|
||||
|
||||
if(empty($rk)){//遗漏的域名,不会写入到最终的配置里
|
||||
// print_r($rv);
|
||||
// print_r($rv);
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -114,6 +114,7 @@ return array(
|
||||
'jpush.io' => array('jpush.io'),
|
||||
'jiguang.cn' => array('jiguang.cn'),
|
||||
'getui.com' => array('getui.com'),
|
||||
'getui.net' => array('getui.net'),
|
||||
'ebjvu.cn' => array('ebjvu.cn'),
|
||||
'jumei.com' => array('adxapi.jumei.com', 'sd.int.jumei.com', 'sd.jumei.com'),
|
||||
|
||||
|
@ -24,26 +24,11 @@ $arr_whitelist = require('./lib/white_domain_list.php');
|
||||
$arr_result = array();
|
||||
|
||||
|
||||
$easylist1 = file_get_contents('./origin-files/easylistchina+easylist.txt');
|
||||
$arr_result = array_merge_recursive($arr_result, addressMaker::get_domain_from_easylist($easylist1));
|
||||
$easylist = file_get_contents('./origin-files/base-src-easylist.txt');
|
||||
$arr_result = array_merge_recursive($arr_result, addressMaker::get_domain_from_easylist($easylist));
|
||||
|
||||
$easylist2 = file_get_contents('./origin-files/cjx-annoyance.txt');
|
||||
$arr_result = array_merge_recursive($arr_result, addressMaker::get_domain_from_easylist($easylist2));
|
||||
|
||||
$easylist3 = file_get_contents('./origin-files/fanboy-annoyance.txt');
|
||||
$arr_result = array_merge_recursive($arr_result, addressMaker::get_domain_from_easylist($easylist3));
|
||||
|
||||
|
||||
$host1 = file_get_contents('./origin-files/hosts1');
|
||||
$arr_result = array_merge_recursive($arr_result, addressMaker::get_domain_list($host1));
|
||||
|
||||
$host2 = file_get_contents('./origin-files/hosts2');
|
||||
$arr_result = array_merge_recursive($arr_result, addressMaker::get_domain_list($host2));
|
||||
|
||||
$host3 = file_get_contents('./origin-files/hosts3');
|
||||
$arr_result = array_merge_recursive($arr_result, addressMaker::get_domain_list($host3));
|
||||
|
||||
$arr_result = array_merge_recursive($arr_result, $arr_blacklist);
|
||||
$hosts = file_get_contents('./origin-files/base-src-hosts.txt');
|
||||
$arr_result = array_merge_recursive($arr_result, addressMaker::get_domain_list($hosts));
|
||||
|
||||
echo 'Written file size:';
|
||||
echo addressMaker::write_to_conf($arr_result, './adblock-for-dnsmasq.conf', 'q-filter.conf');
|
||||
|
32306
origin-files/base-src-easylist.txt
Normal file
32306
origin-files/base-src-easylist.txt
Normal file
File diff suppressed because it is too large
Load Diff
10290
origin-files/base-src-hosts.txt
Normal file
10290
origin-files/base-src-hosts.txt
Normal file
File diff suppressed because it is too large
Load Diff
File diff suppressed because one or more lines are too long
@ -1,7 +1,7 @@
|
||||
[Adblock Plus 2.0]
|
||||
! Version: 201912132030
|
||||
! Version: 201912160821
|
||||
! Title: CJX's Annoyance List
|
||||
! Last modified: 2019/12/13 20:30 +0800
|
||||
! Last modified: 2019/12/16 08:21 +0800
|
||||
! Expires: 4 days (update frequency)
|
||||
! Homepage: http://abpchina.org/forum/forum.php?mod=viewthread&tid=29667
|
||||
!
|
||||
@ -315,7 +315,6 @@
|
||||
||china.cn/js/common/daniel_pop/
|
||||
||chinaacc.com/lamu/*dl-
|
||||
||chinaacc.com/lamu/*piao
|
||||
||zol.com.cn/js/ask/ask.js
|
||||
||xhd.cn/topic/leyu-piaofu/
|
||||
||zol.com.cn/index.php?c=Api_JumpApi&
|
||||
||jjwxc.net/adsmanage.
|
||||
@ -365,7 +364,8 @@
|
||||
||baidu.com/xpage/form/getform?id=wk_pc_skin
|
||||
||bdimg.com^*/popOperationView_*.js
|
||||
!------------------------Specific element hiding rules------------------------!
|
||||
baidu.com###page-copyright > div:nth-of-type(4)
|
||||
tmtpost.com##.open-app
|
||||
kuman.com##.guide-download-footer-box
|
||||
baidu.com##.guidetowkOperationwg-root
|
||||
hamibook.com.tw##.custom-banners-cycle-slideshow
|
||||
zmz2019.com##.corner > LI > A[href="http://hd.zmz2019.com/"]
|
@ -1,7 +1,7 @@
|
||||
[Adblock Plus 2.0]
|
||||
! Version: 201912150721
|
||||
! Version: 201912161334
|
||||
! Title: Fanboy's Annoyance List
|
||||
! Last modified: 15 Dec 2019 07:21 UTC
|
||||
! Last modified: 16 Dec 2019 13:34 UTC
|
||||
! Expires: 4 days (update frequency)
|
||||
! License: http://creativecommons.org/licenses/by/3.0/
|
||||
! Please report any unblocked content or problems by email or in our forums
|
||||
@ -6368,6 +6368,7 @@ _site_cookie_
|
||||
##.newsletter-call-to-action
|
||||
##.newsletter-callout
|
||||
##.newsletter-capture
|
||||
##.newsletter-card
|
||||
##.newsletter-center-form
|
||||
##.newsletter-col
|
||||
##.newsletter-compact
|
||||
@ -8480,6 +8481,7 @@ _site_cookie_
|
||||
###desktop-share-wrapper
|
||||
###desktop-social-buttons
|
||||
###detailHeadlineStickySocial
|
||||
###details-share-icon-link
|
||||
###dhSocial
|
||||
###diaspora-button-container
|
||||
###digg-widget
|
||||
@ -36586,6 +36588,7 @@ classic-trader.com##.cui-lightbox
|
||||
classic-trader.com##.cui-lightbox-container
|
||||
classic-trader.com##.cui-overlay-shape
|
||||
thenation.com##.current-issue
|
||||
thewirecutter.com##.d2db3d8f
|
||||
qz.com##.daily-brief-inline
|
||||
iol.co.za##.daily-news-signup-form
|
||||
onlymyhealth.com##.daily-wellness
|
||||
@ -39196,15 +39199,15 @@ mtn.co.za##.Toast
|
||||
6abc.com,abc11.com,abc13.com,abc30.com,abc7.com,abc7chicago.com,abc7news.com,abc7ny.com##.Tooltip
|
||||
hiberworld.com##._1u7AkTjy
|
||||
tunnelmb.net##._23LkM
|
||||
thewirecutter.com##._25c8f525
|
||||
911tabs.com##._29v-t
|
||||
dreamhack.com##._2IIiex-CsxtuW7QGZ_oO5z
|
||||
cnn.com##._2dc7d020
|
||||
reddit.com##._3q-XSJ2vokDQrvdG6mR__k
|
||||
thewirecutter.com##._5ce4f1c1
|
||||
instagram.com##._he402
|
||||
sunweb.co.uk##.absolute-dialog
|
||||
greenfields.eu##.accept-alert
|
||||
akeebabackup.com,imunify360.com,luxos.com,v-tac.eu##.activebar-container
|
||||
akeebabackup.com,imunify360.com,luxos.com,reshade.me,v-tac.eu##.activebar-container
|
||||
ticketmaster.ca##.agree-terms
|
||||
adata.com,ahlulbayt.tv,altervista.org,askdifference.com,asklion.co.uk,bankid.com,cfainstitute.org,convert-my-image.com,costaclub.com,efinancialcareers.com,hattrick.org,kenweego.com,lawyersonline.co.uk,lifescience.net,linkedin.com,lonelyplanet.com,m-a.org.uk,nature.com,netweather.tv,norwegian.com,pozyx.io,supercell.com,theonlinesurgery.co.uk,tindie.com,ukpressonline.co.uk,vernemq.com,viewsonic.com,viewsoniceurope.com,youngsseafood.co.uk##.alert
|
||||
espn.com##.alert--fixed
|
||||
@ -41896,7 +41899,7 @@ lsb.dk###notificationBar
|
||||
jobfinder.dk###sliding-popup
|
||||
tv2.dk##.CookieWarning_container__1Lmss
|
||||
santashop.dk##.center-notice
|
||||
cembrit.dk,conferencemanager.dk,denstoredanske.dk,djurssommerland.dk,madsnorgaard.com,ruc.dk,sportmaster.dk##.cookie
|
||||
cembrit.dk,conferencemanager.dk,denstoredanske.dk,djurssommerland.dk,kitchentime.dk,madsnorgaard.com,ruc.dk,sportmaster.dk##.cookie
|
||||
kalfor.dk##.cookies
|
||||
netdyredoktor.dk##.ct-inner
|
||||
ritzau.dk##.footer__notification
|
||||
@ -41997,6 +42000,7 @@ satakunnanautotalo.fi##.AVS-evasteseloste-container
|
||||
kulttuurivihkot.fi##.activebar-container
|
||||
americanairlines.fi##.alert
|
||||
vismasign.fi##.banner
|
||||
happypancake.fi##.bar.jsx-341223085
|
||||
hs.fi##.cb-container
|
||||
vertaa.fi##.cg-89.cg-97
|
||||
jenkki.fi##.container-agree
|
||||
@ -42009,7 +42013,6 @@ helmet.fi##.notifier_warning
|
||||
suomi24.fi##.s24_cc_banner-wrapper
|
||||
dplay.fi##div[class^="notification"]
|
||||
linnunrata.org##div[style^="position:fixed;right:0;top:0;"]
|
||||
happypancake.fi##.bar.jsx-341223085
|
||||
!
|
||||
! ---------- Greek ----------
|
||||
!
|
||||
@ -42272,10 +42275,9 @@ go4pro.lt##.wu_container
|
||||
seher.no,sol.no,start.no###__next > div[class^="css"]
|
||||
kube.no###cookie
|
||||
homenet.no###toast-container
|
||||
ba.no,ifinnmark.no,nettavisen.no,op.no##.active > div
|
||||
netonnet.no##.alert
|
||||
nextgentel.no##.blocker
|
||||
parat.com##.cookie
|
||||
kitchentime.no,parat.com##.cookie
|
||||
lexus.no##.disclaimer
|
||||
ruter.no##.fixed-messages
|
||||
sveip.no##.kake_wrap
|
||||
@ -42631,7 +42633,7 @@ elkedjan.se##.avalanche-message
|
||||
inexchange.se##.bannerContainer
|
||||
fz.se##.blck-info
|
||||
nacka.se##.c-message
|
||||
olisen.se##.cookie
|
||||
kitchentime.se,olisen.se##.cookie
|
||||
havkom.se,lfv.se,oru.se##.cookies
|
||||
xlbygg.se##.cookievarning-wrap:not(body):not(html)
|
||||
icebug.se##.css-4uxznl
|
||||
@ -42844,6 +42846,7 @@ mercopress.com#@##goToTop
|
||||
encompass.tv#@##goTop
|
||||
nannicskin.ru,redlink.com.ar#@##gotop
|
||||
libcom.org#@##main-share
|
||||
upim.com#@##newsletter-modal
|
||||
exposurelights.com,tactxflashlights.com,thefirefly.com#@##newsletter-popup
|
||||
dnevnik.bg#@##newsletterBar
|
||||
cnn.com#@##outbrain_widget_0
|
||||
@ -42873,7 +42876,7 @@ battle.net,squareup.com,yandex.by,yandex.com,yandex.com.tr,yandex.kz,yandex.ru,y
|
||||
toutelanutrition.com#@#.footer-newsletter
|
||||
designdevostransports.fr#@#.gform_widget
|
||||
ixbt.com#@#.glyphicon-arrow-up
|
||||
novojornal.co.ao#@#.newsletter-modal
|
||||
novojornal.co.ao,upim.com#@#.newsletter-modal
|
||||
billabong.com,strikerconcepts.com,tactxflashlights.com,urbanedcsupply.com#@#.newsletter-popup
|
||||
bidorbuy.co.za#@#.newsletterContainer
|
||||
thefork.it#@#.newsletterSignup
|
||||
@ -42916,6 +42919,7 @@ dj-extensions.com,thadenschool.org#@#.top-of-page
|
||||
99bitcoins.com#@#.tve-leads-lightbox
|
||||
getuikit.com#@#.uk-alert
|
||||
tactxflashlights.com#@#.widget_subscribe
|
||||
upim.com#@#[data-target="#newsletter-modal"]
|
||||
twellow.com#@#a[href="http://www.twellow.com/"]
|
||||
nasa.gov#@#backtotop
|
||||
reddit.com#@#img[alt="submit to reddit"]
|
||||
@ -43077,13 +43081,14 @@ jappy.de#@#.facebookbutton
|
||||
100pour100foot.fr,6ter.fr,achetezfacile.com,clubic.com,deco.fr,fan2.fr,girondins.com,groupem6.fr,jeuxvideo.fr,jolidressing.com,m6.fr,m6blog.fr,m6bonus.fr,m6info.fr,m6jeux.fr,m6kid.fr,m6mobile.fr,m6pubdigital.fr,minuitsexy.fr,minutefacile.com,mobinaute.com,neteco.com,nouvellestar.fr,ozap.com,paris-premiere.fr,teva.fr,turbo.fr,w9.fr,wideo.fr#@#.fb-box
|
||||
academia.edu,twitter.com#@#.fb-btn
|
||||
airbnb.cat,airbnb.com,airbnb.de,dobreprogramy.pl#@#.fb-button
|
||||
mediaexpert.leszczynscy.pl#@#.fb-container
|
||||
kizlarsoruyor.com#@#.fb-content
|
||||
firebase.google.com#@#.fb-dialog
|
||||
allegro.pl,itweb.co.za#@#.fb-icon
|
||||
e-biznes.pl,facebook.com,fb.com#@#.fb-like
|
||||
facebook.com#@#.fb-likebox
|
||||
facebook.com,fb.com#@#.fb-link
|
||||
cieplikpodrozuje.pl,fashionvoyager.pl,majkrafci.pl#@#.fb-page
|
||||
cieplikpodrozuje.pl,fashionvoyager.pl,majkrafci.pl,mediaexpert.leszczynscy.pl#@#.fb-page
|
||||
facebook.com#@#.fb-quote
|
||||
facebook.com#@#.fb-recommendations
|
||||
facebook.com#@#.fb-recommendations-bar
|
||||
@ -44200,6 +44205,7 @@ unicode.org#@#div[data-nconvert-cookie]
|
||||
@@||jrjimg.cn/js.do?f=/share/js/
|
||||
@@||jsfiddle.net^$generichide
|
||||
@@||junglevibe2.net/min/?$script
|
||||
@@||justwatch.com^$generichide
|
||||
@@||kcrw.com^*/images/social-icons-$image,domain=kcrw.com
|
||||
@@||keepa.com/img/social/Google.svg
|
||||
@@||koszykowkanawozkach.pl^*/Header-facebook.jpg
|
||||
@ -44215,6 +44221,7 @@ unicode.org#@#div[data-nconvert-cookie]
|
||||
@@||letyshops.com/build/core/images/fb-white.
|
||||
@@||lifetricks.com/wp-content/plugins/nextend-facebook-connect/
|
||||
@@||liggoo.com/min/?$script
|
||||
@@||limanowianin.in/wp-content/plugins/soslider-social-slider/js/jquery.soslider.min.js$script
|
||||
@@||lipis.github.io^$generichide
|
||||
@@||livescience.com/images/site/social/footer_*.gif
|
||||
@@||login.kataweb.it^*/sprite-social.png
|
24
start.sh
24
start.sh
@ -6,7 +6,9 @@ cd $(cd "$(dirname "$0")";pwd)
|
||||
git pull
|
||||
|
||||
echo '开始下载 easylist1...'
|
||||
wget -O ./origin-files/easylistchina+easylist.txt --timeout 30 https://easylist-downloads.adblockplus.org/easylistchina+easylist.txt
|
||||
curl -o ./origin-files/easylist1.txt --connect-timeout 60 \
|
||||
-s \
|
||||
https://easylist-downloads.adblockplus.org/easylistchina+easylist.txt
|
||||
|
||||
# shellcheck disable=SC2181
|
||||
if [ $? -ne 0 ];then
|
||||
@ -15,7 +17,9 @@ if [ $? -ne 0 ];then
|
||||
fi
|
||||
|
||||
echo '开始下载 easylist2...'
|
||||
wget -O ./origin-files/cjx-annoyance.txt --timeout 30 https://raw.githubusercontent.com/cjx82630/cjxlist/master/cjx-annoyance.txt
|
||||
curl -o ./origin-files/easylist2.txt --connect-timeout 60 \
|
||||
-s \
|
||||
https://raw.githubusercontent.com/cjx82630/cjxlist/master/cjx-annoyance.txt
|
||||
|
||||
# shellcheck disable=SC2181
|
||||
if [ $? -ne 0 ];then
|
||||
@ -24,7 +28,9 @@ if [ $? -ne 0 ];then
|
||||
fi
|
||||
|
||||
echo '开始下载 easylist3...'
|
||||
wget -O ./origin-files/fanboy-annoyance.txt --timeout 30 https://easylist.to/easylist/fanboy-annoyance.txt
|
||||
curl -o ./origin-files/easylist3.txt --connect-timeout 60 \
|
||||
-s \
|
||||
https://easylist.to/easylist/fanboy-annoyance.txt
|
||||
|
||||
# shellcheck disable=SC2181
|
||||
if [ $? -ne 0 ];then
|
||||
@ -34,7 +40,7 @@ fi
|
||||
|
||||
|
||||
echo '开始下载 hosts1...'
|
||||
wget -O ./origin-files/hosts1 --timeout 30 https://hosts.nfz.moe/full/hosts
|
||||
wget -O ./origin-files/hosts1 --timeout 60 https://hosts.nfz.moe/full/hosts
|
||||
|
||||
# shellcheck disable=SC2181
|
||||
if [ $? -ne 0 ];then
|
||||
@ -60,6 +66,16 @@ if [ $? -ne 0 ];then
|
||||
exit 1
|
||||
fi
|
||||
|
||||
cd origin-files
|
||||
|
||||
cat hosts* | grep -v -E "^((#.*)|(\s*))$" \
|
||||
| grep -v -E "^[0-9\.:]+\s+(ip6\-)?(localhost|loopback)$" \
|
||||
| sed s/0.0.0.0/127.0.0.1/g | sed s/::/127.0.0.1/g | sort \
|
||||
| uniq >base-src-hosts.txt
|
||||
|
||||
cat easylist*.txt | grep -E "^\|\|[^\^]+\^.*$" | sort | uniq >base-src-easylist.txt
|
||||
|
||||
cd ../
|
||||
|
||||
PHP_RET=$(/usr/local/php/bin/php make-addr.php)
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user