<?php
/**
 * easylist extend
 *
 * @file easylist-extend.php
 * @date 2019-12-24
 * @author gently
 *
 */
// Runtime setup: no execution time limit; report only errors, warnings and
// parse errors (E_ERROR | E_WARNING | E_PARSE is the bitmask 7).
set_time_limit(0);
error_reporting(E_ERROR | E_WARNING | E_PARSE);

// Start timestamp, used for the elapsed-time report printed at the end.
define('START_TIME', microtime(true));

// Project root is the parent of this script's directory; libraries live in lib/.
define('ROOT_DIR', dirname(__DIR__) . '/');
define('LIB_DIR', ROOT_DIR . 'lib/');

// black_domain_list.php returns its data through the require expression.
$black_domain_list = require_once LIB_DIR . 'black_domain_list.php';
require_once LIB_DIR . 'addressMaker.class.php';

// Upstream inputs: wildcard rules and whitelist ("@@") rules.
define('WILDCARD_SRC', ROOT_DIR . 'origin-files/wildcard-src-easylist.txt');
define('WHITERULE_SRC', ROOT_DIR . 'origin-files/whiterule-src-easylist.txt');

// Hand-maintained wildcard rules that are merged on top of the rules read from
// WILDCARD_SRC. Only the keys matter; every value is null.
$ARR_MERGED_WILD_LIST = array_fill_keys(array(
    'ad*.udn.com',
    '*.mgr.consensu.org',
    'vs*.gzcu.u3.ucweb.com',
    'ad*.goforandroid.com',
    'bs*.9669.cn',
    '*serror*.wo.com.cn',
    '*mistat*.xiaomi.com',
    'affrh20*.com',
    'assoc-amazon.*',
    'clkservice*.youdao.com',
    'dsp*.youdao.com',
    'pussl*.com',
    'putrr*.com',
    't*.a.market.xiaomi.com',
    'ad*.bigmir.net',
    'log*.molitv.cn',
    'adm*.autoimg.cn',
    'cloudservice*.kingsoft-office-service.com',
    'gg*.51cto.com',
    'log.*.hunantv.com',
    'iflyad.*.openstorage.cn',
    '*customstat*.51togic.com',
    // 'appcloud*.zhihu.com', // disabled, #344
    'ad*.molitv.cn',
    'ads*-adnow.com',
    'aeros*.tk',
    'analyzer*.fc2.com',
    'admicro*.vcmedia.vn',
    'xn--xhq9mt12cf5v.*',
    'freecontent.*',
    'hostingcloud.*',
    'jshosting.*',
    'flightzy.*',
    'sunnimiq*.cf',
    'admob.*',
    '*log.droid4x.cn',
    '*tsdk.vivo.com.cn',
    '*.mmstat.com',
    // 'sf*-ttcdn-tos.pstatp.com', // disabled
    'f-log*.grammarly.io',
    '24log.*',
    '24smi.*',
    'ad-*.wikawika.xyz',
    'ablen*.tk',
    'darking*.tk',
    'doubleclick*.xyz',
    'thepiratebay.*',
    'adserver.*',
    'clientlog*.music.163.com',
    'brucelead*.com',
    'gostats.*',
    'gralfusnzpo*.top',
    'oiwjcsh*.top',
    '*-analytics*.huami.com',
    'count*.pconline.com.cn',
    'qchannel*.cn',
    'sda*.xyz',
    'ad-*.com',
    'ad-*.net',
    'webads.*',
    'web-stat.*',
    'waframedia*.*',
    'wafmedia*.*',
    'voluumtrk*.com',
    'vmm-satellite*.com',
    'vente-unique.*',
    'vegaoo*.*',
    'umtrack*.com',
    'grjs0*.com',
    'imglnk*.com',
    'admarvel*.*',
    'admaster*.*',
    'adsage*.*',
    'adsensor*.*',
    'adservice*.*',
    'adsh*.*',
    'adsmogo*.*',
    'adsrvmedia*.*',
    'adsserving*.*',
    'adsystem*.*',
    'adwords*.*',
    'analysis*.*',
    'applovin*.*',
    'appsflyer*.*',
    'domob*.*',
    'duomeng*.*',
    'dwtrack*.*',
    'guanggao*.*',
    'lianmeng*.*',
    // 'monitor*.*', // disabled
    'omgmta*.*',
    'omniture*.*',
    'openx*.*',
    'partnerad*.*',
    'pingfore*.*',
    'socdm*.*',
    'supersonicads*.*',
    'tracking*.*',
    'usage*.*',
    'wlmonitor*.*',
    'zjtoolbar*.*',
    'engage.3m*',
    '*.actonservice.com',
    '*-cor0*.api.p001.1drv.com',
    '*33*-*.1drv.com',
    '2cnjuh34j*.com',
    'ssc.southpark*',
    'tr.*.espmp-*fr.net',
    'tdep.vacansoleil.*',
    'da.hornbach.*',
    '*us*watcab*.blob.core.windows.net',
), null);

// Regular-expression rules. Each key is a complete PCRE pattern (delimiters
// included) that is tested against a bare domain; a pattern that matches at
// least one source rule is itself written to the output. Values are unused.
//
// The literal dot in front of a TLD must be escaped, otherwise it matches any
// character.
$ARR_REGEX_LIST = array(
    '/9377[a-z]{2}\.com$/' => null,
    '/^(\S+\.)?ad(s?[\d]+|m|s)?\./' => null,
    '/^(\S+\.)?advert/' => null, // TODO very broad coverage
    '/^(\S+\.)?affiliat(es?[0-9a-z]*?|ion[0-9\-a-z]*?|ly[0-9a-z\-]*?)\./' => null, // fixed #406
    '/^(\S+\.)?s?metrics\./' => null, // TODO very broad coverage
    // '/^(\S+\.)?affiliat(es|ion|e)\./' => null,
    '/afgr[\d]{1,2}\.com$/' => null,
    '/^(\S+\.)?analytics(\-|\.)/' => null,
    '/^(\S+\.)?counter(\-|\.)/' => null,
    '/^(\S+\.)?pixels?\./' => null,
    '/syma[a-z]\.cn$/' => null,
    '/^(\S+\.)?widgets?\./' => null,
    '/^(\S+\.)?(webstats?|swebstats?|mywebstats?)\./' => null,
    // '/^(\S+\.)?stat\..+?\.(com|cn|ru|it|de|cz|net|kr|ai|pl|th|fi|fr|jp|hu|bz|sk|se)$/' => null,
    '/^(\S+\.)?track(ing)?\./' => null,
    '/^(\S+\.)?tongji\./' => null,
    '/^(\S+\.)?toolbar\./' => null,
    '/^(\S+\.)?adservice\.google\./' => null,
    '/^(\S+\.)?d[\d]+\.sina(img)?(\.com)?\.cn/' => null,
    '/^(\S+\.)?sax[\dns]?\.sina\.com\.cn/' => null,
    '/delivery([\d]{2}|dom|modo)\.com$/' => null,
    '/^(\S+\.)?[c-s]ads(abs|abz|ans|anz|ats|atz|del|ecs|ecz|ims|imz|ips|ipz|kis|kiz|oks|okz|one|pms|pmz)\.com/' => null,
    '/^(\S+\.)?([a-z\d\-]+\.)?(?!xn--)[^\.\/]{26,}\.(com|net|cn)(\.cn)?$/' => null, // overly long domain names
    '/^(\S+\.)?11599[\da-z]{2,20}\.com$/' => null, // "Macau New Lisboa" gambling-spam series
    '/^(\S+\.)?61677[\da-z]{0,20}\.com$/' => null, // "Macau New Lisboa" gambling-spam series
    '/^(\S+\.)?[0-9a-f]{15,}\.com$/' => null, // hexadecimal domain names of 15+ characters
    '/^(\S+\.)?[0-9a-z]{16,}\.xyz$/' => null, // .xyz domain names of 16+ characters
    '/^(\S+\.)?6699[0-9]\.top$/' => null, // sequentially numbered domains
    '/^(\S+\.)?abie[0-9]+\.top$/' => null, // sequentially numbered domains
    '/^(\S+\.)?ad[0-9]{3,}m\.com$/' => null, // sequentially numbered domains
    '/^(\S+\.)?aj[0-9]{4,}\.online$/' => null, // sequentially numbered domains
    '/^(\S+\.)?xpj[0-9]\.net$/' => null, // sequentially numbered domains
    '/^(\S+\.)?ylx-[0-9]\.com$/' => null, // sequentially numbered domains
    '/^(\S+\.)?ali2[a-z]\.xyz$/' => null, // sequentially numbered domains
    '/^(\S+\.)?777\-?partners?\.(net|com)$/' => null, // name combinations
    '/^(\S+\.)?voyage-prive\.[a-z]+(\.uk)?$/' => null, // name combinations
    '/^(\S+\.)?e7[0-9]{2,4}\.(net|com)?$/' => null, // name combinations
    '/^(\S+\.)?g[1-4][0-9]{8,9}\.com?$/' => null, // bulk-generated combinations
    '/^(\S+\.)?hg[0-9]{4,5}\.com?$/' => null, // bulk-generated combinations
    '/^(\S+\.)?333[1-9]{2}[0-9]{2}\.com?$/' => null, // bulk-generated combinations
    '/^(\S+\.)?5551[0-9]{3}\.com?$/' => null, // bulk-generated combinations

    // '/^(\S+\.)?(?=.*[a-f].*\.com$)(?=.*\d.*\.com$)[a-f0-9]{15,}\.com$/' => null,
);

// Extra exemption (whitelist) rules added on top of the wildcard / regex matches.
// Value 1: the rule is always written to the output.
// Value 0: the rule is written only when its domain is covered by a wildcard or
//          regex rule that was actually emitted.
$ARR_WHITE_RULE_LIST = array(
    '@@||tongji.*kuwo.cn^'                        => 0,
    '@@||tracking.epicgames.com^'                 => 0,
    '@@||tracker.eu.org^'                         => 1, // force-whitelisted: BT tracker, has hosts such as 2.tracker.eu.org
    '@@||stats.uptimerobot.com^'                  => 0, // uptimerobot monitoring
    '@@||track.sendcloud.org^'                    => 0, // e-mail unsubscribe domain
    '@@||log.mmstat.com^'                         => 0, // fixes Youku video reporting that cookies are disabled
    '@@||adm.10jqka.com.cn^'                      => 0, // 10jqka (Tonghuashun)
    '@@||center-h5api.m.taobao.com^'              => 1, // H5 pages
    '@@||app.adjust.com^'                         => 1, // https://github.com/AdguardTeam/AdGuardSDNSFilter/pull/186
    '@@||widget.weibo.com^'                       => 0, // Weibo external links
    '@@||uland.taobao.com^'                       => 1, // Taobao coupon #83
    '@@||advertisement.taobao.com^'               => 1, // CNAME target was blocked, breaking s.click.taobao.com and related services
    '@@||baozhang.baidu.com^'                     => 1, // CNAME e.shifen.com
    '@@||tongji.edu.cn^'                          => 1, // Tongji University
    '@@||tongji.cn^'                              => 1, // Tongji University #281
    '@@||ad.siemens.com.cn^'                      => 1, // Siemens download center
    '@@||sdkapi.sms.mob.com^'                     => 1, // SMS verification codes #127
    '@@||stats.gov.cn^'                           => 1, // National Bureau of Statistics #144
    '@@||tj.gov.cn^'                              => 1,
    '@@||sax.sina.com.cn^'                        => 1, // #155
    '@@||api.ad-gone.com^'                        => 1, // #207
    '@@||news-app.abumedia.yql.yahoo.com^'        => 1, // #206
    '@@||meizu.coapi.moji.com^'                   => 1, // #217
    '@@||track.cpau.info^'                        => 1, // #251
    '@@||passport.bobo.com^'                      => 1, // #265
    '@@||stat.jseea.cn^'                          => 1, // #279
    '@@||widget.intercom.io^'                     => 1, // #280
    '@@||track.toggl.com^'                        => 1, // #307
    '@@||www.msftconnecttest.com^'                => 1, // #327
    '@@||storage.live.com^'                       => 1, // #333
    '@@||skyapi.onedrive.live.com^'               => 1, // #333
    '@@||counter-strike.net^'                     => 1, // #332
    '@@||ftp.bmp.ovh^'                            => 1, // #353
    '@@||profile*.se.360.cn^'                     => 1, // #381
    '@@||pic.iask.cn^'                            => 1, // #397
    '@@||ad.jp^'                                  => 1, // #399
    '@@||ad.azure.com^'                           => 1, // #399
    '@@||ad.cityu.edu.hk^'                        => 1, // #398
    '@@||edge-enterprise.activity.windows.com^'   => 1, // #401
    '@@||edge.activity.windows.com^'              => 1, // #401
    '@@||tracking-protection.cdn.mozilla.net^'    => 1, // #407
);

// Upstream exemption rules that anti-AD refuses to honour, i.e. a blacklist
// applied to the whitelist. Only the keys are consulted.
$ARR_WHITE_RULE_BLK_LIST = array_fill_keys(array(
    '@@||ads.nipr.ac.jp^',
), null);

// Upstream wildcard rules that anti-AD does not trust, i.e. the wildcard
// blacklist. Only the keys are consulted.
$ARR_WILD_BLK_LIST = array_fill_keys(array(
    'cnt*rambler.ru',
    'um*.com',
), null);

// This script is a command-line tool only; refuse to run under any other SAPI.
if (PHP_SAPI != 'cli') {
    die('nothing.');
}

// The single argument is the list file to process, given relative to ROOT_DIR.
// A missing $argv entry does not throw an Exception in PHP (it only raises a
// notice/warning), so it is checked explicitly rather than with try/catch.
$src_file = '';
if (!isset($argv[1]) || $argv[1] === '') {
    echo "get args failed.", "missing source file argument", "\n";
    // NOTE(review): failures exit with status 0, as before; callers that need
    // to detect errors may want a non-zero status here - confirm before changing.
    die(0);
}
$file = $argv[1];
$src_file = ROOT_DIR . $file;

if (!is_file($src_file)) {
    echo 'src_file:', $src_file, ' is not found.';
    die(0);
}

// Both upstream inputs must exist before any output is produced.
if (!is_file(WILDCARD_SRC) || !is_file(WHITERULE_SRC)) {
    echo 'key file is not found.';
    die(0);
}

// Open the source list, the upstream wildcard list and the temporary output
// file ("<source>.txt"). Stop immediately if any of them cannot be opened,
// instead of passing `false` to every later fgets()/fwrite() call.
$src_fp = fopen($src_file, 'r');
$wild_fp = fopen(WILDCARD_SRC, 'r');
$new_fp = fopen($src_file . '.txt', 'w');
if ($src_fp === false || $wild_fp === false || $new_fp === false) {
    echo 'open file failed.', "\n";
    exit(1);
}

// $wrote_wild: wildcard / regex rules already written to the output.
// $arr_wild_src: wildcard rules to apply, keyed by the bare pattern.
$wrote_wild = array();
$arr_wild_src = array();

while (!feof($wild_fp)) {
    $wild_row = fgets($wild_fp, 512);
    if (empty($wild_row)) {
        continue;
    }

    // Match against the line without its terminator so that CRLF-terminated
    // files are accepted too (the pattern is anchored with `$`).
    $wild_line = rtrim($wild_row, "\r\n");
    if (!preg_match('/^\|\|?([\w\-\.\*]+?)\^(\$([^=]+?,)?(image|third-party|script)(,[^=]+)?)?$/', $wild_line, $matches)) {
        continue;
    }

    // Upstream wildcard rules that are explicitly distrusted.
    if (array_key_exists($matches[1], $ARR_WILD_BLK_LIST)) {
        continue;
    }

    // Skip wildcards already covered by a regex rule; the first hit decides,
    // so there is no need to test the remaining patterns.
    $bare_domain = str_replace('*', '', $matches[1]);
    $matched = false;
    foreach ($ARR_REGEX_LIST as $regex_str => $regex_row) {
        if (preg_match($regex_str, $bare_domain)) {
            $matched = true;
            break;
        }
    }
    if ($matched) {
        continue;
    }

    $arr_wild_src[$matches[1]] = $wild_row;
}
fclose($wild_fp);

// Hand-maintained wildcards are appended after (and override) upstream ones.
$arr_wild_src = array_merge($arr_wild_src, $ARR_MERGED_WILD_LIST);

// $insert_pos:   byte offset in the new file where the "!Total lines:" header starts.
// $written_size: bytes written to the new file so far.
// $line_count:   number of rule lines written (header/comment lines are not counted).
$insert_pos = $written_size = $line_count = 0;

// Compile each wildcard into a regex once instead of once per source row.
// The pattern is anchored on both sides: a leading "|" and the trailing "^"
// separator. Without the "^", "ad-*.com" would also swallow "||ad-x.com.cn^"
// or "||ad-x.community.org^", which the wildcard rule does not cover.
$wild_patterns = array();
foreach ($arr_wild_src as $core_str => $wild_row) {
    $match_rule = str_replace(array('.', '*'), array('\\.', '.*'), $core_str);
    $wild_patterns[$core_str] = '/\|' . $match_rule . '\^/';
}

while (!feof($src_fp)) {
    $row = fgets($src_fp, 512);
    if (empty($row)) {
        continue;
    }

    // Remember where the line-count header lands so it can be updated later.
    if (substr($row, 0, 13) === '!Total lines:') {
        $insert_pos = $written_size;
    }

    // Anything that is not a "|..." rule (comments, headers) is copied verbatim.
    if (!preg_match('/^\|.+?/', $row)) {
        $written_size += fwrite($new_fp, $row);
        continue;
    }

    // Rows are expected to look like "||domain^": strip the leading "||" and
    // the trailing "^" to get the bare domain for the regex rules.
    $bare_domain = substr(trim($row), 2, -1);
    $matched = false;
    foreach ($ARR_REGEX_LIST as $regex_str => $regex_row) {
        if (!preg_match($regex_str, $bare_domain)) {
            continue;
        }
        $matched = true;
        // Every matching regex is emitted (once), not only the first one.
        if (!array_key_exists($regex_str, $wrote_wild)) {
            $written_size += fwrite($new_fp, "{$regex_str}\n");
            $line_count++;
            $wrote_wild[$regex_str] = 1;
        }
    }
    if ($matched) {
        continue;
    }

    // Walk the wildcard rules in order. Each rule visited is written to the
    // output the first time it is reached; the walk stops at the first rule
    // that covers this row, in which case the row itself is dropped.
    foreach ($wild_patterns as $core_str => $wild_pattern) {
        if (!array_key_exists($core_str, $wrote_wild)) {
            $written_size += fwrite($new_fp, "||{$core_str}^\n");
            $line_count++;
            $wrote_wild[$core_str] = 1;
        }
        if (preg_match($wild_pattern, $row)) {
            $matched = true;
            break;
        }
    }
    if ($matched) {
        continue;
    }

    // Not covered by any regex or wildcard rule: keep the row as it is.
    $written_size += fwrite($new_fp, $row);
    $line_count++;
}

// Write whitelist ("@@") rules as needed.
// Upstream whitelist rows get flag 0 (conditional); entries of
// $ARR_WHITE_RULE_LIST override them and may carry flag 1 (always written).
$wrote_whitelist = array();
$whiterule = file(WHITERULE_SRC, FILE_SKIP_EMPTY_LINES | FILE_IGNORE_NEW_LINES);
if ($whiterule === false) {
    // An unreadable file is treated as empty rather than crashing array_fill_keys().
    $whiterule = array();
}
$whiterule = array_fill_keys($whiterule, 0);
$ARR_WHITE_RULE_LIST = array_merge($whiterule, $ARR_WHITE_RULE_LIST);

// $wrote_wild no longer changes, so compile its entries once: keys starting
// with "/" are already regexes, everything else is a wildcard to translate.
$written_rule_patterns = array();
foreach ($wrote_wild as $core_str => $val) {
    if (substr($core_str, 0, 1) === '/') {
        $written_rule_patterns[] = $core_str;
    } else {
        $match_rule = str_replace(array('.', '*'), array('\\.', '.*'), $core_str);
        $written_rule_patterns[] = "/^{$match_rule}/";
    }
}

foreach ($ARR_WHITE_RULE_LIST as $row => $v) {
    if (empty($row) || substr($row, 0, 2) !== '@@') {
        continue;
    }

    $matches = array();
    if (!preg_match('/@@\|\|([0-9a-z\.\-\*]+?)\^/', $row, $matches)) {
        continue;
    }

    // Whitelist entries anti-AD refuses to honour.
    if (array_key_exists("@@||{$matches[1]}^", $ARR_WHITE_RULE_BLK_LIST)) {
        continue;
    }

    // Forced entries are always written.
    if ($v === 1) {
        $wrote_whitelist[$matches[1]] = null;
        fwrite($new_fp, "@@||{$matches[1]}^\n");
        $line_count++;
        continue;
    }

    // Conditional entries are written at most once.
    if (array_key_exists($matches[1], $wrote_whitelist)) {
        continue;
    }

    // A conditional entry is only needed when an emitted wildcard/regex rule
    // covers its domain. The outcome does not depend on which rule matched,
    // so the first match decides.
    foreach ($written_rule_patterns as $match_rule) {
        if (!preg_match($match_rule, $matches[1])) {
            continue;
        }

        $domain = addressMaker::extract_main_domain($matches[1]);
        // Do not whitelist anything whose main domain is blacklisted.
        // NOTE(review): the previous condition also had an
        // `is_array($black_domain_list[$domain]) && in_array(...)` clause, but
        // it was only evaluated when the key was absent, so it could never be
        // true and only raised an undefined-index warning. Possibly the intent
        // was to block just the listed sub-domains - confirm against
        // black_domain_list.php.
        if (!array_key_exists($domain, $black_domain_list)) {
            $wrote_whitelist[$matches[1]] = null;
            fwrite($new_fp, "@@||{$matches[1]}^\n");
            $line_count++;
        }
        break;
    }
}

fclose($src_fp);
fclose($new_fp);

// Update the "!Total lines:" header in the temporary file. The header line is
// replaced as a whole instead of being overwritten in place, so a new line
// that is longer or shorter than the placeholder can neither clobber the
// following rule nor leave stray bytes behind. Checking the text found at
// $insert_pos also covers a header that is the very first line (offset 0).
$tmp_file = $src_file . '.txt';
$content = file_get_contents($tmp_file);
if ($content !== false && substr($content, $insert_pos, 13) === '!Total lines:') {
    $line_end = strpos($content, "\n", $insert_pos);
    $tail = ($line_end === false) ? '' : substr($content, $line_end + 1);
    $content = substr($content, 0, $insert_pos) . "!Total lines: {$line_count}\n" . $tail;
    file_put_contents($tmp_file, $content);
}

// Replace the source list with the generated file; do not publish a checksum
// for a file that was not actually replaced.
if (!rename($tmp_file, $src_file)) {
    echo 'rename failed: ', $tmp_file, "\n";
    exit(1);
}
file_put_contents($src_file . '.md5', md5_file($src_file));

echo 'Time cost:', microtime(true) - START_TIME, "s, at ", date('m-d H:i:s'), "\n";