总结一下最近做的无头浏览器(chrome headless)的检测以及反检测。
无头浏览器的检测应该是爬虫中非常重要的一块,一开始接触到的是 not-possible-to-block-chrome-headless 这个文章,大概是最初级的无头浏览器检测方案。不过在后续的爬虫中,有一种魔高一尺,道高一丈的感觉,陆续发现很多站点都能通过各种其他手法检测无头浏览器。
再后来,看一些网站的js,发现了他们会对很多字段进行收集,都与fingerprintjs2 收集的差不多,才算是豁然开朗。
故在此记录一下可以被用于检测无头浏览器的字段,以及如何绕过去。
-
userAgent
ua 可以说是最基础的字段了。然而在无头浏览器中,单纯修改 ua 还不行,需要 ua 和一些相关字段(如 navigator.platform)跟着一起修改才行。当然,ua 本身还是要尽量模仿桌面浏览器的。
识别:通过 navigator.userAgent 读取 ua,以此来识别是否为爬虫
反识别:可以修改属性来绕过
Object.defineProperty(navigator, 'userAgent', { get: () => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36' });
-
webdriver
The webdriver read-only property of the navigator interface indicates whether the user agent is controlled by automation.
识别:桌面 chrome 浏览器的 navigator.webdriver 返回为 undefined,故可以通过下面的表达式判断
!!navigator.webdriver
反识别:
Object.defineProperty(navigator, 'webdriver', { get: () => false });
或者:
delete navigator.__proto__.webdriver
注意:发现过有网站是检测 navigator.webdriver 是否严格为 undefined,而不是简单地通过 !!navigator.webdriver 进行检测的。这种情况下返回 false 的 getter 仍然会被识别,需要使用上面 delete 的方式。
-
language
不同浏览器对language 支持不同,chrome只有navigator.language 参考
识别: navigator.language || navigator.userLanguage || navigator.browserLanguage || navigator.systemLanguage 是否存在
反识别:
Object.defineProperty(navigator, 'language', { get: () => "zh-CN", });
-
colorDepth
window.screen.colorDepth 屏幕颜色深度,一般收集指纹的时候会用到。无头浏览器这个字段一般与桌面浏览器相同
-
deviceMemory
只读属性返回千兆字节为单位的大概的机器内存。这个值是一个2的次方数除以1024,舍去小数点的近似值。不过这还只是个实验特性,不一定所有浏览器都有。指纹收集的时候会使用到,以防万一可以设个值
navigator.deviceMemory
反识别:
Object.defineProperty(navigator, 'deviceMemory', { get: () => 8 });
-
hardwareConcurrency
浏览器环境所拥有的CPU核心数
navigator.hardwareConcurrency
反识别:
Object.defineProperty(navigator, 'hardwareConcurrency', { get: () => 8 });
-
screenResolution
会被用于指纹
[window.screen.width, window.screen.height]
-
availableScreenResolution
会被用于指纹
[window.screen.availHeight, window.screen.availWidth]
-
timezoneOffset
协调世界时(UTC)相对于当前时区的时间差值,单位为分钟。
new Date().getTimezoneOffset()
-
timezone
new window.Intl.DateTimeFormat().resolvedOptions().timeZone
-
sessionStorage *
只判断是否有sessionStorage
!!window.sessionStorage
-
localStorage *
!!window.localStorage
-
indexedDb
!!window.indexedDB
-
addBehavior
该字段为IE特有 参考
!!(document.body && document.body.addBehavior)
-
openDatabase *
会返回一个 database,可以执行 sql。指纹只判断是否有这个方法
!!window.openDatabase
-
cpuClass
cpu 类型,似乎chrome 浏览器返回为undefined
navigator.cpuClass
-
platform *
navigator.platform
识别: if (navigator.platform === 'Linux x86_64') —— Linux x86_64 占有率较小,如果 platform 为 'Linux x86_64',大概率为爬虫
反识别:
Object.defineProperty(navigator, 'platform', { get: () => 'MacIntel' });
-
plugins
navigator.plugins
plugin对象有name filename description version 属性。可以使用plugin[0] 的方式获取MimeType对象
-
mimeTypes
navigator.mimeTypes
mimeTypes对象需要和plugin 对象对应,有type, suffixes, description, enabledPlugin 属性
-
canvas
获取canvas 指纹 参考, 通过指纹来判别一个浏览器,并根据指纹的一些特征识别是何种浏览器。
识别:
/**
 * Fingerprint: builds a browser fingerprint hash from navigator/screen/storage/
 * plugin/canvas signals.
 *
 * Exported UMD-style: via `module.exports` when CommonJS is available, via
 * `define` when an AMD loader is present, otherwise as `context.Fingerprint`.
 *
 * Written in ES5 on purpose: the code itself probes for Internet Explorer
 * (ActiveXObject, addBehavior), so it must stay parseable there.
 */
(function (name, context, definition) {
  if (typeof module !== 'undefined' && module.exports) {
    module.exports = definition();
  } else if (typeof define === 'function' && define.amd) {
    define(definition);
  } else {
    context[name] = definition();
  }
})('Fingerprint', this, function () {
  'use strict';

  /**
   * @param {Object|Function} [options] Either a custom hasher function
   *   `(string, seed) => hash`, or an object with the optional keys `hasher`,
   *   `screen_resolution`, `screen_orientation`, `canvas` and `ie_activex`.
   */
  var Fingerprint = function (options) {
    var nativeForEach, nativeMap;
    nativeForEach = Array.prototype.forEach;
    nativeMap = Array.prototype.map;

    /**
     * Iterates over arrays, array-likes (anything with a numeric `length`)
     * and plain objects, calling `iterator(value, keyOrIndex, obj)`.
     */
    this.each = function (obj, iterator, context) {
      // Nothing to iterate; also avoids a property access on null/undefined.
      if (obj === null || obj === undefined) {
        return;
      }
      if (nativeForEach && obj.forEach === nativeForEach) {
        obj.forEach(iterator, context);
      } else if (obj.length === +obj.length) {
        for (var i = 0, l = obj.length; i < l; i++) {
          iterator.call(context, obj[i], i, obj);
        }
      } else {
        for (var key in obj) {
          if (obj.hasOwnProperty(key)) {
            iterator.call(context, obj[key], key, obj);
          }
        }
      }
    };

    /**
     * Maps over anything `each` can iterate and returns a real array.
     */
    this.map = function (obj, iterator, context) {
      var results = [];
      // Not using strict equality so that this acts as a
      // shortcut to checking for `null` and `undefined`.
      if (obj == null) return results;
      if (nativeMap && obj.map === nativeMap) return obj.map(iterator, context);
      this.each(obj, function (value, index, list) {
        results[results.length] = iterator.call(context, value, index, list);
      });
      return results;
    };

    if (typeof options == 'object') {
      this.hasher = options.hasher;
      this.screen_resolution = options.screen_resolution;
      this.screen_orientation = options.screen_orientation;
      this.canvas = options.canvas;
      this.ie_activex = options.ie_activex;
    } else if (typeof options == 'function') {
      this.hasher = options;
    }
  };

  Fingerprint.prototype = {
    /**
     * Collects all signals, joins them with '###' and hashes the result with
     * the custom hasher if one was supplied, otherwise with MurmurHash3
     * (seed 31).
     *
     * @return {*} Whatever the hasher returns; a 32-bit positive integer for
     *   the built-in hasher.
     */
    get: function () {
      var keys = [];
      keys.push(navigator.userAgent);
      keys.push(navigator.language);
      keys.push(screen.colorDepth);
      if (this.screen_resolution) {
        var resolution = this.getScreenResolution();
        if (typeof resolution !== 'undefined') { // headless browsers, such as phantomjs
          keys.push(resolution.join('x'));
        }
      }
      keys.push(new Date().getTimezoneOffset());
      keys.push(this.hasSessionStorage());
      keys.push(this.hasLocalStorage());
      keys.push(this.hasIndexDb());
      // body might not be defined at this point or removed programmatically
      if (document.body) {
        keys.push(typeof(document.body.addBehavior));
      } else {
        keys.push(typeof undefined);
      }
      keys.push(typeof(window.openDatabase));
      keys.push(navigator.cpuClass);
      keys.push(navigator.platform);
      keys.push(navigator.doNotTrack);
      keys.push(this.getPluginsString());
      if (this.canvas && this.isCanvasSupported()) {
        keys.push(this.getCanvasFingerprint());
      }
      if (this.hasher) {
        return this.hasher(keys.join('###'), 31);
      } else {
        return this.murmurhash3_32_gc(keys.join('###'), 31);
      }
    },

    /**
     * JS Implementation of MurmurHash3 (r136) (as of May 20, 2011)
     *
     * @author <a href="mailto:gary.court@gmail.com">Gary Court</a>
     * @see http://github.com/garycourt/murmurhash-js
     * @author <a href="mailto:aappleby@gmail.com">Austin Appleby</a>
     * @see http://sites.google.com/site/murmurhash/
     *
     * @param {string} key ASCII only
     * @param {number} seed Positive integer only
     * @return {number} 32-bit positive integer hash
     */
    murmurhash3_32_gc: function (key, seed) {
      var remainder, bytes, h1, h1b, c1, c2, k1, i;

      remainder = key.length & 3; // key.length % 4
      bytes = key.length - remainder;
      h1 = seed;
      c1 = 0xcc9e2d51;
      c2 = 0x1b873593;
      i = 0;

      while (i < bytes) {
        k1 =
          ((key.charCodeAt(i) & 0xff)) |
          ((key.charCodeAt(++i) & 0xff) << 8) |
          ((key.charCodeAt(++i) & 0xff) << 16) |
          ((key.charCodeAt(++i) & 0xff) << 24);
        ++i;

        k1 = ((((k1 & 0xffff) * c1) + ((((k1 >>> 16) * c1) & 0xffff) << 16))) & 0xffffffff;
        k1 = (k1 << 15) | (k1 >>> 17);
        k1 = ((((k1 & 0xffff) * c2) + ((((k1 >>> 16) * c2) & 0xffff) << 16))) & 0xffffffff;

        h1 ^= k1;
        h1 = (h1 << 13) | (h1 >>> 19);
        h1b = ((((h1 & 0xffff) * 5) + ((((h1 >>> 16) * 5) & 0xffff) << 16))) & 0xffffffff;
        h1 = (((h1b & 0xffff) + 0x6b64) + ((((h1b >>> 16) + 0xe654) & 0xffff) << 16));
      }

      k1 = 0;

      // The cases fall through on purpose: each one folds in one tail byte.
      switch (remainder) {
        case 3:
          k1 ^= (key.charCodeAt(i + 2) & 0xff) << 16;
        // falls through
        case 2:
          k1 ^= (key.charCodeAt(i + 1) & 0xff) << 8;
        // falls through
        case 1:
          k1 ^= (key.charCodeAt(i) & 0xff);

          k1 = (((k1 & 0xffff) * c1) + ((((k1 >>> 16) * c1) & 0xffff) << 16)) & 0xffffffff;
          k1 = (k1 << 15) | (k1 >>> 17);
          k1 = (((k1 & 0xffff) * c2) + ((((k1 >>> 16) * c2) & 0xffff) << 16)) & 0xffffffff;
          h1 ^= k1;
      }

      h1 ^= key.length;

      h1 ^= h1 >>> 16;
      h1 = (((h1 & 0xffff) * 0x85ebca6b) + ((((h1 >>> 16) * 0x85ebca6b) & 0xffff) << 16)) & 0xffffffff;
      h1 ^= h1 >>> 13;
      h1 = ((((h1 & 0xffff) * 0xc2b2ae35) + ((((h1 >>> 16) * 0xc2b2ae35) & 0xffff) << 16))) & 0xffffffff;
      h1 ^= h1 >>> 16;

      return h1 >>> 0;
    },

    // https://bugzilla.mozilla.org/show_bug.cgi?id=781447
    hasLocalStorage: function () {
      try {
        return !!window.localStorage;
      } catch (e) {
        return true; // SecurityError when referencing it means it exists
      }
    },

    hasSessionStorage: function () {
      try {
        return !!window.sessionStorage;
      } catch (e) {
        return true; // SecurityError when referencing it means it exists
      }
    },

    hasIndexDb: function () {
      try {
        return !!window.indexedDB;
      } catch (e) {
        return true; // SecurityError when referencing it means it exists
      }
    },

    isCanvasSupported: function () {
      var elem = document.createElement('canvas');
      return !!(elem.getContext && elem.getContext('2d'));
    },

    isIE: function () {
      if (navigator.appName === 'Microsoft Internet Explorer') {
        return true;
      } else if (navigator.appName === 'Netscape' && /Trident/.test(navigator.userAgent)) { // IE 11
        return true;
      }
      return false;
    },

    getPluginsString: function () {
      if (this.isIE() && this.ie_activex) {
        return this.getIEPluginsString();
      } else {
        return this.getRegularPluginsString();
      }
    },

    /**
     * Serialises navigator.plugins as
     * `name::description::type~suffixes,...` entries joined with ';'.
     */
    getRegularPluginsString: function () {
      return this.map(navigator.plugins, function (p) {
        var mimeTypes = this.map(p, function (mt) {
          return [mt.type, mt.suffixes].join('~');
        }).join(',');
        return [p.name, p.description, mimeTypes].join('::');
      }, this).join(';');
    },

    /**
     * Probes a fixed list of ActiveX ProgIDs; entries that cannot be
     * instantiated contribute `null` to the joined string.
     */
    getIEPluginsString: function () {
      if (window.ActiveXObject) {
        var names = [
          'ShockwaveFlash.ShockwaveFlash', // flash plugin
          'AcroPDF.PDF', // Adobe PDF reader 7+
          'PDF.PdfCtrl', // Adobe PDF reader 6 and earlier, brrr
          'QuickTime.QuickTime', // QuickTime
          // 5 versions of real players
          'rmocx.RealPlayer G2 Control',
          'rmocx.RealPlayer G2 Control.1',
          'RealPlayer.RealPlayer(tm) ActiveX Control (32-bit)',
          'RealVideo.RealVideo(tm) ActiveX Control (32-bit)',
          'RealPlayer',
          'SWCtl.SWCtl', // ShockWave player
          'WMPlayer.OCX', // Windows media player
          'AgControl.AgControl', // Silverlight
          'Skype.Detection'
        ];

        // starting to detect plugins in IE
        return this.map(names, function (name) {
          try {
            new ActiveXObject(name);
            return name;
          } catch (e) {
            return null;
          }
        }).join(';');
      } else {
        return ""; // behavior prior version 0.5.0, not breaking backwards compat.
      }
    },

    /**
     * @return {Array} [height, width]; when `screen_orientation` is set the
     *   larger dimension comes first.
     */
    getScreenResolution: function () {
      var resolution;
      if (this.screen_orientation) {
        resolution = (screen.height > screen.width)
          ? [screen.height, screen.width]
          : [screen.width, screen.height];
      } else {
        resolution = [screen.height, screen.width];
      }
      return resolution;
    },

    /**
     * Draws fixed text and a rectangle on an off-DOM canvas and returns its
     * data URL.
     */
    getCanvasFingerprint: function () {
      var canvas = document.createElement('canvas');
      var ctx = canvas.getContext('2d');
      // https://www.browserleaks.com/canvas#how-does-it-work
      var txt = 'http://valve.github.io';
      ctx.textBaseline = "top";
      ctx.font = "14px 'Arial'";
      // The second assignment wins; both are kept so the drawing sequence
      // stays exactly as published.
      ctx.textBaseline = "alphabetic";
      ctx.fillStyle = "#f60";
      ctx.fillRect(125, 1, 62, 20);
      ctx.fillStyle = "#069";
      ctx.fillText(txt, 2, 15);
      ctx.fillStyle = "rgba(102, 204, 0, 0.7)";
      ctx.fillText(txt, 4, 17);
      return canvas.toDataURL();
    }
  };

  return Fingerprint;
});

// Demo: compute the fingerprint including the canvas component.
new Fingerprint({canvas: true}).get();
反识别: 通过重写toBlob 以及toDataURL 方法来解决
/**
 * Wraps HTMLCanvasElement.prototype.toBlob and toDataURL so that every export
 * first perturbs the canvas pixels with a small random per-channel shift,
 * which makes the exported canvas fingerprint differ between calls.
 *
 * The noise is written back into the canvas itself (putImageData), so it
 * accumulates across repeated exports of the same canvas.
 */
var inject = function () {
  // Random integer in [-5, 4].
  var randomShift = function () {
    return Math.floor(Math.random() * 10) - 5;
  };

  // Adds one random offset per channel to every pixel of `canvas`.
  // Does nothing when the canvas has no readable 2D bitmap.
  var addNoise = function (canvas) {
    var width = canvas.width, height = canvas.height;
    // getImageData throws on a zero-sized rectangle, while the native
    // export methods handle empty canvases fine, so skip the noise there.
    if (!width || !height) {
      return;
    }
    // getContext("2d") returns null when the canvas is already bound to
    // another context type (e.g. webgl); pass those through untouched.
    var context = canvas.getContext("2d");
    if (!context) {
      return;
    }
    var shift = [randomShift(), randomShift(), randomShift(), randomShift()];
    var imageData = context.getImageData(0, 0, width, height);
    var data = imageData.data;
    // data is a flat RGBA sequence: channel index is n % 4.
    for (var n = 0; n < data.length; n++) {
      data[n] = data[n] + shift[n % 4];
    }
    context.putImageData(imageData, 0, 0);
  };

  var overwrite = function (name) {
    const OLD = HTMLCanvasElement.prototype[name];
    // Only `value` is given, so the existing property attributes are kept.
    Object.defineProperty(HTMLCanvasElement.prototype, name, {
      "value": function () {
        addNoise(this);
        return OLD.apply(this, arguments);
      }
    });
  };

  overwrite('toBlob');
  overwrite('toDataURL');
};
inject();
-
webgl
TODO
-
webglVendorAndRenderer
返回显卡型号相关信息
TODO
-
adBlock
检测adBlock 插件是否存在
-
hasLiedLanguages
判断navigator.language 是否为navigator.languages数组的第一个
-
hasLiedResolution
window.screen.width < window.screen.availWidth || window.screen.height < window.screen.availHeight
-
hasLiedOs
识别: 判断ua 和platform 之间的关联。
/**
 * Reports whether the OS implied by navigator.userAgent contradicts the other
 * OS-related signals: touch support, navigator.oscpu, navigator.platform and
 * the presence of navigator.plugins.
 *
 * @returns {boolean} true when an inconsistency is found.
 */
var getHasLiedOs = function () {
  var userAgent = navigator.userAgent.toLowerCase();
  var oscpu = navigator.oscpu;
  var platform = navigator.platform.toLowerCase();
  var os;

  // We extract the OS from the user agent (respect the order of the if else if statement)
  if (userAgent.indexOf('windows phone') >= 0) {
    os = 'Windows Phone';
  } else if (userAgent.indexOf('win') >= 0) {
    os = 'Windows';
  } else if (userAgent.indexOf('android') >= 0) {
    os = 'Android';
  } else if (userAgent.indexOf('linux') >= 0 || userAgent.indexOf('cros') >= 0) {
    os = 'Linux';
  } else if (userAgent.indexOf('iphone') >= 0 || userAgent.indexOf('ipad') >= 0) {
    os = 'iOS';
  } else if (userAgent.indexOf('mac') >= 0) {
    os = 'Mac';
  } else {
    os = 'Other';
  }

  // We detect if the person uses a mobile device
  var mobileDevice = (('ontouchstart' in window) ||
    (navigator.maxTouchPoints > 0) ||
    (navigator.msMaxTouchPoints > 0));

  if (mobileDevice && os !== 'Windows Phone' && os !== 'Android' && os !== 'iOS' && os !== 'Other') {
    return true;
  }

  // We compare oscpu with the OS extracted from the UA
  if (typeof oscpu !== 'undefined') {
    oscpu = oscpu.toLowerCase();
    if (oscpu.indexOf('win') >= 0 && os !== 'Windows' && os !== 'Windows Phone') {
      return true;
    } else if (oscpu.indexOf('linux') >= 0 && os !== 'Linux' && os !== 'Android') {
      return true;
    } else if (oscpu.indexOf('mac') >= 0 && os !== 'Mac' && os !== 'iOS') {
      return true;
    } else if ((oscpu.indexOf('win') === -1 && oscpu.indexOf('linux') === -1 && oscpu.indexOf('mac') === -1) !== (os === 'Other')) {
      return true;
    }
  }

  // We compare platform with the OS extracted from the UA
  if (platform.indexOf('win') >= 0 && os !== 'Windows' && os !== 'Windows Phone') {
    return true;
  } else if ((platform.indexOf('linux') >= 0 || platform.indexOf('android') >= 0 || platform.indexOf('pike') >= 0) && os !== 'Linux' && os !== 'Android') {
    return true;
  } else if ((platform.indexOf('mac') >= 0 || platform.indexOf('ipad') >= 0 || platform.indexOf('ipod') >= 0 || platform.indexOf('iphone') >= 0) && os !== 'Mac' && os !== 'iOS') {
    return true;
  } else {
    var platformIsOther = platform.indexOf('win') < 0 &&
      platform.indexOf('linux') < 0 &&
      platform.indexOf('mac') < 0 &&
      platform.indexOf('iphone') < 0 &&
      platform.indexOf('ipad') < 0;
    if (platformIsOther !== (os === 'Other')) {
      return true;
    }
  }

  // A missing plugins object is only accepted for Windows user agents.
  return typeof navigator.plugins === 'undefined' && os !== 'Windows' && os !== 'Windows Phone';
};
反识别: 确保ua 以及platform 之间的关系
-
hasLiedBrowser
识别: 先根据 ua 判断 browser,再根据一些神奇的特性去交叉验证。
/**
 * Reports whether the browser implied by navigator.userAgent contradicts
 * engine-specific traits: navigator.productSub, the length of
 * eval.toString(), and whether a thrown string exposes toSource().
 *
 * @returns {boolean} true when an inconsistency is found.
 */
var getHasLiedBrowser = function () {
  var userAgent = navigator.userAgent.toLowerCase();
  var productSub = navigator.productSub;

  // we extract the browser from the user agent (respect the order of the tests)
  var browser;
  if (userAgent.indexOf('firefox') >= 0) {
    browser = 'Firefox';
  } else if (userAgent.indexOf('opera') >= 0 || userAgent.indexOf('opr') >= 0) {
    browser = 'Opera';
  } else if (userAgent.indexOf('chrome') >= 0) {
    browser = 'Chrome';
  } else if (userAgent.indexOf('safari') >= 0) {
    browser = 'Safari';
  } else if (userAgent.indexOf('trident') >= 0) {
    browser = 'Internet Explorer';
  } else {
    browser = 'Other';
  }

  if ((browser === 'Chrome' || browser === 'Safari' || browser === 'Opera') && productSub !== '20030107') {
    return true;
  }

  // eval is only stringified here, never called. The length of the native
  // source text is checked against the browsers allowed for each value:
  // 37 -> Safari/Firefox, 39 -> Internet Explorer, 33 -> Chrome/Opera.
  // eslint-disable-next-line no-eval
  var tempRes = eval.toString().length;
  if (tempRes === 37 && browser !== 'Safari' && browser !== 'Firefox' && browser !== 'Other') {
    return true;
  } else if (tempRes === 39 && browser !== 'Internet Explorer' && browser !== 'Other') {
    return true;
  } else if (tempRes === 33 && browser !== 'Chrome' && browser !== 'Opera' && browser !== 'Other') {
    return true;
  }

  // We create an error to see how it is handled
  var errFirefox;
  try {
    // A string is thrown on purpose: the probe is whether the caught
    // primitive has a toSource() method.
    // eslint-disable-next-line no-throw-literal
    throw 'a';
  } catch (err) {
    try {
      err.toSource();
      errFirefox = true;
    } catch (errOfErr) {
      errFirefox = false;
    }
  }
  return errFirefox && browser !== 'Firefox' && browser !== 'Other';
};
-
touchSupport
/**
 * Collects three touch-capability signals.
 *
 * @returns {Array} [maxTouchPoints, touchEvent, touchStart] where
 *   maxTouchPoints comes from navigator.maxTouchPoints (falling back to
 *   navigator.msMaxTouchPoints, else 0), touchEvent tells whether
 *   document.createEvent('TouchEvent') succeeds, and touchStart tells
 *   whether window has an 'ontouchstart' property.
 */
var getTouchSupport = function () {
  var maxTouchPoints = 0;
  var touchEvent;
  if (typeof navigator.maxTouchPoints !== 'undefined') {
    maxTouchPoints = navigator.maxTouchPoints;
  } else if (typeof navigator.msMaxTouchPoints !== 'undefined') {
    maxTouchPoints = navigator.msMaxTouchPoints;
  }
  try {
    document.createEvent('TouchEvent');
    touchEvent = true;
  } catch (_) {
    // createEvent throws for event types the browser does not support.
    touchEvent = false;
  }
  var touchStart = 'ontouchstart' in window;
  // e.g. [0, false, false] when no touch signal is present
  return [maxTouchPoints, touchEvent, touchStart];
};
-
fonts
看有多少个font
-
audio
音频指纹 TODO