直接上代码:
/**
 * Identify a search-engine spider / crawler from a User-Agent string.
 *
 * Signatures are tested top to bottom with a case-insensitive substring
 * match (stripos) and the first hit wins. A more specific signature must
 * therefore be listed BEFORE any shorter signature it contains
 * (e.g. "msnbot-media" before "msnbot"), otherwise it can never match.
 *
 * @param mixed $tmp Raw User-Agent value. Non-scalar input (null, array, ...)
 *                   is treated as "no User-Agent".
 * @return string Human-readable crawler label, or '' when nothing matches.
 */
function isbot($tmp): string {
    // Nothing to match against: avoid passing null/arrays to stripos().
    if (!is_scalar($tmp)) {
        return '';
    }
    $userAgent = (string) $tmp;
    if ($userAgent === '') {
        return '';
    }

    // signature => label, ordered from most specific to most generic
    // within each vendor family.
    $signatures = [
        // Google
        'compatible; Googlebot/2.1' => '谷歌蜘蛛',
        'Googlebot-Mobile' => '谷歌蜘蛛',
        'Googlebot-Image' => '谷歌图片蜘蛛',
        'Mediapartners-Google' => '谷歌广告蜘蛛',
        'Adsbot-Google' => '谷歌质量蜘蛛',
        'Googlebot' => '谷歌蜘蛛',
        'GoogleOther' => '谷歌蜘蛛',
        // Baidu ("-mobile-gate" must precede "-mobile", which is its prefix)
        'baiduspider-mobile-gate' => '百度移动蜘蛛',
        'Baiduspider-mobile' => '百度蜘蛛',
        'Baidu-Thumbnail' => '百度图片蜘蛛',
        'Baiduspider-image' => '百度图片蜘蛛',
        'Baiduspider-news' => '百度新闻蜘蛛',
        'Baiduspider-video' => '百度视频蜘蛛',
        'Baidu-Transcoder' => '百度音乐蜘蛛',
        'Baiduspider' => '百度蜘蛛',
        // Soso
        'Sosospider' => '搜搜蜘蛛',
        'Sosoimagespider' => '搜搜图片蜘蛛',
        // Yahoo
        'Yahoo! Slurp China' => '雅虎中文蜘蛛',
        'Yahoo ContentMatch Crawler' => '雅虎竞价蜘蛛',
        'Yahoo-MMCrawler' => '雅虎图片蜘蛛',
        'Yahoo! Slurp' => '雅虎英文蜘蛛',
        // MSN: specialised bots first, the bare "msnbot" fallback last.
        'msnbot-media' => '微软媒体蜘蛛',
        'MSNBot-NewsBlogs' => '微软新闻及blog蜘蛛',
        'MSNBot-Academic' => '微软学术蜘蛛',
        'msnbot' => '微软蜘蛛',
        // 360
        '360Spider' => '360蜘蛛',
        // Youdao
        'YodaoBot' => '有道蜘蛛',
        'OutfoxBot' => '有道蜘蛛',
        // Sogou
        'Sogou web spider' => '搜狗蜘蛛',
        'Sogou Orion spider' => '搜狗蜘蛛',
        'Sogou inst spider' => '搜狗蜘蛛',
        'Sogou News Spider' => '搜狗新闻蜘蛛',
        'Sogou spider2' => '搜狗蜘蛛',
        'Sogou blog' => '搜狗blog蜘蛛',
        'sogou spider' => '搜狗蜘蛛',
        // Others
        'bingbot' => '必应蜘蛛',
        'EtaoSpider' => '一淘网蜘蛛',
        'Scooter' => 'Altavista蜘蛛',
        'Lycos_Spider' => 'Lycos蜘蛛',
        'FAST-WebCrawler' => 'Alltheweb蜘蛛',
        'Slurp ASPSeek ASPSeek' => 'INKTOMI蜘蛛',
        'lanshanbot' => '东方网景爬虫',
        'BSpider' => '日本爬虫',
        'Gaisbot' => 'Gaisbot',
        'ia_archiver' => 'Alexa蜘蛛',
        'altavista' => 'altavista爬虫',
        'Inktomi slurp' => 'Inktomi slurp',
        'YandexBot' => 'Yandex蜘蛛',
        'AhrefsBot' => 'AhrefsBot',
        'ezooms.bot' => 'ezooms.bot',
        'YisouSpider' => '神马搜索',
        'MJ12bot' => 'Majestic爬虫',
        'SemrushBot' => 'Semrush爬虫',
        'DuckDuckBot' => 'DuckDuckGo蜘蛛',
        'facebookexternalhit' => 'Facebook爬虫',
        'Twitterbot' => 'Twitter爬虫',
        'LinkedInBot' => 'LinkedIn爬虫',
        'Pinterestbot' => 'Pinterest爬虫',
        'DotBot' => 'DotNet爬虫',
        'PetalBot' => 'Petal爬虫',
        'Exabot' => 'Exalead爬虫',
        'SeznamBot' => 'Seznam爬虫',
        // Generic "Slurp" stays after every longer Slurp-based signature.
        'Slurp' => 'Yahoo爬虫',
        'rogerbot' => 'Moz爬虫',
        'Nimbostratus' => 'CloudFlare爬虫',
    ];

    foreach ($signatures as $signature => $label) {
        if (stripos($userAgent, $signature) !== false) {
            return $label;
        }
    }

    return '';
}
以下是判断客户端访问类型的函数:
/**
 * Classify the current HTTP request by the kind of client that sent it.
 *
 * Reads $_SERVER['HTTP_USER_AGENT'] and $_SERVER['HTTP_ACCEPT'] (both default
 * to '' when absent) and decides in this order:
 *   1. a known spider, as reported by isbot()  -> "蜘蛛:" followed by the label
 *   2. a client whose Accept header contains text/html -> "访客"
 *   3. a known HTTP library / scripting tool   -> that tool's display name
 *   4. anything else                           -> "未知"
 *
 * @return string One of the labels described above.
 */
function ClientType(): string {
    $userAgent = (string) ($_SERVER['HTTP_USER_AGENT'] ?? '');
    $httpAccept = (string) ($_SERVER['HTTP_ACCEPT'] ?? '');

    // Spiders take priority over every other classification.
    $botType = isbot($userAgent);
    if ($botType !== '') {
        return "蜘蛛:" . $botType;
    }

    // A client that asks for HTML is treated as a regular visitor.
    if (stripos($httpAccept, 'text/html') !== false) {
        return "访客";
    }

    // First match wins, so library-specific signatures are listed before the
    // generic language names they may co-occur with: "libwww-perl/6.x" must
    // hit 'libwww' rather than 'perl', and "Apache-HttpClient/4.x (Java/1.8)"
    // must hit 'apache-httpclient' rather than 'java'.
    $crawlerTools = [
        'curl' => 'cURL命令行工具',
        'wget' => 'Wget下载工具',
        'libwww' => 'libwww-perl工具',
        'go-http-client' => 'Go爬虫',
        'node-fetch' => 'Node.js爬虫',
        'okhttp' => 'OkHttp客户端',
        'apache-httpclient' => 'Apache HTTP客户端',
        'axios' => 'Axios HTTP客户端',
        'python' => 'Python爬虫',
        'java' => 'Java爬虫',
        'php' => 'PHP爬虫',
        'perl' => 'Perl爬虫',
        'ruby' => 'Ruby爬虫',
        // Most generic signature: keep it last.
        'http-client' => 'HTTP客户端',
    ];

    foreach ($crawlerTools as $key => $name) {
        if (stripos($userAgent, $key) !== false) {
            return $name;
        }
    }

    // Nothing recognised.
    return "未知";
}
发表评论 取消回复