5)) { $frameDepth = $frameDepthDefault; } $actualFrameDepth = 0; $urls = array(); get_links($targetUrl, $frameDepth); print_links();

/**
 * Scans the page at $targetUrl for anchors and appends the matching ones to
 * the global $urls list, descending into iframes/frames while $depth > 0.
 *
 * An anchor is kept when the path component of its absolute URL matches the
 * global $linkFilter, which is used as a case-insensitive regular expression
 * delimited by "@". The deepest frame level actually visited
 * ($frameDepth - $depth) is recorded in the global $actualFrameDepth.
 *
 * NOTE(review): $linkFilter is embedded in the pattern as-is, so a filter
 * containing an unescaped "@" produces an invalid pattern — confirm whether
 * callers guarantee this cannot happen.
 *
 * @param string $targetUrl URL of the document to scan; a falsy value is a no-op.
 * @param int    $depth     Number of frame levels that may still be followed.
 *
 * @return void
 */
function get_links($targetUrl, $depth)
{
    global $urls, $linkFilter, $frameDepth, $actualFrameDepth;

    if (! $targetUrl) {
        return;
    }

    // Track the deepest frame level reached so far.
    $level = $frameDepth - $depth;
    if ($level > $actualFrameDepth) {
        $actualFrameDepth = $level;
    }

    // A failed or empty fetch must not reach loadHTML(): on PHP 8 an empty
    // source raises a ValueError, which would abort the whole scan.
    $html = file_get_contents($targetUrl);
    if ($html === false || trim($html) === '') {
        return;
    }

    // Collect libxml parse warnings internally instead of silencing the call
    // with "@", then restore whatever error mode was active before.
    $previousErrorMode = libxml_use_internal_errors(true);
    $dom = new DOMDocument();
    $loaded = $dom->loadHTML($html);
    libxml_clear_errors();
    libxml_use_internal_errors($previousErrorMode);
    if (! $loaded) {
        return;
    }

    $xpath = new DOMXPath($dom);

    // Relative URLs resolve against the last <base href> in the head, or
    // against the page URL when no usable href is declared (for example a
    // <base> that only carries a target attribute).
    $base = $targetUrl;
    $bases = $xpath->evaluate("/html/head//base");
    if ($bases->length > 0) {
        $declaredBase = $bases->item($bases->length - 1)->getAttribute('href');
        if ($declaredBase !== '') {
            $base = $declaredBase;
        }
    }

    if ($depth > 0) {
        // iframes are looked up inside <body>; <frame> elements are looked up
        // anywhere below <html> because frames of a <frameset> document are
        // presumably not placed under <body> by the parser — a superset of
        // the former "/html/body//frame" query.
        $frameQueries = array("/html/body//iframe", "/html//frame");
        foreach ($frameQueries as $frameQuery) {
            foreach ($xpath->evaluate($frameQuery) as $frame) {
                $src = $frame->getAttribute('src');
                if ($src === '') {
                    // Nothing to fetch (e.g. srcdoc-only or script-filled frame).
                    continue;
                }
                $url = make_absolute($src, $base);
                // Do not recurse into the page currently being scanned.
                if ($url != $targetUrl) {
                    get_links($url, $depth - 1);
                }
            }
        }
    }

    $pattern = "@" . $linkFilter . "@i";
    // Only anchors that actually carry an href are links; named anchors
    // (<a name="...">) are ignored.
    foreach ($xpath->evaluate("/html/body//a[@href]") as $anchor) {
        $absolute = make_absolute($anchor->getAttribute('href'), $base);
        // parse_url() yields null/false when there is no path or the URL is
        // malformed; normalise to a string before matching.
        $path = (string) parse_url($absolute, PHP_URL_PATH);
        if (preg_match($pattern, $path)) {
            $urls[] = $absolute;
        }
    }
}

function print_links() { global $urls, $targetUrl, $linkFilter, $frameDepth, $viewerUrl, $doubleEncodeLink, $headlessPage, $help, $actualFrameDepth; $labelColor = "lightgray"; $urls = array_unique($urls); sort($urls); print("" . "\n" . "" . "\n" . "
" . "\n"); if (($numUrls = count($urls)) == 1) { print "" . "\n"); print("targetUrl = target url to scan for links" . "\n"); print("linkFilter = filter for selecting links" . "\n"); print("frameDepth = maximum recursive depth to scan frames" . "\n"); print("viewerUrl = viewer url to open links" . "\n"); print("doubleEncodeLink = true|false" . "\n"); print("headlessPage = true|false" . "\n"); print("help = true" . "\n"); print("\n"); print("" . "\n"); } if (!$headlessPage) { print "targetUrl " . $targetUrl . "