domain = '';
$this->links = array();
if( strcmp($url,'') )
$this->ConvertToHTML($url);
else
$this->html = $url;
}
/**
 * Extracts the host part of $url into $this->domain and loads the page
 * body into $this->html via getUrl().
 *
 * The URL must contain "http://" (matched case-insensitively); the host is
 * whatever follows it up to the next "/" or the end of the string.
 *
 * @param string $url Address of the page to fetch.
 * @return bool false when no host could be extracted (nothing is fetched);
 *              true otherwise. Note that true is returned even when
 *              getUrl() fails, in which case $this->html holds false.
 */
function ConvertToHTML($url)
{
    $match_domain = '_[hH][tT][tT][pP]:\/\/(.*?)(/|$)_';
    preg_match($match_domain, $url, $res);
    // When the pattern does not match, $res has no index 1; reading it
    // blindly raises an "undefined offset/array key" notice or warning.
    $this->domain = isset($res[1]) ? $res[1] : '';
    if (!$this->domain) {
        return false;
    }
    $this->html = $this->getUrl($url);
    return true;
}
/**
 * Writes the stored markup ($this->html) to the output.
 *
 * @return bool true when there was something to print, false when the
 *              stored markup has zero length.
 */
function DisplayHTML()
{
    // Guard clause: nothing stored, nothing to print.
    if (!strlen($this->html)) {
        return false;
    }

    echo $this->html;
    return true;
}
/**
 * Collects the href targets of <a> tags found in $this->html and appends
 * them, as absolute URLs, to $this->links.
 *
 * Processing per link:
 *  - links that do not contain $filter are skipped (when $filter is non-empty);
 *  - links containing "javascript:" are ignored;
 *  - protocol-relative links ("//host/path") get "http:" prepended;
 *  - links that already start with "http://" or "https://" are kept as-is;
 *  - everything else is treated as relative and prefixed with
 *    "http://" . $this->domain (adding a "/" separator when needed).
 *
 * Afterwards duplicates are removed from $this->links. When no link was
 * collected at all, the single placeholder entry 'No Data' is stored.
 *
 * @param string $filter    Substring a link must contain to be kept;
 *                          an empty string keeps every link.
 * @param bool   $sensitive Whether the $filter comparison is case-sensitive.
 * @return bool Always true.
 */
function ExtractLinks($filter, $sensitive = true)
{
    $lookfor = '/<[aA]\s.*?[hH][rR][eE][fF]=[ "\']{0,}([-.,\%_\(\)|=~;+:\?\&\/a-zA-Z0-9]+)[ "\'>]/';
    // $this->html may hold false after a failed download; cast so the
    // regex engine always receives a string subject.
    preg_match_all($lookfor, (string) $this->html, $data);

    $filter = (string) $filter;
    $found = isset($data[1]) ? $data[1] : array();

    // foreach replaces each(), which was removed in PHP 8.0.
    foreach ($found as $v) {
        // filter by
        if (strlen($filter)) {
            $pos = $sensitive ? strpos($v, $filter) : stripos($v, $filter);
            if ($pos === false) {
                continue;
            }
        }

        if (stripos($v, 'javascript:') !== false) {
            // ignore - contains javascript
            continue;
        }

        // Explicit prefix tests: the former loose "stristr(...) == $v"
        // comparisons misfired for values such as "0" (false == "0").
        if (strncmp($v, '//', 2) === 0) {
            // protocol-relative link
            $v = 'http:' . $v;
        } elseif (!preg_match('_^https?://_i', $v)) {
            // relative link: anchor it on the page's domain
            $sep = ($v[0] === '/') ? '' : '/';
            $v = 'http://' . $this->domain . $sep . $v;
        }
        // else: already absolute (http:// or https://), keep untouched

        $this->links[] = $v;
    }

    if (count($this->links)) {
        // flip + keys drops duplicates while keeping first-seen order
        $this->links = array_keys(array_flip($this->links));
    } else {
        $this->links[] = 'No Data';
    }

    return true;
}
/**
 * Reads the complete contents of $url (any path or URL fopen() accepts).
 *
 * @param string $url Location to read from.
 * @return string|false The data read, or false when the stream could not
 *                      be opened or read.
 */
function getUrl($url)
{
    $handle = fopen($url, "r");
    if (!$handle) {
        return false;
    }

    // stream_get_contents() reads to EOF in one call and reports failure
    // as false, avoiding a feof()/fread() loop that never ends on a
    // broken stream.
    $contents = stream_get_contents($handle);

    // Release the stream; the handle was previously left open.
    fclose($handle);

    return $contents;
}
}
?>