Хабр Курсы для всех
РЕКЛАМА
Практикум, Хекслет, SkyPro, авторские курсы — собрали всех и попросили скидки. Осталось выбрать!
<?php
/*
 * Harvests e-mail addresses from the first five result pages of a Twitter
 * search and stores each address in the `mails` table.
 *
 * For every page: download it with get_web_page(), strip the HTML tags,
 * extract everything that looks like an e-mail address and INSERT it.
 * Addresses whose INSERT fails are echoed, separated by ", " (presumably
 * these are duplicates rejected by a unique key -- confirm against the
 * table schema).
 */
set_time_limit(0);
ini_set('error_reporting', E_ALL | E_STRICT);
ini_set('display_errors', 1);

// Search query shared by every request; pages after the first append "&page=N".
$searchUrl = "http://search.twitter.com/search?q=gmail.com+OR+hotmail.com++OR+%22email+me%22+OR+inbox.lv+OR+mail.ru";

// E-mail pattern. The outer parentheses are the PCRE delimiters; the flags are
// s (dot matches newline), i (case-insensitive) and U (ungreedy).
$emailPattern = "([a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+(?:[A-Z]{2}|com|org|net|gov|mil|biz|info|mobi|name|aero|jobs|museum)\b)siU";

$firstPage = 1;
$lastPage = 5;

// Keep mysqli in "return false on failure" mode. Since PHP 8.1 the default is
// to throw, which would abort the run on the first rejected INSERT instead of
// echoing the address and moving on.
mysqli_report(MYSQLI_REPORT_OFF);

$link = mysqli_connect('host', 'user', 'pass', 'db');
if (!$link) {
    exit('Database connection failed: ' . mysqli_connect_error());
}

// The pattern accepts characters such as ' in the local part, so the scraped
// text must never be concatenated into the SQL. Bind it as a parameter instead.
$insert = mysqli_prepare($link, 'INSERT INTO mails (mail) VALUES (?)');
if (!$insert) {
    $reason = mysqli_error($link);
    mysqli_close($link);
    exit('Could not prepare the INSERT statement: ' . $reason);
}

// $mail is bound by reference: assigning to it changes what the next
// mysqli_stmt_execute() call sends.
$mail = '';
mysqli_stmt_bind_param($insert, 's', $mail);

for ($page = $firstPage; $page <= $lastPage; $page++) {
    // The first page is requested without a "page" parameter.
    $url = ($page === 1) ? $searchUrl : $searchUrl . '&page=' . $page;

    $response = get_web_page($url);
    if (!is_string($response['content'])) {
        // curl_exec() returned false: nothing to parse on this page.
        error_log('Failed to fetch ' . $url . ': ' . $response['errmsg']);
        continue;
    }

    $text = strip_tags($response['content']);

    // preg_match_all() yields 0 for "no match" and false on a PCRE error;
    // either way there is nothing to store for this page.
    if (!preg_match_all($emailPattern, $text, $matches)) {
        continue;
    }

    foreach ($matches[0] as $found) {
        if (empty($found)) {
            continue;
        }
        $mail = $found;
        if (!mysqli_stmt_execute($insert)) {
            echo $found . ', ';
        }
    }
}

mysqli_stmt_close($insert);
mysqli_close($link);
/**
 * Downloads a URL with cURL and reports the outcome in a single array.
 *
 * @param string $url Address to request.
 *
 * @return array The curl_getinfo() data for the transfer, extended with
 *               'errno' (curl_errno), 'errmsg' (curl_error) and
 *               'content' (the value returned by curl_exec).
 */
function get_web_page( $url )
{
    $handle = curl_init( $url );

    curl_setopt_array( $handle, array(
        // What comes back: the body as a string, without response headers.
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_HEADER         => false,
        CURLOPT_ENCODING       => "",   // empty string: accept every supported encoding

        // Redirect handling.
        CURLOPT_FOLLOWLOCATION => true,
        CURLOPT_AUTOREFERER    => true, // set the Referer header when redirected
        CURLOPT_MAXREDIRS      => 10,

        // Identification sent to the server.
        CURLOPT_USERAGENT      => "spider",

        // Time limits, in seconds.
        CURLOPT_CONNECTTIMEOUT => 120,
        CURLOPT_TIMEOUT        => 120,
    ) );

    $body = curl_exec( $handle );

    // Start from the transfer statistics and attach the error state and body.
    $result = curl_getinfo( $handle );
    $result['errno'] = curl_errno( $handle );
    $result['errmsg'] = curl_error( $handle );
    $result['content'] = $body;

    curl_close( $handle );

    return $result;
}
Сбор email’ов в режиме реального времени