--- a/app/lib/BestautoFetcher.php +++ b/app/lib/BestautoFetcher.php @@ -20,16 +20,16 @@ 5 => 5, ); private $bodyAssoc = array ( - 456 => 1, //Hatchback - 389 => 2, //SUV - 2 => 3, //Berlina - 4 => 4, //Break - 421 => 5, //Van/minibus - 42 => 6, //Coupe - 40 => 7, //Off-road - 457 => 8, //Monovolum - 37 => 10, // Alta - 3 => 11, // Cabrio + 456 => 2, + 389 => 3, + 2 => 4, + 4 => 1, + 421 => 7, + 42 => 8, + 40 => 10, + 457 => 5, + 37 => 6, + 3 => 11 ); public function __construct () @@ -46,8 +46,8 @@ protected function getBrands () { - preg_match ("|ddlMarca\" onchange\=\"(.*)\"\>(.*)\<\/select\>|isU", $this->getProcessedHtml(), $brandsOptions); - preg_match_all ("|\<option value\=\"([0-9]+)\"\>(.*)\<\/option\>|isU", $brandsOptions[2], $brandsMatch); + $brandsOptions = $this->match ("|ddlMarca\" onchange\=\"(.*)\"\>(.*)\<\/select\>|isU", $this->getProcessedHtml()); + $brandsMatch = $this->matchAll ("|\<option value\=\"([0-9]+)\"\>(.*)\<\/option\>|isU", $brandsOptions[2]); unset ($brandsMatch[2][0]); $this->carBrands = $brandsMatch[2]; } @@ -55,7 +55,7 @@ protected function getBrandModel ($model) { foreach ($this->carBrands as $brand) { - if ( stristr ($model, $brand) ) { + if ( preg_match ("/^".$brand."/i", $model) ) { return array ( "brand" => $brand, "model" => trim ( str_replace ($brand, "", $model) ), @@ -66,9 +66,9 @@ protected function getCarData ($data) { - preg_match ("|\<td\>([0-9]+)\<div class\=\"verticalLine\"\>\<\/div\>\<\/td\>|isU", $data, $matchYear); - preg_match ("|\<td\>(.*)\<div class\=\"verticalLine\"\>\<\/div\>\<\/td>|isU", $data, $matchFuel); - preg_match ("|\<td\>([0-9\.]+) km\<\/td\>|isU", $data, $matchKm); + $matchYear = $this->match ("|\<td\>([0-9]+)\<div class\=\"verticalLine\"\>\<\/div\>\<\/td\>|isU", $data, true); + $matchFuel = $this->match ("|\<td\>(.*)\<div class\=\"verticalLine\"\>\<\/div\>\<\/td>|isU", $data); + $matchKm = $this->match ("|\<td\>([0-9\.]+) km\<\/td\>|isU", $data, true); return array ( "year" => $matchYear[1], "engine" => $matchFuel[1], @@ -84,8 +84,8 @@ foreach ($this->fuelAssoc as $fuel => $fuelId ) { $this->totalPages = ($this->totalPages < $this->startPage) ? $this->startPage : $this->totalPages; for ($page=$this->startPage;$page<=$this->totalPages;$page++) { - $this->debug ("start page ".$page); - $this->getHtml ($page."/0/?adsperpage=100&scat=".$scat."&fuel=".$fuel); + $this->debug ("start page ".$page." / fuel ".$fuel." / body ".$scat); + $this->getHtml ($page."/0/?adsperpage=100&scat=".$scat."&fuel=".$fuel, true); if ( $page == 1 ) { $this->getBrands ($this->getProcessedHtml()); } @@ -93,19 +93,23 @@ $ph = str_replace ("lblOldPrice", "lblPrice", $this->getOriginalHtml()); $ph = preg_replace("|\<span id\=\"ctl(.*)_resultRow_lblOldPrice\" style\=\"\color\:Green\;\"\>\<\/span\>|isU", "", $ph); $ph = str_replace (' style="color:Green;"', "", $ph); - preg_match ("|search_results_content(.*)AdContainer_Adsense_search_bottom|isU", $ph, $matchCars); + $matchCars = $this->match ("|search_results_content(.*)AdContainer_Adsense_search_bottom|isU", $ph); - preg_match_all ("|<span id\=\"ct([a-zA-Z0-9_]+)Price\">([0-9 \.]+) EUR\<\/span\>\r\n|isU", $matchCars[1], $matchPrice); + $matchPrice = $this->matchAll ("|<span id\=\"ct([a-zA-Z0-9_]+)Price\">([0-9 \.]+) EUR\<\/span\>\r\n|isU", $matchCars[1], true); + if ( count($matchPrice[0]) == 0 ) { + continue; + } - preg_match_all ("|\<table class\=\"vehicle_features\" cellspacing\=\"0\"\>(.*)\<\/table\>|isU", $matchCars[1], $matchCarData); + $matchCarData = $this->matchAll ("|\<table class\=\"vehicle_features\" cellspacing\=\"0\"\>(.*)\<\/table\>|isU", $matchCars[1]); - preg_match_all ("|\<h2\>(.*)\<\/h2\>|isU", $matchCars[1], $matchCar); + + $matchCar = $this->matchAll ("|\<h2\>(.*)\<\/h2\>|isU", $matchCars[1]); foreach ($matchPrice[2] as $key => $price) { - preg_match ("|href\=\"(.*)\"|isU", $matchCar[1][$key], $carLink); + $carLink = $this->match ("|href\=\"(.*)\"|isU", $matchCar[1][$key]); $carLink = addslashes ($carLink[1]); - preg_match ("|title\=\"(.*)\"|isU", $matchCar[1][$key], $carName); + $carName = $this->match ("|title\=\"(.*)\"|isU", $matchCar[1][$key]); $carData = $this->getCarData ($matchCarData[1][$key]); $carData['price'] = str_replace (".", "", $price); @@ -114,6 +118,13 @@ $carDetails = $this->getBrandModel ($carName[1]); $brandId = $this->storeCarBrand ($carDetails['brand']); $modelId = $this->storeCarModel ($carDetails['model'], $brandId, $bodyId); + + $cc = $this->guessCC ($brandId, $modelId, $bodyId, $fuelId, $carData); + $carData['engine'] = str_replace ("0.0", round ($cc/1000, 1), $carData['engine']); + if ( $cc > 0 ) { + //$this->debug ("guessed cc is ".$cc." for brandId=".$brandId." / modelId=".$modelId." / bodyId=".$bodyId." / fuel=".$fuelId." / year=".$carData['year'], 2); + //$this->debug ("url is ".$carLink, 3); + } $engineId = $this->storeCarEngine ($carData['engine'], $brandId, $modelId); @@ -128,17 +139,6 @@ } } - protected function _processHtmlForRegx ($html="") - { - $html = preg_replace ("/([ ]+)/i", " ", $html); - $html = preg_replace ("/\r\n/i", "", $html); - $html = preg_replace ("/\n/i", "", $html); - $html = preg_replace ("/\r/i", "", $html); - $html = preg_replace ("/\t/i", " ", $html); - $html = preg_replace ("/([ ]+)/i", " ", $html); - return $html; - } - } ?>