bestauto code cleanup
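Clean up BestautoFetcher: route regex calls through the match/matchAll helpers, remap the body-type ids, anchor brand detection to the start of the car name, skip result pages without prices, guess the engine displacement, and remove the local _processHtmlForRegx helper.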

--- a/app/lib/BestautoFetcher.php
+++ b/app/lib/BestautoFetcher.php
@@ -20,16 +20,16 @@
 		5	=>	5,
 	);
 	private $bodyAssoc = array (
-		456	=>	1, //Hatchback
-		389 => 2, //SUV
-		2	=>	3, //Berlina
-		4 =>	4, //Break
-		421 => 5, //Van/minibus
-		42 => 6, //Coupe
-		40 => 7, //Off-road
-		457 => 8, //Monovolum
-		37 => 10, // Alta
-		3 => 11,	// Cabrio
+		456	=>	2,
+		389 => 3,
+		2	=>	4,
+		4 =>	1,
+		421 => 7,
+		42 => 8,
+		40 => 10,
+		457 => 5,
+		37 => 6,
+		3 => 11
 	);
 
 	public function __construct ()
@@ -46,8 +46,8 @@
 
 	protected function getBrands ()
 	{
-		preg_match ("|ddlMarca\" onchange\=\"(.*)\"\>(.*)\<\/select\>|isU", $this->getProcessedHtml(), $brandsOptions);
-		preg_match_all ("|\<option value\=\"([0-9]+)\"\>(.*)\<\/option\>|isU", $brandsOptions[2], $brandsMatch);
+		$brandsOptions = $this->match ("|ddlMarca\" onchange\=\"(.*)\"\>(.*)\<\/select\>|isU", $this->getProcessedHtml());
+		$brandsMatch = $this->matchAll ("|\<option value\=\"([0-9]+)\"\>(.*)\<\/option\>|isU", $brandsOptions[2]);
 		unset ($brandsMatch[2][0]);
 		$this->carBrands = $brandsMatch[2];
 	}
@@ -55,7 +55,7 @@
 	protected function getBrandModel ($model)
 	{
 		foreach ($this->carBrands as $brand) {
-			if ( stristr ($model, $brand) ) {
+			if ( preg_match ("/^".preg_quote ($brand, "/")."/i", $model) ) { // quote the scraped brand so regex metacharacters in it are matched literally
 				return array (
 					"brand" => $brand,
 					"model" => trim ( str_replace ($brand, "", $model) ),
@@ -66,9 +66,9 @@
 
 	protected function getCarData ($data)
 	{
-		preg_match ("|\<td\>([0-9]+)\<div class\=\"verticalLine\"\>\<\/div\>\<\/td\>|isU", $data, $matchYear);
-		preg_match ("|\<td\>(.*)\<div class\=\"verticalLine\"\>\<\/div\>\<\/td>|isU", $data, $matchFuel);
-		preg_match ("|\<td\>([0-9\.]+) km\<\/td\>|isU", $data, $matchKm);
+		$matchYear = $this->match ("|\<td\>([0-9]+)\<div class\=\"verticalLine\"\>\<\/div\>\<\/td\>|isU", $data, true);
+		$matchFuel = $this->match ("|\<td\>(.*)\<div class\=\"verticalLine\"\>\<\/div\>\<\/td>|isU", $data);
+		$matchKm = $this->match ("|\<td\>([0-9\.]+) km\<\/td\>|isU", $data, true);
 		return array (
 			"year" => $matchYear[1],
 			"engine" => $matchFuel[1],
@@ -84,8 +84,8 @@
 			foreach ($this->fuelAssoc as $fuel => $fuelId ) {
 				$this->totalPages = ($this->totalPages < $this->startPage) ? $this->startPage : $this->totalPages;
 				for ($page=$this->startPage;$page<=$this->totalPages;$page++) {
-					$this->debug ("start page ".$page);
-					$this->getHtml ($page."/0/?adsperpage=100&scat=".$scat."&fuel=".$fuel);
+					$this->debug ("start page ".$page." / fuel ".$fuel." / body ".$scat);
+					$this->getHtml ($page."/0/?adsperpage=100&scat=".$scat."&fuel=".$fuel, true);
 					if ( $page == 1 ) {
 						$this->getBrands ($this->getProcessedHtml());
 					}
@@ -93,19 +93,23 @@
 					$ph = str_replace ("lblOldPrice", "lblPrice", $this->getOriginalHtml());
 					$ph = preg_replace("|\<span id\=\"ctl(.*)_resultRow_lblOldPrice\" style\=\"\color\:Green\;\"\>\<\/span\>|isU", "", $ph);
 					$ph = str_replace (' style="color:Green;"', "", $ph);
-					preg_match ("|search_results_content(.*)AdContainer_Adsense_search_bottom|isU", $ph, $matchCars);
+					$matchCars = $this->match ("|search_results_content(.*)AdContainer_Adsense_search_bottom|isU", $ph);
 
-					preg_match_all ("|<span id\=\"ct([a-zA-Z0-9_]+)Price\">([0-9 \.]+) EUR\<\/span\>\r\n|isU", $matchCars[1], $matchPrice);
+					$matchPrice = $this->matchAll ("|<span id\=\"ct([a-zA-Z0-9_]+)Price\">([0-9 \.]+) EUR\<\/span\>\r\n|isU", $matchCars[1], true);
+					if ( count($matchPrice[0]) == 0 ) {
+						continue;
+					}
 
-					preg_match_all ("|\<table class\=\"vehicle_features\" cellspacing\=\"0\"\>(.*)\<\/table\>|isU", $matchCars[1], $matchCarData);
+					$matchCarData = $this->matchAll ("|\<table class\=\"vehicle_features\" cellspacing\=\"0\"\>(.*)\<\/table\>|isU", $matchCars[1]);
 
-					preg_match_all ("|\<h2\>(.*)\<\/h2\>|isU", $matchCars[1], $matchCar);
+
+					$matchCar = $this->matchAll ("|\<h2\>(.*)\<\/h2\>|isU", $matchCars[1]);
 
 					foreach ($matchPrice[2] as $key => $price) {
-						preg_match ("|href\=\"(.*)\"|isU", $matchCar[1][$key], $carLink);
+						$carLink = $this->match ("|href\=\"(.*)\"|isU", $matchCar[1][$key]);
 						$carLink = addslashes ($carLink[1]);
 
-						preg_match ("|title\=\"(.*)\"|isU", $matchCar[1][$key], $carName);
+						$carName = $this->match ("|title\=\"(.*)\"|isU", $matchCar[1][$key]);
 
 						$carData = $this->getCarData ($matchCarData[1][$key]);
 						$carData['price'] = str_replace (".", "", $price);
@@ -114,6 +118,13 @@
 						$carDetails = $this->getBrandModel ($carName[1]);
 						$brandId = $this->storeCarBrand ($carDetails['brand']);
 						$modelId = $this->storeCarModel ($carDetails['model'], $brandId, $bodyId);
+
+						$cc = $this->guessCC ($brandId, $modelId, $bodyId, $fuelId, $carData);
+						if ( $cc > 0 ) {
+							// Only rewrite a "0.0" displacement in the engine label when a displacement was actually guessed.
+							// number_format keeps one decimal (2000 cc => "2.0"); a float would be stringified as "2".
+							$carData['engine'] = str_replace ("0.0", number_format ($cc/1000, 1, ".", ""), $carData['engine']);
+						}
 
 						$engineId = $this->storeCarEngine ($carData['engine'], $brandId, $modelId);
 
@@ -128,17 +139,6 @@
 		}
 	}
 
-	protected function _processHtmlForRegx ($html="")
-	{
-		$html = preg_replace ("/([ ]+)/i", " ", $html);
-		$html = preg_replace ("/\r\n/i", "", $html);
-		$html = preg_replace ("/\n/i", "", $html);
-		$html = preg_replace ("/\r/i", "", $html);
-		$html = preg_replace ("/\t/i", " ", $html);
-		$html = preg_replace ("/([ ]+)/i", " ", $html);
-		return $html;
-	}
-
 }
 
 ?>

comments