added bestauto.ro fetcher
added bestauto.ro fetcher

--- /dev/null
+++ b/app/lib/BestautoFetcher.php
@@ -1,1 +1,144 @@
+<?php
 
+/**
+ * @author      Razvan Stanga <git@razvi.ro>
+ */
+
+namespace carstats;
+
+/**
+ * Fetcher that scrapes used-car listings from bestauto.ro.
+ *
+ * batch() walks every body-type / fuel-type filter combination, downloads the
+ * paginated result pages and passes each listing (brand, model, engine, year,
+ * mileage, price, link) to the store* helpers provided by the parent class.
+ */
+class BestautoFetcher extends Common {
+
+	/** First result page requested for every filter combination. */
+	public $startPage = 1;
+	/** Last result page requested; overwritten by getTotalPages(). */
+	public $totalPages = 1;
+	/** Brand names parsed from the site's brand drop-down, see getBrands(). */
+	private $carBrands = array ();
+	private $domain = "http://www.bestauto.ro";
+	/**
+	 * Value sent as the "fuel" query parameter => local fuel id
+	 * (the local id is used as the index into getFuels()).
+	 */
+	private $fuelAssoc = array (
+		2	=>	1,
+		3	=>	3,
+		4	=>	2,
+		8	=>	4,
+		5	=>	5,
+	);
+	/**
+	 * Value sent as the "scat" query parameter => local body type id
+	 * (the local id is passed to storeCarModel() and storeCarPrice()).
+	 */
+	private $bodyAssoc = array (
+		456	=>	1, // Hatchback
+		389 => 2, // SUV
+		2	=>	3, // Sedan
+		4 =>	4, // Estate
+		421 => 5, // Van/minibus
+		42 => 6, // Coupe
+		40 => 7, // Off-road
+		457 => 8, // MPV
+		37 => 10, // Other
+		3 => 11,	// Convertible
+	);
+
+	/**
+	 * Registers the listing URL prefix and runs the parent initialisation.
+	 */
+	public function __construct ()
+	{
+		$this->setUrlBase ($this->domain."/auto/toate/toate/pg");
+		// required
+		parent::init();
+	}
+
+	/**
+	 * Sets the number of result pages to fetch per filter combination.
+	 *
+	 * The value is fixed at 2; it is not read from the site.
+	 */
+	protected function getTotalPages ()
+	{
+		$this->totalPages = 2;
+	}
+
+	/**
+	 * Refreshes $carBrands from the brand drop-down (ddlMarca) found in the
+	 * processed HTML of the page fetched last.
+	 *
+	 * The first option is dropped (presumably the "all brands" placeholder -
+	 * TODO confirm against the live markup). When the drop-down is missing or
+	 * yields no usable names, the previously loaded list is kept, so that one
+	 * broken page does not wipe the brand list for the pages that follow.
+	 */
+	protected function getBrands ()
+	{
+		if ( !preg_match ("|ddlMarca\" onchange\=\"(.*)\"\>(.*)\<\/select\>|isU", $this->getProcessedHtml(), $brandsOptions) ) {
+			return;
+		}
+		preg_match_all ("|\<option value\=\"([0-9]+)\"\>(.*)\<\/option\>|isU", $brandsOptions[2], $brandsMatch);
+
+		$brands = array ();
+		foreach ($brandsMatch[2] as $index => $brand) {
+			$brand = trim ($brand);
+			// An empty name would match every listing title, so it is skipped.
+			if ( $index === 0 || $brand === "" ) {
+				continue;
+			}
+			$brands[$index] = $brand;
+		}
+
+		if ( !empty ($brands) ) {
+			$this->carBrands = $brands;
+		}
+	}
+
+	/**
+	 * Splits a listing title into brand and model.
+	 *
+	 * The first known brand contained in the title (case-insensitive) wins;
+	 * the model is the title with that brand removed, also case-insensitively
+	 * so that "BMW 320" and "Bmw 320" give the same model.
+	 *
+	 * @param string $model full listing title, e.g. "Audi A4"
+	 * @return array|null array with "brand" and "model" keys, or null when
+	 *                    no known brand occurs in the title
+	 */
+	protected function getBrandModel ($model)
+	{
+		$model = (string) $model;
+		foreach ($this->carBrands as $brand) {
+			if ( $brand !== "" && stripos ($model, $brand) !== false ) {
+				return array (
+					"brand" => $brand,
+					"model" => trim ( str_ireplace ($brand, "", $model) ),
+				);
+			}
+		}
+		return null;
+	}
+
+	/**
+	 * Extracts year, engine and mileage from one "vehicle_features" table.
+	 *
+	 * NOTE(review): the "engine" pattern is not fuel specific - it captures
+	 * the first cell that ends with a verticalLine div - and batch()
+	 * overwrites the value anyway.
+	 *
+	 * @param string $data inner HTML of the features table
+	 * @return array "year" and "engine" are null when not found, "km" is an
+	 *               empty string when not found (thousands dots are removed)
+	 */
+	protected function getCarData ($data)
+	{
+		preg_match ("|\<td\>([0-9]+)\<div class\=\"verticalLine\"\>\<\/div\>\<\/td\>|isU", $data, $matchYear);
+		preg_match ("|\<td\>(.*)\<div class\=\"verticalLine\"\>\<\/div\>\<\/td>|isU", $data, $matchFuel);
+		preg_match ("|\<td\>([0-9\.]+) km\<\/td\>|isU", $data, $matchKm);
+		return array (
+			"year" => isset ($matchYear[1]) ? $matchYear[1] : null,
+			"engine" => isset ($matchFuel[1]) ? $matchFuel[1] : null,
+			"km" => isset ($matchKm[1]) ? str_replace (".", "", $matchKm[1]) : "",
+		);
+	}
+
+	/**
+	 * Fetches and stores all listings for every body type / fuel combination.
+	 *
+	 * For each combination the pages startPage..totalPages are requested with
+	 * 100 ads per page. After every page the method sleeps 1-5 seconds.
+	 */
+	public function batch ()
+	{
+		$this->getTotalPages ();
+
+		foreach ($this->bodyAssoc as $scat => $bodyId ) {
+			foreach ($this->fuelAssoc as $fuel => $fuelId ) {
+				// Always fetch at least the start page.
+				$this->totalPages = ($this->totalPages < $this->startPage) ? $this->startPage : $this->totalPages;
+				for ($page=$this->startPage;$page<=$this->totalPages;$page++) {
+					$this->debug ("start page ".$page);
+					$this->getHtml ($page."/0/?adsperpage=100&scat=".$scat."&fuel=".$fuel);
+					// Refresh the brand list on the first page of each combination,
+					// and whenever no brands are loaded yet (e.g. startPage > 1).
+					if ( $page == $this->startPage || empty ($this->carBrands) ) {
+						$this->getBrands ();
+					}
+
+					// Give discounted prices the same span id and markup as regular
+					// ones so that a single price pattern matches both.
+					$ph = str_replace ("lblOldPrice", "lblPrice", $this->getOriginalHtml());
+					$ph = str_replace (' style="color:Green;"', "", $ph);
+
+					if ( preg_match ("|search_results_content(.*)AdContainer_Adsense_search_bottom|isU", $ph, $matchCars) ) {
+						$this->storeBestautoListings ($matchCars[1], $fuelId, $bodyId);
+					} else {
+						$this->debug ("no listings block found on page ".$page, 2);
+					}
+
+					$this->debug ("memory usage: ".$this->getMemoryUsage(), 2);
+					$seconds = mt_rand (1, 5);
+					$this->debug ("sleep for ".$seconds." seconds", 2);
+					sleep ( $seconds );
+				}
+			}
+		}
+	}
+
+	/**
+	 * Parses the listings block of one result page and stores every car.
+	 *
+	 * Prices, feature tables and headings are collected separately and paired
+	 * by position.
+	 * NOTE(review): a listing whose price does not match the EUR pattern would
+	 * shift that pairing - confirm that every listing carries an EUR price.
+	 *
+	 * @param string $listingsHtml HTML between the results container markers
+	 * @param int    $fuelId       local fuel id of the current filter
+	 * @param int    $bodyId       local body type id of the current filter
+	 */
+	private function storeBestautoListings ($listingsHtml, $fuelId, $bodyId)
+	{
+		preg_match_all ("|<span id\=\"ct([a-zA-Z0-9_]+)Price\">([0-9 \.]+) EUR\<\/span\>\r\n|isU", $listingsHtml, $matchPrice);
+
+		preg_match_all ("|\<table class\=\"vehicle_features\" cellspacing\=\"0\"\>(.*)\<\/table\>|isU", $listingsHtml, $matchCarData);
+
+		preg_match_all ("|\<h2\>(.*)\<\/h2\>|isU", $listingsHtml, $matchCar);
+
+		foreach ($matchPrice[2] as $key => $price) {
+			// A price without a matching heading and feature table cannot be attributed.
+			if ( !isset ($matchCar[1][$key], $matchCarData[1][$key]) ) {
+				continue;
+			}
+
+			preg_match ("|href\=\"(.*)\"|isU", $matchCar[1][$key], $carLink);
+			$carLink = addslashes (isset ($carLink[1]) ? $carLink[1] : "");
+
+			if ( !preg_match ("|title\=\"(.*)\"|isU", $matchCar[1][$key], $carName) ) {
+				continue;
+			}
+
+			$carData = $this->getCarData ($matchCarData[1][$key]);
+			// The price pattern allows both dots and spaces as thousands separators.
+			$carData['price'] = str_replace (array (".", " "), "", $price);
+			$carData['engine'] = "0.0 ".$this->getFuels()[$fuelId];
+
+			$carDetails = $this->getBrandModel ($carName[1]);
+			if ( $carDetails === null ) {
+				$this->debug ("skipping listing, unknown brand: ".$carName[1], 2);
+				continue;
+			}
+			$brandId = $this->storeCarBrand ($carDetails['brand']);
+			$modelId = $this->storeCarModel ($carDetails['model'], $brandId, $bodyId);
+
+			$engineId = $this->storeCarEngine ($carData['engine'], $brandId, $modelId);
+
+			$this->storeCarPrice ($brandId, $modelId, $engineId, $bodyId, $carLink, $carData);
+		}
+	}
+
+	/**
+	 * Flattens HTML for regex matching: removes all line breaks and collapses
+	 * every run of spaces and tabs into a single space.
+	 *
+	 * @param string $html raw HTML
+	 * @return string single-line HTML
+	 */
+	protected function _processHtmlForRegx ($html="")
+	{
+		$html = str_replace (array ("\r\n", "\n", "\r"), "", $html);
+		return preg_replace ("/[ \t]+/", " ", $html);
+	}
+
+}
+
+?>

comments