initial commit
initial commit

file:b/app/config/db.php (new)
--- /dev/null
+++ b/app/config/db.php
@@ -1,1 +1,11 @@
+<?php
 
+sql_host		= "localhost";				# MySQL server hostname
+sql_user		= "tests";				# MySQL user name
+sql_pass		= "tests";				# MySQL user's password
+sql_db			= "admin";				# Database (schema) name
+sql_port		= 3306;					# MySQL TCP port
+persistent		= 0;					# presumably toggles persistent connections - TODO confirm against the Db class
+charset			= "utf8";			# presumably the connection character set - TODO confirm against the Db class
+
+?>

--- /dev/null
+++ b/app/lib/AutovitFetcherClass.php
@@ -1,1 +1,119 @@
+<?php
 
+/**
+ * Scrapes the car listings of autovit.ro page by page and stores brands,
+ * models, engines and prices through the helpers inherited from Common.
+ *
+ * @author      Razvan Stanga <git@razvi.ro>
+ */
+
+class AutovitFetcher extends Common {
+
+	/** First listing page requested by batch(). */
+	public $startPage = 1;
+	/** Last listing page to request; updated by getTotalPages() from the pager markup. */
+	public $totalPages = 1;
+	/** Brand names scraped by getBrands(). */
+	private $carBrands = array ();
+	/** Prefix prepended to the links found in the listings. */
+	private $domain = "http://www.autovit.ro";
+
+	/**
+	 * Sets the listing base URL and runs the parent's initialisation check.
+	 */
+	public function __construct ()
+	{
+		$this->setUrlBase ("http://www.autovit.ro/autoturisme");
+		// required
+		parent::init();
+	}
+
+	/**
+	 * Reads the highest page number from the pager of the current page.
+	 *
+	 * When the pager (or a numbered link inside it) is missing, the previous
+	 * value of $totalPages is kept instead of being overwritten with null.
+	 *
+	 * @return void
+	 */
+	protected function getTotalPages ()
+	{
+		$html = (string) $this->getProcessedHtml();
+		if ( !preg_match ("|\<p class=\"om-pager\">(.*)\<\/p\>|isU", $html, $pager) ) {
+			return;
+		}
+		preg_match_all ("|\<a href\=\"(.*)\" \>([0-9]+)\<\/a\>|isU", $pager[0], $links);
+		$linkCount = count ($links[2]);
+		if ( $linkCount > 0 ) {
+			// the last numbered link of the pager is taken as the last page
+			$this->totalPages = (int) $links[2][$linkCount - 1];
+		}
+	}
+
+	/**
+	 * Collects the brand names from the expanded brand list of the current page.
+	 *
+	 * @return void
+	 */
+	protected function getBrands ()
+	{
+		preg_match_all ("|\<li class\=\"expanded \"\>\<a href\=\"(.*)\" data-no\=\"([0-9]+)\"\>\<span\>(.*)\<\/span\>\<\/a\>\<\/li\>|isU", (string) $this->getProcessedHtml(), $brandsMatch);
+		$this->carBrands = $brandsMatch[3];
+	}
+
+	/**
+	 * Splits a listing title into brand and model using the scraped brand list.
+	 *
+	 * The brand is searched case-insensitively and is also removed
+	 * case-insensitively, so "BMW 320" and "bmw 320" give the same model.
+	 *
+	 * @param string $model listing title (brand and model in one string)
+	 * @return array|null array with "brand" and "model" keys, or null when no known brand occurs in the title
+	 */
+	protected function getBrandModel ($model)
+	{
+		foreach ($this->carBrands as $brand) {
+			if ( $brand !== "" && stripos ($model, $brand) !== false ) {
+				return array (
+					"brand" => $brand,
+					"model" => trim ( str_ireplace ($brand, "", $model) ),
+				);
+			}
+		}
+		return null;
+	}
+
+	/**
+	 * Extracts year, engine, body type and mileage from a listing's data block.
+	 *
+	 * @param string $data inner html of the listing's <p class="basic"> element
+	 * @return array|null array with "year", "engine", "body" and "km" keys, or null when the markup matches neither known layout
+	 */
+	protected function getCarData ($data)
+	{
+		// the horse power title attribute would break the patterns below, so drop it first
+		if ( preg_match ("| title\=\"Putere \(CP\)\: ([0-9]+)\"|isU", $data, $hpMatch) ) {
+			$data = str_replace ($hpMatch[0], "", $data);
+		}
+
+		$km = 0;
+		if ( preg_match ("|\<span\>\<span\>Fabricatie\<\/span\>\: \<strong\>([0-9]+)\<\/strong\>\<\/span\>\<strong\>(.*)\<\/strong\>\<strong\>(.*)\<\/strong\>\<span\>\<span\>Rulaj pana la\<\/span\>\: \<strong\>([0-9]+) km\<\/strong\>\<\/span\>|isU", $data, $carMatch) ) {
+			$km = $carMatch[4];
+		} elseif ( !preg_match ("|\<span\>\<span\>Fabricatie\<\/span\>\: \<strong\>([0-9]+)\<\/strong\>\<\/span\>\<strong\>(.*)\<\/strong\>\<strong\>(.*)\<\/strong\>\<span\>\<\/span\>|isU", $data, $carMatch) ) {
+			// neither the used car layout nor the layout without mileage (new car ?) matched
+			return null;
+		}
+
+		return array (
+			"year" => $carMatch[1],
+			"engine" => $carMatch[2],
+			"body" => $carMatch[3],
+			"km" => $km
+		);
+	}
+
+	/**
+	 * Walks the listing pages from $startPage to $totalPages and stores every
+	 * listing that can be parsed; listings that cannot be parsed are reported
+	 * through debug() and skipped.
+	 *
+	 * Sleeps 1 to 5 seconds after each page.
+	 *
+	 * @return void
+	 */
+	public function batch ()
+	{
+		$this->totalPages = ($this->totalPages < $this->startPage) ? $this->startPage : $this->totalPages;
+		for ($page=$this->startPage;$page<=$this->totalPages;$page++) {
+			$this->debug ("start page ".$page);
+			$this->getHtml ("?p=".$page);
+			$html = (string) $this->getProcessedHtml();
+
+			// load the brand list from the first page that provides it, so a run
+			// with $startPage > 1 is still able to recognise brands
+			if ( count ($this->carBrands) == 0 ) {
+				$this->getBrands ();
+			}
+
+			preg_match_all ("|\<strong class\=\"om-price-amount\"\>([0-9 ]+)\<\/strong\>|isU", $html, $matchPrice);
+			preg_match_all ("|\<h3\>(.*)\<\/h3\>|isU", $html, $matchCar);
+			preg_match_all ("|\<p class=\"basic\"\>(.*)\<\/p\>|isU", $html, $matchData);
+
+			foreach ($matchPrice[1] as $key => $price) {
+				// the three match lists are paired by position; a missing entry means the page did not parse cleanly
+				if ( !isset ($matchCar[1][$key]) || !isset ($matchData[1][$key]) ) {
+					$this->debug ("skip listing ".$key.": title or data block missing", 2);
+					continue;
+				}
+				if ( !preg_match ("|\<a href\=\"(.*)\"\>(.*)\<\/a\>|isU", $matchCar[1][$key], $carMatch) ) {
+					$this->debug ("skip listing ".$key.": link not found", 2);
+					continue;
+				}
+				// storeCarPrice() puts the link into its sql as is, so it is escaped here
+				$carLink = addslashes ($this->domain.$carMatch[1]);
+
+				$carDetails = $this->getBrandModel ($carMatch[2]);
+				if ( $carDetails === null ) {
+					$this->debug ("skip listing ".$key.": unknown brand in ".$carMatch[2], 2);
+					continue;
+				}
+
+				$carData = $this->getCarData ($matchData[1][$key]);
+				if ( $carData === null ) {
+					$this->debug ("skip listing ".$key.": car data not recognised", 2);
+					continue;
+				}
+				$carData['price'] = str_replace (" ", "", $price);
+
+				$brandId = $this->storeCarBrand ($carDetails['brand']);
+				$modelId = $this->storeCarModel ($carDetails['model'], $brandId);
+				$engineId = $this->storeCarEngine ($carData['engine'], $brandId, $modelId);
+
+				$this->storeCarPrice ($brandId, $modelId, $engineId, $carLink, $carData);
+			}
+			$this->debug ("memory usage: ".$this->getMemoryUsage(), 2);
+			$seconds = mt_rand (1, 5);
+			$this->debug ("sleep for ".$seconds." seconds", 2);
+			sleep ( $seconds );
+		}
+	}
+
+}
+
+?>

--- /dev/null
+++ b/app/lib/CommonClass.php
@@ -1,1 +1,191 @@
+<?php
 
+/**
+ * Base class for the site fetchers: downloads a page with curl, normalises
+ * the html for regex matching and stores brands, models, engines and prices
+ * through the static Db class.
+ *
+ * @author      Razvan Stanga <git@razvi.ro>
+ */
+
+class Common {
+
+	/** Set to true by init(); getHtml() refuses to run before that. */
+	private $init = false;
+	/** Url prefix every getHtml() request is built from. */
+	private $urlBase = null;
+	private $userAgent = 'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.31 (KHTML, like Gecko) Chrome/26.0.1410.64 Safari/537.31';
+	/** Response body of the last request, as returned by curl. */
+	private $originalHtml;
+	/** $originalHtml with runs of spaces collapsed and line breaks removed. */
+	private $processedHtml;
+	/** Fuel id => fuel label as it appears in the engine text. */
+	private $fuels = array (
+		1 => 'Benzină',
+		2 => 'Benzină + GPL',
+		3 => 'Diesel',
+		4 => 'Hibrid',
+		5 => 'Electrica',
+		6 => 'Hidrogen',
+		7 => 'Benzină + GNC',
+		8 => 'Etanol',
+		9 => 'Alta'
+	);
+	/** Row id => lookup key of the brands already resolved during this run. */
+	private $cacheBrands = array ();
+	/** Row id => lookup key of the models already resolved during this run. */
+	private $cacheModels = array ();
+	/** Row id => lookup key of the engines already resolved during this run. */
+	private $cacheEngine = array ();
+
+	protected function __construct ()
+	{
+	}
+
+	/**
+	 * Verifies the mandatory settings and enables getHtml().
+	 *
+	 * @return void
+	 * @throws Exception when the url base or the user agent is not set
+	 */
+	protected function init ()
+	{
+		if ($this->urlBase == null) {
+			throw new Exception("\$urlBase is not set");
+		}
+		if ($this->userAgent == null) {
+			throw new Exception("\$userAgent is not set");
+		}
+		$this->init = true;
+	}
+
+	protected function setUserAgent ($userAgent)
+	{
+		$this->userAgent = $userAgent;
+	}
+
+	protected function setUrlBase ($urlBase)
+	{
+		$this->urlBase = $urlBase;
+	}
+
+	/**
+	 * Hook called by getHtml() after every download so a fetcher can refresh
+	 * its page count. The default does nothing; subclasses override it.
+	 *
+	 * @return void
+	 */
+	protected function getTotalPages ()
+	{
+	}
+
+	/**
+	 * Downloads $urlBase.$url, keeps the raw and the normalised html and then
+	 * calls getTotalPages().
+	 *
+	 * A failed download is reported through debug() and leaves an empty
+	 * document, so the regex based parsers simply find nothing.
+	 *
+	 * @param string $url suffix appended to the url base
+	 * @return void
+	 * @throws Exception when init() was not called
+	 */
+	protected function getHtml ($url)
+	{
+		if ( $this->init == false ) {
+			throw new Exception ("init method must be called");
+		}
+		// curl expects a file name for the cookie options, not a stream resource
+		$cookieFile = tempnam (sys_get_temp_dir (), "cookie");
+		$ch = curl_init ($this->urlBase.$url);
+
+		$options = array(
+			CURLOPT_CONNECTTIMEOUT => 20 ,
+			CURLOPT_USERAGENT => $this->userAgent,
+			CURLOPT_AUTOREFERER => true,
+			CURLOPT_FOLLOWLOCATION => true,
+			CURLOPT_RETURNTRANSFER => true,
+			// NOTE(review): certificate checks are switched off; acceptable only while plain http is fetched
+			CURLOPT_SSL_VERIFYPEER => 0 ,
+			CURLOPT_SSL_VERIFYHOST => 0
+		);
+		if ( $cookieFile !== false ) {
+			$options[CURLOPT_COOKIEFILE] = $cookieFile;
+			$options[CURLOPT_COOKIEJAR] = $cookieFile;
+		}
+		curl_setopt_array ($ch, $options);
+		$html = curl_exec ($ch);
+		if ( $html === false ) {
+			$this->debug ("fetch failed for ".$this->urlBase.$url." : ".curl_error ($ch), 2);
+			$html = "";
+		}
+		curl_close ($ch);
+		// the jar is written when the handle is closed; cookies are not kept between requests
+		if ( $cookieFile !== false && is_file ($cookieFile) ) {
+			unlink ($cookieFile);
+		}
+		$this->originalHtml = $html;
+
+		$this->processHtmlForRegx ();
+		$this->getTotalPages ();
+	}
+
+	/**
+	 * Builds $processedHtml: runs of spaces become one space and all line
+	 * breaks are removed, so the single line patterns of the fetchers match.
+	 *
+	 * @return void
+	 */
+	protected function processHtmlForRegx ()
+	{
+		$html = preg_replace ("/([ ]+)/i", " ", (string) $this->originalHtml);
+		$this->processedHtml = str_replace (array ("\r\n", "\n", "\r"), "", $html);
+	}
+
+	/**
+	 * Escapes a value for use inside a single quoted sql string.
+	 *
+	 * NOTE(review): the Db class offers no escaping or prepared statement api,
+	 * so addslashes() is used; replace it with mysqli::real_escape_string or
+	 * prepared statements once Db exposes them.
+	 */
+	private function escape ($value)
+	{
+		return addslashes ((string) $value);
+	}
+
+	/**
+	 * Returns the id of a brand, inserting the brand when it is not stored yet.
+	 *
+	 * @param string $brandName
+	 * @return int|string row id
+	 */
+	protected function storeCarBrand ($brandName)
+	{
+		$brandName = trim ((string) $brandName);
+		$value = $brandName;
+		$cachedId = array_search ($value, $this->cacheBrands, true);
+		if ( $cachedId !== false ) {
+			return $cachedId;
+		}
+		$sqlName = $this->escape ($brandName);
+		Db::query ("SELECT `id` FROM `brand` WHERE `brandName`='".$sqlName."'");
+		if ($row = Db::fetchArray()) {
+			$id = $row['id'];
+		} else {
+			// an unquoted NULL lets the auto increment column pick the id
+			Db::query ("INSERT INTO `brand` (`id`, `brandName`, `active`) VALUES (NULL, '".$sqlName."', '1')");
+			$id = Db::getId ();
+		}
+		$this->cacheBrands[$id] = $value;
+		return $id;
+	}
+
+	/**
+	 * Returns the id of a model of the given brand, inserting it when needed.
+	 *
+	 * @param string $modelName
+	 * @param int|string $brandId
+	 * @return int|string row id
+	 */
+	protected function storeCarModel ($modelName, $brandId)
+	{
+		$modelName = trim ((string) $modelName);
+		// the separator keeps brand 1 / model "23" apart from brand 12 / model "3"
+		$value = $brandId."|".$modelName;
+		$cachedId = array_search ($value, $this->cacheModels, true);
+		if ( $cachedId !== false ) {
+			return $cachedId;
+		}
+		$sqlBrandId = (int) $brandId;
+		$sqlName = $this->escape ($modelName);
+		Db::query ("SELECT `id` FROM `model` WHERE `brand_id`='".$sqlBrandId."' AND `modelName`='".$sqlName."'");
+		if ($row = Db::fetchArray()) {
+			$id = $row['id'];
+		} else {
+			Db::query ("INSERT INTO `model` (`id`, `brand_id`, `modelName`, `active`) VALUES (NULL, '".$sqlBrandId."', '".$sqlName."', '1')");
+			$id = Db::getId ();
+		}
+		$this->cacheModels[$id] = $value;
+		return $id;
+	}
+
+	/**
+	 * Returns the id of an engine of the given brand and model, inserting it
+	 * together with its fuel id and capacity when needed.
+	 *
+	 * @param string $engineName engine text, capacity first and fuel after it
+	 * @param int|string $brandId
+	 * @param int|string $modelId
+	 * @return int|string row id
+	 * @throws Exception when the fuel of the engine is not listed in $fuels
+	 */
+	protected function storeCarEngine ($engineName, $brandId, $modelId)
+	{
+		$engineName = trim ((string) $engineName);
+		$value = $brandId."|".$modelId."|".$engineName;
+		$cachedId = array_search ($value, $this->cacheEngine, true);
+		if ( $cachedId !== false ) {
+			return $cachedId;
+		}
+		$cc = $this->getCc($engineName);
+		$fuel = $this->getFuel ($engineName);
+		$sqlBrandId = (int) $brandId;
+		$sqlModelId = (int) $modelId;
+		$sqlName = $this->escape ($engineName);
+		Db::query ("SELECT `id` FROM `engine` WHERE `brand_id`='".$sqlBrandId."' AND `model_id`='".$sqlModelId."' AND `engineName`='".$sqlName."'");
+		if ($row = Db::fetchArray()) {
+			$id = $row['id'];
+		} else {
+			Db::query ("INSERT INTO `engine` (`id`, `brand_id`, `model_id`, `engineName`, `fuel`, `capacity`, `active`) VALUES (NULL, '".$sqlBrandId."', '".$sqlModelId."', '".$sqlName."', '".$fuel."', '".$cc."', '1')");
+			$id = Db::getId ();
+		}
+		$this->cacheEngine[$id] = $value;
+		return $id;
+	}
+
+	/**
+	 * Stores a price row unless one with the same brand, model and link exists.
+	 *
+	 * @param int|string $brandId
+	 * @param int|string $modelId
+	 * @param int|string $engineId
+	 * @param string $carLink listing url; used in the sql as is, so the caller has to pass it already escaped
+	 * @param array $carData needs the "engine", "year", "km" and "price" keys
+	 * @return void
+	 * @throws Exception when the fuel of the engine is not listed in $fuels
+	 */
+	protected function storeCarPrice ($brandId, $modelId, $engineId, $carLink, $carData)
+	{
+		$cc = $this->getCc($carData['engine']);
+		$fuel = $this->getFuel ($carData['engine']);
+		$sqlBrandId = (int) $brandId;
+		$sqlModelId = (int) $modelId;
+		$sqlEngineId = (int) $engineId;
+		Db::query ("SELECT `id` FROM `prices` WHERE `brand_id`='".$sqlBrandId."' AND `model_id`='".$sqlModelId."' AND `carLink`='".$carLink."'");
+		if ( !Db::fetchArray() ) {
+			Db::query("INSERT INTO `prices` (`id`, `brand_id`, `model_id`, `engine_id`, `carLink`, `year`, `fuel`, `cc`, `km`, `price`, `active`)
+			VALUES (NULL, '".$sqlBrandId."', '".$sqlModelId."', '".$sqlEngineId."', '".$carLink."', '".$this->escape ($carData['year'])."', '".$fuel."', '".$cc."', '".$this->escape ($carData['km'])."', '".$this->escape ($carData['price'])."', '1')");
+		}
+	}
+
+	/**
+	 * Maps the fuel part of an engine text to its id in $fuels.
+	 *
+	 * Everything after the first word is tried first, so multi word fuels such
+	 * as "Benzină + GPL" are found; the single word right after the capacity
+	 * is the fallback.
+	 *
+	 * @param string $engineName engine text, e.g. "1.6 Benzină + GPL"
+	 * @return int key of the fuel in $fuels
+	 * @throws Exception when the fuel is not listed
+	 */
+	protected function getFuel ($engineName)
+	{
+		$engineData = explode (" ", trim ((string) $engineName), 2);
+		$value = isset ($engineData[1]) ? trim ($engineData[1]) : "";
+		$words = explode (" ", $value);
+		foreach (array ($value, $words[0]) as $candidate) {
+			$fuelId = array_search ($candidate, $this->fuels, true);
+			if ( $fuelId !== false ) {
+				return $fuelId;
+			}
+		}
+		throw new Exception ("unlisted fuel found : ".$value);
+	}
+
+	/**
+	 * Converts the leading capacity of an engine text (read as litres) into
+	 * cubic centimetres; a non numeric capacity gives 0.
+	 *
+	 * @param string $engineName engine text, e.g. "1.6 Diesel"
+	 * @return int
+	 */
+	private function getCc ($engineName)
+	{
+		$engineData = explode (" ", trim ((string) $engineName));
+		return (int) round ((float) $engineData[0] * 1000);
+	}
+
+	protected function getProcessedHtml ()
+	{
+		return $this->processedHtml;
+	}
+
+	/**
+	 * Returns the memory allocated to the script as a rounded figure with a
+	 * unit, e.g. "2 mb".
+	 *
+	 * @return string
+	 */
+	protected function getMemoryUsage ()
+	{
+		$size = memory_get_usage(true);
+		$unit=array('b','kb','mb','gb','tb','pb');
+		$i = ($size > 0) ? (int) floor (log ($size, 1024)) : 0;
+		return round ($size / pow (1024, $i), 2).' '.$unit[$i];
+	}
+
+	/**
+	 * Prints a progress line.
+	 *
+	 * @param string $message
+	 * @param int $indent number of tabs in front of the line
+	 * @return void
+	 */
+	protected function debug ($message, $indent=1)
+	{
+		echo str_repeat("\t", $indent)." - ".$message."\r\n";
+	}
+}
+
+?>

--- /dev/null
+++ b/app/lib/DbClass.php
@@ -1,1 +1,78 @@
+<?php
 
+/**
+ * Static wrapper around one lazily opened mysqli connection. The connection
+ * settings are read from app/config/db.php on the first query.
+ *
+ * @author      Razvan Stanga <git@razvi.ro>
+ */
+
+class Db {
+	/** Open mysqli connection, or null before the first query. */
+	private static $db = null;
+	/** Settings parsed from app/config/db.php. */
+	private static $dbData = array ();
+	/** Return value of the last query: a mysqli_result or a boolean. */
+	private static $dbQueryId;
+
+	private function __construct ()
+	{
+	}
+
+	/**
+	 * Connects to the database
+	 *
+	 * Applies the configured charset, when there is one, to the connection.
+	 *
+	 * @return void
+	 * @throws Exception when the connection cannot be established
+	 */
+
+	private static function connect ()
+	{
+		$config = self::$dbData;
+		$port = isset ($config['sql_port']) ? (int) $config['sql_port'] : (int) ini_get ("mysqli.default_port");
+		$db = new mysqli ($config['sql_host'], $config['sql_user'], $config['sql_pass'], $config['sql_db'], $port);
+		if ( $db->connect_error ) {
+			throw new Exception ("database connection failed : ".$db->connect_error);
+		}
+		if ( !empty ($config['charset']) ) {
+			$db->set_charset ($config['charset']);
+		}
+		// keep the handle only once it is usable, so a failed connect is retried on the next query
+		self::$db = $db;
+	}
+
+	/**
+	 * Process sql query
+	 *
+	 * Opens the connection on first use.
+	 *
+	 * @param string $sqlQuery database sql query
+	 * @return mysqli_result|bool result object for queries that return rows, true for other successful queries, false on failure
+	 * @throws Exception when the configuration cannot be read or the connection fails
+	 */
+
+	public static function query ($sqlQuery)
+	{
+		if ( self::$db === null ) {
+			$config = parse_ini_file (appRoot."app/config/db.php", true);
+			if ( $config === false ) {
+				throw new Exception ("unable to read database configuration");
+			}
+			self::$dbData = $config;
+			self::connect();
+		}
+		return self::$dbQueryId = self::$db->query ($sqlQuery);
+	}
+
+	/**
+	 * Fetches result from database
+	 *
+	 * @param mysqli_result|string $queryId sql query result; anything that is not a result object selects the result of the last query
+	 * @return array|null|false next row as associative array, null when there are no more rows, false when there is no result set to read from
+	 */
+
+	public static function fetchArray ($queryId="")
+	{
+		if ( !($queryId instanceof mysqli_result) ) {
+			$queryId = self::$dbQueryId;
+		}
+		// write queries return true and failed queries false; neither can be fetched from
+		if ( $queryId instanceof mysqli_result ) {
+			return $queryId->fetch_array (MYSQLI_ASSOC);
+		}
+		return false;
+	}
+
+	/**
+	 * Fetches auto increment id
+	 *
+	 * @param mixed $queryId unused, kept for backward compatibility; the id always comes from the connection
+	 * @return int|string id generated by the last insert, 0 when there is none or no connection was opened yet
+	 */
+
+	public static function getId ($queryId="")
+	{
+		if ( self::$db === null ) {
+			return 0;
+		}
+		return self::$db->insert_id;
+	}
+}
+
+?>

file:b/fetch.php (new)
--- /dev/null
+++ b/fetch.php
@@ -1,1 +1,20 @@
+<?php
 
+/**
+ * @author      Razvan Stanga <git@razvi.ro>
+ */
+
+// The scrape sleeps between pages and may run for a long time: no execution time limit.
+set_time_limit (0);
+
+// Work from the directory of this script and publish it, with a trailing slash, as appRoot.
+$folder = dirname(__FILE__);
+chdir ($folder);
+define ("appRoot", $folder."/");
+
+// Load the classes in dependency order: Db first, then Common, then the fetcher that extends it.
+foreach (array ("app/lib/DbClass.php", "app/lib/CommonClass.php", "app/lib/AutovitFetcherClass.php") as $libFile) {
+	include (appRoot.$libFile);
+}
+
+// Run the whole scrape.
+$autovit = new AutovitFetcher ();
+$autovit->batch ();
+
+?>

comments