Add packed object extraction class
Add packed object extraction class

Based on glip by Patrik Fimml

--- /dev/null
+++ b/include/git/Pack.class.php
@@ -1,1 +1,517 @@
-
+<?php
+/**
+ * GitPHP Pack
+ *
+ * Extracts data from a pack
+ * Based on code from Glip by Patrik Fimml
+ *
+ * @author Christopher Han <xiphux@gmail.com>
+ * @copyright Copyright (c) 2011 Christopher Han
+ * @package GitPHP
+ * @subpackage Git
+ */
+
+/**
+ * Pack class
+ *
+ * @package GitPHP
+ * @subpackage Git
+ */
+class GitPHP_Pack
+{
+
+	const OBJ_COMMIT = 1;		// commit, stored compressed (not a delta)
+	const OBJ_TREE = 2;		// tree, stored compressed (not a delta)
+	const OBJ_BLOB = 3;		// blob, stored compressed (not a delta)
+	const OBJ_TAG = 4;		// tag, stored compressed (not a delta)
+	const OBJ_OFS_DELTA = 6;	// delta against a base located by its offset in this pack
+	const OBJ_REF_DELTA = 7;	// delta against a base identified by its hash
+
+	/**
+	 * project
+	 *
+	 * Project this pack belongs to; GetPath() and GetObject() are called on it
+	 *
+	 * @access protected
+	 */
+	protected $project;
+
+	/**
+	 * hash
+	 *
+	 * Hash of the pack, used to build the pack-<hash>.idx and .pack file names
+	 *
+	 * @access protected
+	 */
+	protected $hash;
+
+	/**
+	 * __construct
+	 *
+	 * Instantiates a pack reader; the pack's index and data file must both exist
+	 *
+	 * @access public
+	 * @param mixed $project the project (its GetPath() locates the repository)
+	 * @param string $hash pack hash, exactly 40 hex characters
+	 * @throws Exception on an invalid hash or a missing index/pack file
+	 */
+	public function __construct($project, $hash)
+	{
+		// anchored: unanchored, any string merely containing a hash would reach the paths below
+		if (!preg_match('/^[0-9A-Fa-f]{40}$/', $hash)) {
+			throw new Exception(sprintf(__('Invalid hash %1$s'), $hash));
+		}
+		$this->hash = $hash;
+		$this->project = $project;
+
+		if (!file_exists($project->GetPath() . '/objects/pack/pack-' . $hash . '.idx')) {
+			throw new Exception('Pack index does not exist');
+		}
+		if (!file_exists($project->GetPath() . '/objects/pack/pack-' . $hash . '.pack')) {
+			throw new Exception('Pack file does not exist');
+		}
+	}
+
+	/**
+	 * GetHash
+	 *
+	 * Gets the hash this pack was constructed with
+	 *
+	 * @access public
+	 * @return string pack hash
+	 */
+	public function GetHash()
+	{
+		return $this->hash;
+	}
+
+	/**
+	 * ContainsObject
+	 *
+	 * Checks if an object exists in the pack
+	 *
+	 * @access public
+	 * @param string $hash object hash (exactly 40 hex characters)
+	 * @return boolean true if object is in pack
+	 */
+	public function ContainsObject($hash)
+	{
+		// anchored: only an exact 40 character hex string is a valid object name
+		if (!preg_match('/^[0-9a-fA-F]{40}$/', $hash)) {
+			return false;
+		}
+		return $this->FindPackedObject($hash) !== false;
+	}
+
+	/**
+	 * FindPackedObject
+	 *
+	 * Searches for an object's offset in the index
+	 *
+	 * @access private
+	 * @param string $hash hash (exactly 40 hex characters)
+	 * @return mixed int offset into the pack file, or false if not found
+	 */
+	private function FindPackedObject($hash)
+	{
+		if (!preg_match('/^[0-9a-fA-F]{40}$/', $hash)) {
+			return false;
+		}
+
+		$index = fopen($this->project->GetPath() . '/objects/pack/pack-' . $this->hash . '.idx', 'rb');
+		if ($index === false) {
+			// unreadable index: report "not found" instead of reading from a bad handle
+			return false;
+		}
+		flock($index, LOCK_SH);
+		$offset = false;
+		if (fread($index, 4) === "\xFFtOc") {		// magic number: index has a version header
+			if (GitPHP_Pack::fuint32($index) == 2) {	// other versions are not searched
+				$offset = $this->SearchIndexV2($index, $hash);
+			}
+		} else {					// no magic number: read as version 1
+			$offset = $this->SearchIndexV1($index, $hash);
+		}
+		flock($index, LOCK_UN);
+		fclose($index);
+		return $offset;
+	}
+
+	/**
+	 * SearchIndexV1
+	 *
+	 * Searches a version 1 index for a hash
+	 *
+	 * @access private
+	 * @param resource $index file pointer to index
+	 * @param string $hash hash to find
+	 * @return mixed int pack offset if found, false otherwise
+	 */
+	private function SearchIndexV1($index, $hash)
+	{
+		/*
+		 * index v1 structure:
+		 * fanout table - 256*4 bytes
+		 * offset/sha table - 24*count bytes (4 byte offset + 20 byte sha for each entry)
+		 */
+		$binaryHash = pack('H40', $hash);
+
+		/*
+		 * get the start/end indices to search
+		 * from the fanout table
+		 */
+		list($cur, $after) = $this->ReadFanout($index, $binaryHash, 0);
+
+		$n = $after - $cur;
+		if ($n == 0) {
+			return false;
+		}
+
+		/*
+		 * find the entry for the hash in the offset/sha listing
+		 * between cur and after from the fanout.
+		 * names are compared with === because == would compare two
+		 * numeric-looking strings by value instead of byte for byte
+		 */
+		fseek($index, 4*256 + 24*$cur);
+		for ($i = 0; $i < $n; $i++) {
+			$off = GitPHP_Pack::fuint32($index);
+			$name = fread($index, 20);
+			if ($name === $binaryHash) {
+				return $off;
+			}
+		}
+		return false;
+	}
+
+	/**
+	 * SearchIndexV2
+	 *
+	 * Searches a version 2 index for a hash
+	 *
+	 * @access private
+	 * @param resource $index file pointer to index
+	 * @param string $hash hash to find
+	 * @return mixed int pack offset if found, false otherwise
+	 * @throws Exception if the object is stored at a 64-bit offset
+	 */
+	private function SearchIndexV2($index, $hash)
+	{
+		/*
+		 * index v2 structure:
+		 * magic and version - 2*4 bytes
+		 * fanout table - 256*4 bytes
+		 * sha listing - 20*count bytes
+		 * crc checksums - 4*count bytes
+		 * offsets - 4*count bytes
+		 */
+		$binaryHash = pack('H40', $hash);
+
+		/*
+		 * get the start/end indices to search
+		 * from the fanout table
+		 */
+		list($cur, $after) = $this->ReadFanout($index, $binaryHash, 8);
+		if ($cur == $after) {
+			return false;
+		}
+
+		// get the object count from fanout[255]
+		fseek($index, 8 + 4*255);
+		$objectCount = GitPHP_Pack::fuint32($index);
+
+		/*
+		 * find the index of the hash in the sha listing
+		 * between cur and after from the fanout.
+		 * names are compared with === because == would compare two
+		 * numeric-looking strings by value instead of byte for byte
+		 */
+		fseek($index, 8 + 4*256 + 20 * $cur);
+		for ($i = $cur; $i < $after; $i++) {
+			$name = fread($index, 20);
+			if ($name === $binaryHash) {
+				break;
+			}
+		}
+		if ($i == $after) {
+			return false;
+		}
+
+		/*
+		 * get the offset from the same index in the offset table (after the 20*count sha and 4*count crc tables)
+		 */
+		fseek($index, 8 + 4*256 + 24*$objectCount + 4*$i);
+		$offset = GitPHP_Pack::fuint32($index);
+		if ($offset & 0x80000000) {
+			throw new Exception('64-bit offsets not implemented');
+		}
+		return $offset;
+	}
+
+	/**
+	 * ReadFanout
+	 *
+	 * Finds the start/end index a hash will be located between,
+	 * according to the fanout table
+	 *
+	 * @access private
+	 * @param resource $index index file pointer
+	 * @param string $binaryHash binary encoded hash to find
+	 * @param int $offset offset in the index file where the fanout table is located
+	 * @return array Range (start index, end index) where object can be located
+	 */
+	private function ReadFanout($index, $binaryHash, $offset)
+	{
+		/*
+		 * fanout table has 256 4-byte integers indexed by the first
+		 * byte of the object name (first level fan-out).
+		 * names starting with byte N begin at the index stored in entry N-1
+		 * (0 for the first byte value) and end before the index in entry N
+		 */
+		$first = ord($binaryHash[0]);
+		if ($first == 0) {
+			$cur = 0;
+			fseek($index, $offset);
+			$after = GitPHP_Pack::fuint32($index);
+		} else {
+			fseek($index, $offset + ($first - 1) * 4);
+			$cur = GitPHP_Pack::fuint32($index);
+			$after = GitPHP_Pack::fuint32($index);
+		}
+		return array($cur, $after);
+	}
+
+	/**
+	 * GetObject
+	 *
+	 * Extracts an object from the pack
+	 *
+	 * @access public
+	 * @param string $hash hash of object to extract
+	 * @param int $type output parameter, returns the type of the object
+	 * @return string object content, or false if not found or not extractable
+	 * @throws Exception if the pack file is not a version 2 pack
+	 */
+	public function GetObject($hash, &$type = 0)
+	{
+		$offset = $this->FindPackedObject($hash);
+		// the pack is only opened when the index knows the object
+		$pack = ($offset === false) ? false : fopen($this->project->GetPath() . '/objects/pack/pack-' . $this->hash . '.pack', 'rb');
+		if ($pack === false) {
+			return false;
+		}
+		flock($pack, LOCK_SH);
+		$supported = (fread($pack, 4) === 'PACK' && GitPHP_Pack::fuint32($pack) == 2);
+		$unpacked = $supported ? $this->UnpackObject($pack, $offset) : false;
+		flock($pack, LOCK_UN);
+		fclose($pack);
+		if (!$supported) {
+			throw new Exception('Unsupported pack format');
+		}
+		// UnpackObject returns false for entries it cannot decode; list() on that would yield null
+		if ($unpacked === false) {
+			return false;
+		}
+		list($type, $data) = $unpacked;
+		return $data;
+	}
+
+	/**
+	 * UnpackObject
+	 *
+	 * Extracts an object at an offset, resolving delta entries against their base
+	 *
+	 * @access private
+	 * @param resource $pack pack file pointer
+	 * @param int $offset object offset
+	 * @return mixed array of object type and data, or false if the entry cannot be resolved
+	 */
+	private function UnpackObject($pack, $offset)
+	{
+		fseek($pack, $offset);
+
+		/*
+		 * object header:
+		 * first byte holds the type (bits 4-6) and the low 4 bits of the size
+		 * subsequent bytes each have 7 next higher bits of the size (little endian)
+		 * most significant bit is either 1 or 0 to indicate whether the next byte
+		 * should be read as part of the size.  1 means continue reading the size,
+		 * 0 means the data is starting
+		 */
+		$c = ord(fgetc($pack));
+		$type = ($c >> 4) & 0x07;
+		$size = $c & 0x0F;
+		for ($i = 4; $c & 0x80; $i += 7) {
+			$c = ord(fgetc($pack));
+			$size |= (($c & 0x7f) << $i);
+		}
+
+		if ($type == GitPHP_Pack::OBJ_COMMIT || $type == GitPHP_Pack::OBJ_TREE || $type == GitPHP_Pack::OBJ_BLOB || $type == GitPHP_Pack::OBJ_TAG) {
+			/*
+			 * regular gzipped object data ($size+512 bytes are read as an upper bound for the compressed form)
+			 */
+			return array($type, gzuncompress(fread($pack, $size+512), $size));
+		} else if ($type == GitPHP_Pack::OBJ_OFS_DELTA) {
+			/*
+			 * delta of an object at offset
+			 */
+			$buf = fread($pack, $size+512+20);
+
+			/*
+			 * read the base object offset
+			 * each subsequent byte's 7 least significant bits
+			 * are part of the offset in decreasing significance per byte
+			 * (opposite of other places)
+			 * most significant bit is a flag indicating whether to read the
+			 * next byte as part of the offset
+			 */
+			$pos = 0;
+			$off = -1;
+			do {
+				$off++;
+				$c = ord($buf[$pos++]);
+				$off = ($off << 7) + ($c & 0x7f);
+			} while ($c & 0x80);
+
+			/*
+			 * next read the compressed delta data
+			 */
+			$delta = gzuncompress(substr($buf, $pos), $size);
+			unset($buf);
+
+			$baseOffset = $offset - $off;
+			if ($baseOffset > 0) {
+				// read base object at offset and apply delta to it, unless the base itself is unusable
+				$resolved = $this->UnpackObject($pack, $baseOffset);
+				if ($resolved !== false && is_string($resolved[1])) {
+					list($type, $base) = $resolved;
+					return array($type, GitPHP_Pack::ApplyDelta($delta, $base));
+				}
+			}
+		} else if ($type == GitPHP_Pack::OBJ_REF_DELTA) {
+			/*
+			 * delta of object with hash:
+			 * first the base object's hash, load that object
+			 * through the project since it may not be in this pack
+			 */
+			$hash = fread($pack, 20);
+			$hash = bin2hex($hash);
+			$base = $this->project->GetObject($hash, $type);
+			if (!is_string($base)) {
+				// without its base the delta cannot be applied
+				return false;
+			}
+
+			/*
+			 * then the gzipped delta data
+			 */
+			$delta = gzuncompress(fread($pack, $size + 512), $size);
+			$data = GitPHP_Pack::ApplyDelta($delta, $base);
+
+			return array($type, $data);
+		}
+
+		return false;
+	}
+
+	/**
+	 * ApplyDelta
+	 *
+	 * Applies a binary delta to a base object
+	 *
+	 * @static
+	 * @access private
+	 * @param string $delta delta string
+	 * @param string $base base object data
+	 * @return string patched content
+	 */
+	private static function ApplyDelta($delta, $base)
+	{
+		// algorithm from patch-delta.c
+		$pos = 0;
+		// the delta opens with two variable length sizes (base, result); they are only skipped
+		GitPHP_Pack::ParseVarInt($delta, $pos);
+		GitPHP_Pack::ParseVarInt($delta, $pos);
+		$data = '';
+		$deltalen = strlen($delta);
+		while ($pos < $deltalen) {
+			$opcode = ord($delta[$pos++]);
+			if ($opcode & 0x80) {
+				// copy from base: each set flag bit means one more offset/length byte follows
+				$off = 0;
+				if ($opcode & 0x01) $off = ord($delta[$pos++]);
+				if ($opcode & 0x02) $off |= ord($delta[$pos++]) <<  8;
+				if ($opcode & 0x04) $off |= ord($delta[$pos++]) << 16;
+				if ($opcode & 0x08) $off |= ord($delta[$pos++]) << 24;
+				$len = 0;
+				if ($opcode & 0x10) $len = ord($delta[$pos++]);
+				if ($opcode & 0x20) $len |= ord($delta[$pos++]) <<  8;
+				if ($opcode & 0x40) $len |= ord($delta[$pos++]) << 16;
+				if ($len == 0) $len = 0x10000;
+				$data .= substr($base, $off, $len);
+			} else if ($opcode > 0) {
+				// insert: the opcode itself is the number of literal bytes that follow
+				$data .= substr($delta, $pos, $opcode);
+				$pos += $opcode;
+			}
+		}
+		return $data;
+	}
+
+	/**
+	 * ParseVarInt
+	 *
+	 * Reads a git-style packed variable length integer
+	 * sequence of bytes, where each byte's 7 less significant bits
+	 * are pieces of the int in increasing significance for each byte (little endian)
+	 * the most significant bit of each byte is a flag whether to continue
+	 * reading bytes or not
+	 *
+	 * @access private
+	 * @static
+	 * @param string $str packed data string
+	 * @param int $pos position in string to read from, advanced past the bytes consumed
+	 * @return int parsed integer
+	 */
+	private static function ParseVarInt($str, &$pos=0)
+	{
+		$ret = 0;
+		$byte = 0x80;	// continuation flag pre-set so the first byte is always read
+		for ($shift = 0; $byte & 0x80; $shift += 7) {
+			$byte = ord($str[$pos++]);
+			$ret |= (($byte & 0x7F) << $shift);
+		}
+		return $ret;
+	}
+
+	/**
+	 * uint32
+	 *
+	 * Decodes the first four bytes of a string as a big endian 32 bit integer
+	 *
+	 * @static
+	 * @access private
+	 * @param string $str binary data
+	 * @return int integer
+	 */
+	private static function uint32($str)
+	{
+		$fields = unpack('Nx', substr($str, 0, 4));	// 'N' format, result keyed as 'x'
+		return $fields['x'];
+	}
+
+	/**
+	 * fuint32
+	 *
+	 * Reads four bytes from a file handle and decodes them with uint32
+	 *
+	 * @static
+	 * @access private
+	 * @param resource $handle file handle
+	 * @return int integer
+	 */
+	private static function fuint32($handle)
+	{
+		return self::uint32(fread($handle, 4));
+	}
+}
+

--- a/include/git/Project.class.php
+++ b/include/git/Project.class.php
@@ -14,6 +14,7 @@
 require_once(GITPHP_GITOBJECTDIR . 'Commit.class.php');
 require_once(GITPHP_GITOBJECTDIR . 'Head.class.php');
 require_once(GITPHP_GITOBJECTDIR . 'Tag.class.php');
+require_once(GITPHP_GITOBJECTDIR . 'Pack.class.php');
 
 /**
  * Project class
@@ -187,6 +188,24 @@
 	 * @access protected
 	 */
 	protected $commitCache = array();
+
+	/**
+	 * packs
+	 *
+	 * List of GitPHP_Pack objects for this project, filled in by ReadPacks
+	 *
+	 * @access protected
+	 */
+	protected $packs = array();
+
+	/**
+	 * packsRead
+	 *
+	 * Whether ReadPacks has already run, so GetObject only scans for packs once
+	 *
+	 * @access protected
+	 */
+	protected $packsRead = false;
 
 	/**
 	 * __construct
@@ -1221,5 +1240,77 @@
 		unset($exe);
 	}
 
+	/**
+	 * GetObject
+	 *
+	 * Gets the raw content of an object, from a loose object file or else from the packs
+	 *
+	 * @access public
+	 * @param string $hash object hash
+	 * @param int $type output parameter, set to the GitPHP_Pack::OBJ_* type when it is known
+	 * @return string object data, or false if the object cannot be read
+	 */
+	public function GetObject($hash, &$type = 0)
+	{
+		if (!preg_match('/^[0-9A-Fa-f]{40}$/', $hash)) {
+			return false;
+		}
+		// first check if it's unpacked
+		$path = $this->GetPath() . '/objects/' . substr($hash, 0, 2) . '/' . substr($hash, 2);
+		if (file_exists($path)) {
+			$raw = file_get_contents($path);
+			$inflated = ($raw === false) ? false : gzuncompress($raw);
+			// expect "<type> <size>\0<content>"; an unreadable or malformed file falls through to the packs
+			if ($inflated !== false && strpos($inflated, "\0") !== false) {
+				list($header, $data) = explode("\0", $inflated, 2);
+				$types = array(
+					'commit' => GitPHP_Pack::OBJ_COMMIT,
+					'tree' => GitPHP_Pack::OBJ_TREE,
+					'blob' => GitPHP_Pack::OBJ_BLOB,
+					'tag' => GitPHP_Pack::OBJ_TAG,
+				);
+				list($typestr) = explode(' ', $header, 2);
+				if (isset($types[$typestr])) {
+					$type = $types[$typestr];
+				}
+				return $data;
+			}
+		}
+
+		if (!$this->packsRead) {
+			$this->ReadPacks();
+		}
+
+		// then try packs
+		foreach ($this->packs as $pack) {
+			$data = $pack->GetObject($hash, $type);
+			// anything but a string means this pack could not supply the object
+			if (is_string($data)) {
+				return $data;
+			}
+		}
+		return false;
+	}
+
+	/**
+	 * ReadPacks
+	 *
+	 * Read the list of packs in the repository (one GitPHP_Pack per index in objects/pack)
+	 *
+	 * @access private
+	 */
+	private function ReadPacks()
+	{
+		// start clean so that a retry after a failed read cannot list a pack twice
+		$this->packs = array();
+		$files = scandir($this->GetPath() . '/objects/pack');
+		foreach (($files === false ? array() : $files) as $file) {
+			if (preg_match('/^pack-([0-9A-Fa-f]{40})\.idx$/', $file, $regs)) {
+				$this->packs[] = new GitPHP_Pack($this, $regs[1]);
+			}
+		}
+		$this->packsRead = true;
+	}
+
 }
 

comments