1 |
efrain |
1 |
<?php
|
|
|
2 |
namespace Aws\Glacier;
|
|
|
3 |
|
|
|
4 |
use Aws\CommandInterface;
|
|
|
5 |
use Aws\HashingStream;
|
|
|
6 |
use Aws\Multipart\AbstractUploader;
|
|
|
7 |
use Aws\Multipart\UploadState;
|
|
|
8 |
use Aws\PhpHash;
|
|
|
9 |
use Aws\ResultInterface;
|
|
|
10 |
use GuzzleHttp\Psr7;
|
|
|
11 |
use Psr\Http\Message\StreamInterface as Stream;
|
|
|
12 |
|
|
|
13 |
/**
|
|
|
14 |
* Encapsulates the execution of a multipart upload to Glacier.
|
|
|
15 |
*/
|
|
|
16 |
class MultipartUploader extends AbstractUploader
|
|
|
17 |
{
|
|
|
18 |
/**
 * Part size, in bytes, that determinePartSize() falls back to when no
 * `part_size` option is configured (1 MB). It is also the smallest entry
 * of self::$validPartSizes.
 */
const PART_MIN_SIZE = 1048576;

/**
 * Part sizes, in bytes, that determinePartSize() accepts: every power of
 * two from 1 MB up to 4 GB.
 *
 * @var array
 */
private static $validPartSizes = [
    1048576,    // 2^20 =   1 MB
    2097152,    // 2^21 =   2 MB
    4194304,    // 2^22 =   4 MB
    8388608,    // 2^23 =   8 MB
    16777216,   // 2^24 =  16 MB
    33554432,   // 2^25 =  32 MB
    67108864,   // 2^26 =  64 MB
    134217728,  // 2^27 = 128 MB
    268435456,  // 2^28 = 256 MB
    536870912,  // 2^29 = 512 MB
    1073741824, // 2^30 =   1 GB
    2147483648, // 2^31 =   2 GB
    4294967296, // 2^32 =   4 GB
];
|
|
|
35 |
|
|
|
36 |
/**
 * Rebuilds the UploadState of an existing multipart upload from what the
 * service reports for it.
 *
 * Every page of `ListParts` results is walked. Each listed part is
 * recorded in the state as already uploaded, and the state is returned
 * with its status set to UploadState::INITIATED.
 *
 * @param GlacierClient $client    GlacierClient object used to call `ListParts`.
 * @param string        $vaultName Vault name for the multipart upload.
 * @param string        $uploadId  Upload ID for the multipart upload.
 * @param string        $accountId Account ID for the multipart upload
 *                                 (defaults to '-').
 *
 * @return UploadState
 */
public static function getStateFromService(
    GlacierClient $client,
    $vaultName,
    $uploadId,
    $accountId = '-'
) {
    $uploadState = new UploadState([
        'accountId' => $accountId,
        'vaultName' => $vaultName,
        'uploadId'  => $uploadId,
    ]);

    $pages = $client->getPaginator('ListParts', $uploadState->getId());
    foreach ($pages as $page) {
        // The part size is taken from the service only while the state
        // does not have one yet, i.e. from the first page.
        if (!$uploadState->getPartSize()) {
            $uploadState->setPartSize($page['PartSizeInBytes']);
        }

        // Every part the service lists has already been uploaded.
        foreach ($page['Parts'] as $listedPart) {
            // $parsed is [part number, size in bytes].
            $parsed = self::parseRange(
                $listedPart['RangeInBytes'],
                $uploadState->getPartSize()
            );
            $uploadState->markPartAsUploaded($parsed[0], [
                'size'     => $parsed[1],
                'checksum' => $listedPart['SHA256TreeHash'],
            ]);
        }
    }

    $uploadState->setStatus(UploadState::INITIATED);

    return $uploadState;
}
|
|
|
81 |
|
|
|
82 |
/**
 * Creates a multipart upload for a Glacier archive.
 *
 * The valid configuration options are as follows:
 *
 * - account_id: (string, default=string('-')) Account ID for the archive
 *   being uploaded, if different from the account making the request.
 * - archive_description: (string) Description of the archive.
 * - before_complete: (callable) Callback to invoke before the
 *   `CompleteMultipartUpload` operation. The callback should have a
 *   function signature like `function (Aws\Command $command) {...}`.
 * - before_initiate: (callable) Callback to invoke before the
 *   `InitiateMultipartUpload` operation. The callback should have a
 *   function signature like `function (Aws\Command $command) {...}`.
 * - before_upload: (callable) Callback to invoke before any
 *   `UploadMultipartPart` operations. The callback should have a function
 *   signature like `function (Aws\Command $command) {...}`.
 * - concurrency: (int, default=int(3)) Maximum number of concurrent
 *   `UploadMultipartPart` operations allowed during the multipart upload.
 * - part_size: (int, default=int(1048576)) Part size, in bytes, to use when
 *   doing a multipart upload. This must be between 1 MB and 4 GB, and must
 *   be a power of 2 (in megabytes).
 * - prepare_data_source: (callable) Callback to invoke before starting the
 *   multipart upload workflow. The callback should have a function
 *   signature like `function () {...}`.
 * - state: (Aws\Multipart\UploadState) An object that represents the state
 *   of the multipart upload and that is used to resume a previous upload.
 *   When this option is provided, the `account_id`, `key`, and `part_size`
 *   options are ignored. NOTE(review): `key` is not an option of this
 *   uploader; presumably `vault_name` is meant - confirm against the
 *   parent class.
 * - vault_name: (string, required) Vault name to use for the archive being
 *   uploaded.
 *
 * @param GlacierClient $client Client used for the upload.
 * @param mixed         $source Source of the data to upload.
 * @param array         $config Configuration used to perform the upload.
 */
public function __construct(GlacierClient $client, $source, array $config = [])
{
    // Array union keeps every caller-supplied option; these entries only
    // fill in keys that $config does not define.
    $glacierDefaults = [
        'account_id' => '-',
        'vault_name' => null,
    ];

    parent::__construct($client, $source, $config + $glacierDefaults);
}
|
|
|
125 |
|
|
|
126 |
/**
 * Returns the Glacier-specific names used by the multipart workflow.
 *
 * The array holds the operation names under 'command', a map of ID
 * option names to their camelCase counterparts under 'id', and 'range'
 * as the 'part_num' entry.
 * NOTE(review): how the parent class consumes these entries is not
 * visible in this file.
 *
 * @return array
 */
protected function loadUploadWorkflowInfo()
{
    $operations = [
        'initiate' => 'InitiateMultipartUpload',
        'upload'   => 'UploadMultipartPart',
        'complete' => 'CompleteMultipartUpload',
    ];

    $identifiers = [
        'account_id' => 'accountId',
        'vault_name' => 'vaultName',
        'upload_id'  => 'uploadId',
    ];

    return [
        'command'  => $operations,
        'id'       => $identifiers,
        'part_num' => 'range',
    ];
}
|
|
|
142 |
|
|
|
143 |
/**
 * Resolves the part size, in bytes, to use for the upload.
 *
 * Reads the `part_size` option, falling back to self::PART_MIN_SIZE when
 * the option is missing or empty, and accepts the value only if it is
 * numeric and equal to one of the entries of self::$validPartSizes.
 *
 * @return int The matching entry of self::$validPartSizes.
 *
 * @throws \InvalidArgumentException If the part size is not one of the
 *                                   valid sizes.
 */
protected function determinePartSize()
{
    // Make sure the part size is set. empty() treats the same values as
    // "not set" that the short ternary did, without raising a notice when
    // the key is absent.
    $partSize = !empty($this->config['part_size'])
        ? $this->config['part_size']
        : self::PART_MIN_SIZE;

    // Only numeric values take part in the lookup. A loose comparison
    // against anything else is unsafe: `true`, for example, compares
    // equal to every non-zero integer and would otherwise be accepted.
    $isNumeric = is_int($partSize)
        || is_float($partSize)
        || (is_string($partSize) && is_numeric($partSize));

    if ($isNumeric) {
        // Loose on purpose, so that "1048576" and 1048576.0 still match.
        $match = array_search($partSize, self::$validPartSizes);
        if ($match !== false) {
            // Hand back the canonical list entry rather than the caller's
            // spelling of it (e.g. a numeric string).
            return self::$validPartSizes[$match];
        }
    }

    throw new \InvalidArgumentException('The part_size must be a power '
        . 'of 2, in megabytes, such that 1 MB <= PART_SIZE <= 4 GB.');
}
|
|
|
156 |
|
|
|
157 |
/**
 * Builds the data for the next part to upload, starting at the current
 * position of the source stream.
 *
 * The part body is read through hashing decorators, so the returned data
 * carries the entries written by decorateWithHashes() ('checksum' and
 * 'ContentSHA256') next to 'body' and 'range'.
 *
 * @param bool $seekable Whether the source stream is seekable.
 * @param int  $number   Part number; not used by this implementation.
 *
 * @return array|false The part data, or false when there is nothing left
 *                     to upload (the body is empty).
 */
protected function createPart($seekable, $number)
{
    $part = [];
    $rangeStart = $this->source->tell();

    if (!$seekable) {
        // The source cannot be re-read, so the part is copied into a
        // temporary stream; the hashes are computed during that copy.
        $hashedSource = $this->decorateWithHashes(
            $this->limitPartStream($this->source),
            $part
        );
        $body = Psr7\Utils::streamFor();
        Psr7\Utils::copyToStream($hashedSource, $body);
    } else {
        // The source is seekable, so the body is a limited view over a
        // fresh handle opened on the same URI.
        $handle = Psr7\Utils::tryFopen($this->source->getMetadata('uri'), 'r');
        $body = $this->limitPartStream(Psr7\Utils::streamFor($handle));

        // Read through a hashing copy of the body purely to obtain the
        // hash values for the part.
        $hashedBody = $this->decorateWithHashes($body, $part);
        while (!$hashedBody->eof()) {
            $hashedBody->read(1048576);
        }

        // Move the original source forward to the end of this range.
        $this->source->seek($this->source->tell() + $body->getSize());
    }

    // An empty body means there is no part to create.
    if ($body->getSize() === 0) {
        return false;
    }

    $body->seek(0);
    $part['body'] = $body;

    // The source now sits one byte past the end of this part.
    $rangeEnd = $this->source->tell() - 1;
    $part['range'] = "bytes {$rangeStart}-{$rangeEnd}/*";

    return $part;
}
|
|
|
194 |
|
|
|
195 |
/**
 * Records the part sent by the given command as uploaded.
 *
 * The part number and size are derived from the command's 'range'
 * parameter; the checksum is taken from its 'checksum' parameter.
 *
 * @param CommandInterface $command Command that carried the part.
 * @param ResultInterface  $result  Result of the command; not used here.
 */
protected function handleResult(CommandInterface $command, ResultInterface $result)
{
    // $parsed is [part number, size in bytes].
    $parsed = self::parseRange(
        $command['range'],
        $this->state->getPartSize()
    );

    $this->state->markPartAsUploaded($parsed[0], [
        'size'     => $parsed[1],
        'checksum' => $command['checksum'],
    ]);
}
|
|
|
207 |
|
|
|
208 |
/**
 * Builds the Glacier-specific parameters for the initiate operation.
 *
 * @return array Always contains 'partSize'; contains 'archiveDescription'
 *               only when the `archive_description` option is set.
 */
protected function getInitiateParams()
{
    $initiate = ['partSize' => $this->state->getPartSize()];

    // Without a description the part size is all there is to send.
    if (!isset($this->config['archive_description'])) {
        return $initiate;
    }

    $initiate['archiveDescription'] = $this->config['archive_description'];

    return $initiate;
}
|
|
|
217 |
|
|
|
218 |
/**
 * Builds the Glacier-specific parameters for the complete operation.
 *
 * Adds up the sizes of all uploaded parts and feeds their checksums, in
 * the order the state returns them, into a TreeHash.
 *
 * @return array 'archiveSize' (sum of the part sizes) and 'checksum'
 *               (hex-encoded result of the TreeHash).
 */
protected function getCompleteParams()
{
    $archiveHash = new TreeHash();
    $totalBytes = 0;

    foreach ($this->state->getUploadedParts() as $uploaded) {
        $totalBytes += $uploaded['size'];
        $archiveHash->addChecksum($uploaded['checksum']);
    }

    $complete = [];
    $complete['archiveSize'] = $totalBytes;
    $complete['checksum'] = bin2hex($archiveHash->complete());

    return $complete;
}
|
|
|
232 |
|
|
|
233 |
/**
 * Wraps a stream in a tree-hash decorator and then in a linear SHA-256
 * decorator.
 *
 * Each decorator reports its result through a callback that stores the
 * hex-encoded hash in $data: 'checksum' for the tree hash and
 * 'ContentSHA256' for the linear hash.
 *
 * @param Stream $stream Stream to decorate.
 * @param array  $data   Data bag that results are injected into.
 *
 * @return Stream The outermost (linear SHA-256) hashing stream.
 */
private function decorateWithHashes(Stream $stream, array &$data)
{
    // Both callbacks capture $data by reference so that the hashes end up
    // in the caller's array.
    $storeTreeHash = function ($rawHash) use (&$data) {
        $data['checksum'] = bin2hex($rawHash);
    };
    $storeLinearHash = function ($rawHash) use (&$data) {
        $data['ContentSHA256'] = bin2hex($rawHash);
    };

    // Inner layer: tree hash.
    $treeHashed = new HashingStream($stream, new TreeHash(), $storeTreeHash);

    // Outer layer: linear SHA-256 hash.
    return new HashingStream(
        $treeHashed,
        new PhpHash('sha256'),
        $storeLinearHash
    );
}
|
|
|
259 |
|
|
|
260 |
/**
 * Parses a Glacier range string into a part number and a size.
 *
 * Both the prefixed form ("bytes 0-1048575/*") and the bare form
 * ("0-1048575") are accepted. The byte pair is located by pattern rather
 * than by fixed offsets, so a missing or longer suffix after the pair
 * does not corrupt the result.
 *
 * @param string $range    Glacier range string (e.g., "bytes 0-1048575/*")
 * @param int    $partSize The chosen part size, in bytes
 *
 * @return array Two elements: the 1-based part number, and the size of
 *               the range in bytes.
 *
 * @throws \InvalidArgumentException If the range contains no
 *                                   "first-last" byte pair, or the part
 *                                   size is not a positive number.
 */
private static function parseRange($range, $partSize)
{
    $rangeText = (string) $range;

    // Pick out the "first-last" pair wherever it sits in the string.
    if (!preg_match('/(\d+)-(\d+)/', $rangeText, $bounds)) {
        throw new \InvalidArgumentException(
            "Unable to parse the byte range \"{$rangeText}\"."
        );
    }

    // The part number is a quotient, so the divisor must be positive.
    if (!is_numeric($partSize) || $partSize <= 0) {
        throw new \InvalidArgumentException(
            'The part size used to parse a byte range must be positive.'
        );
    }

    // "+ 0" instead of an (int) cast: offsets beyond the platform's
    // integer range become floats rather than being truncated.
    $firstByte = $bounds[1] + 0;
    $lastByte = $bounds[2] + 0;

    // Calculate and return the range index and the range size.
    return [
        intval($firstByte / $partSize) + 1,
        $lastByte - $firstByte + 1,
    ];
}
|
|
|
284 |
}
|