Multipart Upload

Upload large files efficiently by breaking them into smaller parts. Recommended for files larger than 100MB, required for files larger than 5GB.

Benefits

  • Resume uploads - Recover from network failures (see the resume sketch after this list)
  • Parallel uploads - Upload parts simultaneously
  • Large files - Up to 5TB (vs 5GB for a single PUT)
  • Better reliability - Retry individual failed parts
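
Resuming is possible because parts that were already uploaded are kept server-side under the upload ID. A minimal sketch of how a resume could discover them, assuming WAYSCloud follows S3's ListParts convention (a GET on the object key with the uploadId parameter; this endpoint is an assumption, not shown elsewhere on this page). The upload_id comes from the initiate step shown later.

import requests
import os
import xml.etree.ElementTree as ET

API_KEY = os.getenv('WAYSCLOUD_API_KEY')
NS = '{http://s3.amazonaws.com/doc/2006-03-01/}'

def list_uploaded_parts(bucket, key, upload_id):
    # ListParts (S3 convention, assumed): GET the key with only uploadId set
    response = requests.get(
        f'https://api.wayscloud.services/v1/storage/{bucket}/{key}',
        params={'uploadId': upload_id},
        headers={'Authorization': f'Bearer {API_KEY}'}
    )
    root = ET.fromstring(response.content)
    # Map part number -> ETag; on resume, skip these and upload the rest
    return {
        int(part.find(f'{NS}PartNumber').text): part.find(f'{NS}ETag').text
        for part in root.findall(f'.//{NS}Part')
    }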

Process Overview

  1. Initiate multipart upload
  2. Upload parts (5MB-5GB each)
  3. Complete multipart upload

Info: WAYSCloud Storage supports the standard S3 multipart upload API. Use the AWS SDK for automatic handling.

Automatic Multipart Upload (boto3)

import boto3
import os
from boto3.s3.transfer import TransferConfig

# Configure S3 client
s3 = boto3.client(
    's3',
    endpoint_url='https://api.wayscloud.services/v1/storage',
    aws_access_key_id='wayscloud',
    aws_secret_access_key=os.getenv('WAYSCLOUD_API_KEY')
)

# Configure multipart settings
config = TransferConfig(
    multipart_threshold=100 * 1024 * 1024,  # Switch to multipart above 100MB
    max_concurrency=10,
    multipart_chunksize=10 * 1024 * 1024,   # 10MB parts
    use_threads=True
)

# Upload large file (multipart handled automatically)
s3.upload_file(
    'large-video.mp4',          # Local file
    'my-bucket',                # Bucket
    'videos/large-video.mp4',   # Key
    Config=config,
    # Callback receives the bytes transferred since the previous call
    Callback=lambda bytes_transferred: print(f'Uploaded: {bytes_transferred} bytes')
)

Manual Multipart Upload

Step 1: Initiate Upload

import requests
import os
import xml.etree.ElementTree as ET

API_KEY = os.getenv('WAYSCLOUD_API_KEY')
bucket = 'my-bucket'
key = 'large-file.zip'

# Initiate multipart upload
response = requests.post(
    f'https://api.wayscloud.services/v1/storage/{bucket}/{key}?uploads',
    headers={'Authorization': f'Bearer {API_KEY}'}
)

# Parse the upload ID from the XML response
root = ET.fromstring(response.content)
upload_id = root.find('.//{http://s3.amazonaws.com/doc/2006-03-01/}UploadId').text
print(f'Upload ID: {upload_id}')

Step 2: Upload Parts

part_size = 10 * 1024 * 1024  # 10MB
parts = []

with open('large-file.zip', 'rb') as f:
    part_number = 1

    while True:
        data = f.read(part_size)
        if not data:
            break

        # Upload part
        response = requests.put(
            f'https://api.wayscloud.services/v1/storage/{bucket}/{key}',
            params={'partNumber': part_number, 'uploadId': upload_id},
            headers={'Authorization': f'Bearer {API_KEY}'},
            data=data
        )

        etag = response.headers['ETag']
        parts.append({'PartNumber': part_number, 'ETag': etag})

        print(f'Uploaded part {part_number}, ETag: {etag}')
        part_number += 1

Step 3: Complete Upload

# Build the completion XML from the collected part numbers and ETags
parts_xml = ''.join([
    f'<Part><PartNumber>{p["PartNumber"]}</PartNumber><ETag>{p["ETag"]}</ETag></Part>'
    for p in parts
])
complete_xml = f'<CompleteMultipartUpload>{parts_xml}</CompleteMultipartUpload>'

# Complete multipart upload
response = requests.post(
    f'https://api.wayscloud.services/v1/storage/{bucket}/{key}',
    params={'uploadId': upload_id},
    headers={'Authorization': f'Bearer {API_KEY}', 'Content-Type': 'application/xml'},
    data=complete_xml
)
response.raise_for_status()

print('Upload completed successfully!')
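
Note that some S3-compatible services can return HTTP 200 for the complete call while reporting a failure inside the XML body. Whether WAYSCloud behaves this way is not documented here, so a defensive check (reusing the ET import from Step 1) is cheap insurance:

# S3 error documents use a top-level, un-namespaced <Error> element
if response.content:
    root = ET.fromstring(response.content)
    if root.tag == 'Error':
        raise RuntimeError(root.findtext('Code', default='CompleteMultipartUpload failed'))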

With Progress Bar

from tqdm import tqdm
import os

def upload_with_progress(file_path, bucket, key):
    file_size = os.path.getsize(file_path)
    part_size = 10 * 1024 * 1024

    # Initiate upload
    response = requests.post(
        f'https://api.wayscloud.services/v1/storage/{bucket}/{key}?uploads',
        headers={'Authorization': f'Bearer {API_KEY}'}
    )
    root = ET.fromstring(response.content)
    upload_id = root.find('.//{http://s3.amazonaws.com/doc/2006-03-01/}UploadId').text

    parts = []
    part_number = 1

    with open(file_path, 'rb') as f:
        with tqdm(total=file_size, unit='B', unit_scale=True) as pbar:
            while True:
                data = f.read(part_size)
                if not data:
                    break

                # Upload part
                response = requests.put(
                    f'https://api.wayscloud.services/v1/storage/{bucket}/{key}',
                    params={'partNumber': part_number, 'uploadId': upload_id},
                    headers={'Authorization': f'Bearer {API_KEY}'},
                    data=data
                )

                parts.append({
                    'PartNumber': part_number,
                    'ETag': response.headers['ETag']
                })

                pbar.update(len(data))
                part_number += 1

    # Complete upload
    parts_xml = ''.join([
        f'<Part><PartNumber>{p["PartNumber"]}</PartNumber><ETag>{p["ETag"]}</ETag></Part>'
        for p in parts
    ])
    complete_xml = f'<CompleteMultipartUpload>{parts_xml}</CompleteMultipartUpload>'

    requests.post(
        f'https://api.wayscloud.services/v1/storage/{bucket}/{key}',
        params={'uploadId': upload_id},
        headers={'Authorization': f'Bearer {API_KEY}', 'Content-Type': 'application/xml'},
        data=complete_xml
    )

    print('Upload completed!')

# Usage
upload_with_progress('large-video.mp4', 'my-bucket', 'videos/large-video.mp4')

Parallel Upload

from concurrent.futures import ThreadPoolExecutor
import os

def upload_part(bucket, key, upload_id, part_number, data):
    response = requests.put(
        f'https://api.wayscloud.services/v1/storage/{bucket}/{key}',
        params={'partNumber': part_number, 'uploadId': upload_id},
        headers={'Authorization': f'Bearer {API_KEY}'},
        data=data
    )
    return {
        'PartNumber': part_number,
        'ETag': response.headers['ETag']
    }

def parallel_multipart_upload(file_path, bucket, key, max_workers=5):
    file_size = os.path.getsize(file_path)
    part_size = 10 * 1024 * 1024

    # Initiate upload
    response = requests.post(
        f'https://api.wayscloud.services/v1/storage/{bucket}/{key}?uploads',
        headers={'Authorization': f'Bearer {API_KEY}'}
    )
    root = ET.fromstring(response.content)
    upload_id = root.find('.//{http://s3.amazonaws.com/doc/2006-03-01/}UploadId').text

    # Read all parts up front (note: this holds the entire file in memory;
    # for very large files, read each part lazily inside the worker instead)
    parts_data = []
    with open(file_path, 'rb') as f:
        part_number = 1
        while True:
            data = f.read(part_size)
            if not data:
                break
            parts_data.append((part_number, data))
            part_number += 1

    # Upload parts in parallel
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = [
            executor.submit(upload_part, bucket, key, upload_id, part_num, data)
            for part_num, data in parts_data
        ]
        parts = [future.result() for future in futures]

    # The completion XML must list parts in ascending part-number order
    parts.sort(key=lambda x: x['PartNumber'])

    # Complete upload
    parts_xml = ''.join([
        f'<Part><PartNumber>{p["PartNumber"]}</PartNumber><ETag>{p["ETag"]}</ETag></Part>'
        for p in parts
    ])
    complete_xml = f'<CompleteMultipartUpload>{parts_xml}</CompleteMultipartUpload>'

    requests.post(
        f'https://api.wayscloud.services/v1/storage/{bucket}/{key}',
        params={'uploadId': upload_id},
        headers={'Authorization': f'Bearer {API_KEY}', 'Content-Type': 'application/xml'},
        data=complete_xml
    )

    print('Parallel upload completed!')
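
Usage mirrors the progress-bar helper above (assumes requests, ET, and API_KEY from the earlier steps are in scope):

# Usage
parallel_multipart_upload('large-video.mp4', 'my-bucket', 'videos/large-video.mp4', max_workers=8)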

Abort Multipart Upload

If an upload fails or is cancelled, abort it so the parts already stored are cleaned up:

# Abort incomplete upload
response = requests.delete(
    f'https://api.wayscloud.services/v1/storage/{bucket}/{key}',
    params={'uploadId': upload_id},
    headers={'Authorization': f'Bearer {API_KEY}'}
)
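
To clean up uploads that were never completed or aborted, you first have to find them. A minimal sketch, assuming WAYSCloud follows S3's ListMultipartUploads convention (a GET on the bucket with the uploads query parameter; this endpoint is an assumption, not shown elsewhere on this page):

# List in-progress multipart uploads for the bucket (S3 convention, assumed)
NS = '{http://s3.amazonaws.com/doc/2006-03-01/}'
response = requests.get(
    f'https://api.wayscloud.services/v1/storage/{bucket}?uploads',
    headers={'Authorization': f'Bearer {API_KEY}'}
)
root = ET.fromstring(response.content)
for upload in root.findall(f'.//{NS}Upload'):
    print(upload.find(f'{NS}Key').text, upload.find(f'{NS}UploadId').text)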

Best Practices

  1. Part Size: 10-50MB for optimal performance
  2. Parallel Uploads: 5-10 concurrent parts
  3. Abort Failed Uploads: Clean up incomplete uploads
  4. Retry Logic: Retry failed parts individually (see the sketch after this list)
  5. Use SDK: boto3 handles complexity automatically
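
For item 4, a minimal per-part retry wrapper with exponential backoff (the attempt count, delays, and timeout are illustrative choices, not service requirements; assumes requests and API_KEY from the earlier steps):

import time

def upload_part_with_retry(bucket, key, upload_id, part_number, data, max_attempts=3):
    for attempt in range(1, max_attempts + 1):
        try:
            response = requests.put(
                f'https://api.wayscloud.services/v1/storage/{bucket}/{key}',
                params={'partNumber': part_number, 'uploadId': upload_id},
                headers={'Authorization': f'Bearer {API_KEY}'},
                data=data,
                timeout=60
            )
            response.raise_for_status()
            return {'PartNumber': part_number, 'ETag': response.headers['ETag']}
        except requests.RequestException:
            if attempt == max_attempts:
                raise
            time.sleep(2 ** attempt)  # Back off: 2s, 4s, ...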

Limits

Parameter            Limit
-------------------  ----------------------
Minimum part size    5MB (except last part)
Maximum part size    5GB
Maximum parts        10,000
Maximum file size    5TB
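
These limits interact: at the 10MB part size used in the examples above, the 10,000-part cap is reached near 100GB, so larger files need larger parts. A small helper (hypothetical, not part of any API) that picks a compliant part size:

import math

MIN_PART = 5 * 1024 * 1024   # 5MB minimum (except the last part)
MAX_PARTS = 10_000

def choose_part_size(file_size, preferred=10 * 1024 * 1024):
    # Grow the part size until the whole file fits in 10,000 parts
    required = math.ceil(file_size / MAX_PARTS)
    return max(MIN_PART, preferred, required)

# For a 200GB file the 10MB default would need ~20,480 parts;
# this returns a larger part size that stays within the cap
print(choose_part_size(200 * 1024**3))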

Next Steps