google-cloud provides idiomatic Python clients for Google Cloud Platform services. There is an older Python library, google-api-python-client, which is also officially supported by Google but is now in maintenance mode.
ref:
https://github.com/googleapis/google-cloud-python
https://github.com/googleapis/google-api-python-client
Installation
$ pip install google-cloud
# you can also install only the specific components you need
$ pip install google-cloud-storage
ref:
https://pypi.org/search/?q=google+cloud
Google Cloud Storage
It is worth noting that initializing storage.Client() is a blocking call, so avoid doing it on a latency-sensitive path such as inside an event loop; see the sketch after the refs below.
ref:
https://googleapis.github.io/google-cloud-python/latest/storage/buckets.html
https://cloud.google.com/storage/docs/reference/libraries
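Since the constructor blocks (credential discovery may read files or hit the metadata server), one option in an asyncio application is to construct the client in a thread pool. A minimal sketch, assuming asyncio; make_storage_client is a hypothetical helper name:
import asyncio

from google.cloud import storage

async def make_storage_client() -> storage.Client:
    loop = asyncio.get_running_loop()
    # storage.Client() blocks on credential discovery, so run it in the
    # default thread pool executor instead of on the event loop
    return await loop.run_in_executor(None, storage.Client)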
Upload From String
from google.cloud import storage
from google.cloud.storage.bucket import Bucket
from google.cloud.storage.blob import Blob

def upload_from_string(bucket_id, content, filename, content_type):
    client = storage.Client()
    # constructing Bucket/Blob directly avoids the extra GET requests
    # that client.get_bucket() / bucket.get_blob() would issue
    bucket = Bucket(client, bucket_id)
    blob = Blob(filename, bucket)
    blob.upload_from_string(content, content_type=content_type)
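A hypothetical call (the bucket name and object path are placeholders):
upload_from_string(
    bucket_id='your-bucket-name',
    content='{"status": "ok"}',
    filename='healthchecks/status.json',
    content_type='application/json',
)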
Upload From A URL
from google.cloud import storage
import requests

def upload_from_url(bucket_id, filename, url):
    client = storage.Client()
    session = requests.Session()
    with session.get(url, stream=True) as response:
        # fail early instead of uploading an error page as the file
        response.raise_for_status()
        bucket = client.get_bucket(bucket_id)
        blob = bucket.blob(filename)
        # stream the response body straight into the blob without
        # buffering the whole file in memory
        blob.upload_from_file(response.raw, content_type=response.headers.get('Content-Type'))
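Hypothetical usage (URL and names are placeholders):
upload_from_url(
    bucket_id='your-bucket-name',
    filename='media/remote-image.jpg',
    url='https://example.com/images/remote-image.jpg',
)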
Update A File's Metadata
from google.cloud import storage

def update_metadata(bucket_name, filepath, new_metadata):
    storage_client = storage.Client()
    bucket = storage_client.get_bucket(bucket_name)
    blob = bucket.get_blob(filepath)
    # merge with the existing metadata instead of replacing it wholesale
    blob.metadata = {**blob.metadata, **new_metadata} if blob.metadata else new_metadata
    blob.patch()
new_metadata = {
'Link': '<https://api.example.com/users/57c16f5bb811055b66d8ef46>; rel="user"',
}
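A hypothetical call putting it together (the object path is a placeholder):
update_metadata(
    bucket_name='your-bucket-name',
    filepath='users/57c16f5bb811055b66d8ef46/avatar.jpg',
    new_metadata=new_metadata,
)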
ref:
https://github.com/GoogleCloudPlatform/google-cloud-python/issues/1185
Copy A File
from google.cloud import storage
def copy_file(source_bucket, source_name, destination_bucket, destination_name):
storage_client = storage.Client()
source_bucket = storage_client.get_bucket(source_bucket)
source_file = source_bucket.blob(source_name)
destination_bucket = storage_client.get_bucket(destination_bucket)
destination_file = source_bucket.copy_blob(source_file, destination_bucket, destination_name)
return destination_file
file_ext_mapping = {
'image/jpeg': 'jpg',
'video/mp4': 'mp4',
}
file_ext = file_ext_mapping[original_message.media.mimetype]
source_name = f'messages/{original_message.id}.{file_ext}'
destination_name = f'messages/{new_message.id}.{file_ext}'
copy_file(
source_bucket='asia.uploads.example.com',
source_name=source_name,
destination_bucket='asia.uploads.example.com',
destination_name=destination_name,
)
ref:
https://cloud.google.com/storage/docs/json_api/v1/objects/copy
https://cloud.google.com/storage/docs/renaming-copying-moving-objects#storage-copy-object-python
Copy A Folder With Batch Operations
from google.cloud import storage
def copy_files(source_bucket_name, source_name_prefix, destination_bucket_name, fix_destination_name_func=None):
storage_client = storage.Client()
source_bucket = storage_client.get_bucket(source_bucket_name)
destination_bucket = storage_client.get_bucket(destination_bucket_name)
blobs = source_bucket.list_blobs(prefix=source_name_prefix)
    # YOU CANNOT DO THIS:
    # blobs is an HTTP page iterator, so blobs.num_results always returns 0
    # before the iterator has been consumed
    # if not blobs.num_results:
    #     raise ValueError(f'No objects matched: gs://{source_bucket.name}/{source_name_prefix}')
with storage_client.batch():
for source_blob in blobs:
destination_name = fix_destination_name_func(source_blob.name) if callable(fix_destination_name_func) else source_blob.name
source_bucket.copy_blob(source_blob, destination_bucket, destination_name)
return True
source_bucket_name = 'asia.uploads.example.com'
destination_bucket_name = 'asia.contents.example.com'
source_name_prefix = 'media/123'
copy_files(
source_bucket_name=source_bucket_name,
destination_bucket_name=destination_bucket_name,
source_name_prefix=source_name_prefix,
fix_destination_name_func=lambda source_name: source_name.replace(source_name_prefix, 'forum-posts'),
)
which is equivalent to:
$ gsutil cp -r "gs://asia.uploads.example.com/media/123/*" "gs://asia.contents.example.com/"
ref:
https://cloud.google.com/storage/docs/listing-objects
batch() does not guarantee the order of execution, so do not mix different types of calls in the same batch. For instance, a single batch should not contain a "copy a.txt" call followed by a "delete a.txt" call; split them into separate batches, as in the sketch after the refs below.
ref:
https://googlecloudplatform.github.io/google-cloud-python/latest/storage/batch.html
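A minimal sketch of a safe "move" under that constraint (move_files and the prefixes are hypothetical): the copies run in one batch and the deletes in a second, so all copies are guaranteed to finish before any delete starts.
from google.cloud import storage

def move_files(bucket_name, source_prefix, destination_prefix):
    storage_client = storage.Client()
    bucket = storage_client.get_bucket(bucket_name)
    blobs = list(bucket.list_blobs(prefix=source_prefix))

    with storage_client.batch():  # batch 1: copies only
        for blob in blobs:
            new_name = blob.name.replace(source_prefix, destination_prefix, 1)
            bucket.copy_blob(blob, bucket, new_name)

    with storage_client.batch():  # batch 2: deletes only
        for blob in blobs:
            blob.delete()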
Upload A File Directly To A Bucket
We first need to generate a signed upload URL, and then we can upload the file to that URL.
import base64
import datetime
import time
from oauth2client.client import GoogleCredentials
import yarl
credentials = GoogleCredentials.get_application_default()
def signurl(method, url, content_type=None, expires_at=None, md5sum=None, meta=None):
method, is_resumable = method.upper(), False
if method in ['RESUMABLE']:
method, is_resumable = 'POST', True
path = yarl.URL(url).path
def signature():
def _signature_parts():
def _meta():
for key, value in (meta or {}).items():
yield 'x-goog-meta-{key}:{value}'.format(key=key, value=value)
if is_resumable:
yield 'x-goog-resumable:start'
yield method
yield md5sum or ''
# we need to use `curl -H 'content-type:'` to upload if we sign an empty content-type
yield content_type or 'application/octet-stream'
yield str(int(time.mktime(expires_at.timetuple()))) if expires_at else ''
yield from sorted(_meta())
yield path
_, signature = credentials.sign_blob('\n'.join(_signature_parts()))
return base64.b64encode(signature).decode('utf-8')
def params():
yield 'GoogleAccessId', credentials.service_account_email
if expires_at:
yield 'Expires', int(time.mktime(expires_at.timetuple()))
yield 'Signature', signature()
return str(yarl.URL(url).with_query(**dict(params())))
signurl(
method='RESUMABLE',
url='https://storage.googleapis.com/asia.uploads.example.com/media/your-filename.ext',
expires_at=datetime.datetime.utcnow() + datetime.timedelta(hours=24),
)
$ curl -v -X 'POST' \
-H 'content-type: application/octet-stream' \
-H 'x-goog-resumable:start' \
-d '' 'THE_SIGNED_UPLOAD_URL'
$ curl -v -X PUT \
--upload-file whatever.mp4 \
THE_URL_FROM_LOCATION_HEADER_OF_THE_ABOVE_RESPONSE
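The same two-step flow sketched in Python with requests instead of curl, reusing signurl() from above (the filename is a placeholder):
import requests

signed_url = signurl(
    method='RESUMABLE',
    url='https://storage.googleapis.com/asia.uploads.example.com/media/your-filename.ext',
    expires_at=datetime.datetime.utcnow() + datetime.timedelta(hours=24),
)

# step 1: initiate the resumable upload session (the first curl call above)
init_response = requests.post(
    signed_url,
    headers={'content-type': 'application/octet-stream', 'x-goog-resumable': 'start'},
)
init_response.raise_for_status()

# step 2: PUT the bytes to the session URL from the Location response header
upload_url = init_response.headers['Location']
with open('whatever.mp4', 'rb') as f:
    requests.put(upload_url, data=f)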
ref:
https://cloud.google.com/storage/docs/access-control/signed-urls#signing-resumable
https://cloud.google.com/storage/docs/uploading-objects
https://cloud.google.com/storage/docs/json_api/v1/how-tos/upload
https://cloud.google.com/storage/docs/json_api/v1/how-tos/resumable-upload
https://cloud.google.com/storage/docs/xml-api/resumable-upload
Enable CORS For A Google Cloud Storage Bucket
$ gsutil cors get gs://your_bucket_name
$ cat cors.json
[
    {
        "origin": ["*"],
        "responseHeader": ["Content-Type", "x-goog-resumable"],
        "method": ["GET", "PUT", "POST"]
    }
]
$ gsutil cors set cors.json gs://your_bucket_name
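A minimal sketch of the same configuration through the Python client (the bucket name is a placeholder):
from google.cloud import storage

client = storage.Client()
bucket = client.get_bucket('your_bucket_name')
# cors is a plain list of rule dicts; patch() persists it to the bucket
bucket.cors = [
    {
        'origin': ['*'],
        'responseHeader': ['Content-Type', 'x-goog-resumable'],
        'method': ['GET', 'PUT', 'POST'],
    },
]
bucket.patch()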
ref:
https://cloud.google.com/storage/docs/gsutil/commands/cors
https://medium.com/imersotechblog/upload-files-to-google-cloud-storage-gcs-from-the-browser-159810bb11e3
http://andrewvos.com/uploading-files-directly-to-google-cloud-storage-from-client-side-javascript