Integrate with Google Cloud API in Python

Installation

$ pipenv install google-cloud

# or install only the specific components you need
$ pipenv install google-cloud-storage

ref:
https://google-cloud-python.readthedocs.io/en/latest/index.html

Google Cloud Storage

Note that initializing storage.Client() is a blocking call.

ref:
https://google-cloud-python.readthedocs.io/en/latest/storage/buckets.html
https://cloud.google.com/storage/docs/reference/libraries

Update A File's Metadata

from google.cloud import storage

# Look up an existing object, replace its custom metadata, and persist the change.
storage_client = storage.Client()
source_bucket = storage_client.get_bucket('asia.public.swag.live')
source_file = source_bucket.get_blob('launchs/57c16f5bb811055b66d8ef46.jpg')

# Assigning .metadata only changes the local object; patch() sends the update.
link_header = '<https://api.v2.swag.live/users/57c16f5bb811055b66d8ef46>; rel="user"'
source_file.metadata = dict(Link=link_header)
source_file.patch()

ref:
https://github.com/GoogleCloudPlatform/google-cloud-python/issues/1185

Copy A File

from google.cloud import storage

def copy_file(source_bucket, source_name, destination_bucket, destination_name):
    """Copy a single object and return the blob created by the copy.

    Args:
        source_bucket: Name of the bucket that holds the object to copy.
        source_name: Object name (path) inside the source bucket.
        destination_bucket: Name of the bucket to copy into.
        destination_name: Object name to give the copy.

    Returns:
        Whatever ``Bucket.copy_blob`` returns for the new object.
    """
    client = storage.Client()
    origin = client.get_bucket(source_bucket)
    target = client.get_bucket(destination_bucket)
    # blob() only builds a local handle for the object name.
    return origin.copy_blob(origin.blob(source_name), target, destination_name)

# Object names are derived from the message id plus an extension chosen by MIME type.
file_ext_mapping = {'image/jpeg': 'jpg', 'video/mp4': 'mp4'}
file_ext = file_ext_mapping[original_message.media.mimetype]

source_name = f'messages/{original_message.id}.{file_ext}'
destination_name = f'messages/{new_message.id}.{file_ext}'

# Source and destination are the same bucket: the copy just gets a new name.
uploads_bucket = 'asia.uploads.swag.live'
copy_file(
    source_bucket=uploads_bucket,
    source_name=source_name,
    destination_bucket=uploads_bucket,
    destination_name=destination_name,
)

ref:
https://cloud.google.com/storage/docs/json_api/v1/objects/copy
https://cloud.google.com/storage/docs/renaming-copying-moving-objects#storage-copy-object-python

Copy A Folder With Batch Operations

from google.cloud import storage

def copy_files(source_bucket_name, source_name_prefix, destination_bucket_name, fix_destination_name_func=None, batch_size=100):
    """Copy every object under a name prefix into another bucket using batched requests.

    Args:
        source_bucket_name: Name of the bucket to copy from.
        source_name_prefix: Only objects whose name starts with this prefix are copied.
        destination_bucket_name: Name of the bucket to copy into.
        fix_destination_name_func: Optional callable that receives the source
            object name and returns the destination object name. When it is
            not callable, the source name is reused unchanged.
        batch_size: Maximum number of copy calls sent in one batch request.
            Defaults to 100, the per-batch limit documented for Cloud Storage.

    Returns:
        True once all copies have been submitted (also when nothing matched).

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError(f'batch_size must be >= 1, got {batch_size!r}')

    storage_client = storage.Client()
    source_bucket = storage_client.get_bucket(source_bucket_name)
    destination_bucket = storage_client.get_bucket(destination_bucket_name)

    # list_blobs() returns an HTTP iterator: blobs.num_results reads 0 here, so it
    # cannot be used to detect "no objects matched" up front.
    blobs = source_bucket.list_blobs(prefix=source_name_prefix)

    # Decide once whether names need rewriting instead of on every iteration.
    rename = fix_destination_name_func if callable(fix_destination_name_func) else None

    def _copy_chunk(chunk):
        # One batch request per chunk keeps each batch within the API limit.
        with storage_client.batch():
            for source_blob in chunk:
                destination_name = rename(source_blob.name) if rename else source_blob.name
                source_bucket.copy_blob(source_blob, destination_bucket, destination_name)

    # Collect blobs outside the batch context so listing requests are never
    # mixed into a batch, then flush a chunk whenever it is full.
    pending = []
    for source_blob in blobs:
        pending.append(source_blob)
        if len(pending) >= batch_size:
            _copy_chunk(pending)
            pending = []
    if pending:
        _copy_chunk(pending)
    return True

# Copy everything under one forum post's folder from the uploads bucket to the
# contents bucket, rewriting the prefix in each object name on the way.
source_bucket_name = 'asia.uploads.swag.live'
destination_bucket_name = 'asia.contents.swag.live'
source_name_prefix = 'forum-posts/123'


def _rewrite_prefix(source_name):
    """Return the object name with the source prefix replaced by 'forum-posts'."""
    return source_name.replace(source_name_prefix, 'forum-posts')


copy_files(
    source_bucket_name=source_bucket_name,
    source_name_prefix=source_name_prefix,
    destination_bucket_name=destination_bucket_name,
    fix_destination_name_func=_rewrite_prefix,
)

This is roughly equivalent to:

$ gsutil cp -r "gs://asia.uploads.swag.live/forum-posts/123/*" "gs://asia.contents.swag.live/"

ref:
https://cloud.google.com/storage/docs/listing-objects

batch() does not guarantee the order of execution, so do not mix different types of calls in the same batch. For instance, a single batch should not contain both "copy a.txt" and "delete a.txt".

ref:
https://googlecloudplatform.github.io/google-cloud-python/latest/storage/batch.html

Upload A File Directly To A Bucket

First generate a signed upload URL, then upload the file to that URL.

import base64
import calendar
import datetime
import time

from oauth2client.client import GoogleCredentials
import yarl

credentials = GoogleCredentials.get_application_default()

def signurl(method, url, content_type=None, expires_at=None, md5sum=None, meta=None):
    """Return ``url`` with signed-URL query parameters appended.

    Args:
        method: HTTP verb to sign (case-insensitive), or ``'RESUMABLE'`` to
            sign the POST request that starts a resumable upload.
        url: Full object URL. Only its path goes into the signed string.
        content_type: Content-Type the request will send. When omitted,
            ``'application/octet-stream'`` is signed.
        expires_at: ``datetime`` at which the URL expires. Naive values are
            interpreted as UTC; aware values are converted to UTC.
        md5sum: Content-MD5 value the request will send, if any.
        meta: Optional mapping of custom metadata; each item is signed as an
            ``x-goog-meta-<key>:<value>`` header.

    Returns:
        The URL as a string with ``GoogleAccessId``, ``Expires`` (only when
        ``expires_at`` is given) and ``Signature`` as its query string.
    """
    verb = method.upper()
    is_resumable = verb == 'RESUMABLE'
    if is_resumable:
        # A resumable upload is started with a POST carrying x-goog-resumable:start.
        verb = 'POST'

    target = yarl.URL(url)

    # timegm() reads the time tuple as UTC. time.mktime() would read it as local
    # time and shift the expiry by the host's UTC offset for utcnow()-based values.
    expires = calendar.timegm(expires_at.utctimetuple()) if expires_at else None

    extension_headers = [
        'x-goog-meta-{key}:{value}'.format(key=key, value=value)
        for key, value in (meta or {}).items()
    ]
    if is_resumable:
        extension_headers.append('x-goog-resumable:start')

    string_to_sign = '\n'.join([
        verb,
        md5sum or '',
        # we need to use `curl -H 'content-type:'` to upload if we sign an empty content-type
        content_type or 'application/octet-stream',
        str(expires) if expires is not None else '',
        *sorted(extension_headers),
        target.path,
    ])
    _, raw_signature = credentials.sign_blob(string_to_sign)

    # Insertion order is kept, so the query string reads
    # GoogleAccessId, Expires, Signature.
    query = {'GoogleAccessId': credentials.service_account_email}
    if expires is not None:
        query['Expires'] = expires
    query['Signature'] = base64.b64encode(raw_signature).decode('utf-8')

    return str(target.with_query(**query))

# Sign the POST that starts a resumable upload; the URL expires 24 hours from now.
signurl(
    method='RESUMABLE',
    url='https://storage.googleapis.com/asia.uploads.swag.live/forum-posts/your-filename.ext',
    expires_at=datetime.datetime.utcnow() + datetime.timedelta(hours=24),
)
$ curl -v -X 'POST' \
-H 'content-type: application/octet-stream' \
-H 'x-goog-resumable:start' \
-d '' 'THE_SIGNED_UPLOAD_URL'

$ curl -v -X PUT \
--upload-file whatever.mp4 \
THE_URL_FROM_LOCATION_HEADER_OF_THE_ABOVE_RESPONSE

ref:
https://cloud.google.com/storage/docs/access-control/signed-urls#signing-resumable
https://cloud.google.com/storage/docs/xml-api/resumable-upload
https://cloud.google.com/storage/docs/json_api/v1/how-tos/upload
https://cloud.google.com/storage/docs/uploading-objects