Integrate with Google Cloud API in Python

google-cloud is a collection of idiomatic Python clients for Google Cloud Platform services. There is also an older Python library officially supported by Google, google-api-python-client, which is in maintenance mode.

ref:
https://github.com/googleapis/google-cloud-python
https://github.com/googleapis/google-api-python-client

Installation

$ pip install google-cloud

# you can also install only specific components
$ pip install google-cloud-storage

ref:
https://pypi.org/search/?q=google+cloud

Google Cloud Storage

It is worth noting that initializing storage.Client() is a blocking call.
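
Since the client construction blocks (it has to resolve credentials and project settings), one option is to create the client once and reuse it instead of constructing it per request. A minimal sketch, using a hypothetical get_storage_client() helper:

from google.cloud import storage

_storage_client = None

def get_storage_client():
    # create the client lazily and reuse it,
    # so the blocking initialization only happens once per process
    global _storage_client
    if _storage_client is None:
        _storage_client = storage.Client()
    return _storage_client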

ref:
https://googleapis.github.io/google-cloud-python/latest/storage/buckets.html
https://cloud.google.com/storage/docs/reference/libraries

Upload From String

from google.cloud import storage
from google.cloud.storage.bucket import Bucket
from google.cloud.storage.blob import Blob

def upload_from_string(bucket_id, content, filename, content_type):
    client = storage.Client()
    bucket = Bucket(client, bucket_id)
    blob = Blob(filename, bucket)
    blob.upload_from_string(content, content_type=content_type)
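
A hypothetical call; the bucket name, object name, and contents below are placeholders:

upload_from_string(
    bucket_id='asia.uploads.example.com',
    content='hello world',
    filename='media/hello.txt',
    content_type='text/plain',
)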

Upload From A URL

from google.cloud import storage
import requests

def upload_from_url(bucket_id, filename, url):
    client = storage.Client()
    session = requests.Session()
    with session.get(url, stream=True) as response:
        bucket = client.get_bucket(bucket_id)
        blob = bucket.blob(filename)
        blob.upload_from_file(response.raw, content_type=response.headers.get('Content-Type'))
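
A hypothetical call; the bucket name, object name, and source URL are placeholders:

upload_from_url(
    bucket_id='asia.uploads.example.com',
    filename='media/remote-video.mp4',
    url='https://example.com/videos/remote-video.mp4',
)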

Update A File's Metadata

from google.cloud import storage

def update_metadata(bucket_name, filepath, new_metadata):
    storage_client = storage.Client()
    bucket = storage_client.get_bucket(bucket_name)
    blob = bucket.get_blob(filepath)
    blob.metadata = {**blob.metadata, **new_metadata} if blob.metadata else new_metadata
    blob.patch()

new_metadata = {
    'Link': '<https://api.example.com/users/57c16f5bb811055b66d8ef46>; rel="user"',
}
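
And a hypothetical call, with placeholder bucket and object names:

update_metadata(
    bucket_name='asia.uploads.example.com',
    filepath='media/your-filename.ext',
    new_metadata=new_metadata,
)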

ref:
https://github.com/GoogleCloudPlatform/google-cloud-python/issues/1185

Copy A File

from google.cloud import storage

def copy_file(source_bucket, source_name, destination_bucket, destination_name):
    storage_client = storage.Client()
    source_bucket = storage_client.get_bucket(source_bucket)
    source_file = source_bucket.blob(source_name)
    destination_bucket = storage_client.get_bucket(destination_bucket)
    destination_file = source_bucket.copy_blob(source_file, destination_bucket, destination_name)
    return destination_file

file_ext_mapping = {
    'image/jpeg': 'jpg',
    'video/mp4': 'mp4',
}
file_ext = file_ext_mapping[original_message.media.mimetype]
source_name = f'messages/{original_message.id}.{file_ext}'
destination_name = f'messages/{new_message.id}.{file_ext}'

copy_file(
    source_bucket='asia.uploads.example.com',
    source_name=source_name,
    destination_bucket='asia.uploads.example.com',
    destination_name=destination_name,
)

ref:
https://cloud.google.com/storage/docs/json_api/v1/objects/copy
https://cloud.google.com/storage/docs/renaming-copying-moving-objects#storage-copy-object-python

Copy A Folder With Batch Operations

from google.cloud import storage

def copy_files(source_bucket_name, source_name_prefix, destination_bucket_name, fix_destination_name_func=None):
    storage_client = storage.Client()
    source_bucket = storage_client.get_bucket(source_bucket_name)
    destination_bucket = storage_client.get_bucket(destination_bucket_name)
    blobs = source_bucket.list_blobs(prefix=source_name_prefix)

    # YOU CANNOT DO THIS
    # blobs is an HTTP iterator
    # blobs.num_results always returns 0
    # if not blobs.num_results:
    #     raise ValueError(f'No objects matched: gs://{source_bucket.name}/{source_name_prefix}')

    with storage_client.batch():
        for source_blob in blobs:
            destination_name = fix_destination_name_func(source_blob.name) if callable(fix_destination_name_func) else source_blob.name
            source_bucket.copy_blob(source_blob, destination_bucket, destination_name)
    return True

source_bucket_name = 'asia.uploads.example.com'
destination_bucket_name = 'asia.contents.example.com'
source_name_prefix = 'media/123'

copy_files(
    source_bucket_name=source_bucket_name,
    destination_bucket_name=destination_bucket_name,
    source_name_prefix=source_name_prefix,
    fix_destination_name_func=lambda source_name: source_name.replace(source_name_prefix, 'forum-posts'),
)

which is equivalent to:

$ gsutil cp -r "gs://asia.uploads.example.com/media/123/*" "gs://asia.contents.example.com/"

ref:
https://cloud.google.com/storage/docs/listing-objects

batch() does not guarantee the order of execution, so do not mix different types of calls in the same batch. For instance, a batch should not contain a copy of a.txt followed by a delete of a.txt, since the delete might be executed before the copy.
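
For example, to "move" objects you could run the copies and the deletes as two separate batches, so that a delete can never be executed before its corresponding copy. A minimal sketch (move_files is a hypothetical helper, not a library API):

from google.cloud import storage

def move_files(source_bucket_name, destination_bucket_name, prefix):
    storage_client = storage.Client()
    source_bucket = storage_client.get_bucket(source_bucket_name)
    destination_bucket = storage_client.get_bucket(destination_bucket_name)
    blobs = list(source_bucket.list_blobs(prefix=prefix))

    # first batch: copies only
    with storage_client.batch():
        for blob in blobs:
            source_bucket.copy_blob(blob, destination_bucket, blob.name)

    # second batch: deletes only, issued after all copies have been sent
    with storage_client.batch():
        for blob in blobs:
            source_bucket.delete_blob(blob.name)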

ref:
https://googlecloudplatform.github.io/google-cloud-python/latest/storage/batch.html

Upload A File Directly To A Bucket

We first need to generate a signed upload URL, and then we can upload the file to that URL.

import base64
import datetime
import time

from oauth2client.client import GoogleCredentials
import yarl

credentials = GoogleCredentials.get_application_default()

def signurl(method, url, content_type=None, expires_at=None, md5sum=None, meta=None):
    method, is_resumable = method.upper(), False
    if method in ['RESUMABLE']:
        method, is_resumable = 'POST', True
    path = yarl.URL(url).path

    def signature():
        def _signature_parts():
            def _meta():
                for key, value in (meta or {}).items():
                    yield 'x-goog-meta-{key}:{value}'.format(key=key, value=value)
                if is_resumable:
                    yield 'x-goog-resumable:start'

            yield method
            yield md5sum or ''
            # we need to use `curl -H 'content-type:'` to upload if we sign an empty content-type
            yield content_type or 'application/octet-stream'
            yield str(int(time.mktime(expires_at.timetuple()))) if expires_at else ''
            yield from sorted(_meta())
            yield path

        _, signature = credentials.sign_blob('\n'.join(_signature_parts()))
        return base64.b64encode(signature).decode('utf-8')

    def params():
        yield 'GoogleAccessId', credentials.service_account_email
        if expires_at:
            yield 'Expires', int(time.mktime(expires_at.timetuple()))
        yield 'Signature', signature()

    return str(yarl.URL(url).with_query(**dict(params())))

signurl(
    method='RESUMABLE',
    url='https://storage.googleapis.com/asia.uploads.example.com/media/your-filename.ext',
    expires_at=datetime.datetime.utcnow() + datetime.timedelta(hours=24),
)

$ curl -v -X 'POST' \
-H 'content-type: application/octet-stream' \
-H 'x-goog-resumable:start' \
-d '' 'THE_SIGNED_UPLOAD_URL'

$ curl -v -X PUT \
--upload-file whatever.mp4 \
THE_URL_FROM_LOCATION_HEADER_OF_THE_ABOVE_RESPONSE
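
The same two-step flow can also be driven from Python with requests; a sketch, with the bucket, object name, and local file as placeholders:

import datetime

import requests

signed_url = signurl(
    method='RESUMABLE',
    url='https://storage.googleapis.com/asia.uploads.example.com/media/your-filename.ext',
    expires_at=datetime.datetime.utcnow() + datetime.timedelta(hours=24),
)

# step 1: initiate the resumable upload session;
# the session URI is returned in the Location header
init_response = requests.post(
    signed_url,
    headers={'content-type': 'application/octet-stream', 'x-goog-resumable': 'start'},
)
init_response.raise_for_status()
session_uri = init_response.headers['Location']

# step 2: upload the file to the session URI
with open('whatever.mp4', 'rb') as f:
    upload_response = requests.put(session_uri, data=f)
upload_response.raise_for_status()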

ref:
https://cloud.google.com/storage/docs/access-control/signed-urls#signing-resumable
https://cloud.google.com/storage/docs/uploading-objects
https://cloud.google.com/storage/docs/json_api/v1/how-tos/upload
https://cloud.google.com/storage/docs/json_api/v1/how-tos/resumable-upload
https://cloud.google.com/storage/docs/xml-api/resumable-upload

Enable CORS For A Google Cloud Storage Bucket

$ gsutil cors get gs://your_bucket_name

$ cat cors.json
[
  {
    "origin": ["*"],
    "responseHeader": ["Content-Type", "x-goog-resumable:start"],
    "method": ["GET", "PUT", ""]
  }
]
$ gsutil cors set cors.json gs://your_bucket_name
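
The same policy can also be set with the Python client through the bucket's cors property; a minimal sketch:

from google.cloud import storage

client = storage.Client()
bucket = client.get_bucket('your_bucket_name')
bucket.cors = [
    {
        'origin': ['*'],
        'responseHeader': ['Content-Type', 'x-goog-resumable'],
        'method': ['GET', 'PUT', 'POST'],
    },
]
bucket.patch()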

ref:
https://cloud.google.com/storage/docs/gsutil/commands/cors
https://medium.com/imersotechblog/upload-files-to-google-cloud-storage-gcs-from-the-browser-159810bb11e3
http://andrewvos.com/uploading-files-directly-to-google-cloud-storage-from-client-side-javascript