MongoDB cookbook: Queries and Aggregations

MongoDB cookbook: Queries and Aggregations

Frequently accessed items are cached in memory, so that MongoDB can provide optimal response time.

MongoDB Shell in JavaScript

Administration

db.adminCommand({"currentOp": 1, "$ownOps" : true})

db.adminCommand({ 
    "currentOp": true,
    "op" : "query", 
    "planSummary": "COLLSCAN"
}) 

ref:
https://hackernoon.com/mongodb-currentop-18fe2f9dbd68

Check If A Document Exists

It is significantly faster to use find() + limit() because findOne() will always read + return the document if it exists. find() just returns a cursor (or not) and only reads the data if you iterate through the cursor.

// Existence check: project only _id and stop at the first match so the full document is never read.
db.getCollection('message').find({_id: ObjectId("585836504b287b5022a3ae26"), delivered: false}, {_id: 1}).limit(1)

ref:
https://stackoverflow.com/questions/8389811/how-to-query-mongodb-to-test-if-an-item-exists
https://blog.serverdensity.com/checking-if-a-document-exists-mongodb-slow-findone-vs-find/

Find Documents

db.getCollection('user').find({username: 'nanababy520'})

db.getCollection('message').find({_id: ObjectId("5a6383b8d93d7a3fadf75af3")})

db.getCollection('message').find({_cls: 'Message'}).sort({posted_at: -1})

db.getCollection('message').find({sender: ObjectId("57aace67ac08e72acc3b265f"), pricing: {$ne: 0}})

db.getCollection('message').find({
    sender: ObjectId("5ac0f56038cfff013a123d85"),
    created_at: {
        $gte: ISODate('2018-04-21 12:00:00Z'),
        $lte: ISODate('2018-04-21 13:00:00Z')
    }
})
.sort({created_at: -1})

Find Documents With Regular Expression

db.getCollection('user').find({'username': /vicky/})

ref:
https://docs.mongodb.com/manual/reference/operator/query/regex/

Find Documents With An Array Field

  • $in: [...] means "intersection" or "any element in"
  • $all: [...] means "subset" or "contain"
  • $elemMatch: {...} means "any element match"
  • $not: {$elemMatch: {$nin: [...]}} means "subset" or "in"

The last one roughly means not any([False, False, False, False]), where each False indicates whether the item is not in [...].

ref:
https://stackoverflow.com/questions/12223465/mongodb-query-subset-of-an-array

db.getCollection('message').find({includes: ObjectId("5a4bb448af9c462c610d0cc7")})

db.getCollection('user').find({gender: 'F', tags: 'promoted'})
db.getCollection('user').find({gender: 'F', 'tags.1': {$exists: true}})

ref:
https://docs.mongodb.com/manual/reference/operator/query/exists/#exists-true

Find Documents With An Array Field Of Embedded Documents

Usually, you could use $elemMatch.

{'the_array_field': {'$elemMatch': {
    'a_field_of_each_element': {'$lte': now},
    'another_field_of_each_element': 123
}}}
db.getCollection('message').find({
    unlocks: {
        $elemMatch: {
            _cls: 'PointsUnlock',
            user: ObjectId("57f662e727a79d07993faec5")
        }
    }
})

db.getCollection('feature.shop.product').find({
    purchases: {
        $elemMatch: {
            _cls: 'Purchase'
        }
    }
})

db.getCollection('feature.shop.product').find({
    '_id': 'prod_CWlSTXBEU4mhEu',
    'purchases': {'$not': {'$elemMatch': {
        '_cls': 'DirectPurchase',
        'user': ObjectId("58b61d9094ab56f912ba10a5")
    }}},
})

ref:
https://docs.mongodb.com/manual/reference/operator/query/elemMatch/

Find Documents With Existence Of Fields Or Values

  • .find({'field': {'$exists': true}}): the field exists
  • .find({'field': {'$exists': false}}): the field does not exist
  • .find({'field': {'$type': 10}}): the field exists with a null value
  • .find({'field': null}): the field exists with a null value or the field does not exist
  • .find({'field': {'$ne': null}}): the field exists and the value is not null
db.test.insert({'num': 1, 'check': 'value'})
db.test.insert({'num': 2, 'check': null})
db.test.insert({'num': 3})

db.test.find({});

db.test.find({'check': {'$exists': true}})
// return 1 and 2

db.test.find({'check': {'$exists': false}})
// return 3

db.test.find({'check': {'$type': 10}});
// return 2

db.test.find({'check': null})
// return 2 and 3

db.test.find({'check': {'$ne': null}});
// return 1

ref:
https://stackoverflow.com/questions/4057196/how-do-you-query-this-in-mongo-is-not-null
https://docs.mongodb.com/manual/tutorial/query-for-null-fields/

Find Documents Where An Array Field Does Not Contain A Certain Value

db.getCollection('user').update({_id: ObjectId("579994ac61ff217f96a585d9"), tags: {$ne: 'tag_to_add'}}, {$push: {tags: 'tag_to_add'}})

db.getCollection('user').update({_id: ObjectId("579994ac61ff217f96a585d9"), tags: {$nin: ['tag_to_add']}}, {$push: {tags: 'tag_to_add'}})

ref:
https://stackoverflow.com/questions/16221599/find-documents-with-arrays-not-containing-a-document-with-a-particular-field-val

Find Documents Where An Array Field Is Not Empty

// $exists alone also matches documents whose array is empty, so exclude [] explicitly.
db.getCollection('message').find({unlocks: {$exists: true, $ne: []}})

ref:
https://stackoverflow.com/questions/14789684/find-mongodb-records-where-array-field-is-not-empty

Find Documents Where An Array Field's Size Is Greater Than 1

// 'messages.0' addresses the first array element, so this matches arrays holding at least 1 element.
db.getCollection('user.inbox').find({
    'messages.0': {'$exists': true}
})

// 'unlocks.10' addresses the 11th array element, so this matches arrays holding at least 11 elements.
db.getCollection('message').find({
    '_cls': 'Message',
    'unlocks.10': {'$exists': true}
}).sort({'posted_at': -1})

// Alternative form: the field exists and its size is not 0.
db.getCollection('message').find({
    '_cls': 'Message.ChatMessage',
    'sender': ObjectId("582ee32a5b9c861c87dc297e"),
    'unlocks': {'$exists': true, '$not': {'$size': 0}}
})

ref:
https://stackoverflow.com/questions/7811163/query-for-documents-where-array-size-is-greater-than-1/15224544

Find Documents With Computed Values Using $expr

$expr is the better $where.

ref:
https://thecodebarbarian.com/a-nodejs-perspective-on-mongodb-36-lookup-expr

Insert Documents

db.getCollection('feature.launch').insert({
    'url': '//asia.public.swag.live/launchs/5a06b88aaf9c462c6146ce12.jpg',
    'user': {
        'id': ObjectId("5a06b88aaf9c462c6146ce12"),
        'username': 'luke0804',
        'tags': ["gender:male"]
    }
})

db.getCollection('feature.launch').insert({
    'url': '//asia.public.swag.live/launchs/57c16f5bb811055b66d8ef46.jpg',
    'user': {
        'id': ObjectId("57c16f5bb811055b66d8ef46"),
        'username': 'riva',
        'tags': ["gender:female"]
    }
})

Update An Array Field

You should use arrayFilters as much as possible.

The syntax of arrayFilters would be:

db.collection.update(
   { <query selector> },
   { <update operator>: { "array.$[<identifier>].field" : value } },
   { arrayFilters: [ { <identifier>: <condition> } ] }
)
Inbox._get_collection().update_many(
    {'messages.id': message_id},
    {'$set': {'messages.$[message].tags': tags}},
    array_filters=[
        {'message.id': message_id},
    ],
)

ref:
https://docs.mongodb.com/manual/reference/operator/update/positional-filtered/

Insert an element into an array field at a certain position.

db.getCollection('feature.forums.post').update(
   { _id: ObjectId("5b3c6a9c8433b15569cae54e") },
   {
     $push: {
        media: {
           $each: [{
                "mimetype" : "image/jpeg",
                "url" : "https://asia.uploads.swag.live/posts/5adb795b47d057338abe8910.jpg",
                "presets" : {}
            }],
           $position: 1
        }
     }
   }
)

ref:
https://docs.mongodb.com/manual/reference/operator/update/position/

Set an array field to empty.

db.getCollection('message').update(
    {'tags': 'pack:joycelai-1'},
    {'$set': {'unlocks': []}},
    {'multi': true}
)

db.getCollection('feature.shop.product').update(
    {},
    {'$set': {'purchases': []}},
    {'multi': true}
)

ref:
https://docs.mongodb.com/manual/reference/method/db.collection.update/
https://docs.mongodb.com/manual/reference/operator/update/set/

Remove elements from an array field.

var userId = ObjectId("57985b784af4124063f090d3");

db.getCollection('user').update(
    {'follows.user': userId},
    {'$pull': {'follows': {'user': userId}}},
    {
        'multi': true,
    }
);

db.getCollection('message').update(
    {'_id': {'$in': [
        ObjectId('5aca1ffc4271ab1624787ec4'),
        ObjectId('5aca31ab93ef2936291c3dd4'),
        ObjectId('5aca33d9b5eaef04943c0d0b'),
        ObjectId('5aca34e7a48c543b07fb0a0f'),
        ObjectId('5aca272d93ef296edc1c3dee'),
        ObjectId('5aca342aa48c54306dfb0a21'),
        ObjectId('5aca20756bd01023a8cb02e9')
    ]}},
    {'$pull': {'tags': 'pack:prod_D75YlDMzcCiAw3'}},
    {'multi': true}
);

ref:
https://docs.mongodb.com/manual/reference/operator/update/pull/

Update Large Numbers Of Documents

Use Bulk.find.arrayFilters() and Bulk.find.update() together.

import datetime

expiration_time = datetime.datetime.utcnow() - datetime.timedelta(hours=48)

bulk = Outbox._get_collection().initialize_unordered_bulk_op()

for outbox in Outbox.objects.only('id').filter(messages__posted_at__lt=expiration_time):
    bulk.find({'_id': outbox.id}).update_one({
        '$pull': {'messages': {
            'posted_at': {'$lt': expiration_time},
        }},
    })

try:
    results = bulk.execute()
except pymongo.errors.InvalidOperation as err:
    if str(err) != 'No operations to execute':
        raise err

ref:
https://docs.mongodb.com/manual/reference/method/Bulk/
https://docs.mongodb.com/manual/reference/method/Bulk.find.arrayFilters/

Of course, you could also update the same document with multiple operations. However, it does not make sense.

from pymongo import UpdateOne
import bson

def _operations():
    """Yield the bulk-write operations that update a single post.

    Reads ``payload`` and ``post_id`` from the enclosing scope. One ``$set``
    operation is yielded for each of title, location, pricing and description
    that has a truthy value in ``payload``. A final operation sets
    ``posted_at`` when the post has at least one media item, a non-null
    title, location and pricing, and a ``posted_at`` that is still null or
    missing.
    """
    for field in ('title', 'location', 'pricing', 'description'):
        # Assign first, then test: `if name = value:` is not valid Python.
        value = payload.get(field)
        if value:
            yield UpdateOne({'_id': bson.ObjectId(post_id)}, {'$set': {field: value}})

    # This operation has to be yielded as well; otherwise it is built,
    # discarded, and never reaches bulk_write().
    yield UpdateOne(
        {
            '_id': bson.ObjectId(post_id),
            'media.0': {'$exists': True},
            'title': {'$ne': None},
            'location': {'$ne': None},
            'pricing': {'$ne': None},
            'posted_at': {'$eq': None},
        },
        {'$set': {'posted_at': utils.utcnow()}},
    )

operations = list(_operations())
result = Post._get_collection().bulk_write(operations, ordered=True)
print(result.bulk_api_result)

ref:
https://api.mongodb.com/python/current/examples/bulk.html

Remove items from an array field of documents.

var userId = ObjectId("57a42a779f22bb6bcc434520");

db.getCollection('user').update(
    {'follows.user': userId},
    {'$pull': {'follows': {'user': userId}}},
    {'multi': true}
)

ref:
https://stackoverflow.com/questions/33594397/how-to-update-a-large-number-of-documents-in-mongodb-most-effeciently

MongoEngine In Python

ref:
http://docs.mongoengine.org/guide/index.html
http://docs.mongoengine.org/apireference.html

Define Collections

It seems every collection in MongoEngine must have an id field.

ref:
http://docs.mongoengine.org/guide/defining-documents.html

Filter With Raw Queries

post = Post.objects \
    .no_dereference().only('posted_at') \
    .filter(__raw__={
        '_id': bson.ObjectId(post_id),
        'media.0': {'$exists': True},
        'title': {'$ne': None},
        'location': {'$ne': None},
        'gender': {'$ne': None},
        'pricing': {'$ne': None},
    }) \
    .modify(__raw__={'$min': {'posted_at': utils.utcnow()}}, new=True)

print(post.posted_at)

ref:
http://docs.mongoengine.org/guide/querying.html#raw-queries

Check If A Document Exists

Use .exists().

import datetime

now = datetime.datetime.now(datetime.timezone.utc)
if TagSchedule.objects.filter(user=user_id, tag=tag, started_at__gt=now).exists():
    return 'exists'

You have to use __raw__ if the field you want to query is a db.ListField(GenericEmbeddedDocumentField(XXX)) field.

if MessagePackProduct.objects.filter(id=message_pack_id, __raw__={'purchases.user': g.user.id}).exists():
    return 'exists'

Update With Conditions Of Field Values

You could update the value of the field to a specified value if the specified value is less than or greater than the current value of the field. The $min and $max operators can compare values of different types.

Only set posted_at to current timestamp if its current value is None or absent.

Post.objects.update_one(
    {
        '_id': bson.ObjectId(post_id),
        'media.0': {'$exists': True},
        'title': {'$ne': None},
        'location': {'$ne': None},
        'gender': {'$ne': None},
        'pricing': {'$ne': None},
    },
    {
        '$min': {'posted_at': utils.utcnow()},
    },
)

ref:
https://docs.mongodb.com/manual/reference/operator/update/min/
https://docs.mongodb.com/manual/reference/operator/update/max/

Update An Array Field

Array update operators:

  • $: Acts as a placeholder to update the first element in an array for documents that matches the query condition.
  • $[]: Acts as a placeholder to update all elements in an array for documents that match the query condition.
  • $[<identifier>]: Acts as a placeholder to update elements in an array that match the arrayFilters condition.
  • $addToSet: Adds elements to an array only if they do not already exist in the set.
  • $push: Adds an item to an array.
  • $pop: Removes the first or last item of an array.
  • $pull: Removes all array elements that match a specified query.
  • $pullAll: Removes all matching values from an array.

ref:
https://docs.mongodb.com/manual/reference/operator/update-array/
http://docs.mongoengine.org/guide/querying.html#atomic-updates
http://thecodebarbarian.com/a-nodejs-perspective-on-mongodb-36-array-filters.html

Add an element in an array field.

user_id = '582ee32a5b9c861c87dc297e'
tag = 'my_tag'

updated = User.objects \
    .filter(id=user_id, tags__ne=tag) \
    .update_one(push__tags=tag)

updated = User.objects \
    .filter(id=user_id) \
    .update_one(add_to_set__schedules={
        'tag': tag,
         'nbf': datetime.datetime(2018, 6, 4, 0, 0),
        'exp': datetime.datetime(2019, 5, 1, 0, 0),
    })

Insert an element into an array at a certain position.

slot = 2
Post.objects.filter(id=post_id, media__id__ne=media_id).update_one(__raw__={
    '$push': {
        'media': {
            '$each': [{'id': bson.ObjectId(media_id)}],
            '$position': slot,
        }
    }
})

ref:
https://docs.mongodb.com/manual/reference/operator/update/position/
http://docs.mongoengine.org/guide/querying.html#querying-lists

Remove elements in an array field. It is also worth noting that update(pull__abc=xyz) always returns 1.

user_id = '582ee32a5b9c861c87dc297e'
tag = 'my_tag'

updated = User.objects \
    .filter(id=user_id) \
    .update_one(pull__tags=tag)

updated = User.objects \
    .filter(id=user_id) \
    .update_one(pull__schedules={'tag': tag})

Remove multiple embedded documents in an array field.

import bson

user_id = '5a66d5c2af9c462c617ce552'
tags = ['valid_tag_1', 'future_tag']

updated_result = User._get_collection().update_one(
    {'_id': bson.ObjectId(user_id)},
    {'$pull': {'schedules': {'tag': {'$in': tags}}}},
)
print(updated_result.raw_result)
# {'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}

ref:
https://stackoverflow.com/questions/28102691/pullall-while-removing-embedded-objects

You could also use add_to_set to add an item to an array only if it is not already in the list; it always returns 1 if filter() matches any document. However, you can set full_result=True to get the detailed update result.

update_result = User.objects.filter(id=user_id).update_one(
    add_to_set__tags=tag,
    full_result=True,
)
# {'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}

ref:
http://docs.mongoengine.org/guide/querying.html#atomic-updates

Update a multi-level nested array field. Yes, arrayFilters supports it.

ref:
https://docs.mongodb.com/manual/reference/operator/update/positional-filtered/
https://stackoverflow.com/questions/23577123/updating-a-nested-array-with-mongodb

Update an embedding document in an array field.

MessagePackProduct.objects \
    .filter(id='prod_CR1u34BIpDbHeo', skus__id='sku_CR23rZOTLhYprP') \
    .update(__raw__={
        '$set': {'skus.$': {'id': 'sku_CR23rZOTLhYprP', 'test': 'test'}}
    })

ref:
https://stackoverflow.com/questions/9200399/replacing-embedded-document-in-array-in-mongodb
https://docs.mongodb.com/manual/reference/method/db.collection.update/#db.collection.update

Update specific embedded documents with arrayFilters in an array field.

User data:

{
    "_id" : ObjectId("5a66d5c2af9c462c617ce552"),
    "username" : "gibuloto",
    "tags" : [
        "beta",
        "future_tag",
        "expired_tag"
    ],
    "schedules" : [
        {
            "tag" : "valid_tag",
            "nbf" : ISODate("2018-05-01T16:00:00.000Z"),
            "exp" : ISODate("2020-06-04T16:00:00.000Z")
        },
        {
            "tag" : "future_tag",
            "nbf" : ISODate("2020-01-28T16:00:00.000Z"),
            "exp" : ISODate("2020-12-14T16:00:00.000Z")
        },
        {
            "tag" : "expired_tag",
            "nbf" : ISODate("2016-02-12T16:00:00.000Z"),
            "exp" : ISODate("2016-04-21T16:00:00.000Z")
        }
    ],
}

It is worth noting that the <identifier> used in arrayFilters must begin with a lowercase letter and contain only alphanumeric characters.

import bson

user_id = '5a66d5c2af9c462c617ce552'
tags = ['from_past_to_future']

updated_result = User._get_collection().update_one(
    {'_id': bson.ObjectId(user_id)},
    {
        '$addToSet': {'tags': {'$each': tags}},
        '$unset': {'schedules.$[schedule].nbf': True},
    },
    array_filters=[{'schedule.tag': {'$in': [tag for tag in tags]}}],
)
print(updated_result.raw_result)
# {'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}

ref:
https://docs.mongodb.com/master/reference/operator/update/positional-filtered/

Update A Dictionary Field

Set a key/value in a dictionary field.

tutorial.data = {
    "price_per_message": 1200,
    "inbox": []
}

new_inbox = [
    {
        "id": "5af118c598eacb528e8fb8f9",
        "sender": "5a13239eaf9c462c611510fc"
    },
    {
        "id": "5af1117298eacb212a8fb8e9",
        "sender": "5a99554be9a21d5ff38b8ca5"
    }
]
tutorial.update(set__data__inbox=new_inbox)

ref:
https://stackoverflow.com/questions/21158028/updating-a-dictfield-in-mongoengine

Upsert: Update Or Create

You must use upsert=true with uniquely indexed fields. If you don't need the modified document, you should just use update_one(field1=123, field2=456, upsert=True).

Additionally, remember that modify() always reloads the whole object even if the original one only loaded specific fields with only(). Try to avoid document.DB_QUERY_METHOD(); use User.objects.filter().only().modify() or User.objects.filter().update() whenever possible.

tag_schedule = TagSchedule.objects \
    .filter(user=user_id, tag='vip') \
    .modify(
        started_at=started_at,
        ended_at=ended_at,
        upsert=True
    )

user = User.objects \
    .filter(id=user.id, tutorials__buy_diamonds__version=None) \
    .modify(set__tutorials__buy_diamonds__version='v1')

updated = User.objects \
    .filter(user=user_id, tag=tag) \
    .update_one(
        push__followers=new_follower,
    )

ref:
https://docs.mongodb.com/manual/reference/method/db.collection.update/#update-with-unique-indexes
http://docs.mongoengine.org/apireference.html#mongoengine.queryset.QuerySet.modify
http://docs.mongoengine.org/apireference.html#mongoengine.queryset.QuerySet.update_one

Upsert: Get Or Create

buy_diamonds = BuyDiamonds.objects.filter(user_id=user.id).upsert_one()

ref:
http://docs.mongoengine.org/apireference.html#mongoengine.queryset.QuerySet.upsert_one

Store Datetime

MongoDB stores datetimes in UTC.

ref:
https://docs.mongodb.com/manual/reference/method/Date/

Two-phase Commit

The easiest way to think about 2-phase commit is idempotency, i.e., if you run an update many times, the results would "be the same": initial -> pending -> applied -> done.

ref:
https://docs.mongodb.com/manual/tutorial/perform-two-phase-commits/

Aggregation Pipeline

  • $match: Filters documents.
  • $project: Modifies document fields.
  • $group: Groups documents by fields.
  • $lookup: Joins another collection.
  • $replaceRoot: Promotes an embedded document field to the top level and replaces all other fields.
  • $unwind: Expands an array field into multiple documents, one per element, each keeping the other fields of the original document.
  • $facet: Processes multiple pipelines within one stage and output to different fields.

Return Date As Unix Timestamp

import datetime

def stages():
    """Yield the aggregation stage that exposes ``created`` as a Unix timestamp.

    Subtracting one date from another in the aggregation pipeline yields
    milliseconds, hence the division by 1000 and ``$floor`` to obtain whole
    seconds.
    """
    unix_epoch = datetime.datetime(1970, 1, 1)
    yield {'$project': {
        # '$created' is a field path; '$$created' would reference an
        # aggregation variable that is never defined and make the stage fail.
        'createdAt': {'$floor': {'$divide': [{'$subtract': ['$created', unix_epoch]}, 1000]}},
    }}

try:
    docs = MessagePackProduct.objects.aggregate(*stages())
except StopIteration:
    docs = []
else:
    for doc in docs:
        print(doc)

ref:
https://stackoverflow.com/questions/39274311/convert-iso-date-to-timestamp-in-mongo-query

Match Multiple Conditions Which Are Stored In An Array Field

db.getCollection('feature.promotions').insert({
    "name": "女 / 六月排行榜:寶石獵人",
    "nbf": ISODate("2018-05-31 16:00:00.000Z"),
    "exp": ISODate("2018-06-30 15:59:00.001Z"),
    "positions": {
        "discover": {
            "urls": [
                "https://swag.live/promo/events/2018/Jun/female/banner.html"
            ]
        }
    },
    "requirements" : [
        {
            // users who like women
            "preferences" : [
                "gender:female"
            ]
        },
        {
            // female CPs
            "tags" : [
                "stats",
                "gender:female"
            ]
        }
    ]
});
user_preferences = ['gender:female', 'gender:male']
user_tags = ['beta', 'vip']
user_platforms = ['ios']

def stages():
    """Yield the aggregation stages that list active promotions grouped by position.

    Reads ``user_preferences``, ``user_tags`` and ``user_platforms`` from the
    enclosing scope. The last stage emits one document per position whose
    ``items`` array holds the promotions shown at that position.
    """
    now = utils.utcnow()

    # Keep promotions that are active now (nbf <= now < exp) and that have at
    # least one requirement satisfied by the user.
    # {'$not': {'$elemMatch': {'$nin': xs}}} holds when no array element falls
    # outside xs, i.e. the stored list is a subset of the user's list.
    # NOTE(review): a requirement that omits one of the lists is presumably
    # not restricted by it - confirm against the data.
    yield {'$match': {
        '$and': [
            {'nbf': {'$lte': now}},
            {'exp': {'$gt': now}},
            {'requirements': {'$elemMatch': {
                'preferences': {'$not': {'$elemMatch': {'$nin': user_preferences}}},
                'tags': {'$not': {'$elemMatch': {'$nin': user_tags}}},
                'platforms': {'$not': {'$elemMatch': {'$nin': user_platforms}}},
            }}},
        ],
    }}
    # Turn the positions sub-document into an array of {k, v} pairs so that it
    # can be unwound below.
    yield {'$project': {
        'name': True,
        'nbf': True,
        'exp': True,
        'positions': {'$objectToArray': '$positions'},
    }}
    # One document per (promotion, position) pair.
    yield {'$unwind': '$positions'}
    # Promotions that expire first come first.
    yield {'$sort': {
        'exp': 1,
    }}
    # Shape the output: position name, first URL of that position, and the
    # nbf/exp dates converted to seconds.
    # NOTE(review): constants.UNIX_EPOCH is presumably the 1970-01-01 datetime - confirm.
    yield {'$project': {
        '_id': False,
        'name': True,
        'position': '$positions.k',
        'url': {'$arrayElemAt': ['$positions.v.urls', 0]},
        'startedAt': {'$floor': {'$divide': [{'$subtract': ['$nbf', constants.UNIX_EPOCH]}, 1000]}},
        'endedAt': {'$floor': {'$divide': [{'$subtract': ['$exp', constants.UNIX_EPOCH]}, 1000]}},
    }}
    # Collect the shaped documents of each position into its items array.
    yield {'$group': {
        '_id': '$position',
        'items': {'$push': '$$ROOT'},
    }}

try:
    docs = Promotion.objects.aggregate(*stages())
except StopIteration:
    docs = []
else:
    docs = list(docs)

ref:
https://docs.mongodb.com/manual/reference/operator/query/in/
https://docs.mongodb.com/manual/reference/operator/query/nin/
https://docs.mongodb.com/manual/reference/operator/aggregation/setIsSubset/

Do Distinct With $group

def stages():
    """Yield the pipeline that returns one document per distinct unlocking user.

    Documents tagged 'some_tag' are unwound on ``unlocks``, each unlock is
    promoted to the root, only 'MessagePackUnlock' entries are kept, and the
    entries are grouped by ``user`` together with the first ``timestamp``
    seen for that user.
    """
    pipeline = [
        {'$match': {'tags': 'some_tag'}},
        {'$unwind': '$unlocks'},
        {'$replaceRoot': {'newRoot': '$unlocks'}},
        {'$match': {'_cls': 'MessagePackUnlock'}},
        {'$group': {'_id': '$user', 'timestamp': {'$first': '$timestamp'}}},
    ]
    yield from pipeline

for unlock in MessagePackMessage.objects.aggregate(*stages()):
    tasks.offline_purchase_pack.apply(kwargs=dict(
        user_id=unlock['_id'],
        message_pack_id=message_pack.id,
        timestamp=unlock['timestamp'],
    ))

ref:
https://docs.mongodb.com/manual/reference/operator/aggregation/group/

Collect Items With $group And $addToSet

User data:

{
    "_id" : ObjectId("5a66d5c2af9c462c617ce552"),
    "username" : "gibuloto",
    "tags" : [ 
        "beta"
    ],
    "schedules" : [ 
        {
            "tag" : "stats",
            "nbf" : ISODate("2018-02-01T16:00:00.000Z"),
            "exp" : ISODate("2018-08-12T16:00:00.000Z")
        }, 
        {
            "tag" : "vip",
            "nbf" : ISODate("2018-05-13T16:00:00.000Z"),
            "exp" : ISODate("2018-05-20T16:00:00.000Z")
        }
    ]
}
def stages():
    """Yield the pipeline that collects, per user, the tags of currently active schedules.

    A schedule counts as active when ``nbf <= now <= exp``. Users are first
    filtered on having such a schedule, the schedules are then unwound and
    filtered again so that only the active entries remain, and finally the
    tags are gathered per user with ``$addToSet``.
    """
    now = utils.utcnow()
    started = {'$lte': now}
    not_expired = {'$gte': now}

    pipeline = [
        {'$match': {
            'schedules': {'$elemMatch': {'nbf': started, 'exp': not_expired}},
        }},
        {'$unwind': '$schedules'},
        {'$match': {
            'schedules.nbf': started,
            'schedules.exp': not_expired,
        }},
        {'$project': {
            '_id': False,
            'id': '$_id',
            'username': True,
            'tag': '$schedules.tag',
            'nbf': '$schedules.nbf',
            'exp': '$schedules.exp',
        }},
        {'$group': {
            '_id': '$id',
            'tags': {'$addToSet': '$tag'},
        }},
    ]
    yield from pipeline

for user_tag_schedule in User.objects.aggregate(*stages()):
    print(user_tag_schedule)

# output:
# {'_id': ObjectId('579b9387b7af8e1fd1635da9'), 'tags': ['stats']}
# {'_id': ObjectId('5a66d5c2af9c462c617ce552'), 'tags': ['chat', 'vip']}

ref:
https://docs.mongodb.com/manual/reference/operator/aggregation/group/

Project A New Field Based On Whether Elements Exist In Another Array Field

Use $addFields with $cond.

def stages():
    """Yield the pipeline that lists posts for the user's preferred genders.

    Reads ``g.user.settings.preferences`` (falling back to
    ``['gender:female']`` when it is empty) and ``constants`` from the
    enclosing scope. Each post gets a boolean ``isPinned`` that is true when
    its ``badges`` contain ``constants.tags.HIDDEN``; pinned posts sort
    first, then newest first.
    """
    user_preferences = g.user.settings.preferences or ['gender:female']
    yield {'$match': {
        'gender': {'$in': [prefix_gender.replace('gender:', '') for prefix_gender in user_preferences]}
    }}

    yield {'$addFields': {
        'isPinned': {'$cond': {
            # The aggregation $in fails unless its second argument is an
            # array, so a missing or null badges field is treated as [].
            'if': {'$in': [constants.tags.HIDDEN, {'$ifNull': ['$badges', []]}]},
            'then': True,
            'else': False,
        }},
    }}
    yield {'$sort': {
        'isPinned': -1,
        'posted_at': -1,
    }}
    yield {'$project': {
        '_id': False,
        'id': '$_id',
        'author': '$author',
        'title': '$title',
        'location': '$location',
        # Date difference is in milliseconds; convert to whole seconds.
        'postedAt': {'$floor': {'$divide': [{'$subtract': ['$posted_at', constants.UNIX_EPOCH]}, 1000]}},
        'viewCount': '$view_count',
        'commentCount': {'$size': {'$ifNull': ['$comments', []]}},
        'badges': '$badges',
        'isPinned': '$isPinned',
    }}

try:
    results = Post.objects.aggregate(*stages()).next()
except StopIteration:
    return Response(status=http.HTTPStatus.NOT_FOUND)

ref:
https://stackoverflow.com/questions/16512329/project-new-boolean-field-based-on-element-exists-in-an-array-of-a-subdocument
https://docs.mongodb.com/manual/reference/operator/aggregation/cond/
https://docs.mongodb.com/manual/reference/operator/aggregation/addFields/

Project And Filter Out Elements Of An Array With $filter

Elements in details might have no value field.

def stages():
    """Yield the pipeline that returns one post with its non-null details.

    Reads ``post_id`` and ``constants`` from the enclosing scope. ``details``
    is first projected as a list of key/value pairs and then filtered so that
    pairs whose value is null are dropped.
    """
    yield {'$match': {
        '_id': bson.ObjectId(post_id),
    }}
    yield {'$project': {
        '_id': False,
        'id': '$_id',
        'author': '$author',
        'title': '$title',
        'location': '$location',
        # Date difference is in milliseconds; convert to whole seconds.
        'postedAt': {'$floor': {'$divide': [{'$subtract': ['$posted_at', constants.UNIX_EPOCH]}, 1000]}},
        'viewCount': '$view_count',
        # $size fails on a missing or null field, so count a post without
        # comments as 0.
        'commentCount': {'$size': {'$ifNull': ['$comments', []]}},
        'details': [
            {'key': 'gender', 'value': '$gender'},
            {'key': 'pricing', 'value': '$pricing'},
            {'key': 'lineId', 'value': {'$ifNull': ['$lineId', None]}},
            {'key': 'description', 'value': {'$ifNull': ['$description', None]}},
        ],
    }}
    yield {'$addFields': {
        'details': {
            '$filter': {
                'input': '$details',
                'as': 'detail',
                'cond': {'$ne': ['$$detail.value', None]},
            }
        }
    }}

try:
    post = next(Post.objects.aggregate(*stages()))
except StopIteration:
    return Response(status=http.HTTPStatus.NOT_FOUND)

ref:
https://docs.mongodb.com/manual/reference/operator/aggregation/filter/#exp._S_filter
https://docs.mongodb.com/manual/reference/operator/aggregation/addFields/

Project Specific Fields Of Elements Of An Array With $map

def stages():
    """Yield the pipeline that returns one post with trimmed media entries.

    Reads ``post_id`` and ``constants`` from the enclosing scope. Each element
    of ``media`` is reduced with ``$map`` to its mimetype, dash/hls presets
    and thumbnail; ``details`` is projected as key/value pairs and then
    filtered so that pairs whose value is null are dropped.
    """
    yield {'$match': {
        '_id': bson.ObjectId(post_id),
    }}
    yield {'$project': {
        '_id': False,
        'id': '$_id',
        'author': '$author',
        'title': '$title',
        'location': '$location',
        # Date difference is in milliseconds; convert to whole seconds.
        'postedAt': {'$floor': {'$divide': [{'$subtract': ['$posted_at', constants.UNIX_EPOCH]}, 1000]}},
        'viewCount': '$view_count',
        # $size fails on a missing or null field, so count a post without
        # comments as 0.
        'commentCount': {'$size': {'$ifNull': ['$comments', []]}},
        'details': [
            {'key': 'gender', 'value': '$gender'},
            {'key': 'pricing', 'value': '$pricing'},
            {'key': 'lineId', 'value': {'$ifNull': ['$lineId', None]}},
            {'key': 'description', 'value': {'$ifNull': ['$description', None]}},
        ],
        'media': {
            '$map': {
                'input': '$media',
                'as': 'transcoded_media',
                'in': {
                    'mimetype': '$$transcoded_media.mimetype',
                    'dash': '$$transcoded_media.presets.dash',
                    'hls': '$$transcoded_media.presets.hls',
                    'thumbnail': '$$transcoded_media.thumbnail',
                }
            }
        },
    }}
    yield {'$addFields': {
        'details': {
            '$filter': {
                'input': '$details',
                'as': 'detail',
                'cond': {'$ne': ['$$detail.value', None]},
            }
        }
    }}

try:
    post = next(Post.objects.aggregate(*stages()))
except StopIteration:
    return Response(status=http.HTTPStatus.NOT_FOUND)

ref:
https://stackoverflow.com/questions/33831665/how-to-project-specific-fields-from-a-document-inside-an-array

Do Advanced $project With $let

If you find yourself wanting to do $project twice to tackle some fields, you should use $let.

def stages():
    """Yield the pipeline listing the products purchased by ``g.user``.

    Each output document carries the product id, name, first image (or null)
    and ``purchasedAt``, the timestamp of the user's first matching entry in
    ``purchases``.
    """
    # Only products with at least one purchase made by the current user.
    yield {'$match': {
        'purchases.user': g.user.id,
    }}
    yield {'$project': {
        '_id': False,
        'id': '$_id',
        'name': True,
        # First image, or null when there is none.
        'image': {
            '$ifNull': [{'$arrayElemAt': ['$images', 0]}, None],
        },
        # $let binds `purchase` to the first element of purchases that belongs
        # to the current user, then exposes only its timestamp. This avoids a
        # second $project stage.
        'purchasedAt': {
            '$let': {
                'vars': {
                    'purchase': {
                        '$arrayElemAt': [
                            {
                                '$filter': {
                                    'input': '$purchases',
                                    'as': 'purchase',
                                    'cond': {
                                        '$and': [
                                            {'$eq': ['$$purchase.user', g.user.id]},
                                        ],
                                    },
                                },
                            },
                            0,
                        ],
                    },
                },
                'in': '$$purchase.timestamp',
            },
        },
    }}

try:
    docs = MessagePackProduct.objects.aggregate(*stages())
except StopIteration:
    docs = []
else:
    for doc in docs:
        print(doc)

ref:
https://docs.mongodb.com/manual/reference/operator/aggregation/let/

Deconstruct An Array Field With $unwind And Query Them With $match

def stages():
    """Yield a pipeline that returns one result per matching SKU.

    ``$unwind`` turns every element of ``skus`` into its own document, so the
    second ``$match`` can filter on the attributes of a single SKU.
    """
    wanted_tag = 'category:user'
    wanted_currency = 'usd'
    wanted_platform = 'ios'

    product_filter = {
        'active': True,
        'tags': wanted_tag,
        'total': {'$gt': 0},
        'preview_message': {'$exists': True},
    }
    sku_filter = {
        'skus.attributes.platform': wanted_platform,
        'skus.attributes.currency': wanted_currency,
    }
    # $ifNull supplies [] when `$purchases.user` resolves to null/missing,
    # so $in always receives an array.
    buyer_ids = {'$ifNull': ['$purchases.user', []]}
    projection = {
        '_id': False,
        'id': '$_id',
        'name': True,
        'caption': True,
        'description': True,
        'image': {'$ifNull': [{'$arrayElemAt': ['$images', 0]}, None]},
        'sku': '$skus',
        'created_at': True,
        'is_purchased': {'$in': [g.user.id, buyer_ids]},
    }

    yield from (
        {'$match': product_filter},
        {'$unwind': '$skus'},
        {'$match': sku_filter},
        {'$project': projection},
        {'$sort': {'is_purchased': 1, 'created_at': -1}},
    )

# aggregate() hands back a cursor and does not raise StopIteration itself;
# only an explicit next() call can, and the for loop already absorbs it.
# The former try/except StopIteration wrapper was therefore dead code.
# An empty result simply means the loop body never runs.
docs = MessagePackProduct.objects.aggregate(*stages())
for doc in docs:
    print(doc)

ref:
https://docs.mongodb.com/manual/reference/operator/aggregation/match/
https://docs.mongodb.com/manual/reference/operator/aggregation/unwind/
https://docs.mongodb.com/manual/reference/operator/aggregation/project/

Query The First Element In An Array Field With $arrayElemAt And $filter

def stages():
    """Yield a pipeline that attaches the first matching SKU to each product.

    Unlike the ``$unwind`` variant, every product yields exactly one result;
    ``sku`` is ``None`` when no element of ``skus`` satisfies the condition.
    """
    wanted_tag = 'category:user'
    wanted_currency = 'usd'
    wanted_platform = 'ios'

    # Elements of `skus` whose currency and platform both match.
    matching_skus = {'$filter': {
        'input': '$skus',
        'as': 'sku',
        'cond': {'$and': [
            {'$eq': ['$$sku.currency', wanted_currency]},
            {'$eq': ['$$sku.attributes.platform', wanted_platform]},
        ]},
    }}
    first_matching_sku = {'$arrayElemAt': [matching_skus, 0]}
    first_image = {'$arrayElemAt': ['$images', 0]}
    # $ifNull supplies [] when `$purchases.user` resolves to null/missing.
    buyer_ids = {'$ifNull': ['$purchases.user', []]}

    yield {'$match': {'active': True, 'tags': wanted_tag}}
    yield {'$project': {
        '_id': False,
        'id': '$_id',
        'name': True,
        'caption': True,
        'description': True,
        'image': {'$ifNull': [first_image, None]},
        'preview_message': True,
        'metadata': True,
        'created_at': True,
        'updated_at': True,
        'active': True,
        'sku': {'$ifNull': [first_matching_sku, None]},
        'tags': True,
        'total': True,
        'is_bought': {'$in': [g.user.id, buyer_ids]},
    }}
    yield {'$sort': {'is_bought': 1, 'created_at': -1}}

# aggregate() hands back a cursor and does not raise StopIteration itself;
# only an explicit next() call can, and the for loop already absorbs it.
# The former try/except StopIteration wrapper was therefore dead code.
# An empty result simply means the loop body never runs.
docs = MessagePackProduct.objects.aggregate(*stages())
for doc in docs:
    print(doc)

ref:
https://docs.mongodb.com/master/reference/operator/aggregation/filter/
https://stackoverflow.com/questions/3985214/retrieve-only-the-queried-element-in-an-object-array-in-mongodb-collection

Join Another Collection With $lookup

def stages():
    """Yield a pipeline that joins each message with its sender's user document.

    ``$lookup`` stores the matching ``user`` documents in the array field
    ``sender_data``; ``$unwind`` then flattens it so that the projection can
    address ``sender_data._id`` / ``sender_data.username`` directly.
    """
    join_sender = {
        'from': 'user',
        'localField': 'sender',
        'foreignField': '_id',
        'as': 'sender_data',
    }
    shape = {
        '_id': False,
        'id': '$_id',
        'sender': {
            'id': '$sender_data._id',
            'username': '$sender_data.username',
        },
        'caption': True,
        'posted_at': True,
    }

    yield from (
        {'$match': {'tags': 'pack:prod_CR1u34BIpDbHeo'}},
        {'$lookup': join_sender},
        {'$unwind': '$sender_data'},
        {'$project': shape},
        {'$sort': {'posted_at': -1}},
    )

# aggregate() hands back a cursor and does not raise StopIteration itself;
# only an explicit next() call can, and the for loop already absorbs it.
# The former try/except StopIteration wrapper was therefore dead code.
# An empty result simply means the loop body never runs.
docs = Message.objects.aggregate(*stages())
for doc in docs:
    print(doc)

ref:
https://docs.mongodb.com/manual/reference/operator/aggregation/lookup/
https://thecodebarbarian.com/a-nodejs-perspective-on-mongodb-36-lookup-expr

Count Documents In Another Collection With $lookup (JOIN)

def stages():
    """Yield a pipeline that counts the messages belonging to each product.

    A helper field ``message_pack_id_tag`` (``'pack:' + _id``) is added first
    so that ``$lookup`` can equality-match it against ``message.tags``; the
    joined array is then replaced by its size.
    """
    wanted_tag = 'category:{}'.format(category)

    join_messages = {
        'from': 'message',
        'localField': 'message_pack_id_tag',
        'foreignField': 'tags',
        'as': 'total',
    }

    yield {'$match': {'active': True, 'tags': wanted_tag}}
    yield {'$addFields': {
        'message_pack_id_tag': {'$concat': ['pack:', '$_id']},
    }}
    yield {'$lookup': join_messages}
    # Overwrite the joined array with the number of joined documents.
    yield {'$addFields': {'total': {'$size': '$total'}}}
    yield {'$project': {
        '_id': False,
        'id': '$_id',
        'name': True,
        'total': True,
    }}

# aggregate() hands back a cursor and does not raise StopIteration itself;
# only an explicit next() call can, and the for loop already absorbs it.
# The former try/except StopIteration wrapper was therefore dead code.
# An empty result simply means the loop body never runs.
docs = MessagePackProduct.objects.aggregate(*stages())
for doc in docs:
    print(doc)

ref:
https://docs.mongodb.com/manual/reference/operator/aggregation/lookup/#equality-match

Collapse Documents In An Array

def stages():
    """Yield a pipeline that collapses the newest tutorial messages into one document.

    The single output document has an ``inbox`` array (at most 10 messages,
    newest first) plus two constant fields added with ``$literal``.
    """
    yield {'$match': {
        'tags': 'tutorial:buy-diamonds:v1',
    }}
    # Sort while `created_at` still exists. The $project below drops that
    # field, so the previous placement of this $sort inside $facet ordered the
    # documents by a missing field and had no effect.
    yield {'$sort': {'created_at': -1}}
    yield {'$project': {
        '_id': False,
        'id': '$_id',
        'caption.text': True,
        'sender': True,
        'media.type': '$media.mimetype',
    }}
    # $facet always emits exactly one document; each sub-pipeline's output
    # becomes an array field on it.
    yield {'$facet': {
        'inbox': [
            {'$limit': 10}
        ],
    }}
    yield {'$project': {
        'inbox': True,
        'required_unlock_count': {'$literal': 5},
        'price_per_message': {'$literal': 1200},
    }}

# Take the first (and only) document of the cursor; fall back to an empty
# dict when the cursor is exhausted.
result = next(Message.objects.aggregate(*stages()), {})

JSON output:

{
    "inbox": [
        {
            "caption": {
                "text": "fuck yeah"
            },
            "id": "5aaba1e9593950337a90dcb3",
            "media": {
                "type": "video/mp4"
            },
            "sender": "5a66d5c2af9c462c617ce552"
        },
        {
            "caption": {
                "text": "test"
            },
            "id": "5ad549276b2c362a4efe5e21",
            "media": {
                "type": "image/jpeg"
            },
            "sender": "5a66d5c2af9c462c617ce552"
        }
    ],
    "price_per_message": 1200,
    "required_unlock_count": 5
}

Do Pagination With $facet And $project

def stages(page=0, limit=10):
    """Yield the stages of a paginated listing of the current user's purchases.

    Args:
        page: Zero-based page index; ``page * limit`` documents are skipped.
        limit: Maximum number of documents returned in ``objects``.

    The final document has the shape ``{"total": <int>, "objects": [...]}``.
    """
    # normal query
    yield {'$match': {
        'purchases.user': g.user.id,
    }}
    yield {'$project': {
        '_id': False,
        'id': '$_id',
        'name': True,
        'created_at': True,
        'meta': {
            'revision': '$revision',
            'tags': '$tags',
        },
    }}
    yield {'$sort': {'created_at': -1}}

    # pagination: count and page slice are computed over the same input
    yield {'$facet': {
        'meta': [
            {'$count': 'total'},
        ],
        'objects': [
            {'$skip': page * limit},
            {'$limit': limit},
        ]
    }}
    # JSON output:
    # {
    #    "meta": [
    #       {"total": 2}
    #    ],
    #    "objects": [
    #       {
    #          "id": "prod_CR1u34BIpDbHeo",
    #          "name": "Product Name 2"
    #       },
    #       {
    #          "id": "prod_Fkhf9JFK3Rdgk9",
    #          "name": "Product Name 1"
    #       }
    #    ]
    # }
    yield {'$project': {
        # $count emits no document for an empty input, which leaves `meta`
        # empty and `total` missing; $ifNull reports 0 in that case.
        'total': {'$ifNull': [
            {'$let': {
                'vars': {
                    'meta': {'$arrayElemAt': ['$meta', 0]},
                },
                'in': '$$meta.total',
            }},
            0,
        ]},
        'objects': True,
    }}
    # JSON output:
    # {
    #    "total": 2,
    #    "objects": [
    #       {
    #          "id": "prod_CR1u34BIpDbHeo",
    #          "name": "Product Name 2"
    #       },
    #       {
    #          "id": "prod_Fkhf9JFK3Rdgk9",
    #          "name": "Product Name 1"
    #       }
    #    ]
    # }

# Fetch the single document produced by the pipeline; print it when present,
# otherwise fall back to an empty dict.
cursor = MessagePackProduct.objects.aggregate(*stages())
output = next(cursor, None)
if output is None:
    output = {}
else:
    print(output)

ref:
https://docs.mongodb.com/manual/reference/operator/aggregation/facet/
https://docs.mongodb.com/manual/reference/operator/aggregation/project/

Perform $facet + $project => Unwrap with $unwind => Do $facet + $project Again

def stages():
    """Yield a pipeline that paginates first and runs ``$lookup`` per page item.

    Flow: query -> ``$facet``/``$project`` (pagination) -> ``$unwind`` the page
    -> ``$lookup`` message counts for each item -> ``$facet``/``$project``
    again to restore the ``{"total": ..., "objects": [...]}`` structure.
    """
    def first_total(array_field):
        # Expression reading `.total` from the first element of `array_field`.
        return {'$let': {
            'vars': {
                'meta': {'$arrayElemAt': [array_field, 0]},
            },
            'in': '$$meta.total',
        }}

    page = 0
    limit = 10

    # normal query
    yield {'$match': {'purchases.user': g.user.id}}
    yield {'$project': {
        '_id': False,
        'id': '$_id',
        'name': True,
        'image': {'$ifNull': [{'$arrayElemAt': ['$images', 0]}, None]},
        'created_at': True,
    }}
    yield {'$sort': {'created_at': -1}}

    # pagination
    yield {'$facet': {
        'meta': [{'$count': 'total'}],
        'objects': [{'$skip': page * limit}, {'$limit': limit}],
    }}
    yield {'$project': {
        'total': first_total('$meta'),
        'objects': True,
    }}

    # do $lookup after the pagination, once per item of the current page
    yield {'$unwind': '$objects'}
    yield {'$addFields': {
        'objects.message_pack_id_tag': {'$concat': ['pack:', '$objects.id']},
    }}
    yield {'$lookup': {
        'from': 'message',
        'localField': 'objects.message_pack_id_tag',
        'foreignField': 'tags',
        'as': 'objects.total',
    }}
    # replace the joined array with its length
    yield {'$addFields': {'objects.total': {'$size': '$objects.total'}}}

    # re-wrap into the pagination structure
    yield {'$facet': {
        'total_list': [{'$project': {'total': True}}],
        'objects': [{'$replaceRoot': {'newRoot': '$objects'}}],
    }}
    yield {'$project': {
        'total': first_total('$total_list'),
        'objects': True,
    }}

# Fetch the single document produced by the pipeline; print it when present,
# otherwise fall back to an empty dict.
cursor = MessagePackProduct.objects.aggregate(*stages())
output = next(cursor, None)
if output is None:
    output = {}
else:
    print(output)

Do $group First To Reduce Numbers Of $lookup Calls

def stages():
    """Yield a pipeline that looks up each sender once instead of once per message.

    Messages are grouped by ``sender`` first, so ``$lookup`` runs per distinct
    sender; ``$unwind`` then restores one document per message.
    """
    pack_tag = 'pack:{}'.format(message_pack_id)

    yield {'$match': {'tags': pack_tag}}
    # One group per sender, keeping every original message document.
    yield {'$group': {
        '_id': '$sender',
        'messages': {'$push': '$$ROOT'},
    }}
    yield {'$lookup': {
        'from': 'user',
        'localField': '_id',
        'foreignField': '_id',
        'as': 'sender_data',
    }}
    yield {'$unwind': '$messages'}

    # `sender_data` is still an array here, so take element 0 of each path.
    sender = {
        'id': {'$arrayElemAt': ['$sender_data._id', 0]},
        'username': {'$arrayElemAt': ['$sender_data.username', 0]},
    }
    caption = {
        'text': '$messages.caption.text',
        'y': '$messages.caption.y',
    }
    yield {'$project': {
        '_id': False,
        'id': '$messages._id',
        'caption': caption,
        'sender': sender,
    }}

# aggregate() hands back a cursor and does not raise StopIteration itself;
# only an explicit next() call can, and the for loop already absorbs it.
# The former try/except StopIteration wrapper was therefore dead code.
# An empty result simply means the loop body never runs.
docs = Message.objects.aggregate(*stages())
for doc in docs:
    print(doc)

ref:
https://docs.mongodb.com/manual/reference/operator/aggregation/group/

Useful Tools

$ mongotop

ref:
https://docs.mongodb.com/manual/reference/program/mongotop/

$ pip install mongotail

$ mongotail 127.0.0.1:27017/swag -l 2
$ mongotail 127.0.0.1:27017/swag -f

ref:
https://github.com/mrsarm/mongotail
https://stackoverflow.com/questions/15204341/mongodb-logging-all-queries

$ pip install mtools

$ mloginfo mongod.log

ref:
https://github.com/rueckstiess/mtools

Speed up Python and Node.js builds on Travis CI

Speed up Python and Node.js builds on Travis CI

Travis CI's caching archives all directories listed in the configuration and uploads them to Amazon S3. Cached contents are available to any build on the repository, including Pull Requests. For Python and Node.js projects, you could cache both site-packages and node_modules directories in every Travis CI build.

Here is an example of .travis.yml:

# Example Travis CI configuration for a Python project that also installs
# npm packages; pip's cache, site-packages and node_modules are cached.
sudo: false

language: python

python:
  - "2.7"

# NOTE(review): with `language: python` this key may not select the Node.js
# version by itself - confirm that Node.js 4 is really the one in use.
node_js: 4

# Directories archived at the end of a build and restored in later builds.
cache:
  directories:
    - $HOME/.cache/pip
    # NOTE(review): this path hard-codes the interpreter version (2.7.9);
    # confirm it matches the virtualenv of the build image.
    - $HOME/virtualenv/python2.7.9/lib/python2.7/site-packages
    - node_modules

before_install:
  - pip install -U pip

install:
  - pip install -r requirements.txt
  # No bin folder is cached, so the `coverage` executable is missing when
  # site-packages comes from the cache; --ignore-installed reinstalls it.
  - pip install coverage --ignore-installed
  - npm install

script:
  - coverage run manage.py test

In my case, after applying these changes, the installation time of pip and npm dropped from 180 seconds to 5 seconds.

One thing should be mentioned here: since we didn't cache any bin folder in the configuration (and I don't think that's necessary), executables installed by pip, such as coverage or django-admin.py, will not exist in subsequent builds. If you need those commands, you can force a reinstall by adding pip install some_package --ignore-installed.

References:

Caching Dependencies and Directories
https://docs.travis-ci.com/user/caching/

How to cache requirements for a Django project on Travis-CI?
http://stackoverflow.com/questions/19422229/how-to-cache-requirements-for-a-django-project-on-travis-ci

如何在 Travis CI 加快 Python 單元測試速度
https://tzangms.com/how-to-speed-up-python-unit-test-on-travis-ci/

Integrate with webpages using CasperJS (built on top of PhantomJS)

Integrate with webpages using CasperJS (built on top of PhantomJS)

PhantomJS is a headless and scriptable WebKit runtime (aka browser) with JavaScript API.

Usage

in script.js

Login and delete spare movie tags on Douban.

// Log in to Douban, then delete every movie tag that is used fewer than five
// times, except the tags listed in `tags_do_not_delete`.
// NOTE: this runs on PhantomJS (ES5 only), hence `var` and function
// expressions instead of const/let/arrow functions.
var casper = require('casper').create({
  pageSettings: {
    loadImages: true,
    loadPlugins: false
  },
  logLevel: 'debug',
  verbose: true
});

// save session cookies
var fs = require('fs');

var cookieFile = 'cookies.json';

// Write every current cookie (session cookies included) to `cookieFile`.
// The previous version probed the file with fs.statSync(), a Node.js API that
// the PhantomJS `fs` module does not provide; the call always threw, so the
// catch branch always wrote the file. Writing unconditionally keeps that
// behaviour without relying on the exception.
var saveSessionCookie = function() {
  fs.write(cookieFile, JSON.stringify(phantom.cookies), 'w');
};

// Restore the cookies saved by a previous run, if any.
if (fs.isFile(cookieFile)) {
  Array.prototype.forEach.call(JSON.parse(fs.read(cookieFile)), function(cookie) {
    phantom.addCookie(cookie);
  });
}

// script
var loginUrl = 'https://accounts.douban.com/login';
var startUrl = 'https://movie.douban.com/people/vinta/all';

var tags_do_not_delete = [
  '丹麦', '新西兰', '新加坡', '以色列', '印度', '意大利', '瑞典', '墨西哥', '俄罗斯', '西班牙', '比利时'
];

casper.start(loginUrl, function() {
  this.echo(this.getCurrentUrl());
  this.echo(this.getTitle());

  this.capture('login.png');

  var data = {
    form_email: 'xxx',
    form_password: 'xxx'
  };

  // Douban may ask for a captcha.
  // Run: casperjs script.js --remote-debugger-port=9000
  // Open login.png first to read the captcha,
  // then enter it manually in the console at http://127.0.0.1:9000/
  // data['captcha-solution'] = '123';

  // waitForSelector() only schedules a step, so the form must be filled
  // inside its callback; previously fill() ran before the wait happened.
  this.waitForSelector('form#lzform', function() {
    this.fill('form#lzform', data, true);
  });
});

casper.then(function() {
  this.echo(this.getCurrentUrl());
  this.echo(this.getTitle());

  saveSessionCookie();

  this.capture('all.png');

  this.open(startUrl).then(function() {
    this.waitForSelector('#open_tags', function() {
      this.click('#open_tags');
    });

    this.waitWhileSelector('#open_tags');
  });
});

casper.then(function() {
  this.echo(this.getCurrentUrl());
  this.echo(this.getTitle());

  // evaluate() runs inside the page sandbox; only plain data comes back.
  // It returns null when the page function throws (for example when a tag
  // link has no sibling element), so fall back to an empty list.
  var links = this.evaluate(function() {
    var tagList = document.querySelectorAll('ul.tag-list li a');
    return Array.prototype.map.call(tagList, function(elem) {
      return {
        tag: elem.textContent.trim(),
        href: elem.getAttribute('href'),
        count: parseInt(elem.nextElementSibling.textContent, 10)
      };
    });
  }) || [];

  // Rarely used tags that are not protected. A NaN count never passes `< 5`.
  var filteredLinks = links.filter(function(link) {
    return link.count < 5 && tags_do_not_delete.indexOf(link.tag) === -1;
  });

  this.each(filteredLinks, function(self, link) {
    this.echo(link.tag + ', ' + link.count);

    self.thenOpen(link.href, function() {
      this.echo(this.getCurrentUrl());
      this.echo(this.getTitle());

      this.waitForSelector('#tag-del', function() {
        this.click('#tag-del');
      });

      this.waitForSelector('input[name="del_submit"]', function() {
        this.click('input[name="del_submit"]');
      });
    });
  });
});

casper.run();

To evaluate JavaScript code in the context of the webpage, you must use evaluate() function. The context is a sandbox.

ref:
http://docs.casperjs.org/en/latest/modules/index.html

ref:
https://github.com/vinta/playground/blob/master/casperjs/script.js

Save session cookies

--cookies-file=xxx.txt only stores non-session cookies. Session cookies (which usually carry your logged-in or authenticated status) are not persisted, so you have to save every cookie manually.

// Persist all cookies (session cookies included) to a JSON file and restore
// them on the next run. PhantomJS is ES5-only, hence `var`.
var casper = require('casper').create();

// save session cookies
var fs = require('fs');

var cookieFile = 'cookies.json';

// Write every current cookie to `cookieFile`.
// The previous version probed the file with fs.statSync(), a Node.js API that
// the PhantomJS `fs` module does not provide; the call always threw, so the
// catch branch always wrote the file. Writing unconditionally keeps that
// behaviour without relying on the exception.
var saveSessionCookie = function() {
  fs.write(cookieFile, JSON.stringify(phantom.cookies), 'w');
};

// Restore the cookies saved by a previous run, if any.
if (fs.isFile(cookieFile)) {
  Array.prototype.forEach.call(JSON.parse(fs.read(cookieFile)), function(cookie) {
    phantom.addCookie(cookie);
  });
}

casper.start('yourUrl', function() {
  // your navigation steps go here
});

ref:
http://stackoverflow.com/questions/18739354/how-can-i-use-persisted-cookies-from-a-file-using-phantomjs

Run

$ docker run --rm -v `pwd`:/data vinta/casperjs:1.1.3 script.js

# or

$ brew install casperjs
$ casperjs script.js --disk-cache=true

ref:
https://hub.docker.com/r/vinta/casperjs/
https://hub.docker.com/r/zopanix/casperjs/

ref:
http://phantomjs.org/api/command-line.html

Run in debugging mode

$ casperjs script.js --remote-debugger-port=9000
$ open http://127.0.0.1:9000/
  • Click the first link (something like "file:///usr/local/Cellar/xxx").
  • In Sources tab, press "Enable Debugging" button.
  • In Console tab, type "__run();" to start.
  • Once breakpoints worked, you could go to Console tab to debug.

ref:
http://phantomjs.org/troubleshooting.html

for loop in JavaScript

有 index 的 for loop

// Classic counting loop: logs 0 through 9.
for (var index = 0; index < 10; index += 1) {
   console.log(index);
}

for in 用在 object

不能用在 array

var obj = {a:1, b:2, c:3};

// for...in visits the enumerable property names of the object.
for (var key in obj) {
  console.log(['obj.', key, ' = ', obj[key]].join(''));
}

ref:
https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Statements/for...in

for of 用在 array(或 iterator 物件)

ES6 才有的語法

const iterable = [10, 20, 30];

// for...of visits the values produced by the iterable.
for (const item of iterable) {
  console.log(item);
}

不然也可以用 Array.forEach()

// forEach skips the hole in the sparse array, so index 2 is never logged.
const logElement = (element, index) => {
  console.log(`a[${index}] = ${element}`);
};

[2, 5, , 9].forEach(logElement);

ref:
https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Statements/for...of

ES6 Promise 筆記

new Promise() 接受一個 function 做為初始化參數
這個 function 又接受兩個參數 resolve 和 reject
它們都是 function
resolve() 的作用是把 Promise object 的狀態從 pending 改成 resolved / fulfilled(成功)
reject() 則是把狀態從 pending 改成 rejected(失敗)

Promise object 產生之後
可以用 Promise object 的 then() 來指定 resolved 狀態的 callback function
然後 catch() 來指定 rejected 狀態的 callback function

每個 then() 和 catch() 都會 return 一個新的 Promise object
如果你在 then() 裡 return 的不是 Promise object
它會隱式地用 Promise.resolve() 幫你轉換

/**
 * Adapt the callback-style `doYourAsyncShit(params, callback)` to a Promise.
 *
 * @param {*} params - Forwarded unchanged to `doYourAsyncShit`.
 * @returns {Promise<*>} Fulfilled with the callback's `data`, or rejected
 *   with its `err` when `err` is truthy.
 */
const yourPromiseFunc = function (params) {
  const executor = (resolve, reject) => {
    const onDone = (err, data) => {
      if (!err) {
        resolve(data);
        return;
      }
      reject(err);
    };

    doYourAsyncShit(params, onDone);
  };

  return new Promise(executor);
};

// Consume the promise: then() handles fulfilment, catch() handles rejection.
yourPromiseFunc('some parameter')
.then((data) => {
  console.log('success', data);
})
.catch((err) => {
  // Log the rejection reason. `data` is not in scope in this handler, so the
  // previous `console.log('fail', data)` threw a ReferenceError.
  console.log('fail', err);
});

嚴格來說你只能在 then() 裡做三件事:

  • return 另外一個 Promise object
  • return 一個 synchronous value(字串、數字或其他 object)
  • throw 一個 Error()

如果你沒有顯式地 return 的話,JavaScript 會自動幫你 return undefined;

ref:
http://www.html5rocks.com/zh/tutorials/es6/promises/
http://es6.ruanyifeng.com/#docs/promise#基本用法

使用 reject() 而不是 throw

在 Promise 裡
如果可以用 reject(new Error('your error message')) 就用
不要用 throw new Error('your error message');

基本上就是用 reject() 來表示我們有意識地拋出的錯誤

ref:
http://liubin.org/promises-book/#not-throw-use-reject

把任意 object 轉換成 Promise object

Promise.resolve(xxx); 就是把 xxx 包裝成 Promise object
然後 resolve() 它
如果 xxx 已經是(原生的)Promise object 了,則會直接回傳 xxx 本身,不會另外產生新的 Promise object

// Wrap a plain value in an already-fulfilled promise.
Promise.resolve(42);

// equals to

new Promise((resolve) => {
  resolve(42);
});

因為所有 Promise 操作都一定是 async 的(這是規格裡規定的)
所以就算是 Promise.resolve(42); 的 42 也不會馬上被執行到

// Demonstrates ordering: the executor runs synchronously, while then()
// callbacks always run asynchronously.
const promise = new Promise((resolve) => {
    console.log("inner promise"); // runs 1st
    resolve(42);
});

const printValue = (value) => {
    console.log(value); // runs 3rd
};
promise.then(printValue);

console.log("outer promise"); // runs 2nd

ref:
http://liubin.org/promises-book/#chapter2-how-to-write-promise

Promise chains

一律使用 then().catch() 的方式分別指定 resolved 和 rejected 的 callback functions
通常會在 promise chains 的最後放一個 catch()

因為每個 then() 執行完都會 return 一個新的 Promise object(注意!是新的 Promise object,不是你最一開始 new 出來的那個)
所以你可以一直用很多個 then() 串起來
你也可以在 then() 裡 return 某個值作為下一個 then() 的參數

// Each then() receives whatever the previous handler returned.
const parseStory = (response) => JSON.parse(response);
const printStory = (data) => {
  console.log(data);
};

get('story.json')
  .then(parseStory)
  .then(printStory);

ref:
http://liubin.org/promises-book/#then-return-new-promise

// The handlers run in order from top to bottom; .catch() is the exception
// and only runs when an earlier step rejects or throws.
Promise.resolve()
  .then(functionA)
  .then(functionB)
  .then(functionC)
  .catch(errorHandler)
  .then(finalFunction);

errorHandler 只能 catch 到 functionA、functionB 和 functionC 中拋出的錯誤

你可以在 then() 裡 return 某個值
他會被包裝成 Promise object(透過 Promise.resolve(某個值))然後傳給下一個 then()

如果你想在 functionC 裡同時使用 functionA 和 functionB 的結果
你可以這麼寫

// Carry the first result forward next to the second one by bundling both
// with Promise.all, so the following handler sees [result1, result2].
const pairWithSecond = (result1) => Promise.all([result1, secondThingAsync(result1)]);

const useResults = (results) => {
  // do something with results array: results[0], results[1]
};

const onFailure = (err) => {
  doErrorHandling();
};

firstThingAsync()
  .then(pairWithSecond)
  .then(useResults)
  .catch(onFailure);

Anti-patterns

// Anti-pattern: written this way, callers of badAsyncCall() never receive
// newVar as the resolved value.
function badAsyncCall() {
    const settled = Promise.resolve();

    // The promise created by then() (the one carrying newVar) is discarded.
    settled.then(() => newVar);

    return settled;
}

// It should be written like this: return the promise produced by then(),
// which is the one that resolves to newVar.
function badAsyncCall() {
    return Promise.resolve().then(() => newVar);
}

ref:
https://pouchdb.com/2015/05/18/we-have-a-problem-with-promises.html
http://www.datchley.name/promise-patterns-anti-patterns/

等到所有 promises 都執行完才執行某個動作

你可以用 Promise.all()

const onAllFulfilled = (results) => {
  // Runs once promise1, promise2 and promise3 are all fulfilled.
  // The order of results always matches the order passed to .all():
  // results[0] is the value of promise1, results[1] of promise2, and so on.
  // It can also be written as .then(([data1, data2, data3]) => {}), although
  // Node.js v4.3 does not support that syntax yet.
};

const onAnyRejected = (err) => {
  // Runs as soon as any one of the promises is rejected.
};

Promise.all([promise1, promise2, promise3])
  .then(onAllFulfilled)
  .catch(onAnyRejected);

ref:
http://www.datchley.name/es6-promises/
https://developer.mozilla.org/en/docs/Web/JavaScript/Reference/Global_Objects/Promise/all
http://liubin.org/promises-book/#ch2-promise-all