MongoDB Chapter4
MongoDB Chapter4
From Query
Components to
Aggregation Stages
INTRODUCTION TO MONGODB IN PYTHON
Donny Winston
Instructor
Queries have implicit stages
cursor = db.laureates.find(
    filter={"bornCountry": "USA"},
    projection={"prizes.year": 1},
    limit=3
)
for doc in cursor:
    print(doc["prizes"])

cursor = db.laureates.aggregate([
    {"$match": {"bornCountry": "USA"}},
    {"$project": {"prizes.year": 1}},
    {"$limit": 3}
])
for doc in cursor:
    print(doc["prizes"])
cursor = db.laureates.aggregate([
stage_1,
stage_2,
...
])
list(db.laureates.aggregate([
{"$match": {"bornCountry": "USA"}},
{"$project": {"prizes.year": 1, "_id": 0}},
{"$sort": OrderedDict([("prizes.year", 1)])},
{"$skip": 1},
{"$limit": 3}
]))
[{'n_USA-born-laureates': 269}]
db.laureates.count_documents({"bornCountry": "USA"})
269
Donny Winston
Instructor
Field paths
expression object: {field1: <expression1>, ...}
db.laureates.aggregate([
    {"$project": {"prizes.share": 1}}
]).next()

db.laureates.aggregate([
    {"$project": {"n_prizes": {"$size": "$prizes"}}}
]).next()
{'_id': ObjectId('5bd3a610053b1704219e19d4'),
 'n_prizes': 1}
db.laureates.aggregate([
{"$project": {"n_prizes": {"$size": ["$prizes"]}}}
]).next()
True
$group must map _id, which must be unique (like any Mongo document)
Donny Winston
Instructor
Sizing and summing
list(db.prizes.aggregate([
    {"$project": {"n_laureates": {"$size": "$laureates"},
                  "year": 1, "category": 1, "_id": 0}}
]))
[{'year': '2018', 'category': 'physics', 'n_laureates': 3},
 {'year': '2018', 'category': 'chemistry', 'n_laureates': 3},
 {'year': '2018', 'category': 'medicine', 'n_laureates': 2},
 ...]

list(db.prizes.aggregate([
    {"$project": {"n_laureates": {"$size": "$laureates"},
                  "category": 1}},
    {"$group": {"_id": "$category", "n_laureates":
                {"$sum": "$n_laureates"}}},
    {"$sort": {"n_laureates": -1}},
]))
[{'_id': 'medicine', 'n_laureates': 216},
 {'_id': 'physics', 'n_laureates': 210},
 {'_id': 'chemistry', 'n_laureates': 181},
 {'_id': 'peace', 'n_laureates': 133},
 {'_id': 'literature', 'n_laureates': 114},
 {'_id': 'economics', 'n_laureates': 81}]
[{'year': '2018',
'category': 'physics',
'laureates': {'surname': 'Ashkin', 'share': '2'}},
{'year': '2018',
'category': 'physics',
'laureates': {'surname': 'Mourou', 'share': '4'}},
{'year': '2018',
'category': 'physics',
'laureates': {'surname': 'Strickland', 'share': '4'}}]
list(db.prizes.aggregate([
{"$unwind": "$laureates"},
{"$group": {"_id": "$category", "n_laureates": {"$sum": 1}}},
{"$sort": {"n_laureates": -1}},
]))
{"$unwind": "$laureate_bios"},
{"$group": {"_id": None,
"bornCountries":
{"$addToSet": "$laureate_bios.bornCountry"}
}},
]))
[{'_id': None,
'bornCountries': [
'the Netherlands', 'British West Indies (now Saint Lucia)', 'Italy',
'Germany (now Poland)', 'Hungary', 'Austria', 'India', 'USA',
'Canada', 'British Mandate of Palestine (now Israel)', 'Norway',
'Russian Empire (now Russia)', 'Russia', 'Finland', 'Scotland',
'France', 'Sweden', 'Germany', 'Russian Empire (now Belarus)',
'United Kingdom', 'Cyprus'
]}]
Donny Winston
Instructor
A somber $project
docs = list(db.laureates.aggregate([
{"$project": {"died": {"$dateFromString": {"dateString": "$died"}},
"born": {"$dateFromString": {"dateString": "$born"}}}}
]))
docs = list(db.laureates.aggregate([
{"$match": {"died": {"$gt": "1700"}, "born": {"$gt": "1700"}}},
{"$project": {"died": {"$dateFromString": {"dateString": "$died"}},
"born": {"$dateFromString": {"dateString": "$born"}}}}
]))
]))
( [ ])
Donny Winston
Instructor
You now know how to...
Create and compose query filters and use operators
Aggregate
MongoDB documentation
PyMongo documentation