Mongodb aggregate pipeline templates
Mongodb aggregate
is a powerful operation in data query for analysis purpose and so on but I find I always need to refer to the official documentation or AI tools like ChatGPT or Perplexity for even very simple syntax when trying to compose aggregate operations, so I summarize some commonly used patterns as templates for quick reference at work.
Templates
- Basic with match, group and project
db.collection.aggregate(
[
{
$match: {}
},
{
$group: {}
},
{
$project: {}
}
]
)
- Match results by different data types
db.collection.aggregate(
[
{
$match: {
name: 'andy', // string literal
count: 30, // number literal
is_login: true, // false, null
name: {
$in: ['andy', 'jiang'] // array contain
},
is_login: {
$not: { // opposite value
$in: [false, null]
}
},
count: {
$gte: 5 // $lte, $gt, $lt, $eq, $ne, etc
},
opened_at: {
$lt: new Date('2025-02-13') // $lte, $gt, $lt, $eq, $ne, etc
},
$or: [ // or, must be used at top level
{ x: { $lt: 0 } },
{ x: { $gt: 10 } }
],
$and: [ // and, must be used at top level
{ x: { $gt: 0 } },
{ x: { $lt: 10 } }
]
}
},
{
$group: {}
},
{
$project: {}
}
]
)
- Group by
_id
to calculate values
db.collection.aggregate(
[
{
$match: {}
},
{
$group: {
_id: null, // group by all documents
_id: '$field', // group by single field
_id: { // group by multiple fields
field1: '$field1',
field2: '$field2'
},
total: {
$count: {}, // count doc way 1
$sum: 1 // count doc way 2
},
sum_of_x: {
$sum: '$x' // sum of x
},
avg_of_x: {
$avg: '$x' // avg of x
}
}
},
{
$project: {}
}
]
)
- Output specific fields in project
db.collection.aggregate(
[
{
$match: {}
},
{
$group: {}
},
{
$project: {
_id: 0, // or false, do not output _id
field1: 1, // or true, output field1
field: '$_id', // output field used in single-field _id
x: '$_id.x', // output _id.x in multiple-field _id
y: '$arr.0', // output array element by index
z: '$obj.z', // output child object field
'x.y.z': 1 // output nested field
}
}
]
)
- De-dup with
window
function before match
db.collection.aggregate(
[
{
$setWindowFields: {
partitionBy: {
field1: '$field1',
field2: '$field2'
},
sortBy: {
_id: 1
},
output: {
rn: {
$documentNumber: {}
}
}
}
},
{
$match: {
rn: 1 // only keep records with row number = 1
}
},
{
$group: {}
},
{
$project: {}
}
]
)
- Convert timestamp with
$dateTrunc
db.collection.aggregate(
[
{
$match: {}
},
{
$group: {
_id: {
field1: '$field1',
date_field: {
$dateTrunc: {
date: '$timestamp',
unit: 'day', // day, week, month
timezone: 'Asia/Tokyo',
startOfWeek: 'Monday'
}
}
},
field2: {
$sum: '$field2'
}
}
},
{
$project: {}
}
]
)
References