Mongodb aggregate pipeline templates

Mongodb aggregate is a powerful operation in data query for analysis purpose and so on but I find I always need to refer to the official documentation or AI tools like ChatGPT or Perplexity for even very simple syntax when trying to compose aggregate operations, so I summarize some commonly used patterns as templates for quick reference at work.

Templates

  • Basic with match, group and project
db.collection.aggregate(
  [
    {
      $match: {}
    },
    {
      $group: {}
    },
    {
      $project: {}
    }
  ]
)
  • Match results by different data types
db.collection.aggregate(
  [
    {
      $match: {
        name: 'andy',                  // string literal
        count: 30,                     // number literal
        is_login: true,                // false, null
        name: {
          $in: ['andy', 'jiang']       // array contain
        },
        is_login: {
          $not: {                      // opposite value
            $in: [false, null]
          }
        },
        count: {
          $gte: 5                      // $lte, $gt, $lt, $eq, $ne, etc
        },
        opened_at: {
          $lt: new Date('2025-02-13')  // $lte, $gt, $lt, $eq, $ne, etc
        },
        $or: [                         // or, must be used at top level
          { x: { $lt: 0 } },
          { x: { $gt: 10 } }
        ],
        $and: [                        // and, must be used at top level
          { x: { $gt: 0 } },
          { x: { $lt: 10 } }
        ]
      }
    },
    {
      $group: {}
    },
    {
      $project: {}
    }
  ]
)
  • Group by _id to calculate values
db.collection.aggregate(
  [
    {
      $match: {}
    },
    {
      $group: {
        _id: null,      // group by all documents
        _id: '$field',  // group by single field
        _id: {          // group by multiple fields
          field1: '$field1',
          field2: '$field2'
        },
        total: {
          $count: {},   // count doc way 1
          $sum: 1       // count doc way 2
        },
        sum_of_x: {
          $sum: '$x'    // sum of x
        },
        avg_of_x: {
          $avg: '$x'    // avg of x
        }
      }
    },
    {
      $project: {}
    }
  ]
)
  • Output specific fields in project
db.collection.aggregate(
  [
    {
      $match: {}
    },
    {
      $group: {}
    },
    {
      $project: {
        _id: 0,           // or false, do not output _id
        field1: 1,        // or true, output field1
        field: '$_id',    // output field used in single-field _id
        x: '$_id.x',      // output _id.x in multiple-field _id
        y: '$arr.0',      // output array element by index
        z: '$obj.z',      // output child object field
        'x.y.z': 1        // output nested field
      }
    }
  ]
)
  • De-dup with window function before match
db.collection.aggregate(
  [
    {
      $setWindowFields: {
        partitionBy: {
          field1: '$field1',
          field2: '$field2'
        },
        sortBy: {
          _id: 1
        },
        output: {
          rn: {
            $documentNumber: {}
          }
        }
      }
    },
    {
      $match: {
        rn: 1   // only keep records with row number = 1
      }
    },
    {
      $group: {}
    },
    {
      $project: {}
    }
  ]
)
  • Convert timestamp with $dateTrunc
db.collection.aggregate(
  [
    {
      $match: {}
    },
    {
      $group: { 
        _id: {
          field1: '$field1',
          date_field: {
            $dateTrunc: {
              date: '$timestamp',
              unit: 'day', // day, week, month
              timezone: 'Asia/Tokyo',
              startOfWeek: 'Monday'
            }
          }
        },
        field2: {
          $sum: '$field2'
        }
      }
    },
    {
      $project: {}
    }
  ]
)

References

mongodb