Mongo / Mongoose Aggregation - $ redact and $ cond issues
I was lucky enough to get a great answer to another SOA Mongo / Mongoose question - Aggregating by Date from @chridam, given a lot of docs like:
{ "_id" : ObjectId("5907a5850b459d4fdcdf49ac"), "amount" : -33.3, "name" : "RINGGO", "method" : "VIS", "date" : ISODate("2017-04-26T23:00:00Z"), "importDate" : ISODate("2017-05-01T21:15:49.581Z"), "category" : "Not Set", "__v" : 0 }
{ "_id" : ObjectId("5907a5850b459d4fdcdf49ba"), "amount" : -61.3, "name" : "Amazon", "method" : "VIS", "date" : ISODate("2017-03-23T00:00:00Z"), "importDate" : ISODate("2017-05-01T21:15:49.592Z"), "category" : "Not Set", "__v" : 0 }
{ "_id" : ObjectId("5907a5850b459d4fdcdf49ce"), "amount" : -3.3, "name" : "Tesco", "method" : "VIS", "date" : ISODate("2017-03-15T00:00:00Z"), "importDate" : ISODate("2017-05-01T21:15:49.601Z"), "category" : "Not Set", "__v" : 0 }
{ "_id" : ObjectId("5907a5850b459d4fdcdf49cc"), "amount" : -26.3, "name" : "RINGGO", "method" : "VIS", "date" : ISODate("2017-03-16T00:00:00Z"), "importDate" : ISODate("2017-05-01T21:15:49.600Z"), "category" : "Not Set", "__v" : 0 }
{ "_id" : ObjectId("5907a5850b459d4fdcdf49f7"), "amount" : -63.3, "name" : "Sky", "method" : "VIS", "date" : ISODate("2017-03-02T00:00:00Z"), "importDate" : ISODate("2017-05-01T21:15:49.617Z"), "category" : "Not Set", "__v" : 0 }
{ "_id" : ObjectId("5907a5850b459d4fdcdf49be"), "amount" : -3.3, "name" : "RINGGO", "method" : "VIS", "date" : ISODate("2017-03-22T00:00:00Z"), "importDate" : ISODate("2017-05-01T21:15:49.593Z"), "category" : "Not Set", "__v" : 0 }
required a query that would aggregate costs by vendor, year, month, and week. The request is below and it almost works fantastic, but since I used it in my application I noticed a significant problem
db.statements.aggregate([
{ "$match": { "name": "RINGGO" } },
{
"$redact": {
"$cond": [
{
"$and": [
{ "$eq": [{ "$year": "$date" }, 2017 ]}, // within my route this uses parseInt(req.params.year)
{ "$eq": [{ "$month": "$date" }, 3 ]}, // within my route this uses parseInt(req.params.month)
{ "$eq": [{ "$week": "$date" }, 12 ]} // within my route this uses parseInt(req.params.week)
]
},
"$$KEEP",
"$$PRUNE"
]
}
},{
"$group": {
"_id": {
"name": "$name",
"year": { "$year": "$date" },
"month": { "$month": "$date" },
"week": { "$week": "$date" }
},
"total": { "$sum": "$amount" }
}
},
{
"$group": {
"_id": {
"name": "$_id.name",
"year": "$_id.year"
},
"YearlySpends": { "$push": "$total" },
"totalYearlyAmount": { "$sum": "$total" },
"data": { "$push": "$$ROOT" }
}
},
{ "$unwind": "$data" },
{
"$group": {
"_id": {
"name": "$_id.name",
"month": "$data._id.month"
},
"YearlySpends": { "$first": "$YearlySpends" },
"totalYearlyAmount": { "$first": "$totalYearlyAmount" },
"MonthlySpends": { "$push": "$data.total" },
"totalMonthlyAmount": { "$sum": "$data.total" },
"data": { "$push": "$data" }
}
},
{ "$unwind": "$data" },
{
"$group": {
"_id": {
"name": "$_id.name",
"week": "$data._id.week"
},
"YearlySpends": { "$first": "$YearlySpends" },
"totalYearlyAmount": { "$first": "$totalYearlyAmount" },
"MonthlySpends": { "$first": "$MonthlySpends" },
"totalMonthlyAmount": { "$first": "$totalMonthlyAmount" },
"WeeklySpends": { "$push": "$data.total" },
"totalWeeklyAmount": { "$sum": "$data.total" },
"data": { "$push": "$data" }
}
},
{ "$unwind": "$data" },
{
"$group": {
"_id": "$data._id",
"YearlySpends": { "$first": "$YearlySpends" },
"totalYearlyAmount": { "$first": "$totalYearlyAmount" },
"MonthlySpends": { "$first": "$MonthlySpends" },
"totalMonthlyAmount": { "$first": "$totalMonthlyAmount" },
"WeeklySpends": { "$first": "$WeeklySpends" },
"totalWeeklyAmount": { "$first": "$totalWeeklyAmount" }
}
}
])
Running this query returns
{ "_id" :
{ "name" : "RINGGO",
"year" : 2017,
"month" : 3,
"week" : 12 },
"YearlySpends" : [ -9.6 ],
"totalYearlyAmount" : -9.6,
"MonthlySpends" : [ -9.6 ],
"totalMonthlyAmount" : -9.6,
"WeeklySpends" : [ -9.6 ],
"totalWeeklyAmount" : -9.6
}
And when I turn to wanting to see the spending of the month
"$cond": [
{
"$and": [
{ "$eq": [{ "$year": "$date" }, 2017 ]},
{ "$eq": [{ "$month": "$date" }, 3 ]}
]
},
"$$KEEP",
"$$PRUNE"
]
I get:
{ "_id" : { "name" : "RINGGO", "year" : 2017, "month" : 3, "week" : 12 }, "YearlySpends" : [ -3.3, -9.6, -9.6, -3.3 ], "totalYearlyAmount" : -25.799999999999997, "MonthlySpends" : [ -3.3, -9.6, -9.6, -3.3 ], "totalMonthlyAmount" : -25.799999999999997, "WeeklySpends" : [ -9.6 ], "totalWeeklyAmount" : -9.6 }
{ "_id" : { "name" : "RINGGO", "year" : 2017, "month" : 3, "week" : 9 }, "YearlySpends" : [ -3.3, -9.6, -9.6, -3.3 ], "totalYearlyAmount" : -25.799999999999997, "MonthlySpends" : [ -3.3, -9.6, -9.6, -3.3 ], "totalMonthlyAmount" : -25.799999999999997, "WeeklySpends" : [ -3.3 ], "totalWeeklyAmount" : -3.3 }
{ "_id" : { "name" : "RINGGO", "year" : 2017, "month" : 3, "week" : 11 }, "YearlySpends" : [ -3.3, -9.6, -9.6, -3.3 ], "totalYearlyAmount" : -25.799999999999997, "MonthlySpends" : [ -3.3, -9.6, -9.6, -3.3 ], "totalMonthlyAmount" : -25.799999999999997, "WeeklySpends" : [ -9.6 ], "totalWeeklyAmount" : -9.6 }
{ "_id" : { "name" : "RINGGO", "year" : 2017, "month" : 3, "week" : 13 }, "YearlySpends" : [ -3.3, -9.6, -9.6, -3.3 ], "totalYearlyAmount" : -25.799999999999997, "MonthlySpends" : [ -3.3, -9.6, -9.6, -3.3 ], "totalMonthlyAmount" : -25.799999999999997, "WeeklySpends" : [ -3.3 ], "totalWeeklyAmount" : -3.3 }
However, when I run simple db.statements.find({"name":"RINGGO"})
I get:
{ "_id" : ObjectId("5907a5850b459d4fdcdf49ac"), "amount" : -3.3, "name" : "RINGGO", "method" : "VIS", "date" : ISODate("2017-03-26T23:00:00Z"), "importDate" : ISODate("2017-05-01T21:15:49.581Z"), "category" : "Not Set", "__v" : 0 }
{ "_id" : ObjectId("5907a5850b459d4fdcdf49ba"), "amount" : -6.3, "name" : "RINGGO", "method" : "VIS", "date" : ISODate("2017-03-23T00:00:00Z"), "importDate" : ISODate("2017-05-01T21:15:49.592Z"), "category" : "Not Set", "__v" : 0 }
{ "_id" : ObjectId("5907a5850b459d4fdcdf49ce"), "amount" : -3.3, "name" : "RINGGO", "method" : "VIS", "date" : ISODate("2017-03-15T00:00:00Z"), "importDate" : ISODate("2017-05-01T21:15:49.601Z"), "category" : "Not Set", "__v" : 0 }
{ "_id" : ObjectId("5907a5850b459d4fdcdf49cc"), "amount" : -6.3, "name" : "RINGGO", "method" : "VIS", "date" : ISODate("2017-03-16T00:00:00Z"), "importDate" : ISODate("2017-05-01T21:15:49.600Z"), "category" : "Not Set", "__v" : 0 }
{ "_id" : ObjectId("5907a5850b459d4fdcdf49f7"), "amount" : -3.3, "name" : "RINGGO", "method" : "VIS", "date" : ISODate("2017-03-02T00:00:00Z"), "importDate" : ISODate("2017-05-01T21:15:49.617Z"), "category" : "Not Set", "__v" : 0 }
{ "_id" : ObjectId("5907a5850b459d4fdcdf49be"), "amount" : -3.3, "name" : "RINGGO", "method" : "VIS", "date" : ISODate("2017-03-22T00:00:00Z"), "importDate" : ISODate("2017-05-01T21:15:49.593Z"), "category" : "Not Set", "__v" : 0 }
So you can see that in the previous release, there is a different number of items in MonthlySpends
compared to the one shown in the output from the search by name. Also you can see that some of the values ββare being added to MonthlySpends
when they shouldn't be.
Ideally, I am looking for an output that: when I have $redact
one containing:
"$cond": [
{
"$and": [
{ "$eq": [{ "$year": "$date" }, 2017 ]},
{ "$eq": [{ "$month": "$date" }, 3 ]},
{ "$eq": [{ "$week": "$date" }, 12 ]}
]
},
"$$KEEP",
"$$PRUNE"
]
returns
{ "_id" : { "name" : "RINGGO", "year" : 2017, "month" : 3, "week" : 12 }, "YearlySpends" : [ -3.3, -9.6, -9.6, -3.3 ], "totalYearlyAmount" : -25.799999999999997, "MonthlySpends" : [ -3.3, -9.6, -9.6, -3.3 ], "totalMonthlyAmount" : -25.799999999999997, "WeeklySpends" : [ -9.6 ], "totalWeeklyAmount" : -9.6 }
when I have $redact
one containing:
"$cond": [
{
"$and": [
{ "$eq": [{ "$year": "$date" }, 2017 ]},
{ "$eq": [{ "$month": "$date" }, 3 ]},
]
},
"$$KEEP",
"$$PRUNE"
]
returns
{ "_id" : { "name" : "RINGGO", "year" : 2017, "month" : 3 }, "YearlySpends" : [ -3.3, -9.6, -9.6, -3.3 ], "totalYearlyAmount" : -25.799999999999997, "MonthlySpends" : [ -3.3, -9.6, -9.6, -3.3 ], "totalMonthlyAmount" : -25.799999999999997 }
when I have $redact
one containing:
"$cond": [
{
"$and": [
{ "$eq": [{ "$year": "$date" }, 2017 ]}
]
},
"$$KEEP",
"$$PRUNE"
]
returns
{ "_id" : { "name" : "RINGGO", "year" : 2017 }, "YearlySpends" : [ -3.3, -9.6, -9.6, -3.3 ], "totalYearlyAmount" : -25.799999999999997}
Any help on this is really needed. I tried to work around the query, but I'm afraid I just don't understand it enough to change it correctly.
My version is Mongoose ^4.9.5
and my Mongo is 3.4.2
.
source to share
You can try $facet
with $addFields
for parallel aggregation in version 3.4
.
This will reduce the overall complexity and you can run groups at the same time with their own corresponding input.
The following code builds the aggregation pipeline dynamically based on the request object.
// Sample request
var request = {
"name":"RINGGO",
"year": 2017,
"month":3,
"week":12
};
// Build initial match document on name
var match1 = {
name: request["name"]
};
// Build project & facet document for date based aggregation
var addFields = {};
var facet = {};
// Add year followed by year facet
if (request["year"]) {
addFields["year"] = { "$year": "$date" },
facet["Yearly"] =
[
{
"$match":{ "year": request["year"] }
},
{
"$group": {
"_id": {
"name": "$name",
"year": "$year"
},
"spend": { "$push":"$amount" },
"total": { "$sum": "$amount" }
}
}
];
}
// Add month followed by month facet
if (request["month"]) {
addFields["month"] = { "$month": "$date" };
facet["Monthly"] =
[
{
"$match":{ "month": request["month"] }
},
{
"$group": {
"_id": {
"name": "$name",
"month": "$month"
},
"spend": { "$push":"$amount" },
"total": { "$sum": "$amount" }
}
}
];
}
// Add week followed by week facet
if (request["week"]) {
addFields["week"] = { "$week": "$date" };
facet["Weekly"] =
[
{
"$match":{ "week": request["week"] }
},
{
"$group": {
"_id": {
"name": "$name",
"week": "$week"
},
"spend": { "$push":"$amount" },
"total": { "$sum": "$amount" }
}
}
];
}
// Use aggregate builder
statements.aggregate()
.match(match1)
.append({"$addFields": addFields}) // No addFields stage in mongoose builder
.facet(facet)
.exec(function(err, data) {});
Mongo Shell query for criteria name/year/month/week
.
db.statements.aggregate({
'$match': {
name: 'RINGGO'
}
}, {
'$addFields': {
year: {
'$year': '$date'
},
month: {
'$month': '$date'
},
week: {
'$week': '$date'
}
}
}, {
'$facet': {
Yearly: [{
'$match': {
year: 2017
}
},
{
'$group': {
_id: {
name: '$name',
year: '$year'
},
spend: {
'$push': '$amount'
},
total: {
'$sum': '$amount'
}
}
}
],
Monthly: [{
'$match': {
month: 3
}
},
{
'$group': {
_id: {
name: '$name',
month: '$month'
},
spend: {
'$push': '$amount'
},
total: {
'$sum': '$amount'
}
}
}
],
Weekly: [{
'$match': {
week: 12
}
},
{
'$group': {
_id: {
name: '$name',
week: '$week'
},
spend: {
'$push': '$amount'
},
total: {
'$sum': '$amount'
}
}
}
]
}
})
Sample response
{
"Yearly": [{
"_id": {
"name": "RINGGO",
"year": 2017
},
"spend": [-3.3, -6.3, -3.3, -6.3, -3.3, -3.3],
"total": -25.799999999999997
}],
"Monthly": [{
"_id": {
"name": "RINGGO",
"month": 3
},
"spend": [-3.3, -6.3, -3.3, -6.3, -3.3, -3.3],
"total": -25.799999999999997
}],
"Weekly": [{
"_id": {
"name": "RINGGO",
"week": 12
},
"spend": [-6.3, -3.3],
"total": -9.6
}]
}
You can run a similar aggregation for the input values Year/Month
and Year
.
So you can see what's in the MonthlySpends in the previous release versus the one shown in the output from search by name. Also you can see that some of the values ββare being added together in MonthlySpends when they shouldn't be.
This occurs at $group
1, where the aggregation $week
collapses each of the two dates [15, 16] into week 11 and the other two dates [22, 23] 12 weeks later, to be displayed as summarized totals in MonthySpends
.
source to share