How to get data in batches in mongodb
I want to get data from MongoDB, 5 at a time
I am using limit to restrict the number of records returned:
// POST /List: look up the 'clnName' collection on the request's db handle and
// reply with the matching documents as JSON. The limit is 5 times the
// requestCount sent in the request body, so it grows with every request.
router.post('/List', function (req, res) {
  var collection = req.db.get('clnName');
  var options = { limit: 5 * req.body.requestCount };

  // Forwards whatever documents the query produced; the error argument is not inspected.
  function sendDocs(e, docs) {
    res.json(docs);
  }

  collection.find({}, options, sendDocs);
});
Here, from the client, I am incrementing the requestCount variable to get the data in multiples of 5. What I want to achieve is to get the first 5 documents in the first request and the next 5 documents in the second request, but what actually happens is that I get the first 5 documents and then the first 10 documents.
What change should I make to achieve what I need?
Will using the batch size option of the MongoDB cursor methods solve my problem?
source to share
What you want here is .skip()
as a modifier together with .limit()
to implement paging of the data:
// Fixed page size of 5; "skip" moves the window forward on each request.
// requestCount is 1-based in the question (the first request used
// limit: 5 * requestCount and returned 5 documents), so page N starts at
// offset 5 * (N - 1). Skipping 5 * N would drop the first page entirely.
// An explicit sort keeps the page boundaries stable between requests.
collection.find(
  {},
  { "limit": 5, "skip": 5 * (req.body.requestCount - 1), "sort": { "_id": 1 } },
  function (e, docs) {
    res.json(docs);
  }
);
But if you're just processing batches, it's better to filter out the range you've already seen. The field _id
makes a nice identifier for this without any other sorting. So, on the first request:
// Holds the _id of the most recently visited document; null until a batch
// has been processed.
var lastSeen = null;

// First batch: no filter, five documents in ascending _id order.
collection.find({}, { "limit": 5, "sort": { "_id": 1 } }, function (err, batch) {
  for (var i = 0; i < batch.length; i += 1) {
    var doc = batch[i];
    // do something
    // Overwritten on every pass, so after the loop it is the last _id of the batch.
    lastSeen = doc._id;
  }
});
And the next time, after storing that "lastSeen" in something like a session variable (or in some other loop construct where batches are simply processed):
// Follow-up batch: only documents whose _id is greater than the last one
// already processed, again five at a time in ascending _id order.
// `lastSeen` is the value stored while processing the previous batch.
collection.find(
  { "_id": { "$gt": lastSeen } },
  { "limit": 5, "sort": { "_id": 1 } },
  function(err,docs) {
    docs.forEach(function(doc) {
      // do something
      lastSeen = doc._id; // keep the _id so the next request can resume after it
    });
  }
);
Thus, all results with an _id less than or equal to the last seen _id
are excluded.
With a different sort, this is still possible, but you need to pay attention to both the last _id
and the last sorted value. In addition, the seen _id
values are kept as a list that is reset each time the sorted value changes.
// _id values of the documents already visited that share the current
// `other` value, plus that value itself.
var lastSeenIds = [];
var lastSeenValue = null;

// First batch: no filter, five documents ordered by `other`, then by _id.
collection.find(
  {},
  { "limit": 5, "sort": { "other": 1, "_id": 1 } },
  function (err, batch) {
    for (var i = 0; i < batch.length; i += 1) {
      var doc = batch[i];
      // do something
      if ( lastSeenValue != doc.other ) {
        // The sorted value changed: start a fresh id list for the new value.
        lastSeenValue = doc.other;
        lastSeenIds = [];
      }
      lastSeenIds.push(doc._id);
    }
  }
);
Then on the next iteration, with the variables in place:
// Follow-up batch: resume at the last sorted value ($gte) and leave out the
// documents with that value that were already visited ($nin), using the
// `lastSeenValue` / `lastSeenIds` state kept from the previous batch.
collection.find(
  { "_id": { "$nin": lastSeenIds }, "other": { "$gte": lastSeenValue } },
  { "limit": 5, "sort": { "other": 1, "_id": 1 } },
  function (err, batch) {
    for (var i = 0; i < batch.length; i += 1) {
      var doc = batch[i];
      // do something
      if ( lastSeenValue != doc.other ) {
        // The sorted value changed: start a fresh id list for the new value.
        lastSeenValue = doc.other;
        lastSeenIds = [];
      }
      lastSeenIds.push(doc._id);
    }
  }
);
This is much more efficient than "skipping" results that match the main query condition.
source to share