How to get data in batches in mongodb

I want to get data from MongoDB, 5 at a time

I am using the limit option to restrict the number of records returned:

// POST /List: responds with up to 5 * requestCount documents from 'clnName'.
router.post('/List', (req, res) => {
    const collection = req.db.get('clnName');
    // Only the limit grows with requestCount; the query itself is unfiltered.
    const options = { limit: 5 * req.body.requestCount };
    collection.find({}, options, (e, docs) => {
        res.json(docs);
    });
});

      

Here, from the client, I am incrementing the requestCount variable to get the data in multiples of 5. What I want to achieve is to get the first 5 records in the first request and the next 5 records in the second request, but what actually happens is that I get the first 5 records and then the first 10 records.

What change should I make to achieve what I need?

Will using the batch size option of the MongoDB cursor methods solve my problem?

+3


source to share


1 answer


It is fairly clear that what you want to use here is .skip() as a modifier together with .limit() to implement paging of the data:

    collection.find({}, { "limit": 5, "skip": 5 * req.body.requestCount  }, function 

      

But if you are just processing batches, it is better to simply filter out the range you have already seen. The _id field makes a nice identifier for this without any other sorting. So, on the first request:

// First batch: no filter yet, just the first 5 documents in ascending _id order.
let lastSeen = null;
collection.find(
    {},
    { "limit": 5, "sort": { "_id": 1 } },
    (err, docs) => {
        for (const doc of docs) {
            // do something
            lastSeen = doc._id;   // remember the most recent _id for the next request
        }
    }
);

      

And on the next request, after storing that "lastSeen" value in something like a session variable (or in some other loop construct where the batches are simply being processed):

    // Next batch: fetch only documents whose _id is greater than the last one
    // seen, in the same ascending _id order, so no already-returned document repeats.
    collection.find(
        { "_id": { "$gt": lastSeen } },
        { "limit": 5, "sort": { "_id": 1}  },
        function(err,docs) {
           docs.forEach(function(doc) {
               // do something
               lastSeen = doc._id;        // keep the _id
           });
        }
    );

      



This excludes all results whose _id is less than or equal to the last _id value seen.

With a different sort this is still possible, but you need to keep track of both the last _id seen and the last sorted value. In addition, the _id values seen are kept as a list covering everything since the last change in the sorted value.

    // Paging state: the last "other" value seen, plus every _id seen with that value.
    let lastSeenIds = [];
    let lastSeenValue = null;

    // First batch: no filter, sorted by "other" and then by _id.
    collection.find(
        {},
        { "limit": 5, "sort": { "other": 1, "_id": 1 } },
        (err, docs) => {
            for (const doc of docs) {
                // do something
                const valueChanged = lastSeenValue != doc.other;
                if (valueChanged) {
                    // a new sort value starts a fresh list of ids
                    lastSeenValue = doc.other;
                    lastSeenIds = [];
                }
                lastSeenIds.push(doc._id);
            }
        }
    );

      

Then on the next iteration, with the variables in place:

    // Next batch: resume at the last sort value seen, excluding the ids already
    // returned for that value.
    collection.find(
        {
            "_id": { "$nin": lastSeenIds },
            "other": { "$gte": lastSeenValue }
        },
        { "limit": 5, "sort": { "other": 1, "_id": 1 } },
        (err, docs) => {
            for (const doc of docs) {
                // do something
                const valueChanged = lastSeenValue != doc.other;
                if (valueChanged) {
                    // a new sort value starts a fresh list of ids
                    lastSeenValue = doc.other;
                    lastSeenIds = [];
                }
                lastSeenIds.push(doc._id);
            }
        }
    );

      

This is much more efficient than "skipping" through results that match the main query condition.

+4


source







All Articles