MongoDB: remove fs.chunks documents that have no matching entry in fs.files
I have 10 GB of data in fs.chunks and I want to delete every document that is not referenced in fs.files. I've already deleted every entry in fs.files that I don't want, so every id in fs.files is a file that I want to keep.
So, I need something like db.fs.chunks.remove({"_id": {$nin: fs.files._id}})
or "delete every entry in fs.chunks that doesn't exist in fs.files".
Edit: I'm looking for the MongoDB equivalent of the SQL statement delete from fs_chunks where id not in (select id from fs_files).
source to share
I don't think there is an easy way to do this other than running a find and then iterating over the results with forEach. So something like:
/**
 * Deletes a single fs.chunks document when no fs.files document owns it.
 *
 * Counts the fs.files documents whose _id equals the chunk's files_id; when
 * that count is zero, removes the chunk by its own _id and prints a message
 * naming it. Otherwise does nothing.
 *
 * @param {Object} chunk - a document from fs.chunks; only `_id` and `files_id` are read.
 */
function removeChunkIfNoOwner(chunk){
    var ownerQuery = {'_id' : chunk.files_id};
    var ownerCount = db.fs.files.find(ownerQuery).count();

    // The chunk still has a parent file: leave it alone.
    if (ownerCount !== 0){
        return;
    }

    var orphanQuery = {'_id': chunk._id};
    db.fs.chunks.remove(orphanQuery);
    print("Removing chunk " + chunk._id);
}
// Visit every chunk and delete the ones without a parent file. Only _id and
// files_id are projected: removeChunkIfNoOwner reads nothing else, and without
// the projection the binary `data` field of every chunk would be fetched too.
db.fs.chunks.find({}, {'_id': 1, 'files_id': 1}).forEach(removeChunkIfNoOwner);
You can check that this approach should work by first running a read-only function like this, which prints the parent file of each chunk:
/**
 * Prints the fs.files document whose _id equals the chunk's files_id.
 *
 * Performs a single findOne lookup and passes its result straight to
 * printjson; nothing is modified.
 *
 * @param {Object} chunk - a document from fs.chunks; only `files_id` is read.
 */
function listParentFile(chunk){
    printjson(db.fs.files.findOne({'_id' : chunk.files_id}));
}
// Print the parent file of every chunk. listParentFile reads only files_id,
// so the projection keeps the binary `data` field from being fetched.
db.fs.chunks.find({}, {'files_id': 1}).forEach(listParentFile);
source to share
I found this solution, based on Mick's, to be slightly faster when there are a lot of chunks:
/**
 * Removes every chunk that belongs to a file id with no fs.files document.
 *
 * Counts the fs.files documents whose _id equals `files_id`; when there are
 * none, removes all fs.chunks documents carrying that files_id in one call.
 *
 * @param {*} files_id - a value taken from the `files_id` field of fs.chunks.
 */
function removeChunkIfNoOwner(files_id){
    //Look for the parent file
    var parentCount = db.fs.files.find({'_id' : files_id}).count();
    if (parentCount === 0 ){
        // The write result is deliberately discarded; assigning it to an
        // undeclared name would create a global (and throw in strict mode).
        db.fs.chunks.remove({'files_id': files_id});
    }
}
// Run the check once per distinct files_id instead of once per chunk.
// forEach returns nothing, so its result is not assigned to anything.
db.fs.chunks.distinct('files_id').forEach(removeChunkIfNoOwner);
source to share
Using distinct wouldn't work for me because my collection was too big. A small change to Bass's query worked for me.
/**
 * Removes all chunks of a file when that file has no fs.files document.
 *
 * Counts the fs.files documents whose _id equals the chunk's files_id; when
 * there are none, removes every fs.chunks document with the same files_id and
 * prints "item removed".
 *
 * @param {Object} chunk - a document from fs.chunks; only `files_id` is read.
 */
function removeChunkIfNoOwner(chunk){
    var parentCount = db.fs.files.find({'_id' : chunk.files_id}).count();
    if(parentCount === 0){
        // Match on the orphaned file id. Matching files_id against the chunk's
        // own _id would select nothing, leaving the orphan chunks in place.
        db.fs.chunks.remove({'files_id': chunk.files_id});
        print("item removed");
    }
}
// Iterate the chunks that have a files_id, excluding the `data` field from the
// returned documents. forEach returns nothing, so its result is not assigned.
db.fs.chunks.find({files_id:{$exists:1}},{data:0}).forEach(removeChunkIfNoOwner);
source to share