How to get the top five fastest ground speeds using mongodb

57 Views Asked by At

So I have this data imported from excel and I want to be able find the top five fastest speeds. I tried using the aggregate function but what was returned was 0.

db.planes.aggregate({
    $group : {
        _id : "$msg_dummy",
        fastest : {$max : "$ground_speed"}
    }
})


{ "_id" : "MSG", "fastest" : "" }

and only one printed out. I need the top five. Here is some of the data

{ "_id" : ObjectId("58076dffa85331269df16994"), "msg_dummy" : "MSG", "msg_type" : 4, "flight_id" : 680, "aircraft_id" : "A1C8EB", "date" : 20131213, "time" : 214224, "latitude" : "", "longitude" : 306, "altitude" : 133, "ground_speed" : -512, "heading" : "", "dummy1" : "" }
{ "_id" : ObjectId("58076dffa85331269df16995"), "msg_dummy" : "MSG", "msg_type" : 4, "flight_id" : 680, "aircraft_id" : "A1C8EB", "date" : 20131213, "time" : 214223, "latitude" : "", "longitude" : 307, "altitude" : 133, "ground_speed" : -512, "heading" : "", "dummy1" : "" }
{ "_id" : ObjectId("58076dffa85331269df16996"), "msg_dummy" : "MSG", "msg_type" : 1, "flight_id" : 680, "aircraft_id" : "A1C8EB", "date" : 20131213, "time" : 214223, "latitude" : "", "longitude" : "", "altitude" : "", "ground_speed" : "" }
{ "_id" : ObjectId("58076dffa85331269df16997"), "msg_dummy" : "MSG", "msg_type" : 4, "flight_id" : 680, "aircraft_id" : "A1C8EB", "date" : 20131213, "time" : 214223, "latitude" : "", "longitude" : 307, "altitude" : 133, "ground_speed" : -512, "heading" : "", "dummy1" : "" }
{ "_id" : ObjectId("58076dffa85331269df16998"), "msg_dummy" : "MSG", "msg_type" : 1, "flight_id" : "AAL1538", "aircraft_id" : "ABFEFD", "date" : 20131213, "time" : 214224, "latitude" : "", "longitude" : "", "altitude" : "", "ground_speed" : "" }
{ "_id" : ObjectId("58076dffa85331269df16999"), "msg_dummy" : "MSG", "msg_type" : 4, "flight_id" : "AAL1538", "aircraft_id" : "ABFEFD", "date" : 20131213, "time" : 214224, "latitude" : "", "longitude" : 298, "altitude" : 123, "ground_speed" : 64, "heading" : "", "dummy1" : "" }
{ "_id" : ObjectId("58076dffa85331269df1699a"), "msg_dummy" : "MSG", "msg_type" : 4, "flight_id" : 680, "aircraft_id" : "A1C8EB", "date" : 20131213, "time" : 214224, "latitude" : "", "longitude" : 306, "altitude" : 133, "ground_speed" : -512, "heading" : "", "dummy1" : "" }
{ "_id" : ObjectId("58076dffa85331269df1699b"), "msg_dummy" : "MSG", "msg_type" : 1, "flight_id" : "NKS355", "aircraft_id" : "A67CA6", "date" : 20131213, "time" : 214225, "latitude" : "", "longitude" : "", "altitude" : "", "ground_speed" : "" }
{ "_id" : ObjectId("58076dffa85331269df1699c"), "msg_dummy" : "MSG", "msg_type" : 3, "flight_id" : 680, "aircraft_id" : "A1C8EB", "date" : 20131213, "time" : 214225, "latitude" : 28.64559, "longitude" : -81.57871, "altitude" : 10100, "ground_speed" : 306, "heading" : 133, "dummy1" : -512, "dummy2" : "", "field13" : "" }
{ "_id" : ObjectId("58076dffa85331269df1699d"), "msg_dummy" : "MSG", "msg_type" : 3, "flight_id" : 680, "aircraft_id" : "A1C8EB", "date" : 20131213, "time" : 214225, "latitude" : 28.64517, "longitude" : -81.57823, "altitude" : 10100, "ground_speed" : 306, "heading" : 133, "dummy1" : -512, "dummy2" : "", "field13" : "" }
{ "_id" : ObjectId("58076dffa85331269df1699e"), "msg_dummy" : "MSG", "msg_type" : 4, "flight_id" : "AAL1538", "aircraft_id" : "ABFEFD", "date" : 20131213, "time" : 214225, "latitude" : "", "longitude" : 298, "altitude" : 123, "ground_speed" : 64, "heading" : "", "dummy1" : "" }
2

There are 2 best solutions below

0
chridam On

Because the ground_speed field has a mixture of numerical and string values (empty string), the $max operator returns the maximum value since it compares both value and type using the specified BSON comparison order for values of different types.

You need to filter the documents and compare only the ones with numerical values for ground_speed:

db.planes.aggregate([
    { "$match": { 
        "ground_speed": { "$exists": true, "$type": 1 }  
    } },
    {
        "$group" : {
            "_id" : "$msg_dummy",
            "fastest" : { "$max" : "$ground_speed"}
        }
    }
])

To answer your question, getting the top 5 speeds requires ordering the documents by the ground_speed field using the $sort pipeline, creating an array with the ground_speed values within the $group pipeline using the accumulator operator $push and then returning the top 5 from the array using a $project pipeline and the $slice operator.

The following example shows this:

db.planes.aggregate([
    { "$match": { 
        "ground_speed": { "$exists": true, "$type": 1 }  
    } },
    { "$sort": { "ground_speed": -1 } },
    {
        "$group" : {
            "_id" : "$msg_dummy",
            "ground_speeds" : { "$push" : "$ground_speed" }
        }
    },
    {
        "$project": {
            "_id": 0,
            "msg_dummy": "$_id",
            "top_five_fastest": { "$slice": ["$ground_speeds", 5] },
            "ground_speeds": 1
        }
    }
])

For MongoDB versions which do not support the $slice operator, as an alternative you can get the top 5 by limiting the number of documents getting into the $group pipeline using the $limit operator and this should be placed after the $sort pipeline (on ordered documents):

db.planes.aggregate([
    { "$match": { 
        "ground_speed": { "$exists": true, "$type": 1 }  
    } },
    { "$sort": { "ground_speed": -1 } },
    { "$limit": 5 },
    {
        "$group" : {
            "_id" : "$msg_dummy",
            "top_five_fastest" : { "$push" : "$ground_speed" }
        }
    }   
])
2
robjwilkins On

You can do this without using an aggregate query.

Try the following:

 db.planes.find({},{_id:1,ground_speed:1}).sort({ground_speed:-1}).limit(5)