Uploaded image for project: 'Core Server'
  1. Core Server
  2. SERVER-9180

Nested $or queries seem to not use indexes efficiently

    • Type: Icon: Bug Bug
    • Resolution: Duplicate
    • Priority: Icon: Major - P3 Major - P3
    • None
    • Affects Version/s: None
    • Component/s: Querying
    • Labels:
    • ALL

      Firstly, Assume my schema is the below:

       
      {
      	"_id" : ObjectId("51548ccdc77b96c10579b54f"),
      	"key" : {
      		"val" : 557
      	},
      	"list" : [
      		{
      			"A" : 696,
      			"B" : 688,
      			"C" : 743
      		},
      		{
      			"A" : 743,
      			"B" : 696,
      			"C" : 688
      		},
      		{
      			"A" : 688,
      			"B" : 743,
      			"C" : 696
      		}
      	]
      }
      

      Second, assume my indexes are the below:

      db.test.ensureIndex(

      { "key.val" : 1 }

      );
      db.test.ensureIndex(

      { "list.A" : 1 }

      );
      db.test.ensureIndex(

      { "list.B" : 1 }

      );

      Now consider the query below:

       
      
      db.test.find( { "$or" : [ { "key.val" : 123456789} ,  
                                { "list" : { "$elemMatch" : 
                                                            { "$or" : [ { "A" : 696} , { "B" : 696 } ] ,  "C" : 743}}  }  ] } )
      
      

      What this is saying is find me all docs where key.val = 12345679 OR ( ( list.A = 696 OR list.B = 696) AND C = 743).

      If I execute this query on my DB I get the following explain output.

      > db.test.find( { "$or" : [ { "key.val" : 123456789} ,  { "list" : { "$elemMatch" : { "$or" : [ { "A" : 696} , { "B" : 696 } ] ,  "C" : 743}}  }  ] } ).explain()
      {
      	"cursor" : "BasicCursor",
      	"isMultiKey" : false,
      	"n" : 4,
      	"nscannedObjects" : 100000,
      	"nscanned" : 100000,
      	"nscannedObjectsAllPlans" : 100000,
      	"nscannedAllPlans" : 100000,
      	"scanAndOrder" : false,
      	"indexOnly" : false,
      	"nYields" : 1,
      	"nChunkSkips" : 0,
      	"millis" : 117,
      	"indexBounds" : {
      
      	},
      	"server" : "Oz-Olivo-MacBook-Pro.local:27017"
      }
      

      The results show that this DOES NOT use an index. But does correctly return the 4 matching documents in my DB. At the very least, it should be using the val index for the first part of the OR if not all 3 indexes for the different OR clauses. Why?

      The four matching documents are below:

      > db.test.find( { "$or" : [ { "key.val" : 123456789} ,  { "list" : { "$elemMatch" : { "$or" : [ { "A" : 696} , { "B" : 696 } ] ,  "C" : 743}}  }  ] } ).pretty()
      {
      	"_id" : ObjectId("51548ccdc77b96c10579b54f"),
      	"key" : {
      		"val" : 557
      	},
      	"list" : [
      		{
      			"A" : 696,
      			"B" : 688,
      			"C" : 743
      		},
      		{
      			"A" : 743,
      			"B" : 696,
      			"C" : 688
      		},
      		{
      			"A" : 688,
      			"B" : 743,
      			"C" : 696
      		}
      	]
      }
      {
      	"_id" : ObjectId("515599fc23a957b837c8a24d"),
      	"key" : {
      		"val" : 708
      	},
      	"list" : [
      		{
      			"A" : 743,
      			"B" : 647,
      			"C" : 696
      		},
      		{
      			"A" : 696,
      			"B" : 743,
      			"C" : 647
      		},
      		{
      			"A" : 647,
      			"B" : 696,
      			"C" : 743
      		}
      	]
      }
      {
      	"_id" : ObjectId("515599fc23a957b837c8ac95"),
      	"key" : {
      		"val" : 505
      	},
      	"list" : [
      		{
      			"A" : 264,
      			"B" : 696,
      			"C" : 743
      		},
      		{
      			"A" : 743,
      			"B" : 264,
      			"C" : 696
      		},
      		{
      			"A" : 696,
      			"B" : 743,
      			"C" : 264
      		}
      	]
      }
      {
      	"_id" : ObjectId("515599fc23a957b837c8b30a"),
      	"key" : {
      		"val" : 849
      	},
      	"list" : [
      		{
      			"A" : 323,
      			"B" : 696,
      			"C" : 743
      		},
      		{
      			"A" : 743,
      			"B" : 323,
      			"C" : 696
      		},
      		{
      			"A" : 696,
      			"B" : 743,
      			"C" : 323
      		}
      	]
      }
      

      Alternatively, I thought this query could be rewritten as

      db.test.find({ "$or" : [ { "key.val" : 123456789} ,     
                               { "list" : { "$elemMatch" : {  "A" : 696  , "C" : 743 }}} ,
      
                               { "list" : { "$elemMatch" : {  "B" : 696 , "C" :743 }}}     ]})
      

      What this is saying is find me all docs where key.val = 12345679 OR ( list.A = 696 AND C = 743 ) OR (list.B = 696 AND C = 743). Which should be semantically equivalent to the above.

      When I run the explain on this query I get:

      > db.test.find({ "$or" : [ { "key.val" : 123456789} ,     { "list" : { "$elemMatch" : {  "A" : 696  , "C" : 743 }}} ,{ "list" : { "$elemMatch" : {  "B" : 696 , "C" :743 }}}     ]}).explain()
      {
      	"clauses" : [
      		{
      			"cursor" : "BtreeCursor key.val_1",
      			"isMultiKey" : false,
      			"n" : 0,
      			"nscannedObjects" : 0,
      			"nscanned" : 0,
      			"nscannedObjectsAllPlans" : 0,
      			"nscannedAllPlans" : 0,
      			"scanAndOrder" : false,
      			"indexOnly" : false,
      			"nYields" : 0,
      			"nChunkSkips" : 0,
      			"millis" : 0,
      			"indexBounds" : {
      				"key.val" : [
      					[
      						123456789,
      						123456789
      					]
      				]
      			}
      		},
      		{
      			"cursor" : "BtreeCursor list.A_1",
      			"isMultiKey" : true,
      			"n" : 4,
      			"nscannedObjects" : 297,
      			"nscanned" : 297,
      			"nscannedObjectsAllPlans" : 594,
      			"nscannedAllPlans" : 594,
      			"scanAndOrder" : false,
      			"indexOnly" : false,
      			"nYields" : 0,
      			"nChunkSkips" : 0,
      			"millis" : 1,
      			"indexBounds" : {
      				"list.A" : [
      					[
      						696,
      						696
      					]
      				]
      			}
      		},
      		{
      			"cursor" : "BtreeCursor list.B_1",
      			"isMultiKey" : true,
      			"n" : 0,
      			"nscannedObjects" : 297,
      			"nscanned" : 297,
      			"nscannedObjectsAllPlans" : 594,
      			"nscannedAllPlans" : 594,
      			"scanAndOrder" : false,
      			"indexOnly" : false,
      			"nYields" : 0,
      			"nChunkSkips" : 0,
      			"millis" : 1,
      			"indexBounds" : {
      				"list.B" : [
      					[
      						696,
      						696
      					]
      				]
      			}
      		}
      	],
      	"n" : 4,
      	"nscannedObjects" : 594,
      	"nscanned" : 594,
      	"nscannedObjectsAllPlans" : 1188,
      	"nscannedAllPlans" : 1188,
      	"millis" : 3,
      	"server" : "Oz-Olivo-MacBook-Pro.local:27017"
      }
      
      

      This DOES use the indexes and finds 4 matches.

      Why does query 1 not use any index while query 2 does?

      I've uploaded my initial data load script in case it can be of any help. You may have to change the values for A, B, and C in the queries to find anything reasonable though, as the values are randomly generated

            Assignee:
            hari.khalsa@10gen.com hari.khalsa@10gen.com
            Reporter:
            osmar.olivo Osmar Olivo
            Votes:
            0 Vote for this issue
            Watchers:
            3 Start watching this issue

              Created:
              Updated:
              Resolved: