Uploaded image for project: 'Core Server'
  1. Core Server
  2. SERVER-32356

Use of options:x and comments with $regex search including \n can lead to incorrect documents being returned

    • Type: Bug
    • Resolution: Works as Designed
    • Priority: Major - P3
    • None
    • Affects Version/s: 3.4.9
    • Component/s: Querying
    • Labels:
      None
    • ALL
    • Hide

      Pass the following in a file to the mongo shell:

        db.regexOptionsX.drop();
      
        db.regexOptionsX.insert([
          {_id: 1, f1: "realapp"  },
          {_id: 2, f1: "rea"      },
          {_id: 3, f1: "rea\napp" },
          {_id: 4, f1: "rea\nlapp" },
          {_id: 5, f1: "suretrea" }
        ]) ;
      
        print("\nAll: find()");
        var a = db.regexOptionsX.find() ; while (a.hasNext()) {printjson(a.next())}
      
        print("\nQuotes: find({f1:{$regex:'rea \\n# comment 1\\nl',$options:'x'}})");
        var s = db.regexOptionsX.find({f1:{$regex:'rea \n# comment 1\nl',$options:'x'}})
        while (s.hasNext()) {printjson(s.next())}
      
        print("\nSlashes: find({f1:{$regex:/rea \\n# comment 1\\nl/,$options:'x'}})");
        var x = db.regexOptionsX.find({f1:{$regex:/rea \n# comment 1\nl/,$options:'x'}})
        while (x.hasNext()) {printjson(x.next())}
      
      
        print("\nNo Comment and Quotes: find({f1:{$regex:'rea \\nl',$options:'x'}})");
        var s = db.regexOptionsX.find({f1:{$regex:'rea \nl',$options:'x'}})
        while (s.hasNext()) {printjson(s.next())}
      
        print("\nNo Comment and Slashes: find({f1:{$regex:/rea \\nl/,$options:'x'}})");
        var x = db.regexOptionsX.find({f1:{$regex:/rea \nl/,$options:'x'}})
        while (x.hasNext()) {printjson(x.next())}
      
      
        print("\nNo space, no comment, and Quotes: find({f1:{$regex:'rea\\nl',$options:'x'}})");
        var s = db.regexOptionsX.find({f1:{$regex:'rea\nl',$options:'x'}})
        while (s.hasNext()) {printjson(s.next())}
      
        print("\nNo space, no comment, and Slashes: find({f1:{$regex:/rea\\nl/,$options:'x'}})");
        var x = db.regexOptionsX.find({f1:{$regex:/rea\nl/,$options:'x'}})
        while (x.hasNext()) {printjson(x.next())}
      
      
        print("\nNo options:x (or spaces or comments) and Quotes: find({f1:{$regex:'rea\\nl'}})");
        var s = db.regexOptionsX.find({f1:{$regex:'rea\nl'}})
        while (s.hasNext()) {printjson(s.next())}
      
        print("\nNo options:x (or spaces or comments) and Slashes: find({f1:{$regex:/rea\\nl/}})");
        var x = db.regexOptionsX.find({f1:{$regex:/rea\nl/}})
        while (x.hasNext()) {printjson(x.next())}
      

      Summary of output:

      • The only document that should be returned is {"_id": 4, "f1": "rea\nlapp"} as it is the only one with the "rea" followed by a newline and an "l".
      • If the $regex uses quotes and $options:'x', then this document is returned: {"_id": 1, "f1": "realapp"}, meaning the newline is being treated as formatting whitespace and not as something to be searched for. With neither a #comment
        nor $options:'x' present, it returns the correct results.
      • If the $regex uses slashes, $options:'x', and has a comment, then this document: {"_id": 3, "f1": "rea\napp"} is returned as well as the correct one. With no #comment it gets the right results, even with $options:'x' present.
      Show
      Pass the following in a file to the mongo shell: db.regexOptionsX.drop(); db.regexOptionsX.insert([ {_id: 1, f1: "realapp" }, {_id: 2, f1: "rea" }, {_id: 3, f1: "rea\napp" }, {_id: 4, f1: "rea\nlapp" }, {_id: 5, f1: "suretrea" } ]) ; print( "\nAll: find()" ); var a = db.regexOptionsX.find() ; while (a.hasNext()) {printjson(a.next())} print( "\nQuotes: find({f1:{$regex: 'rea \\n# comment 1\\nl' ,$options: 'x' }})" ); var s = db.regexOptionsX.find({f1:{$regex: 'rea \n# comment 1\nl' ,$options: 'x' }}) while (s.hasNext()) {printjson(s.next())} print( "\nSlashes: find({f1:{$regex:/rea \\n# comment 1\\nl/,$options: 'x' }})" ); var x = db.regexOptionsX.find({f1:{$regex:/rea \n# comment 1\nl/,$options: 'x' }}) while (x.hasNext()) {printjson(x.next())} print( "\nNo Comment and Quotes: find({f1:{$regex: 'rea \\nl' ,$options: 'x' }})" ); var s = db.regexOptionsX.find({f1:{$regex: 'rea \nl' ,$options: 'x' }}) while (s.hasNext()) {printjson(s.next())} print( "\nNo Comment and Slashes: find({f1:{$regex:/rea \\nl/,$options: 'x' }})" ); var x = db.regexOptionsX.find({f1:{$regex:/rea \nl/,$options: 'x' }}) while (x.hasNext()) {printjson(x.next())} print( "\nNo space, no comment, and Quotes: find({f1:{$regex: 'rea\\nl' ,$options: 'x' }})" ); var s = db.regexOptionsX.find({f1:{$regex: 'rea\nl' ,$options: 'x' }}) while (s.hasNext()) {printjson(s.next())} print( "\nNo space, no comment, and Slashes: find({f1:{$regex:/rea\\nl/,$options: 'x' }})" ); var x = db.regexOptionsX.find({f1:{$regex:/rea\nl/,$options: 'x' }}) while (x.hasNext()) {printjson(x.next())} print( "\nNo options:x (or spaces or comments) and Quotes: find({f1:{$regex: 'rea\\nl' }})" ); var s = db.regexOptionsX.find({f1:{$regex: 'rea\nl' }}) while (s.hasNext()) {printjson(s.next())} print( "\nNo options:x (or spaces or comments) and Slashes: find({f1:{$regex:/rea\\nl/}})" ); var x = db.regexOptionsX.find({f1:{$regex:/rea\nl/}}) while (x.hasNext()) {printjson(x.next())} Summary of output: The only document that 
should be returned is { "_id": 4, "f1": "rea\nlapp" } as it is the only one with the "rea" followed by a newline and an "l". If the $regex uses quotes and $options:'x' , then this document is returned: { "_id": 1, "f1": "realapp" }, meaning the newline is being seen as a formatting "space" and not something to be searched for. With no #comment or $options:'x' present it gets the right results. If the $regex uses slashes, $options:'x' , and has a comment, then this document: { "_id": 3, "f1": "rea\napp" } is returned as well as the correct one. With no #comment it gets the right results, even with $options:'x' present.
    • Query 2018-01-15

      With a simple set of documents:

      • {_id: 1, f1: "realapp"}
      • {_id: 2, f1: "rea"}
      • {_id: 3, f1: "rea\napp"}
      • {_id: 4, f1: "rea\nlapp"}
      • {_id: 5, f1: "suretrea"}

      these two $regex expressions (identical except for use of quotes or slashes):

      • find({f1:{$regex:'rea \n# comment 1\nl',$options:"x"}})
      • find({f1:{$regex:/rea \n# comment 1\nl/,$options:"x"}})

      give different results, both incorrect, despite our $regex documentation suggesting that the syntaxes with the search strings wrapped in quotes or slashes are equivalent.

      Removing $options:"x", the comment, and the space leads to correct results from both forms.

        1. regexOptionsX.js
          2 kB
        2. regexOptionsX.out
          1 kB

            Assignee:
            david.storch@mongodb.com David Storch
            Reporter:
            william.byrne@mongodb.com William Byrne III
            Votes:
            1 Vote for this issue
            Watchers:
            10 Start watching this issue

              Created:
              Updated:
              Resolved: