Shard filtering bug with collation


    • Query Optimization
    • Fully Compatible
    • ALL
    • v8.0, v7.0, v6.0, v5.0

      I ran the following in the no_passthrough suite.

       

      import {configureFailPoint} from "jstests/libs/fail_point_util.js";
      import {ShardingTest} from "jstests/libs/shardingtest.js";
      
      
      const caseInsensitive = {
          locale: "en_US",
          strength: 2
      };
      
      
      // 2 shards.
      var st = new ShardingTest({shards: 2});
      var testDB = st.s.getDB("test");
      assert.commandWorked(
          testDB.adminCommand({enableSharding: testDB.getName(), primaryShard: st.shard0.shardName}));
      
      
      // Create a collection with a case-insensitive default collation sharded on {a: 1}.
      var collCaseInsensitive = testDB.getCollection("case_insensitive");
      collCaseInsensitive.drop();
      assert.commandWorked(testDB.createCollection("case_insensitive", {collation: caseInsensitive}));
      assert.commandWorked(collCaseInsensitive.createIndex({a: 1}, {collation: {locale: "simple"}}));
      assert.commandWorked(testDB.adminCommand({
          shardCollection: collCaseInsensitive.getFullName(),
          key: {a: 1},
          collation: {locale: "simple"}
      }));
      
      
      // Split the collection.
      // shard0: [chunk 1] { "a" : { "$minKey" : 1 } } -->> { "a" : 0 }
      // shard0: [chunk 2] { "a" : 0 } -->> { "a" : "b"}
      // shard1: [chunk 3] { "a" : "b" } -->> { "a" : { "$maxKey" : 1 }}
      // Note that we need two chunks on shard0 for this repro. My guess is that it's because if we
      // move all the chunks from shard0 to shard1, we will somehow know to only target shard1 in the
      // query below.
      assert.commandWorked(
          testDB.adminCommand({split: collCaseInsensitive.getFullName(), middle: {a: 0}}));
      assert.commandWorked(
          testDB.adminCommand({split: collCaseInsensitive.getFullName(), middle: {a: "b"}}));
      
      
      assert.commandWorked(testDB.adminCommand(
          {moveChunk: collCaseInsensitive.getFullName(), find: {a: -1}, to: st.shard0.shardName}));
      assert.commandWorked(testDB.adminCommand(
          {moveChunk: collCaseInsensitive.getFullName(), find: {a: "a"}, to: st.shard0.shardName}));
      assert.commandWorked(testDB.adminCommand(
          {moveChunk: collCaseInsensitive.getFullName(), find: {a: "c"}, to: st.shard1.shardName}));
      
      
      // Put data on shard0, that will go into chunk 1.
      const doc = {
          _id: 0,
          a: "a"
      };
      assert.commandWorked(collCaseInsensitive.insert(doc));
      
      
      // Perform a chunk migration of chunk 1 from shard0 to shard1, but do not clean up orphans on
      // shard0.
      let suspendRangeDeletionShard0 = configureFailPoint(st.shard0, 'suspendRangeDeletion');
      testDB.adminCommand(
          {moveChunk: collCaseInsensitive.getFullName(), find: {a: "a"}, to: st.shard1.shardName});
      
      
      // Equality predicate. Since the query has non-simple collation we will have to broadcast to all
      // shards (since the shard key is on a simple collation index). However, we return the result doc
      // twice which means we do not properly do shard filtering in this case.
      const results = collCaseInsensitive.find({a: "a"}).collation(caseInsensitive).toArray();
      print("results are ", tojson(results));
      assert.eq(results, [doc, doc]);  // BAD! DUPLICATE RESULT!
      
      
      suspendRangeDeletionShard0.off();
      st.stop();
      

       

    • QO 2024-09-16, QO 2024-09-30, QO 2024-10-14

      Issue Status as of 29 January, 2026

      ISSUE DESCRIPTION AND IMPACT

      Queries against a sharded collection with a non-simple collation and an equality predicate against all fields of the sharding key may return duplicate records. This can only happen on a sharded collection during chunk migration. Non-sharded collections and queries issued when no chunk migrations are in progress are not affected. The issue will go away once the chunk migration has fully completed (including deletion of the orphaned documents from the donor shard). Since the issue only occurs transiently during chunk migration, no explicit remediation steps are required.
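      For illustration, the query shape that can return duplicates looks like the following. This is a minimal mongosh sketch reusing the collection and collation from the repro above; any equality predicate on the full shard key combined with a non-simple collation fits the pattern.

      // Sketch: "test.case_insensitive" is sharded on {a: 1} backed by a
      // simple-collation index, but the collection's default collation is
      // case-insensitive (see the repro above).
      const caseInsensitive = {locale: "en_US", strength: 2};
      const coll = db.getSiblingDB("test").case_insensitive;

      // Equality on the full shard key, but with a non-simple collation: mongos
      // cannot target a single shard, so the query is broadcast to all shards.
      // While a migration's orphaned documents are still present on the donor
      // shard, this can return the same document twice.
      coll.find({a: "a"}).collation(caseInsensitive).toArray();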

      DIAGNOSIS AND AFFECTED VERSIONS

      MongoDB 6.0.0 - 6.0.27, 7.0.0 - 7.0.28, 8.0.0 - 8.0.12
      MongoDB 7.0.29, 8.0.13, and all later versions contain a fix for this issue.

      REMEDIATION AND WORKAROUNDS

      We recommend upgrading to the latest version that contains the fix. MongoDB Atlas clusters on 7.0 and 8.0 have already been automatically upgraded to a version containing the fix.

      There is currently no known workaround without upgrading the cluster.
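      As a quick check (a sketch, assuming a mongosh connection to the cluster), you can confirm whether the running release already contains the fix:

      // Report the server version we are connected to, and compare it against the
      // fixed releases listed above (7.0.29, 8.0.13, or later).
      db.version();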

      Original description

      When a collection has a non-simple default collation and is sharded on a field whose backing index uses a simple collation (what matters is that it differs from the collection's default collation), we cannot do shard targeting for an equality query with a non-simple collation, so we broadcast the query to all shards. However, we also do not perform shard filtering in this case. As a result, if a chunk migration completes right before the query and the donor shard has not yet cleaned up its orphaned documents, we return duplicate results. I've attached a more thorough repro in the "steps to reproduce" section.
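      One way to see the broadcast behaviour described above (a sketch against the repro's collection and collation; the exact explain output shape varies by server version) is to run explain on the affected query and look at which shards appear in the winning plan:

      // Explain the collated equality query from the repro. Because the shard key
      // index uses the simple collation, mongos cannot use it for targeting, and
      // the winning plan is a SHARD_MERGE over every shard that owns chunks.
      const explain = collCaseInsensitive.find({a: "a"})
                          .collation(caseInsensitive)
                          .explain();
      printjson(explain.queryPlanner.winningPlan.shards.map(s => s.shardName));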

       

      This happens on master, 8.0, 7.0, 6.0, and 5.0 (i.e. the repro I attached succeeds on all these versions).

            Assignee:
            James Harrison
            Reporter:
            Militsa Sotirova
            Votes:
            0
            Watchers:
            19
