[SERVER-37186] Cannot move primary from 4.0 to 3.6 shard Created: 18/Sep/18  Updated: 06/Dec/22  Resolved: 06/Feb/20

Status: Closed
Project: Core Server
Component/s: Sharding
Affects Version/s: 4.0.2
Fix Version/s: None

Type: Bug Priority: Major - P3
Reporter: Tess Avitabile (Inactive) Assignee: [DO NOT USE] Backlog - Sharding Team
Resolution: Won't Fix Votes: 0
Labels: None
Remaining Estimate: Not Specified
Time Spent: Not Specified
Original Estimate: Not Specified

Assigned Teams:
Sharding
Operating System: ALL
Steps To Reproduce:

(function() {
    "use strict";

    // Mixed-version cluster: shard0 runs the "latest" binary (the 4.0 shard), while shard1 and
    // the mongos both run the "last-stable" binary (the 3.6 shard).
    const shardOptions = [{binVersion: "latest"}, {binVersion: "last-stable"}];
    const mongosOptions = {binVersion: "last-stable"};
    const cluster = new ShardingTest({shards: shardOptions, other: {mongosOptions: mongosOptions}});
    cluster.stopBalancer();

    const testDb = cluster.s.getDB("test");
    const dbName = testDb.getName();

    // Insert one document so the database holds data, then enable sharding on the database.
    assert.commandWorked(testDb.getCollection("coll").insert({_id: 0}));
    assert.commandWorked(testDb.adminCommand({enableSharding: dbName}));
    // Make the 4.0 shard the primary, then move the primary to the 3.6 shard. The second call fails.
    cluster.ensurePrimaryShard(dbName, cluster.shard0.shardName);
    cluster.ensurePrimaryShard(dbName, cluster.shard1.shardName);

    cluster.stop();
}());

Sprint: Sharding 2018-11-05
Participants:

 Description   

Attempting to move primary from a 4.0 shard to a 3.6 shard fails with the following error:

[js_test:repro] 2018-09-18T11:17:24.243-0400 c20022| 2018-09-18T11:17:24.239-0400 I SHARDING [conn23] clone failed{ operationTime: Timestamp(1537283844, 6), ok: 0.0, errmsg: "Failed to parse: { find: "coll", snapshot: true, noCursorTimeout: true, $db: "test" }. Unrecognized field 'snapshot'.", code: 9, codeName: "FailedToParse", $gleStats: { lastOpTime: { ts: Timestamp(1537283844, 6), t: 1 }, electionId: ObjectId('7fffffff0000000000000001') }, $configServerState: { opTime: { ts: Timestamp(1537283844, 4), t: 1 } }, $clusterTime: { clusterTime: Timestamp(1537283844, 6), signature: { hash: BinData(0, 0000000000000000000000000000000000000000), keyId: 0 } } }
...
[js_test:repro] 2018-09-18T11:17:24.257-0400 assert failed : {
[js_test:repro] 2018-09-18T11:17:24.257-0400 	"ok" : 0,
[js_test:repro] 2018-09-18T11:17:24.258-0400 	"errmsg" : "clone failed",
[js_test:repro] 2018-09-18T11:17:24.258-0400 	"code" : 96,
[js_test:repro] 2018-09-18T11:17:24.258-0400 	"codeName" : "OperationFailed",
[js_test:repro] 2018-09-18T11:17:24.258-0400 	"lastCommittedOpTime" : Timestamp(1537283844, 5),
[js_test:repro] 2018-09-18T11:17:24.258-0400 	"operationTime" : Timestamp(1537283844, 5),
[js_test:repro] 2018-09-18T11:17:24.258-0400 	"$clusterTime" : {
[js_test:repro] 2018-09-18T11:17:24.258-0400 		"clusterTime" : Timestamp(1537283844, 5),
[js_test:repro] 2018-09-18T11:17:24.258-0400 		"signature" : {
[js_test:repro] 2018-09-18T11:17:24.258-0400 			"hash" : BinData(0,"AAAAAAAAAAAAAAAAAAAAAAAAAAA="),
[js_test:repro] 2018-09-18T11:17:24.259-0400 			"keyId" : NumberLong(0)
[js_test:repro] 2018-09-18T11:17:24.259-0400 		}
[js_test:repro] 2018-09-18T11:17:24.259-0400 	}
[js_test:repro] 2018-09-18T11:17:24.259-0400 }
[js_test:repro] 2018-09-18T11:17:24.259-0400 doassert@src/mongo/shell/assert.js:20:14
[js_test:repro] 2018-09-18T11:17:24.259-0400 assert@src/mongo/shell/assert.js:150:9
[js_test:repro] 2018-09-18T11:17:24.259-0400 ShardingTest/this.ensurePrimaryShard@src/mongo/shell/shardingtest.js:957:9
[js_test:repro] 2018-09-18T11:17:24.259-0400 @/home/tess/repro.js:19:1
[js_test:repro] 2018-09-18T11:17:24.259-0400 @/home/tess/repro.js:1:2



 Comments   
Comment by Sheeri Cabral (Inactive) [ 06/Feb/20 ]

Users are pretty careful to make sure systems are compatible when doing movePrimary. Does not seem pervasive enough to fix.

Comment by Esha Maharishi (Inactive) [ 18/Sep/18 ]

Note that this only appears to happen if there is currently data on the from-shard.

Generated at Thu Feb 08 04:45:16 UTC 2024 using Jira 9.7.1#970001-sha1:2222b88b221c4928ef0de3161136cc90c8356a66.