Uploaded image for project: 'Core Server'
  1. Core Server
  2. SERVER-39607

MongoDB 3.4 should unconditionally use afterOpTime for initial sync oplog fetching

    • Type: Bug
    • Resolution: Won't Fix
    • Priority: Major - P3
    • None
    • Affects Version/s: 3.4.19
    • Component/s: Replication
    • Labels:
      None
    • Replication
    • ALL
    • Hide

      Run the following test on 3.6:

      	/**
      	 * Reproduction script for SERVER-39607.
      	 *
      	 * Starts a one-node replica set, sets featureCompatibilityVersion to "3.4" on the
      	 * primary, writes one document while the 'WTPausePrimaryOplogDurabilityLoop' failpoint
      	 * is enabled, and then adds a "last-stable" node that has to initial sync from that
      	 * primary. Per the ticket, the initial sync oplog fetch on the new node can fail with
      	 * OplogStartMissing because its query does not carry afterOpTime.
      	 *
      	 * The order of the steps below is the reproduction itself; do not reorder them.
      	 */
      	(function() {
      	    'use strict';
      	
      	    // NOTE(review): presumably provides the wait() helper used below - confirm in rslib.js.
      	    load('jstests/replsets/rslib.js');
      	    // Used as the replica set name, the database name, and in the wait() failure message.
      	    const basename = 'initial_sync_visibility';
      	
      	    jsTestLog('Bring up set');
      	    const rst = new ReplSetTest({name: basename, nodes: 1});
      	    rst.startSet();
      	    rst.initiate();
      	
      	    const primary = rst.getPrimary();
      	    const primaryDB = primary.getDB(basename);
      
      	    // Set FCV to "3.4" on the primary before the last-stable node is added.
      	    assert.commandWorked(primary.adminCommand({setFeatureCompatibilityVersion: "3.4"}));
      	
      	    jsTestLog('Create a collection');
      	    // Baseline document written before the failpoint is enabled.
      	    assert.writeOK(primaryDB['coll'].save({_id: "visible"}));
      	    jsTestLog('Make sure synced');
      	    rst.awaitReplication();
      	
      	    jsTestLog('Activate WT visibility failpoint and write an invisible document');
      	    // With the failpoint on, the write below succeeds on the primary but is the document
      	    // this test treats as "invisible" until the failpoint is turned off again.
      	    assert.commandWorked(primaryDB.adminCommand(
      	        {configureFailPoint: 'WTPausePrimaryOplogDurabilityLoop', mode: 'alwaysOn'}));
      	    assert.writeOK(primaryDB['coll'].save({_id: "invisible"}));
      	
      	    jsTestLog('Bring up a new node');
      	    // The new node runs the "last-stable" binary and is started with
      	    // numInitialSyncAttempts=3.
      	    // NOTE(review): presumably "last-stable" resolves to 3.4 when run from a 3.6 tree,
      	    // and the parameter caps initial sync retries at 3 - confirm.
      	    const secondary = rst.add({setParameter: 'numInitialSyncAttempts=3', binVersion: "last-stable"});
      	    rst.reInitiate();
      	    // The reconfig must not have caused an election; the same node must still be primary.
      	    assert.eq(primary, rst.getPrimary(), 'Primary changed after reconfig');
      	
      	    jsTestLog('Wait for new node to start cloning');
      	    secondary.setSlaveOk();
      	    const secondaryDB = secondary.getDB(basename);
      	    // A collection showing up in the test database on the new node is taken as the
      	    // signal that cloning has started.
      	    wait(function() {
      	        return secondaryDB.stats().collections >= 1;
      	    }, 'never saw new node starting to clone, was waiting for collections in: ' + basename);
      	
      	    jsTestLog('Disable WT visibility failpoint on primary making all visible.');
      	    // Only turned off after cloning has begun, so the initial sync started while the
      	    // failpoint was still active.
      	    assert.commandWorked(primaryDB.adminCommand(
      	        {configureFailPoint: 'WTPausePrimaryOplogDurabilityLoop', mode: 'off'}));
      	
      	    jsTestLog('Wait for both nodes to be up-to-date');
      	    rst.awaitSecondaryNodes();
      	    rst.awaitReplication();
      	
      	    jsTestLog('Check all OK');
      	    rst.checkReplicatedDataHashes();
      	    // NOTE(review): 15 is presumably the signal number (SIGTERM) passed to the
      	    // shutdown - confirm against ReplSetTest.stopSet.
      	    rst.stopSet(15);
      	})();
      
      Show
      Run the following test on 3.6: (function() { 'use strict' ; load( 'jstests/replsets/rslib.js' ); const basename = 'initial_sync_visibility' ; jsTestLog( 'Bring up set' ); const rst = new ReplSetTest({name: basename, nodes: 1}); rst.startSet(); rst.initiate(); const primary = rst.getPrimary(); const primaryDB = primary.getDB(basename); assert .commandWorked(primary.adminCommand({setFeatureCompatibilityVersion: "3.4" })); jsTestLog( 'Create a collection' ); assert .writeOK(primaryDB[ 'coll' ].save({_id: "visible" })); jsTestLog( 'Make sure synced' ); rst.awaitReplication(); jsTestLog( 'Activate WT visibility failpoint and write an invisible document' ); assert .commandWorked(primaryDB.adminCommand( {configureFailPoint: 'WTPausePrimaryOplogDurabilityLoop' , mode: 'alwaysOn' })); assert .writeOK(primaryDB[ 'coll' ].save({_id: "invisible" })); jsTestLog( 'Bring up a new node' ); const secondary = rst.add({setParameter: 'numInitialSyncAttempts=3' , binVersion: "last-stable" }); rst.reInitiate(); assert .eq(primary, rst.getPrimary(), 'Primary changed after reconfig' ); jsTestLog( 'Wait for new node to start cloning' ); secondary.setSlaveOk(); const secondaryDB = secondary.getDB(basename); wait(function() { return secondaryDB.stats().collections >= 1; }, 'never saw new node starting to clone, was waiting for collections in: ' + basename); jsTestLog( 'Disable WT visibility failpoint on primary making all visible.' ); assert .commandWorked(primaryDB.adminCommand( {configureFailPoint: 'WTPausePrimaryOplogDurabilityLoop' , mode: 'off' })); jsTestLog( 'Wait for both nodes to be up-to-date' ); rst.awaitSecondaryNodes(); rst.awaitReplication(); jsTestLog( 'Check all OK' ); rst.checkReplicatedDataHashes(); rst.stopSet(15); })();
    • 15

      MongoDB 3.4 uses afterOpTime for its initial sync oplog fetching query if the featureCompatibilityVersion is 3.4. However, this is dead code because the featureCompatibilityVersion is unset when the query is constructed, so it has its default value of 3.2. This afterOpTime is essential when initial syncing from a 3.6 node due to the oplog visibility rules on 3.6, and without it, the initial sync can fail with OplogStartMissing. The afterOpTime should not affect behavior when syncing from a 3.4 or 3.2 node, though this should be tested.

            Assignee:
            backlog-server-repl [DO NOT USE] Backlog - Replication Team
            Reporter:
            tess.avitabile@mongodb.com Tess Avitabile (Inactive)
            Votes:
            0 Vote for this issue
            Watchers:
            3 Start watching this issue

              Created:
              Updated:
              Resolved: