Uploaded image for project: 'Core Server'
  1. Core Server
  2. SERVER-39607

MongoDB 3.4 should unconditionally use afterOpTime for initial sync oplog fetching

    XMLWordPrintable

    Details

    • Type: Bug
    • Status: Closed
    • Priority: Major - P3
    • Resolution: Won't Fix
    • Affects Version/s: 3.4.19
    • Fix Version/s: None
    • Component/s: Replication
    • Labels:
      None
    • Operating System:
      ALL
    • Steps To Reproduce:
      Hide

      Run the following test on 3.6 (the script starts the newly added node with the "last-stable" binary, i.e. 3.4):

      	// Reproduction script: starts a one-node replica set, writes a document while the
      	// primary's 'WTPausePrimaryOplogDurabilityLoop' failpoint is enabled, adds a second node
      	// running the "last-stable" binary, and then checks that the new node finishes syncing
      	// and that the replicated data hashes match.
      	(function() {
      	    'use strict';
      	
      	    // NOTE(review): wait() used below is presumably defined in rslib.js - confirm.
      	    load('jstests/replsets/rslib.js');
      	    // Used as both the replica set name and the test database name.
      	    const basename = 'initial_sync_visibility';
      	
      	    jsTestLog('Bring up set');
      	    const rst = new ReplSetTest({name: basename, nodes: 1});
      	    rst.startSet();
      	    rst.initiate();
      	
      	    const primary = rst.getPrimary();
      	    const primaryDB = primary.getDB(basename);
       
      	    // Downgrade the feature compatibility version before the "last-stable" node is added.
      	    // NOTE(review): presumably required for the older binary to join the set - confirm.
      	    assert.commandWorked(primary.adminCommand({setFeatureCompatibilityVersion: "3.4"}));
      	
      	    jsTestLog('Create a collection');
      	    assert.writeOK(primaryDB['coll'].save({_id: "visible"}));
      	    jsTestLog('Make sure synced');
      	    rst.awaitReplication();
      	
      	    // With the failpoint on, the next write is acknowledged (writeOK) but, per the log
      	    // message, is meant to stay "invisible" until the failpoint is turned off again.
      	    jsTestLog('Activate WT visibility failpoint and write an invisible document');
      	    assert.commandWorked(primaryDB.adminCommand(
      	        {configureFailPoint: 'WTPausePrimaryOplogDurabilityLoop', mode: 'alwaysOn'}));
      	    assert.writeOK(primaryDB['coll'].save({_id: "invisible"}));
      	
      	    // Add the second node while the failpoint is still active. It runs the "last-stable"
      	    // binary and is started with numInitialSyncAttempts=3.
      	    jsTestLog('Bring up a new node');
      	    const secondary = rst.add({setParameter: 'numInitialSyncAttempts=3', binVersion: "last-stable"});
      	    rst.reInitiate();
      	    // The reconfig must not have caused a different node to become primary.
      	    assert.eq(primary, rst.getPrimary(), 'Primary changed after reconfig');
      	
      	    // Poll the new node until the test database reports at least one collection, which
      	    // the test treats as evidence that cloning has started.
      	    jsTestLog('Wait for new node to start cloning');
      	    secondary.setSlaveOk();
      	    const secondaryDB = secondary.getDB(basename);
      	    wait(function() {
      	        return secondaryDB.stats().collections >= 1;
      	    }, 'never saw new node starting to clone, was waiting for collections in: ' + basename);
      	
      	    // Only release the failpoint after cloning has begun, so the sync started while the
      	    // second write was still held back.
      	    jsTestLog('Disable WT visibility failpoint on primary making all visible.');
      	    assert.commandWorked(primaryDB.adminCommand(
      	        {configureFailPoint: 'WTPausePrimaryOplogDurabilityLoop', mode: 'off'}));
      	
      	    jsTestLog('Wait for both nodes to be up-to-date');
      	    rst.awaitSecondaryNodes();
      	    rst.awaitReplication();
      	
      	    jsTestLog('Check all OK');
      	    rst.checkReplicatedDataHashes();
      	    // NOTE(review): 15 is presumably the signal number (SIGTERM) used to stop the nodes - confirm.
      	    rst.stopSet(15);
      	})();
      

      Show
      Run the following test on 3.6: (function() { 'use strict'; load('jstests/replsets/rslib.js'); const basename = 'initial_sync_visibility'; jsTestLog('Bring up set'); const rst = new ReplSetTest({name: basename, nodes: 1}); rst.startSet(); rst.initiate(); const primary = rst.getPrimary(); const primaryDB = primary.getDB(basename);   assert.commandWorked(primary.adminCommand({setFeatureCompatibilityVersion: "3.4"})); jsTestLog('Create a collection'); assert.writeOK(primaryDB['coll'].save({_id: "visible"})); jsTestLog('Make sure synced'); rst.awaitReplication(); jsTestLog('Activate WT visibility failpoint and write an invisible document'); assert.commandWorked(primaryDB.adminCommand( {configureFailPoint: 'WTPausePrimaryOplogDurabilityLoop', mode: 'alwaysOn'})); assert.writeOK(primaryDB['coll'].save({_id: "invisible"})); jsTestLog('Bring up a new node'); const secondary = rst.add({setParameter: 'numInitialSyncAttempts=3', binVersion: "last-stable"}); rst.reInitiate(); assert.eq(primary, rst.getPrimary(), 'Primary changed after reconfig'); jsTestLog('Wait for new node to start cloning'); secondary.setSlaveOk(); const secondaryDB = secondary.getDB(basename); wait(function() { return secondaryDB.stats().collections >= 1; }, 'never saw new node starting to clone, was waiting for collections in: ' + basename); jsTestLog('Disable WT visibility failpoint on primary making all visible.'); assert.commandWorked(primaryDB.adminCommand( {configureFailPoint: 'WTPausePrimaryOplogDurabilityLoop', mode: 'off'})); jsTestLog('Wait for both nodes to be up-to-date'); rst.awaitSecondaryNodes(); rst.awaitReplication(); jsTestLog('Check all OK'); rst.checkReplicatedDataHashes(); rst.stopSet(15); })();
    • Linked BF Score:
      0

      Description

      MongoDB 3.4 adds afterOpTime to its initial sync oplog fetching query only if the featureCompatibilityVersion is 3.4. However, that branch is effectively dead code: the featureCompatibilityVersion has not yet been set when the query is constructed, so it still has its default value of 3.2. The afterOpTime is essential when initial syncing from a 3.6 node because of the oplog visibility rules on 3.6; without it, the initial sync can fail with OplogStartMissing. Sending afterOpTime should not affect behavior when syncing from a 3.4 or 3.2 node, though this should be tested.

        Attachments

          Issue Links

            Activity

              People

              Assignee:
              backlog-server-repl Backlog - Replication Team
              Reporter:
              tess.avitabile Tess Avitabile
              Participants:
              Votes:
              0 Vote for this issue
              Watchers:
              3 Start watching this issue

                Dates

                Created:
                Updated:
                Resolved: