Initial sync fail "Broken pipe"

    • Type: Bug
    • Resolution: Unresolved
    • Priority: Major - P3
    • None
    • Affects Version/s: None
    • Component/s: None
    • ALL
    • None
    • None
    • None
    • None
    • None
    • None
    • None

      We have a sharded cluster with one shard, running Community version 6.0.25.
      One node has 35 TB of data.

      We are trying to add a node, and during the initial sync process, we encounter an error every time. Here are the most interesting lines from the log:
      Log from source host:

      {"t":{"$date":"2026-05-15T14:19:09.339+03:00"},"s":"I",  "c":"NETWORK",  "id":22989,   "ctx":"conn5887823","msg":"Error sending response to client. Ending connection from remote","attr":{"error":{"code":9001,"codeName":"SocketException","errmsg":"Broken pipe"},"remote":"10.140.101.205:42256","connectionId":5887823}}
      

      Log from receiver host:

      {"t":{"$date":"2026-05-15T14:19:08.296+03:00"},"s":"I",  "c":"INITSYNC", "id":21192,   "ctx":"ReplCoordExtern-0","msg":"Initial sync status and statistics","attr":{"status":"in_progre
      ss","statistics":{"method":"logical","failedInitialSyncAttempts":1,"maxFailedInitialSyncAttempts":10,"initialSyncStart":{"$date":"2026-05-15T06:18:24.971Z"},"totalInitialSyncElapsedMi
      llis":18043325,"initialSyncAttempts":[{"durationMillis":18042292,"status":"InitialSyncFailure: error cloning databases :: caused by :: HostUnreachable: Error cloning collection 'DOCST
      ORE_CONTENT.CONTENT_DOC_ADD.chunks' :: caused by :: network error while attempting to run command 'collStats' on host 'eip-mongo-fs-shrd-fks03:27011' ","syncSource":"eip-mongo-fs-shrd
      -fks03:27011","rollBackId":5,"operationsRetried":0,"totalTimeUnreachableMillis":0}],"approxTotalDataSize":118296229456526,"approxTotalBytesCopied":556127993559,"remainingInitialSyncEs
      timatedMillis":3820025139,"appliedOps":0,"initialSyncOplogStart":{"$timestamp":{"t":1778825905,"i":283}},"totalTimeUnreachableMillis":0,"databases":{"databasesToClone":2,"databasesClo
      ned":1,"admin":{"collections":3,"clonedCollections":3,"start":{"$date":"2026-05-15T06:18:26.014Z"},"end":{"$date":"2026-05-15T06:18:26.184Z"},"elapsedMillis":170,"admin.system.version
      ":{"documentsToCopy":3,"documentsCopied":3,"indexes":1,"fetchedBatches":1,"bytesToCopy":589,"approxBytesCopied":588,"start":{"$date":"2026-05-15T06:18:26.014Z"},"end":{"$date":"2026-0
      5-15T06:18:26.056Z"},"elapsedMillis":42,"receivedBatches":1},"admin.system.roles":{"documentsToCopy":1,"documentsCopied":1,"indexes":2,"fetchedBatches":1,"bytesToCopy":167,"approxByte
      sCopied":167,"start":{"$date":"2026-05-15T06:18:26.056Z"},"end":{"$date":"2026-05-15T06:18:26.109Z"},"elapsedMillis":53,"receivedBatches":1},"admin.system.users":{"documentsToCopy":3,
      "documentsCopied":3,"indexes":2,"fetchedBatches":1,"bytesToCopy":1772,"approxBytesCopied":1770,"start":{"$date":"2026-05-15T06:18:26.109Z"},"end":{"$date":"2026-05-15T06:18:26.184Z"},
      "elapsedMillis":75,"receivedBatches":1}},"DOCSTORE_CONTENT":{"collections":10,"clonedCollections":1,"start":{"$date":"2026-05-15T06:18:26.186Z"},"DOCSTORE_CONTENT.CONTENT_DOC_SIGN.fil
      es":{"documentsToCopy":2561896267,"documentsCopied":2562801802,"indexes":2,"fetchedBatches":34196,"bytesToCopy":555931489939,"approxBytesCopied":556127991034,"start":{"$date":"2026-05
      -15T06:18:26.186Z"},"end":{"$date":"2026-05-15T11:03:14.161Z"},"elapsedMillis":17087975,"receivedBatches":34196},"DOCSTORE_CONTENT.CONTENT_DOC_ADD.chunks":{"documentsToCopy":0,"docume
      ntsCopied":0,"indexes":0,"fetchedBatches":0,"bytesToCopy":0,"start":{"$date":"2026-05-15T11:03:14.161Z"},"receivedBatches":0},"DOCSTORE_CONTENT.CONTENT_DOC_SIGN.chunks":{"documentsToC
      opy":0,"documentsCopied":0,"indexes":0,"fetchedBatches":0,"bytesToCopy":0,"receivedBatches":0},"DOCSTORE_CONTENT.CONTENT_DOC_COMPOUND.chunks":{"documentsToCopy":0,"documentsCopied":0,
      "indexes":0,"fetchedBatches":0,"bytesToCopy":0,"receivedBatches":0},"DOCSTORE_CONTENT.CONTENT_DOC_COMPOUND.files":{"documentsToCopy":0,"documentsCopied":0,"indexes":0,"fetchedBatches"
      :0,"bytesToCopy":0,"receivedBatches":0},"DOCSTORE_CONTENT.CONTENT_DOC_ADD.files":{"documentsToCopy":0,"documentsCopied":0,"indexes":0,"fetchedBatches":0,"bytesToCopy":0,"receivedBatch
      es":0},"DOCSTORE_CONTENT.CONTENT_DOC_TEMP.chunks":{"documentsToCopy":0,"documentsCopied":0,"indexes":0,"fetchedBatches":0,"bytesToCopy":0,"receivedBatches":0},"DOCSTORE_CONTENT.CONTEN
      T_DOC_XML.chunks":{"documentsToCopy":0,"documentsCopied":0,"indexes":0,"fetchedBatches":0,"bytesToCopy":0,"receivedBatches":0},"DOCSTORE_CONTENT.CONTENT_DOC_XML.files":{"documentsToCo
      py":0,"documentsCopied":0,"indexes":0,"fetchedBatches":0,"bytesToCopy":0,"receivedBatches":0},"DOCSTORE_CONTENT.CONTENT_DOC_TEMP.files":{"documentsToCopy":0,"documentsCopied":0,"index
      es":0,"fetchedBatches":0,"bytesToCopy":0,"receivedBatches":0}},"config":{"collections":0,"clonedCollections":0}}}}}
      {"t":{"$date":"2026-05-15T14:19:08.297+03:00"},"s":"E",  "c":"INITSYNC", "id":21200,   "ctx":"ReplCoordExtern-0","msg":"Initial sync attempt failed","attr":{"attemptsLeft":9,"error":"
      InitialSyncFailure: error cloning databases :: caused by :: HostUnreachable: Error cloning collection 'DOCSTORE_CONTENT.CONTENT_DOC_ADD.chunks' :: caused by :: network error while att
      empting to run command 'collStats' on host 'eip-mongo-fs-shrd-fks03:27011' "}}
      {"t":{"$date":"2026-05-15T14:19:09.266+03:00"},"s":"I",  "c":"ACCESS",   "id":20250,   "ctx":"conn63392","msg":"Authentication succeeded","attr":{"mechanism":"SCRAM-SHA-256","speculat
      ive":true,"principalName":"admin","authenticationDatabase":"admin","remote":"127.0.0.1:51840","extraInfo":{}}}
      {"t":{"$date":"2026-05-15T14:19:09.297+03:00"},"s":"I",  "c":"INITSYNC", "id":21164,   "ctx":"ReplCoordExtern-3","msg":"Starting initial sync attempt","attr":{"initialSyncAttempt":2,"
      initialSyncMaxAttempts":10}}
      {"t":{"$date":"2026-05-15T14:19:09.298+03:00"},"s":"I",  "c":"STORAGE",  "id":20318,   "ctx":"ReplCoordExtern-3","msg":"Finishing collection drop","attr":{"namespace":"local.temp_oplog_buffer","uuid":{"uuid":{"$uuid":"e038197b-49ef-40d8-b498-5d5677a9ad68"}}}}
      {"t":{"$date":"2026-05-15T14:19:09.298+03:00"},"s":"I",  "c":"STORAGE",  "id":22206,   "ctx":"ReplCoordExtern-3","msg":"Deferring table drop for index","attr":{"index":"_id_","namespace":"local.temp_oplog_buffer","uuid":{"uuid":{"$uuid":"e038197b-49ef-40d8-b498-5d5677a9ad68"}},"ident":"index-3-2159468939868735742","dropTime":{"checkpointIteration":"291"}}}
      {"t":{"$date":"2026-05-15T14:19:09.298+03:00"},"s":"I",  "c":"STORAGE",  "id":22214,   "ctx":"ReplCoordExtern-3","msg":"Deferring table drop for collection","attr":{"namespace":"local.temp_oplog_buffer","ident":"collection-2-2159468939868735742","dropTime":{"checkpointIteration":"291"}}}
      {"t":{"$date":"2026-05-15T14:19:09.299+03:00"},"s":"I",  "c":"STORAGE",  "id":20320,   "ctx":"ReplCoordExtern-3","msg":"createCollection","attr":{"namespace":"local.temp_oplog_buffer","uuidDisposition":"generated","uuid":{"uuid":{"$uuid":"c50511e3-d5aa-4834-ba82-8bcd82dafa4f"}},"options":{"temp":true}}}
      {"t":{"$date":"2026-05-15T14:19:09.312+03:00"},"s":"I",  "c":"INDEX",    "id":20345,   "ctx":"ReplCoordExtern-3","msg":"Index build: done building","attr":{"buildUUID":null,"collectionUUID":{"uuid":{"$uuid":"c50511e3-d5aa-4834-ba82-8bcd82dafa4f"}},"namespace":"local.temp_oplog_buffer","index":"_id_","ident":"index-16-2159468939868735742","collectionIdent":"collection-15-2159468939868735742","commitTimestamp":null}}
      {"t":{"$date":"2026-05-15T14:19:09.313+03:00"},"s":"I",  "c":"REPL",     "id":8423401, "ctx":"ReplCoordExtern-3","msg":"Sync source candidate is eligible","attr":{"syncSourceCandidate":"eip-mongo-fs-shrd-fks02:27011"}}
      {"t":{"$date":"2026-05-15T14:19:09.313+03:00"},"s":"I",  "c":"REPL",     "id":8423401, "ctx":"ReplCoordExtern-3","msg":"Sync source candidate is eligible","attr":{"syncSourceCandidate":"eip-mongo-fs-shrd-fks03:27011"}}
      {"t":{"$date":"2026-05-15T14:19:09.313+03:00"},"s":"I",  "c":"REPL",     "id":8423401, "ctx":"ReplCoordExtern-3","msg":"Sync source candidate is eligible","attr":{"syncSourceCandidate":"eip-mongo-fs-shrd-fks04:27011"}}
      {"t":{"$date":"2026-05-15T14:19:09.313+03:00"},"s":"I",  "c":"REPL",     "id":3873114, "ctx":"ReplCoordExtern-3","msg":"Cannot select sync source with higher latency than the best candidate","attr":{"syncSourceCandidate":"eip-mongo-fs-shrd-fks04:27011","syncSourceCandidatePingMillis":11,"closestNode":"eip-mongo-fs-shrd-fks03:27011","closestPingMillis":0}}
      {"t":{"$date":"2026-05-15T14:19:09.313+03:00"},"s":"I",  "c":"REPL",     "id":8423401, "ctx":"ReplCoordExtern-3","msg":"Sync source candidate is eligible","attr":{"syncSourceCandidate":"eip-mongo-fs-shrd-fks05:27011"}}
      

      There is no interesting information in the logs of the other nodes — no failures and no high load.
      Your answer to the previous issue SERVER-123111 was very helpful for many clusters, but in this case we have no idea.

            Assignee:
            Chris Kelly
            Reporter:
            Asel Magzh
            Votes:
            0 Vote for this issue
            Watchers:
            2 Start watching this issue

              Created:
              Updated: