-
Type:
Bug
-
Resolution: Unresolved
-
Priority:
Major - P3
-
None
-
Affects Version/s: None
-
Component/s: None
While investigating PYTHON-2872 I discovered another bug in cursor handling after step down. In Pymongo, it's possible for a cursor getMore to select an RSOther node and this causes to driver to mistakenly run the getMore without the original session (eg the we omit the "lsid" field). This causes the getMore to fail with 'errmsg': 'Cannot run getMore on cursor 3944699655485662303, which was created in session ab2ce191-a110-43b2-809a-9f335b239025 - 47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU= - - , without an lsid', 'code': 50737, 'codeName': 'Location50737'
Traceback:
________ TestConnectionsSurvivePrimaryStepDown.test_get_more_iteration _________ self = <test.test_connections_survive_primary_stepdown_spec.TestConnectionsSurvivePrimaryStepDown testMethod=test_get_more_iteration> @client_context.require_version_min(4, 2, -1) def test_get_more_iteration(self): import logging start = time.time() for i in range(1000): with self.subTest(i=i): self.listener.reset() # Insert 5 documents with WC majority. self.coll.insert_many([{"data": k} for k in range(5)]) # Start a find operation and retrieve first batch of results. batch_size = 2 cursor = self.coll.find(batch_size=batch_size) for _ in range(batch_size): cursor.next() logging.info(f'\n\ntime: {time.time()-start} Stepping down the primary after first batch of results.\n\n') # Force step-down the primary. repl_set_step_down(self.client, replSetStepDown=10, secondaryCatchUpPeriodSecs=5) # Get next batch of results. for _ in range(batch_size): > cursor.next() test/test_connections_survive_primary_stepdown_spec.py:97: _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ ... E pymongo.errors.OperationFailure: Cannot run getMore on cursor 3944699655485662303, which was created in session ab2ce191-a110-43b2-809a-9f335b239025 - 47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU= - - , without an lsid, full error: {'ok': 0.0, 'errmsg': 'Cannot run getMore on cursor 3944699655485662303, which was created in session ab2ce191-a110-43b2-809a-9f335b239025 - 47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU= - - , without an lsid', 'code': 50737, 'codeName': 'Location50737', '$clusterTime': {'clusterTime': Timestamp(1751988774, 30), 'signature': {'hash': b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', 'keyId': 0}}, 'operationTime': Timestamp(1751988774, 30)}
Logs:
DEBUG pymongo.command:logger.py:108 {"message": "Command started", "clientId": {"$oid": "686d39524cb06ece7ed81c74"}, "command": "{\"replSetStepDown\": 10, \"secondaryCatchUpPeriodSecs\": 5, \"$clusterTime\": {\"clusterTime\": {\"$timestamp\": {\"t\": 1751988774, \"i\": 30}}, \"signature\": {\"hash\": {\"$binary\": {\"base64\": \"AAAAAAAAAAAAAAAAAAAAAAAAAAA=\", \"subType\": \"00\"}}}}, \"$db\": \"admin\"}", "commandName": "replSetStepDown", "databaseName": "admin", "requestId": 1374380934, "operationId": 1374380934, "driverConnectionId": 2, "serverConnectionId": 719, "serverHost": "localhost", "serverPort": 27017} DEBUG pymongo.topology:logger.py:108 {"message": "Server heartbeat succeeded", "topologyId": {"$oid": "686d39524cb06ece7ed81c74"}, "driverConnectionId": 1, "serverConnectionId": 699, "serverHost": "localhost", "serverPort": 27017, "awaited": true, "durationMS": 74.27512493450195, "reply": "{\"topologyVersion\": {\"processId\": {\"$oid\": \"686d310f0e7e1d85400038ac\"}, \"counter\": 1808}, \"hosts\": [\"localhost:27017\", \"localhost:27018\", \"localhost:27019\"], \"setName\": \"2abb3a48-0fd9-4c33-a7ed-8569d1eac183\", \"setVersion\": 1, \"primary\": \"localhost:27017\", \"me\": \"localhost:27017\", \"electionId\": {\"$oid\": \"7fffffff00000000000002f2\"}, \"lastWrite\": {\"opTime\": {\"ts\": {\"$timestamp\": {\"t\": 1751988774, \"i\": 30}}, \"t\": 754}, \"lastWriteDate\": {\"$date\": \"2025-07-08T15:32:54Z\"}, \"majorityOpTime\": {\"ts\": {\"$timestamp\": {\"t\": 1751988774, \"i\": 30}}, \"t\": 754}, \"majorityWriteDate\": {\"$date\": \"2025-07-08T15:32:54Z\"}}, \"maxBsonObjectSize\": 16777216, \"maxMessageSizeBytes\": 48000000, \"maxWriteBatchSize\": 100000, \"localTime\": {\"$date\": \"2025-07-08T15:32:54.940Z\"}, \"logicalSessionTimeoutMinutes\": 30, \"connectionId\": 699, \"maxWireVersion\": 25, \"ok\": 1.0, \"$clusterTime\": {\"clusterTime\": {\"$timestamp\": {\"t\": 1751988774, \"i\": 30}}, \"signature\": {\"hash\": {\"$binary\": {\"base64\": \"AAAAAAAAAAAAAAAAAAAAAAAAAAA=\", \"subType\": \"00\"}}}}, \"operationTime\": {\"$timestamp\": {\"t\": 1751988774, \"i\": 30}}}"} DEBUG pymongo.topology:logger.py:108 {"message": "Topology description changed", "topologyId": {"$oid": "686d39524cb06ece7ed81c74"}, "previousDescription": "<TopologyDescription id: 686d39524cb06ece7ed81c74, topology_type: ReplicaSetWithPrimary, servers: [<ServerDescription ('localhost', 27017) server_type: RSPrimary, rtt: 0.001626566259460886>, <ServerDescription ('localhost', 27018) server_type: RSSecondary, rtt: 0.0014175592278887193>, <ServerDescription ('localhost', 27019) server_type: RSSecondary, rtt: 0.001972064322808856>]>", "newDescription": "<TopologyDescription id: 686d39524cb06ece7ed81c74, topology_type: ReplicaSetNoPrimary, servers: [<ServerDescription ('localhost', 27017) server_type: RSOther, rtt: 0.001626566259460886>, <ServerDescription ('localhost', 27018) server_type: RSSecondary, rtt: 0.0014175592278887193>, <ServerDescription ('localhost', 27019) server_type: RSSecondary, rtt: 0.001972064322808856>]>"} DEBUG pymongo.topology:logger.py:108 {"message": "Server heartbeat succeeded", "topologyId": {"$oid": "686d39524cb06ece7ed81c73"}, "driverConnectionId": 1, "serverConnectionId": 697, "serverHost": "localhost", "serverPort": 27017, "awaited": true, "durationMS": 75.3959579160437, "reply": "{\"topologyVersion\": {\"processId\": {\"$oid\": \"686d310f0e7e1d85400038ac\"}, \"counter\": 1808}, \"hosts\": [\"localhost:27017\", \"localhost:27018\", \"localhost:27019\"], \"setName\": \"2abb3a48-0fd9-4c33-a7ed-8569d1eac183\", \"setVersion\": 1, \"primary\": \"localhost:27017\", \"me\": \"localhost:27017\", \"electionId\": {\"$oid\": \"7fffffff00000000000002f2\"}, \"lastWrite\": {\"opTime\": {\"ts\": {\"$timestamp\": {\"t\": 1751988774, \"i\": 30}}, \"t\": 754}, \"lastWriteDate\": {\"$date\": \"2025-07-08T15:32:54Z\"}, \"majorityOpTime\": {\"ts\": {\"$timestamp\": {\"t\": 1751988774, \"i\": 30}}, \"t\": 754}, \"majorityWriteDate\": {\"$date\": \"2025-07-08T15:32:54Z\"}}, \"maxBsonObjectSize\": 16777216, \"maxMessageSizeBytes\": 48000000, \"maxWriteBatchSize\": 100000, \"localTime\": {\"$date\": \"2025-07-08T15:32:54.940Z\"}, \"logicalSessionTimeoutMinutes\": 30, \"connectionId\": 697, \"maxWireVersion\": 25, \"ok\": 1.0, \"$clusterTime\": {\"clusterTime\": {\"$timestamp\": {\"t\": 1751988774, \"i\": 30}}, \"signature\": {\"hash\": {\"$binary\": {\"base64\": \"AAAAAAAAAAAAAAAAAAAAAAAAAAA=\", \"subType\": \"00\"}}}}, \"operationTime\": {\"$timestamp\": {\"t\": 1751988774, \"i\": 30}}}"} DEBUG pymongo.topology:logger.py:108 {"message": "Server heartbeat started", "topologyId": {"$oid": "686d39524cb06ece7ed81c74"}, "driverConnectionId": 1, "serverConnectionId": 699, "serverHost": "localhost", "serverPort": 27017, "awaited": true} DEBUG pymongo.topology:logger.py:108 {"message": "Topology description changed", "topologyId": {"$oid": "686d39524cb06ece7ed81c73"}, "previousDescription": "<TopologyDescription id: 686d39524cb06ece7ed81c73, topology_type: ReplicaSetWithPrimary, servers: [<ServerDescription ('localhost', 27017) server_type: RSPrimary, rtt: 0.0019480678761580896>, <ServerDescription ('localhost', 27018) server_type: RSSecondary, rtt: 0.002072626095623817>, <ServerDescription ('localhost', 27019) server_type: RSSecondary, rtt: 0.0016644754395462736>]>", "newDescription": "<TopologyDescription id: 686d39524cb06ece7ed81c73, topology_type: ReplicaSetNoPrimary, servers: [<ServerDescription ('localhost', 27017) server_type: RSOther, rtt: 0.0019480678761580896>, <ServerDescription ('localhost', 27018) server_type: RSSecondary, rtt: 0.002072626095623817>, <ServerDescription ('localhost', 27019) server_type: RSSecondary, rtt: 0.0016644754395462736>]>"} DEBUG pymongo.topology:logger.py:108 {"message": "Server heartbeat started", "topologyId": {"$oid": "686d39524cb06ece7ed81c73"}, "driverConnectionId": 1, "serverConnectionId": 697, "serverHost": "localhost", "serverPort": 27017, "awaited": true} DEBUG pymongo.command:logger.py:108 {"message": "Command succeeded", "clientId": {"$oid": "686d39524cb06ece7ed81c74"}, "durationMS": 4.760999999999999, "reply": "{\"ok\": 1.0, \"$clusterTime\": {\"clusterTime\": {\"$timestamp\": {\"t\": 1751988774, \"i\": 30}}, \"signature\": {\"hash\": {\"$binary\": {\"base64\": \"AAAAAAAAAAAAAAAAAAAAAAAAAAA=\", \"subType\": \"00\"}}}}, \"operationTime\": {\"$timestamp\": {\"t\": 1751988774, \"i\": 30}}}", "commandName": "replSetStepDown", "databaseName": "admin", "requestId": 1374380934, "operationId": 1374380934, "driverConnectionId": 2, "serverConnectionId": 719, "serverHost": "localhost", "serverPort": 27017} DEBUG pymongo.connection:logger.py:108 {"message": "Connection checked in", "clientId": {"$oid": "686d39524cb06ece7ed81c74"}, "serverHost": "localhost", "serverPort": 27017, "driverConnectionId": 2} DEBUG pymongo.serverSelection:logger.py:108 {"message": "Server selection started", "selector": "<function any_server_selector at 0x102ac7f60>", "operation": "getMore", "topologyDescription": "<TopologyDescription id: 686d39524cb06ece7ed81c74, topology_type: ReplicaSetNoPrimary, servers: [<ServerDescription ('localhost', 27017) server_type: RSOther, rtt: 0.001626566259460886>, <ServerDescription ('localhost', 27018) server_type: RSSecondary, rtt: 0.0014175592278887193>, <ServerDescription ('localhost', 27019) server_type: RSSecondary, rtt: 0.001972064322808856>]>", "clientId": {"$oid": "686d39524cb06ece7ed81c74"}} DEBUG pymongo.serverSelection:logger.py:108 {"message": "Server selection succeeded", "selector": "<function any_server_selector at 0x102ac7f60>", "operation": "getMore", "topologyDescription": "<TopologyDescription id: 686d39524cb06ece7ed81c74, topology_type: ReplicaSetNoPrimary, servers: [<ServerDescription ('localhost', 27017) server_type: RSOther, rtt: 0.001626566259460886>, <ServerDescription ('localhost', 27018) server_type: RSSecondary, rtt: 0.0014175592278887193>, <ServerDescription ('localhost', 27019) server_type: RSSecondary, rtt: 0.001972064322808856>]>", "clientId": {"$oid": "686d39524cb06ece7ed81c74"}, "serverHost": "localhost", "serverPort": 27017} DEBUG pymongo.connection:logger.py:108 {"message": "Connection checkout started", "clientId": {"$oid": "686d39524cb06ece7ed81c74"}, "serverHost": "localhost", "serverPort": 27017} DEBUG pymongo.topology:logger.py:108 {"message": "Server heartbeat succeeded", "topologyId": {"$oid": "686d39524cb06ece7ed81c73"}, "driverConnectionId": 1, "serverConnectionId": 697, "serverHost": "localhost", "serverPort": 27017, "awaited": true, "durationMS": 3.854084061458707, "reply": "{\"topologyVersion\": {\"processId\": {\"$oid\": \"686d310f0e7e1d85400038ac\"}, \"counter\": 1809}, \"hosts\": [\"localhost:27017\", \"localhost:27018\", \"localhost:27019\"], \"setName\": \"2abb3a48-0fd9-4c33-a7ed-8569d1eac183\", \"setVersion\": 1, \"secondary\": true, \"me\": \"localhost:27017\", \"lastWrite\": {\"opTime\": {\"ts\": {\"$timestamp\": {\"t\": 1751988774, \"i\": 30}}, \"t\": 754}, \"lastWriteDate\": {\"$date\": \"2025-07-08T15:32:54Z\"}, \"majorityOpTime\": {\"ts\": {\"$timestamp\": {\"t\": 1751988774, \"i\": 30}}, \"t\": 754}, \"majorityWriteDate\": {\"$date\": \"2025-07-08T15:32:54Z\"}}, \"maxBsonObjectSize\": 16777216, \"maxMessageSizeBytes\": 48000000, \"maxWriteBatchSize\": 100000, \"localTime\": {\"$date\": \"2025-07-08T15:32:54.944Z\"}, \"logicalSessionTimeoutMinutes\": 30, \"connectionId\": 697, \"maxWireVersion\": 25, \"ok\": 1.0, \"$clusterTime\": {\"clusterTime\": {\"$timestamp\": {\"t\": 1751988774, \"i\": 30}}, \"signature\": {\"hash\": {\"$binary\": {\"base64\": \"AAAAAAAAAAAAAAAAAAAAAAAAAAA=\", \"subType\": \"00\"}}}}, \"operationTime\": {\"$timestamp\": {\"t\": 1751988774, \"i\": 30}}}"} DEBUG pymongo.connection:logger.py:108 {"message": "Connection checked out", "clientId": {"$oid": "686d39524cb06ece7ed81c74"}, "serverHost": "localhost", "serverPort": 27017, "driverConnectionId": 2, "durationMS": 0.0003327499143779278} DEBUG pymongo.topology:logger.py:108 {"message": "Server heartbeat succeeded", "topologyId": {"$oid": "686d39524cb06ece7ed81c74"}, "driverConnectionId": 1, "serverConnectionId": 699, "serverHost": "localhost", "serverPort": 27017, "awaited": true, "durationMS": 4.376458935439587, "reply": "{\"topologyVersion\": {\"processId\": {\"$oid\": \"686d310f0e7e1d85400038ac\"}, \"counter\": 1809}, \"hosts\": [\"localhost:27017\", \"localhost:27018\", \"localhost:27019\"], \"setName\": \"2abb3a48-0fd9-4c33-a7ed-8569d1eac183\", \"setVersion\": 1, \"secondary\": true, \"me\": \"localhost:27017\", \"lastWrite\": {\"opTime\": {\"ts\": {\"$timestamp\": {\"t\": 1751988774, \"i\": 30}}, \"t\": 754}, \"lastWriteDate\": {\"$date\": \"2025-07-08T15:32:54Z\"}, \"majorityOpTime\": {\"ts\": {\"$timestamp\": {\"t\": 1751988774, \"i\": 30}}, \"t\": 754}, \"majorityWriteDate\": {\"$date\": \"2025-07-08T15:32:54Z\"}}, \"maxBsonObjectSize\": 16777216, \"maxMessageSizeBytes\": 48000000, \"maxWriteBatchSize\": 100000, \"localTime\": {\"$date\": \"2025-07-08T15:32:54.944Z\"}, \"logicalSessionTimeoutMinutes\": 30, \"connectionId\": 699, \"maxWireVersion\": 25, \"ok\": 1.0, \"$clusterTime\": {\"clusterTime\": {\"$timestamp\": {\"t\": 1751988774, \"i\": 30}}, \"signature\": {\"hash\": {\"$binary\": {\"base64\": \"AAAAAAAAAAAAAAAAAAAAAAAAAAA=\", \"subType\": \"00\"}}}}, \"operationTime\": {\"$timestamp\": {\"t\": 1751988774, \"i\": 30}}}"} DEBUG pymongo.topology:logger.py:108 {"message": "Topology description changed", "topologyId": {"$oid": "686d39524cb06ece7ed81c73"}, "previousDescription": "<TopologyDescription id: 686d39524cb06ece7ed81c73, topology_type: ReplicaSetNoPrimary, servers: [<ServerDescription ('localhost', 27017) server_type: RSOther, rtt: 0.0019480678761580896>, <ServerDescription ('localhost', 27018) server_type: RSSecondary, rtt: 0.002072626095623817>, <ServerDescription ('localhost', 27019) server_type: RSSecondary, rtt: 0.0016644754395462736>]>", "newDescription": "<TopologyDescription id: 686d39524cb06ece7ed81c73, topology_type: ReplicaSetNoPrimary, servers: [<ServerDescription ('localhost', 27017) server_type: RSSecondary, rtt: 0.0019480678761580896>, <ServerDescription ('localhost', 27018) server_type: RSSecondary, rtt: 0.002072626095623817>, <ServerDescription ('localhost', 27019) server_type: RSSecondary, rtt: 0.0016644754395462736>]>"} DEBUG pymongo.command:logger.py:108 {"message": "Command started", "clientId": {"$oid": "686d39524cb06ece7ed81c74"}, "command": "{\"getMore\": 3944699655485662303, \"collection\": \"step-down\", \"batchSize\": 2, \"$clusterTime\": {\"clusterTime\": {\"$timestamp\": {\"t\": 1751988774, \"i\": 30}}, \"signature\": {\"hash\": {\"$binary\": {\"base64\": \"AAAAAAAAAAAAAAAAAAAAAAAAAAA=\", \"subType\": \"00\"}}}}, \"$db\": \"step-down\"}", "commandName": "getMore", "databaseName": "step-down", "requestId": 886250606, "operationId": 886250606, "driverConnectionId": 2, "serverConnectionId": 719, "serverHost": "localhost", "serverPort": 27017} DEBUG pymongo.topology:logger.py:108 {"message": "Topology description changed", "topologyId": {"$oid": "686d39524cb06ece7ed81c74"}, "previousDescription": "<TopologyDescription id: 686d39524cb06ece7ed81c74, topology_type: ReplicaSetNoPrimary, servers: [<ServerDescription ('localhost', 27017) server_type: RSOther, rtt: 0.001626566259460886>, <ServerDescription ('localhost', 27018) server_type: RSSecondary, rtt: 0.0014175592278887193>, <ServerDescription ('localhost', 27019) server_type: RSSecondary, rtt: 0.001972064322808856>]>", "newDescription": "<TopologyDescription id: 686d39524cb06ece7ed81c74, topology_type: ReplicaSetNoPrimary, servers: [<ServerDescription ('localhost', 27017) server_type: RSSecondary, rtt: 0.001626566259460886>, <ServerDescription ('localhost', 27018) server_type: RSSecondary, rtt: 0.0014175592278887193>, <ServerDescription ('localhost', 27019) server_type: RSSecondary, rtt: 0.001972064322808856>]>"} DEBUG pymongo.topology:logger.py:108 {"message": "Server heartbeat started", "topologyId": {"$oid": "686d39524cb06ece7ed81c73"}, "driverConnectionId": 1, "serverConnectionId": 697, "serverHost": "localhost", "serverPort": 27017, "awaited": true} DEBUG pymongo.topology:logger.py:108 {"message": "Server heartbeat started", "topologyId": {"$oid": "686d39524cb06ece7ed81c74"}, "driverConnectionId": 1, "serverConnectionId": 699, "serverHost": "localhost", "serverPort": 27017, "awaited": true} DEBUG pymongo.command:logger.py:108 {"message": "Command failed", "clientId": {"$oid": "686d39524cb06ece7ed81c74"}, "durationMS": 3.835, "failure": "{\"code\": 50737, \"codeName\": \"Location50737\"}", "commandName": "getMore", "databaseName": "step-down", "requestId": 886250606, "operationId": 886250606, "driverConnectionId": 2, "serverConnectionId": 719, "serverHost": "localhost", "serverPort": 27017}
The fix is to either block server selection until the node becomes a Secondary (not RSOther) or attach the lsid even if the server is in the RSOther state.
- related to
-
PYTHON-2872 Test failure - TestConnectionsSurvivePrimaryStepDown.test_get_more_iteration
-
- Ready for Work
-