[SERVER-65247] jstests/core/set_param1.js failure Created: 04/Apr/22  Updated: 29/Oct/23  Resolved: 06/Apr/22

Status: Closed
Project: Core Server
Component/s: None
Affects Version/s: None
Fix Version/s: 6.0.0-rc0

Type: Improvement Priority: Major - P3
Reporter: Billy Donahue Assignee: Billy Donahue
Resolution: Fixed Votes: 0
Labels: None
Remaining Estimate: Not Specified
Time Spent: Not Specified
Original Estimate: Not Specified

Issue Links:
Depends
Backwards Compatibility: Fully Compatible
Sprint: Service Arch 2022-04-18
Participants:
Linked BF Score: 35

 Description   

This simple test expects to call getParameter and setParameter a few times.
https://github.com/mongodb/mongo/blob/master/jstests/core/set_param1.js

 
old = db.adminCommand({"getParameter": "*"});
// the first time getParameter sends a request to with a shardingTaskExecutor and this sets an
// operationTime. The following commands do not use shardingTaskExecutor.
delete old["operationTime"];
delete old["$clusterTime"];
tmp1 = db.adminCommand({"setParameter": 1, "logLevel": 5});
tmp2 = db.adminCommand({"setParameter": 1, "logLevel": old.logLevel});
now = db.adminCommand({"getParameter": "*"});
delete now["operationTime"];
delete now["$clusterTime"];
 
assert.eq(old, now, "A");

The "A" assert fires.
The unequal strings are huge but they differ in only one small way:

--- old.txt	2022-04-04 18:33:56.000000000 -0400
+++ new.txt	2022-04-04 18:34:08.000000000 -0400
@@ -241,7 +241,7 @@
  		"timesEntered" : NumberLong(0)
  	},
  	"failpoint.WTPreserveSnapshotHistoryIndefinitely" : {
- 		"mode" : 1,
+ 		"mode" : 0,
  		"data" : {
  
  		},

So it looks like something is coming in and clearing a failpoint while this test is running?

[js_test:set_param1] Fixture status:
+--------------------------------+-------+-----+-----+
|full_name                       |name   |port |pid  |
+--------------------------------+-------+-----+-----+
|ReplicaSetFixture:job3:primary  |j3:prim|20750|28241|
|ReplicaSetFixture:job3:secondary|j3:sec |20751|28246|
+--------------------------------+-------+-----+-----+
[js_test:set_param1] Starting JSTest jstests/core/set_param1.js...
PATH=/data/mci/3ad82e0673491785b95ef0145edfe4ad/src:/data/multiversion:/data/mci/3ad82e0673491785b95ef0145edfe4ad/src/dist-test/bin:/data/mci/3ad82e0673491785b95ef0145edfe4ad/venv/bin:/home/ec2-user/.local/bin:/home/ec2-user/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/opt/node/bin:/opt/node/bin:/data/multiversion INSTALL_DIR=/data/mci/3ad82e0673491785b95ef0145edfe4ad/src/dist-test/bin /data/mci/3ad82e0673491785b95ef0145edfe4ad/src/dist-test/bin/mongo --eval 'MongoRunner.dataDir = "/data/db/job3/mongorunner"; MongoRunner.dataPath = "/data/db/job3/mongorunner/"; TestData = new Object(); TestData["minPort"] = 20770; TestData["maxPort"] = 20999; TestData["peerPids"] = [28241, 28246]; TestData["alwaysUseLogFiles"] = false; TestData["failIfUnterminatedProcesses"] = true; TestData["implicitlyShardOnCreateCollectionOnly"] = false; TestData["isMainTest"] = true; TestData["numTestClients"] = 1; TestData["backupOnRestartDir"] = null; TestData["enableMajorityReadConcern"] = true; TestData["mixedBinVersions"] = ""; TestData["multiversionBinVersion"] = ""; TestData["noJournal"] = false; TestData["storageEngine"] = "wiredTiger"; TestData["storageEngineCacheSizeGB"] = "1"; TestData["testName"] = "set_param1"; TestData["transportLayer"] = ""; TestData["wiredTigerCollectionConfigString"] = ""; TestData["wiredTigerEngineConfigString"] = ""; TestData["wiredTigerIndexConfigString"] = ""; TestData["inEvergreen"] = true; TestData["evergreenTaskId"] = "mongodb_mongo_master_linux_64_debug_required_replica_sets_batched_deletes_passthrough_0_linux_64_debug_required_patch_797daddf5dac8aa111f488d4102e1b88d49e56e3_624b6926d6d80a42057cdb35_22_04_04_21_55_42"; TestData["setParameters"] = new Object(); TestData["setParameters"]["logComponentVerbosity"] = new Object(); TestData["setParameters"]["logComponentVerbosity"]["replication"] = new Object(); TestData["setParameters"]["logComponentVerbosity"]["replication"]["election"] = 4; TestData["setParameters"]["logComponentVerbosity"]["replication"]["heartbeats"] = 2; TestData["setParameters"]["logComponentVerbosity"]["replication"]["initialSync"] = 2; TestData["setParameters"]["logComponentVerbosity"]["replication"]["rollback"] = 2; TestData["setParameters"]["logComponentVerbosity"]["sharding"] = new Object(); TestData["setParameters"]["logComponentVerbosity"]["sharding"]["migration"] = 2; TestData["setParameters"]["logComponentVerbosity"]["storage"] = new Object(); TestData["setParameters"]["logComponentVerbosity"]["storage"]["recovery"] = 2; TestData["setParameters"]["logComponentVerbosity"]["transaction"] = 4; TestData["setParameters"]["logComponentVerbosity"]["tenantMigration"] = 4; TestData["setParametersMongos"] = new Object(); TestData["setParametersMongos"]["logComponentVerbosity"] = new Object(); TestData["setParametersMongos"]["logComponentVerbosity"]["transaction"] = 3; TestData["setParametersMongocryptd"] = new Object(); TestData["undoRecorderPath"] = null; TestData["transactionLifetimeLimitSeconds"] = 86400; var testingReplication = true;; load('"'"'jstests/libs/override_methods/validate_collections_on_shutdown.js'"'"');; load('"'"'jstests/libs/override_methods/check_uuids_consistent_across_cluster.js'"'"');; load('"'"'jstests/libs/override_methods/check_indexes_consistent_across_cluster.js'"'"');; load('"'"'jstests/libs/override_methods/check_orphans_are_deleted.js'"'"');; load('"'"'jstests/libs/override_methods/implicitly_retry_on_background_op_in_progress.js'"'"');; (function() { Timestamp.prototype.toString = function() { throw new Error("Cannot toString timestamps. Consider using timestampCmp() for comparison or tojson(<variable>) for output."); } })();' mongodb://localhost:20750 jstests/core/set_param1.js
[js_test:set_param1] JSTest jstests/core/set_param1.js started with pid 416.
[js_test:set_param1] MongoDB shell version v6.0.0-alpha-1018-g797dadd-patch-624b6926d6d80a42057cdb35
[js_test:set_param1] connecting to: mongodb://localhost:20750/?compressors=disabled&gssapiServiceName=mongodb
[js_test:set_param1] Implicit session: session { "id" : UUID("23c2c8a4-be25-4c87-b6f2-59a7df552a83") }
[js_test:set_param1] MongoDB server version: 6.0.0-alpha-1018-g797dadd-patch-624b6926d6d80a42057cdb35

Task Metadata:

Build Variant Name: linux-64-debug-required
Project: mongodb-mongo-master
Submitted by: billy.donahue
Submitted at: Apr 4, 2022, 5:59:10 PM
Generated by: build_variant_gen
Started: Apr 4, 2022, 6:12:21 PM
Finished: Apr 4, 2022, 6:21:08 PM
Duration: 8m 46s
Base commit duration: 31m 41s
Base commit: 797daddf5d
Failing command: 'subprocess.exec' in "run generated tests" (#20)
Display Task: replica_sets_batched_deletes_passthrough
Distro: rhel80-medium
AMI: ami-05614bd3969b4aa33
Host: i-060befcbce5131592
Priority: 1

https://spruce.mongodb.com/task/mongodb_mongo_master_linux_64_debug_required_replica_sets_batched_deletes_passthrough_0_linux_64_debug_required_patch_797daddf5dac8aa111f488d4102e1b88d49e56e3_624b6926d6d80a42057cdb35_22_04_04_21_55_42/tests?execution=0&sortBy=STATUS&sortDir=ASC

failpoint.WTPreserveSnapshotHistoryIndefinitely



 Comments   
Comment by Githook User [ 05/Apr/22 ]

Author:

{'name': 'Billy Donahue', 'email': 'billy.donahue@mongodb.com', 'username': 'BillyDonahue'}

Message: SERVER-65247 make set_param1.js resilient against concurrent FailPoint manipulation
Branch: master
https://github.com/mongodb/mongo/commit/f2558a0fcc9bfa1c3013b3ae87e45973e5f28a69

Comment by George Wangensteen [ 05/Apr/22 ]

Same thing happened to me and I opened a BF. Linking it here

Comment by Max Hirschhorn [ 05/Apr/22 ]

I think one solution could be to move the set_param1.js test to the jstests/noPassthrough/ directory so it doesn't run with the background dbhash concurrently. The test would need to start its own MongoDB cluster instead of rely on the resmoke spawned one and would likely want to spawn both a ReplSetTest and ShardingTest to test the behavior against a replica set and a sharded cluster.

Alternatively, the set_param1.js test could choose to ignore differences around "failpoint.WTPreserveSnapshotHistoryIndefinitely" in the {getParameter: "*"} command result.

It may be possible with the default value of the minSnapshotHistoryWindowInSeconds server parameter being 5 minutes to no longer explicitly force WiredTiger to preserve enough history because we don't expect checking dbhash across all nodes in the replica sets to take more than 5 minutes.

Comment by Max Hirschhorn [ 04/Apr/22 ]

So it looks like something is coming in and clearing a failpoint while this test is running?

Yes, the dbhash check which runs in the background concurrently with the JavaScript test toggles the WTPreserveSnapshotHistoryIndefinitely failpoint on and off.

https://github.com/mongodb/mongo/blob/d57bfb47f2ff4743d5a087a895fd4eb1e5f1f8c4/jstests/hooks/run_check_repl_dbhash_background.js#L89-L92

Generated at Thu Feb 08 06:02:13 UTC 2024 using Jira 9.7.1#970001-sha1:2222b88b221c4928ef0de3161136cc90c8356a66.