  Core Server / SERVER-96832

InvalidIdField warning for timeseries records after large volume of writes

    • Type: Bug
    • Resolution: Duplicate
    • Priority: Major - P3
    • Affects Version/s: None
    • Component/s: None
    • Storage Execution
    • ALL

      Reproducer from linked ticket.

      // Switch to or create a new database for testing
      use timeSeriesTestDB
      
      // Drop the collection if it exists (for repeatability)
      db.time_series_minute.drop();
      
      // Create the time-series collection
      db.createCollection("time_series_minute", {
        timeseries: {
          timeField: "timestamp",
          metaField: "metadata",
          granularity: "minutes"
        }
      });
      
      // Total number of data points to insert
      const totalDataPoints = 100_000_000;
      
      // Number of data points per batch
      const batchSize = 10_000; // Adjust batch size based on available memory
      
      // Total number of batches
      const totalBatches = Math.ceil(totalDataPoints / batchSize);
      
      // Start time for data generation
      const startTime = new Date();
      
      // Interval between data points in seconds
      const intervalSeconds = 60; // One data point per minute
      
      // Function to generate data batches with unique metadata every 1000 documents
      function generateDataBatch(batchNumber, batchSize, startTime, intervalSeconds) {
        const dataBatch = [];
        const batchStartIndex = batchNumber * batchSize;
        for (let i = 0; i < batchSize; i++) {
          const index = batchStartIndex + i;
          // Break if we've reached the total data points
          if (index >= totalDataPoints) break;
      
          // Generate a unique metadata value every 1000 documents
          const metadataValue = `group_${Math.floor(index / 1000)}`;
      
          dataBatch.push({
            timestamp: new Date(startTime.getTime() + index * intervalSeconds * 1000),
            value: Math.random() * 100,
            metadata: metadataValue
          });
        }
        return dataBatch;
      }
      
      // Insert data in batches
      for (let batchNumber = 0; batchNumber < totalBatches; batchNumber++) {
        // Generate the data batch
        const dataBatch = generateDataBatch(batchNumber, batchSize, startTime, intervalSeconds);
      
        // Insert the data batch into the collection
        db.time_series_minute.insertMany(dataBatch);
      
        // Log progress every 10 batches
        if (batchNumber % 10 === 0) {
          print(`Inserted batch ${batchNumber + 1} of ${totalBatches}`);
        }
      } 
      // Validate the collection to surface any time-series compliance warnings
      const validationResults = db.time_series_minute.validate({ full: true });
      printjson(validationResults);

      After 700 batches, I got the following warning message:

      [j0] {"t":{"$date":"2024-11-06T18:03:13.853+00:00"},"s":"W",  "c":"STORAGE",  "id":6698300, "ctx":"conn9","msg":"Document is not compliant with time-series specifications","attr":{"namespace":"test.system.buckets.time_series_minute","recordId":"648000ebc0a0669fc7e794ee74","reason":{"code":53,"codeName":"InvalidIdField","errmsg":"Mismatch between the embedded timestamp Date(-2147423296000) in the time-series bucket '_id' field and the timestamp 2038-01-19T20:00:00.000+00:00 in 'control.min' field."}}} 

      The warning was logged for a subset of the inserted documents in the batch (not all documents triggered it).
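
      Note that the 'control.min' timestamp in the warning (2038-01-19T20:00:00Z) lies just past the signed 32-bit epoch boundary (2038-01-19T03:14:08Z), and Date(-2147423296000) appears to be that same instant read back as a signed 32-bit seconds value. As a hedged diagnostic sketch (not from the ticket): the raw buckets can be inspected through the system.buckets namespace shown in the warning, comparing the timestamp embedded in each bucket's ObjectId '_id' against 'control.min' for buckets past that boundary.

      // Diagnostic sketch for mongosh; run against the database that holds the
      // collection (timeSeriesTestDB in the reproducer above, test in the logged warning).
      const buckets = db.getCollection("system.buckets.time_series_minute");

      // Signed 32-bit boundary: 2^31 seconds after the Unix epoch (2038-01-19T03:14:08Z).
      const int32Boundary = new Date(Math.pow(2, 31) * 1000);

      // Show a few buckets whose minimum timestamp falls past the boundary, together with
      // the timestamp embedded in the leading 4 bytes of the bucket ObjectId.
      // "timestamp" is the timeField from the reproducer above.
      buckets
        .find({ "control.min.timestamp": { $gte: int32Boundary } })
        .limit(5)
        .forEach(b => {
          printjson({
            bucketId: b._id,
            idEmbeddedTimestamp: b._id.getTimestamp(),
            controlMinTimestamp: b.control.min.timestamp
          });
        });

      Since mongosh's getTimestamp() should parse the leading ObjectId bytes as an unsigned value, it would display the 2038 date directly even where the server-side check reports the negative signed interpretation.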

            Assignee:
            Unassigned
            Reporter:
            Stephanie Eristoff (stephanie.eristoff@mongodb.com)
            Votes:
            0
            Watchers:
            9
