Streams: investigate word count query that OOMs pod

Export: XML | Word | Printable | JSON

    • Type: Task
    • Resolution: Duplicate
    • Priority: Minor - P4
    • None
    • Affects Version/s: None
    • Component/s: None
    • Atlas Streams
    • Sprint 55
    • None
    • None
    • None
    • None
    • None
    • None
    • None

      // Defines and starts a stream processor that reads from the 'wiki1' Kafka
      // topic, splits each document's `parsedcomment` into words, counts the words
      // per window, and emits the 1000 most frequent ones to a topic named after
      // the processor.
      let name = "testTop1000WordsInWikipediaComments"
      // Use `name` everywhere so the processor name, the output topic and the
      // `sp[name]` lookup below cannot drift apart.
      sp.createStreamProcessor(name, [
        { $source: { connectionName: 'DemoKafka', topic: 'wiki1' } },
        // Split on single spaces; each resulting word becomes its own document.
        { $project: { comment_words: { $split: ["$parsedcomment", " "] } } },
        { $unwind: "$comment_words" },
        // NOTE(review): a 1-hour window that hops every second implies roughly
        // 3600 overlapping windows per event, each with its own per-word $group
        // state. Presumably this is what drives the memory growth under
        // investigation; confirm before changing the window parameters.
        { $hoppingWindow: {
          interval: { size: 1, unit: 'hour' },
          hopSize: { size: 1, unit: 'second' },
          pipeline: [
            // Count occurrences of each distinct word within the window.
            { $group: {
              _id: '$comment_words',
              count: { '$count': {} }
            }},
            // Keep only the 1000 most frequent words.
            { $sort: { count: -1 } },
            { $limit: 1000 },
            // Collapse the top words into a single output document per window.
            { $group: {
              _id: null,
              out: { $push: { word: "$_id", count: "$count" } }
            }}
          ]
        }},
        { $emit: { connectionName: 'DemoKafka', topic: name } }
      ])
      sp[name].start()

            Assignee:
            Sandeep Dhoot (Inactive)
            Reporter:
            Matthew Normyle
            Votes:
            0 Vote for this issue
            Watchers:
            3 Start watching this issue

              Created:
              Updated:
              Resolved: