[SERVER-84230] Streams: investigate word count query that OOMs pod Created: 15/Dec/23  Updated: 22/Jan/24

Status: Needs Scheduling
Project: Core Server
Component/s: None
Affects Version/s: None
Fix Version/s: None

Type: Task Priority: Major - P3
Reporter: Matthew Normyle Assignee: Backlog - Atlas Streams
Resolution: Unresolved Votes: 0
Labels: init-337-ga
Remaining Estimate: Not Specified
Time Spent: Not Specified
Original Estimate: Not Specified

Assigned Teams:
Atlas Streams
Participants:

 Description   

// Reproduction script for the word-count stream processor under investigation.
// Intended to be pasted into a shell that provides the `sp` stream-processing
// helper. The pipeline stages and window parameters are kept exactly as
// reported so that the behavior being investigated is still reproduced.

// Single source of truth for the processor name; it is also used as the
// output topic and to look the processor up when starting it.
const name = "testTop1000WordsInWikipediaComments";

sp.createStreamProcessor(name, [
  // Read events from topic 'wiki1' on the 'DemoKafka' connection.
  { $source: { connectionName: "DemoKafka", topic: "wiki1" } },

  // Split each comment on single spaces, then emit one document per word.
  { $project: { comment_words: { $split: ["$parsedcomment", " "] } } },
  { $unwind: "$comment_words" },

  {
    $hoppingWindow: {
      // A 1-hour window advancing every second.
      // NOTE(review): with these settings a very large number of windows
      // overlap at any moment; presumably this is what drives the memory
      // growth -- to be confirmed by the investigation.
      interval: { size: 1, unit: "hour" },
      hopSize: { size: 1, unit: "second" },
      pipeline: [
        // Count occurrences of each distinct word.
        { $group: { _id: "$comment_words", count: { $count: {} } } },
        // Keep the 1000 most frequent words.
        { $sort: { count: -1 } },
        { $limit: 1000 },
        // Collapse the ranking into a single document holding an array of
        // { word, count } entries.
        {
          $group: {
            _id: null,
            out: { $push: { word: "$_id", count: "$count" } },
          },
        },
      ],
    },
  },

  // Publish the result to a topic named after the processor.
  { $emit: { connectionName: "DemoKafka", topic: name } },
]);

sp[name].start();


Generated at Thu Feb 08 06:54:25 UTC 2024 using Jira 9.7.1#970001-sha1:2222b88b221c4928ef0de3161136cc90c8356a66.