[GODRIVER-3026] Optimize Collection.insert() Created: 30/Oct/23  Updated: 08/Jan/24

Status: Backlog
Project: Go Driver
Component/s: None
Affects Version/s: None
Fix Version/s: 2.0.0

Type: Improvement Priority: Unknown
Reporter: Preston Vasquez Assignee: Unassigned
Resolution: Unresolved Votes: 0
Labels: None
Remaining Estimate: Not Specified
Time Spent: Not Specified
Original Estimate: Not Specified

Issue Links:
Related
related to GODRIVER-2582 Accept any value for "documents" in I... Closed
Epic Link: Go Driver 2.0: Driver
Quarter: FY24Q3, FY24Q4
Documentation Changes Summary:

1. What would you like to communicate to the user about this feature?
2. Would you like the user to see examples of the syntax and/or executable code and its output?
3. Which versions of the driver/connector does this apply to?


 Description   

The proposed solution for GODRIVER-2582 involves using reflection, which is very inefficient and non-idiomatic. A more optimized solution would be to defer the logic for parsing the "documents" argument (an interface{} value) to the Collection.insert method. The idea is to marshal the documents in a single operation, like this:

func _switch(documents interface{}) {
	bytes, err := bson.Marshal(struct {
		Arr any `bson:"arr"`
	}{Arr: documents})
	if err != nil {
		panic(err)
	}
 
	raw := bson.Raw(bytes).Lookup("arr")
 
	var docSlice []bson.Raw
	switch raw.Type {
	case bsontype.Array:
		elems, err := raw.Array().Elements()
		if err != nil {
			panic(err)
		}
 
		docSlice = make([]bson.Raw, len(elems))
		for idx, elem := range elems {
			docSlice[idx] = elem.Value().Document()
		}
	}
}

Here is a script that benchmarks the reflection approach against the marshal-and-switch approach:

package benchmark
 
import (
	"reflect"
	"testing"
 
	"go.mongodb.org/mongo-driver/bson"
	"go.mongodb.org/mongo-driver/bson/bsontype"
)
 
// reflection is the reflection-based baseline: it uses the reflect package to
// walk documents and box every element into a freshly allocated
// []interface{}.
//
// It returns immediately when documents is not a slice or is an empty slice.
// The boxed slice is discarded; the function exists only to be measured.
func reflection(documents interface{}) {
	val := reflect.ValueOf(documents)
	if val.Kind() != reflect.Slice || val.Len() == 0 {
		return
	}

	count := val.Len()
	boxed := make([]interface{}, 0, count)
	for idx := 0; idx < count; idx++ {
		boxed = append(boxed, val.Index(idx).Interface())
	}
}
 
func _switch(documents interface{}) {
	bytes, err := bson.Marshal(struct {
		Arr any `bson:"arr"`
	}{Arr: documents})
	if err != nil {
		panic(err)
	}
 
	raw := bson.Raw(bytes).Lookup("arr")
 
	var docSlice []bson.Raw
	switch raw.Type {
	case bsontype.Array:
		elems, err := raw.Array().Elements()
		if err != nil {
			panic(err)
		}
 
		docSlice = make([]bson.Raw, len(elems))
		for idx, elem := range elems {
			docSlice[idx] = elem.Value().Document()
		}
	}
 
	if len(docSlice) == 0 {
		panic(2)
	}
}
 
// Document is the sample payload used by the test and benchmarks in this
// script. It carries two string fields and no struct tags.
type Document struct {
	A string
	B string
}
 
func TestSwitch(t *testing.T) {
	docs := []Document{{A: "A", B: "B"}}
	_switch(docs)
}
 
func BenchmarkReflection(b *testing.B) {
	// Prepare a sample input for the reflection function
	var data []Document
	for i := 0; i < 1000; i++ {
		data = append(data, Document{A: "A", B: "B"})
	}
 
	b.ReportAllocs()
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		reflection(data)
	}
}
 
func BenchmarkSwitch(b *testing.B) {
	// Prepare a sample input for the _switch function
	var data []interface{}
	for i := 0; i < 1000; i++ {
		data = append(data, Document{A: "A"})
	}
 
	b.ReportAllocs()
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		_switch(data)
	}
}

Here are the results of the benchmark:

goos: darwin
goarch: arm64
pkg: github.com/prestonvasquez/technical/workshop/mongo/insertmany/benchmark2582
BenchmarkReflection-10             42393             27440 ns/op           48408 B/op       1002 allocs/op
BenchmarkSwitch-10                  5612            212016 ns/op          157499 B/op         17 allocs/op
PASS


Generated at Thu Feb 08 08:39:54 UTC 2024 using Jira 9.7.1#970001-sha1:2222b88b221c4928ef0de3161136cc90c8356a66.