|
It should support recursive generation of arrays, e.g. generating arrays of arrays. It should leverage the existing random generator infrastructure of the Cost Model project, defined in random_generator.py.
Possible design:
class ArrayRandomDistribution(RandomDistribution):
    """Random distribution whose generated items are arrays.

    ArrayRandomDistribution inherits RandomDistribution; therefore it allows
    recursive array generation, since value_distr can itself be an
    ArrayRandomDistribution or a DocumentRandomDistribution.
    """

    def __init__(self, lengths_distr: RandomDistribution, value_distr: RandomDistribution):
        """Store the distributions used for array lengths and array values.

        Args:
            lengths_distr: distribution asked for one length per generated array.
            value_distr: distribution asked for the values of each array.
        """
        self.lengths_distr = lengths_distr
        self.value_distr = value_distr

    def generate(self, size: int) -> list:
        """Generate `size` arrays.

        One length is requested from `lengths_distr` for every array, and each
        array is then filled by asking `value_distr` for that many values.

        Args:
            size: number of arrays to generate.

        Returns:
            A list with one entry per generated length; each entry is whatever
            `value_distr.generate(length)` returns.
        """
        lengths = self.lengths_distr.generate(size)
        return [self.value_distr.generate(length) for length in lengths]
|
Example of generating arrays of arrays of strings:
# Inner arrays: lengths come from a normal distribution over the INT range
# generator, values from a uniform distribution over the STRING range generator.
internal_length_distribution = RandomDistribution.normal(RangeGenerator(DataType.INT, 10, 25))
internal_values_distribution = RandomDistribution.uniform(RangeGenerator(DataType.STRING, "abc", "zzzz"))

internal_array_distribution = ArrayRandomDistribution(internal_length_distribution, internal_values_distribution)

# Outer arrays: each holds either 10 or 20 inner arrays, with equal weights.
length_distribution = RandomDistribution.choice(values=[10, 20], weights=[50, 50])
# NOTE(review): assumes RandomDistribution exposes an `array` factory that
# builds an ArrayRandomDistribution; it is not defined in the design above - confirm.
array_distribution = RandomDistribution.array(length_distribution, internal_array_distribution)
|
|