mirror of
https://github.com/facebookresearch/blt.git
synced 2025-04-07 18:29:08 +00:00
Correctly reset batch iterator at each arrow create_iter call. (#74)
Summary: Test Plan:
This commit is contained in:
parent
08b8c7cd05
commit
c727844e9d
1 changed file with 1 addition and 3 deletions
|
@@ -197,9 +197,6 @@ class ArrowFileIterator(StatefulIterator):
|
|||
self.dataset = pa.dataset.dataset(
|
||||
self.dataset_files, format=self.file_format, filesystem=filesystem
|
||||
)
|
||||
self.batch_iterator = self.dataset.to_batches(
|
||||
batch_size=self.arrow_batch_size
|
||||
)
|
||||
self.iter_id += 1
|
||||
if self.batch_to_consume is not None:
|
||||
batch_columns: dict[str, list] = self.batch_to_consume
|
||||
|
@@ -229,6 +226,7 @@ class ArrowFileIterator(StatefulIterator):
|
|||
if (self.row_num - 1) % self.num_workers == self.worker_id:
|
||||
yield out
|
||||
|
||||
self.batch_iterator = self.dataset.to_batches(batch_size=self.arrow_batch_size)
|
||||
for batch in self.batch_iterator:
|
||||
batch_columns = batch.to_pydict()
|
||||
if self.file_format == "arrow":
|
||||
|
|
Loading…
Add table
Reference in a new issue