From 6a6a871c6853bdcd32c1db6eb1860fb517890c39 Mon Sep 17 00:00:00 2001 From: Srini Iyer Date: Wed, 9 Apr 2025 00:21:48 +0000 Subject: [PATCH] Cast int sample id to str --- bytelatent/data/iterators/arrow_iterator.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bytelatent/data/iterators/arrow_iterator.py b/bytelatent/data/iterators/arrow_iterator.py index bfb1c17..ec7c54f 100644 --- a/bytelatent/data/iterators/arrow_iterator.py +++ b/bytelatent/data/iterators/arrow_iterator.py @@ -215,7 +215,7 @@ class ArrowFileIterator(StatefulIterator): raise ValueError(f"Unknown file format: {self.file_format}") for i in range(len(sample_ids)): out = BltExample( - sample_id=sample_ids[i], + sample_id=str(sample_ids[i]), entropies=entropies[i] if entropies is not None else None, text=texts[i], tokens=None, @@ -249,7 +249,7 @@ class ArrowFileIterator(StatefulIterator): raise ValueError(f"Unknown file format: {self.file_format}") for i in range(len(sample_ids)): out = BltExample( - sample_id=sample_ids[i], + sample_id=str(sample_ids[i]), entropies=entropies[i] if entropies is not None else None, text=texts[i], tokens=None,