Skip to content

Commit a6ce1da

Browse files
committed
Use the Hugging Face `datasets` library to load FineWeb-Edu data
1 parent 8528118 commit a6ce1da

3 files changed

Lines changed: 38 additions & 1 deletion

File tree

algoperf/_version.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
# file generated by setuptools-scm
2+
# don't change, don't track in version control
3+
4+
__all__ = [
5+
"__version__",
6+
"__version_tuple__",
7+
"version",
8+
"version_tuple",
9+
"__commit_id__",
10+
"commit_id",
11+
]
12+
13+
TYPE_CHECKING = False
14+
if TYPE_CHECKING:
15+
from typing import Tuple
16+
from typing import Union
17+
18+
VERSION_TUPLE = Tuple[Union[int, str], ...]
19+
COMMIT_ID = Union[str, None]
20+
else:
21+
VERSION_TUPLE = object
22+
COMMIT_ID = object
23+
24+
version: str
25+
__version__: str
26+
__version_tuple__: VERSION_TUPLE
27+
version_tuple: VERSION_TUPLE
28+
commit_id: COMMIT_ID
29+
__commit_id__: COMMIT_ID
30+
31+
__version__ = version = '1.0.1'
32+
__version_tuple__ = version_tuple = (1, 0, 1)
33+
34+
__commit_id__ = commit_id = 'ga673f5835'

algoperf/workloads/finewebedu_lm/input_pipeline.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
import jax
88
import tensorflow as tf
9+
import datasets as hf_datasets
910

1011
from algoperf import data_utils
1112

@@ -83,7 +84,8 @@ def get_lm_dataset(
8384
shuffle_seed = jax.random.randint(data_rng, (), -(2**31), 2**31 - 1)
8485

8586
data_dir = os.path.join(data_dir, TFDS_SPLIT_NAME[split])
86-
tokens_ds = tf.data.Dataset.load(data_dir)
87+
ds = hf_datasets.load_from_disk(data_dir)
88+
tokens_ds = ds.to_tf_dataset()
8789

8890
# tokens
8991
tokens_ds = tokens_ds.flat_map(tf.data.Dataset.from_tensor_slices)

scoring/utils/slurm/run_jobs.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,7 @@ DOCKER_CMD=(
9191
docker run
9292
-v /opt/data/:/data/
9393
-v "$HOME/experiment_runs:/experiment_runs"
94+
-v "$HOME/algorithmic-efficiency/:/algorithmic-efficiency/"
9495
-v "$HOME/submissions_algorithms/:/algorithmic-efficiency/submissions_algorithms"
9596
-v "$HOME/algorithmic-efficiency/docker/scripts/startup.sh:/algorithmic-efficiency/docker/scripts/startup.sh"
9697
--gpus all

0 commit comments

Comments
 (0)