fix offset #1337

Open · wants to merge 1 commit into base: master
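Why the bound changes: Python's `random.randint(a, b)` is inclusive of both endpoints, so `random.randint(0, num_steps)` draws from `num_steps + 1` possible offsets. Only `num_steps` of them are distinct in effect, since an offset of `num_steps` yields the same partition alignment as an offset of `0`, minus the first `num_steps` tokens. The commit tightens the upper bound to `num_steps - 1` at every call site. A minimal sketch of the off-by-one (illustrative only, not part of the diff):

```python
import random

random.seed(0)
num_steps = 5

# random.randint(a, b) includes BOTH endpoints, so the old call can
# return num_steps itself: six possible offsets instead of five.
old = {random.randint(0, num_steps) for _ in range(10_000)}
new = {random.randint(0, num_steps - 1) for _ in range(10_000)}
print(sorted(old))  # [0, 1, 2, 3, 4, 5]
print(sorted(new))  # [0, 1, 2, 3, 4]
```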
@@ -334,7 +334,7 @@ for X, Y in seq_data_iter_random(my_seq, batch_size=2, num_steps=5):
 def seq_data_iter_sequential(corpus, batch_size, num_steps): #@save
     """Generate a minibatch of subsequences using sequential partitioning."""
     # Start with a random offset to partition a sequence
-    offset = random.randint(0, num_steps)
+    offset = random.randint(0, num_steps - 1)
     num_tokens = ((len(corpus) - offset - 1) // batch_size) * batch_size
     Xs = d2l.tensor(corpus[offset: offset + num_tokens])
     Ys = d2l.tensor(corpus[offset + 1: offset + 1 + num_tokens])
@@ -351,7 +351,7 @@ def seq_data_iter_sequential(corpus, batch_size, num_steps): #@save
 def seq_data_iter_sequential(corpus, batch_size, num_steps): #@save
     """Generate a minibatch of subsequences using sequential partitioning."""
     # Start with a random offset to partition a sequence
-    offset = random.randint(0, num_steps)
+    offset = random.randint(0, num_steps - 1)
     num_tokens = ((len(corpus) - offset - 1) // batch_size) * batch_size
     Xs = d2l.tensor(corpus[offset: offset + num_tokens])
     Ys = d2l.tensor(corpus[offset + 1: offset + 1 + num_tokens])
@@ -369,7 +369,7 @@ def seq_data_iter_sequential(corpus, batch_size, num_steps): #@save
 def seq_data_iter_sequential(corpus, batch_size, num_steps): #@save
     """Generate a minibatch of subsequences using sequential partitioning."""
     # Start with a random offset to partition a sequence
-    offset = random.randint(0, num_steps)
+    offset = random.randint(0, num_steps - 1)
     num_tokens = ((len(corpus) - offset - 1) // batch_size) * batch_size
     Xs = d2l.tensor(corpus[offset: offset + num_tokens])
     Ys = d2l.tensor(corpus[offset + 1: offset + 1 + num_tokens])
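For reference, here is how the corrected sequential iterator behaves end to end. The diff shows only the first lines of the function body, so the reshape-and-yield part below is a plausible completion rather than the repository's exact code, and `d2l.tensor` is swapped for plain Python lists so the sketch runs standalone:

```python
import random

def seq_data_iter_sequential(corpus, batch_size, num_steps):
    """Generate a minibatch of subsequences using sequential partitioning."""
    # Start with a random offset in [0, num_steps - 1], as in the fix
    offset = random.randint(0, num_steps - 1)
    num_tokens = ((len(corpus) - offset - 1) // batch_size) * batch_size
    Xs = corpus[offset: offset + num_tokens]
    Ys = corpus[offset + 1: offset + 1 + num_tokens]
    # Split the tokens into batch_size contiguous rows (plausible completion,
    # standing in for the d2l.tensor reshape in the real code)
    rows = num_tokens // batch_size
    Xs = [Xs[i * rows:(i + 1) * rows] for i in range(batch_size)]
    Ys = [Ys[i * rows:(i + 1) * rows] for i in range(batch_size)]
    # Walk each row left to right, num_steps tokens at a time
    for i in range(0, rows - rows % num_steps, num_steps):
        yield ([row[i:i + num_steps] for row in Xs],
               [row[i:i + num_steps] for row in Ys])

for X, Y in seq_data_iter_sequential(list(range(35)), batch_size=2, num_steps=5):
    print('X:', X, '\nY:', Y)
```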
@@ -283,7 +283,7 @@ in each subsequence.
 def seq_data_iter_random(corpus, batch_size, num_steps): #@save
     """Generate a minibatch of subsequences using random sampling."""
     # Start with a random offset to partition a sequence
-    corpus = corpus[random.randint(0, num_steps):]
+    corpus = corpus[random.randint(0, num_steps - 1):]
     # Subtract 1 since we need to account for labels
     num_subseqs = (len(corpus) - 1) // num_steps
     # The starting indices for subsequences of length `num_steps`
@@ -333,7 +333,7 @@ This strategy preserves the order of split subsequences when iterating over mini
 def seq_data_iter_sequential(corpus, batch_size, num_steps): #@save
     """Generate a minibatch of subsequences using sequential partitioning."""
     # Start with a random offset to partition a sequence
-    offset = random.randint(0, num_steps)
+    offset = random.randint(0, num_steps - 1)
     num_tokens = ((len(corpus) - offset - 1) // batch_size) * batch_size
     Xs = d2l.tensor(corpus[offset: offset + num_tokens])
     Ys = d2l.tensor(corpus[offset + 1: offset + 1 + num_tokens])
@@ -350,7 +350,7 @@ def seq_data_iter_sequential(corpus, batch_size, num_steps): #@save
 def seq_data_iter_sequential(corpus, batch_size, num_steps): #@save
     """Generate a minibatch of subsequences using sequential partitioning."""
     # Start with a random offset to partition a sequence
-    offset = random.randint(0, num_steps)
+    offset = random.randint(0, num_steps - 1)
     num_tokens = ((len(corpus) - offset - 1) // batch_size) * batch_size
     Xs = d2l.tensor(corpus[offset: offset + num_tokens])
     Ys = d2l.tensor(corpus[offset + 1: offset + 1 + num_tokens])
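The same inclusive-bound issue affects `seq_data_iter_random` above: offsets `0` and `num_steps` trim the corpus to the same boundary alignment (`offset % num_steps == 0`), so the old bound sampled that alignment about twice as often as any other. A small sanity check under assumed toy values, not part of the diff:

```python
import random

random.seed(0)
num_steps = 5

# Each offset k fixes the subsequence alignment via k % num_steps; the old
# inclusive bound maps both 0 and num_steps to alignment 0.
old = [random.randint(0, num_steps) % num_steps for _ in range(60_000)]
new = [random.randint(0, num_steps - 1) % num_steps for _ in range(60_000)]
print({a: old.count(a) for a in range(num_steps)})  # alignment 0 ~ 2x others
print({a: new.count(a) for a in range(num_steps)})  # roughly uniform
```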
d2l/mxnet.py (2 changes: 1 addition & 1 deletion)
@@ -609,7 +609,7 @@ def seq_data_iter_sequential(corpus, batch_size, num_steps):

     Defined in :numref:`sec_language_model`"""
     # Start with a random offset to partition a sequence
-    offset = random.randint(0, num_steps)
+    offset = random.randint(0, num_steps - 1)
     num_tokens = ((len(corpus) - offset - 1) // batch_size) * batch_size
     Xs = d2l.tensor(corpus[offset: offset + num_tokens])
     Ys = d2l.tensor(corpus[offset + 1: offset + 1 + num_tokens])
d2l/paddle.py (2 changes: 1 addition & 1 deletion)
@@ -670,7 +670,7 @@ def seq_data_iter_sequential(corpus, batch_size, num_steps):

     Defined in :numref:`sec_language_model`"""
     # Start with a random offset to partition a sequence
-    offset = random.randint(0, num_steps)
+    offset = random.randint(0, num_steps - 1)
     num_tokens = ((len(corpus) - offset - 1) // batch_size) * batch_size
     Xs = d2l.tensor(corpus[offset: offset + num_tokens])
     Ys = d2l.tensor(corpus[offset + 1: offset + 1 + num_tokens])
d2l/tensorflow.py (2 changes: 1 addition & 1 deletion)
@@ -629,7 +629,7 @@ def seq_data_iter_sequential(corpus, batch_size, num_steps):

     Defined in :numref:`sec_language_model`"""
     # Start with a random offset to partition a sequence
-    offset = random.randint(0, num_steps)
+    offset = random.randint(0, num_steps - 1)
     num_tokens = ((len(corpus) - offset - 1) // batch_size) * batch_size
     Xs = d2l.tensor(corpus[offset: offset + num_tokens])
     Ys = d2l.tensor(corpus[offset + 1: offset + 1 + num_tokens])
d2l/torch.py (2 changes: 1 addition & 1 deletion)
@@ -657,7 +657,7 @@ def seq_data_iter_sequential(corpus, batch_size, num_steps):

     Defined in :numref:`sec_language_model`"""
     # Start with a random offset to partition a sequence
-    offset = random.randint(0, num_steps)
+    offset = random.randint(0, num_steps - 1)
     num_tokens = ((len(corpus) - offset - 1) // batch_size) * batch_size
     Xs = d2l.tensor(corpus[offset: offset + num_tokens])
     Ys = d2l.tensor(corpus[offset + 1: offset + 1 + num_tokens])