module 'd2l.torch' has no attribute 'count_corpus' #2559

wangze1219 · 2023-09-29T06:10:56Z

import math
import os
import random
import torch
from d2l import torch as d2l
import os
import matplotlib.pyplot as plt

os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"
#@save
d2l.DATA_HUB['ptb'] = (d2l.DATA_URL + 'ptb.zip',
'319d85e578af0cdc590547f26231e4e31cdf1e42')

#@save
def read_ptb():
"""将PTB数据集加载到文本行的列表中"""
data_dir = d2l.download_extract('ptb')
# Readthetrainingset.
with open(os.path.join(data_dir, 'ptb.train.txt')) as f:
raw_text = f.read()
return [line.split() for line in raw_text.split('\n')]

sentences = read_ptb()
f'# sentences数: {len(sentences)}'
vocab = d2l.Vocab(sentences, min_freq=10)
f'vocab size: {len(vocab)}'
#@save
def subsample(sentences, vocab):
"""下采样高频词"""
# 排除未知词元''
sentences = [[token for token in line if vocab[token] != vocab.unk]
for line in sentences]
counter = d2l.count_corpus(sentences)
num_tokens = sum(counter.values())

# 如果在下采样期间保留词元，则返回True
def keep(token):
    return(random.uniform(0, 1) <
           math.sqrt(1e-4 / counter[token] * num_tokens))

return ([[token for token in line if keep(token)] for line in sentences],
        counter)

subsampled, counter = subsample(sentences, vocab)
d2l.show_list_len_pair_hist(
['origin', 'subsampled'], '# tokens per sentence',
'count', sentences, subsampled)
plt.show()

The text was updated successfully, but these errors were encountered:

AnirudhDagar · 2023-10-02T11:50:29Z

Please pip install d2l==0.17.6 to use older version of d2l which has these saved functions. In the latest version, we refactored the code and removed them.

zaffnet · 2023-10-27T16:23:22Z

What a nightmare. So much backward incompatibility! Could you mention which version to use at the beginning of each chapter?

AnirudhDagar · 2023-11-07T12:37:34Z

If you are using the latest version of the book then it should work with the latest d2l package.

AnirudhDagar added the question label Oct 2, 2023

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

module 'd2l.torch' has no attribute 'count_corpus' #2559

module 'd2l.torch' has no attribute 'count_corpus' #2559

wangze1219 commented Sep 29, 2023

AnirudhDagar commented Oct 2, 2023

zaffnet commented Oct 27, 2023

AnirudhDagar commented Nov 7, 2023

module 'd2l.torch' has no attribute 'count_corpus' #2559

module 'd2l.torch' has no attribute 'count_corpus' #2559

Comments

wangze1219 commented Sep 29, 2023

AnirudhDagar commented Oct 2, 2023

zaffnet commented Oct 27, 2023

AnirudhDagar commented Nov 7, 2023