pip install stopwds
After installing, you may want to check which version is installed:
stopwds version
from stopwds import stopwords
'''
baidu: 百度停用词表
hit: 哈工大停用词表
scu: 四川大学机器智能实验室停用词表
cn: 中文停用词表
'''
for stopword in stopwords('baidu'):
print(stopword)
You can also use it together with jieba to filter stopwords out of segmented text, like this:
from stopwds import stopwords
import jieba
text = ('医美产业崛起的同时,我国医美行业也形成了一条清晰且完整的产业链,上游医美产品生产企业占据了产业链核心环节。')
cut_sent = [word for word in jieba.cut(text) if word and word not in stopwords()]
- stopwords