# Recovered from a whitespace-collapsed git patch that added
# .ipynb_checkpoints/rhyme_helper-checkpoint.py as a new file.
# NOTE: the same patch also touched word2vec_helper.py (and its
# .ipynb_checkpoints copy); the visible part of those hunks only inserts a
# space after `#` in the trailing comments of the two `w2vpath` lines.


class RhymeWords:
    """Registry of rhyme groups loaded from whitespace-separated text files.

    Every non-blank line of an input file is one group; the tokens on that
    line are stored together as a set. All state lives on the class, so the
    groups are shared by every caller in the process.
    """

    # One set of tokens per loaded line, in file order. Repeated calls to
    # read_rhyme_words() keep appending to this same list.
    rhyme_list = []

    @staticmethod
    def read_rhyme_words(infile):
        """Load rhyme groups from *infile* and append them to ``rhyme_list``.

        The file is read as UTF-8 and undecodable bytes are dropped
        (``errors='ignore'``). Lines that contain no tokens are skipped.

        Args:
            infile: Path of the text file to read.

        Raises:
            OSError: If the file cannot be opened.
        """
        with open(infile, 'r', encoding='utf-8', errors='ignore') as fr:
            for line in fr:
                words = set(line.split())
                # A blank line yields an empty set, which can never match a
                # lookup and only pollutes the printed statistics.
                if words:
                    RhymeWords.rhyme_list.append(words)

    @staticmethod
    def get_rhyme_words(w):
        """Return the first loaded group that contains *w*.

        Args:
            w: Token to look up.

        Returns:
            The set holding *w* (the stored object, not a copy), or ``None``
            when no loaded group contains it.
        """
        for words in RhymeWords.rhyme_list:
            if w in words:
                return words
        return None

    @staticmethod
    def print_stats():
        """Print every group, its multi-character tokens, and the total count.

        For each group the set itself is printed, followed by each token
        longer than one character. The final line reports the sum of the
        group sizes.
        """
        count = 0
        for words in RhymeWords.rhyme_list:
            count += len(words)
            print(words)

            # Tokens longer than one character are echoed individually.
            for w in words:
                if len(w) > 1:
                    print(w)

        print('count = ', count)


if __name__ == '__main__':
    infile = './data/poem/rhyme_words.txt'
    RhymeWords.read_rhyme_words(infile)
    RhymeWords.print_stats()