yangsaisai
7 years ago
| 0 | ||
class RhymeWords:
    """Process-wide table of rhyme groups loaded from plain-text files.

    Each group is a ``set`` of words that rhyme with one another. All state
    lives on the class (``rhyme_list``), so every caller shares one table.
    """

    # Shared by all callers; read_rhyme_words() appends to it and never
    # clears it, so loading several files accumulates their groups.
    rhyme_list = []

    @staticmethod
    def read_rhyme_words(infile: str) -> None:
        """Append the rhyme groups found in *infile* to ``rhyme_list``.

        Every line of the file is one group: its whitespace-separated tokens
        become a set. Blank or whitespace-only lines carry no words and are
        skipped so they do not leave empty groups in the table.

        Args:
            infile: Path of a UTF-8 text file. Undecodable bytes are dropped
                (``errors='ignore'``) instead of raising.
        """
        with open(infile, 'r', encoding='utf-8', errors='ignore') as fr:
            for line in fr:
                words = set(line.split())
                if words:
                    RhymeWords.rhyme_list.append(words)

    @staticmethod
    def get_rhyme_words(w: str):
        """Return the first loaded group containing *w*, or ``None``.

        The returned set is the stored object itself, not a copy.
        """
        return next(
            (words for words in RhymeWords.rhyme_list if w in words),
            None,
        )

    @staticmethod
    def print_stats() -> None:
        """Print every group, its multi-character words, and the word total."""
        count = 0
        for words in RhymeWords.rhyme_list:
            count += len(words)
            print(words)

            # Call out entries longer than a single character.
            for w in words:
                if len(w) > 1:
                    print(w)

        print('count = ', count)
| 30 | ||
if __name__ == '__main__':
    # Script entry point: load the rhyme table from the poem data directory
    # and print its contents and word count.
    RhymeWords.read_rhyme_words('./data/poem/rhyme_words.txt')
    RhymeWords.print_stats()
| 28 | 28 | |
| 29 | 29 | |
| 30 | 30 | if __name__ == '__main__': |
| 31 | # w2vpath = './corpus/vectors_xhj_shj.bin' #分字 | |
| 32 | w2vpath = './corpus/vectors_qa_word.bin' #分词 | |
| 31 | # w2vpath = './corpus/vectors_xhj_shj.bin' # 分字 | |
| 32 | w2vpath = './corpus/vectors_qa_word.bin' # 分词 | |
| 33 | 33 | |
| 34 | 34 | w2v = Word2Vec(w2vpath) |
| 35 | 35 | with open( './corpus/vocab_word.txt','w',encoding='utf-8') as fw: |
| 28 | 28 | |
| 29 | 29 | |
| 30 | 30 | if __name__ == '__main__': |
| 31 | # w2vpath = './corpus/vectors_xhj_shj.bin' #分字 | |
| 32 | w2vpath = './corpus/vectors_qa_word.bin' #分词 | |
| 31 | # w2vpath = './corpus/vectors_xhj_shj.bin' # 分字 | |
| 32 | w2vpath = './corpus/vectors_qa_word.bin' # 分词 | |
| 33 | 33 | |
| 34 | 34 | w2v = Word2Vec(w2vpath) |
| 35 | 35 | with open( './corpus/vocab_word.txt','w',encoding='utf-8') as fw: |