"a", "very", "kind", "man"]]
model = Word2Vec(sentences, min_count=1, seed=1, hs=1)
for word in model.vocab.keys():
    print("word:", word)
    print("index", model.vocab[word].index)
    print("code", model.vocab[word].code)
    print("point", model.vocab[word].point)
    print("-------------")

('word:', 'a')
('index', 0)
('code', array([1, 0, 0], dtype=uint8))
('point', array([4, 3, 1], dtype=uint32))
-------------
('word:', 'kind')
('index', 1)
('code', array([1, 0, 1], dtype=uint8))
('point', array([4, 3, 1], dtype=uint32))
-------------
('word:', 'very')
('index', 2)
('code', array([1, 1, 1], dtype=uint8))
('point', array([4, 3, 0], dtype=uint32))
-------------
('word:', 'is')
('index', 3)
('code', array([0, 1], dtype=uint8))
('point', array([4, 2], dtype=uint32))
-------------
('word:', 'he')
('index', 4)
('code', array([0, 0], dtype=uint8))
('point', array([4, 2], dtype=uint32))
-------------
('word:', 'man')
('index', 5)
('code', array([1, 1, 0], dtype=uint8))
('point', array([4, 3, 0], dtype=uint32))
-------------

Gensimは、Hierarchical softmaxのデータ構造をindex, code, pointという形で保持している