# NOTE: this listing sits inside a quoted string whose opening delimiter lies
# before this chunk. Double quotes and backslashes in the code below therefore
# keep their escaped form, and the closing quote plus the text that follows it
# stay on the final line exactly as they were.

# Upper bound on the number of word indices collected into one sentence buffer.
max_sentence_length = 1000

# Build the lookup table that calc_vec samples negative examples from.
#
# table_size - number of slots to allocate.
# vocab      - indexable collection; vocab[k][1] is read as a numeric weight
#              (presumably the word frequency; confirm against the caller).
#
# Returns a vector of table_size integers. Each slot holds a 0-based vocab
# index, and an entry keeps receiving slots until the slot position, as a
# fraction of table_size, passes the running sum of weight^0.75 shares.
# Still fails on an empty vocab, as before (vocab[1] is read up front).
function init_unigram_table(table_size, vocab)
    train_words_pow = 0.0
    power = 0.75
    table = fill(0, table_size)
    vocab_size = size(vocab)[1]
    for a::Int64 = 0:vocab_size-1
        train_words_pow += vocab[a+1][1]^power
    end
    i = 0
    d1 = (vocab[i+1][1]^power) / train_words_pow
    for a::Int64 = 0:table_size-1
        table[a+1] = i
        if a / float(table_size) > d1
            # Advance only while another entry exists. The previous order
            # (increment, read vocab[i+1], then clamp) read one element past
            # the end whenever rounding left the cumulative share short.
            if i + 1 < vocab_size
                i += 1
                d1 += (vocab[i+1][1]^power) / train_words_pow
            end
        end
    end
    return table
end

# In place: list[base+1 .. base+size] += target[1 .. size]. Returns list.
function addop(size, list, base, target)
    for i::Int64 = 0:size-1
        list[i+base+1] += target[i+1]
    end
    list
end

# In place: list[base+1 .. base+size] += coefficient * target[base2+1 .. base2+size].
# Returns list.
function addop2(size, list, base, coefficient, target, base2)
    for i::Int64 = 0:size-1
        list[i+base+1] += coefficient * target[i+base2+1]
    end
    list
end

# Returns f plus the dot product of coefficient[1 .. size] with
# target[base+1 .. base+size]. Neither array is modified.
function addop3(size, f, coefficient, target, base)
    for i::Int64 = 0:size-1
        f += coefficient[i+1] * target[i+base+1]
    end
    f
end

# In place: list[1 .. size] += target[base+1 .. base+size]. Returns list.
function addop4(size, list, target, base)
    for i::Int64 = 0:size-1
        list[i+1] += target[i+base+1]
    end
    list
end

# State of the generator below; shared by every caller of next_random.
myrandom = 0

# Advance the global linear congruential state and return it.
# NOTE(review): the multiplication wraps on Int64 overflow and abs of the
# minimum Int64 is still negative, so a negative result is theoretically
# possible. Left untouched because any change alters the random sequence.
function next_random()
    global myrandom
    myrandom = abs(myrandom * 25214903917 + 11)
    return myrandom
end

# Logistic value for slot num of a conceptual table with exp_table_size slots
# covering the range -max_exp .. max_exp. Both names are globals defined
# outside this chunk.
function exptable(num)
    num = exp((num / float(exp_table_size) * 2 - 1) * max_exp)
    num / (num + 1)
end

# Bisection over list (assumed sorted ascending; confirm with the caller) that
# returns a 0-based position for target.
#
# Elements are now read by direct index. The earlier form copied the slice
# list[a+1:z+1] twice per iteration just to read one element from it.
# The window-size test also runs before the look-ahead read, so a window of a
# single element returns instead of indexing past that window.
# NOTE(review): the else branch compares the index bound a with target; that
# looks like it was meant to compare a list value. Kept as found.
function bsearch_index(list, target)
    a = 0
    z = size(list)[1] - 1
    while true
        current_entry = list[a + floor(Int64, (z - a) / 2) + 1]
        if current_entry < target
            if z - a <= 1 || list[a + floor(Int64, (z - a) / 2 + 1) + 1] >= target
                return round(Int64, a + (z - a) / 2 + 1)
            else
                a = round(Int64, a + (z - a) / 2)
            end
        else
            if a >= target || z - a <= 1
                return a
            end
            z = round(Int64, z - (z - a) / 2)
        end
    end
end

# Training loop: averages the context vectors around each word and updates the
# weights with negative sampling (the shape of the CBOW variant of word2vec).
#
# iter               - number of passes over original_text.
# original_text      - token sequence; consumed with splice! while training
#                      and restored from a copy at the start of every pass.
# sample             - subsampling threshold; values <= 0 disable subsampling.
# train_words        - word budget per pass, also used for the alpha schedule.
# debug_mode         - values above 1 print progress.
# __vocab_index_hash - maps a token to its 0-based vocab index.
# vocab              - vocab[k][1] is read as the weight of entry k.
# syn0, syn1neg      - flat weight vectors; word w owns the slice starting at
#                      offset w * layer1_size. Both are updated in place.
# negative           - number of negative samples per target word.
# alpha              - starting learning rate.
# __cum_table        - not used in this function.
# table_size         - NOTE(review): overwritten with 1e8 below, so the value
#                      passed in is ignored. Kept to avoid changing results.
# layer1_size        - vector length per word.
# window             - maximum context radius.
#
# Returns [syn0, syn1neg].
function calc_vec(iter, original_text, sample, train_words, debug_mode, __vocab_index_hash, vocab, syn0, syn1neg, negative, alpha, __cum_table, table_size, layer1_size, window)
    sentence_position = 0
    sentence_length = 0
    word_count = 0
    word_count_actual = 0
    last_word_count = 0
    sen = []
    local_iter = iter
    neu1 = []
    neu1e = []
    backup = copy(original_text)
    # trunc on the float directly; the former detour through string and parse
    # produced the same number at the cost of a round-trip.
    __denominator = trunc(Int64, exp_table_size / max_exp / 2)
    __sample_train_words = sample * train_words
    table_size = trunc(Int64, 1e8)
    table = init_unigram_table(table_size, vocab)
    starting_alpha = alpha
    while true
        if sentence_position % 500 == 0 && debug_mode > 1
            print(@sprintf(\"%d %d / \",word_count,last_word_count))
        end
        # Every 10000 words: fold the progress into the running total and
        # decay alpha linearly, with a floor at 0.0001 of its starting value.
        if word_count - last_word_count > 10000
            word_count_actual += word_count - last_word_count
            last_word_count = word_count
            if debug_mode > 1
                print(string(\"\\r Alpha: \",@sprintf(\"%f\",alpha),\" Progress: \",@sprintf(\"%.2f\",(word_count_actual/float((iter*train_words+1))*100)),\"%\"))
            end
            alpha = starting_alpha * (1 - word_count_actual / float(iter * train_words + 1))
            if alpha < starting_alpha * 0.0001
                alpha = starting_alpha * 0.0001
            end
        end
        # Refill the sentence buffer once the previous one is used up.
        if sentence_length == 0
            skipped = 0
            sen = []
            # Plain iteration replaces the hand-expanded start/done/next
            # protocol; original_text is not modified inside this loop.
            for e in original_text
                # NOTE(review): membership is tested with e but the lookup
                # uses string(e); the two agree only for string-like tokens.
                if haskey(__vocab_index_hash, e)
                    word = __vocab_index_hash[string(e)]
                else
                    skipped += 1
                    continue
                end
                word_count += 1
                # Index 0 ends the sentence.
                if word == 0
                    break
                end
                # Randomly drop heavy entries; the keep threshold falls as
                # the weight of the entry grows.
                if sample > 0
                    ran = (sqrt(vocab[word+1][1] / __sample_train_words) + 1) * __sample_train_words / vocab[word+1][1]
                    if ran < (next_random() & (0xFFFF + 0)) / 65536.0
                        continue
                    end
                end
                push!(sen, word)
                sentence_length += 1
                if sentence_length >= max_sentence_length
                    break
                end
            end
            # Drop a fixed-size prefix from the remaining text, or clear it
            # when fewer tokens than that are left.
            if max_sentence_length + skipped <= length(original_text) - 1
                splice!(original_text, 1:max_sentence_length+skipped+1)
            else
                original_text = []
            end
            sentence_position = 0
        end
        # End of a pass: text exhausted or word budget exceeded.
        if size(original_text)[1] == 0 || word_count > train_words
            word_count_actual += word_count - last_word_count
            local_iter -= 1
            if debug_mode > 1
                print(local_iter)
            end
            if local_iter == 0
                break
            end
            word_count = 0
            last_word_count = 0
            sentence_length = 0
            original_text = copy(backup)
            sen = []
            continue
        end
        if sentence_position >= size(sen)[1]
            continue
        end
        word = sen[sentence_position+1]
        neu1 = fill(0.0, layer1_size)
        neu1e = fill(0.0, layer1_size)
        # b shrinks the window by a random amount for this position.
        b = next_random() % window
        cw = 0
        # Sum the vectors of the context words into neu1.
        for j::Int64 = b:window*2-b
            if j != window
                k = sentence_position - window + j
                if k < 0 || k >= sentence_length
                    continue
                end
                if k >= size(sen)[1]
                    continue
                end
                last_word = sen[k+1]
                neu1 = addop4(layer1_size, neu1, syn0, last_word * layer1_size)
                cw += 1
            end
        end
        if cw != 0
            # Turn the sum into an average.
            for j::Int64 = 0:layer1_size-1
                neu1[j+1] /= cw
            end
            if negative > 0
                # j == 0 is the true word (label 1); the remaining rounds draw
                # sampled words (label 0).
                for j::Int64 = 0:negative
                    if j == 0
                        target = word
                        label = 1
                    else
                        nr = next_random()
                        target = table[(nr >> 16) % table_size + 1]
                        if target == 0
                            target = nr % (size(vocab)[1] - 1) + 1
                        end
                        if target == word
                            continue
                        end
                        label = 0
                    end
                    l2 = target * layer1_size
                    f = 0.0
                    f = addop3(layer1_size, f, neu1, syn1neg, l2)
                    # Gradient scale; the logistic is treated as saturated
                    # outside the range -max_exp .. max_exp.
                    if f > max_exp
                        g = (label - 1) * alpha
                    elseif f < (-max_exp)
                        g = label * alpha
                    else
                        g = (label - exptable(trunc(Int64, (f + max_exp) * __denominator))) * alpha
                    end
                    # Accumulate the error for the input side first, then
                    # update the output weights; the order matters because
                    # the first call reads syn1neg before it is changed.
                    neu1e = addop2(layer1_size, neu1e, 0, g, syn1neg, l2)
                    syn1neg = addop2(layer1_size, syn1neg, l2, g, neu1, 0)
                end
            end
            # Apply the accumulated error to every context word vector.
            for j::Int64 = b:window*2-b
                if j != window
                    c = sentence_position - window + j
                    if c < 0 || c >= sentence_length
                        continue
                    end
                    if c >= size(sen)[1]
                        continue
                    end
                    last_word = sen[c+1]
                    syn0 = addop(layer1_size, syn0, last_word * layer1_size, neu1e)
                end
            end
        end
        sentence_position += 1
        if sentence_position >= sentence_length
            sentence_length = 0
        end
    end
    [syn0, syn1neg]
end;;" You can convert word2vec.rb