function [D] = DataProcessing(vecSize, n_words, word2vector_init_method)
%DATAPROCESSING Build the word codebook and load the PTB train/valid/test sets.
%
%   D = DataProcessing(vecSize, n_words, word2vector_init_method)
%
%   Inputs:
%     vecSize                 - number of columns of the codebook; also selects
%                               the vector file '../data/ptb.vector<vecSize>.csv'
%                               when the 'skip_gram' method is used.
%     n_words                 - forwarded unchanged to csvReader and used in the
%                               scaling range of the 'random' initialisation.
%     word2vector_init_method - 'skip_gram' (fill the codebook from the vector
%                               CSV file) or 'random' (uniform random values in
%                               [-range, range]).
%
%   Output:
%     D - struct with fields codeBookSize, codeBook, and for each of the
%         train/valid/test sets the first two outputs of csvReader
%         (<set>_sent_ind, <set>_sent_len).
%
%   Side effects: reads files under '../data/', prints progress messages and
%   saves D to '../data/fofe.ptb.<method>.n_word<n_words>.size<vecSize>.mat'.
%
%   Raises an error when a map line carries no ',<digits>' field, when the
%   vector file has fewer rows than the map file, or when the initialisation
%   method is not one of the two supported names.
%
%   NOTE(review): csvReader is defined outside this file; its exact contract
%   is not visible here.

%%% read the map file: one entry per line, the id is the first ',<digits>' field
mapLines = textread('../data/ptb.map.csv','%s','delimiter','\n');
nEntries = length(mapLines);
id = zeros(nEntries,1);
for i = 1:nEntries
    % A single pass with 'once' returns the token of the first match only,
    % which is what the original two-step match/tokens sequence extracted.
    tok = regexp(mapLines{i}, ',(\d+)', 'tokens', 'once');
    if isempty(tok)
        error('DataProcessing:badMapLine', ...
              'Line %d of ../data/ptb.map.csv has no ",<digits>" id field: "%s"', ...
              i, mapLines{i});
    end
    % str2double avoids the eval performed by str2num. The +1 shifts the id
    % read from the file (presumably 0-based -- TODO confirm) so that it can
    % be used as a MATLAB row index.
    id(i) = str2double(tok{1}) + 1;
end

%%% build the codebook
D.codeBookSize = size(id,1);
switch word2vector_init_method
    case 'skip_gram'
        vec = csvread(sprintf('../data/ptb.vector%d.csv',vecSize));
        if size(vec,1) < D.codeBookSize
            error('DataProcessing:vectorFileTooShort', ...
                  'Vector file has %d rows but the map file lists %d entries.', ...
                  size(vec,1), D.codeBookSize);
        end
        D.codeBook = zeros(D.codeBookSize,vecSize);
        % Row i of the vector file belongs to the word whose (shifted) id is id(i).
        D.codeBook(id,:) = vec(1:D.codeBookSize,:);
    case 'random'
        % Uniform values in [-range, range]; note this branch yields a
        % single-precision codebook whereas 'skip_gram' yields double.
        range = 0.5*sqrt(6/(n_words*vecSize + D.codeBookSize));
        D.codeBook = (rand(D.codeBookSize, vecSize,'single')*2-1) * range;
    otherwise
        % Previously an unknown method silently produced an all-zero codebook.
        error('DataProcessing:unknownInitMethod', ...
              'Unknown word2vector_init_method "%s"; expected ''skip_gram'' or ''random''.', ...
              word2vector_init_method);
end
fprintf('create codebook done\n');

%%%%train set
[D.train_sent_ind, D.train_sent_len, totalWords] = csvReader('../data/ptb.train.csv', n_words);
fprintf(1,'Train set data preprocessing over, contains %d words!\n',totalWords);
%%%%valid set
[D.valid_sent_ind, D.valid_sent_len, totalWords] = csvReader('../data/ptb.valid.csv', n_words);
fprintf(1,'Valid set data preprocessing over, contains %d words!\n',totalWords);
%%%%test  set
[D.test_sent_ind, D.test_sent_len, totalWords] = csvReader('../data/ptb.test.csv', n_words);
fprintf(1,'Test set data preprocessing over, contains %d words!\n',totalWords);

%save
save(sprintf('../data/fofe.ptb.%s.n_word%d.size%d.mat',word2vector_init_method,n_words, vecSize),'D')


