|
function model = k_projectron2_multi_train(X,Y,model)
% K_PROJECTRON2_MULTI_TRAIN Kernel Projectron++ multiclass algorithm
%
%    MODEL = K_PROJECTRON2_MULTI_TRAIN(X,Y,MODEL) trains a classifier
%    according to the Projectron++ multiclass algorithm, using kernels.
%
%    Inputs:
%     - X is the matrix of training samples, one sample per column.
%     - Y is the vector of labels; each label is used directly as a class
%       index, so labels must be positive integers.
%     - MODEL is the struct holding the classifier. It must provide
%       model.ker (kernel function, called as ker(A,B,kerparam)) and
%       model.kerparam. A model returned by a previous call can be passed
%       again to continue training.
%
%    Additional parameters:
%     - model.eta is the sparseness parameter, used to trade-off the
%       performance for sparseness of the classifier. Note that model.eta is
%       the maximum error on EACH single projection; each projected update
%       has 2 projections.
%       Default value is 0.1.
%     - model.n_cla is the number of classes. Default value is max(Y).
%     - model.step is the number of samples between two progress reports.
%       If the field is missing no progress is printed.
%     - model.eachRound, if present, is called with the current model each
%       time a progress report is printed.
%
%    Fields updated in the returned MODEL:
%     - model.beta     coefficients, one row per class, one column per
%                      stored sample.
%     - model.beta2    running sum of model.beta over the iterations.
%     - model.S        iteration indices of the stored samples.
%     - model.SV       stored samples, one per column.
%     - model.Y_cla{c} indices (into model.SV) of the samples stored for
%                      class c.
%     - model.Kinv{c}  matrix maintained for class c and used to compute
%                      the projection coefficients (inverse of the kernel
%                      matrix of the samples in model.Y_cla{c}).
%     - model.errTot, model.aer, model.numSV, model.pred
%                      number of mistakes, average error rate, number of
%                      stored samples and raw scores, per iteration.
%
%    References:
%     - Orabona, F., Keshet, J., & Caputo, B. (2009).
%       Bounded Kernel-Based Online Learning.
%       Journal of Machine Learning Research 10(Nov), (pp. 2643–2666).

%    This file is part of the DOGMA library for MATLAB.
%    Copyright (C) 2009-2011, Francesco Orabona
%
%    This program is free software: you can redistribute it and/or modify
%    it under the terms of the GNU General Public License as published by
%    the Free Software Foundation, either version 3 of the License, or
%    (at your option) any later version.
%
%    This program is distributed in the hope that it will be useful,
%    but WITHOUT ANY WARRANTY; without even the implied warranty of
%    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
%    GNU General Public License for more details.
%
%    You should have received a copy of the GNU General Public License
%    along with this program.  If not, see <http://www.gnu.org/licenses/>.
%
%    Contact the author: francesco [at] orabona.com

n = length(Y);   % number of training samples

if isfield(model,'n_cla')==0
    model.n_cla=max(Y);
end

if isfield(model,'iter')==0
    model.iter=0;
    model.beta=[];
    model.beta2=[];
    model.errTot=0;
    model.numSV=zeros(numel(Y),1);
    model.aer=zeros(numel(Y),1);
    model.pred=zeros(model.n_cla,numel(Y));

    for i=1:model.n_cla
        model.Kinv{i}=[];
        model.Y_cla{i}=[];
    end
end

if isfield(model,'eta')==0
    model.eta=.1;
end

% The support set is read before it is ever written, so make sure it
% exists even when the caller passed a bare struct.
if isfield(model,'S')==0
    model.S=[];
end
if isfield(model,'SV')==0
    model.SV=[];
end

% Progress reports are optional: without model.step nothing is printed.
print_progress=isfield(model,'step');

% Per-call counters, only used for the progress report.
n_skip=0;
n_proj1=0;
n_proj2=0;
n_pred=0;

for i=1:n
    model.iter=model.iter+1;

    % Scores of all the classes on the current sample.
    if numel(model.S)>0
        K_f=feval(model.ker,model.SV,X(:,i),model.kerparam);
        val_f=full(model.beta*K_f);
    else
        val_f=zeros(model.n_cla,1);
        K_f=[];
    end

    Yi=Y(i);

    % Best scoring class among the wrong ones.
    tmp=val_f; tmp(Yi)=-inf;
    [mx_val,idx_mx_val]=max(tmp);

    model.errTot=model.errTot+(val_f(Yi)<=mx_val);
    model.aer(model.iter)=model.errTot/model.iter;
    model.pred(:,model.iter)=val_f;

    if val_f(Yi) < mx_val+1  % Margin error or mistake
        Kii=full(feval(model.ker,X(:,i),X(:,i),model.kerparam));

        % Projection of the sample on the samples stored for the true
        % class and for the best wrong class. delta_* is the squared
        % residual of each projection; with nothing to project on, the
        % residual is the whole Kii.
        idx_true=[];
        idx_wrong=[];
        coeff_true=[];
        coeff_wrong=[];
        delta_true=Kii;
        delta_wrong=Kii;
        if numel(model.S)>0
            idx_true=model.Y_cla{Yi};
            idx_wrong=model.Y_cla{idx_mx_val};

            if numel(idx_true)>0
                coeff_true=K_f(idx_true)'*model.Kinv{Yi};
                % 'max' to prevent numerical instabilities that could make
                % delta a negative quantity.
                delta_true=max(Kii-coeff_true*K_f(idx_true),0);
            end

            if numel(idx_wrong)>0
                coeff_wrong=K_f(idx_wrong)'*model.Kinv{idx_mx_val};
                % 'max' to prevent numerical instabilities that could make
                % delta a negative quantity.
                delta_wrong=max(Kii-coeff_wrong*K_f(idx_wrong),0);
            end
        end

        if val_f(Yi)>mx_val   % Margin error
            loss=1-val_f(Yi)+mx_val;
            delta=delta_wrong+delta_true;
            % 2*model.eta because eta is the tolerance on each single
            % projection.
            if loss-delta/(2*model.eta)>0
                tau_m=min(min(loss/(2*Kii-delta),1),2*(loss-delta/(2*model.eta))/(2*Kii-delta));
                if numel(idx_true)>0
                    model.beta(Yi,idx_true)=model.beta(Yi,idx_true)+tau_m*coeff_true;
                end
                if numel(idx_wrong)>0
                    model.beta(idx_mx_val,idx_wrong)=model.beta(idx_mx_val,idx_wrong)-tau_m*coeff_wrong;
                end
                n_proj2=n_proj2+1;
            else
                n_skip=n_skip+1;
            end
        else   % Mistake
            vec=spalloc(1,model.n_cla,2);

            % A class takes a normal (non projected) update unless both
            % residuals are within eta, or its own residual is numerically
            % zero.
            both_small=delta_true <= model.eta && delta_wrong <= model.eta;
            add_true=~(both_small || delta_true < eps);
            add_wrong=~(both_small || delta_wrong < eps);

            if add_true
                vec(Yi)=1;   % normal update for true
                model.Kinv{Yi}=grow_kernel_inverse(model.Kinv{Yi},coeff_true,delta_true,Kii);
                model.Y_cla{Yi}(end+1)=size(model.SV,2)+1;
            elseif numel(idx_true)>0
                model.beta(Yi,idx_true)=model.beta(Yi,idx_true)+coeff_true;   % project true
            end

            if add_wrong
                vec(idx_mx_val)=-1;   % normal update for wrong
                model.Kinv{idx_mx_val}=grow_kernel_inverse(model.Kinv{idx_mx_val},coeff_wrong,delta_wrong,Kii);
                model.Y_cla{idx_mx_val}(end+1)=size(model.SV,2)+1;
            elseif numel(idx_wrong)>0
                model.beta(idx_mx_val,idx_wrong)=model.beta(idx_mx_val,idx_wrong)-coeff_wrong;   % project wrong
            end

            % Store the sample only if at least one class really uses it;
            % otherwise the update was a pure projection.
            if add_true || add_wrong
                model.beta(:,end+1)=vec;
                model.S(end+1)=model.iter;
                model.SV(:,end+1)=X(:,i);
                model.beta2(:,end+1)=0;
            else
                n_proj1=n_proj1+1;
            end
        end
    else
        n_pred=n_pred+1;
    end

    model.beta2=model.beta2+model.beta;

    model.numSV(model.iter)=numel(model.S);

    if print_progress && mod(i,model.step)==0
        fprintf('#%.0f SV:%5.2f(%d) pred:%5.2f skip:%5.2f proj1:%5.2f proj2:%5.2f AER:%5.2f\n', ...
            ceil(i/1000),numel(model.S)/i*100,numel(model.S),n_pred/i*100,n_skip/i*100,n_proj1/i*100,n_proj2/i*100,model.aer(model.iter)*100);
        if isfield(model,'eachRound')~=0
            feval(model.eachRound,model);
        end
    end
end


function Kinv = grow_kernel_inverse(Kinv,coeff,delta,Kii)
% GROW_KERNEL_INVERSE Extends the per-class matrix KINV by one row and
% column when a new sample is stored for that class.
%
%    KINV is the current matrix (empty if the class has no samples yet),
%    COEFF the row vector of projection coefficients of the new sample,
%    DELTA the squared residual of that projection and KII the kernel of
%    the new sample with itself.

if numel(Kinv)~=0
    m=size(Kinv,1);
    v=[coeff'; -1];
    % Pad with a zero row and column, then add the rank-one correction.
    Kinv=[Kinv, zeros(m,1); zeros(1,m+1)] + v*v'/delta;
else
    % First sample of the class: the matrix is just the scalar inverse.
    Kinv=full(Kii^-1);
end
|