xinwangliu_homepage
/
Radius-incorporated-MKL-algorithm

 
			
			   
				 
					
						
						
							
							function model = k_omcl_multi_train(X,Y,model)
% K_OMCL_MULTI_TRAIN Kernel Online Multi-Cue Learning multiclass algorithm
%
%    MODEL = K_OMCL_MULTI_TRAIN(X,Y,MODEL) trains an classifier
%    according to the Online Multi-Cue Learning algorithm, using kernels.
%
%    MODEL = K_OMCL_MULTI_TRAIN(K,Y,MODEL) trains an classifier
%    according to the Online Multi-Cue Learning algorithm, using kernels.
%    Using pre-computed kernel as input.
%
%    Inputs:
%    K -  N*N*F matrix, K(:,:,i) is a kernel matrix
%    X -  1*F cell, each cell X{f} is a D*N matrix 
%    Y -  Training label, N*1 Vector
%
%    Additional parameters:
%    - model.n_cue is the number of different cues.
%    - model.L1.eta is the sparseness parameter, used to trade-off the
%      performance for sparseness of the classifier for learning each
%      single cue. Note that model.eta is the maximum error on EACH single
%      projection; each projected update has 2 projections.
%      Default value is 0.1.
%    - model.L2.C is the the aggressiveness parameter for the 2nd combining
%      layer, used to trade-off the loss on the current sample with the
%      update on the current hyperplane.
%      Default value is 1.
%
%    Example:
%        See omcl_demo.m
%
%   References:
%     - Jie, L., Orabona, F., & Caputo, B. (2009).
%       An online framework for learning novel concepts over multiple cues.
%       Proceeding of the 9th Asian Conference on Computer Vision
%     - Orabona, F., Keshet, J., & Caputo, B. (2009).
%       Bounded Kernel-Based Online Learning.
%       Journal of Machine Learning Research 10(Nov), (pp. 2643–2666).
 
%    This file is part of the DOGMA library for MATLAB.
%    Copyright (C) 2009-2011, Francesco Orabona
%
%    This program is free software: you can redistribute it and/or modify
%    it under the terms of the GNU General Public License as published by
%    the Free Software Foundation, either version 3 of the License, or
%    (at your option) any later version.
%
%    This program is distributed in the hope that it will be useful,
%    but WITHOUT ANY WARRANTY; without even the implied warranty of
%    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
%    GNU General Public License for more details.
%
%    You should have received a copy of the GNU General Public License
%    along with this program.  If not, see <http://www.gnu.org/licenses/>.
%
%    Contact the authors: francesco [at] orabona.com
%                         jluo      [at] idiap.ch

n = length(Y);   % number of training samples

if isfield(model,'iter')==0
    model.iter=0;
    model.time=[];
    model.beta2=cell(1,model.n_cue);
end

% models 1st layer
for c=1:model.n_cue
    if isfield(model.L1{c}, 'type')==0
        model.L1{c}.type = 'PROJECTRON2';
    end

    if isequal(model.L1{c}.type, 'PA-I')==1 && isfield(model.L1{c}, 'C')==0
        model.L1{c}.C = 1;
    end

    if isequal(model.L1{c}.type, 'Projectron2')==1 && isfield(model.L1{c},'eta')==0
        model.L1{c}.eta=.1;
    end

    if isfield(model.L1{c},'n_cla')==0
        model.L1{c}.n_cla=model.n_cla;
    end

    if isfield(model.L1{c},'iter')==0
        model.L1{c}.iter=0;
        model.L1{c}.beta=[];
        model.L1{c}.beta2=[];
        model.L1{c}.errTot=0;
        model.L1{c}.numSV=zeros(numel(Y),1);
        model.L1{c}.aer=zeros(numel(Y),1);
        model.L1{c}.pred=zeros(numel(Y),1);

        model.L1{c}.n_skip=0;
        model.L1{c}.n_proj1=0;
        model.L1{c}.n_proj2=0;
        model.L1{c}.n_pred=0;

        for i=1:model.L1{c}.n_cla
            model.L1{c}.Kinv{i}=[];
            model.L1{c}.Y_cla{i}=[];
        end
    end
end

% models 2nd layer
if isfield(model.L2,'n_cla')==0
    model.L2.n_cla=max(Y);
end
if isfield(model.L2,'update')==0
    model.L2.update=1; %default update using PA-I
end
if isfield(model.L2,'C')==0
    model.L2.C=1;
end
if isfield(model.L2,'iter')==0
    model.L2.iter=0;
    model.L2.w=zeros(model.n_cla,model.n_cla*model.n_cue);
    model.L2.errTot=0;
    model.L2.numSV=zeros(numel(Y),1);
    model.L2.aer=zeros(numel(Y),1);
    model.L2.pred=zeros(model.L2.n_cla,numel(Y));
end

timerstart = cputime;
h = zeros(model.n_cue*model.n_cla,1);
ht = zeros(model.n_cue*model.n_cla,1);
for i=1:n
    model.iter = model.iter+1;

    Yi=Y(i);

    % Prediction
    pred1 = cell(model.n_cue, 1);
    % 1st layer: Passive-Aggressive/Projectron++
    for c=1:model.n_cue
        pred1{c} = k_layer1_pred(i, c, Yi);
        h((c-1)*model.n_cla+1:c*model.n_cla)=pred1{c}.val_f';
    end
    % 2nd layer: Passive-Aggressive 
    pred2 = layer2_pred(h, Yi);

    % Update
    % 1st layer
    for c=1:model.n_cue
        switch upper(model.L1{c}.type)
            case {'PA-I'}
                pred1{c} = k_pa_update(i,c,Yi,pred1{c});
            case {'PROJECTRON2'}
                pred1{c} = k_projectron2_update(i,c,Yi,pred1{c});
            otherwise
                disp('Unknown base classifier.')
        end 
        ht((c-1)*model.n_cla+1:c*model.n_cla) = pred1{c}.val_f';
    end
    % 2nd layer
    pa_multi_update(ht, Yi);

    if mod(i,model.step)==0 || i==n
        model.time(end+1)=cputime-timerstart;
        fprintf('#%2.0f COMBINATION AER:%5.2f\n', ceil(i/model.step), model.L2.aer(model.iter)*100);
        for c=1:model.n_cue
            fprintf('  Cue %d   SV:%5.2f(%d)   pred:%5.2f   skip:%5.2f   proj1:%5.2f   proj2:%5.2f   AER:%5.2f\n', ...
                c, numel(model.L1{c}.S)/i*100, numel(model.L1{c}.S), model.L1{c}.n_pred/i*100, ...
                model.L1{c}.n_skip/i*100, model.L1{c}.n_proj1/i*100, model.L1{c}.n_proj2/i*100, ...
                model.L1{c}.aer(model.iter)*100);
        end

        if isfield(model,'eachRound')~=0
            if isfield(model, 'outputER')==0
                model.outputER = [];
                model.outputER = feval(model.eachRound, model);
            else
                model.outputER(end+1) = feval(model.eachRound, model);
            end
        end
        timerstart = cputime;
    end
end

% =============== built in functions ===============
% PA-I/Projectron2 predict
function pred = k_layer1_pred(idx_sample, idx_cue, label)
    if numel(model.L1{idx_cue}.S)>0
        if isempty(model.L1{idx_cue}.ker)
            K_f=X(model.L1{idx_cue}.S, idx_sample,idx_cue)';
        else
            K_f=feval(model.L1{c}.ker,model.L1{idx_cue}.SV,X{idx_cue}(:, idx_sample),model.L1{idx_cue}.kerparam);
        end
        val_f=full(model.L1{idx_cue}.beta*K_f);
    else
        K_f=zeros(0,1);
        val_f=zeros(1,model.L1{idx_cue}.n_cla);
    end

    tmp=val_f; tmp(label)=-inf;
    mx_val=max(tmp);
    model.L1{idx_cue}.errTot=model.L1{idx_cue}.errTot+(val_f(label)<=mx_val);
    model.L1{idx_cue}.aer(model.iter)=model.L1{idx_cue}.errTot/model.iter;

    pred.val_f = val_f;
    pred.K_f   = K_f;
end   % end of the function layer1_pred

% PA-I update
% k_pa_update(i,c,Yi,pred1{c})%X{c}(:, i), pred1{c}, Yi);
function pred = k_pa_update(idx_sample, idx_cue, label, pred)
    val_f = pred.val_f;
    K_f   = pred.K_f;

    tmp=val_f; tmp(label)=-inf;
    [mx_val,idx_mx_val]=max(tmp);

    if val_f(label) < mx_val+1 %Margin error or mistake
        model.L1{idx_cue}.S(end+1)=model.iter;

        if isempty(model.L1{idx_cue}.ker)
             Kii = full(X(idx_sample,idx_sample,idx_cue));
             norm_x_square=2*Kii;
        else
             Kii = full(feval(model.L1{idx_cue}.ker,X{idx_cue}(:, idx_sample),X{idx_cue}(:, idx_sample),model.L1{idx_cue}.kerparam));
             norm_x_square=2*Kii;
             model.L1{idx_cue}.SV(:, end+1)=X{idx_cue}(:, idx_sample);
        end

        % PA-I
        new_beta=min((1-(val_f(label)-mx_val))/norm_x_square,model.L1{idx_cue}.C);
        % PA-II
        % new_beta=(1-(val_f(y)-mx_val))/(norm_x_square+1/(2*model.C));

        model.L1{idx_cue}.beta(:,end+1)=spalloc(model.L1{idx_cue}.n_cla,1,2);
        model.L1{idx_cue}.beta(label,end)=new_beta;
        model.L1{idx_cue}.beta(idx_mx_val,end)=-new_beta;

        model.L1{idx_cue}.beta2(:,end+1)=spalloc(model.L1{idx_cue}.n_cla,1,2);

        % if the model is updated, also update the new hyphothesis
        % pred.val_f = full(K_f*model.beta(1:numel(K_f),:));
        % pred.val_f = pred.val_f + full(x(model.iter))*model.beta(end, :);
        pred.val_f = full([ K_f' Kii ]*model.L1{idx_cue}.beta);
    end
    
    model.L1{idx_cue}.beta2=model.L1{idx_cue}.beta2+model.L1{idx_cue}.beta;

    model.L1{idx_cue}.numSV(model.iter)=numel(model.L1{idx_cue}.S);
end   % end of the function pa_update

% projectron2 update
function pred = k_projectron2_update(idx_sample,idx_cue,label,pred)
    val_f = pred.val_f;
    K_f   = pred.K_f;

    idx_true  = [];
    idx_wrong = [];

    tmp=val_f; tmp(label)=-inf;
    [mx_val,idx_mx_val]=max(tmp);

    if val_f(label) < mx_val+1 %Margin error or mistake
        if isempty(model.L1{idx_cue}.ker)
              Kii=full(X(idx_sample,idx_sample,idx_cue));
        else
              Kii=full(feval(model.L1{idx_cue}.ker,X{idx_cue}(:,idx_sample),X{idx_cue}(:,idx_sample),model.L1{idx_cue}.kerparam));
        end
        vec=spalloc(model.L1{idx_cue}.n_cla,1,2);

        delta_true=Kii;
        delta_wrong=Kii;
        if numel(model.L1{idx_cue}.S)>0
            idx_true=model.L1{idx_cue}.Y_cla{label};
            idx_wrong=model.L1{idx_cue}.Y_cla{idx_mx_val};

            if numel(idx_true)>0
                coeff_true=K_f(idx_true)'*model.L1{idx_cue}.Kinv{label};
                % the 'max' is only to prevent numerical problems
                delta_true=max(Kii-coeff_true*K_f(idx_true),0);
            end

            if numel(idx_wrong)>0
                coeff_wrong=K_f(idx_wrong)'*model.L1{idx_cue}.Kinv{idx_mx_val};
                % the 'max' is only to prevent numerical problems
                delta_wrong=max(Kii-coeff_wrong*K_f(idx_wrong),0);
            end
        end

        if val_f(label)>mx_val % Margin error
            loss=1-val_f(label)+mx_val;
            delta=delta_wrong+delta_true;
            if loss-delta/(2*model.L1{idx_cue}.eta)>0       % 2*model.eta because eta is the tollerance on each single projection
                tau_m=min(min(loss/(2*Kii-delta),1),2*(loss-delta/(2*model.L1{idx_cue}.eta))/(2*Kii-delta));
                if numel(idx_true)>0
                    model.L1{idx_cue}.beta(label,idx_true)=model.L1{idx_cue}.beta(label,idx_true)+tau_m*coeff_true;
                end
                if numel(idx_wrong)>0
                    model.L1{idx_cue}.beta(idx_mx_val,idx_wrong)=model.L1{idx_cue}.beta(idx_mx_val,idx_wrong)-tau_m*coeff_wrong;
                end
                model.L1{idx_cue}.n_proj2=model.L1{idx_cue}.n_proj2+1;
            else
                model.L1{idx_cue}.n_skip=model.L1{idx_cue}.n_skip+1;
            end
        else % Mistake
            if (delta_true <= model.L1{idx_cue}.eta && delta_wrong <= model.L1{idx_cue}.eta) || delta_true < eps
                if numel(idx_true)>0
                    % project true
                    model.L1{idx_cue}.beta(label,idx_true)=model.L1{idx_cue}.beta(label,idx_true)+coeff_true;
                end
            else
                vec(label)=1; % normal update for true
                if numel(model.L1{idx_cue}.Kinv{label})~=0
                   tmp=[model.L1{idx_cue}.Kinv{label}, zeros(size(model.L1{idx_cue}.Kinv{label},1),1);zeros(1,size(model.L1{idx_cue}.Kinv{label},1)+1)];
                   tmp=tmp+[coeff_true'; -1]*[coeff_true'; -1]'/delta_true;
                else
                   tmp=full(Kii^-1);
                end
                model.L1{idx_cue}.Kinv{label}=tmp;
                model.L1{idx_cue}.Y_cla{label}(end+1)=numel(model.L1{idx_cue}.S)+1;
            end

            if (delta_true <= model.L1{idx_cue}.eta && delta_wrong <= model.L1{idx_cue}.eta) || delta_wrong < eps
                if numel(idx_wrong)>0
                    % project wrong
                    model.L1{idx_cue}.beta(idx_mx_val,idx_wrong)=model.L1{idx_cue}.beta(idx_mx_val,idx_wrong)-coeff_wrong;
                end
            else
                vec(idx_mx_val)=-1; % normal update for wrong
                if numel(model.L1{idx_cue}.Kinv{idx_mx_val})~=0
                   tmp=[model.L1{idx_cue}.Kinv{idx_mx_val}, zeros(size(model.L1{idx_cue}.Kinv{idx_mx_val},1),1); ...
                        zeros(1,size(model.L1{idx_cue}.Kinv{idx_mx_val},1)+1)];
                   tmp=tmp+[coeff_wrong'; -1]*[coeff_wrong'; -1]'/delta_wrong;
                else
                   tmp=full(Kii^-1);
                end
                model.L1{idx_cue}.Kinv{idx_mx_val}=tmp;
                model.L1{idx_cue}.Y_cla{idx_mx_val}(end+1)=numel(model.L1{idx_cue}.S)+1;
            end

            if delta_true > model.L1{idx_cue}.eta || delta_wrong > model.L1{idx_cue}.eta
                model.L1{idx_cue}.beta(:,end+1)=vec;
                model.L1{idx_cue}.S(end+1)=model.iter;
                if ~isempty(model.L1{idx_cue}.ker)
                    model.L1{idx_cue}.SV(:,end+1)=X{idx_cue}(:,idx_sample);
                end
                model.L1{idx_cue}.beta2(:,end+1)=0;
            else
                model.L1{idx_cue}.n_proj1=model.L1{idx_cue}.n_proj1+1;
            end
        end

        % if the model is updated, also update the new hyphothesis
        pred.val_f = full(model.L1{idx_cue}.beta(:,1:numel(K_f))*K_f);
        pred.val_f = pred.val_f + Kii*vec;
    else
       model.L1{idx_cue}.n_pred=model.L1{idx_cue}.n_pred+1;
    end

    model.L1{idx_cue}.beta2=model.L1{idx_cue}.beta2+model.L1{idx_cue}.beta;

    model.L1{idx_cue}.numSV(model.iter)=numel(model.L1{idx_cue}.S);
end   % end of the function projectron2_update

% pa predict
function pred = layer2_pred(input, label)
    val_f=model.L2.w*input;
    tmp=val_f; tmp(label)=-inf;
    mx_val=max(tmp);
    model.L2.errTot=model.L2.errTot+(val_f(label)<=mx_val);
    model.L2.aer(model.iter)=model.L2.errTot/model.iter;
    model.pred(:,model.iter)=val_f;
    pred = val_f;
end   % end of pa predict

% pa update
function pa_multi_update(ht, y)
    % receive the updated hypothesis from 1st layer
    val_f=model.L2.w*ht;

    tmp=val_f; tmp(y)=-inf;
    [mx_val,idx_mx_val]=max(tmp);

    if val_f(y)<1+mx_val
        norm_x_square=2*norm(ht)^2;
        if model.L2.update==1
            new_beta=min((1-(val_f(y)-mx_val))/norm_x_square,model.L2.C);
        else
            new_beta=(1-(val_f(y)-mx_val))/(norm_x_square+1/(2*model.L2.C));
        end
        model.L2.w(y,:)=model.L2.w(y,:)+new_beta*ht';
        model.L2.w(idx_mx_val,:)=model.L2.w(idx_mx_val,:)-new_beta*ht';
        model.L2.S(end+1)=model.L2.iter;
    end
    model.L2.numSV(model.iter)=numel(model.L2.S);

    % update final w (online to batch conversion)
    alpha = model.L2.w;
    for cc=1:model.n_cue
        beta  = full(model.L1{cc}.beta);
        model.beta{cc} = alpha(:, (cc-1)*model.n_cla+1:cc*model.n_cla)*beta;
    end
    for cc=1:numel(model.beta2)
        if size(model.beta2{cc}, 2)<size(model.beta{cc}, 2)
            model.beta2{cc}(:, end+1)=0;
        end
        model.beta2{cc} = model.beta2{cc}+model.beta{cc};
    end
end   % end of pa update

end