//=============================================================================
// Distributed under GNU General Public License (see license.txt for details).
//
// Copyright (c) 2007 Shai Shalev-Shwartz.
// All Rights Reserved.
//=============================================================================
// File Name: pegasos_optimize.cc
// Implements the main optimization functions of Pegasos.
//=============================================================================

#include "pegasos_optimize.h"

// Helper function: returns the elapsed CPU time in whole seconds.
long get_runtime(void)
{
  clock_t start = clock();
  return (long)((double)start / (double)CLOCKS_PER_SEC);
}


// ------------------------------------------------------------//
// ---------------- OPTIMIZING --------------------------------//
// ------------------------------------------------------------//
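// Pegasos minimizes the regularized hinge-loss objective
//   f(w) = (lambda/2)*||w||^2 + (1/m) * sum_i max(0, 1 - y_i*<w,x_i>)
// by stochastic sub-gradient descent: each iteration samples a mini-batch of
// exam_per_iter examples, steps against a sub-gradient estimate with rate
// eta_t, and optionally projects W back onto a ball around the origin.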
void Learn(// Input variables
           std::vector<simple_sparse_vector>& Dataset,
           std::vector<int>& Labels,
           uint dimension,
           std::vector<simple_sparse_vector>& testDataset,
           std::vector<int>& testLabels,
           double lambda, int max_iter, int exam_per_iter, int num_iter_to_avg,
           std::string& model_filename,
           // Output variables
           long& train_time, long& calc_obj_time, double& obj_value,
           double& norm_value, double& loss_value, double& zero_one_error,
           double& test_loss, double& test_error,
           // additional parameters
           int eta_rule_type, double eta_constant,
           int projection_rule, double projection_constant) {

  uint num_examples = Labels.size();

  long startTime = get_runtime();
  long endTime;

  // Initialization of classification vector
  WeightVector W(dimension);
  WeightVector AvgW(dimension);
  // number of final iterates averaged into AvgW (capped at max_iter)
  double avgScale = (num_iter_to_avg > max_iter) ? max_iter : num_iter_to_avg;

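  // W is the current iterate; AvgW accumulates the average of the last
  // num_iter_to_avg iterates, which is typically a more stable predictor
  // than the final iterate alone.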
  // ---------------- Main Loop -------------------
  for (int i = 0; i < max_iter; ++i) {

    // learning rate
    double eta;
    if (eta_rule_type == 0) { // Pegasos eta rule
      eta = 1.0 / (lambda * (i+2));
    } else if (eta_rule_type == 1) { // Norma rule
      eta = eta_constant / sqrt(i+2);
      // solve numerical problems
      W.make_my_a_one();
    } else { // constant rate
      eta = eta_constant;
    }
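    // Note: eta uses (i+2) rather than (i+1); with eta = 1/(lambda*(i+1)) the
    // first scaling factor (1 - eta*lambda) would be exactly zero at i = 0,
    // so the offset presumably starts the schedule one step later on purpose.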

    // gradient indices and losses
    std::vector<uint> grad_index;
    std::vector<double> grad_weights;

    // calc sub-gradients
    for (int j = 0; j < exam_per_iter; ++j) {

      // choose random example
      uint r = rand() % num_examples;

      // calculate prediction
      double prediction = W * Dataset[r];

      // calculate hinge loss
      double cur_loss = 1 - Labels[r] * prediction;
      if (cur_loss < 0.0) cur_loss = 0.0;

      // and add to the gradient
      if (cur_loss > 0.0) {
        grad_index.push_back(r);
        grad_weights.push_back(eta * Labels[r] / exam_per_iter);
      }
    }
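    // A sub-gradient of the hinge loss max(0, 1 - y*<w,x>) is -y*x when the
    // margin is violated (loss > 0) and 0 otherwise, so only violating
    // examples contribute, each with weight eta * y / exam_per_iter.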

    // scale w: the sub-gradient of (lambda/2)*||w||^2 contributes the
    // factor (1 - eta*lambda)
    W.scale(1.0 - eta * lambda);

    // and add sub-gradients
    for (uint j = 0; j < grad_index.size(); ++j) {
      W.add(Dataset[grad_index[j]], grad_weights[j]);
    }

    // Project if needed
    if (projection_rule == 0) { // Pegasos projection rule
      double norm2 = W.snorm();
      if (norm2 > 1.0 / lambda) {
        W.scale(sqrt(1.0 / (lambda * norm2)));
      }
    } else if (projection_rule == 1) { // projection onto a ball of a given radius
      double norm2 = W.snorm();
      if (norm2 > (projection_constant * projection_constant)) {
        W.scale(projection_constant / sqrt(norm2));
      }
    } // else -- no projection
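    // The Pegasos rule projects onto the ball of radius 1/sqrt(lambda):
    // snorm() is the squared norm, hence the comparison against 1/lambda.
    // The Pegasos analysis shows the optimum of f lies inside this ball,
    // so the projection never excludes the solution.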

    // and update AvgW over the last num_iter_to_avg iterations,
    // i.e. whenever i >= max_iter - num_iter_to_avg
    if (max_iter <= num_iter_to_avg + i)
      AvgW.add(W, 1.0 / avgScale);
  }

  // record training time
  endTime = get_runtime();
  train_time = endTime - startTime;
  startTime = get_runtime();

  // Calculate objective value
  norm_value = AvgW.snorm();
  obj_value = norm_value * lambda / 2.0;
  loss_value = 0.0;
  zero_one_error = 0.0;
  for (uint i = 0; i < Dataset.size(); ++i) {
    double cur_loss = 1 - Labels[i] * (AvgW * Dataset[i]);
    if (cur_loss < 0.0) cur_loss = 0.0;
    loss_value += cur_loss / num_examples;
    obj_value += cur_loss / num_examples;
    // hinge loss >= 1 exactly when the signed prediction is <= 0,
    // i.e. a classification error
    if (cur_loss >= 1.0) zero_one_error += 1.0 / num_examples;
  }

  endTime = get_runtime();
  calc_obj_time = endTime - startTime;

  // Calculate test_loss and test_error
  test_loss = 0.0;
  test_error = 0.0;
  for (uint i = 0; i < testDataset.size(); ++i) {
    double cur_loss = 1 - testLabels[i] * (AvgW * testDataset[i]);
    if (cur_loss < 0.0) cur_loss = 0.0;
    test_loss += cur_loss;
    if (cur_loss >= 1.0) test_error += 1.0;
  }
  if (testDataset.size() != 0) {
    test_loss /= testDataset.size();
    test_error /= testDataset.size();
  }

  // finally, print the model to the model_file
  if (model_filename != "noModelFile") {
    std::ofstream model_file(model_filename.c_str());
    if (!model_file.good()) {
      std::cerr << "error opening " << model_filename << std::endl;
      exit(EXIT_FAILURE);
    }
    AvgW.print(model_file);
    model_file.close();
  }

}


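// LearnAndValidate runs the same stochastic updates as Learn, but instead of
// averaging the last iterates it evaluates a few randomly chosen intermediate
// iterates on a random validation subsample and returns the best one found.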
void LearnAndValidate(// Input variables
                      std::vector<simple_sparse_vector>& Dataset,
                      std::vector<int>& Labels,
                      uint dimension,
                      std::vector<simple_sparse_vector>& testDataset,
                      std::vector<int>& testLabels,
                      double lambda, int max_iter,
                      int exam_per_iter, int num_example_to_validate,
                      std::string& model_filename,
                      // Output variables
                      long& train_time, long& calc_obj_time,
                      double& obj_value, double& norm_value,
                      double& loss_value, double& zero_one_error,
                      double& test_loss, double& test_error,
                      // additional parameters
                      int eta_rule_type, double eta_constant,
                      int projection_rule, double projection_constant) {

  uint num_examples = Labels.size();

  long startTime = get_runtime();
  long endTime;

  // Initialization of classification vector
  WeightVector W(dimension);
  WeightVector BestW(dimension);
  double best_obj = 1.0; // objective of the zero solution (hinge loss is 1 everywhere)

  // create validation indices
  std::vector<uint> validate_indices(num_example_to_validate);
  for (uint i = 0; i < validate_indices.size(); ++i)
    validate_indices[i] = rand() % num_examples;

  // Choose s random candidate iterations, one from each of s equal blocks
  int s = 5; // corresponds to confidence of 0.9933
  int block_size = max_iter / s;
  if (block_size < 1) block_size = 1; // guard against max_iter < s
  std::vector<int> candidates(s);
  for (int i = 0; i < s; ++i) {
    candidates[i] = block_size * i + rand() % block_size;
  }
  candidates[s-1] = max_iter - 1; // ensure the final iterate is always a candidate
  int cur_block = 0;

  // ---------------- Main Loop -------------------
  for (int i = 0; i < max_iter; ++i) {

    // learning rate
    double eta;
    if (eta_rule_type == 0) { // Pegasos eta rule
      eta = 1.0 / (lambda * (i+2));
    } else if (eta_rule_type == 1) { // Norma rule
      eta = eta_constant / sqrt(i+2);
      // solve numerical problems
      if (projection_rule != 2)
        W.make_my_a_one();
    } else { // constant rate
      eta = eta_constant;
    }

    // gradient indices and losses
    std::vector<uint> grad_index;
    std::vector<double> grad_weights;

    // calc sub-gradients
    for (int j = 0; j < exam_per_iter; ++j) {

      // choose random example
      uint r = rand() % num_examples;

      // calculate prediction
      double prediction = W * Dataset[r];

      // calculate hinge loss
      double cur_loss = 1 - Labels[r] * prediction;
      if (cur_loss < 0.0) cur_loss = 0.0;

      // and add to the gradient
      if (cur_loss > 0.0) {
        grad_index.push_back(r);
        grad_weights.push_back(eta * Labels[r] / exam_per_iter);
      }
    }

    // scale w
    W.scale(1.0 - eta * lambda);

    // and add sub-gradients
    for (uint j = 0; j < grad_index.size(); ++j) {
      W.add(Dataset[grad_index[j]], grad_weights[j]);
    }

    // Project if needed
    if (projection_rule == 0) { // Pegasos projection rule
      double norm2 = W.snorm();
      if (norm2 > 1.0 / lambda) {
        W.scale(sqrt(1.0 / (lambda * norm2)));
      }
    } else if (projection_rule == 1) { // projection onto a ball of a given radius
      double norm2 = W.snorm();
      if (norm2 > (projection_constant * projection_constant)) {
        W.scale(projection_constant / sqrt(norm2));
      }
    } // else -- no projection

    // and validate: estimate the objective of the current iterate on the
    // validation subsample and keep the best candidate seen so far
    if (i == candidates[cur_block]) {
      double obj = 0.0;
      for (uint j = 0; j < validate_indices.size(); ++j) {
        uint ind = validate_indices[j];
        double cur_loss = 1 - Labels[ind] * (W * Dataset[ind]);
        if (cur_loss < 0.0) cur_loss = 0.0;
        obj += cur_loss;
      }
      obj /= validate_indices.size();
      obj += lambda / 2.0 * W.snorm();
      if (obj <= best_obj) {
        BestW = W;
        best_obj = obj;
      }
      cur_block++;
    }
  }
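
  // Rationale for the candidate scheme: a single random iterate is nearly
  // optimal with constant probability, so taking the best of s = 5 independent
  // candidates drives the failure probability down exponentially in s; the
  // 0.9933 figure quoted above is presumably 1 - e^{-5} ~= 0.9933.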

  // record training time
  endTime = get_runtime();
  train_time = endTime - startTime;
  startTime = get_runtime();

  // Calculate objective value
  norm_value = BestW.snorm();
  obj_value = norm_value * lambda / 2.0;
  loss_value = 0.0;
  zero_one_error = 0.0;
  for (uint i = 0; i < Dataset.size(); ++i) {
    double cur_loss = 1 - Labels[i] * (BestW * Dataset[i]);
    if (cur_loss < 0.0) cur_loss = 0.0;
    loss_value += cur_loss / num_examples;
    obj_value += cur_loss / num_examples;
    if (cur_loss >= 1.0) zero_one_error += 1.0 / num_examples;
  }

  endTime = get_runtime();
  calc_obj_time = endTime - startTime;

  // Calculate test_loss and test_error
  test_loss = 0.0;
  test_error = 0.0;
  for (uint i = 0; i < testDataset.size(); ++i) {
    double cur_loss = 1 - testLabels[i] * (BestW * testDataset[i]);
    if (cur_loss < 0.0) cur_loss = 0.0;
    test_loss += cur_loss;
    if (cur_loss >= 1.0) test_error += 1.0;
  }
  if (testDataset.size() != 0) {
    test_loss /= testDataset.size();
    test_error /= testDataset.size();
  }

  // finally, print the model to the model_file
  if (model_filename != "noModelFile") {
    std::ofstream model_file(model_filename.c_str());
    if (!model_file.good()) {
      std::cerr << "error opening " << model_filename << std::endl;
      exit(EXIT_FAILURE);
    }
    BestW.print(model_file);
    model_file.close();
  }

}


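// LearnReturnLast performs the same updates but returns the last iterate
// (no averaging and no validation). Note that, unlike the variants above, it
// also clips W to the non-negative orthant after every update via W.positive().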
void LearnReturnLast(// Input variables
                     std::vector<simple_sparse_vector>& Dataset,
                     std::vector<int>& Labels,
                     uint dimension,
                     std::vector<simple_sparse_vector>& testDataset,
                     std::vector<int>& testLabels,
                     double lambda, int max_iter,
                     int exam_per_iter,
                     std::string& model_filename,
                     // Output variables
                     long& train_time, long& calc_obj_time,
                     double& obj_value, double& norm_value,
                     double& loss_value, double& zero_one_error,
                     double& test_loss, double& test_error,
                     // additional parameters
                     int eta_rule_type, double eta_constant,
                     int projection_rule, double projection_constant) {

  uint num_examples = Labels.size();

  long startTime = get_runtime();
  long endTime;

  // Initialization of classification vector
  WeightVector W(dimension);

  // ---------------- Main Loop -------------------
  for (int i = 0; i < max_iter; ++i) {

    // learning rate
    double eta;
    if (eta_rule_type == 0) { // Pegasos eta rule
      eta = 1.0 / (lambda * (i+2));
    } else if (eta_rule_type == 1) { // Norma rule
      eta = eta_constant / sqrt(i+2);
      // solve numerical problems
      W.make_my_a_one();
    } else { // constant rate
      eta = eta_constant;
    }

    // gradient indices and losses
    std::vector<uint> grad_index;
    std::vector<double> grad_weights;

    // calc sub-gradients
    for (int j = 0; j < exam_per_iter; ++j) {

      // choose random example
      uint r = rand() % num_examples;

      // calculate prediction
      double prediction = W * Dataset[r];

      // calculate hinge loss
      double cur_loss = 1 - Labels[r] * prediction;
      if (cur_loss < 0.0) cur_loss = 0.0;

      // and add to the gradient
      if (cur_loss > 0.0) {
        grad_index.push_back(r);
        grad_weights.push_back(eta * Labels[r] / exam_per_iter);
      }
    }

    // scale w
    W.scale(1.0 - eta * lambda);

    // and add sub-gradients
    for (uint j = 0; j < grad_index.size(); ++j) {
      W.add(Dataset[grad_index[j]], grad_weights[j]);
    }

    // Project if needed
    W.positive(); // project onto the non-negative orthant

    if (projection_rule == 0) { // Pegasos projection rule
      double norm2 = W.snorm();
      if (norm2 > 1.0 / lambda) {
        W.scale(sqrt(1.0 / (lambda * norm2)));
      }
    } else if (projection_rule == 1) { // projection onto a ball of a given radius
      double norm2 = W.snorm();
      if (norm2 > (projection_constant * projection_constant)) {
        W.scale(projection_constant / sqrt(norm2));
      }
    } // else -- no projection

  }


  // record training time
  endTime = get_runtime();
  train_time = endTime - startTime;
  startTime = get_runtime();

  // Calculate objective value
  norm_value = W.snorm();
  obj_value = norm_value * lambda / 2.0;
  loss_value = 0.0;
  zero_one_error = 0.0;
  for (uint i = 0; i < Dataset.size(); ++i) {
    double cur_loss = 1 - Labels[i] * (W * Dataset[i]);
    if (cur_loss < 0.0) cur_loss = 0.0;
    loss_value += cur_loss / num_examples;
    obj_value += cur_loss / num_examples;
    if (cur_loss >= 1.0) zero_one_error += 1.0 / num_examples;
  }

  endTime = get_runtime();
  calc_obj_time = endTime - startTime;

  // Calculate test_loss and test_error
  test_loss = 0.0;
  test_error = 0.0;
  for (uint i = 0; i < testDataset.size(); ++i) {
    double cur_loss = 1 - testLabels[i] * (W * testDataset[i]);
    if (cur_loss < 0.0) cur_loss = 0.0;
    test_loss += cur_loss;
    if (cur_loss >= 1.0) test_error += 1.0;
  }
  if (testDataset.size() != 0) {
    test_loss /= testDataset.size();
    test_error /= testDataset.size();
  }

  // finally, print the model to the model_file
  if (model_filename != "noModelFile") {
    std::ofstream model_file(model_filename.c_str());
    if (!model_file.good()) {
      std::cerr << "error opening " << model_filename << std::endl;
      exit(EXIT_FAILURE);
    }
    W.print(model_file);
    model_file.close();
  }

}

// Read Lambda file: one lambda value per whitespace-separated token
void ReadLambda(//input
                std::string& lambda_filename,
                //output
                int& num_lambda,
                std::vector<double>& lambdaarr)
{
  std::ifstream file(lambda_filename.c_str());
  if (!file.good()) {
    std::cerr << "error reading " << lambda_filename << std::endl;
    exit(EXIT_FAILURE);
  }
  num_lambda = 0;
  // read until extraction fails; looping on eof() alone would push the
  // last value twice (eof is only set after a read past the end fails)
  double tempd;
  while (file >> tempd) {
    lambdaarr.push_back(tempd);
    num_lambda++;
  }
}


// ------------------------------------------------------------//
// ---------------- READING DATA ------------------------------//
// ------------------------------------------------------------//
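// The data file is expected in SVM-Light format: each line holds a label
// (+1 or -1) followed by index:value pairs for the non-zero features, e.g.
//   +1 3:0.5 7:1.2   # optional comment
// (the sample line is illustrative). Lines beginning with '#' are skipped,
// and anything after a '#' is stripped before parsing.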
void ReadData(// input
              std::string& data_filename,
              // output
              std::vector<simple_sparse_vector>& Dataset,
              std::vector<int>& Labels,
              uint& dimension,
              long& readingTime) {

  dimension = 0;

  // Start a timer
  long startTime = get_runtime();

  // OPEN DATA FILE
  // =========================
  std::ifstream data_file(data_filename.c_str());
  if (!data_file.good()) {
    std::cerr << "error opening " << data_filename << std::endl;
    exit(EXIT_FAILURE);
  }

  // Read SVM-Light data file
  // ========================
  int num_examples = 0;
  std::string buf;
  while (getline(data_file, buf)) {
    // ignore empty lines and lines which begin with #
    if (buf.empty() || buf[0] == '#') continue;
    // Erase what comes after #
    size_t pos = buf.find('#');
    if (pos < buf.size()) {
      buf.erase(pos);
    }
    // replace ':' with white space, counting the number of features
    int n = 0;
    for (size_t pos = 0; pos < buf.size(); ++pos)
      if (buf[pos] == ':') {
        n++;
        buf[pos] = ' ';
      }
    // read from the string
    std::istringstream is(buf);
    int label = 0;
    is >> label;
    if (label != 1 && label != -1) {
      std::cerr << "Error reading SVM-light format. Abort." << std::endl;
      exit(EXIT_FAILURE);
    }
    Labels.push_back(label);
    simple_sparse_vector instance(is, n);
    Dataset.push_back(instance);
    num_examples++;
    // the dimension is one more than the largest feature index seen
    uint cur_max_ind = instance.max_index() + 1;
    if (cur_max_ind > dimension) dimension = cur_max_ind;
  }

  data_file.close();

#ifdef nodef
  std::cerr << "num_examples = " << num_examples
            << " dimension = " << dimension
            << " Dataset.size = " << Dataset.size()
            << " Labels.size = " << Labels.size() << std::endl;
#endif

  // record reading time
  readingTime = get_runtime() - startTime;

}


// -------------------------------------------------------------//
// ---------------------- Experiments mode ---------------------//
// -------------------------------------------------------------//

class ExperimentStruct {
public:
  ExperimentStruct() { }
  void Load(std::istringstream& is) {
    is >> lambda >> max_iter >> exam_per_iter >> num_iter_to_avg
       >> eta_rule >> eta_constant >> projection_rule
       >> projection_constant;
  }
  void Print() {
    std::cout << lambda << "\t\t" << max_iter << "\t\t" << exam_per_iter << "\t\t"
              << num_iter_to_avg << "\t\t" << eta_rule << "\t\t" << eta_constant
              << "\t\t" << projection_rule << "\t\t" << projection_constant << "\t\t";
  }
  void PrintHead() {
    std::cerr << "lambda\t\tT\t\tk\t\tnumValid\t\te_r\t\te_c\t\tp_r\t\tp_c\t\t";
  }
  double lambda, eta_constant, projection_constant;
  uint max_iter, exam_per_iter, num_iter_to_avg;
  int eta_rule, projection_rule;
};
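
// Each line of the experiments file supplies the eight fields read by Load(),
// in order: lambda, max_iter, exam_per_iter, num_iter_to_avg, eta_rule,
// eta_constant, projection_rule, projection_constant; for example (values
// purely illustrative):
//   0.001 100000 1 100 0 0.0 0 0.0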

class ResultStruct {
public:
  ResultStruct() : trainTime(0), calc_obj_time(0),
                   norm_value(0.0), loss_value(0.0),
                   zero_one_error(0.0), obj_value(0.0),
                   test_loss(0.0), test_error(0.0) { }
  void Print() {
    std::cout << trainTime << "\t\t"
              << calc_obj_time << "\t\t"
              << norm_value << "\t\t"
              << loss_value << "\t\t"
              << zero_one_error << "\t\t"
              << obj_value << "\t\t"
              << test_loss << "\t\t"
              << test_error << "\t\t";
  }
  void PrintHead() {
    std::cerr << "tTime\t\tcoTime\t\t||w||\t\tL\t\tL0-1\t\tobj_value\t\ttest_L\t\ttestL0-1\t\t";
  }

  long trainTime, calc_obj_time;
  double norm_value, loss_value, zero_one_error, obj_value, test_loss, test_error;
};


void run_experiments(std::string& experiments_filename,
                     std::vector<simple_sparse_vector>& Dataset,
                     std::vector<int>& Labels,
                     uint dimension,
                     std::vector<simple_sparse_vector>& testDataset,
                     std::vector<int>& testLabels) {

  // open the experiments file
  std::ifstream exp_file(experiments_filename.c_str());
  if (!exp_file.good()) {
    std::cerr << "error opening " << experiments_filename << std::endl;
    exit(EXIT_FAILURE);
  }

  // read the entire experiment specification
  uint num_experiments = 0;
  std::vector<ExperimentStruct> v;
  std::string buf;
  while (getline(exp_file, buf)) {
    // read from the string
    std::istringstream is(buf);
    ExperimentStruct tmp;
    tmp.Load(is);
    v.push_back(tmp);
    num_experiments++;
  }
  exp_file.close();

  if (num_experiments == 0) {
    std::cerr << "no experiments found in " << experiments_filename << std::endl;
    return;
  }

  // run all the experiments (no model file is written in this mode)
  std::vector<ResultStruct> res(num_experiments);
  std::string no_model = "noModelFile";
  for (uint i = 0; i < num_experiments; ++i) {
    LearnReturnLast(Dataset, Labels, dimension, testDataset, testLabels,
                    v[i].lambda, v[i].max_iter, v[i].exam_per_iter,
                    no_model,
                    res[i].trainTime, res[i].calc_obj_time, res[i].obj_value,
                    res[i].norm_value,
                    res[i].loss_value, res[i].zero_one_error,
                    res[i].test_loss, res[i].test_error,
                    v[i].eta_rule, v[i].eta_constant,
                    v[i].projection_rule, v[i].projection_constant);
  }

  // print results (headers go to stderr, values to stdout)
  v[0].PrintHead(); res[0].PrintHead();
  std::cerr << std::endl;
  for (uint i = 0; i < num_experiments; ++i) {
    v[i].Print();
    res[i].Print();
    std::cout << std::endl;
  }

  std::cerr << std::endl;

}