|
- ################################################################################
- #
- # Copyright 2022 The DecisionHoldem Authors,namely,Qibin Zhou,
- # Dongdong Bai,Junge Zhang and Kaiqi Huang. All Rights Reserved.
- #
- # Licensed under the GNU AFFERO GENERAL PUBLIC LICENSE
- # Version 3, 19 November 2007
- #
- # This program is distributed in the hope that it will be useful,
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- # GNU Affero General Public License for more details.
- #
- # You should have received a copy of the GNU Affero General Public License
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
- #
- ################################################################################
- #pragma once
#include <sys/time.h>

#include <cassert>
#include <chrono>
#include <cstdlib>
#include <iostream>
#include <random>

// Project headers are kept in their original relative order.
#include "tree/Node.h"
#include "poker/State.h"
#include "tree/Bulid_Tree.h"
#include "tree/Save_load.h"
#include "tree/Visualize_Tree.h"
#include "tree/Exploitability.h"
- using namespace std;
-
- double blueprint_cfr(strategy_node* cnode[], Pokerstate& state, int pi, double w) { // mccfr
- int ph = state.player_i_index;
- assert(cnode[0]->action_len == cnode[1]->action_len);
- if (state.is_terminal()) {
- return state.payout(pi);
- }
- else if (ph == pi) {
- double sigma[12];
- double vo = 0;
- calculate_strategy(cnode[ph]->regret, cnode[ph]->action_len, sigma);
- int len = cnode[0]->action_len;
- double voa[12] = { 0 };
- for (int i = 0; i < len; i++) {
- Pokerstate st2 = state;
- bool is_chance = st2.apply_action(cnode[ph]->actionstr[i]);
- strategy_node* cnode2[2];
- cnode2[0] = cnode[0]->actions + i;//cnode[0]->findnode(cnode[0]->actionstr[i]);
- cnode2[1] = cnode[1]->actions + i;//cnode[1]->findnode(cnode[1]->actionstr[i]);
- if (is_chance) {
- cnode2[0] = (cnode2[0]->actions + st2.table.players[0].clusters[st2.betting_stage]);
- cnode2[1] = (cnode2[1]->actions + st2.table.players[1].clusters[st2.betting_stage]);
- }
- voa[i] = blueprint_cfr(cnode2, st2, pi, w) * w;
- vo += sigma[i] * voa[i];
- }
- for (int i = 0; i < len; i++) {
- cnode[ph]->regret[i] += voa[i] - vo;
- assert(cnode[ph]->regret[i] < 200000000);
- if (cnode[ph]->regret[i] < -210000000)
- cnode[ph]->regret[i] = -210000000;
- }
- assert(w > 0);
- return vo / w;
- }
- else {
- double sigma[12];
- double vo = 0;
- calculate_strategy(cnode[ph]->regret, cnode[ph]->action_len, sigma);
- int len = cnode[0]->action_len;
- double voa[12] = { 0 };
- for (int i = 0; i < len; i++) {
- if (sigma[i] > 0) {
- Pokerstate st2 = state;
- bool is_chance = st2.apply_action(cnode[ph]->actionstr[i]);
- strategy_node* cnode2[2];
- cnode2[0] = cnode[0]->actions + i;//cnode[0]->findnode(cnode[0]->actionstr[i]);
- cnode2[1] = cnode[1]->actions + i;//cnode[1]->findnode(cnode[1]->actionstr[i]);
- if (is_chance) {
- cnode2[0] = (cnode2[0]->actions + st2.table.players[0].clusters[st2.betting_stage]);
- cnode2[1] = (cnode2[1]->actions + st2.table.players[1].clusters[st2.betting_stage]);
- }
- voa[i] = blueprint_cfr(cnode2, st2, pi, w * sigma[i]);
- vo += sigma[i] * voa[i];
- }
- }
- return vo;
- }
- }
- double blueprint_cfrp(strategy_node* cnode[], Pokerstate& state, int pi, int c, double w) { // cfr prune
- int ph = state.player_i_index;
- assert(cnode[0]->action_len == cnode[1]->action_len);
- if (state.is_terminal()) {
- return state.payout(pi);
- }
- else if (ph == pi) {
- double sigma[12];
- double vo = 0;
- calculate_strategy(cnode[ph]->regret, cnode[ph]->action_len, sigma);
- int len = cnode[0]->action_len;
- bool explored[15] = { false };
- double voa[15] = { 0 };
- for (int i = 0; i < len; i++) {
- if (cnode[ph]->regret[i] > c) {
- Pokerstate st2 = state;
- bool is_chance = st2.apply_action(cnode[ph]->actionstr[i]);
- strategy_node* cnode2[2];
- cnode2[0] = cnode[0]->actions + i;//cnode[0]->findnode(cnode[0]->actionstr[i]);
- cnode2[1] = cnode[1]->actions + i;//cnode[1]->findnode(cnode[1]->actionstr[i]);
- if (is_chance) {
- cnode2[0] = (cnode2[0]->actions + st2.table.players[0].clusters[st2.betting_stage]);
- cnode2[1] = (cnode2[1]->actions + st2.table.players[1].clusters[st2.betting_stage]);
- }
- if (st2.betting_stage < 2)
- voa[i] = blueprint_cfrp(cnode2, st2, pi, c, w) * w;
- else
- voa[i] = blueprint_cfr(cnode2, st2, pi, w) * w;
- explored[i] = true;
- vo += sigma[i] * voa[i];
- }
- else
- explored[i] = false;
- }
- for (int i = 0; i < len; i++)
- if (explored[i]) {
- cnode[ph]->regret[i] += voa[i] - vo;
- assert(cnode[ph]->regret[i] < 200000000);
- if (cnode[ph]->regret[i] < -210000000)
- cnode[ph]->regret[i] = -210000000;
- }
- assert(w > 0);
- return vo / w;
- }
- else {
- double sigma[12];
- double vo = 0;
- calculate_strategy(cnode[ph]->regret, cnode[ph]->action_len, sigma);
- int len = cnode[0]->action_len;
- double voa[12] = { 0 };
- for (int i = 0; i < len; i++) {
- if (sigma[i] > 0) {
- Pokerstate st2 = state;
- bool is_chance = st2.apply_action(cnode[ph]->actionstr[i]);
- strategy_node* cnode2[2];
- cnode2[0] = cnode[0]->actions + i;//cnode[0]->findnode(cnode[0]->actionstr[i]);
- cnode2[1] = cnode[1]->actions + i;//cnode[1]->findnode(cnode[1]->actionstr[i]);
- if (is_chance) {
- cnode2[0] = (cnode2[0]->actions + st2.table.players[0].clusters[st2.betting_stage]);
- cnode2[1] = (cnode2[1]->actions + st2.table.players[1].clusters[st2.betting_stage]);
- }
- voa[i] = blueprint_cfr(cnode2, st2, pi, w * sigma[i]);
- vo += sigma[i] * voa[i];
- }
- }
- return vo;
- }
- }
-
- void dfs_discount(strategy_node* treenode,double d, bool firstin) {
- if (treenode->action_len == 0)
- return;
- if (treenode->action_len > 100) {
- if (firstin) {
- dfs_discount(treenode->actions, d, true);
- for (int i = 1; i < treenode->action_len; i++)
- dfs_discount(treenode->actions + i, d, false);
- }
- return;
- }
- else {
- double sigma[15];
- calculate_strategy(treenode->regret, treenode->action_len, sigma);
- for (int i = 0; i < treenode->action_len; i++) {
- if (sigma[i] > 0)
- treenode->averegret[i] += sigma[i];
- treenode->regret[i] *= d;
- //treenode->averegret[i] *= d;
- dfs_discount(treenode->actions + i, d, firstin);
- }
- }
- }
-
- void update_strategy(strategy_node* treenode, bool firstin) {
- if (treenode->action_len == 0)
- return;
- if (treenode->action_len > 100) {
- if (firstin) {
- update_strategy(treenode->actions, true);
- for (int i = 1; i < treenode->action_len; i++)
- update_strategy(treenode->actions + i, false);
- }
- return;
- }
- else{
- double sigma[15];
- calculate_strategy(treenode->regret, treenode->action_len, sigma);
- for (int i = 0; i < treenode->action_len; i++) {
- if (sigma[i] > 0)
- treenode->averegret[i] += sigma[i];
- update_strategy(treenode->actions + i, firstin);
- }
- }
- }
-
- const ll strategy_interval = 100000, discount_interval = 1000000, n_iterations = 2000000000, lcfr_threshold = 400000000;
- const int prune_threshold = 100000000, c = -200000000, n_players = 2, print_iteration = 10, dump_iteration = 100000000, update_threshold = 1000000;
-
- void Singleiter() {
- struct timeval start, end;
- Player players[] = { Player(20000),Player(20000) };
- PokerTable table(2, players);
- strategy_node* pref[2];
- Pokerstate state(table);
- state.reset_game();
- strategy_node* root = new strategy_node();
- bulid_preflop(root, state);
- check_subgame(root, state);
- gettimeofday(&start, NULL);
-
- mt19937_64 _rng_gen(rand());
- for (ll t = 1; t <= n_iterations; t++) {
- if (t % 10000 == 0) {
- _rng_gen.seed(rand());
- gettimeofday(&end, NULL);
- cout << "10000 time:" << ((end.tv_sec - start.tv_sec) * 1000000 + (end.tv_usec - start.tv_usec)) / 1000000.0 << endl;
- cout << "iter:" << t << endl;
- gettimeofday(&start, NULL);
- }
- state.reset_game_single();
- for (int i = 0; i < n_players; i++) {
- state.reset_game();
-
- pref[0] = (root->actions + state.table.players[0].clusters[0]);
- pref[1] = (root->actions + state.table.players[1].clusters[0]);
-
- if (t > prune_threshold) {
- int dr = rand() % 100;
- if (dr < 5)
- blueprint_cfr(pref, state, i, 1);
- else
- blueprint_cfrp(pref, state, i, c, 1);
- }
- else
- blueprint_cfr(pref, state, i, 1);
- }
- if (t % strategy_interval == 0)
- update_strategy(root, true);
- if (t < lcfr_threshold && t % discount_interval == 0) {
- double d = ((double)t / discount_interval) / (((double)t / discount_interval) + 1);
- dfs_discount(root, d, true);
- }
- if (t % dump_iteration == 0) {
- dump(root, "blueprint_strategy.dat");
- }
-
- }
- }
|