Source code for agmm2

"""
This module provides implementations of joint estimation for nested nonparametric instrumental variables (NPIV) using neural networks.

Classes:
    _BaseAGMM2: Base class for joint estimation of nested NPIV models.
    _BaseSupLossAGMM2: Base class for joint estimation of nested NPIV models with supervised loss.
    AGMM2: Adversarial Generalized Method of Moments estimator for nested NPIV.
    _BaseSupLossAGMM2L2: Base class for joint estimation of nested NPIV models with L2 regularization.
    AGMM2L2: Adversarial Generalized Method of Moments estimator for nested NPIV with L2 regularization.
"""
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

import os
import numpy as np
import tempfile
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from torch import optim
from torch.utils.tensorboard import SummaryWriter
from nnpiv.neuralnet.oadam import OAdam
from nnpiv.neuralnet.rbflayer import RBF

# TODO: This epsilon exists only because pytorch 1.5 has an instability in torch.cdist
# when the input distance is close to zero, caused by the square root being unstable
# under automatic differentiation. Remove it once pytorch fixes the instability.
# It can be set to 0 when using pytorch 1.4.0.
# NOTE(review): no code visible in this module reads EPSILON -- confirm whether it is
# still imported from elsewhere before deleting it.
EPSILON = 1e-2


def add_weight_decay(net, l2_value, skip_list=()):
    """
    Build optimizer parameter groups with and without weight decay.

    Parameters
    ----------
    net : module exposing ``named_parameters()``
    l2_value : weight decay assigned to the penalized group
    skip_list : parameter names that must never be penalized

    Returns
    -------
    A two-element list of parameter-group dicts: first the exempt group
    (``weight_decay`` 0.), then the penalized group (``weight_decay``
    ``l2_value``). Parameters with ``requires_grad`` False appear in neither.
    """
    exempt, penalized = [], []
    trainable = ((pname, tensor) for pname, tensor in net.named_parameters()
                 if tensor.requires_grad)  # frozen weights are left out entirely
    for pname, tensor in trainable:
        # One-dimensional tensors, biases and explicitly skipped names are exempt.
        is_exempt = (len(tensor.shape) == 1
                     or pname.endswith(".bias")
                     or pname in skip_list)
        bucket = exempt if is_exempt else penalized
        bucket.append(tensor)
    return [{'params': exempt, 'weight_decay': 0.},
            {'params': penalized, 'weight_decay': l2_value}]
class _BaseAGMM2:
    """
    Base class for joint estimation of nested NPIV models.

    The methods below read ``self.learnerh``, ``self.learnerg``,
    ``self.adversary1``, ``self.adversary2`` and ``self.skip_list``; these must
    be set on the instance before ``_pretrain`` is called.

    Methods:
        _pretrain: Prepares the variables required to begin training.
        predict: Predicts outcomes using the fitted AGMM model.
    """

    @staticmethod
    def _reset_module(module):
        """Re-initialize ``module`` in place if it defines ``reset_parameters``."""
        if hasattr(module, 'reset_parameters'):
            module.reset_parameters()

    @staticmethod
    def _load_module(path):
        """Load a whole pickled module that was written with ``torch.save``.

        NOTE: this unpickles arbitrary objects. The files read here are the
        snapshots stored under ``self.model_dir``; never point ``model_dir``
        at untrusted content.
        """
        try:
            # Whole-module snapshots cannot be read in weights-only mode.
            return torch.load(path, weights_only=False)
        except TypeError:
            # torch versions that do not know the ``weights_only`` keyword.
            return torch.load(path)

    def _snapshot_predict(self, prefix, epoch, X):
        """Evaluate the snapshot ``<prefix>_epoch<epoch>`` on ``X`` as a numpy array."""
        net = self._load_module(os.path.join(
            self.model_dir, "{}_epoch{}".format(prefix, epoch)))
        with torch.no_grad():  # inference only: no autograd graph is needed
            return net(X).cpu().data.numpy()

    def _pretrain(self, A, B, C, D, Y, W,
                  learner_l2, adversary_l2, adversary_norm_reg, learner_norm_reg,
                  learner_lr, adversary_lr, n_epochs, bs, train_learner_every, train_adversary_every,
                  warm_start, model_dir, device, verbose, add_sample_inds=False,
                  subsetted=False, subset_ind1=None, subset_ind2=None):
        """
        Prepares the variables required to begin training.

        Creates a temporary snapshot directory inside ``model_dir``, builds the
        shuffled training ``DataLoader``, moves the four networks to ``device``,
        optionally re-initializes them, and creates one OAdam optimizer per
        network.

        Parameters
        ----------
        A, B, C, D, Y, W : tensors stored, in this order, in the training dataset
        learner_l2, adversary_l2 : weight decay for learner / adversary parameters
        learner_lr, adversary_lr : learning rates of the learner / adversary optimizers
        n_epochs : stored on the instance; ``predict`` uses it to locate snapshots
        bs : batch size of the data loader
        warm_start : if False, every submodule exposing ``reset_parameters`` is reset
        model_dir : parent folder of the temporary snapshot directory
        device : target passed to ``.to`` for each network
        verbose : stored on the instance
        add_sample_inds : if True, a tensor of row indices is appended after W
        subsetted : if True, ``subset_ind1`` and ``subset_ind2`` are appended last
        adversary_norm_reg, learner_norm_reg, train_learner_every,
        train_adversary_every : accepted for signature compatibility; not used here

        Returns
        -------
        ``(A, B, C, D, Y, W)``, followed by ``subset_ind1, subset_ind2`` when
        ``subsetted`` is True. The inputs are returned unchanged.
        """
        self.verbose = verbose

        # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
        os.makedirs(model_dir, exist_ok=True)
        # Keep the handle on the instance so the directory lives as long as it does.
        self.tempdir = tempfile.TemporaryDirectory(dir=model_dir)
        self.model_dir = self.tempdir.name

        self.n_epochs = n_epochs

        columns = [A, B, C, D, Y, W]
        if add_sample_inds:
            columns.append(torch.arange(Y.shape[0]).clone().detach())
        if subsetted:
            columns.extend([subset_ind1, subset_ind2])
        self.train_ds = TensorDataset(*columns)
        self.train_dl = DataLoader(self.train_ds, batch_size=bs, shuffle=True)

        self.learnerh = self.learnerh.to(device)
        self.learnerg = self.learnerg.to(device)
        self.adversary1 = self.adversary1.to(device)
        self.adversary2 = self.adversary2.to(device)

        if not warm_start:
            # Same order as the original code so random initialisation is unchanged.
            for net in (self.learnerh, self.learnerg, self.adversary1, self.adversary2):
                net.apply(self._reset_module)

        beta1 = 0.
        self.optimizerg = OAdam(add_weight_decay(self.learnerg, learner_l2),
                                lr=learner_lr, betas=(beta1, .01))
        self.optimizerh = OAdam(add_weight_decay(self.learnerh, learner_l2),
                                lr=learner_lr, betas=(beta1, .01))
        self.optimizerf = OAdam(add_weight_decay(
            self.adversary2, adversary_l2, skip_list=self.skip_list), lr=adversary_lr, betas=(beta1, .01))
        self.optimizerf_ = OAdam(add_weight_decay(
            self.adversary1, adversary_l2, skip_list=self.skip_list), lr=adversary_lr, betas=(beta1, .01))

        if subsetted:
            return A, B, C, D, Y, W, subset_ind1, subset_ind2
        return A, B, C, D, Y, W

    def predict(self, B, A, model='avg', burn_in=0, alpha=None):
        """
        Predict with the snapshots stored in ``self.model_dir``.

        Parameters
        ----------
        B, A : endogenous vars for second and first stage
        model : 'avg' to average the per-epoch snapshots, 'final' to use the
            last epoch, or an integer epoch index to use that snapshot
        burn_in : discard the first "burn_in" epochs when doing averaging
        alpha : if not None but a float, then it also returns the a/2 and 1-a/2,
            percentile of the predictions across different epochs (proxy for a
            confidence interval); only used when ``model == 'avg'``

        Returns
        -------
        ``(pred_h, pred_g)`` as numpy arrays. With ``model='avg'`` and ``alpha``
        given: ``(mean_h, mean_g, lower_h, upper_h, lower_g, upper_g)``.

        Raises
        ------
        ValueError : if ``model`` is not 'avg', 'final' or an integer.
        """
        if isinstance(model, str) and model == 'avg':
            epochs = range(burn_in, self.n_epochs)
            pred_h = np.array([self._snapshot_predict("h", i, B) for i in epochs])
            pred_g = np.array([self._snapshot_predict("g", i, A) for i in epochs])
            mean_h, mean_g = np.mean(pred_h, axis=0), np.mean(pred_g, axis=0)
            if alpha is None:
                return mean_h, mean_g
            lo, hi = 100 * alpha / 2, 100 * (1 - alpha / 2)
            return (mean_h, mean_g,
                    np.percentile(pred_h, lo, axis=0), np.percentile(pred_h, hi, axis=0),
                    np.percentile(pred_g, lo, axis=0), np.percentile(pred_g, hi, axis=0))

        if isinstance(model, str) and model == 'final':
            epoch = self.n_epochs - 1
        elif isinstance(model, (int, np.integer)):
            # NumPy integers (e.g. taken from np.arange) are accepted as well.
            epoch = int(model)
        else:
            # Previously this fell through and silently returned None.
            raise ValueError(
                "model must be 'avg', 'final' or an integer epoch, got {!r}".format(model))

        return self._snapshot_predict("h", epoch, B), self._snapshot_predict("g", epoch, A)
class _BaseSupLossAGMM2(_BaseAGMM2):
    """
    Base class for joint estimation of nested NPIV models with supervised loss.

    Methods:
        fit: Fits the AGMM model with supervised loss to the provided data.
    """

    def fit(self, A, B, C, D, Y, W=None, learner_l2=1e-3, adversary_l2=1e-4,
            adversary_norm_reg=1e-3, learner_norm_reg=1e-3,
            learner_lr=0.001, adversary_lr=0.001, n_epochs=100, bs=100,
            train_learner_every=1, train_adversary_every=1,
            warm_start=False, model_dir='.', device=None, verbose=0,
            subsetted=False, subset_ind1=None, subset_ind2=None):
        """
        Alternate learner and adversary updates over mini-batches and snapshot
        both learners after every epoch.

        Parameters
        ----------
        A : endogenous vars for first stage
        B : endogenous vars for second stage
        C : instrument vars for second stage
        D : instrument vars for first stage
        Y : outcome
        W : weights for the second stage; defaults to ones shaped like Y
        learner_l2, adversary_l2 : l2_regularization of parameters of learner and adversary
        adversary_norm_reg : accepted for interface compatibility; the adversary
            losses below always penalize the mean squared test function with weight 1
        learner_norm_reg : accepted for interface compatibility; this variant
            applies no penalty on the learner outputs
        learner_lr : learning rate of the OAdam optimizer for learner
        adversary_lr : learning rate of the OAdam optimizer for adversary
        n_epochs : how many passes over the data
        bs : batch size
        train_learner_every : learners are updated on batches whose index within
            the epoch is a multiple of this value
        train_adversary_every : adversaries are updated on batches whose index
            within the epoch is a multiple of this value
        warm_start : if False then network parameters are initialized at the beginning,
            otherwise we start from their current weights
        model_dir : folder under which the per-epoch models are stored
        device : device every batch is moved to; also forwarded to ``_pretrain``
        verbose : if > 0, the epoch number is printed at the start of each epoch
        subsetted : if True, adversary outputs are multiplied by per-row indicators
        subset_ind1 : multiplies the output of adversary1 (required when subsetted)
        subset_ind2 : multiplies the output of adversary2; defaults to ``1 - subset_ind1``

        Returns
        -------
        self

        Raises
        ------
        ValueError : if ``subsetted`` is True and ``subset_ind1`` is missing or
            its length differs from that of Y.
        """
        if W is None:
            # Shape the default weights like Y. A flat (n,) vector multiplied with
            # an (n, 1) prediction would broadcast to a (bs, bs) matrix per batch.
            # assumes learner outputs have the same shape as Y -- TODO confirm
            W = torch.ones(Y.shape)

        pretrain_kwargs = {}
        if subsetted:
            if subset_ind1 is None:
                raise ValueError("subset_ind1 must be provided when subsetted is True")
            if len(subset_ind1) != len(Y):
                raise ValueError("subset_ind1 must have the same length as Y")
            subset_ind2 = 1 - subset_ind1 if subset_ind2 is None else subset_ind2
            pretrain_kwargs = dict(subsetted=True, subset_ind1=subset_ind1,
                                   subset_ind2=subset_ind2)

        self._pretrain(A, B, C, D, Y, W, learner_l2, adversary_l2,
                       adversary_norm_reg, learner_norm_reg, learner_lr, adversary_lr,
                       n_epochs, bs, train_learner_every, train_adversary_every,
                       warm_start, model_dir, device, verbose, **pretrain_kwargs)

        for epoch in range(n_epochs):

            if self.verbose > 0:
                print("Epoch #", epoch, sep="")

            for it, data in enumerate(self.train_dl):
                data = tuple(x.to(device) for x in data)
                # Batch-level indicators get their own names so the full-length
                # arguments are not overwritten.
                if subsetted:
                    Ab, Bb, Cb, Db, Yb, Wb, ind1b, ind2b = data
                else:
                    Ab, Bb, Cb, Db, Yb, Wb = data
                    ind1b = ind2b = None

                if it % train_learner_every == 0:
                    # Set models to training mode
                    self.learnerh.train()
                    self.learnerg.train()

                    # Forward passes
                    hat_g = self.learnerg(Ab)
                    hat_h = self.learnerh(Bb)
                    # The test functions are constants for the learner update.
                    with torch.no_grad():
                        hat_f_ = self.adversary1(Db)
                        hat_f = self.adversary2(Cb)
                        if subsetted:
                            hat_f_ = hat_f_ * ind1b
                            hat_f = hat_f * ind2b

                    # Each loss only reaches the network it updates, so no backward
                    # pass runs through a learner whose weights were already stepped.
                    # NOTE(review): the second term of G_loss does not apply Wb,
                    # unlike H_loss and F_loss -- confirm this is intended.
                    G_loss = torch.mean(2 * (hat_g - Yb) * hat_f_) \
                        + torch.mean(2 * (hat_h.detach() - hat_g) * hat_f)
                    H_loss = torch.mean(2 * (hat_h - hat_g.detach() * Wb) * hat_f)

                    # Backpropagate and update for learnerg
                    self.optimizerg.zero_grad()
                    G_loss.backward()
                    self.optimizerg.step()
                    self.learnerg.eval()

                    # Backpropagate and update for learnerh
                    self.optimizerh.zero_grad()
                    H_loss.backward()
                    self.optimizerh.step()
                    self.learnerh.eval()

                if it % train_adversary_every == 0:
                    # Set models to training mode
                    self.adversary1.train()
                    self.adversary2.train()

                    # Re-evaluate the learners (they may just have been updated);
                    # they are constants for the adversary update.
                    with torch.no_grad():
                        hat_g = self.learnerg(Ab)
                        hat_h = self.learnerh(Bb)
                    hat_f_ = self.adversary1(Db)
                    hat_f = self.adversary2(Cb)
                    if subsetted:
                        hat_f_ = hat_f_ * ind1b
                        hat_f = hat_f * ind2b

                    # Calculate losses for each adversary
                    F_loss = - torch.mean(2 * (hat_h - hat_g * Wb) * hat_f) + torch.mean(hat_f**2)
                    F__loss = - torch.mean(2 * (hat_g - Yb) * hat_f_) + torch.mean(hat_f_**2)

                    # Update adversary2
                    self.optimizerf.zero_grad()
                    F_loss.backward()
                    self.optimizerf.step()
                    self.adversary2.eval()

                    # Update adversary1
                    self.optimizerf_.zero_grad()
                    F__loss.backward()
                    self.optimizerf_.step()
                    self.adversary1.eval()

            # One snapshot per epoch; predict() reads these files back.
            torch.save(self.learnerg, os.path.join(
                self.model_dir, "g_epoch{}".format(epoch)))
            torch.save(self.learnerh, os.path.join(
                self.model_dir, "h_epoch{}".format(epoch)))

        return self
class AGMM2(_BaseSupLossAGMM2):
    """
    Adversarial Generalized Method of Moments estimator for nested NPIV.

    Parameters:
        learnerh : a pytorch neural net module for the second stage learner.
        learnerg : a pytorch neural net module for the first stage learner.
        adversary1 : a pytorch neural net module for the first stage adversary.
        adversary2 : a pytorch neural net module for the second stage adversary.
    """

    def __init__(self, learnerh, learnerg, adversary1, adversary2):
        # Names of adversary parameters that are exempt from the ell2 penalty;
        # empty here, so only the default exemptions apply.
        self.skip_list = list()

        # The four networks are stored as given; no copy is made.
        self.learnerh, self.learnerg = learnerh, learnerg
        self.adversary1, self.adversary2 = adversary1, adversary2
class _BaseSupLossAGMM2L2(_BaseAGMM2):
    """
    Base class for joint estimation of nested NPIV models with L2 regularization.

    Methods:
        fit: Fits the AGMM model with L2 regularization to the provided data.
    """

    def fit(self, A, B, C, D, Y, W=None, learner_l2=1e-3, adversary_l2=1e-4,
            adversary_norm_reg=1e-3, learner_norm_reg=1e-3,
            learner_lr=0.001, adversary_lr=0.001, n_epochs=100, bs=100,
            train_learner_every=1, train_adversary_every=1,
            warm_start=False, model_dir='.', device=None, verbose=0,
            subsetted=False, subset_ind1=None, subset_ind2=None):
        """
        Alternate learner and adversary updates over mini-batches, penalizing
        the mean squared learner outputs, and snapshot both learners after
        every epoch.

        Parameters
        ----------
        A : endogenous vars for first stage
        B : endogenous vars for second stage
        C : instrument vars for second stage
        D : instrument vars for first stage
        Y : outcome
        W : weights for the second stage; defaults to ones shaped like Y
        learner_l2, adversary_l2 : l2_regularization of parameters of learner and adversary
        adversary_norm_reg : accepted for interface compatibility; the adversary
            losses below always penalize the mean squared test function with weight 1
        learner_norm_reg : weight of the mean squared output penalty added to
            each learner loss
        learner_lr : learning rate of the OAdam optimizer for learner
        adversary_lr : learning rate of the OAdam optimizer for adversary
        n_epochs : how many passes over the data
        bs : batch size
        train_learner_every : learners are updated on batches whose index within
            the epoch is a multiple of this value
        train_adversary_every : adversaries are updated on batches whose index
            within the epoch is a multiple of this value
        warm_start : if False then network parameters are initialized at the beginning,
            otherwise we start from their current weights
        model_dir : folder under which the per-epoch models are stored
        device : device every batch is moved to; also forwarded to ``_pretrain``
        verbose : if > 0, the epoch number is printed at the start of each epoch
        subsetted : if True, adversary outputs are multiplied by per-row indicators
        subset_ind1 : multiplies the output of adversary1 (required when subsetted)
        subset_ind2 : multiplies the output of adversary2; defaults to ``1 - subset_ind1``

        Returns
        -------
        self

        Raises
        ------
        ValueError : if ``subsetted`` is True and ``subset_ind1`` is missing or
            its length differs from that of Y.
        """
        if W is None:
            # Shape the default weights like Y. A flat (n,) vector multiplied with
            # an (n, 1) prediction would broadcast to a (bs, bs) matrix per batch.
            # assumes learner outputs have the same shape as Y -- TODO confirm
            W = torch.ones(Y.shape)

        pretrain_kwargs = {}
        if subsetted:
            if subset_ind1 is None:
                raise ValueError("subset_ind1 must be provided when subsetted is True")
            if len(subset_ind1) != len(Y):
                raise ValueError("subset_ind1 must have the same length as Y")
            subset_ind2 = 1 - subset_ind1 if subset_ind2 is None else subset_ind2
            pretrain_kwargs = dict(subsetted=True, subset_ind1=subset_ind1,
                                   subset_ind2=subset_ind2)

        self._pretrain(A, B, C, D, Y, W, learner_l2, adversary_l2,
                       adversary_norm_reg, learner_norm_reg, learner_lr, adversary_lr,
                       n_epochs, bs, train_learner_every, train_adversary_every,
                       warm_start, model_dir, device, verbose, **pretrain_kwargs)

        for epoch in range(n_epochs):

            if self.verbose > 0:
                print("Epoch #", epoch, sep="")

            for it, data in enumerate(self.train_dl):
                data = tuple(x.to(device) for x in data)
                # Batch-level indicators get their own names so the full-length
                # arguments are not overwritten.
                if subsetted:
                    Ab, Bb, Cb, Db, Yb, Wb, ind1b, ind2b = data
                else:
                    Ab, Bb, Cb, Db, Yb, Wb = data
                    ind1b = ind2b = None

                if it % train_learner_every == 0:
                    # Set models to training mode
                    self.learnerh.train()
                    self.learnerg.train()

                    # Forward passes
                    hat_g = self.learnerg(Ab)
                    hat_h = self.learnerh(Bb)
                    # The test functions are constants for the learner update.
                    with torch.no_grad():
                        hat_f_ = self.adversary1(Db)
                        hat_f = self.adversary2(Cb)
                        if subsetted:
                            hat_f_ = hat_f_ * ind1b
                            hat_f = hat_f * ind2b

                    # Each loss only reaches the network it updates, so no backward
                    # pass runs through a learner whose weights were already stepped.
                    # NOTE(review): the second term of G_loss does not apply Wb,
                    # unlike H_loss and F_loss -- confirm this is intended.
                    G_loss = torch.mean(2 * (hat_g - Yb) * hat_f_) \
                        + torch.mean(2 * (hat_h.detach() - hat_g) * hat_f)
                    G_loss += learner_norm_reg * torch.mean(hat_g**2)
                    H_loss = torch.mean(2 * (hat_h - hat_g.detach() * Wb) * hat_f)
                    H_loss += learner_norm_reg * torch.mean(hat_h**2)

                    # Backpropagate and update for learnerg
                    self.optimizerg.zero_grad()
                    G_loss.backward()
                    self.optimizerg.step()
                    self.learnerg.eval()

                    # Backpropagate and update for learnerh
                    self.optimizerh.zero_grad()
                    H_loss.backward()
                    self.optimizerh.step()
                    self.learnerh.eval()

                if it % train_adversary_every == 0:
                    # Set models to training mode
                    self.adversary1.train()
                    self.adversary2.train()

                    # Re-evaluate the learners (they may just have been updated);
                    # they are constants for the adversary update.
                    with torch.no_grad():
                        hat_g = self.learnerg(Ab)
                        hat_h = self.learnerh(Bb)
                    hat_f_ = self.adversary1(Db)
                    hat_f = self.adversary2(Cb)
                    if subsetted:
                        hat_f_ = hat_f_ * ind1b
                        hat_f = hat_f * ind2b

                    # Calculate losses for each adversary
                    F_loss = - torch.mean(2 * (hat_h - hat_g * Wb) * hat_f) + torch.mean(hat_f**2)
                    F__loss = - torch.mean(2 * (hat_g - Yb) * hat_f_) + torch.mean(hat_f_**2)

                    # Update adversary2
                    self.optimizerf.zero_grad()
                    F_loss.backward()
                    self.optimizerf.step()
                    self.adversary2.eval()

                    # Update adversary1
                    self.optimizerf_.zero_grad()
                    F__loss.backward()
                    self.optimizerf_.step()
                    self.adversary1.eval()

            # One snapshot per epoch; predict() reads these files back.
            torch.save(self.learnerg, os.path.join(
                self.model_dir, "g_epoch{}".format(epoch)))
            torch.save(self.learnerh, os.path.join(
                self.model_dir, "h_epoch{}".format(epoch)))

        return self
class AGMM2L2(_BaseSupLossAGMM2L2):
    """
    Adversarial Generalized Method of Moments estimator for nested NPIV with L2 regularization.

    Parameters:
        learnerh : a pytorch neural net module for the second stage learner.
        learnerg : a pytorch neural net module for the first stage learner.
        adversary1 : a pytorch neural net module for the first stage adversary.
        adversary2 : a pytorch neural net module for the second stage adversary.
    """

    def __init__(self, learnerh, learnerg, adversary1, adversary2):
        # Names of adversary parameters that are exempt from the ell2 penalty;
        # empty here, so only the default exemptions apply.
        self.skip_list = list()

        # The four networks are stored as given; no copy is made.
        self.learnerh, self.learnerg = learnerh, learnerg
        self.adversary1, self.adversary2 = adversary1, adversary2