{ "cells": [ { "cell_type": "markdown", "id": "7edfd544", "metadata": {}, "source": [ "## `.sig_test.split_test` in Boston Housing price regression dataset" ] }, { "cell_type": "code", "execution_count": 1, "id": "f295957b", "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import tensorflow.keras as keras \n", "from tensorflow.keras.models import Sequential\n", "from tensorflow.keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D\n", "from tensorflow.python.keras import backend as K\n", "import time\n", "from sklearn.model_selection import train_test_split\n", "from tensorflow.keras.optimizers import Adam, SGD\n", "from sig_test import split_test\n", "import tensorflow as tf" ] }, { "cell_type": "markdown", "id": "4e5ad7e7", "metadata": {}, "source": [ "\n", "Boston house prices dataset\n", "\n", "\n", "**Data Set Characteristics:** \n", "\n", " Number of Instances: 506 \n", "\n", " Number of Attributes: 13 numeric/categorical predictive. Median Value (attribute 14) is usually the target.\n", "\n", " Attribute Information (in order)\n", " - CRIM per capita crime rate by town\n", " - ZN proportion of residential land zoned for lots over 25,000 sq.ft.\n", " - INDUS proportion of non-retail business acres per town\n", " - CHAS Charles River dummy variable (= 1 if tract bounds river; 0 otherwise)\n", " - NOX nitric oxides concentration (parts per 10 million)\n", " - RM average number of rooms per dwelling\n", " - AGE proportion of owner-occupied units built prior to 1940\n", " - DIS weighted distances to five Boston employment centres\n", " - RAD index of accessibility to radial highways\n", " - TAX full-value property-tax rate per USD10,000\n", " - PTRATIO pupil-teacher ratio by town\n", " - B 1000(Bk - 0.63)^2 where Bk is the proportion of blacks by town\n", " - LSTAT % lower status of the population\n", " - MEDV Median value of owner-occupied homes in $1000's\n" ] }, { "cell_type": "code", "execution_count": 2, "id": "2119d3f6", 
"metadata": { "scrolled": false }, "outputs": [], "source": [ "# Only the training portion is kept; the 10% test split returned by load_data\n", "# is discarded because nothing in this notebook uses it.\n", "(x_train, y_train), (_, _) = tf.keras.datasets.boston_housing.load_data(\n", "    path=\"boston_housing.npz\", test_split=0.1)\n", "# Add a trailing axis to the targets so they form an (n, 1) column.\n", "# `y_test` is never assigned (the test split is discarded above), so the\n", "# previous `y_test[:, np.newaxis]` raised NameError on a fresh kernel.\n", "y_train = y_train[:, np.newaxis]\n", "\n", "from sklearn import preprocessing\n", "# Rescale each feature with a min-max scaler fitted on the training data.\n", "scaler = preprocessing.MinMaxScaler()\n", "x_train = scaler.fit_transform(x_train)" ] }, { "cell_type": "code", "execution_count": 3, "id": "3a837094", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "num of samples: 455, dim: 13\n" ] } ], "source": [ "# n: number of training samples; d: number of features (read by build_model).\n", "n, d = x_train.shape\n", "print('num of samples: %d, dim: %d' %(n, d))" ] }, { "cell_type": "code", "execution_count": 4, "id": "3fd8a537", "metadata": {}, "outputs": [], "source": [ "from tensorflow import keras\n", "from tensorflow.keras import layers\n", "\n", "def build_model():\n", "    \"\"\"Build and compile a small fully-connected regression network.\n", "\n", "    Architecture: Dense(8, ReLU) -> BatchNormalization -> Dense(1, ReLU).\n", "    The input width is read from the notebook-level variable `d`, so the\n", "    cell that defines `d` must have been run first.\n", "\n", "    Returns:\n", "        A new `keras.Sequential` model compiled with MSE loss, the Adam\n", "        optimizer (learning rate 1e-3) and MAE/MSE metrics.\n", "    \"\"\"\n", "    model = keras.Sequential([\n", "        layers.Dense(8, activation='relu', input_shape=[d]),\n", "        layers.BatchNormalization(),\n", "        # NOTE(review): ReLU on the output layer restricts predictions to be\n", "        # non-negative -- confirm this is intended rather than a linear output.\n", "        layers.Dense(1, activation='relu')\n", "    ])\n", "\n", "    optimizer = tf.keras.optimizers.Adam(1e-3)\n", "\n", "    model.compile(loss='mse',\n", "                  optimizer=optimizer,\n", "                  metrics=['mae', 'mse'])\n", "    return model" ] }, { "cell_type": "code", "execution_count": 5, "id": "d93f05e0", "metadata": { "scrolled": false }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "2022-06-30 12:53:52.290176: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", "2022-06-30 12:53:52.295414: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", "2022-06-30 12:53:52.295682: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but 
there must be at least one NUMA node, so returning NUMA node zero\n", "2022-06-30 12:53:52.296167: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 AVX512F FMA\n", "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", "2022-06-30 12:53:52.296992: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", "2022-06-30 12:53:52.297257: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", "2022-06-30 12:53:52.297500: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", "2022-06-30 12:53:52.641227: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", "2022-06-30 12:53:52.641520: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", "2022-06-30 12:53:52.641753: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", "2022-06-30 12:53:52.642007: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 3022 MB memory: -> device: 0, name: NVIDIA GeForce RTX 2060, pci bus 
id: 0000:01:00.0, compute capability: 7.5\n" ] } ], "source": [ "# Two separately built instances of the same architecture; they are later\n", "# handed to `split_test` as `model_null` and `model_alter`.\n", "model_null = build_model()\n", "model_alter = build_model()" ] }, { "cell_type": "code", "execution_count": 6, "id": "85689778", "metadata": {}, "outputs": [], "source": [ "from tensorflow.keras.callbacks import EarlyStopping\n", "\n", "# Early stopping on the validation loss: give up after 300 epochs without\n", "# improvement and restore the best weights seen.\n", "es = EarlyStopping(\n", "    monitor='val_loss',\n", "    mode='min',\n", "    verbose=0,\n", "    patience=300,\n", "    restore_best_weights=True,\n", ")\n", "\n", "# Training options; 20% of the fitting data is held out for validation.\n", "fit_params = dict(\n", "    callbacks=[es],\n", "    epochs=3000,\n", "    batch_size=32,\n", "    validation_split=.2,\n", "    verbose=0,\n", ")\n", "\n", "# Testing options. `inf_ratio` and `perturb` are left as None here --\n", "# presumably so that the tuning step picks them; TODO confirm against sig_test.\n", "test_params = dict(\n", "    split=\"one-split\",\n", "    inf_ratio=None,\n", "    perturb=None,\n", "    cv_num=2,\n", "    cp='hommel',\n", "    verbose=2,\n", ")\n", "\n", "# Tuning options: candidate inference-sample ratios and a grid of\n", "# perturbation levels 2**k for k in [-3, 3) with step 0.3.\n", "tune_params = dict(\n", "    num_perm=100,\n", "    ratio_grid=[.2, .4, .6, .8],\n", "    if_reverse=1,\n", "    perturb_range=2. ** np.arange(-3, 3, .3),\n", "    tune_ratio_method='fuse',\n", "    tune_pb_method='fuse',\n", "    cv_num=2,\n", "    cp='hommel',\n", "    verbose=2,\n", ")\n" ] }, { "cell_type": "code", "execution_count": 7, "id": "90f0d2bb", "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "INFO:tensorflow:Assets written to: ./saved/split_test/06-30_12-53/model_null_init/assets\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "2022-06-30 12:53:59.491087: W tensorflow/python/util/util.cc:368] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "INFO:tensorflow:Assets written to: ./saved/split_test/06-30_12-53/model_alter_init/assets\n", "==================== one-split test for 0-th Hypothesis ====================\n", "(tuneHP: ratio) Est. Type 1 error: 0.150; inf sample ratio: 0.800\n", "(tuneHP: ratio) Est. Type 1 error: 0.270; inf sample ratio: 0.600\n", "(tuneHP: ratio) Est. 
Type 1 error: 0.000; inf sample ratio: 0.400\n", "✅ (tuneHP: ratio) Done with inf sample ratio: 0.400\n", "(tuneHP: pb) Est. Type 1 error: 0.000; perturbation level: 0.125\n", "✅ (tuneHP: pb) Done with inf pb level: 0.125\n", "cv: 0; p_value: 0.03247; loss_null: 14.63722(51.76234); loss_alter: 18.41512(42.01610)\n", "cv: 1; p_value: 0.01776; loss_null: 14.65350(46.57168); loss_alter: 21.16354(51.60448)\n", " 🧪 0-th Hypothesis: reject H0 with p_value: 0.049\n", "==================== one-split test for 1-th Hypothesis ====================\n", "(tuneHP: ratio) Est. Type 1 error: 1.000; inf sample ratio: 0.800\n", "(tuneHP: ratio) Est. Type 1 error: 0.000; inf sample ratio: 0.600\n", "✅ (tuneHP: ratio) Done with inf sample ratio: 0.600\n", "(tuneHP: pb) Est. Type 1 error: 0.700; perturbation level: 0.125\n", "(tuneHP: pb) Est. Type 1 error: 0.730; perturbation level: 0.154\n", "(tuneHP: pb) Est. Type 1 error: 0.700; perturbation level: 0.189\n", "(tuneHP: pb) Est. Type 1 error: 0.700; perturbation level: 0.233\n", "(tuneHP: pb) Est. Type 1 error: 0.610; perturbation level: 0.287\n", "(tuneHP: pb) Est. Type 1 error: 0.580; perturbation level: 0.354\n", "(tuneHP: pb) Est. Type 1 error: 0.470; perturbation level: 0.435\n", "(tuneHP: pb) Est. Type 1 error: 0.460; perturbation level: 0.536\n", "(tuneHP: pb) Est. Type 1 error: 0.390; perturbation level: 0.660\n", "(tuneHP: pb) Est. Type 1 error: 0.290; perturbation level: 0.812\n", "(tuneHP: pb) Est. Type 1 error: 0.180; perturbation level: 1.000\n", "(tuneHP: pb) Est. Type 1 error: 0.190; perturbation level: 1.231\n", "(tuneHP: pb) Est. Type 1 error: 0.170; perturbation level: 1.516\n", "(tuneHP: pb) Est. Type 1 error: 0.070; perturbation level: 1.866\n", "(tuneHP: pb) Est. Type 1 error: 0.070; perturbation level: 2.297\n", "(tuneHP: pb) Est. 
Type 1 error: 0.030; perturbation level: 2.828\n", "โœ… (tuneHP: pb) Done with inf pb level: 2.828\n", "cv: 0; p_value: 0.12088; loss_null: 31.87867(120.14606); loss_alter: 34.30483(86.49743)\n", "cv: 1; p_value: 0.83584; loss_null: 33.45124(124.96815); loss_alter: 33.72868(86.60861)\n", " ๐Ÿงช 1-th Hypothesis: accept H0 with p_value: 0.363\n" ] } ], "source": [ "# Feature groups under test: hypothesis 0 covers columns 0-2,\n", "# hypothesis 1 covers columns 5-10.\n", "inf_feats = [np.arange(3), np.arange(5, 11)]\n", "\n", "cue = split_test(\n", "    inf_feats=inf_feats,\n", "    model_null=model_null,\n", "    model_alter=model_alter,\n", "    eva_metric='mse',\n", ")\n", "\n", "# In the recorded run this returned one p-value per feature group.\n", "P_value = cue.testing(x_train, y_train, fit_params, test_params, tune_params)" ] }, { "cell_type": "code", "execution_count": 9, "id": "fcac21f5", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[0.048711537430423474, 0.3626513098329711]" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "P_value" ] } ], "metadata": { "kernelspec": { "display_name": "tf", "language": "python", "name": "tf" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.4" } }, "nbformat": 4, "nbformat_minor": 5 }