研一刚入门深度学习的小白一枚，想记录自己学习代码的经过，理解每行代码的意思，这样整理方便日后复习也方便理清自己的思路。感觉每天时间都不够用了！！加油啦。
第一部分：导入模块
导入各个模块，代码如下：
# Numerical Operations
import math
import numpy as np# Reading/Writing Data
import pandas as pd
import os
import csv# For Progress Bar
from tqdm import tqdm# Pytorch
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, random_split# For plotting learning curve
from torch.utils.tensorboard import SummaryWriter在上面程序中依次导入了
第二部分：切分数据集及预测
随机数的作用是切分训练集和验证集，代码如下：
def same_seed(seed): Fixes random number generator seeds for reproducibility.torch.backends.cudnn.deterministic Truetorch.backends.cudnn.benchmark Falsenp.random.seed(seed)torch.manual_seed(seed)if torch.cuda.is_available():torch.cuda.manual_seed_all(seed)在上面程序中先调用xxx函数
接着根据随机数拆分数据集，代码如下：
ef train_valid_split(data_set, valid_ratio, seed):Split provided training data into training set and validation setvalid_set_size int(valid_ratio * len(data_set)) train_set_size len(data_set) - valid_set_sizetrain_set, valid_set random_split(data_set, [train_set_size, valid_set_size], generatortorch.Generator().manual_seed(seed))return np.array(train_set), np.array(valid_set)在上面程序中先调用xxx
接着做预测，下面这段预测程序也作为工具函数：
def predict(test_loader, model, device):model.eval() # Set your model to evaluation mode.preds []for x in tqdm(test_loader):x x.to(device) with torch.no_grad(): pred model(x) preds.append(pred.detach().cpu()) preds torch.cat(preds, dim0).numpy() return preds在上面程序中先将模型调成evaluation模式再设定一个预测结果preds列表将x
第三部分：数据集
这一部分是数据集，代码如下：
class COVID19Dataset(Dataset):x: Features.y: Targets, if none, do prediction.def __init__(self, x, yNone):if y is None:self.y yelse:self.y torch.FloatTensor(y)self.x torch.FloatTensor(x)def __getitem__(self, idx):if self.y is None:return self.x[idx]else:return self.x[idx], self.y[idx]def __len__(self):return len(self.x)上面这段代码
第四部分：模型
定义自己的模型，代码如下：
class My_Model(nn.Module):def __init__(self, input_dim):super(My_Model, self).__init__()# TODO: modify models structure, be aware of dimensions. self.layers nn.Sequential(nn.Linear(input_dim, 16),nn.ReLU(),nn.Linear(16, 8),nn.ReLU(),nn.Linear(8, 1))def forward(self, x):x self.layers(x)x x.squeeze(1) # (B, 1) - (B)return x上面这段代码定义了一个继承自nn.Module模块的My_Model类先在__init__方法中定义层数layers属性调用nn.Sequential方法列出了5个层分别是线性层和ReLU层注意维度分别是input_dim和1616和88和1。接着在forward方法中 得到定义的模型x 外界可以调用 。
第五部分：特征选择
def select_feat(train_data, valid_data, test_data, select_allTrue):Selects useful features to perform regressiony_train, y_valid train_data[:,-1], valid_data[:,-1] # 只需要参考并预测最后一列即可raw_x_train, raw_x_valid, raw_x_test train_data[:,:-1], valid_data[:,:-1], test_data # update 1: 去掉第一列 update 2:在特征选择去掉第一列if select_all:feat_idx list(range(raw_x_train.shape[1]))else:# update 1: 去掉belief和mental#feat_idx [i for i in raw_x_train.shape[1] if i not in [wbelief_masking_effective, wbelief_distancing_effective, wbelief_masking_effective, worried_finances]] # update: 不能读取列名否则array维度不匹配#feat_idx [i for i in raw_x_train.shape[1] if i not in [0, 39, 40, 47, 52, 57, 58, 65, 70, 75, 76, 83, 88]] # update: 遍历所有列名排除不需要的#feat_idx [i for i in raw_x_train.shape[1] if i ! 0 | i ! 39 | i ! 40 | i ! 47 | i ! 52 | i ! 57 | i ! 58 | i ! 65 | i ! 70 | i ! 75 | i ! 76 | i ! 83 | i ! 88] #update: 整数不可迭代del_col [0, 38, 39, 46, 51, 56, 57, 64, 69, 74, 75, 82, 87]raw_x_train np.delete(raw_x_train, del_col, axis1) # update: numpy数组增删查改方法raw_x_valid np.delete(raw_x_valid, del_col, axis1)raw_x_test np.delete(raw_x_test, del_col, axis1)#update 2:使用前三天的covid like illness和前二天的tested positive casesget_col [35, 36, 37, 47, 48, 3518, 3618, 3718, 4718, 4818, 3518*2, 3618*2, 3718*2, 4718*2, 4818*2, 52, 5218]raw_x_train raw_x_train[:, get_col] # update: numpy数组取某几行某几列raw_x_valid raw_x_valid[:, get_col]raw_x_test raw_x_test[:, get_col]return raw_x_train, raw_x_valid, raw_x_test, y_train, y_valid#feat_idx [1,1,2,3,4] # TODO: Select suitable feature columns.return raw_x_train[:,feat_idx], raw_x_valid[:,feat_idx], raw_x_test[:,feat_idx], y_train, y_valid上面这段代码包含我自己修改的部分跟着其他大佬的调参步骤更改加了适当的注释写在update后面。由列选择得到相应的列…
第六部分：训练
代码如下：
def trainer(train_loader, valid_loader, model, config, device):criterion nn.MSELoss(reductionmean) # Define your loss function, do not modify this.# Define your optimization algorithm. # TODO: Please check https://pytorch.org/docs/stable/optim.html to get more available algorithms.# TODO: L2 regularization (optimizer(weight decay...) or implement by your self).optimizer torch.optim.SGD(model.parameters(), lrconfig[learning_rate], momentum0.9) # update: momentum调整为0.9; #optimizer torch.optim.Adam(model.parameters(), lrconfig[learning_rate]) # update: 用Adam优化器; writer SummaryWriter() # Writer of tensoboard.if not os.path.isdir(./models):os.mkdir(./models) # Create directory of saving models.n_epochs, best_loss, step, early_stop_count config[n_epochs], math.inf, 0, 0for epoch in range(n_epochs):model.train() # Set your model to train mode.loss_record []# tqdm is a package to visualize your training progress.train_pbar tqdm(train_loader, position0, leaveTrue)for x, y in train_pbar:optimizer.zero_grad() # Set gradient to zero.x, y x.to(device), y.to(device) # Move your data to device. 
pred model(x) loss criterion(pred, y)loss.backward() # Compute gradient(backpropagation).optimizer.step() # Update parameters.step 1loss_record.append(loss.detach().item())# Display current epoch number and loss on tqdm progress bar.train_pbar.set_description(fEpoch [{epoch1}/{n_epochs}])train_pbar.set_postfix({loss: loss.detach().item()})mean_train_loss sum(loss_record)/len(loss_record)writer.add_scalar(Loss/train, mean_train_loss, step)model.eval() # Set your model to evaluation mode.loss_record []for x, y in valid_loader:x, y x.to(device), y.to(device)with torch.no_grad():pred model(x)loss criterion(pred, y)loss_record.append(loss.item())mean_valid_loss sum(loss_record)/len(loss_record)print(fEpoch [{epoch1}/{n_epochs}]: Train loss: {mean_train_loss:.4f}, Valid loss: {mean_valid_loss:.4f})# writer.add_scalar(Loss/valid, mean_valid_loss, step)if mean_valid_loss best_loss:best_loss mean_valid_losstorch.save(model.state_dict(), config[save_path]) # Save your best modelprint(Saving model with loss {:.3f}....format(best_loss))early_stop_count 0else: early_stop_count 1if early_stop_count config[early_stop]:print(\nModel is not improving, so we halt the training session.)return上面这段代码…
第七部分：参数
代码如下：
device cuda if torch.cuda.is_available() else cpu
config {seed: 5201314, # Your seed number, you can pick your lucky number. :)select_all: False, # Whether to use all features. update: select_all为Falsevalid_ratio: 0.2, # validation_size train_size * valid_ration_epochs: 5000, # Number of epochs. batch_size: 256, learning_rate: 1e-4, # update: 学习率加大为1e-4early_stop: 600, # If model has not improved for this many consecutive epochs, stop training. save_path: ./models/model.ckpt # Your model will be saved here.
}上面这部分代码定义了1个设备和8个参数device是用if-else定义的bool值变量config用字典表示。
第八部分：开始调用以上定义的方法、对象和参数
数据集处理代码如下：
same_seed(config[seed])
train_data, test_data pd.read_csv(./covid_train.csv).values, pd.read_csv(./covid_test.csv).values # update: .values选中除第一行列名下面的所有行; .values输出的shape一样 (?)
train_data, valid_data train_valid_split(train_data, config[valid_ratio], config[seed])# Print out the data size.
print(ftrain_data size: {train_data.shape}
valid_data size: {valid_data.shape}
test_data size: {test_data.shape})上面这段代码中前三行是读入训练和测试的两个.csv文件得到总的训练集train_data和测试集test_data再接着对训练集train_data进行切分得到切分后的训练集train_data和验证集valid_data。
接着进行特征选择，代码如下：
# Select features
x_train, x_valid, x_test, y_train, y_valid select_feat(train_data, valid_data, test_data, config[select_all])# Print out the number of features.
print(fnumber of features: {x_train.shape[1]})上面这段代码
接着加载数据，代码如下：
train_dataset, valid_dataset, test_dataset COVID19Dataset(x_train, y_train), \COVID19Dataset(x_valid, y_valid), \COVID19Dataset(x_test)# Pytorch data loader loads pytorch dataset into batches.
train_loader DataLoader(train_dataset, batch_sizeconfig[batch_size], shuffleTrue, pin_memoryTrue)
valid_loader DataLoader(valid_dataset, batch_sizeconfig[batch_size], shuffleTrue, pin_memoryTrue)
test_loader DataLoader(test_dataset, batch_sizeconfig[batch_size], shuffleFalse, pin_memoryTrue)上面这段代码train和valid的dataset进行了shuffle而test的dataset不需要shuffle。
接着进行训练，代码如下：
model My_Model(input_dimx_train.shape[1]).to(device) # put your model and data on the same computation device.
trainer(train_loader, valid_loader, model, config, device)上面这段代码
接着进行预测并保存预测结果，代码如下：
def save_pred(preds, file): Save predictions to specified file with open(file, w) as fp:writer csv.writer(fp)writer.writerow([id, tested_positive])for i, p in enumerate(preds):writer.writerow([i, p])model My_Model(input_dimx_train.shape[1]).to(device)
model.load_state_dict(torch.load(config[save_path])) # update: tensor size mismatch所以暂时先注释掉
preds predict(test_loader, model, device)
save_pred(preds, pred.csv)上面这段代码先定义了一个save_pred方法调用open创建一个.csv文件…