当前位置：首页 > news >正文

国外的设计网站推荐,中国最好的做网站高手

news 2026/5/3 21:03:52

"""Xihe chatbot: a BERT classifier that tells human answers from ChatGPT answers.

Main improvements described by the accompanying article:

* Logging: log files are created per date and time, so separate runs can be
  traced independently.
* Data processing: errors while loading data are caught and logged; invalid
  data is skipped and the program keeps running.
* Model training: an existing model can be retrained, or training can start
  from the pretrained weights.
* User interaction: the question input is validated to be non-empty, and
  fuzzy matching supports partially typed questions.
* Error handling: key steps catch and log exceptions to make the program
  more robust.
"""

import json
import logging
import os
import tkinter as tk
from datetime import datetime
from difflib import SequenceMatcher
from tkinter import filedialog, messagebox

import jsonlines
import torch
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from transformers import BertModel, BertTokenizer

# Project root directory. It must be assigned before any path derived from it
# (defining it after LOGS_DIR raises NameError at import time).
PROJECT_ROOT = os.path.dirname(os.path.abspath(__file__))

# Logging configuration.
LOGS_DIR = os.path.join(PROJECT_ROOT, 'logs')
os.makedirs(LOGS_DIR, exist_ok=True)

# Locations shared by the command-line trainer and the GUI.
PRETRAINED_MODEL_PATH = 'F:/models/bert-base-chinese'
MODEL_PATH = os.path.join(PROJECT_ROOT, 'models/xihua_model.pth')
TRAIN_DATA_PATH = os.path.join(PROJECT_ROOT, 'data/train_data.jsonl')

# Labels shown to the user for the two answer sources.
HUMAN_ANSWER = '人类回答'
CHATGPT_ANSWER = 'ChatGPT回答'


def setup_logging():
    """Send log records to the console and to a per-run file.

    The file is ``LOGS_DIR/<YYYY-MM-DD>/<HH-MM-SS>/羲和.txt``.
    """
    now = datetime.now()
    # Only the ASCII date/time parts go through strftime; the non-ASCII file
    # name is joined separately so it never passes through locale handling.
    log_dir = os.path.join(
        LOGS_DIR, now.strftime('%Y-%m-%d'), now.strftime('%H-%M-%S')
    )
    os.makedirs(log_dir, exist_ok=True)
    log_file = os.path.join(log_dir, '羲和.txt')
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s - %(message)s',
        handlers=[
            # Log messages contain Chinese text; pin the encoding explicitly.
            logging.FileHandler(log_file, encoding='utf-8'),
            logging.StreamHandler(),
        ],
    )


setup_logging()


def _is_valid_record(item):
    """Return True if ``item`` has the fields the dataset and the GUI read."""
    if not isinstance(item, dict):
        return False
    question = item.get('question')
    if not isinstance(question, str) or not question:
        return False
    for key in ('human_answers', 'chatgpt_answers'):
        answers = item.get(key)
        if (
            not isinstance(answers, list)
            or not answers
            or not isinstance(answers[0], str)
        ):
            return False
    return True


def load_records(file_path):
    """Load question/answer records from a ``.jsonl`` or ``.json`` file.

    Invalid lines, undecodable files and records missing required fields are
    logged and skipped. Any other file extension yields an empty list.

    Args:
        file_path: Path to the data file.

    Returns:
        A list of dicts, each with a non-empty ``question`` string and
        non-empty ``human_answers`` / ``chatgpt_answers`` lists.

    Raises:
        OSError: If the file cannot be opened.
    """
    records = []
    if file_path.endswith('.jsonl'):
        with jsonlines.open(file_path) as reader:
            line_no = 0
            while True:
                line_no += 1
                # The parse error is raised by read(), so the try must wrap
                # the read itself; wrapping only list.append never fires.
                try:
                    records.append(reader.read())
                except jsonlines.InvalidLineError as e:
                    logging.warning(f'跳过无效行 {line_no}: {e}')
                except EOFError:
                    break
    elif file_path.endswith('.json'):
        with open(file_path, 'r', encoding='utf-8') as f:
            try:
                loaded = json.load(f)
            except json.JSONDecodeError as e:
                logging.warning(f'跳过无效文件 {file_path}: {e}')
            else:
                if isinstance(loaded, list):
                    records = loaded
                else:
                    logging.warning(f'跳过无效文件 {file_path}: 顶层结构不是列表')

    valid_records = []
    for idx, item in enumerate(records):
        if _is_valid_record(item):
            valid_records.append(item)
        else:
            logging.warning(f'跳过无效项 {idx}: 缺少必需字段')
    return valid_records


# Dataset class.
class XihuaDataset(Dataset):
    """Dataset of (question, human answer, ChatGPT answer) triples."""

    def __init__(self, file_path, tokenizer, max_length=128):
        """Load and validate the records in ``file_path``.

        Args:
            file_path: Path to a ``.jsonl`` or ``.json`` data file.
            tokenizer: Callable tokenizer returning ``input_ids`` and
                ``attention_mask`` tensors.
            max_length: Length every text is padded/truncated to.
        """
        self.tokenizer = tokenizer
        self.max_length = max_length
        self.data = self.load_data(file_path)

    def load_data(self, file_path):
        """Return the valid records found in ``file_path``."""
        return load_records(file_path)

    def __len__(self):
        return len(self.data)

    def _encode(self, text):
        """Tokenize ``text`` into fixed-length tensors."""
        return self.tokenizer(
            text,
            return_tensors='pt',
            padding='max_length',
            truncation=True,
            max_length=self.max_length,
        )

    def __getitem__(self, idx):
        """Return the tokenized question and both answers for record ``idx``.

        Records are validated at load time, so no fallback to a neighbouring
        index is needed here (the previous recursive fallback could recurse
        without end when every record was bad).
        """
        item = self.data[idx]
        human_answer = item['human_answers'][0]
        chatgpt_answer = item['chatgpt_answers'][0]

        inputs = self._encode(item['question'])
        human_inputs = self._encode(human_answer)
        chatgpt_inputs = self._encode(chatgpt_answer)

        # squeeze(0) drops only the batch axis added by return_tensors='pt'.
        return {
            'input_ids': inputs['input_ids'].squeeze(0),
            'attention_mask': inputs['attention_mask'].squeeze(0),
            'human_input_ids': human_inputs['input_ids'].squeeze(0),
            'human_attention_mask': human_inputs['attention_mask'].squeeze(0),
            'chatgpt_input_ids': chatgpt_inputs['input_ids'].squeeze(0),
            'chatgpt_attention_mask': chatgpt_inputs['attention_mask'].squeeze(0),
            'human_answer': human_answer,
            'chatgpt_answer': chatgpt_answer,
        }


# Data loader factory.
def get_data_loader(file_path, tokenizer, batch_size=8, max_length=128):
    """Build a shuffling ``DataLoader`` over the records in ``file_path``."""
    dataset = XihuaDataset(file_path, tokenizer, max_length)
    return DataLoader(dataset, batch_size=batch_size, shuffle=True)


# Model definition.
class XihuaModel(torch.nn.Module):
    """BERT encoder followed by a single-logit linear classifier."""

    def __init__(self, pretrained_model_name=PRETRAINED_MODEL_PATH):
        super().__init__()
        self.bert = BertModel.from_pretrained(pretrained_model_name)
        self.classifier = torch.nn.Linear(self.bert.config.hidden_size, 1)

    def forward(self, input_ids, attention_mask):
        """Return one logit per input sequence."""
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        return self.classifier(outputs.pooler_output)


# Training function.
def train(model, data_loader, optimizer, criterion, device):
    """Run one training epoch and return the mean loss.

    Human answers are labelled 1 and ChatGPT answers 0. A batch that raises
    is logged and skipped (deliberate best-effort behaviour).

    Returns:
        The mean loss over the batches that completed, or ``0.0`` when no
        batch completed (avoids dividing by zero on empty data).
    """
    model.train()
    total_loss = 0.0
    completed_batches = 0
    for batch in data_loader:
        try:
            human_input_ids = batch['human_input_ids'].to(device)
            human_attention_mask = batch['human_attention_mask'].to(device)
            chatgpt_input_ids = batch['chatgpt_input_ids'].to(device)
            chatgpt_attention_mask = batch['chatgpt_attention_mask'].to(device)

            optimizer.zero_grad()
            human_logits = model(human_input_ids, human_attention_mask)
            chatgpt_logits = model(chatgpt_input_ids, chatgpt_attention_mask)

            human_labels = torch.ones_like(human_logits)
            chatgpt_labels = torch.zeros_like(chatgpt_logits)
            loss = (
                criterion(human_logits, human_labels)
                + criterion(chatgpt_logits, chatgpt_labels)
            )
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            completed_batches += 1
        except Exception as e:
            logging.warning(f'跳过无效批次: {e}')
    if completed_batches == 0:
        return 0.0
    return total_loss / completed_batches


def _load_saved_weights(model, device):
    """Load the saved checkpoint into ``model``; return False if none exists."""
    if not os.path.exists(MODEL_PATH):
        return False
    state_dict = torch.load(MODEL_PATH, map_location=device, weights_only=True)
    model.load_state_dict(state_dict)
    return True


def _fit_and_save(model, data_loader, device, num_epochs=5):
    """Train ``model`` for ``num_epochs`` epochs and save it to MODEL_PATH."""
    optimizer = optim.Adam(model.parameters(), lr=1e-5)
    criterion = torch.nn.BCEWithLogitsLoss()
    for epoch in range(num_epochs):
        train_loss = train(model, data_loader, optimizer, criterion, device)
        logging.info(f'Epoch [{epoch+1}/{num_epochs}], Loss: {train_loss:.4f}')
    # torch.save does not create missing parent directories.
    os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)
    torch.save(model.state_dict(), MODEL_PATH)
    logging.info('模型训练完成并保存')


# Main training function.
def main_train(retrain=False):
    """Train on the default data file and save the resulting weights.

    Args:
        retrain: If True, start from the saved checkpoint when one exists;
            otherwise start from the pretrained BERT weights.

    Raises:
        ValueError: If the training file contains no valid records.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    logging.info(f'Using device: {device}')

    tokenizer = BertTokenizer.from_pretrained(PRETRAINED_MODEL_PATH)
    model = XihuaModel(pretrained_model_name=PRETRAINED_MODEL_PATH).to(device)

    if retrain and not _load_saved_weights(model, device):
        logging.warning('没有找到现有模型，将从预训练模型开始训练')

    dataset = XihuaDataset(TRAIN_DATA_PATH, tokenizer, max_length=128)
    if len(dataset) == 0:
        raise ValueError(f'训练数据为空或全部无效: {TRAIN_DATA_PATH}')
    train_data_loader = DataLoader(dataset, batch_size=8, shuffle=True)

    _fit_and_save(model, train_data_loader, device)


# GUI.
class XihuaChatbotGUI:
    """Tkinter front end for asking questions and (re)training the model."""

    def __init__(self, root):
        self.root = root
        self.root.title('羲和聊天机器人')
        self.tokenizer = BertTokenizer.from_pretrained(PRETRAINED_MODEL_PATH)
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.model = XihuaModel(
            pretrained_model_name=PRETRAINED_MODEL_PATH
        ).to(self.device)
        self.load_model()
        self.model.eval()

        # Load the training records so answers can be looked up later.
        self.data = self.load_data(TRAIN_DATA_PATH)

        self.create_widgets()

    def create_widgets(self):
        """Create and pack the question, answer and training widgets."""
        self.question_label = tk.Label(self.root, text='问题:')
        self.question_label.pack()

        self.question_entry = tk.Entry(self.root, width=50)
        self.question_entry.pack()

        self.answer_button = tk.Button(
            self.root, text='获取回答', command=self.get_answer
        )
        self.answer_button.pack()

        self.answer_label = tk.Label(self.root, text='回答:')
        self.answer_label.pack()

        self.answer_text = tk.Text(self.root, height=10, width=50)
        self.answer_text.pack()

        self.train_button = tk.Button(
            self.root, text='训练模型', command=self.train_model
        )
        self.train_button.pack()

        self.retrain_button = tk.Button(
            self.root,
            text='重新训练模型',
            command=lambda: self.train_model(retrain=True),
        )
        self.retrain_button.pack()

    def get_answer(self):
        """Classify the typed question and show the closest stored answer."""
        # Strip so that a whitespace-only entry counts as empty input.
        question = self.question_entry.get().strip()
        if not question:
            messagebox.showwarning('输入错误', '请输入问题')
            return

        inputs = self.tokenizer(
            question,
            return_tensors='pt',
            padding='max_length',
            truncation=True,
            max_length=128,
        )
        with torch.no_grad():
            input_ids = inputs['input_ids'].to(self.device)
            attention_mask = inputs['attention_mask'].to(self.device)
            logits = self.model(input_ids, attention_mask)

        # A positive logit corresponds to the "human" label used in training.
        answer_type = HUMAN_ANSWER if logits.item() > 0 else CHATGPT_ANSWER
        specific_answer = self.get_specific_answer(question, answer_type)

        self.answer_text.delete('1.0', tk.END)
        self.answer_text.insert(tk.END, f'{answer_type}\n{specific_answer}')

    def get_specific_answer(self, question, answer_type):
        """Return the stored answer whose question best matches ``question``.

        Uses ``SequenceMatcher`` similarity; the first record with the
        highest non-zero ratio wins.
        """
        best_match = None
        best_ratio = 0.0
        for item in self.data:
            ratio = SequenceMatcher(None, question, item['question']).ratio()
            if ratio > best_ratio:
                best_ratio = ratio
                best_match = item

        if best_match is None:
            return '未找到具体答案'
        if answer_type == HUMAN_ANSWER:
            return best_match['human_answers'][0]
        return best_match['chatgpt_answers'][0]

    def load_data(self, file_path):
        """Return the valid records in ``file_path``, or ``[]`` if unreadable.

        A missing or unreadable data file must not stop the GUI from
        starting, so the error is logged and lookup simply has no data.
        """
        try:
            return load_records(file_path)
        except OSError as e:
            logging.warning(f'无法读取数据文件 {file_path}: {e}')
            return []

    def load_model(self):
        """Load the saved checkpoint if present, else keep pretrained weights."""
        if _load_saved_weights(self.model, self.device):
            logging.info('加载现有模型')
        else:
            logging.info('没有找到现有模型，将使用预训练模型')

    def train_model(self, retrain=False):
        """Ask for a data file, train on it, and save the weights.

        Args:
            retrain: If True, reload the saved checkpoint (when one exists)
                before training.
        """
        file_path = filedialog.askopenfilename(
            filetypes=[('JSONL files', '*.jsonl'), ('JSON files', '*.json')]
        )
        if not file_path:
            messagebox.showwarning('文件选择错误', '请选择一个有效的数据文件')
            return

        try:
            dataset = XihuaDataset(file_path, self.tokenizer)
            if len(dataset) == 0:
                raise ValueError(f'训练数据为空或全部无效: {file_path}')
            data_loader = DataLoader(dataset, batch_size=8, shuffle=True)

            # Reload the previously trained weights.
            if retrain and not _load_saved_weights(self.model, self.device):
                logging.warning('没有找到现有模型，将从预训练模型开始训练')

            self.model.to(self.device)
            _fit_and_save(self.model, data_loader, self.device)
            messagebox.showinfo('训练完成', '模型训练完成并保存')
        except Exception as e:
            # Top-level GUI boundary: report the failure instead of crashing.
            logging.exception(f'模型训练失败: {e}')
            messagebox.showerror('训练失败', f'模型训练失败: {e}')
        finally:
            # train() leaves the model in training mode; switch back so that
            # dropout does not make later answers nondeterministic.
            self.model.eval()


def main():
    """Start the GUI."""
    root = tk.Tk()
    app = XihuaChatbotGUI(root)  # keep a reference for the life of the loop
    root.mainloop()


# Entry point.
if __name__ == '__main__':
    main()

查看全文

http://www.hkea.cn/news/14519513/