建设行业信息和技术应用服务网站,衡水网站建设选哪家,高端网站建设过程,网站开发大学是什么专业进行数据管理时#xff0c;无效数据可能会对生产力和决策质量造成严重的影响。如何发现和处理无效数据变得愈发重要。一起来唠唠你会如何处理无效数据吧~
方向一#xff1a;介绍无效数据的概念 最近遇到了pg数据库表中的大量数据重复了#xff0c;需要删除其中的一条。一条…
进行数据管理时无效数据可能会对生产力和决策质量造成严重的影响。如何发现和处理无效数据变得愈发重要。一起来唠唠你会如何处理无效数据吧~
方向一介绍无效数据的概念 最近遇到了pg数据库表中的大量数据重复了需要删除其中的一条。一条条删除显然不切合实际还是需要通过计算来删除。 方向二无效数据的处理方法 实施步骤 1.对原表进行备份 2.使用一个表结构完全一样的临时表对原始表进行接收。 create table tmp_0524_1 as select * from public.m_user_bak20230524; 3.找出重复的数据 drop table tmp_0524_2; create table tmp_0524_2 as select * from ( select phone ,count(1) cn from tmp_0524_1 where phone is not null group by phone ) t where t.cn 1 ; 4.用第二个临时表接收数据 drop table tmp_0524_3_1; create table tmp_0524_3_1 as select t.* from tmp_0524_1 t where phone in (select phone from tmp_0524_2 ) and opt_user is null; 5.建一个表结构一样的空表多增加一个iid字段 create table tmp_0524_3 as select iid , t.* from tmp_0524_3_1 where 1 2; 6.给空表增加一个iid字段建立一个字段自增 CREATE SEQUENCE tmp_0524_3_id_seq START WITH 1 INCREMENT BY 1 NO MINVALUE NO MAXVALUE CACHE 1; alter table tmp_0524_3 alter column iid set default nextval(tmp_0524_3_id_seq); 7.插入数据 INSERT INTO public.tmp_0524_3 (id, user_id, phone, user_name, id_card, wx_account, state, app_id, app_name, head_icon, sex, sex_name, reg_time, user_state, user_state_name, open_id, opt_user, birthday_type, birthday, org_name, last_trading_time, org_code, union_id, orig_phone, update_time, belong_code, belong_name, data_type, client_num, client_name, country_code, inviter, channel_id, channel_name, company) select id, user_id, phone, user_name, id_card, wx_account, state, app_id, app_name, head_icon, sex, sex_name, reg_time, user_state, user_state_name, open_id, opt_user, birthday_type, birthday, org_name, last_trading_time, org_code, union_id, orig_phone, update_time, belong_code, belong_name, data_type, client_num, client_name, country_code, inviter, channel_id, channel_name, company from tmp_0524_3_1; 8.删除相同数据中的一条 DELETE FROM tmp_0524_3 WHERE iid NOT IN ( SELECT max(iid) FROM tmp_0524_3 GROUP BY phone ); 9.删除原始表中的有相同数据的数据 delete from m_user where phone in (select phone from tmp_0524_3); 10.将处理好的数据插回原始表 INSERT INTO public.m_user (id, user_id, phone, user_name, id_card, wx_account, state, app_id, app_name, head_icon, sex, sex_name, reg_time, user_state, user_state_name, open_id, opt_user, birthday_type, birthday, org_name, last_trading_time, org_code, union_id, orig_phone, update_time, belong_code, belong_name, data_type, client_num, client_name, country_code, inviter, channel_id, channel_name, company) select id, user_id, phone, user_name, id_card, wx_account, state, app_id, app_name, head_icon, sex, sex_name, reg_time, user_state, user_state_name, open_id, opt_user, birthday_type, birthday, org_name, last_trading_time, org_code, union_id, orig_phone, update_time, belong_code, belong_name, data_type, client_num, client_name, country_code, inviter, channel_id, channel_name, company from tmp_0524_3; 方向三如何减少无效数据 在并发系统中应该增加锁对数据进行插入避免重复的插入数据。 方向四实际案例举例 案例 最近遇到了pg数据库表中的大量数据重复了需要删除其中的一条。一条条删除显然不切合实际还是需要通过计算来删除。 实施步骤 1.对原表进行备份 2.使用一个表结构完全一样的临时表对原始表进行接收。 create table tmp_0524_1 as select * from public.m_user_bak20230524; 3.找出重复的数据 drop table tmp_0524_2; create table tmp_0524_2 as select * from ( select phone ,count(1) cn from tmp_0524_1 where phone is not null group by phone ) t where t.cn 1 ; 4.用第二个临时表接收数据 drop table tmp_0524_3_1; create table tmp_0524_3_1 as select t.* from tmp_0524_1 t where phone in (select phone from tmp_0524_2 ) and opt_user is null; 5.建一个表结构一样的空表多增加一个iid字段 create table tmp_0524_3 as select iid , t.* from tmp_0524_3_1 where 1 2; 6.给空表增加一个iid字段建立一个字段自增 CREATE SEQUENCE tmp_0524_3_id_seq START WITH 1 INCREMENT BY 1 NO MINVALUE NO MAXVALUE CACHE 1; alter table tmp_0524_3 alter column iid set default nextval(tmp_0524_3_id_seq); 7.插入数据 INSERT INTO public.tmp_0524_3 (id, user_id, phone, user_name, id_card, wx_account, state, app_id, app_name, head_icon, sex, sex_name, reg_time, user_state, user_state_name, open_id, opt_user, birthday_type, birthday, org_name, last_trading_time, org_code, union_id, orig_phone, update_time, belong_code, belong_name, data_type, client_num, client_name, country_code, inviter, channel_id, channel_name, company) select id, user_id, phone, user_name, id_card, wx_account, state, app_id, app_name, head_icon, sex, sex_name, reg_time, user_state, user_state_name, open_id, opt_user, birthday_type, birthday, org_name, last_trading_time, org_code, union_id, orig_phone, update_time, belong_code, belong_name, data_type, client_num, client_name, country_code, inviter, channel_id, channel_name, company from tmp_0524_3_1; 8.删除相同数据中的一条 DELETE FROM tmp_0524_3 WHERE iid NOT IN ( SELECT max(iid) FROM tmp_0524_3 GROUP BY phone ); 9.删除原始表中的有相同数据的数据 delete from m_user where phone in (select phone from tmp_0524_3); 10.将处理好的数据插回原始表 INSERT INTO public.m_user (id, user_id, phone, user_name, id_card, wx_account, state, app_id, app_name, head_icon, sex, sex_name, reg_time, user_state, user_state_name, open_id, opt_user, birthday_type, birthday, org_name, last_trading_time, org_code, union_id, orig_phone, update_time, belong_code, belong_name, data_type, client_num, client_name, country_code, inviter, channel_id, channel_name, company) select id, user_id, phone, user_name, id_card, wx_account, state, app_id, app_name, head_icon, sex, sex_name, reg_time, user_state, user_state_name, open_id, opt_user, birthday_type, birthday, org_name, last_trading_time, org_code, union_id, orig_phone, update_time, belong_code, belong_name, data_type, client_num, client_name, country_code, inviter, channel_id, channel_name, company from tmp_0524_3;