使用Postgresql 实现快速插入测试数据

时间:2021-05-23

1.创建常规的企业信息表

create table t_centerprises( objectid bigint not null, divid uuid not null, name text not null, address text not null, post text, contacts text, tel text, fax text, describe text, date timestamp default now() not null, constraint pk_centerprisess_objectid primary key (objectid), constraint fk_centerprises_divid foreign key(divid) references ts_divisions(objectid) on delete cascade);create index idx_centerprises_divid on t_centerprises(divid);

2.需要使用的函数

drop function if exists hex_to_string(text);create or replace function hex_to_string( text) returns text as $$ declare result text; begin execute 'select U&''\' || $1 || '''' INTO result; return result; end;$$ language plpgsql; drop function if exists gen_random_zh(int,int);create or replace function gen_random_zh(imin int,imax int) returns text as $$ declare vlen integer; result text; begin result := ''; vlen = floor(random()*(imax-imin)+imin); for i in 1..vlen loop result := result || hex_to_string(to_hex(floor(random()*(42191-19968)+19968)::integer)); end loop; return result; end;$$ language plpgsql;

3.常规测试数据插入(5000000条)

insert into t_centerprises(objectid,divid,name,address,post,contacts,tel,fax,describe) select (vdivid|| lpad(id::text,6,'0'))::bigint as objectid,'110101', gen_random_zh(5,25) as name,gen_random_zh(10,50) as address, floor(random()*(699999-600000)+600000) as post,gen_random_zh(2,8) as contacts, floor(random()*(69999999-60000000)+60000000) as tel,floor(random()*(69999999-60000000)+60000000) as fax, gen_random_zh(32,128) as describe from generate_series(1,5000000) as id;

在普通pc机上插入,大概完成时间约8小时,过程不可监控,并且cpu/内存占用率高,磁盘基本满负荷动作,读写率基本上都是100%.

4.改进后的方法, 插入(10000000条)

do $$ declare vStart bigint; declare vEnd bigint; declare MAXVALE bigint; declare INTERVAL bigint; declare vprovince integer; declare vprefecture integer; declare vcounty integer; declare vdivid text; declare vdividex uuid;begin vprovince := 10;vprefecture := 1;vcounty := 1; MAXVALE := 1000000; INTERVAL := 1000; vStart := 1 ;vEnd := INTERVAL; vdivid := (lpad(vprovince::text,2,'0') || lpad(vprefecture::text,2,'0') || lpad(vcounty::text,2,'0'))::text; vdividex := (select objectid from ts_divisions where province=vprovince and prefecture=vprefecture and county=vcounty); loop insert into t_centerprises(objectid,divid,name,address,post,contacts,tel,fax,describe) select (vdivid|| lpad(id::text,6,'0'))::bigint as objectid,vdividex as divid, gen_random_zh(5,25) as name,gen_random_zh(10,50) as address, floor(random()*(699999-600000)+600000) as post,gen_random_zh(2,8) as contacts, floor(random()*(69999999-60000000)+60000000) as tel,floor(random()*(69999999-60000000)+60000000) as fax, gen_random_zh(32,128) as describe from generate_series(vStart,vEnd) as id; raise notice '%', vEnd; vStart := vEnd + 1; vEnd := vEnd + INTERVAL; if( vEnd > MAXVALE ) then return; elsif(vEnd = MAXVALE) then vEnd := vEnd - 1; end if; end loop;end$$;

因为运算原因, cpu/内存占用率仍然很高, 硬盘负荷较小,读写率也比较低,大概完成时间约1.5小时.

补充:postgreSQL数据库 向表中快速插入1000000条数据

不用创建函数,直接向表中快速插入1000000条数据

create table tbl_test (id int, info text, c_time timestamp);insert into tbl_test select generate_series(1,100000),md5(random()::text),clock_timestamp();select count(id) from tbl_test; --查看个数据条数

以上为个人经验,希望能给大家一个参考,也希望大家多多支持。如有错误或未考虑完全的地方,望不吝赐教。

声明:本页内容来源网络,仅供用户参考;我单位不保证亦不表示资料全面及准确无误,也不保证亦不表示这些资料为最新信息,如因任何原因,本网内容或者用户因倚赖本网内容造成任何损失或损害,我单位将不会负任何法律责任。如涉及版权问题,请提交至online#300.cn邮箱联系删除。

相关文章