1.创建常规的企业信息表
create table t_centerprises( objectid bigint not null, divid uuid not null, name text not null, address text not null, post text, contacts text, tel text, fax text, describe text, date timestamp default now() not null, constraint pk_centerprisess_objectid primary key (objectid), constraint fk_centerprises_divid foreign key(divid) references ts_divisions(objectid) on delete cascade);create index idx_centerprises_divid on t_centerprises(divid);
2.需要使用的函数
drop function if exists hex_to_string(text);create or replace function hex_to_string( text) returns text as $$ declare result text; begin execute 'select U&''\' || $1 || '''' INTO result; return result; end;$$ language plpgsql; drop function if exists gen_random_zh(int,int);create or replace function gen_random_zh(imin int,imax int) returns text as $$ declare vlen integer; result text; begin result := ''; vlen = floor(random()*(imax-imin)+imin); for i in 1..vlen loop result := result || hex_to_string(to_hex(floor(random()*(42191-19968)+19968)::integer)); end loop; return result; end;$$ language plpgsql;
3.常规测试数据插入(5000000条)
insert into t_centerprises(objectid,divid,name,address,post,contacts,tel,fax,describe) select (vdivid|| lpad(id::text,6,'0'))::bigint as objectid,'110101', gen_random_zh(5,25) as name,gen_random_zh(10,50) as address, floor(random()*(699999-600000)+600000) as post,gen_random_zh(2,8) as contacts, floor(random()*(69999999-60000000)+60000000) as tel,floor(random()*(69999999-60000000)+60000000) as fax, gen_random_zh(32,128) as describe from generate_series(1,5000000) as id;
在普通pc机上插入,大概完成时间约8小时,过程不可监控,并且cpu/内存占用率高,磁盘基本满负荷动作,读写率基本上都是100%.
4.改进后的方法, 插入(10000000条)
do $$ declare vStart bigint; declare vEnd bigint; declare MAXVALE bigint; declare INTERVAL bigint; declare vprovince integer; declare vprefecture integer; declare vcounty integer; declare vdivid text; declare vdividex uuid;begin vprovince := 10;vprefecture := 1;vcounty := 1; MAXVALE := 1000000; INTERVAL := 1000; vStart := 1 ;vEnd := INTERVAL; vdivid := (lpad(vprovince::text,2,'0') || lpad(vprefecture::text,2,'0') || lpad(vcounty::text,2,'0'))::text; vdividex := (select objectid from ts_divisions where province=vprovince and prefecture=vprefecture and county=vcounty); loop insert into t_centerprises(objectid,divid,name,address,post,contacts,tel,fax,describe) select (vdivid|| lpad(id::text,6,'0'))::bigint as objectid,vdividex as divid, gen_random_zh(5,25) as name,gen_random_zh(10,50) as address, floor(random()*(699999-600000)+600000) as post,gen_random_zh(2,8) as contacts, floor(random()*(69999999-60000000)+60000000) as tel,floor(random()*(69999999-60000000)+60000000) as fax, gen_random_zh(32,128) as describe from generate_series(vStart,vEnd) as id; raise notice '%', vEnd; vStart := vEnd + 1; vEnd := vEnd + INTERVAL; if( vEnd > MAXVALE ) then return; elsif(vEnd = MAXVALE) then vEnd := vEnd - 1; end if; end loop;end$$;
因为运算原因, cpu/内存占用率仍然很高, 硬盘负荷较小,读写率也比较低,大概完成时间约1.5小时.
补充:postgreSQL数据库 向表中快速插入1000000条数据
不用创建函数,直接向表中快速插入1000000条数据
create table tbl_test (id int, info text, c_time timestamp);insert into tbl_test select generate_series(1,100000),md5(random()::text),clock_timestamp();select count(id) from tbl_test; --查看个数据条数
以上为个人经验,希望能给大家一个参考,也希望大家多多支持。如有错误或未考虑完全的地方,望不吝赐教。