Cindy Makarowsky
2013-03-22 19:46:00 UTC
I have two tables in Postgres 9.2 on a Linux server with 8GB of RAM. The
first table has 60 million records:
-- Business-location table (~60M rows in production).
-- Fixes vs. the posted DDL:
--   * "id" was declared twice (once nullable at the top, once NOT NULL
--     further down); a table cannot have two columns with the same name,
--     so it is declared exactly once, NOT NULL, as the primary key.
--   * "update" is a reserved word in SQL and must be quoted to be used
--     as a column name.
CREATE TABLE table1
(
  id        integer NOT NULL,
  "update"  date,                 -- reserved word, hence quoted
  company   character(35),
  address   character(35),
  city      character(20),
  state     character(2),         -- FK to state(state)
  zip       character(9),         -- FK to zip(zip)
  phone     character(10),
  fips      character(5),         -- FK to fips(fips)
  tract     character(6),
  block     character(4),
  status    character(1),
  pre_title character(2),
  contact   character(35),
  title     character(20),
  pstat     character(1),
  pkone     character(2),
  pktwo     character(2),
  pkthree   character(2),
  pkfour    character(2),
  centract  character(15),        -- FK to tract(centract)
  CONSTRAINT table1_pkey PRIMARY KEY (id),
  CONSTRAINT fipsc FOREIGN KEY (fips)
      REFERENCES fips (fips) MATCH FULL
      ON UPDATE NO ACTION ON DELETE NO ACTION,
  CONSTRAINT statec FOREIGN KEY (state)
      REFERENCES state (state) MATCH FULL
      ON UPDATE NO ACTION ON DELETE NO ACTION,
  CONSTRAINT tractc FOREIGN KEY (centract)
      REFERENCES tract (centract) MATCH FULL
      ON UPDATE NO ACTION ON DELETE NO ACTION,
  CONSTRAINT zipc FOREIGN KEY (zip)
      REFERENCES zip (zip) MATCH FULL
      ON UPDATE NO ACTION ON DELETE NO ACTION
)
WITH (
  OIDS=FALSE
);
ALTER TABLE table1
  OWNER TO postgres;
-- Index: statidx2 — btree on table1.state, default collation.
-- DROP INDEX statidx2;
CREATE INDEX statidx2
    ON table1
    USING btree (state COLLATE pg_catalog."default");
The second table just has the 51 state records:
-- Lookup table of US state codes (51 rows, incl. DC per the post).
CREATE TABLE state
(
    state      character(2) NOT NULL,  -- two-letter postal code
    state_name character(15),
    CONSTRAINT state_pkey PRIMARY KEY (state)
)
WITH (
    OIDS=FALSE
);
ALTER TABLE state
    OWNER TO postgres;
-- Index: stateidx
-- DROP INDEX stateidx;
-- NOTE(review): this unique index duplicates the unique index that the
-- state_pkey PRIMARY KEY constraint already creates on (state); it adds
-- write overhead for no benefit and could be dropped.
CREATE UNIQUE INDEX stateidx
ON state
USING btree
(state COLLATE pg_catalog."default" );
When I run this query:
-- Per-state row counts. Rewritten from the implicit comma join to an
-- explicit ANSI INNER JOIN (identical semantics, clearer intent) with
-- a named result column. Note: counting every row still requires a
-- full pass over table1, so the join form is not the bottleneck here.
SELECT s.state,
       COUNT(t.id) AS row_count
FROM state AS s
INNER JOIN table1 AS t
    ON t.state = s.state
GROUP BY s.state;
It takes almost 4 minutes with this output from explain (note: the plan
shows the table's actual name, busbase, which the DDL above calls table1):
"HashAggregate (cost=7416975.58..7416976.09 rows=51 width=7) (actual
time=284891.955..284891.964 rows=51 loops=1)"
" -> Hash Join (cost=2.15..7139961.94 rows=55402728 width=7) (actual
time=0.049..269049.678 rows=60057057 loops=1)"
" Hash Cond: (busbase.state = state.state)"
" -> Seq Scan on busbase (cost=0.00..6378172.28 rows=55402728
width=7) (actual time=0.004..250046.673 rows=60057057 loops=1)"
" -> Hash (cost=1.51..1.51 rows=51 width=3) (actual
time=0.032..0.032 rows=51 loops=1)"
" Buckets: 1024 Batches: 1 Memory Usage: 2kB"
" -> Seq Scan on state (cost=0.00..1.51 rows=51 width=3)
(actual time=0.003..0.012 rows=51 loops=1)"
"Total runtime: 284892.024 ms"
I've tried adjusting the settings in the config file for shared_buffers,
work_mem, etc., restarting Postgres after each change, but nothing
seems to help.
Thanks for any help.
The first table has 60 million records:
-- Business-location table (~60M rows in production).
-- Fixes vs. the posted DDL:
--   * "id" was declared twice (once nullable at the top, once NOT NULL
--     further down); a table cannot have two columns with the same name,
--     so it is declared exactly once, NOT NULL, as the primary key.
--   * "update" is a reserved word in SQL and must be quoted to be used
--     as a column name.
CREATE TABLE table1
(
  id        integer NOT NULL,
  "update"  date,                 -- reserved word, hence quoted
  company   character(35),
  address   character(35),
  city      character(20),
  state     character(2),         -- FK to state(state)
  zip       character(9),         -- FK to zip(zip)
  phone     character(10),
  fips      character(5),         -- FK to fips(fips)
  tract     character(6),
  block     character(4),
  status    character(1),
  pre_title character(2),
  contact   character(35),
  title     character(20),
  pstat     character(1),
  pkone     character(2),
  pktwo     character(2),
  pkthree   character(2),
  pkfour    character(2),
  centract  character(15),        -- FK to tract(centract)
  CONSTRAINT table1_pkey PRIMARY KEY (id),
  CONSTRAINT fipsc FOREIGN KEY (fips)
      REFERENCES fips (fips) MATCH FULL
      ON UPDATE NO ACTION ON DELETE NO ACTION,
  CONSTRAINT statec FOREIGN KEY (state)
      REFERENCES state (state) MATCH FULL
      ON UPDATE NO ACTION ON DELETE NO ACTION,
  CONSTRAINT tractc FOREIGN KEY (centract)
      REFERENCES tract (centract) MATCH FULL
      ON UPDATE NO ACTION ON DELETE NO ACTION,
  CONSTRAINT zipc FOREIGN KEY (zip)
      REFERENCES zip (zip) MATCH FULL
      ON UPDATE NO ACTION ON DELETE NO ACTION
)
WITH (
  OIDS=FALSE
);
ALTER TABLE table1
  OWNER TO postgres;
-- Index: statidx2 — btree on table1.state, default collation.
-- DROP INDEX statidx2;
CREATE INDEX statidx2
    ON table1
    USING btree (state COLLATE pg_catalog."default");
The second table just has the 51 state records:
-- Lookup table of US state codes (51 rows, incl. DC per the post).
CREATE TABLE state
(
    state      character(2) NOT NULL,  -- two-letter postal code
    state_name character(15),
    CONSTRAINT state_pkey PRIMARY KEY (state)
)
WITH (
    OIDS=FALSE
);
ALTER TABLE state
    OWNER TO postgres;
-- Index: stateidx
-- DROP INDEX stateidx;
-- NOTE(review): this unique index duplicates the unique index that the
-- state_pkey PRIMARY KEY constraint already creates on (state); it adds
-- write overhead for no benefit and could be dropped.
CREATE UNIQUE INDEX stateidx
ON state
USING btree
(state COLLATE pg_catalog."default" );
When I run this query:
-- Per-state row counts. Rewritten from the implicit comma join to an
-- explicit ANSI INNER JOIN (identical semantics, clearer intent) with
-- a named result column. Note: counting every row still requires a
-- full pass over table1, so the join form is not the bottleneck here.
SELECT s.state,
       COUNT(t.id) AS row_count
FROM state AS s
INNER JOIN table1 AS t
    ON t.state = s.state
GROUP BY s.state;
It takes almost 4 minutes with this output from explain (note: the plan
shows the table's actual name, busbase, which the DDL above calls table1):
"HashAggregate (cost=7416975.58..7416976.09 rows=51 width=7) (actual
time=284891.955..284891.964 rows=51 loops=1)"
" -> Hash Join (cost=2.15..7139961.94 rows=55402728 width=7) (actual
time=0.049..269049.678 rows=60057057 loops=1)"
" Hash Cond: (busbase.state = state.state)"
" -> Seq Scan on busbase (cost=0.00..6378172.28 rows=55402728
width=7) (actual time=0.004..250046.673 rows=60057057 loops=1)"
" -> Hash (cost=1.51..1.51 rows=51 width=3) (actual
time=0.032..0.032 rows=51 loops=1)"
" Buckets: 1024 Batches: 1 Memory Usage: 2kB"
" -> Seq Scan on state (cost=0.00..1.51 rows=51 width=3)
(actual time=0.003..0.012 rows=51 loops=1)"
"Total runtime: 284892.024 ms"
I've tried adjusting the settings in the config file for shared_buffers,
work_mem, etc., restarting Postgres after each change, but nothing
seems to help.
Thanks for any help.