| Summary We are going to create two "derived" datasets from the base TIGER datasets. NOTE: there could be multiple names for one landmark (same 'module' and 'land' id but different laname). This happens 294 times. |
Area Landmarks

Example of TIGER area landmarks in New York City.
| Input Datasets and Process 1. poly2 - the basic TIGER polygons The process is simple: |
Setup:
create index landmarks_idx_land on landmarks (land); create index arealandmarks_idx_land on arealandmarks (land); create index landmarks_idx_module on landmarks (module); create index arealandmarks_idx_module on arealandmarks (module); create index arealandmarks_idx_moduleland on arealandmarks (module,land); create index landmarks_idx_moduleland on landmarks (module,land); create index poly2_idx_moduleland on poly2 (module,polyid); vacuum analyse landmarks; vacuum analyse arealandmarks; vacuum analyse poly2;
Step (a):
CREATE TABLE area_landmarks_tmp AS SELECT landmarks.laname, landmarks.cfcc, landmarks.land, arealandmarks.module, arealandmarks.polyid FROM landmarks,arealandmarks WHERE arealandmarks.land = landmarks.land AND arealandmarks.module = landmarks.module; -- CHECK: --should be 0 (no bad data) select count(*) from arealandmarks where land isnull or module isnull; -- check to make sure that no landmarks disappeared select count(distinct module||land) from arealandmarks; select count(distinct module||land) from area_landmarks_tmp; select count(distinct module||land) from landmarks where wkb_geometry isnull; --should be same #
Step (b):
-- create a table with all the base polygons in a geometry collection -- this takes a while!!! CREATE TABLE poly_landmarks_tmp AS SELECT poly2.module, land, min(cfcc) as cfcc, substring(min(cfcc) from 1 for 1) as cfcc_1, substring(min(cfcc) from 2 for 1) as cfcc_2, substring(min(cfcc) from 3 for 1) as cfcc_3, min(laname) as laname, collect(the_geom) FROM area_landmarks_tmp,poly2 WHERE area_landmarks_tmp.polyid = poly2.polyid AND area_landmarks_tmp.module = poly2.module GROUP BY land,poly2.module; -- CHECK: -- should be the same SELECT count(*) from poly_landmarks_tmp; SELECT count(distinct(module||land)) FROM area_landmarks_tmp; -- should be the NEAR the same # -- it will not be exactly because there some landmarks with multiple names SELECT sum( numgeometries(collect) ) FROM poly_landmarks_tmp; SELECT count(*) FROM arealandmarks; -- dissolve the geometry collection of base polygons alter table poly_landmarks_tmp add column buffer geometry; UPDATE poly_landmarks_tmp SET buffer=buffer(collect,0); --check to make sure the results are valid geometries SELECT module,land FROM poly_landmarks_tmp WHERE not(isvalid(buffer)) or buffer isnull; -- CHECK: alter table poly_landmarks_tmp add column area_coll float8; alter table poly_landmarks_tmp add column area_buff float8; update poly_landmarks_tmp set area_coll = area(collect); update poly_landmarks_tmp set area_buff = area(buffer); -- area is off by > 1 % (likely a problem with the dissolving of polygons) -- should return no rows -- the 0.5 and 2/3 occurs when there are polygons on top of each other -- like when there are multiple names for a single area landmark. -- We do not worry about them. SELECT laname,module,land,(area_coll-area_buff)/area_coll as error FROM poly_landmarks_tmp WHERE abs(area_coll-area_buff)/area_coll >0.001 AND (abs(area_coll-area_buff)/area_coll != 0.5) and (abs(area_coll-area_buff)/area_coll != 2.0/3.0)
Step (c):
-- augment the dataset with area. We create an albers projection -- centered on the polygon so we get an accurate area estimate alter table poly_landmarks_tmp add column proj4 text; update poly_landmarks_tmp set proj4 = '+proj=aea +lat_1='|| ymin(buffer)||' +lat_2='||ymax(buffer) || ' +lat_0='|| ymin(buffer) ||' +lon_0='||xmin(buffer) || ' +x_0=0 +y_0=0 +ellps=GRS80 +datum=NAD83 +units=m +no_defs'; alter table poly_landmarks_tmp add column buffer_aea geometry; update poly_landmarks_tmp set buffer_aea = transform_geometry(buffer,'+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs',proj4,3005); alter table poly_landmarks_tmp add column area_sq_m float8; update poly_landmarks_tmp set area_sq_m = area(buffer_aea); --check (should be 0) select count(*) from poly_landmarks_tmp where area_sq_m isnull or area_sq_m <=0; select isvalid(buffer_aea) from poly_landmarks_tmp where not(isvalid(buffer_aea)); -- generate the final dataset (dont just change the name of the table, -- by recreating it we save a HUGE amount of disk space) CREATE TABLE poly_landmarks AS SELECT module,land,laname, cfcc,cfcc_1,cfcc_2,cfcc_3,buffer as the_geom,area_sq_m FROM poly_landmarks_tmp; --clean up DROP TABLE area_landmarks_tmp; DROP TABLE poly_landmarks_tmp; INSERT INTO geometry_columns values ('','public','poly_landmarks','the_geom',2,1,'GEOMETRY'); CREATE INDEX polyland_idx_module on poly_landmarks (module); CREATE INDEX polyland_idx_moduleland on poly_landmarks (module,land); CREATE INDEX polyland_idx_spatial on poly_landmarks using gist (the_geom gist_geometry_ops); VACUUM ANALYSE poly_landmarks;
| Final poly_landmarks dataset module – name of county this is in |
SELECT cfcc,count(*) as nfeatures FROM poly_landmarks GROUP BY cfcc ORDER BY nfeatures; cfcc | nfeatures ------+----------- H31 | 113691 Perennial lake or pond D82 | 65233 Cemetery H11 | 57427 Perennial stream or river D85 | 23902 State or local park or forest E23 | 10267 Island H51 | 7939 Bay, estuary, gulf, or sound H41 | 6715 Perennial reservoir H32 | 3156 Intermittent lake or pond D51 | 2699 Airport or airfield D10 | 2601 Military installation or reservation D81 | 2558 Golf course D43 | 1939 Educational institution D84 | 1875 National forest or other Federal land D83 | 1731 National Park Service land H12 | 1326 Intermittent stream, river, or wash H21 | 1237 Perennial canal, ditch, or aqueduct H53 | 874 Sea, or ocean D31 | 617 Hospital, urgent care facility, clinic D23 | 505 Trailer court or mobile home park H81 | 476 Glacier D57 | 450 Airport (inside a city) D28 | 429 Campground D64 | 372 Amusement center D37 | 340 prison D61 | 312 Shopping center D62 | 191 Industrial building D65 | 173 Government center D44 | 169 Religious institution, H42 | 159 Treatment pond D21 | 102 Apartment building D36 | 87 Jail or detention center H60 | 87 Gravel pit or quarry filled with water D20 | 80 Multihousehold or transient quarters D90 | 62 Special purpose landmark; H22 | 62 Intermittent canal D33 | 57 Nursing home D27 | 42 Hotel D66 | 40 Other employment center D00 | 38 Landmark D63 | 34 Office building D26 | 34 Housing facility for workers D35 | 27 Orphanage D24 | 27 Marina H00 | 26 Water feature, classification unknown D42 | 17 Educational institution D54 | 16 Marine terminal D52 | 15 Train station H13 | 13 Braided stream H50 | 11 Bay, estuary, gulf, sound, sea, or ocean H30 | 11 Lake or pond D34 | 7 County home D53 | 3 Bus terminal D45 | 2 Museum D55 | 2 Seaplane anchorage D80 | 2 Open space D29 | 2 Shelter or mission D60 | 1 Employment center D58 | 1 Park and ride facility/parking lot D32 | 1 Halfway house H80 | 1 Special water feature D88 | 1 Landfill D40 | 1 Educational, cultural, or religious institution See the TIGER manual [here|http://docs.codehaus.org/download/attachments/46766/TGR05FE.pdf] for what the feature codes refer too.
Point landmarks.

Example of point landmarks.
This is easy:
DROP TABLE point_landmarks; -- clean up CREATE TABLE point_landmarks AS SELECT wkb_geometry as the_geom,laname , module,land, cfcc as cfcc, substring(cfcc from 1 for 1) as cfcc_1, substring(cfcc from 2 for 1) as cfcc_2, substring(cfcc from 3 for 1) as cfcc_3 FROM landmarks WHERE not(wkb_geometry isnull) AND length(laname) != 0; INSERT INTO geometry_columns values ('','public','point_landmarks','the_geom',2,1,'GEOMETRY'); CREATE INDEX pointland_idx_module on point_landmarks (module); CREATE INDEX pointland_idx_moduleland on point_landmarks (module,land); CREATE INDEX pointland_idx_spatial on point_landmarks using gist (the_geom gist_geometry_ops); VACUUM ANALYSE point_landmarks;