Skip to content

Commit

Permalink
Merge pull request #25 from zhik/scripts
Browse files Browse the repository at this point in the history
Automates boundaries shapefile download and merge
  • Loading branch information
zhik authored Nov 6, 2019
2 parents 2c0a383 + dfbe27c commit c7616ff
Show file tree
Hide file tree
Showing 10 changed files with 2,336 additions and 0 deletions.
3 changes: 3 additions & 0 deletions script/.babelrc
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"presets": ["@babel/preset-env"]
}
2 changes: 2 additions & 0 deletions script/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
node_modules/
files/
73 changes: 73 additions & 0 deletions script/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
# Boundaries-Map Download and Merge Script

The NYC [Boundaries Map](https://betanyc.github.io/Boundaries-Map/) uses Carto to serve map tiles and run SQL queries. Maintaining more than a handful of boundary layers can be very time-consuming, and costly for users querying many layers at a time.

This script downloads all the datasets listed in the `datasets.json` file, then merges them into a single feature collection called `all_bounds.geojson`.

The geojson includes the following fields, formatted to provide a quick way to query through all the various boundaries. All fields are strings.

| id | nameCol | nameAlt |
| --- | ------- | -------------------- |
| cd | 101 | |
| cd | 102 | |
| nta | 1 | Fresh Meadows-Utopia |
| sd | BKN09 | 309 |

To select one layer

```sql
SELECT * FROM all_bounds WHERE id = 'cd'
```

To find all boundaries of a given point

```sql
SELECT * FROM all_bounds WHERE ST_Intersects(ST_SetSRID(ST_MakePoint(long, lat), 4326),the_geom)
```

To find all boundaries that intersect a single boundary (example uses Community District 101)

```sql
SELECT id, namecol, namealt FROM all_bounds, (SELECT the_geom FROM all_bounds WHERE id = 'cd' AND namecol = '101') as m WHERE ST_Intersects(all_bounds.the_geom, m.the_geom) AND (st_area(st_intersection(all_bounds.the_geom, m.the_geom))/st_area(all_bounds.the_geom)) > .00025
```

## How to run

- Add your datasets to `datasets.json`
- Run `npm install` then `npm run start`

## Create or update import

The easiest way to create or import the geojson is directly on the Carto dashboard. Drag and drop the .geojson file.

### Syncing

Syncing provides a way to keep your Carto dataset up to date automatically.
Either commit your geojson to GitHub and use the raw URL, or upload the geojson to a file sharing service. Then paste the URL into the "Upload a file or a URL" text field and click submit.

![Add dataset in Carto](./img.png)

Select your data sync frequency, then click connect your dataset.

![Sync data in Carto](./img2.png)

Note you may also manually sync in the data view.

### Syncing with the Carto [Import API](https://carto.com/developers/import-api/reference/)

The Carto Import API provides a mostly hands-free method of updating your dataset. Run the following command in your terminal.

```bash
curl -v -d '{"url":"https://github.com/{github_account}/Boundaries-Map/raw/master/script/all_bounds.geojson", "interval": 3600}' -H "Content-Type: application/json" "https://{carto_account}.carto.com/api/v1/synchronizations/?api_key={api_key}"
```

Using the id in the response you can check the status of your sync

```bash
curl -v "https://{carto_account}.carto.com/api/v1/synchronizations/{id}?api_key={api_key}"
```

Common issues are

- Unsupported/Unrecognized file type : check shapefile and geojson files in a GIS software
- Over account storage limit, please upgrade : you might be running the Free tier, and/or need to upgrade your Carto Account
1 change: 1 addition & 0 deletions script/all_bounds.geojson

Large diffs are not rendered by default.

93 changes: 93 additions & 0 deletions script/datasets.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
[
{
"id": "cd",
"datasetName": "Community Districts",
"url": "http://data.cityofnewyork.us/api/geospatial/yfnk-k7r4?method=export&format=Shapefile",
"nameCol": "boro_cd",
"nameAlt": null
},
{
"id": "pp",
"datasetName": "Police Precincts",
"url": "http://data.cityofnewyork.us/api/geospatial/78dh-3ptz?method=export&format=Shapefile",
"nameCol": "precinct",
"nameAlt": null
},
{
"id": "sd",
"datasetName": "Sanitation Districts",
"url": "http://data.cityofnewyork.us/api/geospatial/6j86-5s7z?method=export&format=Shapefile",
"nameCol": "district",
"nameAlt": "districtcode"
},
{
"id": "fb",
"datasetName": "Fire Battilion",
"url": "http://data.cityofnewyork.us/api/geospatial/uh7r-6nya?method=export&format=Shapefile",
"nameCol": "fire_bn",
"nameAlt": null
},
{
"id": "sd",
"datasetName": "School Districts",
"url": "http://data.cityofnewyork.us/api/geospatial/r8nu-ymqj?method=export&format=Shapefile",
"nameCol": "district",
"nameAlt": "districtco"
},
{
"id": "hc",
"datasetName": "Health Center Districts",
"url": "http://data.cityofnewyork.us/api/geospatial/b55q-34ps?method=export&format=Shapefile",
"nameCol": "hcent_dist",
"nameAlt": null
},
{
"id": "cc",
"datasetName": "City Council Districts",
"url": "http://data.cityofnewyork.us/api/geospatial/yusd-j4xi?method=export&format=Shapefile",
"nameCol": "coun_dist",
"nameAlt": null
},
{
"id": "nycongress",
"datasetName": "Congressional Districts",
"url": "http://data.cityofnewyork.us/api/geospatial/qd3c-zuu7?method=export&format=Shapefile",
"nameCol": "cong_dist",
"nameAlt": null
},
{
"id": "sa",
"datasetName": "State Assembly Districts",
"url": "http://data.cityofnewyork.us/api/geospatial/pf5b-73bw?method=export&format=Shapefile",
"nameCol": "assem_dist",
"nameAlt": null
},
{
"id": "ss",
"datasetName": "State Senate Districts",
"url": "http://data.cityofnewyork.us/api/geospatial/h4i2-acfi?method=export&format=Shapefile",
"nameCol": "st_sen_dis",
"nameAlt": null
},
{
"id": "nta",
"datasetName": "Neighborhood Tabulation Area",
"url": "http://data.cityofnewyork.us/api/geospatial/cpf4-rkhq?method=export&format=Shapefile",
"nameCol": "ntaname",
"nameAlt": "ntacode"
},
{
"id": "bid",
"datasetName": "Business Improvement District",
"url": "http://data.cityofnewyork.us/api/geospatial/ejxk-d93y?method=export&format=Shapefile",
"nameCol": "bid",
"nameAlt": null
},
{
"id": "zipcode",
"datasetName": "Zip Code",
"url": "http://data.cityofnewyork.us/download/i8iw-xf4u/application%2Fzip",
"nameCol": "ZIPCODE",
"nameAlt": null
}
]
Binary file added script/img.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added script/img2.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
126 changes: 126 additions & 0 deletions script/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
import fs from 'fs';
import shp from 'shpjs';
import { http } from 'follow-redirects';
import express from 'express';

require('dotenv').config({ path: '../.env' });

import datasets from './datasets.json';

// Local static file server: exposes the contents of ./files over HTTP so the
// downloaded zips can be fetched by URL from localhost.
const app = express();
const port = 3000;

app.use(express.static('files'));

const server = app.listen(port, function onListening() {
  console.log(`Static files listening on port ${port}!`);
});

/**
 * Downloads `url` to the local file `dest`.
 *
 * @param {string} url - HTTP URL to fetch.
 * @param {string} dest - Path of the file the response body is written to.
 * @returns {Promise<string>} Resolves with `dest` once the body has been fully
 *   written and the file has been closed. Rejects with an `Error` when the
 *   request fails, the server answers with a non-2xx status, or the file
 *   cannot be written; in those cases the partial file is removed.
 */
function download(url, dest) {
  return new Promise((resolve, reject) => {
    const file = fs.createWriteStream(dest);
    let settled = false;

    // Single failure path: close the stream, delete whatever was written, reject.
    const fail = err => {
      if (settled) return;
      settled = true;
      const error = err instanceof Error ? err : new Error(String(err));
      // The unlink result is deliberately ignored: the file may never have been created.
      const removePartial = () => fs.unlink(dest, () => reject(error));
      if (file.closed) {
        removePartial();
      } else {
        // Wait until the descriptor is released before deleting the file.
        file.once('close', removePartial);
        file.destroy();
      }
    };

    file.on('error', fail);
    file.on('finish', () => {
      // close() is async: only resolve from its callback, after the file is closed.
      file.close(() => {
        if (settled) return;
        settled = true;
        resolve(dest);
      });
    });

    http
      .get(url, response => {
        const { statusCode } = response;
        if (statusCode < 200 || statusCode >= 300) {
          // Drain the body so the socket is released, and do not save an error page.
          response.resume();
          fail(new Error(`Download of ${url} failed with HTTP status ${statusCode}`));
          return;
        }
        response.on('error', fail);
        response.pipe(file);
      })
      .on('error', fail);
  });
}

/**
 * Writes `data` to the file at `dest`, replacing any existing content.
 *
 * @param {string|Buffer} data - Content to write.
 * @param {string} dest - Destination file path.
 * @returns {Promise<void>} Resolves once the write has completed; rejects with
 *   the underlying error when the write fails.
 */
function saveFile(data, dest) {
  return new Promise((resolve, reject) => {
    fs.writeFile(dest, data, err => {
      // Check the error first: a promise only honours its first settlement.
      if (err) {
        reject(err);
        return;
      }
      resolve();
    });
  });
}

/**
 * Downloads one dataset's zipped shapefile (unless `./files/<id>.zip` already
 * exists), parses it, and returns its features reduced to the common schema.
 *
 * @param {Object} dataset - One entry of `datasets.json`.
 * @param {string} dataset.id - Layer id; also used as the zip file name.
 * @param {string} dataset.datasetName - Human-readable name used in log messages.
 * @param {string} dataset.url - URL of the zipped shapefile.
 * @param {string} dataset.nameCol - Source property copied to `properties.nameCol`.
 * @param {?string} dataset.nameAlt - Optional source property copied to `properties.nameAlt`.
 * @returns {Promise<Object[]>} GeoJSON features whose properties are
 *   `{ id, nameCol, nameAlt? }`, all values stringified.
 * @throws {Error} (as a rejection) when the download or parsing fails, or when
 *   a feature lacks the `nameCol` property.
 */
async function getDataset(dataset) {
  const { nameCol, nameAlt, id, url, datasetName } = dataset;
  const fileName = `${id}.zip`;
  const dest = `./files/${fileName}`;

  //download zipped shapefile
  if (!fs.existsSync(dest)) {
    // The files directory is git-ignored, so it may not exist on a fresh checkout.
    fs.mkdirSync('./files', { recursive: true });
    console.log(url, dest);
    await download(url, dest);
  }

  //open zip through the local static server (the port must match the one it listens on)
  const geojson = await shp(`http://localhost:3000/${fileName}`);

  //zips with more than one layer (Sanitation Districts) yield an array of collections
  const collections = Array.isArray(geojson) ? geojson : [geojson];
  const features = collections.reduce(
    (merged, collection) => merged.concat(collection.features),
    []
  );
  console.log(`${datasetName} has ${features.length} features`);

  let warnedMissingAlt = false;

  //restructure properties
  return features.map(({ geometry, properties }) => {
    if (!(nameCol in properties)) {
      throw new Error(
        `${datasetName} does not contain the field of ${nameCol} : ${JSON.stringify(
          properties
        )}`
      );
    }

    const formatedFeature = {
      type: 'Feature',
      geometry,
      properties: {
        id,
        nameCol: String(properties[nameCol])
      }
    };

    if (nameAlt) {
      if (nameAlt in properties) {
        formatedFeature.properties.nameAlt = String(properties[nameAlt]);
      } else if (!warnedMissingAlt) {
        // A missing alternate name is not fatal; report it once per dataset.
        warnedMissingAlt = true;
        console.warn(
          `${datasetName} does not contain the field of ${nameAlt} : ${JSON.stringify(
            properties
          )}`
        );
      }
    }

    return formatedFeature;
  });
}

/**
 * Fetches every dataset listed in `datasets`, merges all of their features
 * into one FeatureCollection and writes it to `./all_bounds.geojson`.
 *
 * On any failure the error is logged, nothing is written, and the process exit
 * code is set to 1. The static file server is closed in every case so the
 * process can exit.
 *
 * @returns {Promise<void>}
 */
async function main() {
  try {
    const collections = await Promise.all(
      datasets.map(dataset => getDataset(dataset))
    );

    //combine all collections to a single feature collection
    const featureCollection = {
      type: 'FeatureCollection',
      features: collections.reduce((prev, curr) => prev.concat(curr), [])
    };

    //save featureCollection
    await saveFile(JSON.stringify(featureCollection), './all_bounds.geojson');
  } catch (err) {
    console.error(err);
    // Signal the failure to the caller (npm / CI) instead of exiting cleanly.
    process.exitCode = 1;
  } finally {
    server.close();
  }
}

// Entry point: start the download-and-merge run defined in main().
main();
Loading

0 comments on commit c7616ff

Please sign in to comment.