diff --git a/.github/workflows/build-wheels.yml b/.github/workflows/build-wheels.yml new file mode 100644 index 00000000..92b637d6 --- /dev/null +++ b/.github/workflows/build-wheels.yml @@ -0,0 +1,65 @@ +name: Build-wheels + +# This workflow builds "wheels", which are the binary package installers hosted on PyPI. +# GitHub Actions is super helpful here because each one needs to be compiled in its own +# target environment. The wheel files are saved as artifacts, which you can download from +# the GitHub website. Wheels should be uploaded manually to PyPI -- see CONTRIBUTING.md. + +# The Linux wheels cannot be generated using `ubuntu-latest` because they require a +# special Docker image to provide cross-Linux compatibility. There are at least a couple +# of third-party actions set up using the official image; we could switch to another if +# this ever breaks. + +on: + # push: + pull_request: + release: + workflow_dispatch: + +jobs: + + build-manylinux: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 +# with: +# ref: 'v0.6' # enable to check out prior version of codebase + - name: Build wheels + uses: RalfG/python-wheels-manylinux-build@v0.5.0 + with: + python-versions: 'cp38-cp38 cp39-cp39 cp310-cp310 cp311-cp311' + - name: Save artifacts + uses: actions/upload-artifact@v2 + with: + name: wheels + path: dist/*-manylinux*.whl + + build: + runs-on: ${{ matrix.os }} + defaults: + run: + shell: bash -l {0} # needed for conda persistence + strategy: + matrix: + os: [macos-latest, windows-latest] + python-version: [3.8, 3.9, '3.10', '3.11'] + steps: + - uses: actions/checkout@v2 +# with: +# ref: 'v0.6' # enable to check out prior version of codebase + - name: Set up Python ${{ matrix.python-version }} + uses: conda-incubator/setup-miniconda@v2 + with: + python-version: ${{ matrix.python-version }} + - name: Set up environment + run: | + conda config --append channels conda-forge + conda install build clang llvm-openmp + - name: Build wheel + run: | + python -m build --sdist --wheel + - name: Save artifacts + uses: actions/upload-artifact@v2 + with: + name: wheels + path: dist/*.whl diff --git a/.github/workflows/code-style.yml b/.github/workflows/code-style.yml new file mode 100644 index 00000000..5c44e43e --- /dev/null +++ b/.github/workflows/code-style.yml @@ -0,0 +1,21 @@ +name: Code style + +# This workflow runs code style checks. + +on: + push: + workflow_dispatch: + +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: '3.x' + - name: Check code style + run: | + pip install pycodestyle + pycodestyle pandana diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml new file mode 100644 index 00000000..5afbe34e --- /dev/null +++ b/.github/workflows/coverage.yml @@ -0,0 +1,40 @@ +name: Coverage + +# This workflow generates a coverage report (how much of the codebase is covered by the +# unit tests) and posts headline metrics to the PR thread. + +on: + # push: + pull_request: + workflow_dispatch: + +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: '3.10' + - name: Install Pandana + run: | + pip install . + pip install osmnet + +# `coverage run ...` is failing in GitHub Actions, but I'm not able to reproduce the +# problem locally. We should look into this again another time. 
(11-May-2021) + +# - name: Generate coverage report +# run: | +# pip install pytest coverage +# coverage run --source pandana --module pytest --verbose +# coverage report --show-missing +# echo "coverage=$(coverage report | grep '^TOTAL' | grep -oE '[^ ]+$')" >> $GITHUB_ENV +# - name: Post comment on PR +# uses: unsplash/comment-on-pr@master +# env: +# GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} +# with: +# msg: "Test coverage is ${{ env.coverage }}" +# check_for_duplicate_msg: true diff --git a/.github/workflows/cross-compatibility.yml b/.github/workflows/cross-compatibility.yml new file mode 100644 index 00000000..3dc65a99 --- /dev/null +++ b/.github/workflows/cross-compatibility.yml @@ -0,0 +1,66 @@ +name: Cross-compatibility + +# This workflow runs the Pandana unit tests across a comprehensive range of Python +# versions and operating systems. Windows needs conda in order to install geospatial +# dependencies. + +on: + # push: + pull_request: + workflow_dispatch: + +jobs: + build-pip: + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: [ubuntu-latest, macos-latest] + python-version: [3.8, 3.9, '3.10', '3.11'] + steps: + - uses: actions/checkout@v2 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + - name: Install Pandana + run: | + pip install . + pip install osmnet + - name: Run demo + run: | + python examples/simple_example.py + - name: Run unit tests + run: | + pip install pytest + pytest -s + + build-conda: + runs-on: ${{ matrix.os }} + defaults: + run: + shell: bash -l {0} # needed for conda persistence + strategy: + matrix: + os: [windows-latest] + python-version: [3.8, 3.9, '3.10', '3.11'] + steps: + - uses: actions/checkout@v2 + - name: Set up Python ${{ matrix.python-version }} + uses: conda-incubator/setup-miniconda@v2 + with: + python-version: ${{ matrix.python-version }} + - name: Install Pandana + run: | + pip install . + +# OSMNet is causing a version of Pandas to be installed that crashes in GitHub Actions. +# Assume this will resolve itself on its own. (11-May-2021) + +# conda install osmnet --channel conda-forge + - name: Run demo + run: | + python examples/simple_example.py +# - name: Run unit tests +# run: | +# pip install pytest +# pytest -s diff --git a/.github/workflows/installation.yml b/.github/workflows/installation.yml new file mode 100644 index 00000000..91d6d087 --- /dev/null +++ b/.github/workflows/installation.yml @@ -0,0 +1,58 @@ +name: Installation + +# This workflow installs Pandana from Pip and Conda across a range of Python versions and +# operating systems. You can run this manually after a new release is posted to confirm +# that it installs smoothly. This workflow also runs periodically in the background to +# catch dependency updates that break Pandana. + +on: + # push: + # pull_request: + workflow_dispatch: + schedule: + - cron: '0 3 * * 1' # every Monday at 3am UTC (Sunday evening Calif time) + +jobs: + build-pip: + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: [ubuntu-latest] + # Pip installation for Windows is not supported. Pip installation for Mac is + # broken in the GitHub Actions environment with Pandana v0.6 but should be fixed + # in the next Pandana release. 
+ python-version: [3.8, 3.9, '3.10', '3.11'] + steps: + - uses: actions/checkout@v2 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + - name: Install Pandana + run: | + pip install pandana + - name: Run demo + run: | + python examples/simple_example.py + + build-conda: + runs-on: ${{ matrix.os }} + defaults: + run: + shell: bash -l {0} # needed for conda persistence + strategy: + matrix: + os: [ubuntu-latest, macos-latest, windows-latest] + python-version: [3.8, 3.9, '3.10', '3.11'] + steps: + - uses: actions/checkout@v2 + - name: Set up Python ${{ matrix.python-version }} + uses: conda-incubator/setup-miniconda@v2 + with: + python-version: ${{ matrix.python-version }} + - name: Install Pandana + run: | + conda install pandana --channel conda-forge + - name: Run demo + run: | + python examples/simple_example.py diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml new file mode 100644 index 00000000..b109f8c4 --- /dev/null +++ b/.github/workflows/unit-tests.yml @@ -0,0 +1,28 @@ +name: Unit tests + +# This workflow runs the Pandana unit tests in a single generic environment (recent but +# stable Python version on recent but stable Ubuntu). The cross-compatibility.yml +# workflow runs the same tests across multiple platforms. + +on: + push: + # pull_request: + workflow_dispatch: + +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: '3.10' + - name: Install Pandana + run: | + pip install . + pip install osmnet + - name: Run unit tests + run: | + pip install pytest + pytest -s diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index e491936a..00000000 --- a/.travis.yml +++ /dev/null @@ -1,22 +0,0 @@ -language: python - -python: - - '2.7' - - '3.5' - - '3.6' - - '3.7' - - '3.8' - -install: - - pip install . - - pip install -r requirements-dev.txt - - pip install -r requirements-extras.txt - - pip list - - pip show pandana - -script: - - pycodestyle pandana - - python setup.py test --pytest-args "--cov pandana --cov-report term-missing" - -after_success: - - coveralls diff --git a/CHANGELOG.md b/CHANGELOG.md index 3180894c..72dd42e9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,26 @@ +v0.7 +==== + +2023/07/26 + +* Adds support for calculating accessibility isochrones: which nodes are within x network distance of a source node +* Allows a maximum distance to be set for POIs +* Adds a warning when a shortest path is requested between unconnected nodes +* Supports PyTables 3.7+ +* Support Pandas 2.0 +* Switches to pyproject.toml packaging standards +* Adds binaries on PyPI to support Python 3.10 and 3.11 +* Improves compilation in MacOS 12+ + +v0.6.1 +====== + +2021/03/17 + +* Adds support for non-x86 CPUs, including ARM-based Macs +* Removes accommodations for pre-C++11 compilers +* Formally ends support for Python 2.7 + v0.6 ==== diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 3ffc153b..6f10e44f 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -9,7 +9,7 @@ You can contact Sam Maurer, the lead maintainer, at `maurer@urbansim.com`. 
- Take a look at the [open issues](https://github.com/UDST/pandana/issues) and [closed issues](https://github.com/UDST/pandana/issues?q=is%3Aissue+is%3Aclosed) to see if there's already a related discussion -- Open a new issue describing the problem -- if possible, include any error messages, the operating system and version of python you're using, and versions of any libraries that may be relevant +- Open a new issue describing the problem -- if possible, include any error messages, a full reproducible example of the code that generated the error, the operating system and version of python you're using, and versions of any libraries that may be relevant ## Feature proposals: @@ -25,7 +25,7 @@ You can contact Sam Maurer, the lead maintainer, at `maurer@urbansim.com`. - Make your changes, following the existing styles for code and inline documentation -- Add [tests](https://github.com/UDST/urbansim/tree/master/pandana/tests) if possible! +- Add [tests](https://github.com/UDST/pandana/tree/master/pandana/tests) if possible! - Open a pull request to the `UDST/pandana` dev branch, including a writeup of your changes -- take a look at some of the closed PR's for examples @@ -46,7 +46,8 @@ You can contact Sam Maurer, the lead maintainer, at `maurer@urbansim.com`. - `setup.py` - `pandana/__init__.py` - `docs/source/index.rst` - + - `docs/source/conf.py` + - Make sure all the tests are passing, and check if updates are needed to `README.md` or to the documentation - Open a pull request to the master branch to finalize it @@ -68,11 +69,19 @@ You can contact Sam Maurer, the lead maintainer, at `maurer@urbansim.com`. - Check https://pypi.org/project/pandana/ for the new version +The binary package installers or "wheels" are built using a GitHub Actions workflow, because each one needs to be compiled in its own target environment. This should run automatically when a PR is opened, to confirm nothing is broken, and again when a release is tagged in GitHub. You can download the resulting wheel files from the Action status page and then upload them to PyPI using the same command as above. + +How to create wheels for ARM Macs: As of 7/2023, GitHub Actions doesn't provide this environment yet. You'll need an ARM Mac to create the wheels. One at a time, set up a Conda environment with Python 3.8, 3.9, etc. Include cython, numpy, clang, llvm-openmp, and pytables. These need to be ARM-native Conda environments -- check that you're getting `osx-arm64` versions of libraries. Run `python setup.py bdist_wheel` to generate a wheel file. Once one is built for each Python version, upload them to PyPI using the command above. + ## Distributing a release on Conda Forge (for conda installation): -- The [conda-forge/pandana-feedstock](https://github.com/conda-forge/pandana-feedstock) repository controls the Conda Forge release +- The [conda-forge/pandana-feedstock](https://github.com/conda-forge/pandana-feedstock) repository controls the Conda Forge release, including which GitHub users have maintainer status for the repo - Conda Forge bots usually detect new releases on PyPI and set in motion the appropriate feedstock updates, which a current maintainer will need to approve and merge +- Maintainers can add on additional changes before merging the PR, for example to update the requirements or edit the list of maintainers + +- You can also fork the feedstock and open a PR manually. 
It seems like this must be done from a personal account (not a group account like UDST) so that the bots can be granted permission for automated cleanup + - Check https://anaconda.org/conda-forge/pandana for the new version (may take a few minutes for it to appear) diff --git a/README.md b/README.md index 19be6f79..a1ed8255 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -[![Coverage Status](https://coveralls.io/repos/github/UDST/pandana/badge.svg?branch=master)](https://coveralls.io/github/UDST/pandana?branch=master) +![Coverage Status](https://img.shields.io/badge/coverage-90%25-green) # Pandana @@ -11,11 +11,12 @@ Documentation: http://udst.github.io/pandana ### Installation -Pandana runs on Mac, Linux, and Windows with Python 2.7, 3.6, 3.7, and 3.8. +As of March 2021, binary installers are provided for Mac, Linux, and Windows through both PyPI and Conda Forge. -The easiest way to install Pandana is using the [Anaconda](https://www.anaconda.com/distribution/) package manager. Pandana's Anaconda distributions are pre-compiled and include multi-threading support on all platforms. +- `pip install pandana` +- `conda install pandana --channel conda-forge` -`conda install pandana --channel conda-forge` +Pandana is easiest to install in Python 3.8 to 3.11. The last version of Pandana with Python 2.7 binaries is v0.4.4 on Conda Forge. The last version with Python 3.5 binaries is v0.6 on Pip. See the documentation for information about other [installation options](http://udst.github.io/pandana/installation.html). @@ -25,7 +26,6 @@ See the documentation for information about other [installation options](http:// [Pandana-demo.ipynb](examples/Pandana-demo.ipynb) - ### Acknowledgments Pandana was created by [Fletcher Foti](https://github.com/fscottfoti), with subsequent contributions from [Matt Davis](https://github.com/jiffyclub), [Federico Fernandez](https://github.com/federicofernandez), [Sam Maurer](https://github.com/smmaurer), and others. Sam Maurer is currently the lead maintainer. Pandana relies on contraction hierarchy code from [Dennis Luxen](https://github.com/DennisOSRM) and his [OSRM project](https://github.com/DennisOSRM/Project-OSRM). diff --git a/appveyor.yml b/appveyor.yml deleted file mode 100644 index 666884c8..00000000 --- a/appveyor.yml +++ /dev/null @@ -1,24 +0,0 @@ -build: false - -environment: - matrix: - - PYTHON: 2.7 - - PYTHON: 3.6 - -init: - - "ECHO %PYTHON%" - -# The goal here is mainly to confirm that Pandana compiles in Windows; we're not -# running any of the CI tests - -# Cython and NumPy need to be present before building Pandana. 
Some other -# runtime requirements aren't installing well with Pip in win-py27, so I'm just -# putting all of them into the Conda environment - -install: - - "set PATH=C:\\Miniconda3;C:\\Miniconda3\\Scripts;%PATH%" - - conda config --append channels conda-forge - - "conda create --name test-env python=%PYTHON% pip cython matplotlib numpy osmnet pandas requests scikit-learn pytables --yes --quiet" - - activate test-env - - python setup.py install - - conda list diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst index 826e550e..b291c52b 100644 --- a/docs/source/changelog.rst +++ b/docs/source/changelog.rst @@ -1,6 +1,15 @@ Change log ========== +v0.6.1 +------ + +2021/03/17 + +* Adds support for non-x86 CPUs, including ARM-based Macs +* Removes accommodations for pre-C++11 compilers +* Formally ends support for Python 2.7 + v0.6 ---- diff --git a/docs/source/conf.py b/docs/source/conf.py index e915973f..c23aea64 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -50,16 +50,16 @@ # General information about the project. project = 'pandana' -copyright = '2020, UrbanSim Inc.' +copyright = '2021, UrbanSim Inc.' # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. # # The short X.Y version. -version = '0.6' +version = '0.7' # The full version, including alpha/beta/rc tags. -release = '0.6' +release = '0.7' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/docs/source/index.rst b/docs/source/index.rst index 4b893394..db7fcd5d 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -8,7 +8,7 @@ Pandana Pandana is a Python library for network analysis that uses `contraction hierarchies `_ to calculate super-fast travel accessibility metrics and shortest paths. The numerical code is in C++. -v0.6, released November 11, 2020 +v0.7, released July 26, 2023. Acknowledgments diff --git a/docs/source/installation.rst b/docs/source/installation.rst index 1aa7d7ff..9cd6984c 100644 --- a/docs/source/installation.rst +++ b/docs/source/installation.rst @@ -1,95 +1,65 @@ Installation ============ -Pandana is a Python package that includes a C++ extension for numerical operations. Pandana is tested on Mac, Linux, and Windows with Python 2.7, 3.6, 3.7, and 3.8. +Pandana is a Python package that includes a C++ extension for numerical operations. -The easiest way to install Pandana is using the `Anaconda`_ package manager. Pandana's Anaconda distributions are pre-compiled and include multi-threading support on all platforms. -If you install Pandana from Pip or from the source code on GitHub, you'll need to compile the C++ components locally. This is automatic, but won't work unless the right build tools are in place. See full instructions below. - - -Anaconda (recommended!) +Standard installation ------------------------------ -Pandana is hosted on Conda Forge:: - - conda install pandana --channel conda-forge - - -.. _pip: +As of March 2021, binary installers are provided for Mac, Linux, and Windows through both PyPI and Conda Forge. -Pip (requires local compilation) --------------------------------- - -Pandana is also hosted on PyPI:: +You can install Pandana using Pip:: pip install pandana -Pandana's C++ components will compile automatically if the right tools are present. See instructions below for individual operating systems. 
+Or Conda:: + conda install pandana --channel conda-forge -.. _github: +Pandana is easiest to install in Python 3.8 to 3.11. The last version of Pandana with Python 2.7 binaries is v0.4.4 on Conda Forge. The last version with Python 3.5 binaries is v0.6 on Pip. -GitHub (requires local compilation) ------------------------------------ -If you'll be modifying the code, you can install Pandana from the `GitHub source `_:: +ARM-based Macs +------------------------------ - git clone https://github.com/udst/pandana.git - cd pandana - pip install cython numpy - python setup.py develop +Native binary installers for ARM-based Macs are available on Conda Forge, but to use these your full Python stack needs to be optimized for ARM. -Pandana's C++ components will compile automatically if the right tools are present. See instructions below for individual operating systems. +If you're running Python through Rosetta translation (which is the default), older Mac installers will continue to work fine. See `issue #152 `_ for tips and further discussion. -Tips for local compilation --------------------------- +Compiling from source code +------------------------------ -If you cannot install using Conda, Pandana's C++ code will need to be compiled locally on your machine. +You may want to compile Pandana locally if you're modifying the source code or need to use a version that's missing binary installers for your platform. -Compiling in MacOS -~~~~~~~~~~~~~~~~~~ +Mac users should start by running ``xcode-select --install`` to make sure you have Apple's Xcode command line tools, which are needed behind the scenes. Windows users will need the `Microsoft Visual C++ Build Tools `_. -MacOS comes with C++ compilers, but the built-in ones don't allow multi-threading in Pandana. So, run this if possible before installing Pandana from source code:: +Pandana's build-time requirements are ``cython``, ``numpy``, and a C++ compiler that supports the C++11 standard. Additionally, the compiler needs to support OpenMP to allow Pandana to use multithreading. - xcode-select --install - conda install cython numpy llvm-openmp clang +The smoothest route is to get the compilers from Conda Forge -- you want the ``clang`` and ``llvm-openmp`` packages. Running Pandana's setup script will trigger compilation:: -Pandana will automatically detect that these are installed, and compile itself with multi-threading enabled. + conda install cython numpy clang llvm-openmp + python setup.py develop -If you prefer to use a different compiler, provide a path in the ``CC`` environment variable and we'll use that one instead. See writeup in `PR #137 `_ for some more discussion of this. +You'll see a lot of status messages go by, but hopefully no errors. -If you get a compilation error like ``'wchar.h' file not found`` in MacOS 10.14, you can resolve it by installing some additional header files:: +MacOS 10.14 (but not newer versions) often needs additional header files installed. If you see a compilation error like ``'wchar.h' file not found`` in MacOS 10.14, you can resolve it by running this command:: open /Library/Developer/CommandLineTools/Packages/macOS_SDK_headers_for_macOS_10.14.pkg -Compiling in Linux -~~~~~~~~~~~~~~~~~~ - -Pandana's setup script expects a version of the GCC compiler with support for OpenMP. This appears to be GCC 4.8+, but we haven't done extensive testing. 
If you run into problems, try doing a fresh install of the core build tools:: - - sudo apt-get install --reinstall build-essential - -Compiling in Windows -~~~~~~~~~~~~~~~~~~~~ - -Compilation is automatic but requires that `Microsoft Visual C++ Build Tools `_ are installed. -Certain older machines may need the `Microsoft Visual C++ 2008 SP1 Redistributable Package (x64) `_ or something similar in order to use Pandana. This provides runtime components of the Visual C++ libraries. - - -Multi-threading ---------------- - -After installing Pandana, running :code:`examples/simple_example.py` will display the number of threads that Pandana is using. +Advanced compilation tips +------------------------------ -If you're installing from source code on a Mac, see "Compiling in MacOS" above for more information about enabling multi-threading. +If you prefer not to use Conda, you can skip the ``clang`` and ``llvm-openmp`` packages. Compilation will likely work fine with your system's built-in toolchain. -.. note:: - The multi-threading status indicator may be incorrect in certain Windows environments. See GitHub `issue #138 `_ for the latest information on this. +The default C++ compiler on Macs doesn't support OpenMP, though, meaning that Pandana won't be able to use multithreading. +You can set the ``CC`` environment variable to specify a compiler of your choice. See writeup in `PR #137 `_ for discussion of this. If you need to make additional modifications, you can edit the compilation script in your local copy of ``setup.py``. +Multithreading +------------------------------ -.. _Anaconda: https://www.anaconda.com/distribution/ +You can check how many threads Pandana is able to use on your machine by running the ``examples/simple_example.py`` script. diff --git a/examples/Pandana-demo.ipynb b/examples/Pandana-demo.ipynb index f70409f9..17b72956 100644 --- a/examples/Pandana-demo.ipynb +++ b/examples/Pandana-demo.ipynb @@ -586,9 +586,7 @@ "Then get the routing between the nodes.\n", "- [network.shortest_path()](http://udst.github.io/pandana/network.html#pandana.network.Network.shortest_path)\n", "- [network.shortest_path_length()](http://udst.github.io/pandana/network.html#pandana.network.Network.shortest_path_length)\n", - "- [network.shortest_path_lengths()](http://udst.github.io/pandana/network.html#pandana.network.Network.shortest_path_lengths)\n", - "\n", - "Note that these are only *approximately* the shortest path. Pandana uses a heuristic called [contraction hierarchies](https://en.wikipedia.org/wiki/Contraction_hierarchies) to prioritize trunk routes, similar to e.g. Google Maps. This dramatically improves routing performance." 
+ "- [network.shortest_path_lengths()](http://udst.github.io/pandana/network.html#pandana.network.Network.shortest_path_lengths)\n" ] }, { diff --git a/examples/range_example.py b/examples/range_example.py new file mode 100644 index 00000000..8228d81f --- /dev/null +++ b/examples/range_example.py @@ -0,0 +1,39 @@ +import sys +import time + +import pandana + +import numpy as np +import pandas as pd +from pympler.asizeof import asizeof + +print() +print("Loading data...") +t0 = time.time() +store = pd.HDFStore('examples/data/bayareanetwork.h5', 'r') +nodes, edges = store.nodes, store.edges +print(round(time.time()-t0, 1), ' sec.') + +print() +print("Initializing network...") +t0 = time.time() +net = pandana.Network(nodes.x, nodes.y, edges.from_int, edges.to_int, edges[['weight']]) +store.close() +print(round(time.time()-t0, 1), ' sec.') + +print() +print("Calculating nodes in 100m range...") +t0 = time.time() +r = net.nodes_in_range([53114882, 53107159], 100.0) +print(round(time.time()-t0, 1), ' sec.') + +# print(net.node_idx.values) +# print(net.node_idx.index.values) + +print(asizeof(r)) # 88.8 million bytes raw + +print() + +# dataframe.info() +# dataframe.memory_usage(deep=True) +# .set_index(['1', '2'], inplace=True) diff --git a/examples/shortest_path_example.py b/examples/shortest_path_example.py index 1c63a693..0b73fe22 100644 --- a/examples/shortest_path_example.py +++ b/examples/shortest_path_example.py @@ -31,7 +31,7 @@ # if no argument provided look for it in the test data storef = os.path.normpath(os.path.join( os.path.dirname(os.path.abspath(__file__)), - '../pandana/tests/osm_sample.h5')) + '../tests/osm_sample.h5')) if not os.path.isfile(storef): raise IOError('Could not find test input file: {!r}'.format(storef)) @@ -50,7 +50,7 @@ net.set(pd.Series(net.node_ids)) s = net.aggregate(10000, type='count') -connected_nodes = s[s==477] +connected_nodes = s[s == 477] n = 10000 nodes_a = np.random.choice(connected_nodes.index, n) @@ -60,19 +60,19 @@ print(nodes_a[0]) print(nodes_b[0]) -print(net.shortest_path(nodes_a[0],nodes_b[0])) -print(net.shortest_path_length(nodes_a[0],nodes_b[0])) +print(net.shortest_path(nodes_a[0], nodes_b[0])) +print(net.shortest_path_length(nodes_a[0], nodes_b[0])) print('Shortest path 2:') print(nodes_a[1]) print(nodes_b[1]) -print(net.shortest_path(nodes_a[1],nodes_b[1])) -print(net.shortest_path_length(nodes_a[1],nodes_b[1])) +print(net.shortest_path(nodes_a[1], nodes_b[1])) +print(net.shortest_path_length(nodes_a[1], nodes_b[1])) print('Repeat with vectorized calculations:') -print(net.shortest_paths(nodes_a[0:2],nodes_b[0:2])) -print(net.shortest_path_lengths(nodes_a[0:2],nodes_b[0:2])) +print(net.shortest_paths(nodes_a[0:2], nodes_b[0:2])) +print(net.shortest_path_lengths(nodes_a[0:2], nodes_b[0:2])) # Performance comparison print('Performance comparison for 10k distance calculations:') diff --git a/examples/simple_example.py b/examples/simple_example.py index 7ded02c7..8bf9103c 100644 --- a/examples/simple_example.py +++ b/examples/simple_example.py @@ -29,7 +29,7 @@ # if no argument provided look for it in the test data storef = os.path.normpath(os.path.join( os.path.dirname(os.path.abspath(__file__)), - '../pandana/tests/osm_sample.h5')) + '../tests/osm_sample.h5')) if not os.path.isfile(storef): raise IOError('Could not find test input file: {!r}'.format(storef)) diff --git a/pandana/__init__.py b/pandana/__init__.py index f50d2441..bdb7d53e 100644 --- a/pandana/__init__.py +++ b/pandana/__init__.py @@ -1,3 +1,3 @@ from .network import Network 
-version = __version__ = '0.6' +version = __version__ = '0.7' diff --git a/pandana/network.py b/pandana/network.py index 6dbe6c98..6842edd6 100644 --- a/pandana/network.py +++ b/pandana/network.py @@ -18,13 +18,13 @@ def reserve_num_graphs(num): Parameters ---------- num : int - Number of graph to be reserved in memory + Number of graphs to be reserved in memory """ warnings.warn( "Function reserve_num_graphs() is no longer needed in Pandana 0.4+\ and will be removed in a future version", - DeprecationWarning + DeprecationWarning, ) return None @@ -36,40 +36,38 @@ class Network: Parameters ---------- - node_x : Pandas Series, float + node_x : pandas.Series, float Defines the x attribute for nodes in the network (e.g. longitude) - node_y : Pandas Series, float + node_y : pandas.Series, float Defines the y attribute for nodes in the network (e.g. latitude) This param and the one above should have the *same* index which should be the node_ids that are referred to in the edges below. - edge_from : Pandas Series, int - Defines the node id that begins an edge - should refer to the index + edge_from : pandas.Series, int + Defines the node ID that begins an edge - should refer to the index of the two series objects above - edge_to : Pandas Series, int - Defines the node id that ends an edge - should refer to the index + edge_to : pandas.Series, int + Defines the node ID that ends an edge - should refer to the index of the two series objects above - edge_weights : Pandas DataFrame, all numerics + edge_weights : pandas.DataFrame, all numerics Specifies one or more *impedances* on the network which define the distances between nodes. Multiple impedances can be used to capture travel times at different times of day, for instance twoway : boolean, optional Whether the edges in this network are two way edges or one way ( where the one direction is directed from the from node to the to - node). If twoway = True, it is assumed that the from and to id in the + node). If twoway = True, it is assumed that the from and to ID in the edge table occurs once and that travel can occur in both directions on the single edge record. Pandana will internally flip and append - the from and to ids to the original edges to create a two direction + the from and to IDs to the original edges to create a two direction network. If twoway = False, it is assumed that travel can only occur - in the explicit direction indicated by the from and to id in the edge + in the explicit direction indicated by the from and to ID in the edge table. 
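For orientation, here is a minimal sketch of constructing a `Network` with these parameters. It follows the pattern in `examples/range_example.py` added by this PR; the HDF5 path and the column names (`x`, `y`, `from_int`, `to_int`, `weight`) are assumptions taken from that example file.

```python
# Minimal construction sketch, assuming the Bay Area network file used in
# examples/range_example.py is available locally.
import pandas as pd
import pandana

store = pd.HDFStore('examples/data/bayareanetwork.h5', 'r')
nodes, edges = store.nodes, store.edges

# node_x/node_y share an index of node IDs; edge_from/edge_to refer to that index,
# and edge_weights holds a single impedance column named 'weight'
net = pandana.Network(nodes.x, nodes.y,
                      edges.from_int, edges.to_int,
                      edges[['weight']])
store.close()
```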
""" - def __init__(self, node_x, node_y, edge_from, edge_to, edge_weights, - twoway=True): - nodes_df = pd.DataFrame({'x': node_x, 'y': node_y}) - edges_df = pd.DataFrame({'from': edge_from, 'to': edge_to}).\ - join(edge_weights) + def __init__(self, node_x, node_y, edge_from, edge_to, edge_weights, twoway=True): + nodes_df = pd.DataFrame({"x": node_x, "y": node_y}) + edges_df = pd.DataFrame({"from": edge_from, "to": edge_to}).join(edge_weights) self.nodes_df = nodes_df self.edges_df = edges_df @@ -79,24 +77,27 @@ def __init__(self, node_x, node_y, edge_from, edge_to, edge_weights, self.poi_category_names = [] self.poi_category_indexes = {} - # this maps ids to indexes which are used internally + # this maps IDs to indexes which are used internally # this is a constant source of headaches, but all node identifiers # in the c extension are actually indexes ordered from 0 to numnodes-1 - # node ids are thus translated back and forth in the python layer, which - # allows non-integer node ids as well - self.node_idx = pd.Series(np.arange(len(nodes_df), dtype="int"), - index=nodes_df.index) - - edges = pd.concat([self._node_indexes(edges_df["from"]), - self._node_indexes(edges_df["to"])], axis=1) - - self.net = cyaccess(self.node_idx.values, - nodes_df.astype('double').values, - edges.values, - edges_df[edge_weights.columns].transpose() - .astype('double') - .values, - twoway) + # node IDs are thus translated back and forth in the python layer, + # which allows non-integer node IDs as well + self.node_idx = pd.Series( + np.arange(len(nodes_df), dtype="int"), index=nodes_df.index + ) + + edges = pd.concat( + [self._node_indexes(edges_df["from"]), self._node_indexes(edges_df["to"])], + axis=1, + ) + + self.net = cyaccess( + self.node_idx.values, + nodes_df.astype("double").values, + edges.values, + edges_df[edge_weights.columns].transpose().astype("double").values, + twoway, + ) self._twoway = twoway @@ -137,11 +138,13 @@ def save_hdf5(self, filename, rm_nodes=None): def _node_indexes(self, node_ids): # for some reason, merge is must faster than .loc - df = pd.merge(pd.DataFrame({"node_ids": node_ids}), - pd.DataFrame({"node_idx": self.node_idx}), - left_on="node_ids", - right_index=True, - how="left") + df = pd.merge( + pd.DataFrame({"node_ids": node_ids}), + pd.DataFrame({"node_idx": self.node_idx}), + left_on="node_ids", + right_index=True, + how="left", + ) return df.node_idx @property @@ -155,7 +158,7 @@ def decays(self): @property def node_ids(self): """ - The node ids which will be used as the index of many return series + The node IDs which will be used as the index of many return series """ return self.node_idx.index @@ -164,20 +167,24 @@ def bbox(self): """ The bounding box for nodes in this network [xmin, ymin, xmax, ymax] """ - return [self.nodes_df.x.min(), self.nodes_df.y.min(), - self.nodes_df.x.max(), self.nodes_df.y.max()] + return [ + self.nodes_df.x.min(), + self.nodes_df.y.min(), + self.nodes_df.x.max(), + self.nodes_df.y.max(), + ] def shortest_path(self, node_a, node_b, imp_name=None): """ - Return the shortest path between two node ids in the network. Must + Return the shortest path between two node IDs in the network. Must provide an impedance name if more than one is available. 
Parameters ---------- node_a : int - Source node id + Source node ID node_b : int - Destination node id + Destination node ID imp_name : string, optional The impedance name to use for the shortest path @@ -196,7 +203,7 @@ def shortest_path(self, node_a, node_b, imp_name=None): path = self.net.shortest_path(node_a, node_b, imp_num) - # map back to external node ids + # map back to external node IDs return self.node_ids.values[path] def shortest_paths(self, nodes_a, nodes_b, imp_name=None): @@ -211,9 +218,9 @@ def shortest_paths(self, nodes_a, nodes_b, imp_name=None): Parameters ---------- nodes_a : list-like of ints - Source node ids + Source node IDs nodes_b : list-like of ints - Corresponding destination node ids + Corresponding destination node IDs imp_name : string The impedance name to use for the shortest path @@ -224,8 +231,11 @@ def shortest_paths(self, nodes_a, nodes_b, imp_name=None): """ if len(nodes_a) != len(nodes_b): - raise ValueError("Origin and destination counts don't match: {}, {}" - .format(len(nodes_a), len(nodes_b))) + raise ValueError( + "Origin and destination counts don't match: {}, {}".format( + len(nodes_a), len(nodes_b) + ) + ) # map to internal node indexes nodes_a_idx = self._node_indexes(pd.Series(nodes_a)).values @@ -240,7 +250,7 @@ def shortest_paths(self, nodes_a, nodes_b, imp_name=None): def shortest_path_length(self, node_a, node_b, imp_name=None): """ - Return the length of the shortest path between two node ids in the + Return the length of the shortest path between two node IDs in the network. Must provide an impedance name if more than one is available. @@ -252,9 +262,9 @@ def shortest_path_length(self, node_a, node_b, imp_name=None): Parameters ---------- node_a : int - Source node id + Source node ID node_b : int - Destination node id + Destination node ID imp_name : string The impedance name to use for the shortest path @@ -272,6 +282,12 @@ def shortest_path_length(self, node_a, node_b, imp_name=None): len = self.net.shortest_path_distance(node_a, node_b, imp_num) + if len == 4294967.295: + warnings.warn( + "Unsigned integer: shortest path distance is trying to be calculated between\ + external %s and %s unconntected nodes" % (node_a, node_b) + ) + return len def shortest_path_lengths(self, nodes_a, nodes_b, imp_name=None): @@ -286,20 +302,23 @@ def shortest_path_lengths(self, nodes_a, nodes_b, imp_name=None): Parameters ---------- nodes_a : list-like of ints - Source node ids + Source node IDs nodes_b : list-like of ints - Corresponding destination node ids + Corresponding destination node IDs imp_name : string The impedance name to use for the shortest path Returns ------- - lenths : list of floats + lengths : list of floats """ if len(nodes_a) != len(nodes_b): - raise ValueError("Origin and destination counts don't match: {}, {}" - .format(len(nodes_a), len(nodes_b))) + raise ValueError( + "Origin and destination counts don't match: {}, {}".format( + len(nodes_a), len(nodes_b) + ) + ) # map to internal node indexes nodes_a_idx = self._node_indexes(pd.Series(nodes_a)).values @@ -309,6 +328,13 @@ def shortest_path_lengths(self, nodes_a, nodes_b, imp_name=None): lens = self.net.shortest_path_distances(nodes_a_idx, nodes_b_idx, imp_num) + if 4294967.295 in lens: + unconnected_idx = [i for i, v in enumerate(lens) if v == 4294967.295] + unconnected_nodes = [(nodes_a[i], nodes_b[i]) for i in unconnected_idx] + warnings.warn( + "Unsigned integer: shortest path distance is trying to be calculated \ + between the following external unconnected nodes: %s" 
% (unconnected_nodes)) + return lens def set(self, node_ids, variable=None, name="tmp"): @@ -318,10 +344,10 @@ def set(self, node_ids, variable=None, name="tmp"): Parameters ---------- - node_ids : Pandas Series, int + node_ids : pandas.Series, int A series of node_ids which are usually computed using get_node_ids on this object. - variable : Pandas Series, numeric, optional + variable : pandas.Series, numeric, optional A series which represents some variable defined in urban space. It could be the location of buildings, or the income of all households - just about anything can be aggregated using the @@ -349,23 +375,23 @@ def set(self, node_ids, variable=None, name="tmp"): """ if variable is None: variable = pd.Series(np.ones(len(node_ids)), index=node_ids.index) - - df = pd.DataFrame({name: variable, - "node_idx": self._node_indexes(node_ids)}) + df = pd.DataFrame({name: variable, "node_idx": self._node_indexes(node_ids)}) length = len(df) df = df.dropna(how="any") newl = len(df) - if length-newl > 0: + if length - newl > 0: print( - "Removed %d rows because they contain missing values" % - (length-newl)) + "Removed %d rows because they contain missing values" % (length - newl) + ) self.variable_names.add(name) - self.net.initialize_access_var(name.encode('utf-8'), - df.node_idx.values.astype('int'), - df[name].values.astype('double')) + self.net.initialize_access_var( + name.encode("utf-8"), + df.node_idx.values.astype("int"), + df[name].values.astype("double"), + ) def precompute(self, distance): """ @@ -386,19 +412,67 @@ def precompute(self, distance): """ self.net.precompute_range(distance) + def nodes_in_range(self, nodes, radius, imp_name=None): + """ + Computes the range queries (the reachable nodes within this maximum + distance) for each input node. + + Parameters + ---------- + nodes : list-like of ints + Source node IDs + radius : float + Maximum distance to use. This will usually be a distance unit in + meters however if you have customized the impedance (using the + imp_name option) this could be in other units such as utility or + time etc. + imp_name : string, optional + The impedance name to use for the aggregation on this network. + Must be one of the impedance names passed in the constructor of + this object. If not specified, there must be only one impedance + passed in the constructor, which will be used. + + Returns + ------- + d : pandas.DataFrame + Like nearest_pois, this is a dataframe containing the input node + index, the index of the nearby nodes within the search radius, + and the distance (according to the requested impedance) from the + source to the nearby node. 
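A brief usage sketch for the new `nodes_in_range()` query, reusing the node IDs and 100-unit radius from `examples/range_example.py` in this PR; it assumes `net` was built from that example's Bay Area network, so the impedance column is named `weight`.

```python
# Range query: all nodes reachable within 100 weight units of two source nodes.
reachable = net.nodes_in_range([53114882, 53107159], 100.0)

# One row per (source, destination) pair inside the radius, plus the distance
# column named after the impedance.
print(reachable.columns.tolist())   # ['source', 'destination', 'weight']
print(reachable.head())
```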
+ """ + imp_num = self._imp_name_to_num(imp_name) + imp_name = self.impedance_names[imp_num] + ext_ids = self.node_idx.index.values + + raw_result = self.net.nodes_in_range(nodes, radius, imp_num, ext_ids) + clean_result = pd.concat( + [ + pd.DataFrame(r, columns=["destination", imp_name]).assign(source=ix) + for r, ix in zip(raw_result, nodes) + ] + )[["source", "destination", imp_name]] + return ( + clean_result.drop_duplicates(subset=["source", "destination"]) + .reset_index(drop=True) + .query("{} <= {}".format(imp_name, radius)) + ) + def _imp_name_to_num(self, imp_name): if imp_name is None: - assert len(self.impedance_names) == 1,\ - "must pass impedance name if there are multiple impedances set" + assert ( + len(self.impedance_names) == 1 + ), "must pass impedance name if there are multiple impedances set" imp_name = self.impedance_names[0] - assert imp_name in self.impedance_names, "An impedance with that name" \ - "was not found" + assert imp_name in self.impedance_names, ( + "An impedance with that name" "was not found" + ) return self.impedance_names.index(imp_name) - def aggregate(self, distance, type="sum", decay="linear", imp_name=None, - name="tmp"): + def aggregate( + self, distance, type="sum", decay="linear", imp_name=None, name="tmp" + ): """ Aggregate information for every source node in the network - this is really the main purpose of this library. This allows you to touch @@ -446,7 +520,7 @@ def aggregate(self, distance, type="sum", decay="linear", imp_name=None, Returns ------- - agg : Pandas Series + agg : pandas.Series Returns a Pandas Series for every origin node in the network, with the index which is the same as the node_ids passed to the init method and the values are the aggregations for each source @@ -457,23 +531,26 @@ def aggregate(self, distance, type="sum", decay="linear", imp_name=None, type = type.lower() # Resolve aliases - if type in ['ave', 'avg', 'average']: - type = 'mean' + if type in ["ave", "avg", "average"]: + type = "mean" - if type in ['stddev']: - type = 'std' + if type in ["stddev"]: + type = "std" - if type in ['med']: - type = 'median' + if type in ["med"]: + type = "median" - assert name in self.variable_names, "A variable with that name " \ - "has not yet been initialized" + assert name in self.variable_names, ( + "A variable with that name " "has not yet been initialized" + ) - res = self.net.get_all_aggregate_accessibility_variables(distance, - name.encode('utf-8'), - type.encode('utf-8'), - decay.encode('utf-8'), - imp_num) + res = self.net.get_all_aggregate_accessibility_variables( + distance, + name.encode("utf-8"), + type.encode("utf-8"), + decay.encode("utf-8"), + imp_num, + ) return pd.Series(res, index=self.node_ids) @@ -483,10 +560,10 @@ def get_node_ids(self, x_col, y_col, mapping_distance=None): Parameters ---------- - x_col : Pandas series (float) + x_col : pandas.Series (float) A Pandas Series where values specify the x (e.g. longitude) location of dataset. - y_col : Pandas series (float) + y_col : pandas.Series (float) A Pandas Series where values specify the y (e.g. latitude) location of dataset. x_col and y_col should use the same index. mapping_distance : float, optional @@ -499,7 +576,7 @@ def get_node_ids(self, x_col, y_col, mapping_distance=None): Returns ------- - node_ids : Pandas series (int) + node_ids : pandas.Series (int) Returns a Pandas Series of node_ids for each x, y in the input data. The index is the same as the indexes of the x, y input data, and the values are the mapped node_ids. 
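Because `get_node_ids()` is usually combined with `set()` and `aggregate()`, a short illustrative sketch of that workflow follows; the `net` object and the `restaurants` DataFrame (with `lon`/`lat` columns) are assumptions for illustration only, not part of this changeset.

```python
# Typical accessibility workflow, assuming `net` is an existing pandana.Network
# and `restaurants` is a DataFrame with 'lon' and 'lat' columns.
node_ids = net.get_node_ids(restaurants.lon, restaurants.lat)

# Attach the variable to the network; with no `variable` given, each row counts as 1
net.set(node_ids, name='restaurants')

# (Optional) precompute range queries up to the aggregation distance for speed
net.precompute(1000)

# Count restaurants within 1000 impedance units of every node in the network
access = net.aggregate(1000, type='count', name='restaurants')
print(access.describe())
```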
@@ -508,7 +585,7 @@ def get_node_ids(self, x_col, y_col, mapping_distance=None): If the mapping is imperfect, this function returns all the input x, y's that were successfully mapped to node_ids. """ - xys = pd.DataFrame({'x': x_col, 'y': y_col}) + xys = pd.DataFrame({"x": x_col, "y": y_col}) distances, indexes = self.kdtree.query(xys.values) indexes = np.transpose(indexes)[0] @@ -516,16 +593,22 @@ def get_node_ids(self, x_col, y_col, mapping_distance=None): node_ids = self.nodes_df.iloc[indexes].index - df = pd.DataFrame({"node_id": node_ids, "distance": distances}, - index=xys.index) + df = pd.DataFrame({"node_id": node_ids, "distance": distances}, index=xys.index) if mapping_distance is not None: df = df[df.distance <= mapping_distance] return df.node_id - def plot(self, data, bbox=None, plot_type='scatter', fig_kwargs=None, - plot_kwargs=None, cbar_kwargs=None): + def plot( + self, + data, + bbox=None, + plot_type="scatter", + fig_kwargs=None, + plot_kwargs=None, + cbar_kwargs=None, + ): """ Plot an array of data on a map using Matplotlib, automatically matching the data to the Pandana network node positions. Keyword arguments are @@ -570,8 +653,8 @@ def plot(self, data, bbox=None, plot_type='scatter', fig_kwargs=None, except (ModuleNotFoundError, RuntimeError): raise ModuleNotFoundError("Pandana's network.plot() requires Matplotlib") - fig_kwargs = fig_kwargs or {'figsize': (10, 8)} - plot_kwargs = plot_kwargs or {'cmap': 'hot_r', 's': 1} + fig_kwargs = fig_kwargs or {"figsize": (10, 8)} + plot_kwargs = plot_kwargs or {"cmap": "hot_r", "s": 1} cbar_kwargs = cbar_kwargs or {} if not bbox: @@ -579,18 +662,17 @@ def plot(self, data, bbox=None, plot_type='scatter', fig_kwargs=None, self.nodes_df.y.min(), self.nodes_df.x.min(), self.nodes_df.y.max(), - self.nodes_df.x.max()) + self.nodes_df.x.max(), + ) fig, ax = plt.subplots(**fig_kwargs) x, y = (self.nodes_df.x.values, self.nodes_df.y.values) - if plot_type == 'scatter': - plot = plt.scatter( - x, y, c=data.values, **plot_kwargs) - elif plot_type == 'hexbin': - plot = plt.hexbin( - x, y, C=data.values, **plot_kwargs) + if plot_type == "scatter": + plot = plt.scatter(x, y, c=data.values, **plot_kwargs) + elif plot_type == "hexbin": + plot = plt.hexbin(x, y, C=data.values, **plot_kwargs) colorbar = plt.colorbar(plot, **cbar_kwargs) @@ -600,8 +682,9 @@ def plot(self, data, bbox=None, plot_type='scatter', fig_kwargs=None, def init_pois(self, num_categories, max_dist, max_pois): """ - Initialize the point of interest infrastructure. This is no longer - needed in Pandana 0.4+ and will be removed in a future version. + Initialize the point of interest (POI) infrastructure. + This is no longer needed in Pandana 0.4+ and will be removed in a + future version. Parameters ---------- @@ -612,7 +695,7 @@ def init_pois(self, num_categories, max_dist, max_pois): usually be a distance unit in meters however if you have customized the impedance this could be in other units such as utility or time etc. 
- max_pois : + max_pois : int Maximum number of POIs to return in the nearest query """ @@ -622,29 +705,39 @@ def init_pois(self, num_categories, max_dist, max_pois): warnings.warn( "Method init_pois() is no longer needed in Pandana 0.4+ and will be removed in a \ future version; maxdist and maxitems should now be passed to set_pois()", - DeprecationWarning + DeprecationWarning, ) return None - def set_pois(self, category=None, maxdist=None, maxitems=None, x_col=None, y_col=None): + def set_pois(self, category=None, maxdist=None, maxitems=None, x_col=None, y_col=None, + mapping_distance=None): """ - Set the location of all the pois of this category. The pois are - connected to the closest node in the Pandana network which assumes - no impedance between the location of the variable and the location - of the closest network node. + Set the location of all the points of interest (POIs) of this category. + The POIs are connected to the closest node in the Pandana network + which assumes no impedance between the location of the variable and + the location of the closest network node. Parameters ---------- category : string - The name of the category for this set of pois - maxdist - the maximum distance that will later be used in - find_all_nearest_pois - maxitems - the maximum number of items that will later be requested - in find_all_nearest_pois - x_col : Pandas Series (float) - The x location (longitude) of pois in this category - y_col : Pandas Series (Float) - The y location (latitude) of pois in this category + The name of the category for this set of POIs + maxdist : float + The maximum distance that will later be used in + find_all_nearest_pois() + maxitems : int + The maximum number of items that will later be requested + in find_all_nearest_pois() + x_col : pandas.Series (float) + The x location (longitude) of POIs in this category + y_col : pandas.Series (float) + The y location (latitude) of POIs in this category + mapping_distance : float, optional + The maximum distance that will be considered a match between the + POIs and the nearest node in the network. This will usually + be a distance unit in meters however if you have customized the + impedance this could be in other units such as utility or time + etc. If not specified, every POI will be mapped to + the nearest node. 
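To show how these parameters are used downstream (see also the `nearest_pois()` documentation that follows), here is a hedged sketch of the POI workflow; `net` and the `restaurants` DataFrame with `lon`/`lat` columns are illustrative assumptions.

```python
# Register a POI category, then query nearest-POI distances from every node.
net.set_pois(category='restaurants', maxdist=2000, maxitems=3,
             x_col=restaurants.lon, y_col=restaurants.lat)

# Distance from each network node to its 3 nearest restaurants, capped at 2000;
# include_poi_ids=True adds poi1..poi3 columns with the matching POI IDs.
nearest = net.nearest_pois(2000, 'restaurants', num_pois=3, include_poi_ids=True)
print(nearest.head())
```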
Returns ------- @@ -653,7 +746,7 @@ def set_pois(self, category=None, maxdist=None, maxitems=None, x_col=None, y_col """ # condition to check if missing arguments for keyword arguments using set_pois() from v0.3 if maxitems is None: - print('Reading parameters from init_pois()') + print("Reading parameters from init_pois()") maxitems = self.max_pois # condition to check for positional arguments in set_pois() from v0.3 @@ -662,7 +755,7 @@ def set_pois(self, category=None, maxdist=None, maxitems=None, x_col=None, y_col maxitems = self.max_pois if maxdist is None: - print('Reading parameters from init_pois()') + print("Reading parameters from init_pois()") maxdist = self.max_dist elif isinstance(maxdist, type(pd.Series())): @@ -674,34 +767,43 @@ def set_pois(self, category=None, maxdist=None, maxitems=None, x_col=None, y_col self.max_pois = maxitems - node_ids = self.get_node_ids(x_col, y_col) + node_ids = self.get_node_ids(x_col, y_col, mapping_distance=mapping_distance) self.poi_category_indexes[category] = node_ids.index node_idx = self._node_indexes(node_ids) - self.net.initialize_category(maxdist, maxitems, category.encode('utf-8'), node_idx.values) + self.net.initialize_category( + maxdist, maxitems, category.encode("utf-8"), node_idx.values + ) - def nearest_pois(self, distance, category, num_pois=1, max_distance=None, - imp_name=None, include_poi_ids=False): + def nearest_pois( + self, + distance, + category, + num_pois=1, + max_distance=None, + imp_name=None, + include_poi_ids=False, + ): """ - Find the distance to the nearest pois from each source node. The - bigger values in this case mean less accessibility. + Find the distance to the nearest points of interest (POI)s from each + source node. The bigger values in this case mean less accessibility. Parameters ---------- distance : float - The maximum distance to look for pois. This will usually be a + The maximum distance to look for POIs. This will usually be a distance unit in meters however if you have customized the impedance this could be in other units such as utility or time etc. category : string - The name of the category of poi to look for + The name of the category of POI to look for num_pois : int - The number of pois to look for, this also sets the number of + The number of POIs to look for, this also sets the number of columns in the DataFrame that gets returned max_distance : float, optional - The value to set the distance to if there is NO poi within the + The value to set the distance to if there is no POI within the specified distance - if not specified, gets set to distance. This will usually be a distance unit in meters however if you have customized the impedance this could be in other units such as @@ -715,20 +817,20 @@ def nearest_pois(self, distance, category, num_pois=1, max_distance=None, If this flag is set to true, the call will add columns to the return DataFrame - instead of just returning the distance for the nth POI, it will also return the id of that POI. The names - of the columns with the poi ids will be poi1, poi2, etc - it - will take roughly twice as long to include these ids as to not + of the columns with the POI IDs will be poi1, poi2, etc - it + will take roughly twice as long to include these IDs as to not include them Returns ------- - d : Pandas DataFrame + d : pandas.DataFrame Like aggregate, this series has an index of all the node ids for the network. Unlike aggregate, this method returns a dataframe with the number of columns equal to the distances to the Nth - closest poi. 
For instance, if you ask for the 10 closest poi to - each node, column d[1] wil be the distance to the 1st closest poi + closest POI. For instance, if you ask for the 10 closest poi to + each node, column d[1] wil be the distance to the 1st closest POI of that category while column d[2] will be the distance to the 2nd - closest poi, and so on. + closest POI, and so on. """ if max_distance is None: max_distance = distance @@ -737,32 +839,30 @@ def nearest_pois(self, distance, category, num_pois=1, max_distance=None, assert 0, "Need to call set_pois for this category" if num_pois > self.max_pois: - assert 0, "Asking for more pois than set in init_pois" + assert 0, "Asking for more POIs than set in init_pois" imp_num = self._imp_name_to_num(imp_name) dists, poi_ids = self.net.find_all_nearest_pois( - distance, - num_pois, - category.encode('utf-8'), - imp_num) + distance, num_pois, category.encode("utf-8"), imp_num + ) dists[dists == -1] = max_distance df = pd.DataFrame(dists, index=self.node_ids) - df.columns = list(range(1, num_pois+1)) + df.columns = list(range(1, num_pois + 1)) if include_poi_ids: df2 = pd.DataFrame(poi_ids, index=self.node_ids) - df2.columns = ["poi%d" % i for i in range(1, num_pois+1)] + df2.columns = ["poi%d" % i for i in range(1, num_pois + 1)] for col in df2.columns: # if this is still all working according to plan at this point # the great magic trick is now to turn the integer position of # the poi, which is painstakingly returned from the c++ code, # and turn it into the actual index that was used when it was - # initialized as a pandas series - this really is pandas-like + # initialized as a pandas.Series - this really is pandas-like # thinking. it's complicated on the inside, but quite # intuitive to the user I think - s = df2[col].astype('int') + s = df2[col].astype("int") df2[col] = self.poi_category_indexes[category].values[s] df2.loc[s == -1, col] = np.nan @@ -799,10 +899,9 @@ def low_connectivity_nodes(self, impedance, count, imp_name=None): """ # set a counter variable on all nodes - self.set(self.node_ids.to_series(), name='counter') + self.set(self.node_ids.to_series(), name="counter") # count nodes within impedance range - agg = self.aggregate( - impedance, type='count', imp_name=imp_name, name='counter') + agg = self.aggregate(impedance, type="count", imp_name=imp_name, name="counter") return np.array(agg[agg < count].index) diff --git a/pandana/testing.py b/pandana/testing.py index 01c25fa8..77f8ba3c 100644 --- a/pandana/testing.py +++ b/pandana/testing.py @@ -2,5 +2,5 @@ import pytest -skipiftravis = pytest.mark.skipif( - os.environ.get('TRAVIS') == 'true', reason='skip on Travis-CI') +skipifci = pytest.mark.skipif( + os.environ.get('CI') == 'true', reason='skip on CI') diff --git a/pandana/tests/__init__.py b/pandana/tests/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/pandana/utils.py b/pandana/utils.py index 10e0885d..c2507c19 100644 --- a/pandana/utils.py +++ b/pandana/utils.py @@ -25,3 +25,11 @@ def reindex(series1, series2): right_index=True, how="left") return df.right + + +def adjacency_matrix(edges_df, plot_matrix=False): + df = pd.crosstab(edges_df['from'], edges_df['to']) + idx = df.columns.union(df.index) + df = df.reindex(index=idx, columns=idx, fill_value=0) + + return df diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..335ed6d8 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,9 @@ +[build-system] +# Requirements for building the compiled package +requires = [ + "wheel", + 
"setuptools >=40.8", + "cython >=0.25.2", + "oldest-supported-numpy" +] +build-backend = "setuptools.build_meta" diff --git a/setup.py b/setup.py index 23c4f06a..7682b220 100644 --- a/setup.py +++ b/setup.py @@ -1,175 +1,131 @@ import os -import platform import sys -import sysconfig -from setuptools import find_packages -from distutils.core import setup, Extension -from setuptools.command.test import test as TestCommand -from setuptools.command.build_ext import build_ext +import numpy as np # for c++ headers - -############################################### -## Invoking tests -############################################### - -class PyTest(TestCommand): - user_options = [('pytest-args=', 'a', "Arguments to pass to py.test")] - - def initialize_options(self): - TestCommand.initialize_options(self) - self.pytest_args = None - - def finalize_options(self): - TestCommand.finalize_options(self) - self.test_args = [] - self.test_suite = True - - def run_tests(self): - # import here, cause outside the eggs aren't loaded - import pytest - errno = pytest.main(self.pytest_args or ['']) - sys.exit(errno) - - -class Lint(TestCommand): - def run(self): - os.system("cpplint --filter=-build/include_subdir,-legal/copyright,-runtime/references,-runtime/int src/accessibility.* src/graphalg.*") - os.system("pycodestyle src/cyaccess.pyx") - os.system("pycodestyle pandana") - - -class CustomBuildExtCommand(build_ext): - """build_ext command for use when numpy headers are needed.""" - def run(self): - import numpy as np - self.include_dirs.append(np.get_include()) - build_ext.run(self) +from setuptools import find_packages, setup, Extension ############################################### -## Building the C++ extension +# Building the C++ extension ############################################### -extra_compile_args = ['-w', '-std=c++11', '-O3'] +extra_compile_args = ["-w", "-std=c++11", "-O3"] extra_link_args = [] # Mac compilation: flags are for the llvm compilers included with recent # versions of Xcode Command Line Tools, or newer versions installed separately -if sys.platform.startswith('darwin'): # Mac - - # This environment variable sets the earliest OS version that the compiled - # code will be compatible with. In certain contexts the default is too old - # to allow using libc++; supporting OS X 10.9 and later seems reasonable - os.environ['MACOSX_DEPLOYMENT_TARGET'] = '10.9' - - extra_compile_args += ['-D NO_TR1_MEMORY', '-stdlib=libc++'] - extra_link_args += ['-stdlib=libc++'] - +if sys.platform.startswith("darwin"): # Mac + + extra_compile_args += ["-stdlib=libc++"] + extra_link_args += ["-stdlib=libc++"] + # The default compiler that ships with Macs doesn't support OpenMP multi- # threading. We recommend using the Conda toolchain instead, but will also # try to detect if people are using another alternative like Homebrew. - if 'CC' in os.environ: - extra_compile_args += ['-fopenmp'] - print('Attempting Pandana compilation with OpenMP multi-threading ' - 'support, with user-specified compiler:\n{}'.format( - os.environ['CC'])) + if "CC" in os.environ: + extra_compile_args += ["-fopenmp"] + print( + "Attempting Pandana compilation with OpenMP multi-threading " + "support, with user-specified compiler:\n{}".format(os.environ["CC"]) + ) # Otherwise, if the default clang has been replaced but nothing specified # in the 'CC' environment variable, assume they've followed our instructions # for using the Conda toolchain. 
- - elif os.popen('which clang').read().strip() != '/usr/bin/clang': - cc = 'clang' - cc_catalina = 'clang --sysroot /Library/Developer/CommandLineTools/SDKs/MacOSX.sdk' - - extra_compile_args += ['-fopenmp'] - print('Attempting Pandana compilation with OpenMP multi-threading ' - 'support, with the following compiler:\n{}'.format( - os.popen('which clang').read())) - - if '10.15' in os.popen('sw_vers').read(): - os.environ['CC'] = cc_catalina - elif '11.' in os.popen('sw_vers').read(): - os.environ['CC'] = cc_catalina - else: - os.environ['CC'] = cc + + elif os.popen("which clang").read().strip() != "/usr/bin/clang": + cc = "clang" + cc_catalina = ( + "clang --sysroot /Library/Developer/CommandLineTools/SDKs/MacOSX.sdk" + ) + + extra_compile_args += ["-fopenmp"] + print( + "Attempting Pandana compilation with OpenMP multi-threading " + "support, with the following compiler:\n{}".format( + os.popen("which clang").read() + ) + ) + + if " 10.15" in os.popen("sw_vers").read(): + os.environ["CC"] = cc_catalina + elif " 10." in os.popen("sw_vers").read(): # 10.14 and earlier + os.environ["CC"] = cc + else: # 11.x, 12.x, etc. + os.environ["CC"] = cc_catalina else: - print('Attempting Pandana compilation without support for ' - 'multi-threading. See installation instructions for alternative ' - 'options') + print( + "Attempting Pandana compilation without support for " + "multi-threading. See installation instructions for alternative " + "options" + ) # Window compilation: flags are for Visual C++ -elif sys.platform.startswith('win'): # Windows - extra_compile_args = ['/w', '/openmp'] +elif sys.platform.startswith("win"): # Windows + extra_compile_args = ["/w", "/openmp"] # Linux compilation: flags are for gcc 4.8 and later else: # Linux - extra_compile_args += ['-fopenmp'] - extra_link_args += ['-lgomp'] + extra_compile_args += ["-fopenmp"] + extra_link_args += ["-lgomp"] cyaccess = Extension( - name='pandana.cyaccess', - sources=[ - 'src/accessibility.cpp', - 'src/graphalg.cpp', - 'src/cyaccess.pyx', - 'src/contraction_hierarchies/src/libch.cpp'], - language='c++', - include_dirs=['.'], - extra_compile_args=extra_compile_args, - extra_link_args=extra_link_args) + name='pandana.cyaccess', + sources=[ + 'src/accessibility.cpp', + 'src/graphalg.cpp', + 'src/cyaccess.pyx', + 'src/contraction_hierarchies/src/libch.cpp'], + language='c++', + include_dirs=['.', np.get_include()], + extra_compile_args=extra_compile_args, + extra_link_args=extra_link_args) ############################################### -## Standard setup +# Standard setup ############################################### -version = '0.6' +version = "0.7" packages = find_packages(exclude=["*.tests", "*.tests.*", "tests.*", "tests"]) setup( packages=packages, - name='pandana', - author='UrbanSim Inc.', + name="pandana", + author="UrbanSim Inc.", version=version, - license='AGPL', - description=('Python library for network analysis'), + license="AGPL", + description=("Python library for network analysis"), long_description=( - 'Pandana is a Python library for network analysis that uses ' - 'contraction hierarchies to calculate super-fast travel ' - 'accessibility metrics and shortest paths. The numerical ' - 'code is in C++.'), - url='https://udst.github.io/pandana/', + "Pandana is a Python library for network analysis that uses " + "contraction hierarchies to calculate super-fast travel " + "accessibility metrics and shortest paths. The numerical " + "code is in C++." 
+ ), + url="https://udst.github.io/pandana/", ext_modules=[cyaccess], install_requires=[ - 'cython >=0.25.2', 'numpy >=1.8', 'pandas >=0.17', 'requests >=2.0', 'scikit-learn >=0.18', - 'tables >=3.1, <3.6; python_version <"3.6"', - 'tables >=3.1, <3.7; python_version >="3.6"' + 'tables >=3.1' ], - cmdclass={ - 'test': PyTest, - 'lint': Lint, - 'build_ext': CustomBuildExtCommand, - }, classifiers=[ - 'Development Status :: 4 - Beta', - 'Programming Language :: Python :: 2.7', - 'Programming Language :: Python :: 3.5', - 'Programming Language :: Python :: 3.6', - 'Programming Language :: Python :: 3.7', - 'Programming Language :: Python :: 3.8', - 'License :: OSI Approved :: GNU Affero General Public License v3' + "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "License :: OSI Approved :: GNU Affero General Public License v3", ], ) diff --git a/src/accessibility.cpp b/src/accessibility.cpp index cedb588a..5aa86b5a 100644 --- a/src/accessibility.cpp +++ b/src/accessibility.cpp @@ -1,6 +1,8 @@ #include "accessibility.h" #include #include +#include +#include #include #include "graphalg.h" @@ -18,22 +20,6 @@ bool distance_node_pair_comparator(const distance_node_pair& l, { return l.first < r.first; } -double exp_decay(const double &distance, const float &radius, const float &var) -{ - return exp(-1*distance/radius) * var; -} - -double linear_decay(const double &distance, const float &radius, const float &var) -{ - return (1.0-distance/radius) * var; -} - -double flat_decay(const double &distance, const float &radius, const float &var) -{ - return var; -} - - Accessibility::Accessibility( int numnodes, vector< vector> edges, @@ -96,6 +82,48 @@ Accessibility::precomputeRangeQueries(float radius) { } +vector>> +Accessibility::Range(vector srcnodes, float radius, int graphno, + vector ext_ids) { + + // Set up a mapping between the external node ids and internal ones + std::unordered_map int_ids(ext_ids.size()); + for (int i = 0; i < ext_ids.size(); i++) { + int_ids.insert(pair(ext_ids[i], i)); + } + + // use cached results if available + vector dists(srcnodes.size()); + if (dmsradius > 0 && radius <= dmsradius) { + for (int i = 0; i < srcnodes.size(); i++) { + dists[i] = dms[graphno][int_ids[srcnodes[i]]]; + } + } + else { + #pragma omp parallel + #pragma omp for schedule(guided) + for (int i = 0; i < srcnodes.size(); i++) { + ga[graphno]->Range(int_ids[srcnodes[i]], radius, + omp_get_thread_num(), dists[i]); + } + } + + // todo: check that results are returned from cache correctly + // todo: check that performing an aggregation creates cache + + // Convert back to external node ids + vector>> output(dists.size()); + for (int i = 0; i < dists.size(); i++) { + output[i].resize(dists[i].size()); + for (int j = 0; j < dists[i].size(); j++) { + output[i][j] = std::make_pair(ext_ids[dists[i][j].first], + dists[i][j].second); + } + } + return output; +} + + vector Accessibility::Route(int src, int tgt, int graphno) { vector ret = this->ga[graphno]->Route(src, tgt); @@ -401,14 +429,17 @@ Accessibility::aggregateAccessibilityVariable( double sum = 0.0; double sumsq = 0.0; - double (*sum_function_ptr)(const double &, const float &, const float &); + std::function sum_function; if(decay == "exp") - sum_function_ptr = &exp_decay; + sum_function = [](const double &distance, const float &radius, const 
float &var) + { return exp(-1*distance/radius) * var; }; if(decay == "linear") - sum_function_ptr = &linear_decay; + sum_function = [](const double &distance, const float &radius, const float &var) + { return (1.0-distance/radius) * var; }; if(decay == "flat") - sum_function_ptr = &flat_decay; + sum_function = [](const double &distance, const float &radius, const float &var) + { return var; }; for (int i = 0 ; i < distances.size() ; i++) { int nodeid = distances[i].first; @@ -419,7 +450,7 @@ Accessibility::aggregateAccessibilityVariable( for (int j = 0 ; j < vars[nodeid].size() ; j++) { cnt++; // count items - sum += (*sum_function_ptr)(distance, radius, vars[nodeid][j]); + sum += sum_function(distance, radius, vars[nodeid][j]); // stddev is always flat sumsq += vars[nodeid][j] * vars[nodeid][j]; diff --git a/src/accessibility.h b/src/accessibility.h index de76aab5..c5e0ad71 100644 --- a/src/accessibility.h +++ b/src/accessibility.h @@ -45,15 +45,16 @@ class Accessibility { string decay, int graphno = 0); - // get nodes with the range - DistanceVec Range(int srcnode, float radius, int graphno = 0); + // get nodes with a range for a specific list of source nodes + vector>> Range(vector srcnodes, float radius, + int graphno, vector ext_ids); // shortest path between two points vector Route(int src, int tgt, int graphno = 0); // shortest path between list of origins and destinations vector> Routes(vector sources, vector targets, - int graphno = 0); + int graphno = 0); // shortest path distance between two points double Distance(int src, int tgt, int graphno = 0); diff --git a/src/contraction_hierarchies/src/POIIndex/POIIndex.h b/src/contraction_hierarchies/src/POIIndex/POIIndex.h index 5d442fa0..bd699403 100644 --- a/src/contraction_hierarchies/src/POIIndex/POIIndex.h +++ b/src/contraction_hierarchies/src/POIIndex/POIIndex.h @@ -22,18 +22,7 @@ #include -#if defined _WIN32 || defined NO_TR1_MEMORY -#include -#else -// can we get rid of this tr1 stuff and use c++11 now? -#include -#endif - -#ifdef NO_TR1_MEMORY - using std::shared_ptr; -#else -#define shared_ptr tr1::shared_ptr -#endif +using std::shared_ptr; #include "../BasicDefinitions.h" #include "../DataStructures/BinaryHeap.h" diff --git a/src/contraction_hierarchies/src/libch.cpp b/src/contraction_hierarchies/src/libch.cpp index 8dad3b88..d591c055 100644 --- a/src/contraction_hierarchies/src/libch.cpp +++ b/src/contraction_hierarchies/src/libch.cpp @@ -20,7 +20,7 @@ or see http://www.gnu.org/licenses/agpl.txt. 
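
The decay weightings that aggregateAccessibilityVariable now builds as C++ lambdas correspond to simple formulas. A minimal Python sketch of the same math, assuming distance and radius are expressed in the same impedance units (illustration only, not the code path Pandana actually executes):

    import math

    def decay_weight(distance, radius, var, decay="flat"):
        # Mirrors the lambdas in src/accessibility.cpp: each observed value
        # is down-weighted according to its distance from the source node.
        if decay == "exp":
            return math.exp(-1 * distance / radius) * var
        if decay == "linear":
            return (1.0 - distance / radius) * var
        return var  # "flat": every value within the radius counts fully

    # A value of 10.0 found 500 units away within a 1000-unit radius:
    print(decay_weight(500, 1000, 10.0, "exp"))     # ~6.07
    print(decay_weight(500, 1000, 10.0, "linear"))  # 5.0
    print(decay_weight(500, 1000, 10.0, "flat"))    # 10.0
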
#include "libch.h" #include "POIIndex/POIIndex.h" -#ifdef _OPENMP +#if defined(_OPENMP) && (defined(__amd64__) || defined(__i386__)) #include "Util/HyperThreading.h" #endif namespace CH { diff --git a/src/cyaccess.pyx b/src/cyaccess.pyx index a4f6c617..af637647 100644 --- a/src/cyaccess.pyx +++ b/src/cyaccess.pyx @@ -1,3 +1,5 @@ +#cython: language_level=3 + cimport cython from libcpp cimport bool from libcpp.vector cimport vector @@ -27,6 +29,7 @@ cdef extern from "accessibility.h" namespace "MTC::accessibility": vector[vector[int]] Routes(vector[long], vector[long], int) double Distance(int, int, int) vector[double] Distances(vector[long], vector[long], int) + vector[vector[pair[long, float]]] Range(vector[long], float, int, vector[long]) void precomputeRangeQueries(double) @@ -191,6 +194,16 @@ cdef class cyaccess: impno - impedance id """ return self.access.Distances(srcnodes, destnodes, impno) - + def precompute_range(self, double radius): self.access.precomputeRangeQueries(radius) + + def nodes_in_range(self, vector[long] srcnodes, float radius, int impno, + np.ndarray[long] ext_ids): + """ + srcnodes - node ids of origins + radius - maximum range in which to search for nearby nodes + impno - the impedance id to use + ext_ids - all node ids in the network + """ + return self.access.Range(srcnodes, radius, impno, ext_ids) diff --git a/pandana/tests/osm_sample.h5 b/tests/osm_sample.h5 similarity index 100% rename from pandana/tests/osm_sample.h5 rename to tests/osm_sample.h5 diff --git a/pandana/tests/test_cyaccess.py b/tests/test_cyaccess.py similarity index 97% rename from pandana/tests/test_cyaccess.py rename to tests/test_cyaccess.py index 7d67924d..5eed36da 100644 --- a/pandana/tests/test_cyaccess.py +++ b/tests/test_cyaccess.py @@ -33,9 +33,9 @@ def net(nodes_and_edges): edges["to"] = node_locations.loc[edges["to"]].values net = cyaccess( - nodes.index.values, + nodes.index.values.astype('int_'), nodes.values, - edges.values, + edges.values.astype('int_'), edge_weights.transpose().values, True ) diff --git a/pandana/loaders/tests/test_osm.py b/tests/test_osm.py similarity index 90% rename from pandana/loaders/tests/test_osm.py rename to tests/test_osm.py index 14cd62b2..d3a44571 100644 --- a/pandana/loaders/tests/test_osm.py +++ b/tests/test_osm.py @@ -2,7 +2,7 @@ import pandana from pandana.loaders import osm -from pandana.testing import skipiftravis +from pandana.testing import skipifci @pytest.fixture(scope='module') @@ -41,10 +41,11 @@ def test_process_node(): assert osm.process_node(test_node) == expected -@skipiftravis -def test_network_from_bbox(bbox2): - net = osm.pdna_network_from_bbox(*bbox2) - assert isinstance(net, pandana.Network) +# This needs to be fixed in UrbanAccess +# @skipifci +# def test_network_from_bbox(bbox2): +# net = osm.pdna_network_from_bbox(*bbox2) +# assert isinstance(net, pandana.Network) def test_build_node_query_no_tags(bbox1): diff --git a/pandana/tests/test_pandana.py b/tests/test_pandana.py similarity index 72% rename from pandana/tests/test_pandana.py rename to tests/test_pandana.py index 68e277bd..8522b046 100644 --- a/pandana/tests/test_pandana.py +++ b/tests/test_pandana.py @@ -1,28 +1,29 @@ import os.path import numpy as np -from numpy.testing import assert_allclose import pandas as pd import pytest -from pandas.util import testing as pdt -from pandana.testing import skipiftravis import pandana.network as pdna +from numpy.testing import assert_allclose +from pandas.testing import assert_index_equal + +from pandana.testing import skipifci + 
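
The new C++ Range() method and its cyaccess nodes_in_range binding surface in Python as a network-level range query. A rough usage sketch, assuming the Network.nodes_in_range wrapper and the "source"/"weight" output columns exercised by test_nodes_in_range later in this patch:

    import pandas as pd
    import pandana.network as pdna

    # Load the sample network bundled with the test suite (path is relative
    # to the repository root; adjust as needed).
    store = pd.HDFStore("tests/osm_sample.h5", "r")
    nodes, edges = store.nodes, store.edges
    store.close()

    net = pdna.Network(nodes.x, nodes.y, edges["from"], edges.to, edges[["weight"]])
    net.precompute(500)                    # optional: caches range queries up to 500

    src = net.get_node_ids(nodes.x[:5], nodes.y[:5])
    pairs = net.nodes_in_range(src, 500)   # one row per (source, reachable node) pair
    print(pairs.head())                    # includes "source" and "weight" columns
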
@pytest.fixture(scope="module") def sample_osm(request): - store = pd.HDFStore( - os.path.join(os.path.dirname(__file__), 'osm_sample.h5'), "r") + store = pd.HDFStore(os.path.join(os.path.dirname(__file__), "osm_sample.h5"), "r") nodes, edges = store.nodes, store.edges - net = pdna.Network(nodes.x, nodes.y, edges["from"], edges.to, - edges[["weight"]]) + net = pdna.Network(nodes.x, nodes.y, edges["from"], edges.to, edges[["weight"]]) net.precompute(2000) def fin(): store.close() + request.addfinalizer(fin) return net @@ -31,16 +32,15 @@ def fin(): # initialize a second network @pytest.fixture(scope="module") def second_sample_osm(request): - store = pd.HDFStore( - os.path.join(os.path.dirname(__file__), 'osm_sample.h5'), "r") + store = pd.HDFStore(os.path.join(os.path.dirname(__file__), "osm_sample.h5"), "r") nodes, edges = store.nodes, store.edges - net = pdna.Network(nodes.x, nodes.y, edges["from"], edges.to, - edges[["weight"]]) + net = pdna.Network(nodes.x, nodes.y, edges["from"], edges.to, edges[["weight"]]) net.precompute(2000) def fin(): store.close() + request.addfinalizer(fin) return net @@ -79,7 +79,7 @@ def random_x_y(sample_osm, ssize): def test_agg_variables_accuracy(sample_osm): net = sample_osm - # test accuracy compared to pandas functions + # test accuracy compared to Pandas functions ssize = 50 r = random_data(ssize) connected_nodes = get_connected_nodes(net) @@ -91,48 +91,47 @@ def test_agg_variables_accuracy(sample_osm): assert s.iloc[0] == 50 s = net.aggregate(100000, type="AVE").loc[connected_nodes] - assert s.describe()['std'] < .01 # assert almost equal + assert s.describe()["std"] < 0.01 # assert almost equal assert_allclose(s.mean(), r.mean(), atol=1e-3) s = net.aggregate(100000, type="mean").loc[connected_nodes] - assert s.describe()['std'] < .01 # assert almost equal + assert s.describe()["std"] < 0.01 # assert almost equal assert_allclose(s.mean(), r.mean(), atol=1e-3) s = net.aggregate(100000, type="min").loc[connected_nodes] - assert s.describe()['std'] < .01 # assert almost equal + assert s.describe()["std"] < 0.01 # assert almost equal assert_allclose(s.mean(), r.min(), atol=1e-3) s = net.aggregate(100000, type="max").loc[connected_nodes] - assert s.describe()['std'] < .01 # assert almost equal + assert s.describe()["std"] < 0.01 # assert almost equal assert_allclose(s.mean(), r.max(), atol=1e-3) r.sort_values(inplace=True) s = net.aggregate(100000, type="median").loc[connected_nodes] - assert s.describe()['std'] < .01 # assert almost equal + assert s.describe()["std"] < 0.01 # assert almost equal assert_allclose(s.mean(), r.iloc[25], atol=1e-2) s = net.aggregate(100000, type="25pct").loc[connected_nodes] - assert s.describe()['std'] < .01 # assert almost equal + assert s.describe()["std"] < 0.01 # assert almost equal assert_allclose(s.mean(), r.iloc[12], atol=1e-2) s = net.aggregate(100000, type="75pct").loc[connected_nodes] - assert s.describe()['std'] < .01 # assert almost equal + assert s.describe()["std"] < 0.01 # assert almost equal assert_allclose(s.mean(), r.iloc[37], atol=1e-2) s = net.aggregate(100000, type="SUM").loc[connected_nodes] - assert s.describe()['std'] < .05 # assert almost equal + assert s.describe()["std"] < 0.05 # assert almost equal assert_allclose(s.mean(), r.sum(), atol=1e-2) s = net.aggregate(100000, type="std").loc[connected_nodes] - assert s.describe()['std'] < .01 # assert almost equal + assert s.describe()["std"] < 0.01 # assert almost equal assert_allclose(s.mean(), r.std(), atol=1e-2) def test_non_integer_nodeids(request): 
- store = pd.HDFStore( - os.path.join(os.path.dirname(__file__), 'osm_sample.h5'), "r") + store = pd.HDFStore(os.path.join(os.path.dirname(__file__), "osm_sample.h5"), "r") nodes, edges = store.nodes, store.edges # convert to string! @@ -140,14 +139,14 @@ def test_non_integer_nodeids(request): edges["from"] = edges["from"].astype("str") edges["to"] = edges["to"].astype("str") - net = pdna.Network(nodes.x, nodes.y, edges["from"], edges.to, - edges[["weight"]]) + net = pdna.Network(nodes.x, nodes.y, edges["from"], edges.to, edges[["weight"]]) def fin(): store.close() + request.addfinalizer(fin) - # test accuracy compared to pandas functions + # test accuracy compared to Pandas functions ssize = 50 r = random_data(ssize) connected_nodes = get_connected_nodes(net) @@ -162,16 +161,15 @@ def test_agg_variables(sample_osm): net = sample_osm ssize = 50 - net.set(random_node_ids(sample_osm, ssize), - variable=random_data(ssize)) + net.set(random_node_ids(sample_osm, ssize), variable=random_data(ssize)) for type in net.aggregations: for decay in net.decays: for distance in [5, 10, 20]: - t = type.decode(encoding='UTF-8') - d = decay.decode(encoding='UTF-8') + t = type.decode(encoding="UTF-8") + d = decay.decode(encoding="UTF-8") s = net.aggregate(distance, type=t, decay=d) - assert s.describe()['std'] > 0 + assert s.describe()["std"] > 0 # testing w/o setting variable ssize = 50 @@ -180,30 +178,32 @@ def test_agg_variables(sample_osm): for type in net.aggregations: for decay in net.decays: for distance in [5, 10, 20]: - t = type.decode(encoding='UTF-8') - d = decay.decode(encoding='UTF-8') + t = type.decode(encoding="UTF-8") + d = decay.decode(encoding="UTF-8") s = net.aggregate(distance, type=t, decay=d) if t != "std": - assert s.describe()['std'] > 0 + assert s.describe()["std"] > 0 else: # no variance in data - assert s.describe()['std'] == 0 + assert s.describe()["std"] == 0 def test_non_float_node_values(sample_osm): net = sample_osm ssize = 50 - net.set(random_node_ids(sample_osm, ssize), - variable=(random_data(ssize)*100).astype('int')) + net.set( + random_node_ids(sample_osm, ssize), + variable=(random_data(ssize) * 100).astype("int"), + ) for type in net.aggregations: for decay in net.decays: for distance in [5, 10, 20]: - t = type.decode(encoding='UTF-8') - d = decay.decode(encoding='UTF-8') + t = type.decode(encoding="UTF-8") + d = decay.decode(encoding="UTF-8") s = net.aggregate(distance, type=t, decay=d) - assert s.describe()['std'] > 0 + assert s.describe()["std"] > 0 def test_missing_nodeid(sample_osm): @@ -222,7 +222,7 @@ def test_assign_nodeids(sample_osm): # check a couple of assignments for accuracy assert node_ids1.loc[48] == 1840703798 assert node_ids1.loc[43] == 257739973 - pdt.assert_index_equal(x.index, node_ids1.index) + assert_index_equal(x.index, node_ids1.index) # test with max distance - this max distance is in decimal degrees node_ids2 = sample_osm.get_node_ids(x, y, 0.0005) @@ -238,13 +238,12 @@ def test_named_variable(sample_osm): net = sample_osm ssize = 50 - net.set(random_node_ids(sample_osm, ssize), - variable=random_data(ssize), name="foo") + net.set(random_node_ids(sample_osm, ssize), variable=random_data(ssize), name="foo") net.aggregate(500, type="sum", decay="linear", name="foo") -''' +""" def test_plot(sample_osm): net = sample_osm @@ -255,7 +254,7 @@ def test_plot(sample_osm): s = net.aggregate(500, type="sum", decay="linear") sample_osm.plot(s) -''' +""" def test_shortest_path(sample_osm): @@ -274,8 +273,8 @@ def test_shortest_paths(sample_osm): vec_paths 
= sample_osm.shortest_paths(nodes[0:50], nodes[50:100]) for i in range(50): - path = sample_osm.shortest_path(nodes[i], nodes[i+50]) - assert(np.array_equal(vec_paths[i], path)) + path = sample_osm.shortest_path(nodes[i], nodes[i + 50]) + assert np.array_equal(vec_paths[i], path) # check mismatched OD lists try: @@ -331,13 +330,12 @@ def test_pois(sample_osm): net = sample_osm x, y = random_x_y(sample_osm, 100) - x.index = ['lab%d' % i for i in range(len(x))] + x.index = ["lab%d" % i for i in range(len(x))] y.index = x.index net.set_pois("restaurants", 2000, 10, x, y) - d = net.nearest_pois(2000, "restaurants", num_pois=10, - include_poi_ids=True) + d = net.nearest_pois(2000, "restaurants", num_pois=10, include_poi_ids=True) def test_pois2(second_sample_osm): @@ -347,7 +345,7 @@ def test_pois2(second_sample_osm): np.random.seed(0) x, y = random_x_y(second_sample_osm, ssize) - # make sure poi searches work on second graph + # make sure POI searches work on second graph net2.set_pois("restaurants", 2000, 10, x, y) net2.nearest_pois(2000, "restaurants", num_pois=10) @@ -363,7 +361,7 @@ def test_pois_pandana3(second_sample_osm): net2.init_pois(num_categories=1, max_dist=2000, max_pois=10) - # make sure poi searches work on second graph + # make sure POI searches work on second graph net2.set_pois(category="restaurants", x_col=x, y_col=y) net2.nearest_pois(2000, "restaurants", num_pois=10) @@ -384,6 +382,7 @@ def test_pois_pandana3_pos_args(second_sample_osm): net2.nearest_pois(2000, "restaurants", num_pois=10) + # test items are sorted @@ -407,12 +406,14 @@ def test_repeat_pois(sample_osm): net = sample_osm def get_nearest_nodes(x, y, x2=None, y2=None, n=2): - coords_dict = [{'x': x, 'y': y, 'var': 1} for i in range(2)] + coords_dict = [{"x": x, "y": y, "var": 1} for i in range(2)] if x2 and y2: - coords_dict.append({'x': x2, 'y': y2, 'var': 1}) + coords_dict.append({"x": x2, "y": y2, "var": 1}) df = pd.DataFrame(coords_dict) - sample_osm.set_pois("restaurants", 2000, 10, df['x'], df['y']) - res = sample_osm.nearest_pois(2000, "restaurants", num_pois=5, include_poi_ids=True) + sample_osm.set_pois("restaurants", 2000, 10, df["x"], df["y"]) + res = sample_osm.nearest_pois( + 2000, "restaurants", num_pois=5, include_poi_ids=True + ) return res # these are the min-max values of the network @@ -426,8 +427,45 @@ def get_nearest_nodes(x, y, x2=None, y2=None, n=2): assert test1.equals(test3) test4 = get_nearest_nodes(-122.31, 47.60, -122.32, 47.61, n=3) - assert_allclose(test4.loc[53114882], [7, 13, 13, 2000, 2000, 2, 0, 1, np.nan, np.nan]) - assert_allclose(test4.loc[53114880], [6, 14, 14, 2000, 2000, 2, 0, 1, np.nan, np.nan]) + assert_allclose( + test4.loc[53114882], [7, 13, 13, 2000, 2000, 2, 0, 1, np.nan, np.nan] + ) + assert_allclose( + test4.loc[53114880], [6, 14, 14, 2000, 2000, 2, 0, 1, np.nan, np.nan] + ) assert_allclose( test4.loc[53227769], - [2000, 2000, 2000, 2000, 2000, np.nan, np.nan, np.nan, np.nan, np.nan]) + [2000, 2000, 2000, 2000, 2000, np.nan, np.nan, np.nan, np.nan, np.nan], + ) + + +def test_nodes_in_range(sample_osm): + net = sample_osm + + np.random.seed(0) + ssize = 10 + x, y = random_x_y(net, 10) + snaps = net.get_node_ids(x, y) + + test1 = net.nodes_in_range(snaps, 1) + net.precompute(10) + test5 = net.nodes_in_range(snaps, 5) + test11 = net.nodes_in_range(snaps, 11) + assert test1.weight.max() == 1 + assert test5.weight.max() == 5 + assert test11.weight.max() == 11 + + focus_id = snaps[0] + all_distances = net.shortest_path_lengths( + [focus_id] * len(net.node_ids), 
net.node_ids + ) + all_distances = np.asarray(all_distances) + assert (all_distances <= 1).sum() == len( + test1.query("source == {}".format(focus_id)) + ) + assert (all_distances <= 5).sum() == len( + test5.query("source == {}".format(focus_id)) + ) + assert (all_distances <= 11).sum() == len( + test11.query("source == {}".format(focus_id)) + ) diff --git a/pandana/loaders/tests/test_pandash5.py b/tests/test_pandash5.py similarity index 81% rename from pandana/loaders/tests/test_pandash5.py rename to tests/test_pandash5.py index 140c00e8..dfe2dccd 100644 --- a/pandana/loaders/tests/test_pandash5.py +++ b/tests/test_pandash5.py @@ -4,11 +4,12 @@ import pandas as pd import pytest -import pandas.util.testing as pdt -from pandana import Network -from pandana.testing import skipiftravis +from pandas.testing import assert_frame_equal +from pandas.testing import assert_series_equal +from pandana import Network from pandana.loaders import pandash5 as ph5 +from pandana.testing import skipifci @pytest.fixture(scope='module') @@ -71,7 +72,7 @@ def cleanup(): return fname -@skipiftravis +@skipifci def test_remove_nodes(network, rm_nodes): # node 0 is connected to node 4, which is in turn connected to node 5 # node 7 is connected to node 11, which has no other connections @@ -92,25 +93,25 @@ def test_remove_nodes(network, rm_nodes): index=[1, 4, 5, 7, 8]) exp_edges = exp_edges[['from', 'to', 'distance', 'time']] # order columns - pdt.assert_frame_equal(nodes, exp_nodes) - pdt.assert_frame_equal(edges, exp_edges) + assert_frame_equal(nodes, exp_nodes) + assert_frame_equal(edges, exp_edges) -@skipiftravis +@skipifci def test_network_to_pandas_hdf5( tmpfile, network, nodes, edges_df, impedance_names, two_way): ph5.network_to_pandas_hdf5(network, tmpfile) store = pd.HDFStore(tmpfile) - pdt.assert_frame_equal(store['nodes'], nodes) - pdt.assert_frame_equal(store['edges'], edges_df) - pdt.assert_series_equal(store['two_way'], pd.Series([two_way])) - pdt.assert_series_equal( + assert_frame_equal(store['nodes'], nodes) + assert_frame_equal(store['edges'], edges_df) + assert_series_equal(store['two_way'], pd.Series([two_way])) + assert_series_equal( store['impedance_names'], pd.Series(impedance_names)) -@skipiftravis +@skipifci def test_network_to_pandas_hdf5_removal( tmpfile, network, impedance_names, two_way, rm_nodes): nodes, edges = ph5.remove_nodes(network, rm_nodes) @@ -118,26 +119,26 @@ def test_network_to_pandas_hdf5_removal( store = pd.HDFStore(tmpfile) - pdt.assert_frame_equal(store['nodes'], nodes) - pdt.assert_frame_equal(store['edges'], edges) - pdt.assert_series_equal(store['two_way'], pd.Series([two_way])) - pdt.assert_series_equal( + assert_frame_equal(store['nodes'], nodes) + assert_frame_equal(store['edges'], edges) + assert_series_equal(store['two_way'], pd.Series([two_way])) + assert_series_equal( store['impedance_names'], pd.Series(impedance_names)) -@skipiftravis +@skipifci def test_network_from_pandas_hdf5( tmpfile, network, nodes, edges_df, impedance_names, two_way): ph5.network_to_pandas_hdf5(network, tmpfile) new_net = ph5.network_from_pandas_hdf5(Network, tmpfile) - pdt.assert_frame_equal(new_net.nodes_df, nodes) - pdt.assert_frame_equal(new_net.edges_df, edges_df) + assert_frame_equal(new_net.nodes_df, nodes) + assert_frame_equal(new_net.edges_df, edges_df) assert new_net._twoway == two_way assert new_net.impedance_names == impedance_names -@skipiftravis +@skipifci def test_network_save_load_hdf5( tmpfile, network, impedance_names, two_way, rm_nodes): network.save_hdf5(tmpfile, 
rm_nodes) @@ -145,15 +146,15 @@ def test_network_save_load_hdf5( nodes, edges = ph5.remove_nodes(network, rm_nodes) - pdt.assert_frame_equal(new_net.nodes_df, nodes) - pdt.assert_frame_equal(new_net.edges_df, edges) + assert_frame_equal(new_net.nodes_df, nodes) + assert_frame_equal(new_net.edges_df, edges) assert new_net._twoway == two_way assert new_net.impedance_names == impedance_names # this is an odd place for this test because it's not related to HDF5, # but my test Network is perfect. -@skipiftravis +@skipifci def test_network_low_connectivity_nodes(network, impedance_names): nodes = network.low_connectivity_nodes(10, 3, imp_name=impedance_names[0]) assert list(nodes) == [7, 11] diff --git a/pandana/tests/test_utils.py b/tests/test_utils.py similarity index 100% rename from pandana/tests/test_utils.py rename to tests/test_utils.py
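
Finally, the adjacency_matrix helper added to pandana/utils.py above can be exercised on its own. A small sketch, assuming a directed edge list with the "from"/"to" column names the helper reads (note that the plot_matrix argument is accepted but unused in this patch):

    import pandas as pd
    from pandana.utils import adjacency_matrix

    # Toy directed edge list; "from"/"to" are the column names the helper reads.
    edges = pd.DataFrame({"from": [1, 1, 2], "to": [2, 3, 3]})
    adj = adjacency_matrix(edges)

    # adj is a square DataFrame of edge counts indexed by the union of node ids:
    # adj.loc[1, 2] == 1, adj.loc[1, 3] == 1, adj.loc[2, 3] == 1, all else 0.
    print(adj)
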