refs.bib

% RLlib paper (ICML 2018). "RLlib" is brace-protected in the title so that
% sentence-casing bibliography styles do not render it as "Rllib".
@inproceedings{rllib,
  author    = {Eric Liang and
               Richard Liaw and
               Robert Nishihara and
               Philipp Moritz and
               Roy Fox and
               Ken Goldberg and
               Joseph Gonzalez and
               Michael I. Jordan and
               Ion Stoica},
  title     = {{RLlib}: Abstractions for Distributed Reinforcement Learning},
  booktitle = {Proceedings of the 35th International Conference on Machine Learning,
               {ICML} 2018, Stockholmsm{\"{a}}ssan, Stockholm, Sweden, July
               10-15, 2018},
  pages     = {3059--3068},
  year      = {2018},
  url       = {http://proceedings.mlr.press/v80/liang18b.html},
  timestamp = {Thu, 30 Apr 2020 14:45:00 +0200},
  biburl    = {https://dblp.org/rec/conf/icml/LiangLNMFGGJS18.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

% Guided Cost Learning (ICML 2016). The proceedings ordinal is spelled "33rd".
@inproceedings{irl,
  author    = {Chelsea Finn and
               Sergey Levine and
               Pieter Abbeel},
  title     = {Guided Cost Learning: Deep Inverse Optimal Control via Policy Optimization},
  booktitle = {Proceedings of the 33rd International Conference on Machine Learning,
               {ICML} 2016, New York City, NY, USA, June 19-24, 2016},
  pages     = {49--58},
  year      = {2016},
  url       = {http://proceedings.mlr.press/v48/finn16.html},
  timestamp = {Wed, 29 May 2019 08:41:46 +0200},
  biburl    = {https://dblp.org/rec/conf/icml/FinnLA16.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

% Trust Region Policy Optimization, ICML 2015 (dblp export).
@inproceedings{trpo,
  author = {John Schulman and Sergey Levine and Pieter Abbeel and
            Michael I. Jordan and Philipp Moritz},
  title = {Trust Region Policy Optimization},
  booktitle = {Proceedings of the 32nd International Conference on
               Machine Learning, {ICML} 2015, Lille, France, 6-11 July 2015},
  year = {2015},
  pages = {1889--1897},
  url = {http://proceedings.mlr.press/v37/schulman15.html},
  biburl = {https://dblp.org/rec/conf/icml/SchulmanLAJM15.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Wed, 29 May 2019 08:41:45 +0200}
}

% TD-Gammon (Neural Computation, 1994). The program name is brace-protected so
% sentence-casing styles do not turn it into "Td-gammon".
@article{td,
  author    = {Gerald Tesauro},
  title     = {{TD-Gammon}, a Self-Teaching Backgammon Program, Achieves Master-Level
               Play},
  journal   = {Neural Computation},
  volume    = {6},
  number    = {2},
  pages     = {215--219},
  year      = {1994},
  url       = {https://doi.org/10.1162/neco.1994.6.2.215},
  doi       = {10.1162/neco.1994.6.2.215},
  timestamp = {Sun, 28 May 2017 13:19:00 +0200},
  biburl    = {https://dblp.org/rec/journals/neco/Tesauro94.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

% rlpyt (arXiv 1909.01500). "PyTorch" is brace-protected so sentence-casing
% styles keep its capitalisation.
@article{rlpyt,
  author    = {Adam Stooke and
               Pieter Abbeel},
  title     = {rlpyt: {A} Research Code Base for Deep Reinforcement Learning in {PyTorch}},
  journal   = {CoRR},
  volume    = {abs/1909.01500},
  year      = {2019},
  url       = {http://arxiv.org/abs/1909.01500},
  archivePrefix = {arXiv},
  eprint    = {1909.01500},
  timestamp = {Mon, 16 Sep 2019 17:27:14 +0200},
  biburl    = {https://dblp.org/rec/journals/corr/abs-1909-01500.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

% Policy gradient theorem paper (NIPS 1999). The square brackets in booktitle
% now sit outside the brace group that protects the acronym, so brackets and
% braces nest cleanly; the rendered text is unchanged.
@inproceedings{pg,
  author    = {Richard S. Sutton and
               David A. McAllester and
               Satinder P. Singh and
               Yishay Mansour},
  title     = {Policy Gradient Methods for Reinforcement Learning with Function Approximation},
  booktitle = {Advances in Neural Information Processing Systems 12, [{NIPS} Conference,
               Denver, Colorado, USA, November 29 - December 4, 1999]},
  pages     = {1057--1063},
  year      = {1999},
  url       = {http://papers.nips.cc/paper/1713-policy-gradient-methods-for-reinforcement-learning-with-function-approximation},
  timestamp = {Fri, 06 Mar 2020 16:58:30 +0100},
  biburl    = {https://dblp.org/rec/conf/nips/SuttonMSM99.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

% DQN, Nature 518(7540), 2015 (dblp export).
@article{dqn,
  author = {Volodymyr Mnih and Koray Kavukcuoglu and David Silver and
            Andrei A. Rusu and Joel Veness and Marc G. Bellemare and
            Alex Graves and Martin A. Riedmiller and Andreas Fidjeland and
            Georg Ostrovski and Stig Petersen and Charles Beattie and
            Amir Sadik and Ioannis Antonoglou and Helen King and
            Dharshan Kumaran and Daan Wierstra and Shane Legg and
            Demis Hassabis},
  title = {Human-level control through deep reinforcement learning},
  journal = {Nature},
  year = {2015},
  volume = {518},
  number = {7540},
  pages = {529--533},
  doi = {10.1038/nature14236},
  url = {https://doi.org/10.1038/nature14236},
  biburl = {https://dblp.org/rec/journals/nature/MnihKSRVBGRFOPB15.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Wed, 14 Nov 2018 10:30:43 +0100}
}

% Double DQN (AAAI 2016). The "Q" of Q-Learning is brace-protected so
% sentence-casing styles do not lowercase it.
@inproceedings{double-dqn,
  author    = {Hado van Hasselt and
               Arthur Guez and
               David Silver},
  title     = {Deep Reinforcement Learning with Double {Q}-Learning},
  booktitle = {Proceedings of the Thirtieth {AAAI} Conference on Artificial Intelligence,
               February 12-17, 2016, Phoenix, Arizona, {USA}},
  pages     = {2094--2100},
  year      = {2016},
  url       = {http://www.aaai.org/ocs/index.php/AAAI/AAAI16/paper/view/12389},
  timestamp = {Wed, 05 Apr 2017 12:53:50 +0200},
  biburl    = {https://dblp.org/rec/conf/aaai/HasseltGS16.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

% Hindsight Experience Replay, NIPS 2017 (dblp export).
@inproceedings{her,
  author = {Marcin Andrychowicz and Dwight Crow and Alex Ray and
            Jonas Schneider and Rachel Fong and Peter Welinder and
            Bob McGrew and Josh Tobin and Pieter Abbeel and
            Wojciech Zaremba},
  title = {Hindsight Experience Replay},
  booktitle = {Advances in Neural Information Processing Systems 30:
               Annual Conference on Neural Information Processing Systems
               2017, 4-9 December 2017, Long Beach, CA, {USA}},
  year = {2017},
  pages = {5048--5058},
  url = {http://papers.nips.cc/paper/7090-hindsight-experience-replay},
  biburl = {https://dblp.org/rec/conf/nips/AndrychowiczCRS17.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Fri, 06 Mar 2020 16:57:05 +0100}
}

% Deterministic Policy Gradient (ICML 2014). The proceedings ordinal is spelled "31st".
@inproceedings{dpg,
  author    = {David Silver and
               Guy Lever and
               Nicolas Heess and
               Thomas Degris and
               Daan Wierstra and
               Martin A. Riedmiller},
  title     = {Deterministic Policy Gradient Algorithms},
  booktitle = {Proceedings of the 31st International Conference on Machine Learning,
               {ICML} 2014, Beijing, China, 21-26 June 2014},
  pages     = {387--395},
  year      = {2014},
  url       = {http://proceedings.mlr.press/v32/silver14.html},
  timestamp = {Wed, 29 May 2019 08:41:46 +0200},
  biburl    = {https://dblp.org/rec/conf/icml/SilverLHDWR14.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

% Prioritized Experience Replay, ICLR 2016 (dblp export; no page numbers given).
@inproceedings{per,
  author = {Tom Schaul and John Quan and Ioannis Antonoglou and David Silver},
  title = {Prioritized Experience Replay},
  booktitle = {4th International Conference on Learning Representations,
               {ICLR} 2016, San Juan, Puerto Rico, May 2-4, 2016,
               Conference Track Proceedings},
  year = {2016},
  url = {http://arxiv.org/abs/1511.05952},
  biburl = {https://dblp.org/rec/journals/corr/SchaulQAS15.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Thu, 25 Jul 2019 14:25:38 +0200}
}

% Asynchronous Methods for Deep RL (ICML 2016). The proceedings ordinal is spelled "33rd".
@inproceedings{a2c,
  author    = {Volodymyr Mnih and
               Adri{\`{a}} Puigdom{\`{e}}nech Badia and
               Mehdi Mirza and
               Alex Graves and
               Timothy P. Lillicrap and
               Tim Harley and
               David Silver and
               Koray Kavukcuoglu},
  title     = {Asynchronous Methods for Deep Reinforcement Learning},
  booktitle = {Proceedings of the 33rd International Conference on Machine Learning,
               {ICML} 2016, New York City, NY, USA, June 19-24, 2016},
  pages     = {1928--1937},
  year      = {2016},
  url       = {http://proceedings.mlr.press/v48/mniha16.html},
  timestamp = {Wed, 29 May 2019 08:41:46 +0200},
  biburl    = {https://dblp.org/rec/conf/icml/MnihBMGLHSK16.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

% Continuous control with deep RL, ICLR 2016 (dblp export; no page numbers given).
@inproceedings{ddpg,
  author = {Timothy P. Lillicrap and Jonathan J. Hunt and
            Alexander Pritzel and Nicolas Heess and Tom Erez and
            Yuval Tassa and David Silver and Daan Wierstra},
  title = {Continuous control with deep reinforcement learning},
  booktitle = {4th International Conference on Learning Representations,
               {ICLR} 2016, San Juan, Puerto Rico, May 2-4, 2016,
               Conference Track Proceedings},
  year = {2016},
  url = {http://arxiv.org/abs/1509.02971},
  biburl = {https://dblp.org/rec/journals/corr/LillicrapHPHETS15.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Thu, 25 Jul 2019 14:25:37 +0200}
}

% Proximal Policy Optimization, arXiv 1707.06347 (dblp CoRR export).
@article{ppo,
  author = {John Schulman and Filip Wolski and Prafulla Dhariwal and
            Alec Radford and Oleg Klimov},
  title = {Proximal Policy Optimization Algorithms},
  journal = {CoRR},
  volume = {abs/1707.06347},
  year = {2017},
  archivePrefix = {arXiv},
  eprint = {1707.06347},
  url = {http://arxiv.org/abs/1707.06347},
  biburl = {https://dblp.org/rec/journals/corr/SchulmanWDRK17.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Mon, 13 Aug 2018 16:47:34 +0200}
}

% TD3, ICML 2018 (dblp export).
@inproceedings{td3,
  author = {Scott Fujimoto and Herke van Hoof and David Meger},
  title = {Addressing Function Approximation Error in Actor-Critic Methods},
  booktitle = {Proceedings of the 35th International Conference on
               Machine Learning, {ICML} 2018, Stockholmsm{\"{a}}ssan,
               Stockholm, Sweden, July 10-15, 2018},
  year = {2018},
  pages = {1582--1591},
  url = {http://proceedings.mlr.press/v80/fujimoto18a.html},
  biburl = {https://dblp.org/rec/conf/icml/FujimotoHM18.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Wed, 03 Apr 2019 18:17:30 +0200}
}

% Soft Actor-Critic Algorithms and Applications, arXiv 1812.05905 (dblp CoRR export).
@article{sac,
  author = {Tuomas Haarnoja and Aurick Zhou and Kristian Hartikainen and
            George Tucker and Sehoon Ha and Jie Tan and Vikash Kumar and
            Henry Zhu and Abhishek Gupta and Pieter Abbeel and
            Sergey Levine},
  title = {Soft Actor-Critic Algorithms and Applications},
  journal = {CoRR},
  volume = {abs/1812.05905},
  year = {2018},
  archivePrefix = {arXiv},
  eprint = {1812.05905},
  url = {http://arxiv.org/abs/1812.05905},
  biburl = {https://dblp.org/rec/journals/corr/abs-1812-05905.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Tue, 01 Jan 2019 15:01:25 +0100}
}

% Generalized Advantage Estimation, ICLR 2016 (dblp export; no page numbers given).
@inproceedings{gae,
  author = {John Schulman and Philipp Moritz and Sergey Levine and
            Michael I. Jordan and Pieter Abbeel},
  title = {High-Dimensional Continuous Control Using Generalized Advantage Estimation},
  booktitle = {4th International Conference on Learning Representations,
               {ICLR} 2016, San Juan, Puerto Rico, May 2-4, 2016,
               Conference Track Proceedings},
  year = {2016},
  url = {http://arxiv.org/abs/1506.02438},
  biburl = {https://dblp.org/rec/journals/corr/SchulmanMLJA15.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Thu, 25 Jul 2019 14:25:38 +0200}
}

% OpenAI Baselines GitHub repository. The project name is brace-protected so
% sentence-casing styles do not render it as "Openai baselines".
@misc{baselines,
    author = {Dhariwal, Prafulla and Hesse, Christopher and Klimov, Oleg and Nichol, Alex and Plappert, Matthias and Radford, Alec and Schulman, John and Sidor, Szymon and Wu, Yuhuai and Zhokhov, Peter},
    title = {{OpenAI} {Baselines}},
    year = {2017},
    publisher = {GitHub},
    journal = {GitHub repository},
    howpublished = {\url{https://github.com/openai/baselines}},
}

% Spinning Up GitHub repository. The title is fully brace-protected, so styles
% print it with exactly this capitalisation.
@misc{spinningup,
  author = {Achiam, Joshua},
  title = {{Spinning Up in Deep Reinforcement Learning}},
  howpublished = {\url{https://github.com/openai/spinningup}},
  year = {2018},
  journal = {GitHub repository},
  publisher = {GitHub}
}

% Dopamine framework, arXiv 1812.06110 (dblp CoRR export).
@article{dopamine,
  author = {Pablo Samuel Castro and Subhodeep Moitra and Carles Gelada and
            Saurabh Kumar and Marc G. Bellemare},
  title = {Dopamine: {A} Research Framework for Deep Reinforcement Learning},
  journal = {CoRR},
  volume = {abs/1812.06110},
  year = {2018},
  archivePrefix = {arXiv},
  eprint = {1812.06110},
  url = {http://arxiv.org/abs/1812.06110},
  biburl = {https://dblp.org/rec/journals/corr/abs-1812-06110.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Tue, 01 Jan 2019 15:01:25 +0100}
}

% PyTorch paper (NeurIPS 2019). "PyTorch" is brace-protected so sentence-casing
% styles do not render it as "Pytorch".
@inproceedings{pytorch,
  author    = {Adam Paszke and
               Sam Gross and
               Francisco Massa and
               Adam Lerer and
               James Bradbury and
               Gregory Chanan and
               Trevor Killeen and
               Zeming Lin and
               Natalia Gimelshein and
               Luca Antiga and
               Alban Desmaison and
               Andreas K{\"{o}}pf and
               Edward Yang and
               Zachary DeVito and
               Martin Raison and
               Alykhan Tejani and
               Sasank Chilamkurthy and
               Benoit Steiner and
               Lu Fang and
               Junjie Bai and
               Soumith Chintala},
  title     = {{PyTorch}: An Imperative Style, High-Performance Deep Learning Library},
  booktitle = {Advances in Neural Information Processing Systems 32: Annual Conference
               on Neural Information Processing Systems 2019, NeurIPS 2019, 8-14
               December 2019, Vancouver, BC, Canada},
  pages     = {8024--8035},
  year      = {2019},
  url       = {http://papers.nips.cc/paper/9015-pytorch-an-imperative-style-high-performance-deep-learning-library},
  timestamp = {Fri, 06 Mar 2020 16:59:09 +0100},
  biburl    = {https://dblp.org/rec/conf/nips/PaszkeGMLBCKLGA19.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

% TensorFlow paper (OSDI 2016). "TensorFlow" is brace-protected so
% sentence-casing styles do not render it as "Tensorflow".
@inproceedings{tensorflow,
  author    = {Mart{\'{\i}}n Abadi and
               Paul Barham and
               Jianmin Chen and
               Zhifeng Chen and
               Andy Davis and
               Jeffrey Dean and
               Matthieu Devin and
               Sanjay Ghemawat and
               Geoffrey Irving and
               Michael Isard and
               Manjunath Kudlur and
               Josh Levenberg and
               Rajat Monga and
               Sherry Moore and
               Derek Gordon Murray and
               Benoit Steiner and
               Paul A. Tucker and
               Vijay Vasudevan and
               Pete Warden and
               Martin Wicke and
               Yuan Yu and
               Xiaoqiang Zheng},
  title     = {{TensorFlow}: {A} System for Large-Scale Machine Learning},
  booktitle = {12th {USENIX} Symposium on Operating Systems Design and Implementation,
               {OSDI} 2016, Savannah, GA, USA, November 2-4, 2016},
  pages     = {265--283},
  year      = {2016},
  url       = {https://www.usenix.org/conference/osdi16/technical-sessions/presentation/abadi},
  timestamp = {Tue, 29 Jan 2019 17:35:36 +0100},
  biburl    = {https://dblp.org/rec/conf/osdi/AbadiBCCDDDGIIK16.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

% Dota 2 with large-scale deep RL, arXiv 1912.06680 (dblp CoRR export).
@article{dota2,
  author = {Christopher Berner and Greg Brockman and Brooke Chan and
            Vicki Cheung and Przemyslaw Debiak and Christy Dennison and
            David Farhi and Quirin Fischer and Shariq Hashme and
            Chris Hesse and Rafal J{\'{o}}zefowicz and Scott Gray and
            Catherine Olsson and Jakub Pachocki and Michael Petrov and
            Henrique Pond{\'{e}} de Oliveira Pinto and Jonathan Raiman and
            Tim Salimans and Jeremy Schlatter and Jonas Schneider and
            Szymon Sidor and Ilya Sutskever and Jie Tang and
            Filip Wolski and Susan Zhang},
  title = {Dota 2 with Large Scale Deep Reinforcement Learning},
  journal = {CoRR},
  volume = {abs/1912.06680},
  year = {2019},
  archivePrefix = {arXiv},
  eprint = {1912.06680},
  url = {http://arxiv.org/abs/1912.06680},
  biburl = {https://dblp.org/rec/journals/corr/abs-1912-06680.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Fri, 03 Jan 2020 16:10:45 +0100}
}

% AlphaGo paper (Nature 529(7587), 2016). The game name "Go" is brace-protected
% so sentence-casing styles do not lowercase it to "go".
@article{alphago,
  author    = {David Silver and
               Aja Huang and
               Chris J. Maddison and
               Arthur Guez and
               Laurent Sifre and
               George van den Driessche and
               Julian Schrittwieser and
               Ioannis Antonoglou and
               Vedavyas Panneershelvam and
               Marc Lanctot and
               Sander Dieleman and
               Dominik Grewe and
               John Nham and
               Nal Kalchbrenner and
               Ilya Sutskever and
               Timothy P. Lillicrap and
               Madeleine Leach and
               Koray Kavukcuoglu and
               Thore Graepel and
               Demis Hassabis},
  title     = {Mastering the game of {Go} with deep neural networks and tree search},
  journal   = {Nature},
  volume    = {529},
  number    = {7587},
  pages     = {484--489},
  year      = {2016},
  url       = {https://doi.org/10.1038/nature16961},
  doi       = {10.1038/nature16961},
  timestamp = {Wed, 14 Nov 2018 10:30:42 +0100},
  biburl    = {https://dblp.org/rec/journals/nature/SilverHMGSDSAPL16.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

% AlphaFold (Senior et al., Nature 2020). The placeholder page range is replaced
% by the published volume/issue/pages, a DOI is added, and author initials are
% written with periods and spaces so BibTeX parses them as separate initials.
% NOTE(review): volume/number/pages/doi come from the publisher record, not from
% this file; confirm against the Nature landing page.
@article{alphafold,
  title     = {Improved protein structure prediction using potentials from deep learning},
  author    = {Senior, Andrew W. and Evans, Richard and Jumper, John and Kirkpatrick, James and Sifre, Laurent and Green, Tim and Qin, Chongli and {\v{Z}}{\'\i}dek, Augustin and Nelson, Alexander W. R. and Bridgland, Alex and others},
  journal   = {Nature},
  volume    = {577},
  number    = {7792},
  pages     = {706--710},
  year      = {2020},
  publisher = {Nature Publishing Group},
  doi       = {10.1038/s41586-019-1923-7}
}

% Generative Adversarial Imitation Learning, NIPS 2016 (dblp export).
@inproceedings{gail,
  author = {Jonathan Ho and Stefano Ermon},
  title = {Generative Adversarial Imitation Learning},
  booktitle = {Advances in Neural Information Processing Systems 29:
               Annual Conference on Neural Information Processing Systems
               2016, December 5-10, 2016, Barcelona, Spain},
  year = {2016},
  pages = {4565--4573},
  url = {http://papers.nips.cc/paper/6391-generative-adversarial-imitation-learning},
  biburl = {https://dblp.org/rec/conf/nips/HoE16.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Fri, 06 Mar 2020 17:00:15 +0100}
}

% Rainbow, AAAI 2018 (dblp export).
@inproceedings{rainbow,
  author = {Matteo Hessel and Joseph Modayil and Hado van Hasselt and
            Tom Schaul and Georg Ostrovski and Will Dabney and
            Dan Horgan and Bilal Piot and Mohammad Gheshlaghi Azar and
            David Silver},
  title = {Rainbow: Combining Improvements in Deep Reinforcement Learning},
  booktitle = {Proceedings of the Thirty-Second {AAAI} Conference on
               Artificial Intelligence, (AAAI-18), the 30th innovative
               Applications of Artificial Intelligence (IAAI-18), and the
               8th {AAAI} Symposium on Educational Advances in Artificial
               Intelligence (EAAI-18), New Orleans, Louisiana, USA,
               February 2-7, 2018},
  year = {2018},
  pages = {3215--3222},
  url = {https://www.aaai.org/ocs/index.php/AAAI/AAAI18/paper/view/17204},
  biburl = {https://dblp.org/rec/conf/aaai/HesselMHSODHPAS18.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Tue, 23 Oct 2018 06:42:15 +0200}
}

% Stable Baselines GitHub repository. The project name is brace-protected so
% sentence-casing styles do not render it as "Stable baselines".
@misc{stable-baselines,
  author = {Hill, Ashley and Raffin, Antonin and Ernestus, Maximilian and Gleave, Adam and Kanervisto, Anssi and Traore, Rene and Dhariwal, Prafulla and Hesse, Christopher and Klimov, Oleg and Nichol, Alex and Plappert, Matthias and Radford, Alec and Schulman, John and Sidor, Szymon and Wu, Yuhuai},
  title = {{Stable Baselines}},
  year = {2018},
  publisher = {GitHub},
  journal = {GitHub repository},
  howpublished = {\url{https://github.com/hill-a/stable-baselines}},
}

% GitHub repository of deep RL algorithms in PyTorch. "PyTorch" is
% brace-protected so sentence-casing styles keep its capitalisation.
@misc{pytorch-drl,
  author = {Petros Christodoulou},
  title = {Deep Reinforcement Learning Algorithms with {PyTorch}},
  year = {2019},
  publisher = {GitHub},
  journal = {GitHub repository},
  howpublished = {\url{https://github.com/p-christ/Deep-Reinforcement-Learning-Algorithms-with-PyTorch}},
}

% Tensorforce GitHub repository. "TensorFlow" is brace-protected so
% sentence-casing styles do not lowercase it to "tensorflow".
@misc{tensorforce,
  author       = {Kuhnle, Alexander and Schaarschmidt, Michael and Fricke, Kai},
  title        = {Tensorforce: a {TensorFlow} library for applied reinforcement learning},
  publisher = {GitHub},
  howpublished = {\url{https://github.com/tensorforce/tensorforce}},
  journal      = {GitHub repository},
  year         = {2017}
}

% U-Net (MICCAI 2015). "U-Net" is brace-protected so sentence-casing styles do
% not render it as "U-net".
@inproceedings{unet,
  author    = {Olaf Ronneberger and
               Philipp Fischer and
               Thomas Brox},
  title     = {{U-Net}: Convolutional Networks for Biomedical Image Segmentation},
  booktitle = {Medical Image Computing and Computer-Assisted Intervention - {MICCAI}
               2015 - 18th International Conference Munich, Germany, October 5 -
               9, 2015, Proceedings, Part {III}},
  pages     = {234--241},
  year      = {2015},
  url       = {https://doi.org/10.1007/978-3-319-24574-4\_28},
  doi       = {10.1007/978-3-319-24574-4\_28},
  timestamp = {Sun, 02 Jun 2019 21:24:41 +0200},
  biburl    = {https://dblp.org/rec/conf/miccai/RonnebergerFB15.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

% DeepFace (CVPR 2014). "DeepFace" is brace-protected so sentence-casing styles
% do not render it as "Deepface".
@inproceedings{deepface,
  author    = {Yaniv Taigman and
               Ming Yang and
               Marc'Aurelio Ranzato and
               Lior Wolf},
  title     = {{DeepFace}: Closing the Gap to Human-Level Performance in Face Verification},
  booktitle = {2014 {IEEE} Conference on Computer Vision and Pattern Recognition,
               {CVPR} 2014, Columbus, OH, USA, June 23-28, 2014},
  pages     = {1701--1708},
  year      = {2014},
  url       = {https://doi.org/10.1109/CVPR.2014.220},
  doi       = {10.1109/CVPR.2014.220},
  timestamp = {Wed, 16 Oct 2019 14:14:50 +0200},
  biburl    = {https://dblp.org/rec/conf/cvpr/TaigmanYRW14.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

% Mask R-CNN, ICCV 2017 (dblp export).
@inproceedings{mask-rcnn,
  author = {Kaiming He and Georgia Gkioxari and Piotr Doll{\'{a}}r and
            Ross B. Girshick},
  title = {Mask {R-CNN}},
  booktitle = {{IEEE} International Conference on Computer Vision, {ICCV}
               2017, Venice, Italy, October 22-29, 2017},
  year = {2017},
  pages = {2980--2988},
  doi = {10.1109/ICCV.2017.322},
  url = {https://doi.org/10.1109/ICCV.2017.322},
  biburl = {https://dblp.org/rec/conf/iccv/HeGDG17.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Wed, 16 Oct 2019 14:14:51 +0200}
}

% YOLOv3 (arXiv 1804.02767). The model name is brace-protected so
% sentence-casing styles do not render it as "Yolov3".
@article{yolov3,
  author    = {Joseph Redmon and
               Ali Farhadi},
  title     = {{YOLOv3}: An Incremental Improvement},
  journal   = {CoRR},
  volume    = {abs/1804.02767},
  year      = {2018},
  url       = {http://arxiv.org/abs/1804.02767},
  archivePrefix = {arXiv},
  eprint    = {1804.02767},
  timestamp = {Mon, 13 Aug 2018 16:48:24 +0200},
  biburl    = {https://dblp.org/rec/journals/corr/abs-1804-02767.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

% garage GitHub repository. The collective author is wrapped in an extra pair of
% braces so BibTeX treats it as one indivisible name instead of splitting it
% into first / von / last parts (which abbreviating styles would mangle).
@misc{garage,
 author = {{The garage contributors}},
 title = {Garage: A toolkit for reproducible reinforcement learning research},
 year = {2019},
 publisher = {GitHub},
 journal = {GitHub repository},
 howpublished = {\url{https://github.com/rlworkgroup/garage}},
}

% rlkit GitHub repository.
@misc{rlkit,
  author = {Vitchyr H. Pong and
            Murtaza Dalal and
            Steven Lin and
            Ashvin Nair},
  title = {rlkit: Collection of reinforcement learning algorithms},
  howpublished = {\url{https://github.com/vitchyr/rlkit}},
  year = {2019},
  journal = {GitHub repository},
  publisher = {GitHub}
}

% TensorLayer (ACM MM 2017). "TensorLayer" is brace-protected so sentence-casing
% styles do not render it as "Tensorlayer".
@inproceedings{tensorlayer,
  author    = {Hao Dong and
               Akara Supratak and
               Luo Mai and
               Fangde Liu and
               Axel Oehmichen and
               Simiao Yu and
               Yike Guo},
  title     = {{TensorLayer}: {A} Versatile Library for Efficient Deep Learning Development},
  booktitle = {Proceedings of the 2017 {ACM} on Multimedia Conference, {MM} 2017,
               Mountain View, CA, USA, October 23-27, 2017},
  pages     = {1201--1204},
  year      = {2017},
  url       = {https://doi.org/10.1145/3123266.3129391},
  doi       = {10.1145/3123266.3129391},
  timestamp = {Sat, 19 Oct 2019 20:03:05 +0200},
  biburl    = {https://dblp.org/rec/conf/mm/DongSMLOYG17.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

% keras-rl GitHub repository.
@misc{keras-rl,
  author = {Matthias Plappert},
  title = {keras-rl},
  howpublished = {\url{https://github.com/keras-rl/keras-rl}},
  year = {2016},
  journal = {GitHub repository},
  publisher = {GitHub}
}

% OpenAI Gym (arXiv 1606.01540). The product name is brace-protected so
% sentence-casing styles do not render it as "Openai gym".
@article{gym,
  author    = {Greg Brockman and
               Vicki Cheung and
               Ludwig Pettersson and
               Jonas Schneider and
               John Schulman and
               Jie Tang and
               Wojciech Zaremba},
  title     = {{OpenAI} {Gym}},
  journal   = {CoRR},
  volume    = {abs/1606.01540},
  year      = {2016},
  url       = {http://arxiv.org/abs/1606.01540},
  archivePrefix = {arXiv},
  eprint    = {1606.01540},
  timestamp = {Fri, 08 Nov 2019 12:51:06 +0100},
  biburl    = {https://dblp.org/rec/journals/corr/BrockmanCPSSTZ16.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

% AlexNet paper (NIPS 2012). "ImageNet" is brace-protected so sentence-casing
% styles do not render it as "Imagenet".
@inproceedings{alexnet,
  author    = {Alex Krizhevsky and
               Ilya Sutskever and
               Geoffrey E. Hinton},
  title     = {{ImageNet} Classification with Deep Convolutional Neural Networks},
  booktitle = {Advances in Neural Information Processing Systems 25: 26th Annual
               Conference on Neural Information Processing Systems 2012. Proceedings
               of a meeting held December 3-6, 2012, Lake Tahoe, Nevada, United States},
  pages     = {1106--1114},
  year      = {2012},
  url       = {http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks},
  timestamp = {Fri, 06 Mar 2020 16:56:56 +0100},
  biburl    = {https://dblp.org/rec/conf/nips/KrizhevskySH12.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

% Ray, OSDI 2018 (dblp export).
@inproceedings{ray,
  author = {Philipp Moritz and Robert Nishihara and Stephanie Wang and
            Alexey Tumanov and Richard Liaw and Eric Liang and
            Melih Elibol and Zongheng Yang and William Paul and
            Michael I. Jordan and Ion Stoica},
  title = {Ray: {A} Distributed Framework for Emerging {AI} Applications},
  booktitle = {13th {USENIX} Symposium on Operating Systems Design and
               Implementation, {OSDI} 2018, Carlsbad, CA, USA, October 8-10,
               2018},
  year = {2018},
  pages = {561--577},
  url = {https://www.usenix.org/conference/osdi18/presentation/nishihara},
  biburl = {https://dblp.org/rec/conf/osdi/MoritzNWTLLEYPJ18.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Tue, 29 Jan 2019 17:35:36 +0100}
}

% Behaviour Suite for RL, ICLR 2020 (dblp export; no page numbers given).
@inproceedings{bsuite,
  author = {Ian Osband and Yotam Doron and Matteo Hessel and
            John Aslanides and Eren Sezener and Andre Saraiva and
            Katrina McKinney and Tor Lattimore and
            Csaba Szepesv{\'{a}}ri and Satinder Singh and
            Benjamin Van Roy and Richard S. Sutton and David Silver and
            Hado van Hasselt},
  title = {Behaviour Suite for Reinforcement Learning},
  booktitle = {8th International Conference on Learning Representations,
               {ICLR} 2020, Addis Ababa, Ethiopia, April 26-30, 2020},
  year = {2020},
  url = {https://openreview.net/forum?id=rygf-kSYwH},
  biburl = {https://dblp.org/rec/conf/iclr/OsbandDHASSMLSS20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Thu, 07 May 2020 17:11:48 +0200}
}

% Benchmarking deep RL for continuous control (ICML 2016). The proceedings
% ordinal is spelled "33rd".
@inproceedings{rllab,
  author    = {Yan Duan and
               Xi Chen and
               Rein Houthooft and
               John Schulman and
               Pieter Abbeel},
  title     = {Benchmarking Deep Reinforcement Learning for Continuous Control},
  booktitle = {Proceedings of the 33rd International Conference on Machine Learning,
               {ICML} 2016, New York City, NY, USA, June 19-24, 2016},
  pages     = {1329--1338},
  year      = {2016},
  url       = {http://proceedings.mlr.press/v48/duan16.html},
  timestamp = {Wed, 29 May 2019 08:41:46 +0200},
  biburl    = {https://dblp.org/rec/conf/icml/DuanCHSA16.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

% Theano (arXiv 1605.02688). "Python" is brace-protected so sentence-casing
% styles do not lowercase it to "python". The author list is unchanged.
@article{theano,
  author    = {Rami Al{-}Rfou and
               Guillaume Alain and
               Amjad Almahairi and
               Christof Angerm{\"{u}}ller and
               Dzmitry Bahdanau and
               Nicolas Ballas and
               Fr{\'{e}}d{\'{e}}ric Bastien and
               Justin Bayer and
               Anatoly Belikov and
               Alexander Belopolsky and
               Yoshua Bengio and
               Arnaud Bergeron and
               James Bergstra and
               Valentin Bisson and
               Josh Bleecher Snyder and
               Nicolas Bouchard and
               Nicolas Boulanger{-}Lewandowski and
               Xavier Bouthillier and
               Alexandre de Br{\'{e}}bisson and
               Olivier Breuleux and
               Pierre Luc Carrier and
               Kyunghyun Cho and
               Jan Chorowski and
               Paul F. Christiano and
               Tim Cooijmans and
               Marc{-}Alexandre C{\^{o}}t{\'{e}} and
               Myriam C{\^{o}}t{\'{e}} and
               Aaron C. Courville and
               Yann N. Dauphin and
               Olivier Delalleau and
               Julien Demouth and
               Guillaume Desjardins and
               Sander Dieleman and
               Laurent Dinh and
               Melanie Ducoffe and
               Vincent Dumoulin and
               Samira Ebrahimi Kahou and
               Dumitru Erhan and
               Ziye Fan and
               Orhan Firat and
               Mathieu Germain and
               Xavier Glorot and
               Ian J. Goodfellow and
               Matthew Graham and
               {\c{C}}aglar G{\"{u}}l{\c{c}}ehre and
               Philippe Hamel and
               Iban Harlouchet and
               Jean{-}Philippe Heng and
               Bal{\'{a}}zs Hidasi and
               Sina Honari and
               Arjun Jain and
               S{\'{e}}bastien Jean and
               Kai Jia and
               Mikhail Korobov and
               Vivek Kulkarni and
               Alex Lamb and
               Pascal Lamblin and
               Eric Larsen and
               C{\'{e}}sar Laurent and
               Sean Lee and
               Simon Lefran{\c{c}}ois and
               Simon Lemieux and
               Nicholas L{\'{e}}onard and
               Zhouhan Lin and
               Jesse A. Livezey and
               Cory Lorenz and
               Jeremiah Lowin and
               Qianli Ma and
               Pierre{-}Antoine Manzagol and
               Olivier Mastropietro and
               Robert McGibbon and
               Roland Memisevic and
               Bart van Merri{\"{e}}nboer and
               Vincent Michalski and
               Mehdi Mirza and
               Alberto Orlandi and
               Christopher Joseph Pal and
               Razvan Pascanu and
               Mohammad Pezeshki and
               Colin Raffel and
               Daniel Renshaw and
               Matthew Rocklin and
               Adriana Romero and
               Markus Roth and
               Peter Sadowski and
               John Salvatier and
               Fran{\c{c}}ois Savard and
               Jan Schl{\"{u}}ter and
               John Schulman and
               Gabriel Schwartz and
               Iulian Vlad Serban and
               Dmitriy Serdyuk and
               Samira Shabanian and
               {\'{E}}tienne Simon and
               Sigurd Spieckermann and
               S. Ramana Subramanyam and
               Jakub Sygnowski and
               J{\'{e}}r{\'{e}}mie Tanguay and
               Gijs van Tulder and
               Joseph P. Turian and
               Sebastian Urban and
               Pascal Vincent and
               Francesco Visin and
               Harm de Vries and
               David Warde{-}Farley and
               Dustin J. Webb and
               Matthew Willson and
               Kelvin Xu and
               Lijun Xue and
               Li Yao and
               Saizheng Zhang and
               Ying Zhang},
  title     = {Theano: {A} {Python} framework for fast computation of mathematical
               expressions},
  journal   = {CoRR},
  volume    = {abs/1605.02688},
  year      = {2016},
  url       = {http://arxiv.org/abs/1605.02688},
  archivePrefix = {arXiv},
  eprint    = {1605.02688},
  timestamp = {Thu, 30 Apr 2020 11:17:18 +0200},
  biburl    = {https://dblp.org/rec/journals/corr/Al-RfouAAa16.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}