-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathrefs.bib
909 lines (866 loc) · 34.5 KB
/
refs.bib
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
% RLlib paper (ICML 2018). {RLlib} is brace-protected so that sentence-casing
% styles do not render it as "Rllib".
@inproceedings{rllib,
  author    = {Liang, Eric and
               Liaw, Richard and
               Nishihara, Robert and
               Moritz, Philipp and
               Fox, Roy and
               Goldberg, Ken and
               Gonzalez, Joseph and
               Jordan, Michael I. and
               Stoica, Ion},
  title     = {{RLlib}: Abstractions for Distributed Reinforcement Learning},
  booktitle = {Proceedings of the 35th International Conference on Machine Learning,
               {ICML} 2018, Stockholmsm{\"{a}}ssan, Stockholm, Sweden, July
               10-15, 2018},
  pages     = {3059--3068},
  year      = {2018},
  url       = {http://proceedings.mlr.press/v80/liang18b.html},
  timestamp = {Thu, 30 Apr 2020 14:45:00 +0200},
  biburl    = {https://dblp.org/rec/conf/icml/LiangLNMFGGJS18.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Guided Cost Learning (ICML 2016). The conference ordinal is spelled "33rd".
@inproceedings{irl,
  author    = {Finn, Chelsea and
               Levine, Sergey and
               Abbeel, Pieter},
  title     = {Guided Cost Learning: Deep Inverse Optimal Control via Policy Optimization},
  booktitle = {Proceedings of the 33rd International Conference on Machine Learning,
               {ICML} 2016, New York City, NY, USA, June 19-24, 2016},
  pages     = {49--58},
  year      = {2016},
  url       = {http://proceedings.mlr.press/v48/finn16.html},
  timestamp = {Wed, 29 May 2019 08:41:46 +0200},
  biburl    = {https://dblp.org/rec/conf/icml/FinnLA16.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Trust Region Policy Optimization (ICML 2015).
@inproceedings{trpo,
  author    = {Schulman, John and
               Levine, Sergey and
               Abbeel, Pieter and
               Jordan, Michael I. and
               Moritz, Philipp},
  title     = {Trust Region Policy Optimization},
  booktitle = {Proceedings of the 32nd International Conference on Machine Learning,
               {ICML} 2015, Lille, France, 6-11 July 2015},
  pages     = {1889--1897},
  year      = {2015},
  url       = {http://proceedings.mlr.press/v37/schulman15.html},
  timestamp = {Wed, 29 May 2019 08:41:45 +0200},
  biburl    = {https://dblp.org/rec/conf/icml/SchulmanLAJM15.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% TD-Gammon (Neural Computation, 1994). {TD-Gammon} is brace-protected so that
% sentence-casing styles do not render it as "Td-gammon".
@article{td,
  author    = {Tesauro, Gerald},
  title     = {{TD-Gammon}, a Self-Teaching Backgammon Program, Achieves Master-Level
               Play},
  journal   = {Neural Computation},
  volume    = {6},
  number    = {2},
  pages     = {215--219},
  year      = {1994},
  url       = {https://doi.org/10.1162/neco.1994.6.2.215},
  doi       = {10.1162/neco.1994.6.2.215},
  timestamp = {Sun, 28 May 2017 13:19:00 +0200},
  biburl    = {https://dblp.org/rec/journals/neco/Tesauro94.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% rlpyt code base (arXiv 1909.01500). {PyTorch} is brace-protected so that
% sentence-casing styles keep its capitalisation.
@article{rlpyt,
  author        = {Stooke, Adam and
                   Abbeel, Pieter},
  title         = {rlpyt: {A} Research Code Base for Deep Reinforcement Learning in {PyTorch}},
  journal       = {CoRR},
  volume        = {abs/1909.01500},
  year          = {2019},
  url           = {http://arxiv.org/abs/1909.01500},
  archivePrefix = {arXiv},
  eprint        = {1909.01500},
  timestamp     = {Mon, 16 Sep 2019 17:27:14 +0200},
  biburl        = {https://dblp.org/rec/journals/corr/abs-1909-01500.bib},
  bibsource     = {dblp computer science bibliography, https://dblp.org}
}
% Policy gradient methods with function approximation (NIPS 1999).
@inproceedings{pg,
  author    = {Sutton, Richard S. and
               McAllester, David A. and
               Singh, Satinder P. and
               Mansour, Yishay},
  title     = {Policy Gradient Methods for Reinforcement Learning with Function Approximation},
  booktitle = {Advances in Neural Information Processing Systems 12, {[NIPS} Conference,
               Denver, Colorado, USA, November 29 - December 4, 1999]},
  pages     = {1057--1063},
  year      = {1999},
  url       = {http://papers.nips.cc/paper/1713-policy-gradient-methods-for-reinforcement-learning-with-function-approximation},
  timestamp = {Fri, 06 Mar 2020 16:58:30 +0100},
  biburl    = {https://dblp.org/rec/conf/nips/SuttonMSM99.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Human-level control through deep RL (Nature, 2015).
@article{dqn,
  author    = {Mnih, Volodymyr and
               Kavukcuoglu, Koray and
               Silver, David and
               Rusu, Andrei A. and
               Veness, Joel and
               Bellemare, Marc G. and
               Graves, Alex and
               Riedmiller, Martin A. and
               Fidjeland, Andreas and
               Ostrovski, Georg and
               Petersen, Stig and
               Beattie, Charles and
               Sadik, Amir and
               Antonoglou, Ioannis and
               King, Helen and
               Kumaran, Dharshan and
               Wierstra, Daan and
               Legg, Shane and
               Hassabis, Demis},
  title     = {Human-level control through deep reinforcement learning},
  journal   = {Nature},
  volume    = {518},
  number    = {7540},
  pages     = {529--533},
  year      = {2015},
  url       = {https://doi.org/10.1038/nature14236},
  doi       = {10.1038/nature14236},
  timestamp = {Wed, 14 Nov 2018 10:30:43 +0100},
  biburl    = {https://dblp.org/rec/journals/nature/MnihKSRVBGRFOPB15.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Double Q-learning for deep RL (AAAI 2016). {Q-Learning} is brace-protected so
% that sentence-casing styles do not lowercase the "Q".
@inproceedings{double-dqn,
  author    = {van Hasselt, Hado and
               Guez, Arthur and
               Silver, David},
  title     = {Deep Reinforcement Learning with Double {Q-Learning}},
  booktitle = {Proceedings of the Thirtieth {AAAI} Conference on Artificial Intelligence,
               February 12-17, 2016, Phoenix, Arizona, {USA}},
  pages     = {2094--2100},
  year      = {2016},
  url       = {http://www.aaai.org/ocs/index.php/AAAI/AAAI16/paper/view/12389},
  timestamp = {Wed, 05 Apr 2017 12:53:50 +0200},
  biburl    = {https://dblp.org/rec/conf/aaai/HasseltGS16.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Hindsight Experience Replay (NIPS 2017).
@inproceedings{her,
  author    = {Andrychowicz, Marcin and
               Crow, Dwight and
               Ray, Alex and
               Schneider, Jonas and
               Fong, Rachel and
               Welinder, Peter and
               McGrew, Bob and
               Tobin, Josh and
               Abbeel, Pieter and
               Zaremba, Wojciech},
  title     = {Hindsight Experience Replay},
  booktitle = {Advances in Neural Information Processing Systems 30: Annual Conference
               on Neural Information Processing Systems 2017, 4-9 December 2017,
               Long Beach, CA, {USA}},
  pages     = {5048--5058},
  year      = {2017},
  url       = {http://papers.nips.cc/paper/7090-hindsight-experience-replay},
  timestamp = {Fri, 06 Mar 2020 16:57:05 +0100},
  biburl    = {https://dblp.org/rec/conf/nips/AndrychowiczCRS17.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Deterministic Policy Gradient (ICML 2014). The conference ordinal is spelled "31st".
@inproceedings{dpg,
  author    = {Silver, David and
               Lever, Guy and
               Heess, Nicolas and
               Degris, Thomas and
               Wierstra, Daan and
               Riedmiller, Martin A.},
  title     = {Deterministic Policy Gradient Algorithms},
  booktitle = {Proceedings of the 31st International Conference on Machine Learning,
               {ICML} 2014, Beijing, China, 21-26 June 2014},
  pages     = {387--395},
  year      = {2014},
  url       = {http://proceedings.mlr.press/v32/silver14.html},
  timestamp = {Wed, 29 May 2019 08:41:46 +0200},
  biburl    = {https://dblp.org/rec/conf/icml/SilverLHDWR14.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Prioritized Experience Replay (ICLR 2016).
@inproceedings{per,
  author    = {Schaul, Tom and
               Quan, John and
               Antonoglou, Ioannis and
               Silver, David},
  title     = {Prioritized Experience Replay},
  booktitle = {4th International Conference on Learning Representations, {ICLR} 2016,
               San Juan, Puerto Rico, May 2-4, 2016, Conference Track Proceedings},
  year      = {2016},
  url       = {http://arxiv.org/abs/1511.05952},
  timestamp = {Thu, 25 Jul 2019 14:25:38 +0200},
  biburl    = {https://dblp.org/rec/journals/corr/SchaulQAS15.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Asynchronous methods for deep RL (ICML 2016). The conference ordinal is spelled "33rd".
@inproceedings{a2c,
  author    = {Mnih, Volodymyr and
               Badia, Adri{\`{a}} Puigdom{\`{e}}nech and
               Mirza, Mehdi and
               Graves, Alex and
               Lillicrap, Timothy P. and
               Harley, Tim and
               Silver, David and
               Kavukcuoglu, Koray},
  title     = {Asynchronous Methods for Deep Reinforcement Learning},
  booktitle = {Proceedings of the 33rd International Conference on Machine Learning,
               {ICML} 2016, New York City, NY, USA, June 19-24, 2016},
  pages     = {1928--1937},
  year      = {2016},
  url       = {http://proceedings.mlr.press/v48/mniha16.html},
  timestamp = {Wed, 29 May 2019 08:41:46 +0200},
  biburl    = {https://dblp.org/rec/conf/icml/MnihBMGLHSK16.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Continuous control with deep RL (ICLR 2016).
@inproceedings{ddpg,
  author    = {Lillicrap, Timothy P. and
               Hunt, Jonathan J. and
               Pritzel, Alexander and
               Heess, Nicolas and
               Erez, Tom and
               Tassa, Yuval and
               Silver, David and
               Wierstra, Daan},
  title     = {Continuous control with deep reinforcement learning},
  booktitle = {4th International Conference on Learning Representations, {ICLR} 2016,
               San Juan, Puerto Rico, May 2-4, 2016, Conference Track Proceedings},
  year      = {2016},
  url       = {http://arxiv.org/abs/1509.02971},
  timestamp = {Thu, 25 Jul 2019 14:25:37 +0200},
  biburl    = {https://dblp.org/rec/journals/corr/LillicrapHPHETS15.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Proximal Policy Optimization (arXiv 1707.06347).
@article{ppo,
  author        = {Schulman, John and
                   Wolski, Filip and
                   Dhariwal, Prafulla and
                   Radford, Alec and
                   Klimov, Oleg},
  title         = {Proximal Policy Optimization Algorithms},
  journal       = {CoRR},
  volume        = {abs/1707.06347},
  year          = {2017},
  url           = {http://arxiv.org/abs/1707.06347},
  archivePrefix = {arXiv},
  eprint        = {1707.06347},
  timestamp     = {Mon, 13 Aug 2018 16:47:34 +0200},
  biburl        = {https://dblp.org/rec/journals/corr/SchulmanWDRK17.bib},
  bibsource     = {dblp computer science bibliography, https://dblp.org}
}
% Function approximation error in actor-critic methods (ICML 2018).
@inproceedings{td3,
  author    = {Fujimoto, Scott and
               van Hoof, Herke and
               Meger, David},
  title     = {Addressing Function Approximation Error in Actor-Critic Methods},
  booktitle = {Proceedings of the 35th International Conference on Machine Learning,
               {ICML} 2018, Stockholmsm{\"{a}}ssan, Stockholm, Sweden, July
               10-15, 2018},
  pages     = {1582--1591},
  year      = {2018},
  url       = {http://proceedings.mlr.press/v80/fujimoto18a.html},
  timestamp = {Wed, 03 Apr 2019 18:17:30 +0200},
  biburl    = {https://dblp.org/rec/conf/icml/FujimotoHM18.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Soft Actor-Critic algorithms and applications (arXiv 1812.05905).
@article{sac,
  author        = {Haarnoja, Tuomas and
                   Zhou, Aurick and
                   Hartikainen, Kristian and
                   Tucker, George and
                   Ha, Sehoon and
                   Tan, Jie and
                   Kumar, Vikash and
                   Zhu, Henry and
                   Gupta, Abhishek and
                   Abbeel, Pieter and
                   Levine, Sergey},
  title         = {Soft Actor-Critic Algorithms and Applications},
  journal       = {CoRR},
  volume        = {abs/1812.05905},
  year          = {2018},
  url           = {http://arxiv.org/abs/1812.05905},
  archivePrefix = {arXiv},
  eprint        = {1812.05905},
  timestamp     = {Tue, 01 Jan 2019 15:01:25 +0100},
  biburl        = {https://dblp.org/rec/journals/corr/abs-1812-05905.bib},
  bibsource     = {dblp computer science bibliography, https://dblp.org}
}
% Generalized Advantage Estimation (ICLR 2016).
@inproceedings{gae,
  author    = {Schulman, John and
               Moritz, Philipp and
               Levine, Sergey and
               Jordan, Michael I. and
               Abbeel, Pieter},
  title     = {High-Dimensional Continuous Control Using Generalized Advantage Estimation},
  booktitle = {4th International Conference on Learning Representations, {ICLR} 2016,
               San Juan, Puerto Rico, May 2-4, 2016, Conference Track Proceedings},
  year      = {2016},
  url       = {http://arxiv.org/abs/1506.02438},
  timestamp = {Thu, 25 Jul 2019 14:25:38 +0200},
  biburl    = {https://dblp.org/rec/journals/corr/SchulmanMLJA15.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% OpenAI Baselines GitHub repository. {OpenAI} is brace-protected so that
% sentence-casing styles do not render it as "Openai".
@misc{baselines,
  author       = {Dhariwal, Prafulla and
                  Hesse, Christopher and
                  Klimov, Oleg and
                  Nichol, Alex and
                  Plappert, Matthias and
                  Radford, Alec and
                  Schulman, John and
                  Sidor, Szymon and
                  Wu, Yuhuai and
                  Zhokhov, Peter},
  title        = {{OpenAI} Baselines},
  year         = {2017},
  publisher    = {GitHub},
  journal      = {GitHub repository},
  howpublished = {\url{https://github.com/openai/baselines}},
}
% Spinning Up GitHub repository. Only the proper name {Spinning Up} is
% brace-protected; the rest of the title is left to the style's casing rules.
@misc{spinningup,
  author       = {Achiam, Joshua},
  title        = {{Spinning Up} in Deep Reinforcement Learning},
  year         = {2018},
  publisher    = {GitHub},
  journal      = {GitHub repository},
  howpublished = {\url{https://github.com/openai/spinningup}},
}
% Dopamine research framework (arXiv 1812.06110).
@article{dopamine,
  author        = {Castro, Pablo Samuel and
                   Moitra, Subhodeep and
                   Gelada, Carles and
                   Kumar, Saurabh and
                   Bellemare, Marc G.},
  title         = {Dopamine: {A} Research Framework for Deep Reinforcement Learning},
  journal       = {CoRR},
  volume        = {abs/1812.06110},
  year          = {2018},
  url           = {http://arxiv.org/abs/1812.06110},
  archivePrefix = {arXiv},
  eprint        = {1812.06110},
  timestamp     = {Tue, 01 Jan 2019 15:01:25 +0100},
  biburl        = {https://dblp.org/rec/journals/corr/abs-1812-06110.bib},
  bibsource     = {dblp computer science bibliography, https://dblp.org}
}
% PyTorch paper (NeurIPS 2019). {PyTorch} is brace-protected so that
% sentence-casing styles do not render it as "Pytorch".
@inproceedings{pytorch,
  author    = {Paszke, Adam and
               Gross, Sam and
               Massa, Francisco and
               Lerer, Adam and
               Bradbury, James and
               Chanan, Gregory and
               Killeen, Trevor and
               Lin, Zeming and
               Gimelshein, Natalia and
               Antiga, Luca and
               Desmaison, Alban and
               K{\"{o}}pf, Andreas and
               Yang, Edward and
               DeVito, Zachary and
               Raison, Martin and
               Tejani, Alykhan and
               Chilamkurthy, Sasank and
               Steiner, Benoit and
               Fang, Lu and
               Bai, Junjie and
               Chintala, Soumith},
  title     = {{PyTorch}: An Imperative Style, High-Performance Deep Learning Library},
  booktitle = {Advances in Neural Information Processing Systems 32: Annual Conference
               on Neural Information Processing Systems 2019, NeurIPS 2019, 8-14
               December 2019, Vancouver, BC, Canada},
  pages     = {8024--8035},
  year      = {2019},
  url       = {http://papers.nips.cc/paper/9015-pytorch-an-imperative-style-high-performance-deep-learning-library},
  timestamp = {Fri, 06 Mar 2020 16:59:09 +0100},
  biburl    = {https://dblp.org/rec/conf/nips/PaszkeGMLBCKLGA19.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% TensorFlow paper (OSDI 2016). {TensorFlow} is brace-protected so that
% sentence-casing styles do not render it as "Tensorflow".
@inproceedings{tensorflow,
  author    = {Abadi, Mart{\'{\i}}n and
               Barham, Paul and
               Chen, Jianmin and
               Chen, Zhifeng and
               Davis, Andy and
               Dean, Jeffrey and
               Devin, Matthieu and
               Ghemawat, Sanjay and
               Irving, Geoffrey and
               Isard, Michael and
               Kudlur, Manjunath and
               Levenberg, Josh and
               Monga, Rajat and
               Moore, Sherry and
               Murray, Derek Gordon and
               Steiner, Benoit and
               Tucker, Paul A. and
               Vasudevan, Vijay and
               Warden, Pete and
               Wicke, Martin and
               Yu, Yuan and
               Zheng, Xiaoqiang},
  title     = {{TensorFlow}: {A} System for Large-Scale Machine Learning},
  booktitle = {12th {USENIX} Symposium on Operating Systems Design and Implementation,
               {OSDI} 2016, Savannah, GA, USA, November 2-4, 2016},
  pages     = {265--283},
  year      = {2016},
  url       = {https://www.usenix.org/conference/osdi16/technical-sessions/presentation/abadi},
  timestamp = {Tue, 29 Jan 2019 17:35:36 +0100},
  biburl    = {https://dblp.org/rec/conf/osdi/AbadiBCCDDDGIIK16.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Dota 2 with large-scale deep RL (arXiv 1912.06680).
@article{dota2,
  author        = {Berner, Christopher and
                   Brockman, Greg and
                   Chan, Brooke and
                   Cheung, Vicki and
                   Debiak, Przemyslaw and
                   Dennison, Christy and
                   Farhi, David and
                   Fischer, Quirin and
                   Hashme, Shariq and
                   Hesse, Chris and
                   J{\'{o}}zefowicz, Rafal and
                   Gray, Scott and
                   Olsson, Catherine and
                   Pachocki, Jakub and
                   Petrov, Michael and
                   de Oliveira Pinto, Henrique Pond{\'{e}} and
                   Raiman, Jonathan and
                   Salimans, Tim and
                   Schlatter, Jeremy and
                   Schneider, Jonas and
                   Sidor, Szymon and
                   Sutskever, Ilya and
                   Tang, Jie and
                   Wolski, Filip and
                   Zhang, Susan},
  title         = {Dota 2 with Large Scale Deep Reinforcement Learning},
  journal       = {CoRR},
  volume        = {abs/1912.06680},
  year          = {2019},
  url           = {http://arxiv.org/abs/1912.06680},
  archivePrefix = {arXiv},
  eprint        = {1912.06680},
  timestamp     = {Fri, 03 Jan 2020 16:10:45 +0100},
  biburl        = {https://dblp.org/rec/journals/corr/abs-1912-06680.bib},
  bibsource     = {dblp computer science bibliography, https://dblp.org}
}
% AlphaGo paper (Nature, 2016). {Go} is brace-protected so that sentence-casing
% styles do not lowercase the name of the game.
@article{alphago,
  author    = {Silver, David and
               Huang, Aja and
               Maddison, Chris J. and
               Guez, Arthur and
               Sifre, Laurent and
               van den Driessche, George and
               Schrittwieser, Julian and
               Antonoglou, Ioannis and
               Panneershelvam, Vedavyas and
               Lanctot, Marc and
               Dieleman, Sander and
               Grewe, Dominik and
               Nham, John and
               Kalchbrenner, Nal and
               Sutskever, Ilya and
               Lillicrap, Timothy P. and
               Leach, Madeleine and
               Kavukcuoglu, Koray and
               Graepel, Thore and
               Hassabis, Demis},
  title     = {Mastering the game of {Go} with deep neural networks and tree search},
  journal   = {Nature},
  volume    = {529},
  number    = {7587},
  pages     = {484--489},
  year      = {2016},
  url       = {https://doi.org/10.1038/nature16961},
  doi       = {10.1038/nature16961},
  timestamp = {Wed, 14 Nov 2018 10:30:42 +0100},
  biburl    = {https://dblp.org/rec/journals/nature/SilverHMGSDSAPL16.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% AlphaFold protein-structure paper (Nature, 2020). Volume, issue, page range
% and DOI are those of the final published article; "and others" marks the
% truncated author list.
@article{alphafold,
  author    = {Senior, Andrew W. and
               Evans, Richard and
               Jumper, John and
               Kirkpatrick, James and
               Sifre, Laurent and
               Green, Tim and
               Qin, Chongli and
               {\v{Z}}{\'\i}dek, Augustin and
               Nelson, Alexander W. R. and
               Bridgland, Alex and
               others},
  title     = {Improved protein structure prediction using potentials from deep learning},
  journal   = {Nature},
  volume    = {577},
  number    = {7792},
  pages     = {706--710},
  year      = {2020},
  publisher = {Nature Publishing Group},
  doi       = {10.1038/s41586-019-1923-7}
}
% Generative Adversarial Imitation Learning (NIPS 2016).
@inproceedings{gail,
  author    = {Ho, Jonathan and
               Ermon, Stefano},
  title     = {Generative Adversarial Imitation Learning},
  booktitle = {Advances in Neural Information Processing Systems 29: Annual Conference
               on Neural Information Processing Systems 2016, December 5-10, 2016,
               Barcelona, Spain},
  pages     = {4565--4573},
  year      = {2016},
  url       = {http://papers.nips.cc/paper/6391-generative-adversarial-imitation-learning},
  timestamp = {Fri, 06 Mar 2020 17:00:15 +0100},
  biburl    = {https://dblp.org/rec/conf/nips/HoE16.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Rainbow: combined improvements in deep RL (AAAI 2018).
@inproceedings{rainbow,
  author    = {Hessel, Matteo and
               Modayil, Joseph and
               van Hasselt, Hado and
               Schaul, Tom and
               Ostrovski, Georg and
               Dabney, Will and
               Horgan, Dan and
               Piot, Bilal and
               Azar, Mohammad Gheshlaghi and
               Silver, David},
  title     = {Rainbow: Combining Improvements in Deep Reinforcement Learning},
  booktitle = {Proceedings of the Thirty-Second {AAAI} Conference on Artificial Intelligence,
               (AAAI-18), the 30th innovative Applications of Artificial Intelligence
               (IAAI-18), and the 8th {AAAI} Symposium on Educational Advances in
               Artificial Intelligence (EAAI-18), New Orleans, Louisiana, USA, February
               2-7, 2018},
  pages     = {3215--3222},
  year      = {2018},
  url       = {https://www.aaai.org/ocs/index.php/AAAI/AAAI18/paper/view/17204},
  timestamp = {Tue, 23 Oct 2018 06:42:15 +0200},
  biburl    = {https://dblp.org/rec/conf/aaai/HesselMHSODHPAS18.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Stable Baselines GitHub repository.
@misc{stable-baselines,
  author       = {Hill, Ashley and
                  Raffin, Antonin and
                  Ernestus, Maximilian and
                  Gleave, Adam and
                  Kanervisto, Anssi and
                  Traore, Rene and
                  Dhariwal, Prafulla and
                  Hesse, Christopher and
                  Klimov, Oleg and
                  Nichol, Alex and
                  Plappert, Matthias and
                  Radford, Alec and
                  Schulman, John and
                  Sidor, Szymon and
                  Wu, Yuhuai},
  title        = {Stable Baselines},
  year         = {2018},
  publisher    = {GitHub},
  journal      = {GitHub repository},
  howpublished = {\url{https://github.com/hill-a/stable-baselines}},
}
% Deep RL algorithms in PyTorch, GitHub repository. {PyTorch} is brace-protected
% so that sentence-casing styles keep its capitalisation.
@misc{pytorch-drl,
  author       = {Christodoulou, Petros},
  title        = {Deep Reinforcement Learning Algorithms with {PyTorch}},
  year         = {2019},
  publisher    = {GitHub},
  journal      = {GitHub repository},
  howpublished = {\url{https://github.com/p-christ/Deep-Reinforcement-Learning-Algorithms-with-PyTorch}},
}
% Tensorforce GitHub repository. {TensorFlow} is brace-protected so that
% sentence-casing styles do not render it as "tensorflow".
@misc{tensorforce,
  author       = {Kuhnle, Alexander and
                  Schaarschmidt, Michael and
                  Fricke, Kai},
  title        = {Tensorforce: a {TensorFlow} library for applied reinforcement learning},
  publisher    = {GitHub},
  howpublished = {\url{https://github.com/tensorforce/tensorforce}},
  journal      = {GitHub repository},
  year         = {2017}
}
% U-Net (MICCAI 2015). {U-Net} is brace-protected so that sentence-casing styles
% do not render it as "U-net". The escaped underscore in url/doi is kept as is.
@inproceedings{unet,
  author    = {Ronneberger, Olaf and
               Fischer, Philipp and
               Brox, Thomas},
  title     = {{U-Net}: Convolutional Networks for Biomedical Image Segmentation},
  booktitle = {Medical Image Computing and Computer-Assisted Intervention - {MICCAI}
               2015 - 18th International Conference Munich, Germany, October 5 -
               9, 2015, Proceedings, Part {III}},
  pages     = {234--241},
  year      = {2015},
  url       = {https://doi.org/10.1007/978-3-319-24574-4\_28},
  doi       = {10.1007/978-3-319-24574-4\_28},
  timestamp = {Sun, 02 Jun 2019 21:24:41 +0200},
  biburl    = {https://dblp.org/rec/conf/miccai/RonnebergerFB15.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% DeepFace (CVPR 2014). {DeepFace} is brace-protected so that sentence-casing
% styles do not render it as "Deepface".
@inproceedings{deepface,
  author    = {Taigman, Yaniv and
               Yang, Ming and
               Ranzato, Marc'Aurelio and
               Wolf, Lior},
  title     = {{DeepFace}: Closing the Gap to Human-Level Performance in Face Verification},
  booktitle = {2014 {IEEE} Conference on Computer Vision and Pattern Recognition,
               {CVPR} 2014, Columbus, OH, USA, June 23-28, 2014},
  pages     = {1701--1708},
  year      = {2014},
  url       = {https://doi.org/10.1109/CVPR.2014.220},
  doi       = {10.1109/CVPR.2014.220},
  timestamp = {Wed, 16 Oct 2019 14:14:50 +0200},
  biburl    = {https://dblp.org/rec/conf/cvpr/TaigmanYRW14.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Mask R-CNN (ICCV 2017).
@inproceedings{mask-rcnn,
  author    = {He, Kaiming and
               Gkioxari, Georgia and
               Doll{\'{a}}r, Piotr and
               Girshick, Ross B.},
  title     = {Mask {R-CNN}},
  booktitle = {{IEEE} International Conference on Computer Vision, {ICCV} 2017, Venice,
               Italy, October 22-29, 2017},
  pages     = {2980--2988},
  year      = {2017},
  url       = {https://doi.org/10.1109/ICCV.2017.322},
  doi       = {10.1109/ICCV.2017.322},
  timestamp = {Wed, 16 Oct 2019 14:14:51 +0200},
  biburl    = {https://dblp.org/rec/conf/iccv/HeGDG17.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% YOLOv3 (arXiv 1804.02767). {YOLOv3} is brace-protected so that sentence-casing
% styles do not render it as "Yolov3".
@article{yolov3,
  author        = {Redmon, Joseph and
                   Farhadi, Ali},
  title         = {{YOLOv3}: An Incremental Improvement},
  journal       = {CoRR},
  volume        = {abs/1804.02767},
  year          = {2018},
  url           = {http://arxiv.org/abs/1804.02767},
  archivePrefix = {arXiv},
  eprint        = {1804.02767},
  timestamp     = {Mon, 13 Aug 2018 16:48:24 +0200},
  biburl        = {https://dblp.org/rec/journals/corr/abs-1804-02767.bib},
  bibsource     = {dblp computer science bibliography, https://dblp.org}
}
% garage GitHub repository. The collective author is double-braced so BibTeX
% treats it as one indivisible name instead of splitting it into name parts.
@misc{garage,
  author       = {{The garage contributors}},
  title        = {Garage: A toolkit for reproducible reinforcement learning research},
  year         = {2019},
  publisher    = {GitHub},
  journal      = {GitHub repository},
  howpublished = {\url{https://github.com/rlworkgroup/garage}},
}
% rlkit GitHub repository.
@misc{rlkit,
  author       = {Pong, Vitchyr H. and
                  Dalal, Murtaza and
                  Lin, Steven and
                  Nair, Ashvin},
  title        = {rlkit: Collection of reinforcement learning algorithms},
  year         = {2019},
  publisher    = {GitHub},
  journal      = {GitHub repository},
  howpublished = {\url{https://github.com/vitchyr/rlkit}},
}
% TensorLayer (ACM MM 2017). {TensorLayer} is brace-protected so that
% sentence-casing styles do not render it as "Tensorlayer".
@inproceedings{tensorlayer,
  author    = {Dong, Hao and
               Supratak, Akara and
               Mai, Luo and
               Liu, Fangde and
               Oehmichen, Axel and
               Yu, Simiao and
               Guo, Yike},
  title     = {{TensorLayer}: {A} Versatile Library for Efficient Deep Learning Development},
  booktitle = {Proceedings of the 2017 {ACM} on Multimedia Conference, {MM} 2017,
               Mountain View, CA, USA, October 23-27, 2017},
  pages     = {1201--1204},
  year      = {2017},
  url       = {https://doi.org/10.1145/3123266.3129391},
  doi       = {10.1145/3123266.3129391},
  timestamp = {Sat, 19 Oct 2019 20:03:05 +0200},
  biburl    = {https://dblp.org/rec/conf/mm/DongSMLOYG17.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% keras-rl GitHub repository.
@misc{keras-rl,
  author       = {Plappert, Matthias},
  title        = {keras-rl},
  year         = {2016},
  publisher    = {GitHub},
  journal      = {GitHub repository},
  howpublished = {\url{https://github.com/keras-rl/keras-rl}},
}
% OpenAI Gym (arXiv 1606.01540). {OpenAI} is brace-protected so that
% sentence-casing styles do not render it as "Openai".
@article{gym,
  author        = {Brockman, Greg and
                   Cheung, Vicki and
                   Pettersson, Ludwig and
                   Schneider, Jonas and
                   Schulman, John and
                   Tang, Jie and
                   Zaremba, Wojciech},
  title         = {{OpenAI} Gym},
  journal       = {CoRR},
  volume        = {abs/1606.01540},
  year          = {2016},
  url           = {http://arxiv.org/abs/1606.01540},
  archivePrefix = {arXiv},
  eprint        = {1606.01540},
  timestamp     = {Fri, 08 Nov 2019 12:51:06 +0100},
  biburl        = {https://dblp.org/rec/journals/corr/BrockmanCPSSTZ16.bib},
  bibsource     = {dblp computer science bibliography, https://dblp.org}
}
% AlexNet paper (NIPS 2012). {ImageNet} is brace-protected so that
% sentence-casing styles do not render it as "Imagenet".
@inproceedings{alexnet,
  author    = {Krizhevsky, Alex and
               Sutskever, Ilya and
               Hinton, Geoffrey E.},
  title     = {{ImageNet} Classification with Deep Convolutional Neural Networks},
  booktitle = {Advances in Neural Information Processing Systems 25: 26th Annual
               Conference on Neural Information Processing Systems 2012. Proceedings
               of a meeting held December 3-6, 2012, Lake Tahoe, Nevada, United States},
  pages     = {1106--1114},
  year      = {2012},
  url       = {http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks},
  timestamp = {Fri, 06 Mar 2020 16:56:56 +0100},
  biburl    = {https://dblp.org/rec/conf/nips/KrizhevskySH12.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Ray distributed framework (OSDI 2018).
@inproceedings{ray,
  author    = {Moritz, Philipp and
               Nishihara, Robert and
               Wang, Stephanie and
               Tumanov, Alexey and
               Liaw, Richard and
               Liang, Eric and
               Elibol, Melih and
               Yang, Zongheng and
               Paul, William and
               Jordan, Michael I. and
               Stoica, Ion},
  title     = {Ray: {A} Distributed Framework for Emerging {AI} Applications},
  booktitle = {13th {USENIX} Symposium on Operating Systems Design and Implementation,
               {OSDI} 2018, Carlsbad, CA, USA, October 8-10, 2018},
  pages     = {561--577},
  year      = {2018},
  url       = {https://www.usenix.org/conference/osdi18/presentation/nishihara},
  timestamp = {Tue, 29 Jan 2019 17:35:36 +0100},
  biburl    = {https://dblp.org/rec/conf/osdi/MoritzNWTLLEYPJ18.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Behaviour Suite for RL (ICLR 2020). Names use the "von Last, First" form so
% that the capitalised two-word surname "Van Roy" is not split by the parser.
@inproceedings{bsuite,
  author    = {Osband, Ian and
               Doron, Yotam and
               Hessel, Matteo and
               Aslanides, John and
               Sezener, Eren and
               Saraiva, Andre and
               McKinney, Katrina and
               Lattimore, Tor and
               Szepesv{\'{a}}ri, Csaba and
               Singh, Satinder and
               Van Roy, Benjamin and
               Sutton, Richard S. and
               Silver, David and
               van Hasselt, Hado},
  title     = {Behaviour Suite for Reinforcement Learning},
  booktitle = {8th International Conference on Learning Representations, {ICLR} 2020,
               Addis Ababa, Ethiopia, April 26-30, 2020},
  year      = {2020},
  url       = {https://openreview.net/forum?id=rygf-kSYwH},
  timestamp = {Thu, 07 May 2020 17:11:48 +0200},
  biburl    = {https://dblp.org/rec/conf/iclr/OsbandDHASSMLSS20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Benchmarking deep RL for continuous control (ICML 2016). The conference
% ordinal is spelled "33rd".
@inproceedings{rllab,
  author    = {Duan, Yan and
               Chen, Xi and
               Houthooft, Rein and
               Schulman, John and
               Abbeel, Pieter},
  title     = {Benchmarking Deep Reinforcement Learning for Continuous Control},
  booktitle = {Proceedings of the 33rd International Conference on Machine Learning,
               {ICML} 2016, New York City, NY, USA, June 19-24, 2016},
  pages     = {1329--1338},
  year      = {2016},
  url       = {http://proceedings.mlr.press/v48/duan16.html},
  timestamp = {Wed, 29 May 2019 08:41:46 +0200},
  biburl    = {https://dblp.org/rec/conf/icml/DuanCHSA16.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Theano (arXiv 1605.02688). {Python} is brace-protected so that sentence-casing
% styles do not render it as "python". Author names are kept in the original
% "First Last" form.
@article{theano,
  author        = {Rami Al{-}Rfou and
                   Guillaume Alain and
                   Amjad Almahairi and
                   Christof Angerm{\"{u}}ller and
                   Dzmitry Bahdanau and
                   Nicolas Ballas and
                   Fr{\'{e}}d{\'{e}}ric Bastien and
                   Justin Bayer and
                   Anatoly Belikov and
                   Alexander Belopolsky and
                   Yoshua Bengio and
                   Arnaud Bergeron and
                   James Bergstra and
                   Valentin Bisson and
                   Josh Bleecher Snyder and
                   Nicolas Bouchard and
                   Nicolas Boulanger{-}Lewandowski and
                   Xavier Bouthillier and
                   Alexandre de Br{\'{e}}bisson and
                   Olivier Breuleux and
                   Pierre Luc Carrier and
                   Kyunghyun Cho and
                   Jan Chorowski and
                   Paul F. Christiano and
                   Tim Cooijmans and
                   Marc{-}Alexandre C{\^{o}}t{\'{e}} and
                   Myriam C{\^{o}}t{\'{e}} and
                   Aaron C. Courville and
                   Yann N. Dauphin and
                   Olivier Delalleau and
                   Julien Demouth and
                   Guillaume Desjardins and
                   Sander Dieleman and
                   Laurent Dinh and
                   Melanie Ducoffe and
                   Vincent Dumoulin and
                   Samira Ebrahimi Kahou and
                   Dumitru Erhan and
                   Ziye Fan and
                   Orhan Firat and
                   Mathieu Germain and
                   Xavier Glorot and
                   Ian J. Goodfellow and
                   Matthew Graham and
                   {\c{C}}aglar G{\"{u}}l{\c{c}}ehre and
                   Philippe Hamel and
                   Iban Harlouchet and
                   Jean{-}Philippe Heng and
                   Bal{\'{a}}zs Hidasi and
                   Sina Honari and
                   Arjun Jain and
                   S{\'{e}}bastien Jean and
                   Kai Jia and
                   Mikhail Korobov and
                   Vivek Kulkarni and
                   Alex Lamb and
                   Pascal Lamblin and
                   Eric Larsen and
                   C{\'{e}}sar Laurent and
                   Sean Lee and
                   Simon Lefran{\c{c}}ois and
                   Simon Lemieux and
                   Nicholas L{\'{e}}onard and
                   Zhouhan Lin and
                   Jesse A. Livezey and
                   Cory Lorenz and
                   Jeremiah Lowin and
                   Qianli Ma and
                   Pierre{-}Antoine Manzagol and
                   Olivier Mastropietro and
                   Robert McGibbon and
                   Roland Memisevic and
                   Bart van Merri{\"{e}}nboer and
                   Vincent Michalski and
                   Mehdi Mirza and
                   Alberto Orlandi and
                   Christopher Joseph Pal and
                   Razvan Pascanu and
                   Mohammad Pezeshki and
                   Colin Raffel and
                   Daniel Renshaw and
                   Matthew Rocklin and
                   Adriana Romero and
                   Markus Roth and
                   Peter Sadowski and
                   John Salvatier and
                   Fran{\c{c}}ois Savard and
                   Jan Schl{\"{u}}ter and
                   John Schulman and
                   Gabriel Schwartz and
                   Iulian Vlad Serban and
                   Dmitriy Serdyuk and
                   Samira Shabanian and
                   {\'{E}}tienne Simon and
                   Sigurd Spieckermann and
                   S. Ramana Subramanyam and
                   Jakub Sygnowski and
                   J{\'{e}}r{\'{e}}mie Tanguay and
                   Gijs van Tulder and
                   Joseph P. Turian and
                   Sebastian Urban and
                   Pascal Vincent and
                   Francesco Visin and
                   Harm de Vries and
                   David Warde{-}Farley and
                   Dustin J. Webb and
                   Matthew Willson and
                   Kelvin Xu and
                   Lijun Xue and
                   Li Yao and
                   Saizheng Zhang and
                   Ying Zhang},
  title         = {Theano: {A} {Python} framework for fast computation of mathematical
                   expressions},
  journal       = {CoRR},
  volume        = {abs/1605.02688},
  year          = {2016},
  url           = {http://arxiv.org/abs/1605.02688},
  archivePrefix = {arXiv},
  eprint        = {1605.02688},
  timestamp     = {Thu, 30 Apr 2020 11:17:18 +0200},
  biburl        = {https://dblp.org/rec/journals/corr/Al-RfouAAa16.bib},
  bibsource     = {dblp computer science bibliography, https://dblp.org}
}