-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmain.bbl
175 lines (147 loc) · 7.48 KB
/
main.bbl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
\begin{thebibliography}{}
\bibitem[Aji et~al., 2016]{multicl}
Aji, A.~M., Pe{\~n}a, A.~J., Balaji, P., and Feng, W.-c. (2016).
\newblock MultiCL: Enabling automatic scheduling for task-parallel workloads in
OpenCL.
\newblock {\em Parallel Computing}, 58:37--55.
\bibitem[Augonnet et~al., 2009]{augonnet2011starpu}
Augonnet, C., Thibault, S., Namyst, R., and Wacrenier, P.-A. (2009).
\newblock StarPU: A unified platform for task scheduling on heterogeneous
multicore architectures.
\newblock In Sips, H., Epema, D., and Lin, H.-X., editors, {\em Euro-Par 2009
Parallel Processing}, pages 863--874, Berlin, Heidelberg. Springer Berlin
Heidelberg.
\bibitem[Bromley et~al., 1993]{siamese}
Bromley, J., Guyon, I., LeCun, Y., S\"{a}ckinger, E., and Shah, R. (1993).
\newblock Signature verification using a ``siamese'' time delay neural
network.
\newblock In {\em Proceedings of the 6th International Conference on Neural
Information Processing Systems}, NIPS'93, pages 737--744, San Francisco,
CA, USA. Morgan Kaufmann Publishers Inc.
\bibitem[Gelado et~al., 2010]{gelado}
Gelado, I., Stone, J.~E., Cabezas, J., Patel, S., Navarro, N., and Hwu,
W.-m.~W. (2010).
\newblock An asymmetric distributed shared memory model for heterogeneous
parallel systems.
\newblock In {\em Proceedings of the Fifteenth International Conference on
Architectural Support for Programming Languages and Operating Systems},
ASPLOS XV, pages 347--358, New York, NY, USA. ACM.
\bibitem[Ghose et~al., 2017]{schedcl}
Ghose, A., Dokara, L., Dey, S., and Mitra, P. (2017).
\newblock A framework for OpenCL task scheduling on heterogeneous multicores.
\newblock {\em PPL}, 27(3--4):1--32.
\bibitem[Grewe and O'Boyle, 2011]{grewe2011static}
Grewe, D. and O'Boyle, M.~F. (2011).
\newblock A static task partitioning approach for heterogeneous systems using
OpenCL.
\newblock In {\em CC}, pages 286--305. Springer.
\bibitem[Grewe et~al., 2013]{grewe2013opencl}
Grewe, D., Wang, Z., and O'Boyle, M.~F. (2013).
\newblock OpenCL task partitioning in the presence of GPU contention.
\newblock In {\em LCPC}, pages 87--101. Springer.
\bibitem[{Han} and {Abdelrahman}, 2011]{hicuda}
{Han}, T.~D. and {Abdelrahman}, T.~S. (2011).
\newblock hiCUDA: High-level GPGPU programming.
\newblock {\em IEEE Transactions on Parallel and Distributed Systems},
22(1):78--90.
\bibitem[Henry et~al., 2014]{henry2014toward}
Henry, S., Denis, A., Barthou, D., Counilh, M.-C., and Namyst, R. (2014).
\newblock Toward OpenCL automatic multi-device support.
\newblock In Silva, F., Dutra, I., and Santos~Costa, V., editors, {\em Euro-Par
2014 Parallel Processing}, pages 776--787, Cham. Springer International
Publishing.
\bibitem[Hochreiter and Schmidhuber, 1997]{hochreiter1997long}
Hochreiter, S. and Schmidhuber, J. (1997).
\newblock Long short-term memory.
\newblock {\em Neural computation}, 9(8):1735--1780.
\bibitem[{Hoshino} et~al., 2013]{openacc}
{Hoshino}, T., {Maruyama}, N., {Matsuoka}, S., and {Takaki}, R. (2013).
\newblock CUDA vs OpenACC: Performance case studies with kernel benchmarks and
a memory-bound CFD application.
\newblock In {\em 2013 13th IEEE/ACM International Symposium on Cluster, Cloud,
and Grid Computing}, pages 136--143.
\bibitem[{Hugo} et~al., 2013]{hugo2014composing}
{Hugo}, A., {Guermouche}, A., {Wacrenier}, P., and {Namyst}, R. (2013).
\newblock Composing multiple StarPU applications over heterogeneous machines: A
supervised approach.
\newblock In {\em 2013 IEEE International Symposium on Parallel \& Distributed
Processing, Workshops and PhD Forum}, pages 1050--1059.
\bibitem[J{\"a}{\"a}skel{\"a}inen et~al., 2018]{pekka}
J{\"a}{\"a}skel{\"a}inen, P., Korhonen, V., Koskela, M., Takala, J.,
Egiazarian, K., Danielyan, A., Cruz, C., James, P., and McIntosh-Smith, S.
(2018).
\newblock Exploiting task parallelism with OpenCL: A case study.
\newblock {\em Journal of Signal Processing Systems}.
% \newblock EXT={"}Danielyan, Aram{"}.
\bibitem[Kim et~al., 2012]{snucl}
Kim, J., Seo, S., Lee, J., Nah, J., Jo, G., and Lee, J. (2012).
\newblock SnuCL: An OpenCL framework for heterogeneous CPU/GPU clusters.
\newblock In {\em Proceedings of the 26th ACM International Conference on
Supercomputing}, ICS '12, pages 341--352, New York, NY, USA. Association
for Computing Machinery.
\bibitem[Kl{\"o}ckner et~al., 2012]{pyopencl}
Kl{\"o}ckner, A., Pinto, N., Lee, Y., Catanzaro, B., Ivanov, P., and Fasih, A.
(2012).
\newblock PyCUDA and PyOpenCL: A scripting-based approach to GPU run-time code
generation.
\newblock {\em Parallel Computing}, 38(3):157--174.
\bibitem[Kofler et~al., 2013]{kofler2013automatic}
Kofler, K., Grasso, I., Cosenza, B., and Fahringer, T. (2013).
\newblock An automatic input-sensitive approach for heterogeneous task
partitioning.
\newblock In {\em SC}, pages 149--160. ACM.
\bibitem[Mikolov et~al., 2013]{NIPS2013_5021}
Mikolov, T., Sutskever, I., Chen, K., Corrado, G.~S., and Dean, J. (2013).
\newblock Distributed representations of words and phrases and their
compositionality.
\newblock In Burges, C. J.~C., Bottou, L., Welling, M., Ghahramani, Z., and
Weinberger, K.~Q., editors, {\em Advances in Neural Information Processing
Systems 26}, pages 3111--3119. Curran Associates, Inc.
\bibitem[Nvidia, 2010]{nvidia}
Nvidia (2010).
\newblock NVIDIA GPU Computing SDK.
\bibitem[Pennington et~al., 2014]{Pennington14glove:global}
Pennington, J., Socher, R., and Manning, C.~D. (2014).
\newblock GloVe: Global vectors for word representation.
\newblock In {\em EMNLP}.
\bibitem[Pouchet, 2012]{polybench}
Pouchet, L.-N. (2012).
\newblock PolyBench benchmark suite.
\bibitem[{Steuwer} et~al., 2011]{skelCL}
{Steuwer}, M., {Kegel}, P., and {Gorlatch}, S. (2011).
\newblock SkelCL -- a portable skeleton library for high-level GPU programming.
\newblock In {\em 2011 IEEE International Symposium on Parallel and Distributed
Processing Workshops and PhD Forum}, pages 1176--1182.
\bibitem[Stone et~al., 2010]{stone2010opencl}
Stone, J.~E., Gohara, D., and Shi, G. (2010).
\newblock OpenCL: A parallel programming standard for heterogeneous computing
systems.
\newblock {\em Computing in science \& engineering}, 12(3):66.
\bibitem[{Topcuoglu} et~al., 2002]{heftoriginal}
{Topcuoglu}, H., {Hariri}, S., and {Wu}, M.-Y. (2002).
\newblock Performance-effective and low-complexity task scheduling for
heterogeneous computing.
\newblock {\em IEEE Transactions on Parallel and Distributed Systems},
13(3):260--274.
\bibitem[Vaswani et~al., 2017]{DBLP:journals/corr/VaswaniSPUJGKP17}
Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A.~N.,
Kaiser, L., and Polosukhin, I. (2017).
\newblock Attention is all you need.
\newblock {\em CoRR}, abs/1706.03762.
\bibitem[Wen et~al., 2014]{smart}
Wen, Y., Wang, Z., and O'Boyle, M. F.~P. (2014).
\newblock Smart multi-task scheduling for OpenCL programs on CPU/GPU
heterogeneous platforms.
\newblock In {\em HiPC}, pages 1--10.
\bibitem[{Xiao} et~al., 2012]{vocl}
{Xiao}, S., {Balaji}, P., {Zhu}, Q., {Thakur}, R., {Coghlan}, S., {Lin}, H.,
{Wen}, G., {Hong}, J., and {Feng}, W. (2012).
\newblock VOCL: An optimized environment for transparent virtualization of
graphics processing units.
\newblock In {\em 2012 Innovative Parallel Computing (InPar)}, pages 1--12.
\bibitem[You et~al., 2015]{virtcl}
You, Y.-P., Wu, H.-J., Tsai, Y.-N., and Chao, Y.-T. (2015).
\newblock {VirtCL: a framework for OpenCL device abstraction and management}.
\newblock In {\em {Proceedings of the 20th Symposium on Principles and Practice
of Parallel Programming}}, pages 161--172. {ACM}.
\end{thebibliography}