diff --git a/RecoPixelVertexing/Configuration/python/RecoPixelVertexing_cff.py b/RecoPixelVertexing/Configuration/python/RecoPixelVertexing_cff.py
index 34ee6fadb04de..e784b53b7ce1f 100644
--- a/RecoPixelVertexing/Configuration/python/RecoPixelVertexing_cff.py
+++ b/RecoPixelVertexing/Configuration/python/RecoPixelVertexing_cff.py
@@ -4,7 +4,7 @@
#
# for STARTUP ONLY use try and use Offline 3D PV from pixelTracks, with adaptive vertex
#
-#from RecoPixelVertexing.PixelVertexFinding.PixelVertexes_cff import *
-from RecoVertex.PrimaryVertexProducer.OfflinePixel3DPrimaryVertices_cfi import *
+from RecoPixelVertexing.PixelVertexFinding.PixelVertexes_cff import *
+#from RecoVertex.PrimaryVertexProducer.OfflinePixel3DPrimaryVertices_cfi import *
recopixelvertexingTask = cms.Task(pixelTracksTask,pixelVertices)
recopixelvertexing = cms.Sequence(recopixelvertexingTask)
diff --git a/RecoPixelVertexing/PixelVertexFinding/python/PixelVertexes_cfi.py b/RecoPixelVertexing/PixelVertexFinding/python/PixelVertexes_cfi.py
index 77a9f367b9d9b..ea9e4b1e4e037 100644
--- a/RecoPixelVertexing/PixelVertexFinding/python/PixelVertexes_cfi.py
+++ b/RecoPixelVertexing/PixelVertexFinding/python/PixelVertexes_cfi.py
@@ -20,3 +20,6 @@
)
+from Configuration.ProcessModifiers.gpu_cff import gpu
+from RecoPixelVertexing.PixelVertexFinding.pixelVertexHeterogeneousProducer_cfi import pixelVertexHeterogeneousProducer as _pixelVertexHeterogeneousProducer
+gpu.toReplaceWith(pixelVertices, _pixelVertexHeterogeneousProducer)
diff --git a/RecoPixelVertexing/PixelVertexFinding/test/BuildFile.xml b/RecoPixelVertexing/PixelVertexFinding/test/BuildFile.xml
index ad1f03999fbea..dc3b98f8456a5 100644
--- a/RecoPixelVertexing/PixelVertexFinding/test/BuildFile.xml
+++ b/RecoPixelVertexing/PixelVertexFinding/test/BuildFile.xml
@@ -21,5 +21,6 @@
+
diff --git a/RecoPixelVertexing/PixelVertexFinding/test/gpuVertexFinder_t.cu b/RecoPixelVertexing/PixelVertexFinding/test/gpuVertexFinder_t.cu
index a92c116702231..d1f508ca98798 100644
--- a/RecoPixelVertexing/PixelVertexFinding/test/gpuVertexFinder_t.cu
+++ b/RecoPixelVertexing/PixelVertexFinding/test/gpuVertexFinder_t.cu
@@ -4,6 +4,11 @@
#include
#include "RecoPixelVertexing/PixelVertexFinding/src/gpuClusterTracks.h"
+#include "RecoPixelVertexing/PixelVertexFinding/src/gpuFitVertices.h"
+#include "RecoPixelVertexing/PixelVertexFinding/src/gpuSortByPt2.h"
+#include "RecoPixelVertexing/PixelVertexFinding/src/gpuSplitVertices.h"
+
+
using namespace gpuVertexFinder;
#include
@@ -81,7 +86,8 @@ int main() {
}
auto current_device = cuda::device::current::get();
-
+
+ auto ntrks_d = cuda::memory::device::make_unique(current_device, 1);
auto zt_d = cuda::memory::device::make_unique(current_device, 64000);
auto ezt2_d = cuda::memory::device::make_unique(current_device, 64000);
auto ptt2_d = cuda::memory::device::make_unique(current_device, 64000);
@@ -96,11 +102,13 @@ int main() {
auto iv_d = cuda::memory::device::make_unique(current_device, 64000);
auto nv_d = cuda::memory::device::make_unique(current_device, 1);
+ auto nv2_d = cuda::memory::device::make_unique(current_device, 1);
auto onGPU_d = cuda::memory::device::make_unique(current_device, 1);
OnGPU onGPU;
+ onGPU.ntrks = ntrks_d.get();
onGPU.zt = zt_d.get();
onGPU.ezt2 = ezt2_d.get();
onGPU.ptt2 = ptt2_d.get();
@@ -109,7 +117,8 @@ int main() {
onGPU.chi2 = chi2_d.get();
onGPU.ptv2 = ptv2_d.get();
onGPU.sortInd = ind_d.get();
- onGPU.nv = nv_d.get();
+ onGPU.nvFinal = nv_d.get();
+ onGPU.nvIntermediate = nv2_d.get();
onGPU.izt = izt_d.get();
onGPU.nn = nn_d.get();
onGPU.iv = iv_d.get();
@@ -131,7 +140,8 @@ int main() {
gen(ev);
std::cout << ev.zvert.size() << ' ' << ev.ztrack.size() << std::endl;
-
+ auto nt = ev.ztrack.size();
+ cuda::memory::copy(onGPU.ntrks,&nt,sizeof(uint32_t));
cuda::memory::copy(onGPU.zt,ev.ztrack.data(),sizeof(float)*ev.ztrack.size());
cuda::memory::copy(onGPU.ezt2,ev.eztrack.data(),sizeof(float)*ev.eztrack.size());
cuda::memory::copy(onGPU.ptt2,ev.pttrack.data(),sizeof(float)*ev.eztrack.size());
@@ -143,51 +153,101 @@ int main() {
if ( (i%4) == 0 )
cuda::launch(clusterTracks,
{ 1, 512+256 },
- ev.ztrack.size(), onGPU_d.get(),kk,eps,
+ onGPU_d.get(),kk,eps,
0.02f,12.0f
);
if ( (i%4) == 1 )
cuda::launch(clusterTracks,
{ 1, 512+256 },
- ev.ztrack.size(), onGPU_d.get(),kk,eps,
+ onGPU_d.get(),kk,eps,
0.02f,9.0f
);
if ( (i%4) == 2 )
cuda::launch(clusterTracks,
{ 1, 512+256 },
- ev.ztrack.size(), onGPU_d.get(),kk,eps,
+ onGPU_d.get(),kk,eps,
0.01f,9.0f
);
if ( (i%4) == 3 )
cuda::launch(clusterTracks,
{ 1, 512+256 },
- ev.ztrack.size(), onGPU_d.get(),kk,0.7f*eps,
+ onGPU_d.get(),kk,0.7f*eps,
0.01f,9.0f
);
-
+ cudaCheck(cudaGetLastError());
cudaDeviceSynchronize();
+
+ cuda::launch(fitVertices,
+ { 1,1024-256 },
+ onGPU_d.get(),50.f
+ );
+ cudaCheck(cudaGetLastError());
+
+ uint32_t nv;
+ cuda::memory::copy(&nv, onGPU.nvFinal, sizeof(uint32_t));
+ if (nv==0) {
+ std::cout << "NO VERTICES???" << std::endl;
+ continue;
+ }
+ float chi2[2*nv]; // make space for splitting...
+ float zv[2*nv];
+ float wv[2*nv];
+ float ptv2[2*nv];
+ int32_t nn[2*nv];
+ uint16_t ind[2*nv];
+
+ cuda::memory::copy(&nn, onGPU.nn, nv*sizeof(int32_t));
+ cuda::memory::copy(&chi2, onGPU.chi2, nv*sizeof(float));
+ for (auto j=0U; j0) chi2[j]/=float(nn[j]);
+ {
+ auto mx = std::minmax_element(chi2,chi2+nv);
+ std::cout << "after fit min max chi2 " << nv << " " << *mx.first << ' ' << *mx.second << std::endl;
+ }
+
+ cuda::launch(fitVertices,
+ { 1,1024-256 },
+ onGPU_d.get(), 50.f
+ );
+ cuda::memory::copy(&nv, onGPU.nvFinal, sizeof(uint32_t));
+ cuda::memory::copy(&nn, onGPU.nn, nv*sizeof(int32_t));
+ cuda::memory::copy(&chi2, onGPU.chi2, nv*sizeof(float));
+ for (auto j=0U; j0) chi2[j]/=float(nn[j]);
+ {
+ auto mx = std::minmax_element(chi2,chi2+nv);
+ std::cout << "before splitting min max chi2 " << nv << " " << *mx.first << ' ' << *mx.second << std::endl;
+ }
+
+ cuda::launch(splitVertices,
+ { 1024, 64 },
+ onGPU_d.get(),
+ 9.f
+ );
+ cuda::memory::copy(&nv, onGPU.nvIntermediate, sizeof(uint32_t));
+ std::cout << "after split " << nv << std::endl;
+
+ cuda::launch(fitVertices,
+ { 1,1024-256 },
+ onGPU_d.get(),5000.f
+ );
+ cudaCheck(cudaGetLastError());
+
+
cuda::launch(sortByPt2,
{ 1, 256 },
- ev.ztrack.size(), onGPU_d.get()
+ onGPU_d.get()
);
- uint32_t nv;
- cuda::memory::copy(&nv, onGPU.nv, sizeof(uint32_t));
+ cuda::memory::copy(&nv, onGPU.nvFinal, sizeof(uint32_t));
if (nv==0) {
std::cout << "NO VERTICES???" << std::endl;
continue;
}
- float zv[nv];
- float wv[nv];
- float chi2[nv];
- float ptv2[nv];
- int32_t nn[nv];
- uint16_t ind[nv];
+
cuda::memory::copy(&zv, onGPU.zv, nv*sizeof(float));
cuda::memory::copy(&wv, onGPU.wv, nv*sizeof(float));
cuda::memory::copy(&chi2, onGPU.chi2, nv*sizeof(float));
@@ -195,15 +255,16 @@ int main() {
cuda::memory::copy(&nn, onGPU.nn, nv*sizeof(int32_t));
cuda::memory::copy(&ind, onGPU.sortInd, nv*sizeof(uint16_t));
for (auto j=0U; j0) chi2[j]/=float(nn[j]);
-
{
- auto mx = std::minmax_element(wv,wv+nv);
- std::cout << "min max error " << 1./std::sqrt(*mx.first) << ' ' << 1./std::sqrt(*mx.second) << std::endl;
+ auto mx = std::minmax_element(chi2,chi2+nv);
+ std::cout << "min max chi2 " << nv << " " << *mx.first << ' ' << *mx.second << std::endl;
}
+
{
- auto mx = std::minmax_element(chi2,chi2+nv);
- std::cout << "min max chi2 " << *mx.first << ' ' << *mx.second << std::endl;
+ auto mx = std::minmax_element(wv,wv+nv);
+ std::cout << "min max error " << 1./std::sqrt(*mx.first) << ' ' << 1./std::sqrt(*mx.second) << std::endl;
}
+
{
auto mx = std::minmax_element(ptv2,ptv2+nv);
std::cout << "min max ptv2 " << *mx.first << ' ' << *mx.second << std::endl;
@@ -212,16 +273,15 @@ int main() {
}
float dd[nv];
- uint32_t ii=0;
- for (auto zr : zv) {
+ for (auto kv=0U; kv