diff --git a/RecoPixelVertexing/Configuration/python/RecoPixelVertexing_cff.py b/RecoPixelVertexing/Configuration/python/RecoPixelVertexing_cff.py index 34ee6fadb04de..e784b53b7ce1f 100644 --- a/RecoPixelVertexing/Configuration/python/RecoPixelVertexing_cff.py +++ b/RecoPixelVertexing/Configuration/python/RecoPixelVertexing_cff.py @@ -4,7 +4,7 @@ # # for STARTUP ONLY use try and use Offline 3D PV from pixelTracks, with adaptive vertex # -#from RecoPixelVertexing.PixelVertexFinding.PixelVertexes_cff import * -from RecoVertex.PrimaryVertexProducer.OfflinePixel3DPrimaryVertices_cfi import * +from RecoPixelVertexing.PixelVertexFinding.PixelVertexes_cff import * +#from RecoVertex.PrimaryVertexProducer.OfflinePixel3DPrimaryVertices_cfi import * recopixelvertexingTask = cms.Task(pixelTracksTask,pixelVertices) recopixelvertexing = cms.Sequence(recopixelvertexingTask) diff --git a/RecoPixelVertexing/PixelVertexFinding/python/PixelVertexes_cfi.py b/RecoPixelVertexing/PixelVertexFinding/python/PixelVertexes_cfi.py index 77a9f367b9d9b..ea9e4b1e4e037 100644 --- a/RecoPixelVertexing/PixelVertexFinding/python/PixelVertexes_cfi.py +++ b/RecoPixelVertexing/PixelVertexFinding/python/PixelVertexes_cfi.py @@ -20,3 +20,6 @@ ) +from Configuration.ProcessModifiers.gpu_cff import gpu +from RecoPixelVertexing.PixelVertexFinding.pixelVertexHeterogeneousProducer_cfi import pixelVertexHeterogeneousProducer as _pixelVertexHeterogeneousProducer +gpu.toReplaceWith(pixelVertices, _pixelVertexHeterogeneousProducer) diff --git a/RecoPixelVertexing/PixelVertexFinding/test/BuildFile.xml b/RecoPixelVertexing/PixelVertexFinding/test/BuildFile.xml index ad1f03999fbea..dc3b98f8456a5 100644 --- a/RecoPixelVertexing/PixelVertexFinding/test/BuildFile.xml +++ b/RecoPixelVertexing/PixelVertexFinding/test/BuildFile.xml @@ -21,5 +21,6 @@ + diff --git a/RecoPixelVertexing/PixelVertexFinding/test/gpuVertexFinder_t.cu b/RecoPixelVertexing/PixelVertexFinding/test/gpuVertexFinder_t.cu index a92c116702231..d1f508ca98798 100644 --- a/RecoPixelVertexing/PixelVertexFinding/test/gpuVertexFinder_t.cu +++ b/RecoPixelVertexing/PixelVertexFinding/test/gpuVertexFinder_t.cu @@ -4,6 +4,11 @@ #include #include "RecoPixelVertexing/PixelVertexFinding/src/gpuClusterTracks.h" +#include "RecoPixelVertexing/PixelVertexFinding/src/gpuFitVertices.h" +#include "RecoPixelVertexing/PixelVertexFinding/src/gpuSortByPt2.h" +#include "RecoPixelVertexing/PixelVertexFinding/src/gpuSplitVertices.h" + + using namespace gpuVertexFinder; #include @@ -81,7 +86,8 @@ int main() { } auto current_device = cuda::device::current::get(); - + + auto ntrks_d = cuda::memory::device::make_unique(current_device, 1); auto zt_d = cuda::memory::device::make_unique(current_device, 64000); auto ezt2_d = cuda::memory::device::make_unique(current_device, 64000); auto ptt2_d = cuda::memory::device::make_unique(current_device, 64000); @@ -96,11 +102,13 @@ int main() { auto iv_d = cuda::memory::device::make_unique(current_device, 64000); auto nv_d = cuda::memory::device::make_unique(current_device, 1); + auto nv2_d = cuda::memory::device::make_unique(current_device, 1); auto onGPU_d = cuda::memory::device::make_unique(current_device, 1); OnGPU onGPU; + onGPU.ntrks = ntrks_d.get(); onGPU.zt = zt_d.get(); onGPU.ezt2 = ezt2_d.get(); onGPU.ptt2 = ptt2_d.get(); @@ -109,7 +117,8 @@ int main() { onGPU.chi2 = chi2_d.get(); onGPU.ptv2 = ptv2_d.get(); onGPU.sortInd = ind_d.get(); - onGPU.nv = nv_d.get(); + onGPU.nvFinal = nv_d.get(); + onGPU.nvIntermediate = nv2_d.get(); onGPU.izt = izt_d.get(); onGPU.nn = nn_d.get(); onGPU.iv = iv_d.get(); @@ -131,7 +140,8 @@ int main() { gen(ev); std::cout << ev.zvert.size() << ' ' << ev.ztrack.size() << std::endl; - + auto nt = ev.ztrack.size(); + cuda::memory::copy(onGPU.ntrks,&nt,sizeof(uint32_t)); cuda::memory::copy(onGPU.zt,ev.ztrack.data(),sizeof(float)*ev.ztrack.size()); cuda::memory::copy(onGPU.ezt2,ev.eztrack.data(),sizeof(float)*ev.eztrack.size()); cuda::memory::copy(onGPU.ptt2,ev.pttrack.data(),sizeof(float)*ev.eztrack.size()); @@ -143,51 +153,101 @@ int main() { if ( (i%4) == 0 ) cuda::launch(clusterTracks, { 1, 512+256 }, - ev.ztrack.size(), onGPU_d.get(),kk,eps, + onGPU_d.get(),kk,eps, 0.02f,12.0f ); if ( (i%4) == 1 ) cuda::launch(clusterTracks, { 1, 512+256 }, - ev.ztrack.size(), onGPU_d.get(),kk,eps, + onGPU_d.get(),kk,eps, 0.02f,9.0f ); if ( (i%4) == 2 ) cuda::launch(clusterTracks, { 1, 512+256 }, - ev.ztrack.size(), onGPU_d.get(),kk,eps, + onGPU_d.get(),kk,eps, 0.01f,9.0f ); if ( (i%4) == 3 ) cuda::launch(clusterTracks, { 1, 512+256 }, - ev.ztrack.size(), onGPU_d.get(),kk,0.7f*eps, + onGPU_d.get(),kk,0.7f*eps, 0.01f,9.0f ); - + cudaCheck(cudaGetLastError()); cudaDeviceSynchronize(); + + cuda::launch(fitVertices, + { 1,1024-256 }, + onGPU_d.get(),50.f + ); + cudaCheck(cudaGetLastError()); + + uint32_t nv; + cuda::memory::copy(&nv, onGPU.nvFinal, sizeof(uint32_t)); + if (nv==0) { + std::cout << "NO VERTICES???" << std::endl; + continue; + } + float chi2[2*nv]; // make space for splitting... + float zv[2*nv]; + float wv[2*nv]; + float ptv2[2*nv]; + int32_t nn[2*nv]; + uint16_t ind[2*nv]; + + cuda::memory::copy(&nn, onGPU.nn, nv*sizeof(int32_t)); + cuda::memory::copy(&chi2, onGPU.chi2, nv*sizeof(float)); + for (auto j=0U; j0) chi2[j]/=float(nn[j]); + { + auto mx = std::minmax_element(chi2,chi2+nv); + std::cout << "after fit min max chi2 " << nv << " " << *mx.first << ' ' << *mx.second << std::endl; + } + + cuda::launch(fitVertices, + { 1,1024-256 }, + onGPU_d.get(), 50.f + ); + cuda::memory::copy(&nv, onGPU.nvFinal, sizeof(uint32_t)); + cuda::memory::copy(&nn, onGPU.nn, nv*sizeof(int32_t)); + cuda::memory::copy(&chi2, onGPU.chi2, nv*sizeof(float)); + for (auto j=0U; j0) chi2[j]/=float(nn[j]); + { + auto mx = std::minmax_element(chi2,chi2+nv); + std::cout << "before splitting min max chi2 " << nv << " " << *mx.first << ' ' << *mx.second << std::endl; + } + + cuda::launch(splitVertices, + { 1024, 64 }, + onGPU_d.get(), + 9.f + ); + cuda::memory::copy(&nv, onGPU.nvIntermediate, sizeof(uint32_t)); + std::cout << "after split " << nv << std::endl; + + cuda::launch(fitVertices, + { 1,1024-256 }, + onGPU_d.get(),5000.f + ); + cudaCheck(cudaGetLastError()); + + cuda::launch(sortByPt2, { 1, 256 }, - ev.ztrack.size(), onGPU_d.get() + onGPU_d.get() ); - uint32_t nv; - cuda::memory::copy(&nv, onGPU.nv, sizeof(uint32_t)); + cuda::memory::copy(&nv, onGPU.nvFinal, sizeof(uint32_t)); if (nv==0) { std::cout << "NO VERTICES???" << std::endl; continue; } - float zv[nv]; - float wv[nv]; - float chi2[nv]; - float ptv2[nv]; - int32_t nn[nv]; - uint16_t ind[nv]; + cuda::memory::copy(&zv, onGPU.zv, nv*sizeof(float)); cuda::memory::copy(&wv, onGPU.wv, nv*sizeof(float)); cuda::memory::copy(&chi2, onGPU.chi2, nv*sizeof(float)); @@ -195,15 +255,16 @@ int main() { cuda::memory::copy(&nn, onGPU.nn, nv*sizeof(int32_t)); cuda::memory::copy(&ind, onGPU.sortInd, nv*sizeof(uint16_t)); for (auto j=0U; j0) chi2[j]/=float(nn[j]); - { - auto mx = std::minmax_element(wv,wv+nv); - std::cout << "min max error " << 1./std::sqrt(*mx.first) << ' ' << 1./std::sqrt(*mx.second) << std::endl; + auto mx = std::minmax_element(chi2,chi2+nv); + std::cout << "min max chi2 " << nv << " " << *mx.first << ' ' << *mx.second << std::endl; } + { - auto mx = std::minmax_element(chi2,chi2+nv); - std::cout << "min max chi2 " << *mx.first << ' ' << *mx.second << std::endl; + auto mx = std::minmax_element(wv,wv+nv); + std::cout << "min max error " << 1./std::sqrt(*mx.first) << ' ' << 1./std::sqrt(*mx.second) << std::endl; } + { auto mx = std::minmax_element(ptv2,ptv2+nv); std::cout << "min max ptv2 " << *mx.first << ' ' << *mx.second << std::endl; @@ -212,16 +273,15 @@ int main() { } float dd[nv]; - uint32_t ii=0; - for (auto zr : zv) { + for (auto kv=0U; kv