-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path2020-02-03-analytics-database.html
501 lines (418 loc) · 25.7 KB
/
2020-02-03-analytics-database.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<link rel="stylesheet" type="text/css" href="/theme/css/elegant.prod.9e9d5ce754.css" media="screen">
<link rel="stylesheet" type="text/css" href="/theme/css/custom.css" media="screen">
<link rel="dns-prefetch" href="//fonts.googleapis.com">
<link rel="preconnect" href="https://fonts.gstatic.com/" crossorigin>
<meta name="author" content="jin" />
<meta name="description" content="" />
<meta name="twitter:creator" content="@jinfwhuang">
<meta property="og:type" content="article" />
<meta name="twitter:card" content="summary">
<meta name="keywords" content="database, misc, " />
<meta property="og:title" content="Analytics Database "/>
<meta property="og:url" content="/2020-02-03-analytics-database" />
<meta property="og:description" content="" />
<meta property="og:site_name" content="Jin's Notes" />
<meta property="og:article:author" content="jin" />
<meta property="og:article:published_time" content="2020-02-03T00:00:00-08:00" />
<meta name="twitter:title" content="Analytics Database ">
<meta name="twitter:description" content="">
<meta property="og:image" content="/images/android-chrome-192x192.png" />
<meta name="twitter:image" content="/images/android-chrome-192x192.png" >
<title>Analytics Database · Jin's Notes
</title>
<link rel="shortcut icon" href="/theme/images/favicon.ico" type="image/x-icon" />
<link rel="icon" href="/theme/images/apple-touch-icon-152x152.png" type="image/png" />
<link rel="apple-touch-icon" href="/theme/images/apple-touch-icon.png" type="image/png" />
<link rel="apple-touch-icon" sizes="57x57" href="/theme/images/apple-touch-icon-57x57.png" type="image/png" />
<link rel="apple-touch-icon" sizes="72x72" href="/theme/images/apple-touch-icon-72x72.png" type="image/png" />
<link rel="apple-touch-icon" sizes="76x76" href="/theme/images/apple-touch-icon-76x76.png" type="image/png" />
<link rel="apple-touch-icon" sizes="114x114" href="/theme/images/apple-touch-icon-114x114.png" type="image/png" />
<link rel="apple-touch-icon" sizes="120x120" href="/theme/images/apple-touch-icon-120x120.png" type="image/png" />
<link rel="apple-touch-icon" sizes="144x144" href="/theme/images/apple-touch-icon-144x144.png" type="image/png" />
<link rel="apple-touch-icon" sizes="152x152" href="/theme/images/apple-touch-icon-152x152.png" type="image/png" />
<link rel="apple-touch-icon" sizes="152x152" href="/theme/images/apple-touch-icon-180x180.png" type="image/png" />
<script>
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','//www.google-analytics.com/analytics.js','ga');
ga('create', 'UA-207279664-1', 'auto');
ga('send', 'pageview');
</script>
</head>
<body>
<div id="content">
<div class="navbar navbar-static-top">
<div class="navbar-inner">
<div class="container-fluid">
<a class="btn btn-navbar" data-toggle="collapse" data-target=".nav-collapse">
<span class="icon-bar"></span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
</a>
<a class="brand" href="/"><span class=site-name><span style="color:black;">Jin's Notes</span></span></a>
<div class="nav-collapse collapse">
<ul class="nav pull-right top-menu">
<li >
<a href=
"/"
>Home</a>
</li>
<!-- <li ><a href="/categories">Categories</a></li>-->
<li ><a href="/tags">Tags</a></li>
<li ><a href="/archives">Archives</a></li>
<li><form class="navbar-search" action="/search" onsubmit="return validateForm(this.elements['q'].value);"> <input type="text" class="search-query" placeholder="Search" name="q" id="tipue_search_input"></form></li>
</ul>
</div>
</div>
</div>
</div>
<div class="container-fluid">
<div class="row-fluid">
<div class="span1"></div>
<div class="span10">
<article itemscope>
<div class="row-fluid">
<header class="page-header span10 offset2">
<h1>
<a href="/2020-02-03-analytics-database">
Analytics Database
</a>
</h1>
</header>
</div>
<div class="row-fluid">
<div class="span2 table-of-content">
<nav>
<h4>Contents</h4>
<div class="toc">
<ul>
<li><a href="#batch">Batch</a></li>
<li><a href="#realtime">Realtime</a></li>
<li><a href="#data-storage">Data Storage</a></li>
<li><a href="#realtime-data-and-realtime-analytics">Realtime Data and Realtime Analytics</a></li>
</ul>
</div>
</nav>
</div>
<div class="span8 article-content">
<p>In this post, I discuss how I would choose an analytics database as of early 2020. </p>
<p>Choosing a database technology is a huge decision because it constrains how I am going to build the data loading pipeline, query patterns, and database administration. In this post, I ignore the consideration of database administration.</p>
<p>When I am evaluating database technologies, at a minimum I will do my best to understand the architecture <em>query engine</em> and <em>storage engine</em>. To understand the query engine, I review the in-memory data mode and how worker nodes coordinate smaller computing tasks. To understand the storage engine, I review the file format and storage backends. Examples of file formats are <span class="caps">ORC</span>, parquet, and custom-built format; examples of storage backends are apache Kudu, apache Bookeeper, object store such as S3, and custom-built storage layers that are collocated with compute workers.</p>
<p>I distinguish between two types of requirements. The first type is for batch processing, and the other is to serve user-facing, realtime queries.</p>
<p><br/></p>
<h4 id="batch">Batch<a class="headerlink" href="#batch" title="Permanent link">¶</a></h4>
<p>Queries for batch processing could take anywhere from seconds to hours to days. The query engines are used to generate reports and ad-hoc analysis. Some data workloads could bypass the query engines and access the data directly.</p>
<p>The majority of the hyped-up databases fall into this category. Most of these solutions work well enough. Query performances are generally acceptable. Using <span class="caps">SQL</span> query is likely to be the easiest way to slice and dice the data. Once I figure out that a particular solution is sufficient for my query patterns and performance requirements, the other questions that I would ask are:</p>
<ul>
<li>What are my choices for storage engines?</li>
<li>Can I use other computing frameworks (Beam, Flink, Spark, MapReduce) on my data?</li>
<li>How do I load data into the storage engines?</li>
<li>How do I deploy and administer the database?</li>
</ul>
<p>Some of the most obvious choices are:</p>
<ul>
<li>Dremio </li>
<li>Presto</li>
<li>Hive</li>
<li>Spark <span class="caps">SQL</span></li>
<li>Snowflake</li>
</ul>
<p>Snowflake stands out because it is a fully managed solution. It does not expose its storage backends for other tools to connect to. It usually means that the data platform needs to keep another copy of the raw data in an object store (e.g. S3) before the data is loaded into Snowflake.</p>
<h4 id="realtime">Realtime<a class="headerlink" href="#realtime" title="Permanent link">¶</a></h4>
<p>Realtime analytics queries need to be completed in seconds, if not less. All of the query engines listed in the previous section are not designed to serve user-facing queries. Even if we tune those engines to have reasonable realtime results, they are usually expensive and deliver horrendous tail latency, which will take years to learn and optimize.</p>
<p>Realtime analytics should take advantage of as many of the following features as possible:</p>
<ul>
<li>minimize network: Majority of the data are collocated at compute nodes.</li>
<li>minimize disk: Compute nodes caches hot data in memory.</li>
<li>memory mapping: The file format should be designed to be compatible with the in-memory data model. Memory mapping avoids data copying from kernel space into user space. The data model uses page markers to allow for partial scans.</li>
<li>optimized in-memory layout: The compute engine performs computation without wasting cycles on converting data types.</li>
</ul>
<p>In my experience, even if the most appropriate query engine is chosen, there is going to be a need to preaggregate data and remove unnecessary columns to get acceptable query performances. The options for realtime analytics are limited and flawed.</p>
<p>My top choices are:</p>
<ul>
<li>Druid(<a href='#druid2020' id='ref-druid2020-1'>
dru20
</a>)</li>
<li>Pinot(<a href='#pinot2020' id='ref-pinot2020-1'>
pin20
</a>)</li>
<li>Elasticsearch</li>
</ul>
<p>Both Druid and Pinot are designed to be realtime analytics engines (<a href='#leventov2018' id='ref-leventov2018-1'>
Lev18
</a>). One can read their documentation for details. I have to mention Elasticsearch as a valid option here even though it is designed for full-text document search. Elasticsearch has a simple design that horizontally scales out its search and aggregation to each of its shards. Data collocation, horizontal scaling, aggressive in-memory processing, and flexible aggregation semantics are sufficient to make it a passable realtime analytics engine for basic aggregation and bucketing that are typical of user-facing dashboards. </p>
<h4 id="data-storage">Data Storage<a class="headerlink" href="#data-storage" title="Permanent link">¶</a></h4>
<p>I would consider data storage backends after I identify the query engine most appropriate for the query patterns. Sometimes I do not have a choice. For example, Elasticsearch has built-in storage layer. Druid’s storage format in the form of Druid segments and metadata is fixed. However, if the engine of choice is Presto, we would have to choose the storage backend and file formats.</p>
<p>My personal preference is leverage on an object storage as much as I can. S3 is dead simple. I tend to prefer Parquet if I have a choice. For example, one of my favorites is to organize data as partition datasets on S3 as Parquet files. </p>
<div class="highlight"><pre><span></span>table_name/partition_key1/partition_key2/partition_key3
</pre></div>
<p>These data could be used by Preso, Hive, Dremio, and Spark <span class="caps">SQL</span> directly. Besides using <span class="caps">SQL</span>, I can easily write Spark or Flink jobs to process them. I can write background batch jobs on the data without any frameworks. This style of storage engine is flexible and supports high throughput on a batch mode.</p>
<h4 id="realtime-data-and-realtime-analytics">Realtime Data and Realtime Analytics<a class="headerlink" href="#realtime-data-and-realtime-analytics" title="Permanent link">¶</a></h4>
<p>One of the most desirable features of an analytics system is have realtime query performances on realtime data. This is hard, and there should be an impossibility theorem out there to sound the alarms for the uninitialized. The perfect solution does not exist, it is a matter of tradeoffs. </p>
<p>Achieving realtime analytics is hard enough. That is, even if we can assume that the data format and storage are fixed and could be optimized for answering queries, fast realtime queries are still difficult. In the best case scenario when we have data collocating, data model compatible with memory-mapping, and optimized in-memory data representation, we might still have to rely on pre-aggregation to have reasonable query performances. When a query need to scan through large amount of data, there is a theoretical limit on time needed to complete based on the compute resources. All a perfect query engine could do is to get close to that limit.</p>
<p>Allowing for real time data loading invariably degrades the ability the query engine could optimize compute. For example, if we want data to show up more quickly, we could build more but smaller files. A large number of small file leads to query slow down. We could update the existing files, but the query engine have to swap out those files. We could stream the realtime data points into the query engine, but those data points would have to be limited otherwise the query engine could spent all of its compute cycle acting as a stream processor. </p>
<p>It does not mean that we cannot improve on the existing solutions to have both better realtime data and query. One promising data storage engine is Kudu (<a href='#kudu2020' id='ref-kudu2020-1'>
kud20
</a>). It looks like that it has good integration with some query engines (e.g. Presto) and processing framework (e.g. Flink). However, none of the query engine that connects to Kudu are designed for realtime. In a way, Kudu could not be a storage engine for a best in-class realtime query engine because it is not designed to be memory-mapped and not designed to be collated with compute nodes. </p>
<p>Another way to achieve realtime query results is to think about how to optimize some of the more popular batch query engines. Take Dremio as an example. If it is computing on Parquet on S3, there is already too much time spent on network and converting data into Arrow in-memory representation. A more realtime “Dremio” can get around those two bottlenecks. It needs to introduce stickness between data and compute nodes. It would need to use a file format that could be memory map onto Arrow layout, e.g. Arrow <span class="caps">IPC</span> file format (<a href='#arrow2020' id='ref-arrow2020-1'>
arr20
</a>).</p>
<p>Given the tradeoffs between storage engine and realtime query engines, the best strategy to deliver realtime queries is probably pre-computation, building aggregated tables that comes as close to your query patterns as possible.</p>
<!--
Ref:
https://druid.apache.org/docs/latest/comparisons/
Real-time:
- clickhouse
- rockset
- hosted only
- pinot
- druid
- starrocks
- open-source, https://github.com/StarRocks/starrocks
- This looks like a really good choice
- https://www.youtube.com/watch?v=AzDxEZuMBwM
- Most are columnar data stores: clickhouse, druid, pinot, starocks, rockset
Comparisons
https://presentations.clickhouse.com/cern/#38
https://rockset.com/real-time-analytics-comparison/
https://druid.apache.org/docs/latest/comparisons/
Batch:
- firebolt
-->
<div id="citations">
<hr>
<h3>Citations</h3>
<ol class="references">
<li id="druid2020">
<span class="reference-text">druid.
<em>Druid Architecture Overview</em>.
2020.
URL: <a href="https://druid.apache.org/docs/latest/design/architecture.html">https://druid.apache.org/docs/latest/design/architecture.html</a>.</span>
<a class="cite-backref" href="#ref-druid2020-1"
title="Jump back to reference 1">
<sup>
<i>
<b>
1
</b>
</i>
</sup>
</a>
</li>
<li id="pinot2020">
<span class="reference-text">pinot.
<em>Pinot Architecture Overview</em>.
2020.
URL: <a href="https://docs.pinot.apache.org/basics/architecture">https://docs.pinot.apache.org/basics/architecture</a>.</span>
<a class="cite-backref" href="#ref-pinot2020-1"
title="Jump back to reference 1">
<sup>
<i>
<b>
1
</b>
</i>
</sup>
</a>
</li>
<li id="leventov2018">
<span class="reference-text">Leventov, Roman.
<em>Comparison of the Open Source OLAP Systems for Big Data</em>.
2018.
URL: <a href="https://leventov.medium.com/comparison-of-the-open-source-olap-systems-for-big-data-clickhouse-druid-and-pinot-8e042a5ed1c7">https://leventov.medium.com/comparison-of-the-open-source-olap-systems-for-big-data-clickhouse-druid-and-pinot-8e042a5ed1c7</a>.</span>
<a class="cite-backref" href="#ref-leventov2018-1"
title="Jump back to reference 1">
<sup>
<i>
<b>
1
</b>
</i>
</sup>
</a>
</li>
<li id="kudu2020">
<span class="reference-text"><em>Kudu Overview</em>.
2020.
URL: <a href="https://kudu.apache.org/overview.html">https://kudu.apache.org/overview.html</a>.</span>
<a class="cite-backref" href="#ref-kudu2020-1"
title="Jump back to reference 1">
<sup>
<i>
<b>
1
</b>
</i>
</sup>
</a>
</li>
<li id="arrow2020">
<span class="reference-text"><em>Apache Arrow File Format</em>.
2020.
URL: <a href="https://arrow.apache.org/faq/">https://arrow.apache.org/faq/</a>.</span>
<a class="cite-backref" href="#ref-arrow2020-1"
title="Jump back to reference 1">
<sup>
<i>
<b>
1
</b>
</i>
</sup>
</a>
</li>
</ol>
</div>
<hr/>
<script src="https://utteranc.es/client.js"
repo="jinfwhuang/jinfwhuang.github.io"
issue-term="pathname"
label="user-comments"
theme="github-light"
crossorigin="anonymous"
async>
</script>
<hr/>
<section>
<h2>Related Posts</h2>
<ul class="related-posts-list">
<li><a href="/2020-02-01-data-streaming" title="Streaming Data Platform">Streaming Data Platform</a></li>
</ul>
<hr />
</section>
<aside>
<nav>
<ul class="articles-timeline">
<li class="previous-article">« <a href="/2020-02-01-data-streaming" title="Previous: Streaming Data Platform">Streaming Data Platform</a></li>
<li class="next-article"><a href="/2020-12-15-quote-of-the-month" title="Next: Quote of the Month">Quote of the Month</a> »</li>
</ul>
</nav>
</aside>
</div>
<section id="article-sidebar" class="span2">
<h4>Published</h4>
<time itemprop="dateCreated" datetime="2020-02-03T00:00:00-08:00">Mon 03 February 2020</time>
<!-- <h4>Category</h4>
<a class="category-link" href="/categories#misc-ref">misc</a>
-->
<h4>Tags</h4>
<ul class="list-of-tags tags-in-article">
<li><a href="/tags#database-ref">database
<span class="superscript">2</span>
</a></li>
</ul>
<h4>Contact</h4>
<div id="sidebar-social-link">
<a href="https://twitter.com/jinfwhuang" title="Twiiter" target="_blank" rel="nofollow noopener noreferrer">
<svg xmlns="http://www.w3.org/2000/svg" aria-label="Twitter" role="img" viewBox="0 0 512 512"><rect width="512" height="512" rx="15%" fill="#1da1f3"/><path fill="#fff" d="M437 152a72 72 0 0 1-40 12 72 72 0 0 0 32-40 72 72 0 0 1-45 17 72 72 0 0 0-122 65 200 200 0 0 1-145-74 72 72 0 0 0 22 94 72 72 0 0 1-32-7 72 72 0 0 0 56 69 72 72 0 0 1-32 1 72 72 0 0 0 67 50 200 200 0 0 1-105 29 200 200 0 0 0 309-179 200 200 0 0 0 35-37"/></svg>
</a>
<a href="https://www.linkedin.com/in/jinfwhuang" title="LinkedIn" target="_blank" rel="nofollow noopener noreferrer">
<svg xmlns="http://www.w3.org/2000/svg" aria-label="LinkedIn" role="img" viewBox="0 0 512 512" fill="#fff"><rect width="512" height="512" rx="15%" fill="#0077b5"/><circle cx="142" cy="138" r="37"/><path stroke="#fff" stroke-width="66" d="M244 194v198M142 194v198"/><path d="M276 282c0-20 13-40 36-40 24 0 33 18 33 45v105h66V279c0-61-32-89-76-89-34 0-51 19-59 32"/></svg>
</a>
</div>
</section>
</div>
</article>
<!-- Root element of PhotoSwipe. Must have class pswp. -->
<div class="pswp" tabindex="-1" role="dialog" aria-hidden="true">
<!-- Background of PhotoSwipe.
It's a separate element as animating opacity is faster than rgba(). -->
<div class="pswp__bg"></div>
<!-- Slides wrapper with overflow:hidden. -->
<div class="pswp__scroll-wrap">
<!-- Container that holds slides.
PhotoSwipe keeps only 3 of them in the DOM to save memory.
Don't modify these 3 pswp__item elements, data is added later on. -->
<div class="pswp__container">
<div class="pswp__item"></div>
<div class="pswp__item"></div>
<div class="pswp__item"></div>
</div>
<!-- Default (PhotoSwipeUI_Default) interface on top of sliding area. Can be changed. -->
<div class="pswp__ui pswp__ui--hidden">
<div class="pswp__top-bar">
<!-- Controls are self-explanatory. Order can be changed. -->
<div class="pswp__counter"></div>
<button class="pswp__button pswp__button--close" title="Close (Esc)"></button>
<button class="pswp__button pswp__button--share" title="Share"></button>
<button class="pswp__button pswp__button--fs" title="Toggle fullscreen"></button>
<button class="pswp__button pswp__button--zoom" title="Zoom in/out"></button>
<!-- Preloader demo https://codepen.io/dimsemenov/pen/yyBWoR -->
<!-- element will get class pswp__preloader--active when preloader is running -->
<div class="pswp__preloader">
<div class="pswp__preloader__icn">
<div class="pswp__preloader__cut">
<div class="pswp__preloader__donut"></div>
</div>
</div>
</div>
</div>
<div class="pswp__share-modal pswp__share-modal--hidden pswp__single-tap">
<div class="pswp__share-tooltip"></div>
</div>
<button class="pswp__button pswp__button--arrow--left" title="Previous (arrow left)">
</button>
<button class="pswp__button pswp__button--arrow--right" title="Next (arrow right)">
</button>
<div class="pswp__caption">
<div class="pswp__caption__center"></div>
</div>
</div>
</div>
</div> </div>
<div class="span1"></div>
</div>
</div>
</div>
<!-- <footer>
<div>
<span class="site-name"><span style="color:black;">Jin's Notes</span></span> - the hardest part is taking the first step
</div>
<div id="fpowered">
Powered by: <a href="http://getpelican.com/" title="Pelican Home Page" target="_blank" rel="nofollow noopener noreferrer">Pelican</a>
Theme: <a href="https://elegant.oncrashreboot.com/" title="Theme Elegant Home Page" target="_blank" rel="nofollow noopener noreferrer">Elegant</a>
</div>
</footer>-->
<script src="//code.jquery.com/jquery.min.js"></script>
<script src="//netdna.bootstrapcdn.com/twitter-bootstrap/2.3.2/js/bootstrap.min.js"></script>
<script src="/theme/js/elegant.prod.9e9d5ce754.js"></script>
<script>
function validateForm(query)
{
return (query.length > 0);
}
</script>
<script>
(function () {
if (window.location.hash.match(/^#comment-\d+$/)) {
$('#comment_thread').collapse('show');
}
})();
window.onhashchange=function(){
if (window.location.hash.match(/^#comment-\d+$/))
window.location.reload(true);
}
$('#comment_thread').on('shown', function () {
var link = document.getElementById('comment-accordion-toggle');
var old_innerHTML = link.innerHTML;
$(link).fadeOut(200, function() {
$(this).text('Click here to hide comments').fadeIn(200);
});
$('#comment_thread').on('hidden', function () {
$(link).fadeOut(200, function() {
$(this).text(old_innerHTML).fadeIn(200);
});
})
})
</script>
</body>
<!-- Theme: Elegant built for Pelican
License : MIT -->
</html>