This repository has been archived by the owner on Nov 7, 2019. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
queries.go
866 lines (761 loc) · 24.6 KB
/
queries.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
package core
// create collections table if it doesn't exist
const qCollectionCreateTable = `
CREATE TABLE IF NOT EXISTS collections (
id UUID PRIMARY KEY,
created timestamp NOT NULL,
updated timestamp NOT NULL,
creator text NOT NULL DEFAULT '',
title text NOT NULL DEFAULT '',
url text NOT NULL DEFAULT ''
);`
// list collections by reverse cronological date created
// paginated
const qCollections = `
SELECT
id, created, updated, creator, title, description, url
FROM collections
ORDER BY created DESC
LIMIT $1 OFFSET $2;`
// list collections by creator
const qCollectionsByCreator = `
SELECT
id, created, updated, creator, title, description, url
FROM collections
WHERE creator = $4
ORDER BY $3
LIMIT $1 OFFSET $2;`
// check for existence of a collection
const qCollectionExists = `
SELECT exists(SELECT 1 FROM collections WHERE id = $1)
`
// insert a collection
const qCollectionInsert = `
INSERT INTO collections
(id, created, updated, creator, title, description, url )
VALUES ($1, $2, $3, $4, $5, $6, $7);`
// update an existing collection, selecting by ID
const qCollectionUpdate = `
UPDATE collections
SET created=$2, updated=$3, creator=$4, title=$5, description=$6, url=$7
WHERE id = $1;`
// read collection info by ID
const qCollectionById = `
SELECT
id, created, updated, creator, title, description, url
FROM collections
WHERE id = $1;`
const qCollectionByUrl = `
SELECT
id, created, updated, creator, title, description, url
FROM collections
WHERE url = $1;`
// deleted a collection
const qCollectionDelete = `
DELETE from collections
WHERE id = $1;`
const qCollectionItemCreateTable = `
CREATE TABLE IF NOT EXISTS collection_items (
collection_id UUID NOT NULL,
url_id text NOT NULL default '',
index integer NOT NULL default -1,
description text NOT NULL default '',
PRIMARY KEY (collection_id, url_id)
);`
const qCollectionItemInsert = `
INSERT INTO collection_items
(collection_id, url_id, index, description)
VALUES
($1, $2, $3, $4);`
const qCollectionItemUpdate = `
UPDATE collection_items
SET index = $3, description = $4
WHERE collection_id = $1 and url_id = $2;`
const qCollectionItemDelete = `
DELETE FROM collection_items
WHERE collection_id = $1 AND url_id = $2;`
const qCollectionItemExists = `
SELECT exists(SELECT 1 FROM collection_items where collection_id = $1 AND url_id = $2);`
const qCollectionItemById = `
SELECT
ci.collection_id, u.id, u.hash, u.url, u.title, ci.index, ci.description
FROM collection_items as ci, urls as u
WHERE collection_id = $1 AND url_id = $2 AND u.id = ci.url_id;`
const qCollectionLength = `
SELECT count(1) FROM collection_items WHERE collection_id = $1;`
const qCollectionItems = `
SELECT
ci.collection_id, u.id, u.hash, u.url, u.title, ci.index, ci.description
FROM collection_items as ci, urls as u
WHERE collection_id = $4
AND u.id = ci.url_id
ORDER BY $3
LIMIT $1 OFFSET $2;`
// insert a dataRepo
const qDataRepoInsert = `
INSERT INTO data_repos
(id, created, updated, title, description, url)
VALUES ($1, $2, $3, $4, $5, $6);`
// update an existing dataRepo, selecting by ID
const qDataRepoUpdate = `
UPDATE data_repos
SET created=$2, updated=$3, title=$4, description=$5, url=$6
WHERE id = $1;`
const qDataRepoCreateTable = `
CREATE TABLE IF NOT EXISTS data_repos (
id UUID PRIMARY KEY NOT NULL,
created timestamp NOT NULL default (now() at time zone 'utc'),
updated timestamp NOT NULL default (now() at time zone 'utc'),
title text NOT NULL default '',
description text NOT NULL default '',
url text NOT NULL default '',
deleted boolean default false
);`
const qDataRepoExists = `SELECT exists(SELECT 1 FROM data_repos WHERE id = $1)`
// read dataRepo info by ID
const qDataRepoById = `
SELECT
id, created, updated, title, description, url
FROM data_repos
WHERE id = $1;`
// deleted a dataRepo
const qDataRepoDelete = `
DELETE from data_repos
WHERE id = $1;`
// list data_repos by reverse cronological date created
// paginated
const qDataRepos = `
SELECT
id, created, updated, title, description, url
FROM data_repos
ORDER BY created DESC
LIMIT $1 OFFSET $2;`
// create links table
const qLinkCreateTable = `
CREATE TABLE IF NOT EXISTS links (
created timestamp NOT NULL,
updated timestamp NOT NULL,
src text NOT NULL references urls(url) ON DELETE CASCADE,
dst text NOT NULL references urls(url) ON DELETE CASCADE,
PRIMARY KEY (src, dst)
);`
// check to see if a link exists
const qLinkExists = `SELECT exists(select 1 from links where src = $1 and dst = $2)`
// read a link
const qLinkRead = `
SELECT
created, updated, src, dst
FROM links
WHERE
src = $1 AND
dst = $2;`
// insert a link
const qLinkInsert = `
INSERT INTO LINKS
(created, updated, src, dst)
VALUES
($1, $2, $3, $4);`
// update a link
const qLinkUpdate = `
UPDATE links SET
created = $1, updated = $2
WHERE
src = $3 AND
dst = $4`
// delete a link
const qLinkDelete = `
DELETE FROM links
WHERE
src = $1 AND
dst = $2;`
const qMetadataCreateTable = `
CREATE TABLE IF NOT EXISTS metadata (
hash text NOT NULL default '',
time_stamp timestamp NOT NULL,
key_id text NOT NULL default '',
subject text NOT NULL,
prev text NOT NULL default '',
meta json,
deleted boolean default false
);
`
// list latest metadata entries by reverse cronological order
// paginated
const qMetadataLatest = `
SELECT
hash, time_stamp, key_id, subject, prev, meta
FROM metadata
WHERE
key_id = $1 AND
subject = $2
ORDER BY time_stamp DESC;`
// select metadata entries for a given subject hash
// TODO - should this be paginated?
const qMetadataForSubject = `
SELECT
hash, time_stamp, key_id, subject, prev, meta
FROM metadata
WHERE
subject = $1 AND
deleted = false AND
meta IS NOT NULL;`
// count the number of metatadata entries for a a given user key
// omitting empty content
const qMetadataCountForKey = `
SELECT
count(1)
FROM metadata
WHERE
-- confirm is not empty hash
hash != '1220e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855' AND
key_id = $1;`
// pagniated selection of only the latest metadata entry for each user-key/subject pair
// paginated
const qMetadataLatestForKey = `
SELECT DISTINCT ON (subject)
hash, time_stamp, key_id, subject, prev, meta
FROM metadata
WHERE
key_id = $1 and
deleted = false
ORDER BY subject, time_stamp DESC
LIMIT $2 OFFSET $3;`
// insert a metdata entry
const qMetadataInsert = `
INSERT INTO metadata
(hash, time_stamp, key_id, subject, prev, meta, deleted)
VALUES
($1, $2, $3, $4, $5, $6, false);`
const qPrimerCreateTable = `
CREATE TABLE IF NOT EXISTS primers (
id UUID PRIMARY KEY NOT NULL,
created timestamp NOT NULL default (now() at time zone 'utc'),
updated timestamp NOT NULL default (now() at time zone 'utc'),
short_title text NOT NULL default '',
title text NOT NULL default '',
description text NOT NULL default '',
parent_id text NOT NULL default '', -- this should be "UUID references primers(id)", but then we'd need to accept null values, no bueno
stats json,
meta json,
deleted boolean default false
);`
const qPrimerExists = `SELECT exists(SELECT 1 FROM primers WHERE id = $1)`
// read a primer for a given Id
const qPrimerById = `
SELECT
id, created, updated, short_title, title, description,
parent_id, stats, meta
FROM primers
WHERE
deleted = false AND
id = $1;`
// insert a primer
const qPrimerInsert = `
INSERT INTO primers
(id, created, updated, short_title, title, description, parent_id, stats, meta)
VALUES
($1, $2, $3, $4, $5, $6, $7, $8, $9);`
// update an existing primer
const qPrimerUpdate = `
UPDATE PRIMERS set
created = $2, updated = $3, short_title = $4, title = $5, description = $6,
parent_id = $7, stats = $8, meta = $9
WHERE
deleted = false AND
id = $1;`
// 'delete' a primer
const qPrimerDelete = `
UPDATE primers SET
deleted = true
WHERE id = $1;`
// read children for a given primer ID. children only, decendants are omitted.
const qPrimerSubPrimers = `
SELECT
id, created, updated, short_title, title, description,
parent_id, stats, meta
FROM primers
WHERE
deleted = false AND
parent_id = $1;`
// read all of a primer's sources
const qPrimerSources = `
SELECT
id, created, updated, title, description, url, primer_id, crawl, stale_duration,
last_alert_sent, meta, stats
FROM sources
WHERE
deleted = false AND
primer_id = $1;`
// enumerate primers
const qPrimersCount = `SELECT count(1) FROM primers WHERE deleted = false`
// list primers by reverse chronolgical created date, no hierarchy is observed
// paginated
const qPrimersList = `
SELECT
id, created, updated, short_title, title, description,
parent_id, stats, meta
FROM primers
WHERE
deleted = false
ORDER BY created DESC
LIMIT $1 OFFSET $2;`
// list primers that have no parent ("root" or "base" primers), order by reverse chronological created date.
// paginated
const qBasePrimersList = `
select
id, created, updated, short_title, title, description,
parent_id, stats, meta
from primers
where
deleted = false and
parent_id = ''
order by created desc
limit $1 offset $2;`
const qSourceCreateTable = `
CREATE TABLE IF NOT EXISTS sources (
id UUID PRIMARY KEY NOT NULL,
created timestamp NOT NULL default (now() at time zone 'utc'),
updated timestamp NOT NULL default (now() at time zone 'utc'),
title text NOT NULL default '',
description text NOT NULL default '',
url text UNIQUE NOT NULL,
primer_id UUID references primers(id) ON DELETE CASCADE,
crawl boolean default true,
stale_duration integer NOT NULL DEFAULT 43200000, -- defaults to 12 hours, column needs to be multiplied by 1000000 to become a poper duration
last_alert_sent timestamp,
stats json,
meta json,
deleted boolean default false
);`
const qSourceExists = `SELECT exists(SELECT 1 FROM sources WHERE id = $1)`
const qSourceExistsByUrl = `SELECT exists(SELECT 1 FROM sources WHERE url = $1)`
// select
const qSourcesCount = `SELECT count(1) FROM sources;`
// list sources, ordered by reverse chronological created date
// paginated
const qSourcesList = `
SELECT
id, created, updated, title, description, url, primer_id, crawl, stale_duration,
last_alert_sent, meta, stats
FROM sources
WHERE
deleted = false
ORDER BY created DESC
LIMIT $1 OFFSET $2;`
// read a source for a given ID
const qSourceById = `
SELECT
id, created, updated, title, description, url, primer_id, crawl, stale_duration,
last_alert_sent, meta, stats
FROM sources
WHERE
deleted = false AND
id = $1;`
// read a source for a given url string
const qSourceByUrl = `
SELECT
id, created, updated, title, description, url, primer_id, crawl, stale_duration,
last_alert_sent, meta, stats
FROM sources
WHERE
deleted = false AND
url = $1;`
// add a source
const qSourceInsert = `
INSERT INTO sources
(id, created, updated, title, description, url, primer_id, crawl, stale_duration,
last_alert_sent, meta, stats)
VALUES
($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12);`
// update an exsiting source
const qSourceUpdate = `
UPDATE sources
SET
created = $2, updated = $3, title = $4, description = $5, url = $6, primer_id = $7,
crawl = $8, stale_duration = $9, last_alert_sent = $10, meta = $11, stats = $12
WHERE
deleted = false AND
id = $1;`
// 'delete' a source
const qSourceDelete = `
UPDATE sources
SET
deleted = true
WHERE
url = $1;`
// count how many URLs this source matches from the urls table
// the passed in argument can take the form '%[arg]%' to ignore position within
// the url string
// warning - big & slow
const qSourceUrlCount = `
SELECT count(1)
FROM urls
WHERE
url ilike $1;`
// list sources that have crawl column set to true, ordered by reverse chronolgical created date
// paginated
const qSourcesCrawling = `
SELECT
id, created, updated, title, description, url, primer_id, crawl, stale_duration,
last_alert_sent, meta, stats
FROM sources
WHERE
deleted = false and
crawl = true
ORDER BY created DESC
LIMIT $1 OFFSET $2;`
// enumerate all urls for a given source that are suspected to have content
// this generates the "content urls" stat.
const qSourceContentUrlCount = `
SELECT count(1)
FROM urls
WHERE
hash != '' AND
hash != '1220e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855' AND
content_sniff != 'text/html; charset=utf-8' AND
url ilike $1;`
// enumerate all urls for a given source that are suspected to have content *and*
// have an entry in the metadata table
// this generates the "content urls" stat.
const qSourceContentWithMetadataCount = `
SELECT count(1)
FROM urls
WHERE
url ilike $1 AND
content_sniff != 'text/html; charset=utf-8' AND
-- confirm is not empty hash
hash != '1220e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855' AND
hash != '' AND
EXISTS (SELECT null FROM metadata WHERE urls.hash = metadata.subject);`
const qSourceUndescribedContentUrls = `
select
url, created, updated, last_head, last_get, status, content_type, content_sniff,
content_length, file_name, title, id, headers_took, download_took, headers, meta, hash
from urls
where
url ilike $1
and content_sniff != 'text/html; charset=utf-8'
and last_get is not null
-- confirm is not empty hash
and hash != '1220e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855'
and not exists (select null from metadata where urls.hash = metadata.subject)
limit $2 offset $3;`
const qSourceDescribedContentUrls = `
select
url, created, updated, last_head, last_get, status, content_type, content_sniff,
content_length, file_name, title, id, headers_took, download_took, headers, meta, hash
from urls
where
url ilike $1
and content_sniff != 'text/html; charset=utf-8'
and last_get is not null
-- confirm is not empty hash
and hash != '1220e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855'
and exists (select null from metadata where urls.hash = metadata.subject)
limit $2 offset $3;`
const qSnapshotCreateTable = `
CREATE TABLE IF NOT EXISTS snapshots (
url text NOT NULL references urls(url) ON DELETE CASCADE,
created timestamp NOT NULL,
status integer NOT NULL DEFAULT 0,
duration integer NOT NULL DEFAULT 0,
meta json,
hash text NOT NULL DEFAULT ''
);`
const qSnapeshotExists = `SELECT exists(SELECT 1 FROM snapshots WHERE hash = $1)`
const qSnapshotsByUrl = `
select
url, created, status, duration, meta, hash
from snapshots
where
url = $1;`
const qSnapshotInsert = `
insert into snapshots
(url, created, status, duration, meta, hash)
values
($1, $2, $3, $4, $5, $6);`
const qUrlsSearch = `
select
url, created, updated, last_head, last_get, status, content_type, content_sniff,
content_length, file_name, title, id, headers_took, download_took, headers, meta, hash
from urls
where
url ilike $1
limit $2 offset $3;`
const qUrlsCreateTable = `
CREATE TABLE IF NOT EXISTS urls (
url text PRIMARY KEY NOT NULL,
created timestamp NOT NULL,
updated timestamp NOT NULL,
last_head timestamp,
last_get timestamp,
status integer NOT NULL default 0,
content_type text NOT NULL default '',
content_sniff text NOT NULL default '',
content_length bigint NOT NULL default 0,
file_name text NOT NULL default '',
title text NOT NULL default '',
id text NOT NULL default '',
headers_took integer NOT NULL default 0,
download_took integer NOT NULL default 0,
headers json,
meta json,
hash text NOT NULL default ''
);`
const qUrlExists = `SELECT exists(SELECT 1 FROM urls WHERE id = $1)`
const qUrlsList = `
select
url, created, updated, last_head, last_get, status, content_type, content_sniff,
content_length, file_name, title, id, headers_took, download_took, headers, meta, hash
from urls
order by created desc
limit $1 offset $2;`
const qContentUrlsList = `
select
url, created, updated, last_head, last_get, status, content_type, content_sniff,
content_length, file_name, title, id, headers_took, download_took, headers, meta, hash
from urls
where
last_get is not null and
content_sniff != 'text/html; charset=utf-8' and
content_sniff != '' and
hash != ''
order by created desc
limit $1 offset $2;`
const qContentUrlsCount = `
select
count(1)
from urls
where
last_get is not null and
content_sniff != 'text/html; charset=utf-8' and
content_sniff != '' and
hash != ''
`
const qUrlsFetched = `
select
url, created, updated, last_head, last_get, status, content_type, content_sniff,
content_length, file_name, title, id, headers_took, download_took, headers, meta, hash
from urls
where
last_get is not null
order by created desc
limit $1 offset $2;`
const qUrlsUnfetched = `
select
url, created, updated, last_head, last_get, status, content_type, content_sniff,
content_length, file_name, title, id, headers_took, download_took, headers, meta, hash
from urls
where
last_get is null
order by created desc
limit $1 offset $2;`
const qUrlsForHash = `
select
url, created, updated, last_head, last_get, status, content_type, content_sniff,
content_length, file_name, title, id, headers_took, download_took, headers, meta, hash
from urls
where
hash = $1;`
const qUrlExistsByUrlString = `SELECT exists(SELECT 1 FROM urls WHERE url = $1)`
const qUrlByUrlString = `
select
url, created, updated, last_head, last_get, status, content_type, content_sniff,
content_length, file_name, title, id, headers_took, download_took, headers, meta, hash
from urls
where
url = $1;`
const qUrlById = `
select
url, created, updated, last_head, last_get, status, content_type, content_sniff,
content_length, file_name, title, id, headers_took, download_took, headers, meta, hash
from urls
where
id = $1;`
const qUrlExistsById = `SELECT exists(SELECT 1 FROM urls WHERE id = $1)`
const qUrlByHash = `
select
url, created, updated, last_head, last_get, status, content_type, content_sniff,
content_length, file_name, title, id, headers_took, download_took, headers, meta, hash
from urls
where
hash = $1;`
const qUrlExistsByHash = `SELECT exists(SELECT 1 FROM urls WHERE hash = $1)`
const qUrlInsert = `
insert into urls
(url, created, updated, last_head, last_get, status, content_type, content_sniff,
content_length, file_name, title, id, headers_took, download_took, headers, meta, hash)
values ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17);`
const qUrlUpdate = `
update urls
set
created=$2, updated=$3, last_head=$4, last_get=$5, status=$6, content_type=$7, content_sniff=$8,
content_length=$9, file_name=$10, title=$11, id=$12, headers_took=$13, download_took=$14, headers=$15, meta=$16, hash=$17
where
url = $1;`
const qUrlDelete = `
delete from urls
where
url = $1;`
const qUrlInboundLinkUrlStrings = `
select src
from links
where
dst = $1;`
const qUrlOutboundLinkUrlStrings = `
select dst
from links
where
src = $1;`
const qUrlDstLinks = `
select
urls.url, urls.created, urls.updated, last_head, last_get, status, content_type, content_sniff,
content_length, file_name, title, id, headers_took, download_took, headers, meta, hash
from urls, links
where
links.src = $1 and
links.dst = urls.url;`
// select all destination links that lead to content urls
const qUrlDstContentLinks = `
SELECT
urls.url, urls.created, urls.updated, last_head, last_get, status, content_type, content_sniff,
content_length, file_name, title, id, headers_took, download_took, headers, meta, hash
FROM urls, links
WHERE
links.src = $1 AND
links.dst = urls.url AND
urls.hash != '' AND
urls.hash != '1220e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855' AND
urls.content_sniff != 'text/html; charset=utf-8'
;`
const qUrlSrcLinks = `
select
urls.url, urls.created, urls.updated, last_head, last_get, status, content_type, content_sniff,
content_length, file_name, title, id, headers_took, download_took, headers, meta, hash
from urls, links
where
links.dst = $1 and
links.src = urls.url;`
const qUncrawlableCreateTable = `
CREATE TABLE IF NOT EXISTS uncrawlables (
id text NOT NULL default '',
url text PRIMARY KEY NOT NULL,
created timestamp NOT NULL default (now() at time zone 'utc'),
updated timestamp NOT NULL default (now() at time zone 'utc'),
creator_key_id text NOT NULL default '',
name text NOT NULL default '',
email text NOT NULL default '',
event_name text NOT NULL default '',
agency_name text NOT NULL default '',
agency_id text NOT NULL default '',
subagency_id text NOT NULL default '',
org_id text NOT NULL default '',
suborg_id text NOT NULL default '',
subprimer_id text NOT NULL default '',
ftp boolean default false,
database boolean default false,
interactive boolean default false,
many_files boolean default false,
comments text NOT NULL default '',
deleted boolean NOT NULL default false
);`
const qUncrawlableExists = `SELECT exists(SELECT 1 FROM uncrawlables WHERE id = $1)`
const qUncrawlableExistsByUrl = `SELECT exists(SELECT 1 FROM uncrawlables WHERE url = $1)`
const qUncrawlablesList = `
select
id, url,created,updated,creator_key_id,
name,email,event_name,agency_name,
agency_id,subagency_id,org_id,suborg_id,subprimer_id,
ftp,database,interactive,many_files,
comments
from uncrawlables
order by created desc
limit $1 offset $2;`
const qUncrawlableInsert = `
insert into uncrawlables
( id, url, created,updated,creator_key_id,
name,email,event_name,agency_name,
agency_id,subagency_id,org_id,suborg_id,subprimer_id,
ftp,database,interactive,many_files,
comments)
values ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19);`
const qUncrawlableUpdate = `
update uncrawlables
set
url = $2, created = $3, updated = $4, creator_key_id = $5,
name = $6, email = $7, event_name = $8, agency_name = $9,
agency_id = $10, subagency_id = $11, org_id = $12, suborg_id = $13, subprimer_id = $14,
ftp = $15, database = $16,interactive = $17, many_files = $18,
comments = $19
where id = $1;`
const qUncrawlableByUrl = `
SELECT
id, url,created,updated,creator_key_id,
name,email,event_name,agency_name,
agency_id,subagency_id,org_id,suborg_id,subprimer_id,
ftp,database,interactive,many_files,
comments
FROM uncrawlables
WHERE url = $1;`
const qUncrawlableById = `
select
id, url,created,updated,creator_key_id,
name,email,event_name,agency_name,
agency_id,subagency_id,org_id,suborg_id,subprimer_id,
ftp,database,interactive,many_files,
comments
from uncrawlables
where id = $1;`
const qUncrawlableDelete = `
delete from uncrawlables
where url = $1;`
const qCustomCrawlCreateTable = `
CREATE TABLE IF NOT EXISTS custom_crawls (
id UUID PRIMARY KEY NOT NULL,
created timestamp NOT NULL default (now() at time zone 'utc'),
updated timestamp NOT NULL default (now() at time zone 'utc'),
jwt text NOT NULL default '',
morphRunId text NOT NULL default '',
dateCompleted timestamp NOT NULL default (now() at time zone 'utc'),
githubRepo text NOT NULL default '',
originalUrl text NOT NULL default '',
sqliteChecksum text NOT NULL default ''
);`
const qCustomCrawlExists = `SELECT exists(SELECT 1 FROM custom_crawls WHERE id = $1)`
const qCustomCrawlsList = `
select
id, created, updated,
jwt, morphRunId, dateCompleted, githubRepo, originalUrl,
sqliteChecksum
from custom_crawls
order by created DESC
limit $1 offset $2`
const qCustomCrawlInsert = `
insert into custom_crawls
(id, created, updated,
jwt, morphRunId, dateCompleted, githubRepo, originalUrl,
sqliteChecksum)
values ($1, $2, $3, $4, $5, $6, $7, $8, $9)`
const qCustomCrawlUpdate = `
update custom_crawls
set
created = $2, updated = $3,
jwt = $4, morphRunId = $5, dateCompleted = $6, githubRepo = $7, originalUrl = $8,
sqliteChecksum = $9
where id = $1`
const qCustomCrawlByUrl = `
SELECT
id, created, updated,
jwt, morphRunId, dateCompleted, githubRepo, originalUrl,
sqliteChecksum
FROM custom_crawls
WHERE url = $1;`
const qCustomCrawlById = `
select
id, created, updated,
jwt, morphRunId, dateCompleted, githubRepo, originalUrl,
sqliteChecksum
from custom_crawls
where id = $1;`
const qCustomCrawlDelete = `
delete from custom_crawls where id = $1;`