Skip to content

Commit

Permalink
update & improve system
Browse files Browse the repository at this point in the history
  • Loading branch information
trieu committed Oct 23, 2024
1 parent b7ce2e9 commit 6b4aba9
Show file tree
Hide file tree
Showing 8 changed files with 56 additions and 21 deletions.
8 changes: 8 additions & 0 deletions configs/database-query-template.aql
Original file line number Diff line number Diff line change
Expand Up @@ -1888,6 +1888,14 @@ AQL_GET_SEGMENTS_BY_PAGINATION =>
RETURN d
;

AQL_GET_SEGMENTS_TO_REFRESH =>
FOR d in cdp_segment
FILTER d.status >= 0 AND d.autoUpdateProfiles == TRUE
SORT d.updatedAt
LIMIT @startIndex,@numberResult
RETURN d
;

AQL_COUNT_SEGMENT_FOR_PAGINATION =>
FOR d in cdp_segment
FILTER d.status >= 0 AND (@searchValue != "" ? LOWER(d.name) LIKE @searchValue : true)
Expand Down
64 changes: 45 additions & 19 deletions configs/regexes.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ user_agent_parsers:

# AWS S3 Clients
# must come before "Bots General matcher" to catch "boto"/"boto3" before "bot"
- regex: '\b(Boto3?|JetS3t|aws-(?:cli|sdk-(?:cpp|go|java|nodejs|ruby2?|dotnet-(?:\d{1,2}|core)))|s3fs)/(\d+)\.(\d+)(?:\.(\d+)|)'
- regex: '\b(Boto3?|JetS3t|aws-(?:cli|sdk-(?:cpp|go|go-v\d|java|nodejs|ruby2?|dotnet-(?:\d{1,2}|core)))|s3fs)/(\d+)\.(\d+)(?:\.(\d+)|)'

# SAFE FME
- regex: '(FME)\/(\d+\.\d+)\.(\d+)\.(\d+)'
Expand All @@ -179,6 +179,9 @@ user_agent_parsers:
- regex: '\[FB.{0,300};'
family_replacement: 'Facebook'

# RecipeRadar crawler
- regex: '(RecipeRadar)/(\d+)\.(\d+)(?:\.(\d+)|)'

# Bots General matcher 'name/0.0'
- regex: '^.{0,200}?(?:\/[A-Za-z0-9\.]{0,50}|) {0,2}([A-Za-z0-9 \-_\!\[\]:]{0,50}(?:[Aa]rchiver|[Ii]ndexer|[Ss]craper|[Bb]ot|[Ss]pider|[Cc]rawl[a-z]{0,50}))[/ ](\d+)(?:\.(\d+)(?:\.(\d+)|)|)'
# Bots containing bot(but not CUBOT)
Expand Down Expand Up @@ -487,11 +490,17 @@ user_agent_parsers:
family_replacement: 'Tenta Browser'

# Ecosia on iOS / Android
- regex: '(Ecosia) ios@(\d+)\.(\d+)\.(\d+)\.(\d+)'
- regex: '(Ecosia) ios@(\d+)(?:\.(\d+)|)(?:\.(\d+)|)(?:\.(\d+)|)'
family_replacement: 'Ecosia iOS'
- regex: '(Ecosia) android@(\d+)\.(\d+)\.(\d+)\.(\d+)'
- regex: '(Ecosia) android@(\d+)(?:\.(\d+)|)(?:\.(\d+)|)(?:\.(\d+)|)'
family_replacement: 'Ecosia Android'

# VivoBrowser
- regex: '(VivoBrowser)\/(\d+)\.(\d+)\.(\d+)\.(\d+)'

# HiBrowser
- regex: '(HiBrowser)\/v(\d+)\.(\d+)\.(\d+)\.(\d+)'

# Chrome Mobile
- regex: 'Version/.{1,300}(Chrome)/(\d+)\.(\d+)\.(\d+)\.(\d+)'
family_replacement: 'Chrome Mobile WebView'
Expand Down Expand Up @@ -1387,6 +1396,13 @@ os_parsers:
# Box Drive and Box Sync on Mac OS X use OSX version numbers, not Darwin
- regex: '^Box.{0,200};(Darwin)/(10)\.(1\d)(?:\.(\d+)|)'
os_replacement: 'Mac OS X'

##########
# Hashicorp API
# APN/1.0 HashiCorp/1.0 Terraform/1.8.0 (+https://www.terraform.io) terraform-provider-aws/4.67.0 (+https://registry.terraform.io/providers/hashicorp/aws) aws-sdk-go/1.44.261 (go1.19.8; darwin; arm64)
##########
- regex: 'darwin; arm64'
os_replacement: 'Mac OS X'

##########
# iOS
Expand Down Expand Up @@ -1672,29 +1688,27 @@ os_parsers:
- regex: 'CFNetwork/.{0,100} Darwin/(21)\.\d+'
os_replacement: 'iOS'
os_v1_replacement: '15'
- regex: 'CFNetwork/.{0,100} Darwin/22\.0\.\d+'
- regex: 'CFNetwork/.{0,100} Darwin/22\.([0-5])\.\d+'
os_replacement: 'iOS'
os_v1_replacement: '16'
os_v2_replacement: '0'
- regex: 'CFNetwork/.{0,100} Darwin/22\.1\.\d+'
os_v2_replacement: '$1'
- regex: 'CFNetwork/.{0,100} Darwin/(22)\.\d+'
os_replacement: 'iOS'
os_v1_replacement: '16'
os_v2_replacement: '1'
- regex: 'CFNetwork/.{0,100} Darwin/22\.2\.\d+'
- regex: 'CFNetwork/.{0,100} Darwin/23\.([0-5])\.\d+'
os_replacement: 'iOS'
os_v1_replacement: '16'
os_v2_replacement: '2'
- regex: 'CFNetwork/.{0,100} Darwin/22\.3\.\d+'
os_v1_replacement: '17'
os_v2_replacement: '$1'
- regex: 'CFNetwork/.{0,100} Darwin/(23)\.\d+'
os_replacement: 'iOS'
os_v1_replacement: '16'
os_v2_replacement: '3'
- regex: 'CFNetwork/.{0,100} Darwin/22\.4\.\d+'
os_v1_replacement: '17'
- regex: 'CFNetwork/.{0,100} Darwin/24\.([0-5])\.\d+'
os_replacement: 'iOS'
os_v1_replacement: '16'
os_v2_replacement: '4'
- regex: 'CFNetwork/.{0,100} Darwin/(22)\.\d+'
os_v1_replacement: '18'
os_v2_replacement: '$1'
- regex: 'CFNetwork/.{0,100} Darwin/(24)\.\d+'
os_replacement: 'iOS'
os_v1_replacement: '16'
os_v1_replacement: '18'
- regex: 'CFNetwork/.{0,100} Darwin/'
os_replacement: 'iOS'

Expand Down Expand Up @@ -1889,6 +1903,18 @@ os_parsers:
# Roku Digital-Video-Players https://www.roku.com/
- regex: '^(Roku)/DVP-(\d+)\.(\d+)'

##########
# Amazon S3 client boto3
# Hasicorp API
# Boto3/1.28.62 md/Botocore#1.31.62 ua/2.0 os/macos#22.4.0 md/arch#arm64 lang/python#3.11.6 md/pyimpl#CPython cfg/retry-mode#legacy Botocore/1.31.62
# APN/1.0 HashiCorp/1.0 Terraform/1.8.1 (+https://www.terraform.io) terraform-provider-aws/4.67.0 (+https://registry.terraform.io/providers/hashicorp/aws) aws-sdk-go-v2/1.18.0 os/macos lang/go/1.19.8 md/GOOS/darwin md/GOARCH/arm64 api/identitystore/1.16.11
##########
- regex: 'os\/macos[#]?(\d*)[.]?(\d*)[.]?(\d*)'
os_replacement: 'Mac OS X'
os_v1_replacement: '$1'
os_v2_replacement: '$2'
os_v3_replacement: '$3'

device_parsers:

#########
Expand Down Expand Up @@ -5905,7 +5931,7 @@ device_parsers:
##########
# Spiders (this is a hack...)
##########
- regex: '^.{0,100}(bot|BUbiNG|zao|borg|DBot|oegp|silk|Xenu|zeal|^NING|CCBot|crawl|htdig|lycos|slurp|teoma|voila|yahoo|Sogou|CiBra|Nutch|^Java/|^JNLP/|Daumoa|Daum|Genieo|ichiro|larbin|pompos|Scrapy|snappy|speedy|spider|msnbot|msrbot|vortex|^vortex|crawler|favicon|indexer|Riddler|scooter|scraper|scrubby|WhatWeb|WinHTTP|bingbot|BingPreview|openbot|gigabot|furlbot|polybot|seekbot|^voyager|archiver|Icarus6j|mogimogi|Netvibes|blitzbot|altavista|charlotte|findlinks|Retreiver|TLSProber|WordPress|SeznamBot|ProoXiBot|wsr\-agent|Squrl Java|EtaoSpider|PaperLiBot|SputnikBot|A6\-Indexer|netresearch|searchsight|baiduspider|YisouSpider|ICC\-Crawler|http%20client|Python-urllib|dataparksearch|converacrawler|Screaming Frog|AppEngine-Google|YahooCacheSystem|fast\-webcrawler|Sogou Pic Spider|semanticdiscovery|Innovazion Crawler|facebookexternalhit|Google.{0,200}/\+/web/snippet|Google-HTTP-Java-Client|BlogBridge|IlTrovatore-Setaccio|InternetArchive|GomezAgent|WebThumbnail|heritrix|NewsGator|PagePeeker|Reaper|ZooShot|holmes|NL-Crawler|Pingdom|StatusCake|WhatsApp|masscan|Google Web Preview|Qwantify|Yeti|OgScrper)'
- regex: '^.{0,100}(bot|BUbiNG|zao|borg|DBot|oegp|silk|Xenu|zeal|^NING|CCBot|crawl|htdig|lycos|slurp|teoma|voila|yahoo|Sogou|CiBra|Nutch|^Java/|^JNLP/|Daumoa|Daum|Genieo|ichiro|larbin|pompos|Scrapy|snappy|speedy|spider|msnbot|msrbot|vortex|^vortex|crawler|favicon|indexer|Riddler|scooter|scraper|scrubby|WhatWeb|WinHTTP|bingbot|BingPreview|openbot|gigabot|furlbot|polybot|seekbot|^voyager|archiver|Icarus6j|mogimogi|Netvibes|blitzbot|altavista|charlotte|findlinks|Retreiver|TLSProber|WordPress|SeznamBot|ProoXiBot|wsr\-agent|Squrl Java|EtaoSpider|PaperLiBot|SputnikBot|A6\-Indexer|netresearch|searchsight|baiduspider|YisouSpider|ICC\-Crawler|http%20client|Python-urllib|dataparksearch|converacrawler|Screaming Frog|AppEngine-Google|YahooCacheSystem|fast\-webcrawler|Sogou Pic Spider|semanticdiscovery|Innovazion Crawler|facebookexternalhit|Google.{0,200}/\+/web/snippet|Google-HTTP-Java-Client|BlogBridge|IlTrovatore-Setaccio|InternetArchive|GomezAgent|WebThumbnail|heritrix|NewsGator|PagePeeker|Reaper|ZooShot|holmes|NL-Crawler|Pingdom|StatusCake|WhatsApp|masscan|Google Web Preview|Qwantify|Yeti|OgScrper|RecipeRadar)'
regex_flag: 'i'
device_replacement: 'Spider'
brand_replacement: 'Spider'
Expand Down
Binary file modified leo-data-processing-starter-v_0.9.0.jar
Binary file not shown.
Binary file modified leo-main-starter-v_0.9.0.jar
Binary file not shown.
Binary file modified leo-observer-starter-v_0.9.0.jar
Binary file not shown.
Binary file modified leo-scheduler-starter-v_0.9.0.jar
Binary file not shown.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,8 @@ <h4 id="segment_name" class="highlight_text" > </h4>
<i class="fa fa-object-group" aria-hidden="true"></i> Query with LEO AI
</button>
<label class="control-label" for="segment_custom_aql_editor" style="margin-bottom: 8px;">
<mark> ArangoDB Query Editor </mark> (You are an AQL expert. You know what you are doing)
<mark> ArangoDB Query Editor </mark> (You are an AQL expert. You know what you are doing) <br>
<p> Disallow 'update', 'remove', 'insert', or 'replace' or 'cdp_profile' keywords </p>
</label>
<div id="segment_custom_query_holder" style="width: 98%; border-left: 1px dotted;">
<textarea id="segment_custom_aql_editor" ></textarea>
Expand Down

0 comments on commit 6b4aba9

Please sign in to comment.