Skip to content

Commit

Permalink
Fix add jindocache engine for jindodata 5.0 beta release (#3346)
Browse files Browse the repository at this point in the history
* add jindocache engine

Signed-off-by: frankleaf <[email protected]>

* add jindocache engine

Signed-off-by: frankleaf <[email protected]>

* add jindocache engine

Signed-off-by: frankleaf <[email protected]>

* update readMe

Signed-off-by: frankleaf <[email protected]>

* fix time

Signed-off-by: frankleaf <[email protected]>

* fix file

Signed-off-by: frankleaf <[email protected]>

* update imageTagSupportAKFile

Signed-off-by: frankleaf <[email protected]>

* add jindocache dataloader

Signed-off-by: frankleaf <[email protected]>

* fix gitignore

Signed-off-by: frankleaf <[email protected]>

* reorder

Signed-off-by: frankleaf <[email protected]>

* reorder

Signed-off-by: frankleaf <[email protected]>

---------

Signed-off-by: frankleaf <[email protected]>
  • Loading branch information
frankleaf authored Jul 21, 2023
1 parent 2bf426c commit 556a605
Show file tree
Hide file tree
Showing 81 changed files with 15,553 additions and 11 deletions.
1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -39,4 +39,3 @@ sdk
# debug related
**/__debug_bin
**/__pycache__

24 changes: 24 additions & 0 deletions charts/fluid-dataloader/jindocache/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@

### 0.10.0
- Support cron dataload

### 0.6.0
- Support setting affinity, tolerations, nodeSelector, and schedulerName
### 0.5.0

- Support configurable DataLoad pod metadata

### 0.4.0

- Change Restart Policy from OnFailure to Never

### 0.3.0
- Remove unused volumes

### 0.2.0
- Support JindoFSx

### 0.1.0

- Support parallel prefetch job
- Support configurations by setting values
23 changes: 23 additions & 0 deletions charts/fluid-dataloader/jindocache/Chart.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
apiVersion: v2
name: fluid-dataloader
description: A Helm chart for Fluid to prefetch data

# A chart can be either an 'application' or a 'library' chart.
#
# Application charts are a collection of templates that can be packaged into versioned archives
# to be deployed.
#
# Library charts provide useful utilities or functions for the chart developer. They're included as
# a dependency of application charts to inject those utilities and functions into the rendering
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
type: application

# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 0.5.0

# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
# follow Semantic Versioning. They should reflect the version the application is using.
appVersion: 0.5.0
173 changes: 173 additions & 0 deletions charts/fluid-dataloader/jindocache/templates/configmap.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
apiVersion: v1
kind: ConfigMap
metadata:
name: {{ printf "%s-data-load-script" .Release.Name }}
labels:
release: {{ .Release.Name }}
role: dataload-job
data:
dataloader.jindo.init: |
#!/usr/bin/env bash
# -x traces every command as it runs; -e aborts the script on the first
# command that fails.
set -xe
# Names of the environment variables that public::jindo::init_conf looks for
# in the output of `env` and re-exports.
jindo_env_vars=(
STORAGE_ADDRESS
)
#######################################
# Re-export every environment variable whose name is listed in
# jindo_env_vars, with its value wrapped in literal double quotes.
# Globals:
#   jindo_env_vars (read) - array of variable names to re-export
# Arguments:
#   None
# Outputs:
#   None (modifies the environment of the current shell)
#######################################
function public::jindo::init_conf() {
  local line key value wanted
  # Read `env` line by line; each line is expected to look like KEY=VALUE.
  while IFS= read -r line; do
    # Lines without "=" (e.g. the tail of a multi-line value) carry no key.
    [[ "$line" == *=* ]] || continue
    # Split around the first "=" only, so values may themselves contain "=".
    key=${line%%=*}
    value=${line#*=}
    # Compare names exactly. A substring/regex test against the joined list
    # would also match unrelated variables such as ADDRESS or S.
    for wanted in "${jindo_env_vars[@]}"; do
      if [[ "$key" == "$wanted" ]]; then
        export "${key}=\"${value}\""
      fi
    done
  done < <(env)
}
# Entry point of the init script: delegates to public::jindo::init_conf.
function main() {
  public::jindo::init_conf
}
main
dataloader.distributedLoad: |
#!/usr/bin/env bash
set -xe
#######################################
# Abort the whole script when the given path cannot be listed.
# Runs `jindo fs -ls` on the path (30 s timeout) and inspects the last three
# lines of its combined stdout/stderr for "No such file or directory".
# Arguments:
#   $1 - path to check; it is appended to the "jindo://" prefix
# Outputs:
#   Writes a failure message to stdout when the path is missing
# Returns:
#   0 when the marker text is absent; exits the shell with status 1 otherwise
#######################################
function checkPathExistence() {
  local targetPath=$1
  local strUnexistence="No such file or directory"
  local checkPathResult
  # The path is quoted so that spaces or glob characters in it reach jindo as
  # a single, literal argument. Only the captured text is inspected, so the
  # pipeline's exit status is deliberately ignored.
  checkPathResult=$(timeout 30s jindo fs -ls "jindo://$targetPath" |& tail -3) || true
  if [[ $checkPathResult =~ $strUnexistence ]]; then
    echo -e "dataLoad failed because some paths not exist."
    exit 1
  fi
}
#######################################
# Build and run one `jindocache -load` command for a single target path.
# Globals (read):
#   needLoadMetadata, loadMetadataOnly, atomicCache, loadMemorydata,
#   enableDryrun, enbaleCacheListLocation, enableFilter, cacheFilter,
#   cacheListReplica, cacheListThread, cacheListEndpoint,
#   cacheListAccessKeyId, cacheListAccessKeySecret, cacheListUrl
# Arguments:
#   $1 - path to load (checked with checkPathExistence first)
#   $2 - value passed to -replica when no cache list is used
#   $3 - URI prefix that is prepended to the path
# Outputs:
#   Progress messages and the final command line on stdout
#######################################
function distributedLoad() {
  local path=$1
  local replica=$2
  local default=$3
  # The command is kept as an array so every argument is passed verbatim:
  # a filter such as "*.txt" or a path with spaces is neither glob-expanded
  # nor split. "${cmd[*]}" is used only for logging.
  local -a cmd=(jindocache -load)
  checkPathExistence "$path"
  if [[ $needLoadMetadata == 'true' ]]; then
    echo -e "--- enable metaCache"
    cmd+=(-meta)
    echo -e "now cmd ${cmd[*]}"
  else
    echo -e "--- disable metaCache"
  fi
  if [[ $loadMetadataOnly == 'true' ]]; then
    echo -e "--- disable datacache"
  else
    echo -e "--- enable datacache"
    cmd+=(-data)
    echo -e "now cmd ${cmd[*]}"
  fi
  if [[ $atomicCache == 'true' ]]; then
    echo -e "--- enable atomicCache"
    cmd+=(-atomic)
    echo -e "now cmd ${cmd[*]}"
  else
    echo -e "--- disable atomicCache"
  fi
  if [[ $loadMemorydata == 'true' ]]; then
    echo -e "--- enable loadMemorydata"
    cmd+=(-m)
    echo -e "now cmd ${cmd[*]}"
  else
    echo -e "--- disable loadMemorydata"
  fi
  if [[ $enableDryrun == 'true' ]]; then
    echo -e "--- enable dryrun"
    cmd+=(-dryrun)
    echo -e "now cmd ${cmd[*]}"
  else
    echo -e "--- disable dryrun"
  fi
  if [[ $enbaleCacheListLocation == 'true' ]]; then
    # ossutil64 is looked up in (and downloaded to) the current directory.
    if [[ -e "ossutil64" ]]; then
      echo "Found ossutil64 executable file under /"
    else
      echo -e "--- begin download ossutil"
      apt-get update && apt-get install wget -y
      # NOTE(review): the binary is fetched over plain HTTP without any
      # checksum verification - consider pinning a digest.
      wget http://gosspublic.alicdn.com/ossutil/1.7.5/ossutil64 && chmod 755 ossutil64
    fi
    # Command tracing (set -x) would print the access key secret into the
    # log, so tracing is switched off for this one call and restored after.
    local restore_xtrace=false
    if [[ $- == *x* ]]; then
      restore_xtrace=true
      set +x
    fi
    ./ossutil64 -e "$cacheListEndpoint" -i "$cacheListAccessKeyId" -k "$cacheListAccessKeySecret" cp "$cacheListUrl" /cachelist.txt
    if [[ $restore_xtrace == 'true' ]]; then
      set -x
    fi
    echo -e "--- enable File List"
    cmd+=(-R -replica "$cacheListReplica" -cachelist /cachelist.txt -thread "$cacheListThread" "$default/")
  elif [[ $enableFilter == 'true' ]]; then
    cmd+=(-s -R -replica "$replica" -filter "$cacheFilter" -thread "$cacheListThread" "$default$path")
  else
    cmd+=(-s -R -replica "$replica" "$default$path")
  fi
  echo -e "execute cmd ${cmd[*]}"
  time "${cmd[@]}"
  #echo -e "distributedLoad and sleep start now"
  #sleep 10m
}
#######################################
# Entry point of the distributedLoad script.
# Copies the job settings from environment variables into the globals read by
# distributedLoad, fills in defaults, then calls distributedLoad once per
# path listed in DATA_PATH.
# Globals (read):
#   NEED_LOAD_METADATA, LOAD_MEMORY_DATA, LOAD_METADATA_ONLY,
#   ENABLE_ATOMIC_CACHE, CACHE_LIST_REPLICA, CACHE_LIST_THREAD,
#   CACHE_LIST_ACCESSKEYID, CACHE_LIST_ACCESSKEYSECRET, CACHE_LIST_ENDPOINT,
#   CACHE_LIST_URL, CACHE_FILTER, DRY_RUN_ENABLE,
#   DATA_PATH (colon-separated paths), PATH_REPLICAS (colon-separated counts)
# Globals (written):
#   needLoadMetadata, loadMemorydata, loadMetadataOnly, atomicCache,
#   cacheListReplica, cacheListThread, enbaleCacheListLocation,
#   cacheListAccessKeyId, cacheListAccessKeySecret, cacheListEndpoint,
#   cacheListUrl, cacheFilter, enableFilter, enableDryrun
#######################################
function main() {
  needLoadMetadata="$NEED_LOAD_METADATA"
  loadMemorydata="$LOAD_MEMORY_DATA"
  loadMetadataOnly="$LOAD_METADATA_ONLY"
  atomicCache="$ENABLE_ATOMIC_CACHE"
  cacheListReplica=$CACHE_LIST_REPLICA
  cacheListThread=$CACHE_LIST_THREAD
  cacheListAccessKeyId=$CACHE_LIST_ACCESSKEYID
  cacheListAccessKeySecret=$CACHE_LIST_ACCESSKEYSECRET
  cacheListEndpoint=$CACHE_LIST_ENDPOINT
  cacheListUrl=$CACHE_LIST_URL
  cacheFilter=$CACHE_FILTER
  enableFilter=false
  enableDryrun=$DRY_RUN_ENABLE
  # Decide whether to use the cache list location: it is enabled only when
  # the access key id, access key secret, endpoint and URL are all set.
  if [[ -z "$cacheListAccessKeyId" ]] || [[ -z "$cacheListAccessKeySecret" ]] || [[ -z "$cacheListEndpoint" ]] || [[ -z "$cacheListUrl" ]]; then
    enbaleCacheListLocation=false
  else
    enbaleCacheListLocation=true
  fi
  if [[ -z "$cacheListReplica" ]]; then
    cacheListReplica=1
  else
    echo -e "cacheListReplica $cacheListReplica"
  fi
  if [[ -z "$cacheListThread" ]]; then
    cacheListThread=10
  else
    echo -e "cacheListThread $cacheListThread"
  fi
  if [[ -z "$cacheFilter" ]]; then
    enableFilter=false
  else
    enableFilter=true
    echo -e "enableFilter $enableFilter"
  fi
  local default="jindo://"
  local -a paths=() replicas=()
  # Split on ":" only, without word splitting or globbing, so a path that
  # contains spaces or wildcard characters stays one literal entry.
  IFS=: read -r -a paths <<< "$DATA_PATH"
  IFS=: read -r -a replicas <<< "$PATH_REPLICAS"
  local i path replica
  for ((i = 0; i < ${#paths[@]}; i++)); do
    path="${paths[i]}"
    # Skip empty entries produced by a leading, trailing or doubled ":".
    [[ -n "$path" ]] || continue
    # A missing replica entry must not shift the remaining arguments.
    # NOTE(review): falling back to 1 mirrors the cacheListReplica default
    # above - confirm this is the intended replica count.
    replica="${replicas[i]:-1}"
    echo -e "distributedLoad on $path starts"
    distributedLoad "$path" "$replica" "$default"
    #echo -e "distributedLoad on $path ends"
  done
}
main "$@"
Loading

0 comments on commit 556a605

Please sign in to comment.