Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix add jindocache engine for jindodata 5.0 beta release #3346

Merged
merged 11 commits into from
Jul 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -39,4 +39,3 @@ sdk
# debug related
**/__debug_bin
**/__pycache__

24 changes: 24 additions & 0 deletions charts/fluid-dataloader/jindocache/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@

### 0.10.0
- Support cron dataload

### 0.6.0
- Support for setting affinity, tolerations, nodeSelector, schedulerName
### 0.5.0

- Support configurable DataLoad pod metadata

### 0.4.0

- Change Restart Policy from OnFailure to Never

### 0.3.0
- Remove unused volumes

### 0.2.0
- Support JindoFSx

### 0.1.0

- Support parallel prefetch job
- Support configurations by setting values
23 changes: 23 additions & 0 deletions charts/fluid-dataloader/jindocache/Chart.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
apiVersion: v2
name: fluid-dataloader
description: A Helm chart for Fluid to prefetch data

# A chart can be either an 'application' or a 'library' chart.
#
# Application charts are a collection of templates that can be packaged into versioned archives
# to be deployed.
#
# Library charts provide useful utilities or functions for the chart developer. They're included as
# a dependency of application charts to inject those utilities and functions into the rendering
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
type: application

# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 0.5.0

# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
# follow Semantic Versioning. They should reflect the version the application is using.
appVersion: 0.5.0
173 changes: 173 additions & 0 deletions charts/fluid-dataloader/jindocache/templates/configmap.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
apiVersion: v1
kind: ConfigMap
metadata:
name: {{ printf "%s-data-load-script" .Release.Name }}
labels:
release: {{ .Release.Name }}
role: dataload-job
data:
dataloader.jindo.init: |
#!/usr/bin/env bash
# -x traces each command as it runs; -e aborts the script on the first failing command.
set -xe
# Names of the environment variables that public::jindo::init_conf re-exports.
jindo_env_vars=(
STORAGE_ADDRESS
)
#######################################
# Re-export every variable named in jindo_env_vars with its value wrapped in
# literal double quotes (FOO=bar becomes FOO="bar", quote characters included).
# Globals:
#   jindo_env_vars (read) - names of the variables to re-export
# Arguments:
#   None
# Returns:
#   0; names that are not set are skipped.
#######################################
function public::jindo::init_conf() {
  local name
  # Iterate over the allow-list itself instead of pattern-matching every
  # environment entry against the joined list: a regex/substring test would
  # also rewrite unrelated variables such as AGE or ADDRESS, and parsing the
  # output of `env` line by line breaks on multi-line values.
  for name in "${jindo_env_vars[@]}"; do
    # ${!name+set} is non-empty only when the variable exists (even if empty).
    if [[ -n "${!name+set}" ]]; then
      # NOTE(review): the embedded quotes are kept from the previous
      # implementation; calling this twice nests them - confirm that the
      # consumer of these variables really expects quoted values.
      export "${name}=\"${!name}\""
    fi
  done
}
# Entry point of the init script: delegates to public::jindo::init_conf.
main() {
public::jindo::init_conf
}
# Run immediately; no arguments are passed to main.
main
dataloader.distributedLoad: |
#!/usr/bin/env bash
# -x traces each command as it runs; -e aborts the script on the first failing command.
set -xe

#######################################
# Abort the script when a path is reported missing by `jindo fs -ls`.
# The listing runs under a 30 second timeout and only the last three lines of
# its combined stdout/stderr are inspected.
# Arguments:
#   $1 - path, appended verbatim to the "jindo://" prefix
# Outputs:
#   An error message on stdout when the path is missing.
# Returns:
#   0 when the inspected output does not contain "No such file or directory";
#   otherwise the whole script exits with status 1.
#######################################
function checkPathExistence() {
  local targetPath=$1
  local strUnexistence="No such file or directory"
  local checkPathResult
  # The URI is quoted so a path containing spaces or glob characters reaches
  # jindo as one argument. Declaration and assignment are split so `local`
  # does not hide the pipeline status; `|| true` keeps `set -e` from aborting
  # before the output has been inspected.
  checkPathResult=$(timeout 30s jindo fs -ls "jindo://${targetPath}" 2>&1 | tail -3) || true
  # NOTE(review): a timeout, or any failure whose output lacks the marker
  # text, is treated as "path exists" - confirm that this is intended.
  if [[ "${checkPathResult}" == *"${strUnexistence}"* ]]; then
    echo -e "dataLoad failed because some paths not exist."
    exit 1
  fi
}

#######################################
# Build and run one `jindocache -load` command for a single path.
# Globals (read):
#   needLoadMetadata, loadMetadataOnly, atomicCache, loadMemorydata,
#   enableDryrun            - the string 'true' switches the option on
#   enbaleCacheListLocation - 'true' selects the cache-list mode
#   enableFilter            - 'true' selects the filter mode
#   cacheListEndpoint, cacheListAccessKeyId, cacheListAccessKeySecret,
#   cacheListUrl            - passed to ossutil64 to fetch /cachelist.txt
#   cacheListReplica, cacheListThread, cacheFilter
# Arguments:
#   $1 - path to load
#   $2 - replica count; 1 is used when empty
#   $3 - prefix prepended to the path (the caller passes the URI scheme)
# Outputs:
#   Progress messages and the final command line on stdout.
# Returns:
#   Exit status of the jindocache command. The script exits earlier if
#   checkPathExistence reports the path as missing.
#######################################
function distributedLoad() {
  local path=$1
  # An empty replica would otherwise leave "-replica" without a value and make
  # the next argument (the path) be consumed in its place.
  local replica=${2:-1}
  local default=$3
  # The command is kept as an array so that every value stays a single
  # argument; running an unquoted string word-splits and glob-expands paths
  # and filters such as "*.txt".
  local -a cmd=(jindocache -load)

  checkPathExistence "$path"

  if [[ $needLoadMetadata == 'true' ]]; then
    echo -e "--- enable metaCache"
    cmd+=(-meta)
    echo -e "now cmd ${cmd[*]}"
  else
    echo -e "--- disable metaCache"
  fi

  if [[ $loadMetadataOnly == 'true' ]]; then
    echo -e "--- disable datacache"
  else
    echo -e "--- enable datacache"
    cmd+=(-data)
    echo -e "now cmd ${cmd[*]}"
  fi

  if [[ $atomicCache == 'true' ]]; then
    echo -e "--- enable atomicCache"
    cmd+=(-atomic)
    echo -e "now cmd ${cmd[*]}"
  else
    echo -e "--- disable atomicCache"
  fi

  if [[ $loadMemorydata == 'true' ]]; then
    echo -e "--- enable loadMemorydata"
    cmd+=(-m)
    echo -e "now cmd ${cmd[*]}"
  else
    echo -e "--- disable loadMemorydata"
  fi

  if [[ $enableDryrun == 'true' ]]; then
    echo -e "--- enable dryrun"
    cmd+=(-dryrun)
    echo -e "now cmd ${cmd[*]}"
  else
    echo -e "--- disable dryrun"
  fi

  if [[ $enbaleCacheListLocation == 'true' ]]; then
    # The binary is looked up in, and run from, the current working directory.
    if [[ -e "ossutil64" ]]; then
      echo "Found ossutil64 executable file under /"
    else
      echo -e "--- begin download ossutil"
      apt-get update && apt-get install wget -y
      # NOTE(review): the binary is fetched over plain HTTP without any
      # checksum verification - consider shipping it in the image instead.
      wget http://gosspublic.alicdn.com/ossutil/1.7.5/ossutil64 && chmod 755 ossutil64
    fi
    # Switch command tracing off while the credentials are on the command
    # line so the access key secret is not written to the job log.
    local xtraceWasOn=false
    if [[ $- == *x* ]]; then
      xtraceWasOn=true
      set +x
    fi
    ./ossutil64 -e "$cacheListEndpoint" -i "$cacheListAccessKeyId" -k "$cacheListAccessKeySecret" cp "$cacheListUrl" /cachelist.txt
    if [[ $xtraceWasOn == 'true' ]]; then
      set -x
    fi
    echo -e "--- enable File List"
    # Cache-list mode loads from "<prefix>/" and ignores the per-path replica.
    cmd+=(-R -replica "${cacheListReplica:-1}" -cachelist /cachelist.txt -thread "${cacheListThread:-10}" "${default}/")
  elif [[ $enableFilter == 'true' ]]; then
    cmd+=(-s -R -replica "$replica" -filter "$cacheFilter" -thread "${cacheListThread:-10}" "${default}${path}")
  else
    cmd+=(-s -R -replica "$replica" "${default}${path}")
  fi

  echo -e "execute cmd ${cmd[*]}"
  time "${cmd[@]}"

  # Presumably a debugging aid: uncomment to pause after the load finishes.
  #echo -e "distributedLoad and sleep start now"
  #sleep 10m
}

#######################################
# Read the load settings from the environment, derive the load mode and call
# distributedLoad once for every entry of DATA_PATH.
# Environment (read):
#   NEED_LOAD_METADATA, LOAD_MEMORY_DATA, LOAD_METADATA_ONLY,
#   ENABLE_ATOMIC_CACHE, DRY_RUN_ENABLE, CACHE_LIST_REPLICA, CACHE_LIST_THREAD,
#   CACHE_LIST_ACCESSKEYID, CACHE_LIST_ACCESSKEYSECRET, CACHE_LIST_ENDPOINT,
#   CACHE_LIST_URL, CACHE_FILTER
#   DATA_PATH     - colon-separated list of paths
#   PATH_REPLICAS - colon-separated replica counts, matched by position
# Globals (written, read by distributedLoad):
#   needLoadMetadata, loadMemorydata, loadMetadataOnly, atomicCache,
#   enableDryrun, cacheListReplica, cacheListThread, cacheListAccessKeyId,
#   cacheListAccessKeySecret, cacheListEndpoint, cacheListUrl, cacheFilter,
#   enbaleCacheListLocation, enableFilter
# Arguments:
#   None (any arguments are ignored)
# Returns:
#   Status of the last distributedLoad call, or 0 when DATA_PATH has no entry.
#######################################
function main() {
  needLoadMetadata="$NEED_LOAD_METADATA"
  loadMemorydata="$LOAD_MEMORY_DATA"
  loadMetadataOnly="$LOAD_METADATA_ONLY"
  atomicCache="$ENABLE_ATOMIC_CACHE"
  enableDryrun=$DRY_RUN_ENABLE
  cacheListReplica=$CACHE_LIST_REPLICA
  cacheListThread=$CACHE_LIST_THREAD
  cacheFilter=$CACHE_FILTER

  # Command tracing is switched off while the credentials are copied and
  # tested so the access key secret does not end up in the job log.
  local xtraceWasOn=false
  if [[ $- == *x* ]]; then
    xtraceWasOn=true
    set +x
  fi
  cacheListAccessKeyId=$CACHE_LIST_ACCESSKEYID
  cacheListAccessKeySecret=$CACHE_LIST_ACCESSKEYSECRET
  cacheListEndpoint=$CACHE_LIST_ENDPOINT
  cacheListUrl=$CACHE_LIST_URL
  # The cache-list mode is used only when all four settings are present.
  # (An earlier assignment from $Enable_CACHE_LIST_LOCATION was always
  # overwritten by this check, so it has been dropped.)
  if [[ -z "$cacheListAccessKeyId" ]] || [[ -z "$cacheListAccessKeySecret" ]] || [[ -z "$cacheListEndpoint" ]] || [[ -z "$cacheListUrl" ]]; then
    enbaleCacheListLocation=false
  else
    enbaleCacheListLocation=true
  fi
  if [[ $xtraceWasOn == 'true' ]]; then
    set -x
  fi

  if [[ -z "$cacheListReplica" ]]; then
    cacheListReplica=1
  else
    echo -e "cacheListReplica $cacheListReplica"
  fi
  if [[ -z "$cacheListThread" ]]; then
    cacheListThread=10
  else
    echo -e "cacheListThread $cacheListThread"
  fi
  if [[ -z "$cacheFilter" ]]; then
    enableFilter=false
  else
    enableFilter=true
    echo -e "enableFilter $enableFilter"
  fi

  local defaultScheme="jindo://"

  # Split on ':' only. An unquoted array assignment would additionally split
  # on whitespace and expand glob characters found in a path.
  local -a rawPaths=() rawReplicas=() paths=() replicas=()
  local entry
  IFS=':' read -r -a rawPaths <<< "$DATA_PATH"
  IFS=':' read -r -a rawReplicas <<< "$PATH_REPLICAS"
  # Empty entries (e.g. from "a::b") are dropped from both lists.
  for entry in "${rawPaths[@]}"; do
    if [[ -n "$entry" ]]; then
      paths+=("$entry")
    fi
  done
  for entry in "${rawReplicas[@]}"; do
    if [[ -n "$entry" ]]; then
      replicas+=("$entry")
    fi
  done

  local i
  for ((i = 0; i < ${#paths[@]}; i++)); do
    echo -e "distributedLoad on ${paths[i]} starts"
    # Every argument is quoted so a missing replica cannot shift the scheme
    # into the replica position; a missing replica falls back to 1, the same
    # default used for cacheListReplica above.
    distributedLoad "${paths[i]}" "${replicas[i]:-1}" "${defaultScheme}"
  done
}

# Start the data load; the script's arguments are forwarded to main.
main "$@"





Loading
Loading