ci: add CI action to reindex site into Trieve #28

Workflow file for this run

.github/workflows/index-trieve-search-index.yaml at 86a9e99

	# Workflow that sends the documentation site's crawl configuration to Trieve.
	name: "Index Trieve Search Component"

	# Triggers: every push (no branch or path filter is configured here) and
	# manual runs via workflow_dispatch.
	on:
	push:
	workflow_dispatch:

	# One job; its steps run on an ubuntu-24.04 runner.
	jobs:
	create-search-index:
	runs-on: ubuntu-24.04
	steps:
	# Step: refresh the package index and install/upgrade curl before it is used.
	- name: update-curl
	shell: bash
	run: |
	# -y answers apt's confirmation prompt; without it apt-get aborts whenever it
	# needs confirmation (e.g. extra packages to upgrade) because the runner has
	# no interactive terminal.
	sudo apt-get update && sudo apt-get install -y curl

	# Step: send the crawl configuration for the docs site to the Trieve API.
	- name: "Ingest Trieve Search Index"
	shell: bash
	# Repository secrets are exposed to the script under shorter variable names.
	env:
	ORGANIZATION_ID: ${{ secrets.TRIEVE_ORGANIZATION_ID }}
	DATASET_ID: ${{ secrets.TRIEVE_DATASET_ID }}
	API_KEY: ${{ secrets.TRIEVE_API_KEY }}
	run: \|
	# Base URL of the Trieve API; the request URL further down is built from it.
	TRIEVE_URL="https://api.trieve.ai"

	# Print setup help: the repository secrets this step expects, and the
	# equivalent step-level env names that can be set instead.
	# Arguments: none
	# Outputs:   help text on stdout
	all_envs() {
	  # The quoted delimiter ('EOF') makes the text literal, so the embedded
	  # double quotes and <placeholders> are printed as-is instead of ending the
	  # string or being parsed as redirections. "<<-" strips leading tabs only,
	  # so keep the text and the closing EOF at the script's base indentation.
	  cat <<-'EOF'

	These GitHub repo secrets must be set:
	TRIEVE_ORGANIZATION_ID="<your-trieve-organization-id>"
	TRIEVE_DATASET_ID="<your-trieve-dataset-id>"
	TRIEVE_API_KEY="<your-trieve-api-key>"

	Alternatively, you can modify the action's env entries as such:
	ORGANIZATION_ID="<your-trieve-organization-id>"
	DATASET_ID="<your-trieve-dataset-id>"
	API_KEY="<your-trieve-api-key>"

	EOF
	}

	# Stop before any network call when a required credential is unset or empty.
	# The exit is unconditional: in an "echo && all_envs && exit 1" chain the exit
	# is skipped if the help function returns non-zero, and the script would then
	# carry on with missing credentials.
	for required_var in ORGANIZATION_ID DATASET_ID API_KEY; do
	  # ${!required_var} is bash indirect expansion: the value of the variable
	  # whose name is held in required_var. Quoted so empty values or values
	  # containing whitespace are tested correctly.
	  if [[ -z "${!required_var:-}" ]]; then
	    echo "${required_var} is not found"
	    all_envs
	    exit 1
	  fi
	done

	# JSON request body for the dataset update call below. "crawl_options" carries
	# the crawl settings named by its fields (start URL, include/exclude paths and
	# tags, interval, page limit); their exact semantics are defined by the Trieve
	# API, not by this script. The single-quoted string is closed and reopened
	# around ${DATASET_ID} so that this one value is expanded by the shell while
	# the rest of the JSON stays literal.
	PAYLOAD='{
	"crawl_options": {
	"allow_external_links": null,
	"boost_titles": true,
	"exclude_paths": [],
	"exclude_tags": [
	"nav",
	"img",
	"pre",
	".PageFeedback_feedbackContainer___tGjJ",
	".doc-sidenav",
	".doc-toc",
	"div.z-[10].flex.flex-col.justify-center.border-t.border-solid.border-gray-900.bg-signoz_ink-500.bg-opacity-70.backdrop-blur-md",
	".related-articles",
	"aside",
	"footer"
	],
	"include_paths": [
	"faqs",
	"docs"
	],
	"include_tags": [
	".doc-content",
	"main"
	],
	"interval": "daily",
	"limit": 10000,
	"site_url": "https://signoz.io/docs/",
	"scrape_options": null
	},
	"dataset_id": "'${DATASET_ID}'"
	}'

	echo "Making call to ${TRIEVE_URL}/api/dataset to scrape dataset ${DATASET_ID} org ${ORGANIZATION_ID}"

	# Log platform and curl build details to help diagnose runner-specific failures.
	uname -a
	curl --version

	# Send the update request and capture the full response.
	#   -i   keeps the response headers in the captured output (useful in logs)
	#   -w   appends the HTTP status code on its own final line
	#   -sS  hides the progress meter but still reports transport errors
	# curl exits 0 on HTTP error statuses, so the "||" branch only handles
	# transport-level failures (DNS, TLS, connection refused, ...).
	response=$(curl -sS -X PUT "${TRIEVE_URL}/api/dataset" \
	  -H "Content-Type: application/json" \
	  -H "Authorization: ${API_KEY}" \
	  -H "TR-Organization: ${ORGANIZATION_ID}" \
	  -H "TR-Dataset: ${DATASET_ID}" \
	  -d "${PAYLOAD}" \
	  -w "\n%{http_code}" \
	  -i) || {
	  echo "Error: request to ${TRIEVE_URL}/api/dataset could not be completed" >&2
	  exit 1
	}

	# The status code is the last line; everything before it is headers + body.
	http_code=${response##*$'\n'}
	response_body=${response%$'\n'*}

	printf '%s\n' "${response_body}"

	# Fail the step on any non-2xx status so a rejected request is not reported
	# as a successful run.
	if [[ "${http_code}" =~ ^2[0-9][0-9]$ ]]; then
	  echo "Dataset update request succeeded (HTTP ${http_code})"
	else
	  echo "Error: Received HTTP status code ${http_code}" >&2
	  exit 1
	fi

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

ci: add CI action to reindex site into Trieve #28

Workflow file

ci: add CI action to reindex site into Trieve #28

Jobs

Run details

Workflow file for this run