captaincore/app/capture
Austin Ginder 19d182f649 🐛 FIX: Sync data
2025-02-21 12:19:26 -05:00

251 lines
7.5 KiB
Bash
Executable file

#!/usr/bin/env bash
#
# Captures a website's pages visually over time, based on quicksaves and HTML changes.
#
# `captaincore capture`
#
# [<site>...]
# One or more sites to check.
#
# [@<target>]
# Target groups of sites like @all @production or @staging.
#
# [--pages=<page>,<page>,<page>...]
# Overrides pages to check. Defaults to site's `capture_pages` configuration.
#
# Exactly one positional argument (the site) is accepted.
if (( $# != 1 )); then
  echo -e "${COLOR_RED}Error:${COLOR_NORMAL} Please specify a <site>."
  exit
fi

# Import every line printed by configs.php as a shell variable assignment,
# ignoring any line that starts with "Error:".
while read config; do
  case "$config" in
    "Error:"*) ;;
    *) declare "$config" ;;
  esac
done <<< "$(php ${CAPTAINCORE_PATH}/lib/local-scripts/configs.php fetch)"

# User agent string passed along with screenshot requests.
user_agent="captaincore/1.0 (CaptainCore Capture by CaptainCore.io)"

# Site to capture, taken from the single positional argument.
site=$1
# Percent-encodes a string for use in a URL query string and writes the
# result to stdout without a trailing newline.
#   - Unreserved characters (A-Z a-z 0-9 . ~ _ -) pass through unchanged.
#   - A space becomes "+".
#   - Every other byte becomes %XX with two uppercase hex digits.
# Arguments: $1 - string to encode
urlencode() {
  # Force the C locale for the duration of the call so the string is walked
  # byte by byte and the [a-z] ranges below only match ASCII.
  local LC_ALL=C
  local input=$1
  local length=${#input}
  local offset char
  for (( offset = 0; offset < length; offset++ )); do
    char=${input:offset:1}
    case "$char" in
      [a-zA-Z0-9.~_-]) printf '%s' "$char" ;;
      ' ') printf '+' ;;
      # %02X keeps the leading zero for bytes below 0x10 (tab -> %09).
      *) printf '%%%02X' "'$char" ;;
    esac
  done
}
# Captures the configured pages of $site.
#
# Downloads each page's HTML into a per-site git repository, requests a
# screenshot for every page whose HTML changed, commits the result, uploads
# the screenshots with rclone and reports the new capture to the CaptainCore
# API.
#
# Globals read: site, path, pages, capture_pages, force, debug, auth,
#   user_agent, CAPTAIN_ID, captaincore_fleet, captaincore_dev,
#   captaincore_api, token, rclone_upload, screenshots_cloud_api_key,
#   screenshots_cloud_api_secret. Site settings such as domain, home_url and
#   site_id are presumably supplied by `captaincore site get --bash`
#   (verify against that command's output).
# Globals written: git_status, files_changed, capture_count; site and a few
#   legacy variables are cleared after a successful run.
# Returns: 0 on success, when nothing changed, or after printing the payload
#   in debug mode; 1 when the site cannot be resolved or the local capture
#   directory cannot be used.
run_command() {
  local environment site_configs site_path
  local remote_path_captures remote_path_screenshots
  local timedate captured_pages page file_name check_status stale
  local query screenshots_cloud_token
  local git_commit git_commit_short created_at
  local image new_image_name capture payload
  local -a page_list=() api_curl_opts=()

  # Extract environment
  if [[ "$site" == *"-staging"* ]]; then
    environment=staging
  else
    environment=production
  fi

  # Load site configs. `declare` inside a function makes these function-local.
  while read site_configs; do
    if [[ -n "$site_configs" ]]; then
      declare "$site_configs"
    fi
  done <<< "$(captaincore site get "$site" --bash --captain-id="$CAPTAIN_ID")"

  # Stop if the site could not be resolved. This must be `return`: `continue`
  # outside a loop only prints an error and execution carries on.
  if [[ "$domain" == "" ]] || [[ "$site" == "" ]]; then
    echo "Can't locate website for $site"
    return 1
  fi

  if [[ "$home_url" == "" ]]; then
    echo -e "${COLOR_RED}Error:${COLOR_NORMAL} Site $domain is missing home_url. Recommend running sync-data first."
    return 1
  fi

  if [[ "$pages" != "" ]]; then
    echo "Overidding default page selection."
    capture_pages=$pages
  fi

  if [[ "$capture_pages" == "" ]]; then
    capture_pages="/"
  fi

  echo "Capturing site ${site}-${environment} for pages $capture_pages"

  # Define remote paths
  site_path=${site}_${site_id}/${environment}
  remote_path_captures=${site_path}/captures
  remote_path_screenshots=${site_path}/screenshots
  if [[ "$captaincore_fleet" == "true" ]]; then
    remote_path_captures=${CAPTAIN_ID}/${remote_path_captures}
    remote_path_screenshots=${CAPTAIN_ID}/${remote_path_screenshots}
  fi

  # Append trailing slash if home_directory exist
  if [[ "$home_directory" != "" ]]; then
    home_directory="${home_directory}/"
  fi

  # Current timestamp
  timedate=$(date +%Y-%m-%d_%H-%M-%S)

  # Prepare git repo. Refuse to continue without a usable working directory:
  # the steps below delete *.capture files and run git in the current
  # directory, so a failed cd must never be ignored.
  if [[ -z "$path" ]]; then
    echo -e "${COLOR_RED}Error:${COLOR_NORMAL} Local storage path is not configured."
    return 1
  fi
  mkdir -p "$path/$site_path/captures/"
  if ! cd "$path/$site_path/captures/"; then
    echo -e "${COLOR_RED}Error:${COLOR_NORMAL} Unable to enter $path/$site_path/captures/"
    return 1
  fi

  # Create new git repo if needed
  if [[ ! -d ".git" ]]; then
    git init
    echo "/.screenshots" > .gitignore
  fi

  # Remove current captures so pages no longer selected drop out of the repo
  for stale in *.capture; do
    [[ -e "$stale" ]] || continue   # glob matched nothing
    rm -- "$stale"
  done

  # Create array of pages to capture (split on commas, no glob expansion)
  IFS=',' read -r -a page_list <<< "$capture_pages"
  captured_pages=""

  # Loop through and capture
  for page in "${page_list[@]}"; do
    # Slashes are not valid in file names, so "/" is stored as "#"
    file_name="${page//\//#}.capture"
    echo "Capturing ${home_url}${page} to ${page//\//#}.capture"
    curl -L --max-time 30 --compressed --silent "${home_url}${page}" > "$file_name"

    # Continue if no changes found
    check_status=$( git status -s -- "$file_name" )
    if [[ "$check_status" == "" ]]; then
      continue
    fi

    git add "$file_name"
    check_status=$( git status -s -- "$file_name" )
    if [[ "$check_status" == "A"* || "$check_status" == "M"* || "$check_status" == "R"* ]]; then
      captured_pages="${captured_pages}${page},"
      mkdir -p ".screenshots/"

      # The token below is an HMAC of this exact string, so the parameter
      # order must stay identical to what is sent in the request.
      query="url=$( urlencode "${home_url}${page}" )"
      if [[ "$auth" != "" ]]; then
        query="${query}&authorization=$( urlencode "Basic $auth" )"
      fi
      query="${query}&full_page=true&viewport_width=1280&force=true&user_agent=$( urlencode "$user_agent" )&format=jpg&pixel_ratio=2"

      screenshots_cloud_token=$( printf '%s' "$query" | openssl sha1 -hmac "$screenshots_cloud_api_secret" | sed 's/^.* //' )
      # NOTE(review): this line prints the API key and, when set, the Basic
      # auth value to the log.
      echo "Downloading https://api.screenshots.cloud/v1/screenshot/$screenshots_cloud_api_key/$screenshots_cloud_token/?$query"
      curl --silent "https://api.screenshots.cloud/v1/screenshot/$screenshots_cloud_api_key/$screenshots_cloud_token/?$query" > ".screenshots/${page//\//#}_working.jpg"
    fi
  done

  # Add all files to git repo
  git add -A

  # Current git status
  git_status=$(git status -s)
  if [[ "$git_status" == "" && "$force" != "true" ]]; then
    # Skip capture as nothing changed
    echo "No changes found."
    return 0
  fi

  # New commit
  git commit -m "Captured at $timedate"

  # Save git hash
  git_commit=$(git log -n 1 --pretty=format:"%H")        # full commit hash
  git_commit_short=$(git log -n 1 --pretty=format:"%h")  # abbreviated hash
  git_status=$(git show "$git_commit" --shortstat --format=)
  created_at=$(git show -s --pretty=format:"%ct" "$git_commit")  # commit time (UNIX timestamp)
  files_changed=$( git show "$git_commit" --name-status --format= )

  # Organize new screenshots: <page>_working.jpg -> <page>_<created_at>_<hash>.jpg
  for image in .screenshots/*_working.jpg; do
    [[ -e "$image" ]] || continue   # glob matched nothing
    new_image_name="${image%_working.jpg}_${created_at}_${git_commit_short}.jpg"

    # Generate home page thumbnails
    if [[ "$image" == ".screenshots/#_working.jpg" ]]; then
      convert ".screenshots/#_working.jpg" -thumbnail 800 -gravity North -crop 800x500+0+0 ".screenshots/${created_at}_${git_commit_short}_thumb-800.jpg"
      convert ".screenshots/${created_at}_${git_commit_short}_thumb-800.jpg" -thumbnail 100 ".screenshots/${created_at}_${git_commit_short}_thumb-100.jpg"
      rclone move ".screenshots/${created_at}_${git_commit_short}_thumb-100.jpg" "${rclone_upload}${remote_path_screenshots}/" --fast-list --transfers=32 --no-update-modtime --progress --stats-one-line --stats=1m
      rclone move ".screenshots/${created_at}_${git_commit_short}_thumb-800.jpg" "${rclone_upload}${remote_path_screenshots}/" --fast-list --transfers=32 --no-update-modtime --progress --stats-one-line --stats=1m
    fi

    mv -- "$image" "$new_image_name"
  done

  # Copy to remote storage (nothing to move when no screenshot was taken)
  if [[ -d ".screenshots" ]]; then
    rclone move .screenshots/ "${rclone_upload}${remote_path_captures}/" --fast-list --transfers=32 --no-update-modtime --progress --stats-one-line --stats=1m
  fi

  # Build json for capture; ${captured_pages%?} drops the trailing comma
  read -r -d '' capture << EOM
{
"git_commit":"$git_commit",
"capture_pages":"$capture_pages",
"captured_pages":"${captured_pages%?}",
"created_at":"$created_at"
}
EOM

  echo "Git hash $git_commit"

  # Request body shared by debug output and the real API call
  read -r -d '' payload << EOM
{
"command": "new-capture",
"site_id":"$site_id",
"environment":"$environment",
"data": $capture,
"token":"$token"
}
EOM

  if [[ "$debug" == "true" ]]; then
    # Left unquoted: word splitting collapses the JSON onto a single line.
    # shellcheck disable=SC2086
    echo $payload
    # Debug mode only prints the payload; it must not reach the API call.
    return 0
  fi

  # Skip TLS verification when talking to a development API
  if [[ "$captaincore_dev" == true ]]; then
    api_curl_opts+=(-k)
  fi

  # Adds capture to CaptainCore
  curl --silent "${api_curl_opts[@]}" --request POST "$captaincore_api" --header "Content-Type: application/json" --data @- <<< "$payload"

  # Generate capture usage stats (not read again in this function)
  capture_count=$( git rev-list --all --count )

  # Sync site data, pull in new thumbnails
  captaincore sync-data "$site_id" --captain-id="$CAPTAIN_ID"

  # Clear out variables
  site=''
  domain=''
  home_directory=''
  subsite=''
  response=''
  response_parsed=''

  # End logging
  echo "$(date +'%Y-%m-%d %H:%M') Finished capture"
}
# Run the capture for the site given on the command line.
run_command