Using AWS Spot Instances for Blender rendering
This code doesn’t work! I’m still working on it. Parts of it work, and it may be useful to someone.
#!/bin/bash
###
# RMS 2021 - Spot Instance Blender rendering script
# Provisions an EC2 spot instance at the price and time configured below
# uploads a blend file and begins to render it with the output being put into S3 incrementally
#
# WARNING : THIS SCRIPT IS PROVIDED AS INSPIRATION ONLY
# WARNING : DO NOT USE THIS SCRIPT UNLESS YOU KNOW WHAT YOU ARE DOING.
# WARNING : READ AND UNDERSTAND THE SCRIPT FULLY BEFORE USING IT, OR ANY PART OF IT
# WARNING : GETTING THIS WRONG COULD COST YOU A HUGE AMOUNT OF MONEY
#
# You will need :
# Put this script in ~/blender/run.sh
# In ~/.aws you will need to create your profile files (see elsewhere for instructions)
# In ~/.ssh you'll need to put your key pem file (generate it in there.. see elsewhere for instructions)
# TODO finish this bit
#
# Conventions used below:
#  - functions hand their result back on stdout, so every diagnostic and the
#    progress spinner go to stderr; otherwise $(...) would capture them.
#  - die() inside $(...) only terminates that subshell, so callers propagate
#    the failure explicitly with "|| return 1" / "|| exit 1".
###

#The region you want to create the spot instance in.
region="eu-central-1"
#the location of your pem file
#($HOME is used instead of "~" because a tilde inside quotes is never expanded by the shell)
key_file="$HOME/.ssh/rms-administrator-key-pair.pem"
#the name of the key the pem file describes
key_name="rms-administrator-key-pair"
#you'll need to choose an x86 instance type because blender won't run on arm
#instance_type="p2.xlarge"
instance_type="t2.micro"
#look up this price from the list.. Don't over bid!
#eg at the time of writing, an a1.medium was about $0.04/hour
#spot_price="0.04"
#T2.micro is about 0.007
spot_price="0.007"
#the time you want the spot instance for in minutes, must be a multiple of 60
#NOTE(review): AWS has been retiring defined-duration spot requests ("Spot blocks");
#if the API rejects --block-duration-minutes, set spot_time="" to omit the option.
spot_time="60"
#see elsewhere about setting up AWS profiles
profile="rms"
#instance_type="t2.micro"
boot_script="./blender.sh"
#only needed when the profile has more than one VPC / more than one SSH security group
#vpc_id="vpc-xxxxxxxx"
#sg_id="sg-xxxxxxxx"

# Prints the message in $1 to stderr and exits with status 1.
function die() {
  printf '%s\n' "$1" >&2
  exit 1
}

# Runs "aws ec2" with the configured profile and region; remaining arguments
# are passed through untouched.
function awsEc2() {
  aws ec2 --profile "$profile" --region "$region" "$@"
}

# Shows one round (10 ticks, 5 seconds apart) of a progress spinner on stderr.
# $1 - message printed in front of the spinner character
function spinWait() {
  local spin='/-\|'
  local i j=0
  for ((i = 0; i < 10; i++)); do
    j=$(((j + 1) % 4))
    printf '\r%s%s' "$1" "${spin:j:1}" >&2
    sleep 5
  done
}

# Prints the IAM user name the configured profile authenticates as.
function getUsername() {
  aws iam --profile "$profile" --region "$region" get-user --query "User.UserName" --output text
}

# Prints the ARN of the IAM user the configured profile authenticates as.
function getUserArn() {
  aws iam --profile "$profile" --region "$region" get-user --query "User.Arn" --output text
}

# Prints the id of the most recently created Amazon Linux 2 AMI matching the
# architecture of $instance_type.
function get_ami_id() {
  local arch ami_name ami_id
  arch=$(getArchitectureForInstancetype "$instance_type") || return 1
  #we're using the amazon linux image which we can find with the search string below
  ami_name="amzn2-ami-hvm-2.0*${arch}*gp2*"
  # --owners amazon: without an owner filter the newest public AMI from ANY
  # account whose name matches the pattern would be selected.
  # The filter is quoted so the shell never glob-expands the '*' characters.
  ami_id=$(awsEc2 describe-images --owners amazon \
    --filters "Name=name,Values=$ami_name" \
    --query 'Images[*].[ImageId,CreationDate]' --output text |
    sort -k2 -r | head -n1 | awk '{print $1; }')
  if [ -z "$ami_id" ]; then
    die "Could not find the latest Amazon Linux AMI image in this region, please check the ami_name property of this script"
  fi
  echo "$ami_id"
}

# Prints the id of the most recently created Ubuntu 18.04 (bionic) AMI
# matching the architecture of $instance_type.
function get_ubuntu_ami_id() {
  local arch ami_name ami_id
  arch=$(getArchitectureForInstancetype "$instance_type") || return 1
  case "$arch" in
    *x86*)
      ami_name="ubuntu/images/hvm-ssd/ubuntu-bionic-18.04-amd64-server*"
      #ami_name="ubuntu/images/hvm-ssd/ubuntu-focal-20.04-amd64-server*"
      ;;
    *arm*)
      ami_name="ubuntu/images/hvm-ssd/ubuntu-bionic-18.04-arm64-server*"
      #ami_name="ubuntu/images/hvm-ssd/ubuntu-focal-20.04-arm64-server*"
      ;;
    *)
      die "unsupported architecture ($arch) for the ubuntu AMI lookup"
      ;;
  esac
  #find the ami id for the ubuntu in our region
  # 099720109477 is Canonical's AWS account; restricting the owner prevents
  # picking up a look-alike public image published by someone else.
  ami_id=$(awsEc2 describe-images --owners 099720109477 \
    --filters "Name=name,Values=$ami_name" \
    --query 'Images[*].[ImageId,CreationDate]' --output text |
    sort -k2 -r | head -n1 | awk '{print $1; }')
  if [ -z "$ami_id" ]; then
    die "Could not find the latest ubuntu 18.04 AMI image for ($arch) in this region, please check the ami_name property of this script"
  fi
  echo "$ami_id"
}

# Prints the ids of all subnets in the VPC chosen by getVpcId.
function listSubnets() {
  local vpcid
  vpcid=$(getVpcId) || return 1
  awsEc2 describe-subnets --filters "Name=vpc-id,Values=$vpcid" \
    --query "Subnets[].SubnetId" --output text
}

# Prints the ids of the route tables visible to this profile/region.
# NOTE(review): the query is not restricted to the selected VPC even though
# the error message talks about "this VPC" - confirm whether that is intended.
function listRouteTables() {
  local tables
  tables=$(awsEc2 describe-route-tables --query "RouteTables[].RouteTableId" --output text)
  if [ -z "$tables" ]; then die "Could not find route tables in this VPC!?"; fi
  echo "$tables"
}

# Prints the only route table id, or dies when there is more than one.
function getDefaultRouteTable() {
  local tables count
  tables=$(listRouteTables) || return 1
  count=$(echo "$tables" | wc -w)
  if [ "$count" -gt "1" ]; then
    die "There is more than one route table, so it is not possible to get the 'default' route table."
  fi
  echo "$tables"
}

# Prints the route table explicitly associated with a subnet, falling back to
# the default route table when there is no explicit association.
# $1 - subnet id
function getRoutingTableForSubnet() {
  if [ -z "$1" ]; then die "must supply subnetID to this function"; fi
  local table
  table=$(awsEc2 describe-route-tables \
    --filters "Name=association.subnet-id,Values=$1" \
    --query "RouteTables[].RouteTableId" --output text)
  if [ -z "$table" ]; then
    #the chances are there is only one route table and this subnet has been automatically associated with it
    table=$(getDefaultRouteTable) || return 1
  fi
  echo "$table"
}

# Checks that the instance type can be used as a spot instance and prints its
# architecture ("x86_64" or "arm64").
# $1 - instance type, e.g. t2.micro
function getArchitectureForInstancetype() {
  if [ -z "$1" ]; then die "must provide instance type to this function"; fi
  local usage archs
  usage=$(awsEc2 describe-instance-types --instance-types "$1" \
    --query "InstanceTypes[].SupportedUsageClasses" --output text)
  # Match the literal word "spot"; the previous pattern expanded an unset
  # variable and therefore matched everything.
  if [[ "$usage" != *"spot"* ]]; then
    die "the configured instance type $1 does not support spotting. Please choose a different instance type"
  fi
  archs=$(awsEc2 describe-instance-types --instance-types "$1" \
    --query "InstanceTypes[].ProcessorInfo.SupportedArchitectures" --output text)
  #It's not easy to install blender on arm, so we default to x86
  if [[ "$archs" == *"x86"* ]]; then
    echo "x86_64"
    return
  fi
  if [[ "$archs" == *"arm64"* ]]; then
    echo "arm64"
    return
  fi
  die "no reasonable architectures supported by this instance type. Please choose a different instance type in the instance_type parameter at the top of this script"
}

# Returns 0 when the route table has a route through an internet gateway
# (a gateway id containing "igw-"), 1 otherwise.
# $1 - route table id
function isRouteTableSuitable() {
  if [ -z "$1" ]; then die "must supply route table id to this function"; fi
  local gateways
  gateways=$(awsEc2 describe-route-tables \
    --filters "Name=route-table-id,Values=$1" \
    --query "RouteTables[].Routes[].GatewayId" --output text)
  if [ -z "$gateways" ]; then return 1; fi
  # Literal "igw-": the previous pattern expanded an unset variable and so
  # accepted any gateway id that merely contained a dash.
  if [[ "$gateways" == *"igw-"* ]]; then
    return 0
  fi
  return 1
}

# Prints the first subnet whose route table routes through an internet gateway.
function getSubnet() {
  local subnets snid rt
  subnets=$(listSubnets) || return 1
  if [ -z "$subnets" ]; then die "no subnets were found in this VPC!?"; fi
  # Intentionally unquoted: the CLI returns the ids whitespace separated.
  for snid in $subnets; do
    rt=$(getRoutingTableForSubnet "$snid")
    if [ -z "$rt" ]; then die "couldn't find a route table associated with a subnet, something is wrong."; fi
    if isRouteTableSuitable "$rt"; then
      echo "$snid"
      return
    fi
  done
  die "could not find a suitable subnet (public with IGW routing) for this spot instance. Please configure your subnets"
}

# Prints the id of the security group (in the selected VPC) that has a rule
# with to-port 22. When several qualify, the optional sg_id variable decides.
function getSecurityGroup() {
  #get the SG which is configured to allow SSH
  local vpcid groups count
  vpcid=$(getVpcId) || return 1
  groups=$(awsEc2 describe-security-groups \
    --filters "Name=vpc-id,Values=$vpcid" "Name=ip-permission.to-port,Values=22" \
    --query "SecurityGroups[*].[GroupId]" --output text)
  if [ -z "$groups" ]; then
    die "There are no security groups in this VPC that will allow SSH traffic. You should go into AWS console and properly configure your security groups."
  fi
  #is there more than one suitable SG?
  count=$(echo "$groups" | wc -w)
  if [ "$count" -le "1" ]; then
    #there is only one SG in this profile so return it
    echo "$groups"
    return
  fi
  if [ -z "${sg_id:-}" ]; then
    {
      echo "there is more than one applicable security group in this vpc and the sg_id variable is not set to tell us which one to work on."
      echo "please create a variable named sg_id at the top of the script and choose one of the SG id's here :"
      echo "$groups"
    } >&2
    exit 1
  fi
  if [[ "$groups" != *"$sg_id"* ]]; then
    {
      echo "The sg_id variable contains $sg_id but that SG does not exist in the list of applicable sg's for this profile given here :"
      echo "$groups"
      echo "please choose one of the SG's from this list and reconfigure your sg_id variable at the top of this script"
    } >&2
    exit 1
  fi
  #the sg_id variable contains a valid SG so return it
  echo "$sg_id"
}

# Prints the ids of all VPCs visible to this profile/region.
function listVpcs() {
  awsEc2 describe-vpcs --query "Vpcs[*].[VpcId]" --output text
}

# Prints the id of the VPC to work in. When the profile has several VPCs the
# optional vpc_id variable decides.
function getVpcId() {
  local vpcs count
  vpcs=$(listVpcs)
  if [ -z "$vpcs" ]; then
    die "This profile has no currently active VPC. AWS require an existing relationship in order to use Spot Instances. Go and provision a t2.micro or something via the console."
  fi
  #is there more than one VPC in this profile?
  count=$(echo "$vpcs" | wc -w)
  if [ "$count" -le "1" ]; then
    #there is only one VPC in this profile so return it
    echo "$vpcs"
    return
  fi
  if [ -z "${vpc_id:-}" ]; then
    {
      echo "there is more than one VPC in this profile and you have no vpc_id variable set to tell us which one to work on."
      echo "please create a variable named vpc_id at the top of the script and choose one of the VPC id's here :"
      echo "$vpcs"
    } >&2
    exit 1
  fi
  if [[ "$vpcs" != *"$vpc_id"* ]]; then
    {
      echo "The vpc_id variable contains $vpc_id but that VPC does not exist in the list of VPC's for this profile given here :"
      echo "$vpcs"
      echo "please choose one of the VPC's from this list and reconfigure your vpc_id variable at the top of this script"
    } >&2
    exit 1
  fi
  #the vpc_id variable contains a valid VPC so return it
  echo "$vpc_id"
}

# Issues the spot request using the globals set by preFlight
# (AMI_ID, SG_ID, SUBNET_ID) and prints the spot instance request id.
function makeSpotRequest() {
  local launch_spec request_id
  local -a request_args
  launch_spec=$(printf '{ "KeyName": "%s", "ImageId": "%s", "InstanceType": "%s", "SecurityGroupIds": ["%s"], "SubnetId": "%s" }' \
    "$key_name" "$AMI_ID" "$instance_type" "$SG_ID" "$SUBNET_ID")
  request_args=(--spot-price "$spot_price")
  # Only ask for a defined duration when one is configured (see spot_time).
  if [ -n "$spot_time" ]; then
    request_args+=(--block-duration-minutes "$spot_time")
  fi
  request_id=$(awsEc2 request-spot-instances "${request_args[@]}" \
    --launch-specification "$launch_spec" \
    --query "SpotInstanceRequests[].SpotInstanceRequestId" --output text)
  if [ -z "$request_id" ]; then
    die "failed to get any response from the spot request? Check the console to make sure nothing was provisioned"
  fi
  echo "$request_id"
}

# Verifies credentials and resolves everything the spot request needs.
# Sets the globals IAM_ROLE_ARN, VPC_ID, SG_ID, SUBNET_ID and AMI_ID.
function preFlight() {
  #who are we operating as?
  local username arch
  username=$(getUsername)
  if [ -z "$username" ]; then die "failed to connect to AWS. Is your profile created correctly?"; fi
  echo "Proceeding in profile $profile as username $username"
  IAM_ROLE_ARN=$(getUserArn)
  echo "ARN of user is $IAM_ROLE_ARN"
  #check there is at least one existing VPC in this profile
  # "|| exit 1": a die() inside $(...) only ends the subshell, so stop here
  # instead of carrying on with an empty id.
  VPC_ID=$(getVpcId) || exit 1
  echo "Found VPC as $VPC_ID"
  SG_ID=$(getSecurityGroup) || exit 1
  echo "Found Security group as $SG_ID"
  SUBNET_ID=$(getSubnet) || exit 1
  echo "Found public routable subnet as $SUBNET_ID"
  arch=$(getArchitectureForInstancetype "$instance_type") || exit 1
  echo "chosen instance type ($instance_type) is $arch architecture"
  AMI_ID=$(get_ami_id) || exit 1
  echo "Found suitable AMI for $arch as $AMI_ID"
}

# Prints the id of the newest spot request in state open or active, or
# nothing when there is none.
function get_latest_spot_request_id() {
  #get the latest open or active spot request, this will be the one we just issued.
  local request_id
  request_id=$(awsEc2 describe-spot-instance-requests \
    --filters "Name=state,Values=open,active" \
    --query 'SpotInstanceRequests[*].[SpotInstanceRequestId,CreateTime]' --output text |
    sort -k2 -r | head -n1 | awk '{print $1; }')
  if [ -n "$request_id" ]; then
    echo "$request_id"
  fi
}

# Polls (without a time limit) until an open/active spot request exists and
# prints its id.
function wait_for_request_accept() {
  local rid
  rid=$(get_latest_spot_request_id)
  while [ -z "$rid" ]; do
    spinWait "Waiting for request to be accepted..."
    rid=$(get_latest_spot_request_id)
  done
  echo "$rid"
}

# Prints the instance id attached to a spot request; prints an empty line
# while the request has not been fulfilled yet.
# $1 - spot instance request id
function getInstanceIdFromSpotRequest() {
  if [ -z "$1" ]; then
    die "you must pass the spot instance request id to this function"
  fi
  local iid
  iid=$(awsEc2 describe-spot-instance-requests \
    --filters "Name=spot-instance-request-id,Values=$1" \
    --query 'SpotInstanceRequests[*].InstanceId' --output text)
  # The id is a string, so it is handed back on stdout; "return" only
  # accepts a numeric exit status.
  echo "$iid"
}

# Polls (without a time limit) until the spot request has an instance, waits
# for that instance to reach the running state, then prints the instance id.
# $1 - spot instance request id
function wait_for_running_instance() {
  if [ -z "$1" ]; then die "must pass spot instance request id to this function"; fi
  local iid
  iid=$(getInstanceIdFromSpotRequest "$1")
  while [ -z "$iid" ]; do
    spinWait "Waiting for request to be fulfilled..."
    # Re-query on every round; without this the loop could never end.
    iid=$(getInstanceIdFromSpotRequest "$1")
  done
  # Having an instance id does not mean the instance is up yet; the public
  # DNS name is only assigned once it is running.
  if ! awsEc2 wait instance-running --instance-ids "$iid" >&2; then
    die "instance $iid did not reach the running state"
  fi
  echo "$iid"
}

# Prints the public DNS name of an instance.
# $1 - instance id
function get_public_dns() {
  if [ -z "$1" ]; then die "must pass instanceid to this function"; fi
  local dns
  dns=$(awsEc2 describe-instances --filters "Name=instance-id,Values=$1" \
    --query "Reservations[0].Instances[0].PublicDnsName" --output text)
  # The CLI's text output renders a null value as the word "None".
  if [ -z "$dns" ] || [ "$dns" = "None" ]; then die "failed to get public dns for instance"; fi
  echo "$dns"
}

# Makes the spot request and waits for its instance to be running.
# Sets the global instance_id, which the code after provision relies on.
function provision() {
  #actually makes the spot request and waits for it to be running
  local request_id
  request_id=$(makeSpotRequest)
  if [ -z "$request_id" ]; then
    # No id came back from the request call: fall back to looking for the
    # newest open/active request on the account.
    request_id=$(wait_for_request_accept)
  else
    echo "Success. Spot Instance Request ID is $request_id"
  fi
  if [ -z "$request_id" ]; then
    die "could not get a valid spot instance request id. Please check the console to make sure everything is ok."
  fi
  instance_id=$(wait_for_running_instance "$request_id")
  if [ -z "$instance_id" ]; then die "failed to get instance id"; fi
}

# Placeholder for uploading the blend file and starting the render.
function install() {
  echo "scp up some stuff, connect by ssh yada yada"
}

preFlight
provision

public_dns=$(get_public_dns "$instance_id")
if [ -z "$public_dns" ]; then
  echo "Failed to get the public DNS for the spot instance.. something is wrong and this script can't continue." >&2
  exit 1
else
  echo "Spot instance created with ID $instance_id. Congrats, you bought some compute at $spot_price (or less) per $spot_time minutes :)"
fi

ssh_command="ssh -i $key_file ec2-user@$public_dns"
echo "About to test this instance for connectivity.."