To see all of the values an existing job was created with, describe it from the AWS CLI:
aws s3control describe-job --account-id 210437677164 --job-id 2b827469-05db-4666-9708-2d50e03a55aa --region us-east-1
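The same information is available from Python if you prefer to poll a job programmatically. A minimal sketch using boto3's s3control client and the account and job IDs from the command above:

import boto3

# Poll the batch job from Python instead of the CLI.
s3control = boto3.client("s3control", region_name="us-east-1")
job = s3control.describe_job(
    AccountId="210437677164",
    JobId="2b827469-05db-4666-9708-2d50e03a55aa",
)["Job"]

print(job["Status"])
print(job["ProgressSummary"])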
You can then create jobs from the AWS CLI:
aws s3control create-job \
  --region us-east-1 \
  --account-id 210437677164 \
  --operation '{"LambdaInvoke": { "FunctionArn": "arn:aws:lambda:us-east-1:210437677164:function:account-reports-archive:$LATEST" }}' \
  --manifest '{"Spec":{"Format":"S3BatchOperations_CSV_20180820","Fields":["Bucket","Key"]},"Location":{"ObjectArn":"arn:aws:s3:::egocogito.data.batch/account_reports/output/manifest.csv","ETag":"8fc538ef160154c26aaef256e0cdf148"}}' \
  --report '{"Bucket":"arn:aws:s3:::egocogito.data.batch","Prefix":"user_files/ignacio/archive-reuslts","Format":"Report_CSV_20180820","Enabled":true,"ReportScope":"AllTasks"}' \
  --priority 10 \
  --role-arn arn:aws:iam::210437677164:role/awsBatchServiceRole \
  --client-request-token $(uuidgen) \
  --description "job Description" \
  --no-confirmation-required
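The manifest referenced in Location is a plain CSV where each line is a bucket,key pair, matching the Fields listed in Spec. If you would rather create the job from Python, the same parameters map directly onto boto3's create_job call; a minimal sketch reusing the values from the command above:

import uuid
import boto3

s3control = boto3.client("s3control", region_name="us-east-1")

response = s3control.create_job(
    AccountId="210437677164",
    ConfirmationRequired=False,
    Operation={
        "LambdaInvoke": {
            "FunctionArn": "arn:aws:lambda:us-east-1:210437677164:function:account-reports-archive:$LATEST"
        }
    },
    Manifest={
        "Spec": {
            "Format": "S3BatchOperations_CSV_20180820",
            "Fields": ["Bucket", "Key"],
        },
        "Location": {
            "ObjectArn": "arn:aws:s3:::egocogito.data.batch/account_reports/output/manifest.csv",
            "ETag": "8fc538ef160154c26aaef256e0cdf148",
        },
    },
    Report={
        "Bucket": "arn:aws:s3:::egocogito.data.batch",
        "Prefix": "user_files/ignacio/archive-reuslts",
        "Format": "Report_CSV_20180820",
        "Enabled": True,
        "ReportScope": "AllTasks",
    },
    Priority=10,
    RoleArn="arn:aws:iam::210437677164:role/awsBatchServiceRole",
    ClientRequestToken=str(uuid.uuid4()),
    Description="job Description",
)

print(response["JobId"])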
import boto3, os, tarfile, io
from botocore.exceptions import ClientError

def lambda_handler(event, context):
    # Instantiate boto client
    s3Client = boto3.client('s3')
    # Parse job parameters from S3 Batch Operations
    jobId = event['job']['id']
    invocationId = event['invocationId']
    invocationSchemaVersion = event['invocationSchemaVersion']

    # Prepare results
    results = []

    # Parse Amazon S3 Key, Key Version, and Bucket ARN
    taskId = event['tasks'][0]['taskId']
    s3Key = event['tasks'][0]['s3Key']
    s3Bucket = event['tasks'][0]['s3BucketArn'].split(':::')[-1]
    compressedFile = os.path.basename(s3Key) + ".tgz"

    try:
        # /tmp is the only writable path inside the Lambda environment
        os.chdir("/tmp/")

        # Prepare result code and string
        resultCode = None
        resultString = None

        # List every object under the prefix handed to us by the batch job
        paginator = s3Client.get_paginator('list_objects_v2')
        pages = paginator.paginate(Bucket=s3Bucket, Prefix=s3Key)
        file_list = []
        for page in pages:
            for obj in page['Contents']:
                file_list.append(obj['Key'])
        print("Found {} items in S3 prefix".format(len(file_list)))

        # Build the .tgz in memory, downloading each object and deleting the
        # local copy as soon as it has been added to the archive
        tarFileObj = io.BytesIO()
        with tarfile.open(fileobj=tarFileObj, mode="w:gz") as tar:
            for obj in file_list:
                local_path = "out/" + obj
                if not os.path.exists(os.path.dirname(local_path)):
                    os.makedirs(os.path.dirname(local_path))
                # print("Downloading {}".format(obj))
                s3Client.download_file(Bucket=s3Bucket, Key=obj, Filename=local_path)
                tar.add(local_path, arcname=local_path.replace("out/account_reports", ""))
                os.remove(local_path)

        # Upload the finished archive back to the bucket
        print("Uploading compressed file to {}".format("account_reports/" + compressedFile))
        tarFileObj.seek(0)
        response = s3Client.put_object(Bucket=s3Bucket, Body=tarFileObj, Key="account_reports/" + compressedFile)
        print("OK!")

        # Mark as succeeded
        resultCode = 'Succeeded'
        resultString = str("account_reports/" + compressedFile)
    except ClientError as e:
        # If the request timed out, mark as a temporary failure and
        # S3 Batch Operations will mark the task for retry. If any
        # other exceptions are received, mark as permanent failure.
        errorCode = e.response['Error']['Code']
        errorMessage = e.response['Error']['Message']
        if errorCode == 'RequestTimeout':
            resultCode = 'TemporaryFailure'
            resultString = 'Retry request to Amazon S3 due to timeout.'
        else:
            resultCode = 'PermanentFailure'
            resultString = '{}: {}'.format(errorCode, errorMessage)
        print(e)
    except Exception as e:
        # Catch all exceptions to permanently fail the task
        resultCode = 'PermanentFailure'
        resultString = 'Exception: {}'.format(e)
        print(e)
    finally:
        results.append({
            'taskId': taskId,
            'resultCode': resultCode,
            'resultString': resultString.replace("\n", " ").replace("\r", " ")
        })

    return {
        'invocationSchemaVersion': invocationSchemaVersion,
        'treatMissingKeysAs': 'PermanentFailure',
        'invocationId': invocationId,
        'results': results
    }
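To exercise the handler locally before wiring it into a batch job, you can feed it a hand-built event shaped like an S3 Batch Operations Lambda invocation. A minimal sketch, assuming local AWS credentials with access to the bucket; the IDs are placeholders and the s3Key value is a hypothetical prefix to archive:

if __name__ == "__main__":
    # Hand-built test event mimicking an S3 Batch Operations invocation.
    test_event = {
        "invocationSchemaVersion": "1.0",
        "invocationId": "example-invocation-id",
        "job": {"id": "example-job-id"},
        "tasks": [
            {
                "taskId": "example-task-id",
                "s3Key": "account_reports/2019/01/account-1234",
                "s3VersionId": None,
                "s3BucketArn": "arn:aws:s3:::egocogito.data.batch",
            }
        ],
    }
    print(lambda_handler(test_event, None))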