
Commit 87f38d0
Committed Dec 28, 2019 · 1 parent af9c313

This commit includes a major revision to the project:

* All IAM policies revised to provide least-privilege access
* Configuration parameters renamed and re-documented to simplify the configuration process
* "Putting it together" section updated with instructions & AWS CLI commands for copying the Sales and Marketing sample data sets
* Python 2.7-style print statements updated
* Minor bug fixes to the GlueRunner and AthenaRunner Lambda functions

18 files changed: +493 −370 lines
 

**.gitignore** (+7 −1)

````diff
@@ -71,4 +71,10 @@ notebook
 .github
 
 # Pynt
-*.pyc
+*.pyc
+
+# Python Virtual Environment
+venv
+
+# Copy of customized configuration files
+config-copy
````

**README.md** (+46 −30)

````diff
@@ -161,11 +161,11 @@ Specifies parameters for creation of the `gluerunner-lambda` CloudFormation stack
 ```json
 [
   {
-    "ParameterKey": "SourceS3BucketName",
+    "ParameterKey": "ArtifactBucketName",
     "ParameterValue": "<NO-DEFAULT>"
   },
   {
-    "ParameterKey": "SourceS3Key",
+    "ParameterKey": "LambdaSourceS3Key",
     "ParameterValue": "src/gluerunner.zip"
   },
   {
@@ -180,9 +180,9 @@ Specifies parameters for creation of the `gluerunner-lambda` CloudFormation stack
 ```
 #### Parameters:
 
-* `SourceS3BucketName` - The Amazon S3 bucket name (without the `s3://...` prefix) from which the Glue Runner AWS Lambda function package (.zip file) will be fetched by AWS CloudFormation.
+* `ArtifactBucketName` - The Amazon S3 bucket name (without the `s3://...` prefix) in which Glue scripts and Lambda function source will be stored. **If a bucket with such a name does not exist, the `deploylambda` build command will create it for you with appropriate permissions.**
 
-* `SourceS3Key` - The Amazon S3 key (e.g. `src/gluerunner.zip`) pointing to your AWS Lambda function's .zip package.
+* `LambdaSourceS3Key` - The Amazon S3 key (e.g. `src/gluerunner.zip`) pointing to your AWS Lambda function's .zip package in the artifact bucket.
 
 * `DDBTableName` - The Amazon DynamoDB table in which the state of active AWS Glue jobs is tracked between Glue Runner AWS Lambda function invocations.
 
@@ -196,11 +196,11 @@ Specifies parameters for creation of the `gluerunner-lambda` CloudFormation stack
 ```json
 [
   {
-    "ParameterKey": "SourceS3BucketName",
+    "ParameterKey": "ArtifactBucketName",
     "ParameterValue": "<NO-DEFAULT>"
   },
   {
-    "ParameterKey": "SourceS3Key",
+    "ParameterKey": "LambdaSourceS3Key",
     "ParameterValue": "src/athenarunner.zip"
   },
   {
@@ -215,9 +215,9 @@ Specifies parameters for creation of the `gluerunner-lambda` CloudFormation stack
 ```
 #### Parameters:
 
-* `SourceS3BucketName` - The Amazon S3 bucket name (without the `s3://...` prefix) from which the Athena Runner AWS Lambda function package (.zip file) will be fetched by AWS CloudFormation.
+* `ArtifactBucketName` - The Amazon S3 bucket name (without the `s3://...` prefix) in which Glue scripts and Lambda function source will be stored. **If a bucket with such a name does not exist, the `deploylambda` build command will create it for you with appropriate permissions.**
 
-* `SourceS3Key` - The Amazon S3 key (e.g. `src/athenarunner.zip`) pointing to your AWS Lambda function's .zip package.
+* `LambdaSourceS3Key` - The Amazon S3 key (e.g. `src/athenarunner.zip`) pointing to your AWS Lambda function's .zip package.
 
 * `DDBTableName` - The Amazon DynamoDB table in which the state of active AWS Athena queries is tracked between Athena Runner AWS Lambda function invocations.
 
@@ -234,20 +234,20 @@ Sample content:
 ```json
 {
   "gluerunner": {
-    "SourceS3BucketName": "<NO-DEFAULT>",
-    "SourceS3Key":"src/gluerunner.zip"
+    "ArtifactBucketName": "<NO-DEFAULT>",
+    "LambdaSourceS3Key":"src/gluerunner.zip"
   },
   "ons3objectcreated": {
-    "SourceS3BucketName": "<NO-DEFAULT>",
-    "SourceS3Key":"src/ons3objectcreated.zip"
+    "ArtifactBucketName": "<NO-DEFAULT>",
+    "LambdaSourceS3Key":"src/ons3objectcreated.zip"
   }
 }
 ```
 #### Parameters:
 
-* `SourceS3BucketName` - The Amazon S3 bucket name (without the `s3://...` prefix) to which the Glue Runner AWS Lambda function package (.zip file) will be deployed. If a bucket with such a name does not exist, the `deploylambda` build command will create it for you with appropriate permissions.
+* `ArtifactBucketName` - The Amazon S3 bucket name (without the `s3://...` prefix) in which Glue scripts and Lambda function source will be stored. **If a bucket with such a name does not exist, the `deploylambda` build command will create it for you with appropriate permissions.**
 
-* `SourceS3Key` - The Amazon S3 key (e.g. `src/gluerunner.zip`) for your AWS Lambda function's .zip package.
+* `LambdaSourceS3Key` - The Amazon S3 key (e.g. `src/gluerunner.zip`) for your AWS Lambda function's .zip package.
 
 >**NOTE: The values set here must match values set in `cloudformation/gluerunner-lambda-params.json`.**
@@ -260,26 +260,34 @@ Specifies parameters for creation of the `glue-resources` CloudFormation stack
 ```json
 [
   {
-    "ParameterKey": "S3ETLScriptPath",
+    "ParameterKey": "ArtifactBucketName",
     "ParameterValue": "<NO-DEFAULT>"
   },
   {
-    "ParameterKey": "S3ETLOutputPath",
-    "ParameterValue": "<NO-DEFAULT>"
+    "ParameterKey": "ETLScriptsPrefix",
+    "ParameterValue": "scripts"
   },
   {
-    "ParameterKey": "SourceDataBucketName",
+    "ParameterKey": "DataBucketName",
     "ParameterValue": "<NO-DEFAULT>"
+  },
+  {
+    "ParameterKey": "ETLOutputPrefix",
+    "ParameterValue": "output"
   }
 ]
 ```
 #### Parameters:
 
-* `S3ETLScriptPath` - The Amazon S3 path (including bucket name and prefix in ``s3://example/path`` format) to which AWS Glue scripts under `glue-scripts` directory will be dpeloyed.
+* `ArtifactBucketName` - The Amazon S3 bucket name (without the `s3://...` prefix) that will be created by the `step-functions-resources.yaml` CloudFormation template. **If a bucket with such a name does not exist, the `deploylambda` build command will create it for you with appropriate permissions.**
+
+* `ETLScriptsPrefix` - The Amazon S3 prefix (in the format ``example/path`` without leading or trailing '/') to which AWS Glue scripts will be deployed in the artifact bucket. Glue scripts can be found under the `glue-scripts` project directory
+
+* `DataBucketName` - The Amazon S3 bucket name (without the `s3://...` prefix) that will be created by the `step-functions-resources.yaml` CloudFormation template. This is the bucket to which Sales and Marketing datasets must be uploaded. It is also the bucket in which output will be created. **This bucket is created by `step-functions-resources` CloudFormation. CloudFormation stack creation will fail if the bucket already exists.**
+
+* `ETLOutputPrefix` - The Amazon S3 prefix (in the format ``example/path`` without leading or trailing '/') to which AWS Glue jobs will produce their intermediary outputs. This path will be created in the data bucket.
 
-* `S3ETLOutputPath` - The Amazon S3 path to which AWS Glue jobs will produce their intermediary outputs.
 
-* `SourceDataBucketName` - The Amazon S3 bucket name (without the `s3://...` prefix) that will be created by the `step-functions-resources.yaml` CloudFormation template. This is the bucket to which Sales and Marketing datasets must be uploaded.
 
 The parameters are used by AWS CloudFormation during the creation of `glue-resources` stack.
 
@@ -290,7 +298,7 @@ Specifies the parameters used by Glue Runner AWS Lambda function at run-time.
 
 ```json
 {
-  "sfn_activity_arn": "<NO-DEFAULT>",
+  "sfn_activity_arn": "arn:aws:states:<AWS-REGION>:<AWS-ACCOUNT-ID>:activity:GlueRunnerActivity",
   "sfn_worker_name": "gluerunner",
   "ddb_table": "GlueRunnerActiveJobs",
   "ddb_query_limit": 50,
@@ -299,7 +307,7 @@ Specifies the parameters used by Glue Runner AWS Lambda function at run-time.
 ```
 #### Parameters:
 
-* `sfn_activity_arn` - AWS Step Functions activity task ARN. This ARN is used to query AWS Step Functions for new tasks (i.e. new AWS Glue jobs to run). The ARN is a combination of the AWS region, your AWS account Id, and the name property of the [AWS::StepFunctions::Activity](https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-resource-stepfunctions-activity.html) resource in the `stepfunctions-resources.yaml` CloudFormation template. An ARN looks as follows `arn:aws:states:<AWS-REGION>:<YOUR-AWS-ACCOUNT-ID>:activity:<STEPFUNCTIONS-ACTIVITY-NAME>`. By default, the activity name is `GlueRunnerActivity`.
+* `sfn_activity_arn` - AWS Step Functions activity task ARN. This ARN is used to query AWS Step Functions for new tasks (i.e. new AWS Glue jobs to run). The ARN is a combination of the AWS region, your AWS account Id, and the name property of the [AWS::StepFunctions::Activity](https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-resource-stepfunctions-activity.html) resource in the `stepfunctions-resources.yaml` CloudFormation template. An ARN looks as follows `arn:aws:states:<AWS-REGION>:<AWS-ACCOUNT-ID>:activity:<STEPFUNCTIONS-ACTIVITY-NAME>`. By default, the activity name is `GlueRunnerActivity`.
 
 * `sfn_worker_name` - A property that is passed to AWS Step Functions when getting activity tasks.
 
@@ -317,19 +325,19 @@ Specifies parameters for creation of the `step-functions-resources` CloudFormation stack
 ```json
 [
   {
-    "ParameterKey": "SourceS3BucketName",
+    "ParameterKey": "ArtifactBucketName",
     "ParameterValue": "<NO-DEFAULT>"
   },
   {
-    "ParameterKey": "SourceS3Key",
+    "ParameterKey": "LambdaSourceS3Key",
     "ParameterValue": "src/ons3objectcreated.zip"
   },
   {
     "ParameterKey": "GlueRunnerActivityName",
     "ParameterValue": "GlueRunnerActivity"
   },
   {
-    "ParameterKey": "SourceDataBucketName",
+    "ParameterKey": "DataBucketName",
     "ParameterValue": "<NO-DEFAULT>"
   }
 ]
@@ -340,11 +348,11 @@ Specifies parameters for creation of the `step-functions-resources` CloudFormation stack
 
 Both parameters are also used by AWS CloudFormation during stack creation.
 
-* `SourceS3BucketName` - The Amazon S3 bucket name (without the `s3://...` prefix) to which the `ons3objectcreated` AWS Lambda function package (.zip file) will be deployed. If a bucket with such a name does not exist, the `deploylambda` build command will create it for you with appropriate permissions.
+* `ArtifactBucketName` - The Amazon S3 bucket name (without the `s3://...` prefix) to which the `ons3objectcreated` AWS Lambda function package (.zip file) will be deployed. If a bucket with such a name does not exist, the `deploylambda` build command will create it for you with appropriate permissions.
 
-* `SourceS3Key` - The Amazon S3 key (e.g. `src/ons3objectcreated.zip`) for your AWS Lambda function's .zip package.
+* `LambdaSourceS3Key` - The Amazon S3 key (e.g. `src/ons3objectcreated.zip`) for your AWS Lambda function's .zip package.
 
-* `SourceDataBucketName` - The Amazon S3 bucket name (without the `s3://...` prefix). All OnS3ObjectCreated CloudWatch Events will for the bucket be handled by the `ons3objectcreated` AWS Lambda function. **This bucket will be created by CloudFormation. CloudFormation stack creation will fail if the bucket already exists.**
+* `DataBucketName` - The Amazon S3 bucket name (without the `s3://...` prefix). All OnS3ObjectCreated CloudWatch Events will for the bucket be handled by the `ons3objectcreated` AWS Lambda function. **This bucket will be created by CloudFormation. CloudFormation stack creation will fail if the bucket already exists.**
 
 <a name="build-commands"></a>
 # Build commands
@@ -490,7 +498,15 @@ pynt createstack["athenarunner-lambda"]
 
 Note that the `step-functions-resources` stack **must** be created first, before the `glue-resources` stack.
 
-Now head to the AWS Step Functions console. Start and observe an execution of the 'MarketingAndSalesETLOrchestrator' state machine. Execution should halt at the 'Wait for XYZ Data' states. At this point, you should upload the sample .CSV files under the `samples` directory to the S3 bucket you specified as the `SourceDataBucketName` parameter value in `step-functions-resources-config.json` configuration file. This should allow the state machine to move on to next steps -- Process Sales Data and Process Marketing Data.
+Now head to the AWS Step Functions console. Start and observe an execution of the 'MarketingAndSalesETLOrchestrator' state machine. Execution should halt at the 'Wait for XYZ Data' states. At this point, you should upload the sample .CSV files under the `samples` directory to the S3 bucket you specified as the `SourceDataBucketName` parameter value in `step-functions-resources-config.json` configuration file. **Upload the marketing sample file under prefix 'marketing' and the sales sample file under prefix 'sales'. To do that, you may issue the following AWS CLI commands while at the project's root directory:**
+
+```
+aws s3 cp samples/MarketingData_QuickSightSample.csv s3://{SourceDataBucketName}/marketing/
+
+aws s3 cp samples/SalesPipeline_QuickSightSample.csv s3://{SourceDataBucketName}/sales/
+```
+
+This should allow the state machine to move on to next steps -- Process Sales Data and Process Marketing Data.
 
 If you have setup and run the sample correctly, you should see this output in the AWS Step Functions console:
````
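Note on the README's "values must match" requirement: the NOTE above says `lambda/s3-deployment-descriptor.json` and the per-stack CloudFormation params files must agree. A small helper like the following (hypothetical, not part of this commit; file paths and function names are the repository's own) can catch a mismatch before deploying:

```python
# Hypothetical consistency check between the deployment descriptor and a
# stack's CloudFormation parameters file.
import json

def params_match(cfn_params_path, descriptor_path, function_name):
    with open(cfn_params_path) as f:
        cfn = {p['ParameterKey']: p['ParameterValue'] for p in json.load(f)}
    with open(descriptor_path) as f:
        dep = json.load(f)[function_name]
    # Both files must agree on where the Lambda .zip package lives.
    return all(cfn.get(k) == dep.get(k)
               for k in ('ArtifactBucketName', 'LambdaSourceS3Key'))

if __name__ == '__main__':
    print(params_match('cloudformation/gluerunner-lambda-params.json',
                       'lambda/s3-deployment-descriptor.json', 'gluerunner'))
```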

**build.py** (+49 −42)

````diff
@@ -17,22 +17,25 @@
 import shutil
 import zipfile
 import time
+
+from pip._vendor.distlib.compat import raw_input
 from pynt import task
 import boto3
 import botocore
 from botocore.exceptions import ClientError
 import json
 import re
 
+
 def write_dir_to_zip(src, zf):
     '''Write a directory tree to an open ZipFile object.'''
     abs_src = os.path.abspath(src)
     for dirname, subdirs, files in os.walk(src):
         for filename in files:
             absname = os.path.abspath(os.path.join(dirname, filename))
             arcname = absname[len(abs_src) + 1:]
-            print 'zipping %s as %s' % (os.path.join(dirname, filename),
-                                        arcname)
+            print('zipping {} as {}'.format(os.path.join(dirname, filename),
+                                            arcname))
             zf.write(absname, arcname)
 
 def read_json(jsonf_path):
@@ -63,7 +66,7 @@ def check_bucket_exists(s3path):
 @task()
 def clean():
     '''Clean build directory.'''
-    print 'Cleaning build directory...'
+    print('Cleaning build directory...')
 
     if os.path.exists('build'):
         shutil.rmtree('build')
@@ -78,11 +81,11 @@ def packagelambda(* functions):
 
     os.chdir("build")
 
-    if(len(functions) == 0):
+    if len(functions) == 0:
         functions = ("athenarunner", "gluerunner", "ons3objectcreated")
 
     for function in functions:
-        print 'Packaging "{}" lambda function in directory'.format(function)
+        print('Packaging "{}" lambda function in directory'.format(function))
         zipf = zipfile.ZipFile("%s.zip" % function, "w", zipfile.ZIP_DEFLATED)
 
         write_dir_to_zip("../lambda/{}/".format(function), zipf)
@@ -99,7 +102,7 @@ def updatelambda(*functions):
     '''Directly update lambda function code in AWS (without upload to S3).'''
     lambda_client = boto3.client('lambda')
 
-    if(len(functions) == 0):
+    if len(functions) == 0:
         functions = ("athenarunner", "gluerunner", "ons3objectcreated")
 
     for function in functions:
@@ -115,7 +118,7 @@ def updatelambda(*functions):
 def deploylambda(*functions, **kwargs):
     '''Upload lambda functions .zip file to S3 for download by CloudFormation stack during creation.'''
 
-    if (len(functions) == 0):
+    if len(functions) == 0:
         functions = ("athenarunner", "gluerunner", "ons3objectcreated")
 
     region_name = boto3.session.Session().region_name
@@ -128,21 +131,21 @@ def deploylambda(*functions, **kwargs):
 
     for function in functions:
 
-        src_s3_bucket_name = params[function]['SourceS3BucketName']
-        src_s3_key = params[function]['SourceS3Key']
+        src_s3_bucket_name = params[function]['ArtifactBucketName']
+        src_s3_key = params[function]['LambdaSourceS3Key']
 
         if not src_s3_key and not src_s3_bucket_name:
             print(
-                "ERROR: Both Source S3 bucket name and S3 key must be specified for function '{}'. FUNCTION NOT DEPLOYED.".format(
+                "ERROR: Both Artifact S3 bucket name and Lambda source S3 key must be specified for function '{}'. FUNCTION NOT DEPLOYED.".format(
                     function))
             continue
 
         print("Checking if S3 Bucket '{}' exists...".format(src_s3_bucket_name))
 
-        if (not check_bucket_exists(src_s3_bucket_name)):
+        if not check_bucket_exists(src_s3_bucket_name):
             print("Bucket %s not found. Creating in region {}.".format(src_s3_bucket_name, region_name))
 
-            if (region_name == "us-east-1"):
+            if region_name == "us-east-1":
                 s3_client.create_bucket(
                     # ACL="authenticated-read",
                     Bucket=src_s3_bucket_name
@@ -156,7 +159,7 @@ def deploylambda(*functions, **kwargs):
                 }
             )
 
-        print "Uploading function '{}' to '{}'".format(function, src_s3_key)
+        print("Uploading function '{}' to '{}'".format(function, src_s3_key))
 
         with open('build/{}.zip'.format(function), 'rb') as data:
             s3_client.upload_fileobj(data, src_s3_bucket_name, src_s3_key)
@@ -168,8 +171,9 @@ def deploylambda(*functions, **kwargs):
 def createstack(* stacks, **kwargs):
     '''Create stacks using CloudFormation.'''
 
-    if (len(stacks) == 0):
-        print("ERROR: Please specify a stack to create. Valid values are glue-resources, gluerunner-lambda, step-functions-resources.")
+    if len(stacks) == 0:
+        print(
+            "ERROR: Please specify a stack to create. Valid values are glue-resources, gluerunner-lambda, step-functions-resources.")
         return
 
     for stack in stacks:
@@ -183,7 +187,7 @@ def createstack(* stacks, **kwargs):
 
         cfn_client = boto3.client('cloudformation')
 
-        print("Attempting to CREATE '%s' stack using CloudFormation." % (stack_name))
+        print("Attempting to CREATE '%s' stack using CloudFormation." % stack_name)
         start_t = time.time()
         response = cfn_client.create_stack(
             StackName=stack_name,
@@ -197,7 +201,7 @@ def createstack(* stacks, **kwargs):
         print("Waiting until '%s' stack status is CREATE_COMPLETE" % stack_name)
 
         try:
-
+            # cc           +o
             cfn_stack_delete_waiter = cfn_client.get_waiter('stack_create_complete')
             cfn_stack_delete_waiter.wait(StackName=stack_name)
             print("Stack CREATED in approximately %d secs." % int(time.time() - start_t))
@@ -211,8 +215,9 @@ def createstack(* stacks, **kwargs):
 def updatestack(* stacks, **kwargs):
     '''Update a CloudFormation stack.'''
 
-    if (len(stacks) == 0):
-        print("ERROR: Please specify a stack to create. Valid values are glue-resources, gluerunner-lambda, step-functions-resources.")
+    if len(stacks) == 0:
+        print(
+            "ERROR: Please specify a stack to create. Valid values are glue-resources, gluerunner-lambda, step-functions-resources.")
         return
 
     for stack in stacks:
@@ -226,7 +231,7 @@ def updatestack(* stacks, **kwargs):
 
         cfn_client = boto3.client('cloudformation')
 
-        print("Attempting to UPDATE '%s' stack using CloudFormation." % (stack_name))
+        print("Attempting to UPDATE '%s' stack using CloudFormation." % stack_name)
         try:
             start_t = time.time()
             response = cfn_client.update_stack(
@@ -244,13 +249,13 @@ def updatestack(* stacks, **kwargs):
 
             print("Stack UPDATED in approximately %d secs." % int(time.time() - start_t))
         except ClientError as e:
-            print "EXCEPTION: " + e.response["Error"]["Message"]
+            print("EXCEPTION: " + e.response["Error"]["Message"])
 
 @task()
 def stackstatus(* stacks):
     '''Check the status of a CloudFormation stack.'''
 
-    if (len(stacks) == 0):
+    if len(stacks) == 0:
         stacks = ("glue-resources", "gluerunner-lambda", "step-functions-resources")
 
     for stack in stacks:
@@ -263,18 +268,18 @@ def stackstatus(* stacks):
                 StackName=stack_name
             )
 
-            if(response["Stacks"][0]):
+            if response["Stacks"][0]:
                 print("Stack '%s' has the status '%s'" % (stack_name, response["Stacks"][0]["StackStatus"]))
 
         except ClientError as e:
-            print "EXCEPTION: " + e.response["Error"]["Message"]
+            print("EXCEPTION: " + e.response["Error"]["Message"])
 
 
 @task()
 def deletestack(* stacks):
     '''Delete stacks using CloudFormation.'''
 
-    if (len(stacks) == 0):
+    if len(stacks) == 0:
         print("ERROR: Please specify a stack to delete.")
         return
 
@@ -308,36 +313,38 @@ def deploygluescripts(**kwargs):
     glue_cfn_params = read_json("cloudformation/glue-resources-params.json")
 
     s3_etl_script_path = ''
-
+    bucket_name = ''
+    prefix = ''
     for param in glue_cfn_params:
-        if param['ParameterKey'] == 'S3ETLScriptPath':
-            s3_etl_script_path = param['ParameterValue']
+        if param['ParameterKey'] == 'ArtifactBucketName':
+            bucket_name = param['ParameterValue']
+        if param['ParameterKey'] == 'ETLScriptsPrefix':
+            prefix = param['ParameterValue']
 
-    if not s3_etl_script_path:
+    if not bucket_name or not prefix:
         print(
-            "ERROR: S3ETLScriptPath must be set in 'cloudformation/glue-resources-params.json'.")
+            "ERROR: ArtifactBucketName and ETLScriptsPrefix must be set in 'cloudformation/glue-resources-params.json'.")
         return
 
+    s3_etl_script_path = 's3://' + bucket_name + '/' + prefix
+
     result = re.search('s3://(.+?)/(.*)', s3_etl_script_path)
-    if(result is None):
-        print("ERROR: S3ETLScriptPath is malformed.")
+    if result is None:
+        print("ERROR: Invalid S3 ETL bucket name and/or script prefix.")
         return
 
-    s3_bucket_name = result.group(1)
-    s3_key = result.group(2)
-
-    print("Checking if S3 Bucket '{}' exists...".format(s3_bucket_name))
+    print("Checking if S3 Bucket '{}' exists...".format(bucket_name))
 
-    if (not check_bucket_exists(s3_bucket_name)):
-        print("ERROR: S3 bucket for path '{}' not found.".format(s3_etl_script_path))
+    if not check_bucket_exists(bucket_name):
+        print("ERROR: S3 bucket '{}' not found.".format(bucket_name))
         return
 
     for dirname, subdirs, files in os.walk(glue_scripts_path):
         for filename in files:
             absname = os.path.abspath(os.path.join(dirname, filename))
-            print "Uploading AWS Glue script '{}' to '{}/{}'".format(absname, s3_bucket_name, s3_key)
+            print("Uploading AWS Glue script '{}' to '{}/{}'".format(absname, bucket_name, prefix))
             with open(absname, 'rb') as data:
-                s3_client.upload_fileobj(data, s3_bucket_name, '{}/{}'.format(s3_key, filename))
+                s3_client.upload_fileobj(data, bucket_name, '{}/{}'.format(prefix, filename))
 
     return
 
@@ -348,9 +355,9 @@ def deletes3bucket(name):
 
     proceed = raw_input(
         "This command will DELETE ALL DATA in S3 bucket '%s' and the BUCKET ITSELF.\nDo you wish to continue? [Y/N] " \
-        % (name))
+        % name)
 
-    if (proceed.lower() != 'y'):
+    if proceed.lower() != 'y':
         print("Aborting deletion.")
         return
````
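Note on the new import: `from pip._vendor.distlib.compat import raw_input` makes `raw_input` resolve under both Python 2 and 3, but it reaches into pip's private vendored packages, which can move between pip releases. A dependency-free shim (a suggestion, not what this commit does) would be:

```python
# Portable raw_input without relying on pip internals.
try:
    input_func = raw_input  # Python 2
except NameError:
    input_func = input      # Python 3

proceed = input_func("Do you wish to continue? [Y/N] ")
```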

**cloudformation/athenarunner-lambda-params.json** (+2 −2)

````diff
@@ -1,10 +1,10 @@
 [
   {
-    "ParameterKey": "SourceS3BucketName",
+    "ParameterKey": "ArtifactBucketName",
     "ParameterValue": "<NO-DEFAULT>"
   },
   {
-    "ParameterKey": "SourceS3Key",
+    "ParameterKey": "LambdaSourceS3Key",
     "ParameterValue": "src/athenarunner.zip"
   },
   {
````

**cloudformation/athenarunner-lambda.yaml** (+51 −40)

````diff
@@ -29,12 +29,12 @@ Parameters:
     Default: "athenarunner"
     Description: "Name of the Lambda function that mediates between AWS Step Functions and AWS Athena."
 
-  SourceS3BucketName:
+  ArtifactBucketName:
     Type: String
     MinLength: "1"
     Description: "Name of the S3 bucket containing source .zip files."
 
-  SourceS3Key:
+  LambdaSourceS3Key:
     Type: String
     MinLength: "1"
     Description: "Name of the S3 key of Athena Runner lambda function .zip file."
@@ -53,46 +53,59 @@ Resources:
             - lambda.amazonaws.com
           Action:
           - sts:AssumeRole
-      ManagedPolicyArns:
-      - arn:aws:iam::aws:policy/AmazonS3FullAccess
-      - arn:aws:iam::aws:policy/AmazonDynamoDBFullAccess
-      - arn:aws:iam::aws:policy/AmazonSNSFullAccess
-      - arn:aws:iam::aws:policy/CloudWatchLogsFullAccess
-      - arn:aws:iam::aws:policy/AWSStepFunctionsFullAccess
-
       Path: "/"
 
-  AmazonAthenaFullAccessPolicy:
-    Type: "AWS::IAM::Policy"
-    Properties:
-      PolicyDocument: {
-        "Version": "2012-10-17",
-        "Statement": [
-          {
-            "Effect": "Allow",
-            "Action": "athena:*",
-            "Resource": "*"
-          }
-        ]
-      }
-      PolicyName: "AmazonAthenaFullAccessForAthenaRunner"
-      Roles:
-        - !Ref AthenaRunnerLambdaExecutionRole
-
-  AWSGlueFullAccessPolicy:
+  AthenaRunnerPolicy:
     Type: "AWS::IAM::Policy"
     Properties:
       PolicyDocument: {
-        "Version": "2012-10-17",
-        "Statement": [
-          {
-            "Effect": "Allow",
-            "Action": "glue:*",
-            "Resource": "*"
-          }
-        ]
+        "Version": "2012-10-17",
+        "Statement": [{
+            "Effect": "Allow",
+            "Action": [
+              "dynamodb:GetItem",
+              "dynamodb:Query",
+              "dynamodb:PutItem",
+              "dynamodb:UpdateItem",
+              "dynamodb:DeleteItem"
+            ],
+            "Resource": !Sub "arn:aws:dynamodb:${AWS::Region}:${AWS::AccountId}:table/${DDBTableName}"
+          },
+          {
+            "Effect": "Allow",
+            "Action": [
+              "logs:CreateLogStream",
+              "logs:PutLogEvents"
+            ],
+            "Resource": !Sub "arn:aws:logs:${AWS::Region}:${AWS::AccountId}:*"
+          },
+          {
+            "Effect": "Allow",
+            "Action": "logs:CreateLogGroup",
+            "Resource": "*"
+          },
+          {
+            "Effect": "Allow",
+            "Action": [
+              "states:SendTaskSuccess",
+              "states:SendTaskFailure",
+              "states:SendTaskHeartbeat",
+              "states:GetActivityTask"
+            ],
+            "Resource": "*"
+          },
+          {
+            "Effect": "Allow",
+            "Action": [
+              "athena:StartQueryExecution",
+              "athena:GetQueryExecution",
+              "athena:GetNamedQuery"
+            ],
+            "Resource": "*"
+          }
+        ]
       }
-      PolicyName: "AWSGlueFullAccessForAthenaRunner"
+      PolicyName: "AthenaRunnerPolicy"
       Roles:
         - !Ref AthenaRunnerLambdaExecutionRole
 
@@ -104,14 +117,12 @@ Resources:
       Handler: "athenarunner.handler"
       Role: !GetAtt AthenaRunnerLambdaExecutionRole.Arn
       Code:
-        S3Bucket: !Ref SourceS3BucketName
-        S3Key: !Ref SourceS3Key
+        S3Bucket: !Ref ArtifactBucketName
+        S3Key: !Ref LambdaSourceS3Key
       Timeout: 180 #seconds
       MemorySize: 128 #MB
       Runtime: python2.7
       DependsOn:
-        - AmazonAthenaFullAccessPolicy
-        - AWSGlueFullAccessPolicy
         - AthenaRunnerLambdaExecutionRole
 
   ScheduledRule:
````
**cloudformation/glue-resources-params.json** (+8 −4)

````diff
@@ -1,14 +1,18 @@
 [
   {
-    "ParameterKey": "S3ETLScriptPath",
+    "ParameterKey": "ArtifactBucketName",
     "ParameterValue": "<NO-DEFAULT>"
   },
   {
-    "ParameterKey": "S3ETLOutputPath",
-    "ParameterValue": "<NO-DEFAULT>"
+    "ParameterKey": "ETLScriptsPrefix",
+    "ParameterValue": "scripts"
   },
   {
-    "ParameterKey": "SourceDataBucketName",
+    "ParameterKey": "DataBucketName",
     "ParameterValue": "<NO-DEFAULT>"
+  },
+  {
+    "ParameterKey": "ETLOutputPrefix",
+    "ParameterValue": "output"
   }
 ]
````

**cloudformation/glue-resources.yaml** (+45 −43)

````diff
@@ -38,60 +38,62 @@ Parameters:
     Default: "marketing_qs"
     Description: "Name of the Marketing data table in AWS Glue."
 
-  S3ETLScriptPath:
+  ETLScriptsPrefix:
     Type: String
-    MinLength: "4"
+    MinLength: "1"
     Description: "Location of the Glue job ETL scripts in S3."
 
-  S3ETLOutputPath:
+  ETLOutputPrefix:
     Type: String
-    MinLength: "10"
+    MinLength: "1"
     Description: "Name of the S3 output path to which this CloudFormation template's AWS Glue jobs are going to write ETL output."
 
-  SourceDataBucketName:
+  DataBucketName:
     Type: String
     MinLength: "1"
     Description: "Name of the S3 bucket in which the source Marketing and Sales data will be uploaded. Bucket is created by this CFT."
 
+  ArtifactBucketName:
+    Type: String
+    MinLength: "1"
+    Description: "Name of the S3 bucket in which the Marketing and Sales ETL scripts reside. Bucket is NOT created by this CFT."
+
 Resources:
 
   ### AWS GLUE RESOURCES ###
-  AWSGlueCrawlerRole:
-    Type: "AWS::IAM::Role"
-    Properties:
-      AssumeRolePolicyDocument:
-        Version: '2012-10-17'
-        Statement:
-        - Effect: Allow
-          Principal:
-            Service:
-            - glue.amazonaws.com
-          Action:
-          - sts:AssumeRole
-      ManagedPolicyArns:
-      - arn:aws:iam::aws:policy/AmazonS3FullAccess
-      - arn:aws:iam::aws:policy/AdministratorAccess
-      - arn:aws:iam::aws:policy/CloudWatchLogsFullAccess
-      Path: "/"
-
   AWSGlueJobRole:
     Type: "AWS::IAM::Role"
     Properties:
       AssumeRolePolicyDocument:
         Version: '2012-10-17'
         Statement:
-        - Effect: Allow
-          Principal:
-            Service:
-            - glue.amazonaws.com
-          Action:
-          - sts:AssumeRole
+          - Effect: Allow
+            Principal:
+              Service:
+                - glue.amazonaws.com
+            Action:
+              - sts:AssumeRole
+      Policies:
+        - PolicyName: root
+          PolicyDocument:
+            Version: 2012-10-17
+            Statement:
+              - Effect: Allow
+                Action:
+                  - "s3:GetObject"
+                  - "s3:PutObject"
+                  - "s3:ListBucket"
+                  - "s3:DeleteObject"
+                Resource:
+                  - !Sub "arn:aws:s3:::${DataBucketName}"
+                  - !Sub "arn:aws:s3:::${DataBucketName}/*"
+                  - !Sub "arn:aws:s3:::${ArtifactBucketName}"
+                  - !Sub "arn:aws:s3:::${ArtifactBucketName}/*"
       ManagedPolicyArns:
-      - arn:aws:iam::aws:policy/AmazonS3FullAccess
-      - arn:aws:iam::aws:policy/AdministratorAccess
-      - arn:aws:iam::aws:policy/CloudWatchLogsFullAccess
+        - arn:aws:iam::aws:policy/service-role/AWSGlueServiceRole
       Path: "/"
 
+
   MarketingAndSalesDatabase:
     Type: "AWS::Glue::Database"
     Properties:
@@ -170,7 +172,7 @@
         }
         SerializationLibrary: "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"
         Compressed: False
-        Location: !Sub "s3://${SourceDataBucketName}/sales/"
+        Location: !Sub "s3://${DataBucketName}/sales/"
         Retention: 0
       Name: !Ref SalesPipelineTableName
       DatabaseName: !Ref MarketingAndSalesDatabaseName
@@ -260,7 +262,7 @@
         }
         SerializationLibrary: "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"
         Compressed: False
-        Location: !Sub "s3://${SourceDataBucketName}/marketing/"
+        Location: !Sub "s3://${DataBucketName}/marketing/"
         Retention: 0
       Name: !Ref MarketingTableName
       DatabaseName: !Ref MarketingAndSalesDatabaseName
@@ -273,12 +275,12 @@
       Name: "ProcessSalesData"
       Command: {
         "Name" : "glueetl",
-        "ScriptLocation" : !Sub "${S3ETLScriptPath}/process_sales_data.py"
+        "ScriptLocation": !Sub "s3://${ArtifactBucketName}/${ETLScriptsPrefix}/process_sales_data.py"
       }
       DefaultArguments: {
         "--database_name" : !Ref MarketingAndSalesDatabaseName,
         "--table_name" : !Ref SalesPipelineTableName,
-        "--s3_output_path" : !Sub "${S3ETLOutputPath}/tmp/sales"
+        "--s3_output_path": !Sub "s3://${DataBucketName}/${ETLOutputPrefix}/tmp/sales"
       }
       MaxRetries: 0
       Description: "Process Sales Pipeline data."
@@ -291,12 +293,12 @@
       Name: "ProcessMarketingData"
       Command: {
         "Name" : "glueetl",
-        "ScriptLocation" : !Sub "${S3ETLScriptPath}/process_marketing_data.py"
+        "ScriptLocation": !Sub "s3://${ArtifactBucketName}/${ETLScriptsPrefix}/process_marketing_data.py"
       }
       DefaultArguments: {
         "--database_name" : !Ref MarketingAndSalesDatabaseName,
         "--table_name" : !Ref MarketingTableName,
-        "--s3_output_path" : !Sub "${S3ETLOutputPath}/tmp/marketing"
+        "--s3_output_path": !Sub "s3://${DataBucketName}/${ETLOutputPrefix}/tmp/marketing"
       }
       MaxRetries: 0
       Description: "Process Marketing data."
@@ -309,13 +311,13 @@
       Name: "JoinMarketingAndSalesData"
       Command: {
         "Name" : "glueetl",
-        "ScriptLocation" : !Sub "${S3ETLScriptPath}/join_marketing_and_sales_data.py"
+        "ScriptLocation": !Sub "s3://${ArtifactBucketName}/${ETLScriptsPrefix}/join_marketing_and_sales_data.py"
       }
       DefaultArguments: {
-        "--database_name" : !Ref MarketingAndSalesDatabaseName,
-        "--s3_output_path" : !Sub "${S3ETLOutputPath}/sales-leads-influenced",
-        "--s3_sales_data_path" : !Sub "${S3ETLOutputPath}/tmp/sales",
-        "--s3_marketing_data_path" : !Sub "${S3ETLOutputPath}/tmp/marketing"
+        "--database_name": !Ref MarketingAndSalesDatabaseName,
+        "--s3_output_path": !Sub "s3://${DataBucketName}/${ETLOutputPrefix}/sales-leads-influenced",
+        "--s3_sales_data_path": !Sub "s3://${DataBucketName}/${ETLOutputPrefix}/tmp/sales",
+        "--s3_marketing_data_path": !Sub "s3://${DataBucketName}/${ETLOutputPrefix}/tmp/marketing"
       }
       MaxRetries: 0
       Description: "Join Marketing and Sales data."
````

**cloudformation/gluerunner-lambda-params.json** (+2 −2)

````diff
@@ -1,10 +1,10 @@
 [
   {
-    "ParameterKey": "SourceS3BucketName",
+    "ParameterKey": "ArtifactBucketName",
     "ParameterValue": "<NO-DEFAULT>"
   },
   {
-    "ParameterKey": "SourceS3Key",
+    "ParameterKey": "LambdaSourceS3Key",
     "ParameterValue": "src/gluerunner.zip"
   },
   {
````

**cloudformation/gluerunner-lambda.yaml** (+50 −22)

````diff
@@ -29,12 +29,12 @@ Parameters:
     Default: "gluerunner"
     Description: "Name of the Lambda function that mediates between AWS Step Functions and AWS Glue."
 
-  SourceS3BucketName:
+  ArtifactBucketName:
     Type: String
     MinLength: "1"
     Description: "Name of the S3 bucket containing source .zip files."
 
-  SourceS3Key:
+  LambdaSourceS3Key:
     Type: String
     MinLength: "1"
     Description: "Name of the S3 key of Glue Runner lambda function .zip file."
@@ -53,29 +53,58 @@ Resources:
             - lambda.amazonaws.com
           Action:
           - sts:AssumeRole
-      ManagedPolicyArns:
-      - arn:aws:iam::aws:policy/AmazonS3FullAccess
-      - arn:aws:iam::aws:policy/AmazonDynamoDBFullAccess
-      - arn:aws:iam::aws:policy/AmazonSNSFullAccess
-      - arn:aws:iam::aws:policy/CloudWatchLogsFullAccess
-      - arn:aws:iam::aws:policy/AWSStepFunctionsFullAccess
-
       Path: "/"
 
-  AWSGlueFullAccessPolicy:
+  GlueRunnerPolicy:
     Type: "AWS::IAM::Policy"
     Properties:
       PolicyDocument: {
-        "Version": "2012-10-17",
-        "Statement": [
-          {
-            "Effect": "Allow",
-            "Action": "glue:*",
-            "Resource": "*"
-          }
-        ]
+        "Version": "2012-10-17",
+        "Statement": [{
+            "Effect": "Allow",
+            "Action": [
+              "dynamodb:GetItem",
+              "dynamodb:Query",
+              "dynamodb:PutItem",
+              "dynamodb:UpdateItem",
+              "dynamodb:DeleteItem"
+            ],
+            "Resource": !Sub "arn:aws:dynamodb:${AWS::Region}:${AWS::AccountId}:table/${DDBTableName}"
+          },
+          {
+            "Effect": "Allow",
+            "Action": [
+              "logs:CreateLogStream",
+              "logs:PutLogEvents"
+            ],
+            "Resource": !Sub "arn:aws:logs:${AWS::Region}:${AWS::AccountId}:*"
+          },
+          {
+            "Effect": "Allow",
+            "Action": "logs:CreateLogGroup",
+            "Resource": "*"
+          },
+          {
+            "Effect": "Allow",
+            "Action": [
+              "states:SendTaskSuccess",
+              "states:SendTaskFailure",
+              "states:SendTaskHeartbeat",
+              "states:GetActivityTask"
+            ],
+            "Resource": "*"
+          },
+          {
+            "Effect": "Allow",
+            "Action": [
+              "glue:StartJobRun",
+              "glue:GetJobRun"
+            ],
+            "Resource": "*"
+          }
+        ]
       }
-      PolicyName: "AWSGlueFullAccessForGlueRunner"
+      PolicyName: "GlueRunnerPolicy"
      Roles:
         - !Ref GlueRunnerLambdaExecutionRole
 
@@ -87,13 +116,12 @@ Resources:
       Handler: "gluerunner.handler"
       Role: !GetAtt GlueRunnerLambdaExecutionRole.Arn
       Code:
-        S3Bucket: !Ref SourceS3BucketName
-        S3Key: !Ref SourceS3Key
+        S3Bucket: !Ref ArtifactBucketName
+        S3Key: !Ref LambdaSourceS3Key
       Timeout: 180 #seconds
       MemorySize: 128 #MB
       Runtime: python2.7
       DependsOn:
-        - AWSGlueFullAccessPolicy
         - GlueRunnerLambdaExecutionRole
 
   ScheduledRule:
````
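Note on the least-privilege change: the inline `GlueRunnerPolicy` above replaces five broad managed policies with only the DynamoDB, CloudWatch Logs, Step Functions, and Glue actions the function actually calls. A narrowed policy can be sanity-checked with the IAM policy simulator before deploying (a sketch, not part of this commit):

```python
# Verify the narrowed policy still allows the two Glue actions Glue Runner needs.
import json
import boto3

policy = {
    "Version": "2012-10-17",
    "Statement": [{
        "Effect": "Allow",
        "Action": ["glue:StartJobRun", "glue:GetJobRun"],
        "Resource": "*"
    }]
}

iam = boto3.client('iam')
resp = iam.simulate_custom_policy(
    PolicyInputList=[json.dumps(policy)],
    ActionNames=['glue:StartJobRun', 'glue:GetJobRun']
)
for result in resp['EvaluationResults']:
    print(result['EvalActionName'], result['EvalDecision'])  # expect 'allowed'
```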
**cloudformation/step-functions-resources-params.json**

````diff
@@ -1,18 +1,18 @@
 [
   {
-    "ParameterKey": "SourceS3BucketName",
+    "ParameterKey": "ArtifactBucketName",
     "ParameterValue": "<NO-DEFAULT>"
   },
   {
-    "ParameterKey": "SourceS3Key",
+    "ParameterKey": "LambdaSourceS3Key",
     "ParameterValue": "src/ons3objectcreated.zip"
   },
   {
     "ParameterKey": "GlueRunnerActivityName",
     "ParameterValue": "GlueRunnerActivity"
   },
   {
-    "ParameterKey": "SourceDataBucketName",
-    "ParameterValue": "<NO-DEFAULT>"
+    "ParameterKey": "DataBucketName",
+    "ParameterValue": "etl-orchestrator-745-data"
   }
 ]
````

**cloudformation/step-functions-resources.yaml** (+55 −19)

````diff
@@ -48,17 +48,17 @@ Parameters:
     Default: "AthenaRunnerActivity"
     Description: "Name of the AWS Step Functions activity to be polled by AthenaRunner."
 
-  SourceS3BucketName:
+  ArtifactBucketName:
     Type: String
     MinLength: "1"
-    Description: "Name of the S3 bucket containing source .zip files."
+    Description: "Name of the S3 bucket containing source .zip files. Bucket is NOT created by this CFT."
 
-  SourceS3Key:
+  LambdaSourceS3Key:
     Type: String
     MinLength: "1"
     Description: "Name of the S3 key of Glue Runner lambda function .zip file."
 
-  SourceDataBucketName:
+  DataBucketName:
     Type: String
     MinLength: "1"
     Description: "Name of the S3 bucket in which the source Marketing and Sales data will be uploaded. Bucket is created by this CFT."
@@ -104,12 +104,12 @@ Resources:
   WaitForSalesDataActivity:
     Type: "AWS::StepFunctions::Activity"
     Properties:
-      Name: !Sub "${SourceDataBucketName}-SalesPipeline_QuickSightSample.csv"
+      Name: !Sub "${DataBucketName}-SalesPipeline_QuickSightSample.csv"
 
   WaitForMarketingDataActivity:
     Type: "AWS::StepFunctions::Activity"
     Properties:
-      Name: !Sub "${SourceDataBucketName}-MarketingData_QuickSightSample.csv"
+      Name: !Sub "${DataBucketName}-MarketingData_QuickSightSample.csv"
 
 
   # State Machine resources
@@ -141,9 +141,9 @@ Resources:
         GlueDatabaseName: !Ref MarketingAndSalesDatabaseName,
         GlueTableName: !Ref MarketingTableName,
         AthenaRunnerActivityArn: !Ref AthenaRunnerActivity,
-        AthenaResultOutputLocation: !Sub "s3://${SourceDataBucketName}/athena-runner-output/",
+        AthenaResultOutputLocation: !Sub "s3://${DataBucketName}/athena-runner-output/",
         AthenaResultEncryptionOption: "SSE_S3"
-      }
+      }
       RoleArn: !GetAtt StateExecutionRole.Arn
 
   MarketingAndSalesETLOrchestrator:
@@ -256,16 +256,53 @@ Resources:
         - Effect: Allow
           Principal:
             Service:
-            - lambda.amazonaws.com
+              - lambda.amazonaws.com
           Action:
-          - sts:AssumeRole
+            - sts:AssumeRole
       ManagedPolicyArns:
       - arn:aws:iam::aws:policy/AmazonS3FullAccess
       - arn:aws:iam::aws:policy/AmazonSNSFullAccess
       - arn:aws:iam::aws:policy/CloudWatchLogsFullAccess
       - arn:aws:iam::aws:policy/AWSStepFunctionsFullAccess
       Path: "/"
 
+  OnS3ObjectCreatedPolicy:
+    Type: "AWS::IAM::Policy"
+    Properties:
+      PolicyDocument: {
+        "Version": "2012-10-17",
+        "Statement": [
+          {
+            "Effect": "Allow",
+            "Action": [
+              "logs:CreateLogStream",
+              "logs:PutLogEvents"
+            ],
+            "Resource": !Sub "arn:aws:logs:${AWS::Region}:${AWS::AccountId}:*"
+          },
+          {
+            "Effect": "Allow",
+            "Action": "logs:CreateLogGroup",
+            "Resource": "*"
+          },
+          {
+            "Effect": "Allow",
+            "Action": [
+              "states:SendTaskSuccess",
+              "states:SendTaskFailure",
+              "states:SendTaskHeartbeat",
+              "states:GetActivityTask"
+            ],
+            "Resource": "*"
+          }
+        ]
+      }
+      PolicyName: "OnS3ObjectCreatedPolicy"
+      Roles:
+        - !Ref OnS3ObjectCreatedLambdaExecutionRole
+
+
+
   OnS3ObjectCreatedLambdaFunction:
     Type: "AWS::Lambda::Function"
     Properties:
@@ -274,8 +311,8 @@ Resources:
       Handler: "ons3objectcreated.handler"
       Role: !GetAtt OnS3ObjectCreatedLambdaExecutionRole.Arn
       Code:
-        S3Bucket: !Ref SourceS3BucketName
-        S3Key: !Ref SourceS3Key
+        S3Bucket: !Ref ArtifactBucketName
+        S3Key: !Ref LambdaSourceS3Key
       Timeout: 180 #seconds
       MemorySize: 128 #MB
       Runtime: python2.7
@@ -284,14 +321,13 @@ Resources:
 
   # For every bucket that needs to invoke OnS3ObjectCreated:
 
-  SourceDataBucket:
+  DataBucket:
     Type: "AWS::S3::Bucket"
     Properties:
-      BucketName: !Ref SourceDataBucketName
+      BucketName: !Ref DataBucketName
       NotificationConfiguration:
         LambdaConfigurations:
-          -
-            Function: !GetAtt OnS3ObjectCreatedLambdaFunction.Arn
+          - Function: !GetAtt OnS3ObjectCreatedLambdaFunction.Arn
             Event: "s3:ObjectCreated:*"
             Filter:
               S3Key:
@@ -300,13 +336,13 @@ Resources:
                   Name: "suffix"
                   Value: "csv"
     DependsOn:
-      - SourceDataBucketPermission
+      - DataBucketPermission
 
-  SourceDataBucketPermission:
+  DataBucketPermission:
     Type: "AWS::Lambda::Permission"
     Properties:
       Action: 'lambda:InvokeFunction'
       FunctionName: !Ref OnS3ObjectCreatedLambdaFunction
       Principal: s3.amazonaws.com
       SourceAccount: !Ref "AWS::AccountId"
-      SourceArn: !Sub "arn:aws:s3:::${SourceDataBucketName}"
+      SourceArn: !Sub "arn:aws:s3:::${DataBucketName}"
````
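Note on the `states:*` permissions: they are exactly what the Step Functions activity protocol requires; a worker long-polls `GetActivityTask`, then reports heartbeat/success/failure with the task token. A minimal sketch of that protocol (the activity ARN and worker name below are hypothetical):

```python
import boto3
from botocore.client import Config

# GetActivityTask long-polls for up to 60 seconds, so the read timeout
# must be larger than that.
sfn = boto3.client('stepfunctions', config=Config(read_timeout=70))

task = sfn.get_activity_task(
    activityArn='arn:aws:states:us-east-1:111122223333:activity:GlueRunnerActivity',
    workerName='gluerunner'
)
token = task.get('taskToken')
if token:
    sfn.send_task_heartbeat(taskToken=token)             # task still in progress
    sfn.send_task_success(taskToken=token, output='{}')  # report completion
```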

**glue-scripts/process_marketing_data.py** (−1)

````diff
@@ -45,7 +45,6 @@
     ('return visitors', 'bigint', 'return_visitors', 'bigint'),
 ], transformation_ctx='applymapping1')
 
-print 'Count: ', mktg_DyF.count()
 mktg_DyF.printSchema()
 
 mktg_DF = mktg_DyF.toDF()
````

**glue-scripts/process_sales_data.py** (−1)

````diff
@@ -51,7 +51,6 @@
     ('last status entry', 'string', 'last_status_entry', 'string'),
 ], transformation_ctx='applymapping1')
 
-print 'Count: ', sales_DyF.count()
 sales_DyF.printSchema()
 
 sales_DF = sales_DyF.toDF()
````

**lambda/athenarunner/athenarunner.py** (+91 −79)

````diff
@@ -34,7 +34,7 @@ def load_config():
 def is_json(jsonstring):
     try:
         json_object = json.loads(jsonstring)
-    except ValueError, e:
+    except ValueError:
         return False
     return True
 
@@ -177,102 +177,114 @@ def check_athena_queries(config):
         athena_query_execution_id = item['athena_query_execution_id']
         sfn_task_token = item['sfn_task_token']
 
-        logger.debug('Polling Athena query execution status..')
-
-        # Query athena query execution status...
-        athena_resp = athena.get_query_execution(
-            QueryExecutionId=athena_query_execution_id
-        )
-
-        query_exec_resp = athena_resp['QueryExecution']
-        query_exec_state = query_exec_resp['Status']['State']
-        query_state_change_reason = query_exec_resp['Status'].get('StateChangeReason', '')
-
-        logger.debug('Query with Execution Id {} is currently in state "{}"'.format(query_exec_state, query_state_change_reason))
-
-        # If Athena query completed, return success:
-        if query_exec_state in ['SUCCEEDED']:
-
-            logger.info('Query with Execution Id {} SUCCEEDED.'.format(athena_query_execution_id))
-
-            # Build an output dict and format it as JSON
-            task_output_dict = {
-                "AthenaQueryString": query_exec_resp['Query'],
-                "AthenaQueryExecutionId": athena_query_execution_id,
-                "AthenaQueryExecutionState": query_exec_state,
-                "AthenaQueryExecutionStateChangeReason": query_state_change_reason,
-                "AthenaQuerySubmissionDateTime": query_exec_resp['Status'].get('SubmissionDateTime', '').strftime('%x, %-I:%M %p %Z'),
-                "AthenaQueryCompletionDateTime": query_exec_resp['Status'].get('CompletionDateTime', '').strftime(
-                    '%x, %-I:%M %p %Z'),
-                "AthenaQueryEngineExecutionTimeInMillis": query_exec_resp['Statistics'].get('EngineExecutionTimeInMillis', 0),
-                "AthenaQueryDataScannedInBytes": query_exec_resp['Statistics'].get('DataScannedInBytes', 0)
-            }
-
-            task_output_json = json.dumps(task_output_dict)
-
-            logger.info('Sending "Task Succeeded" signal to Step Functions..')
-            sfn_resp = sfn.send_task_success(
-                taskToken=sfn_task_token,
-                output=task_output_json
-            )
-
-            # Delete item
-            resp = ddb_table.delete_item(
-                Key={
-                    'sfn_activity_arn': sfn_activity_arn,
-                    'athena_query_execution_id': athena_query_execution_id
-                }
-            )
-
-            # Task succeeded, next item
-
-        elif query_exec_state in ['RUNNING', 'QUEUED']:
-            logger.debug('Query with Execution Id {} is in state hasn\'t completed yet.'.format(athena_query_execution_id))
-
-            # Send heartbeat
-            sfn_resp = sfn.send_task_heartbeat(
-                taskToken=sfn_task_token
-            )
-
-            logger.debug('Heartbeat sent to Step Functions.')
-
-            # Heartbeat sent, next item
-
-        elif query_exec_state in ['FAILED', 'CANCELLED']:
-
-            message = 'Athena query with Execution Id "{}" failed. Last state: {}. Error message: {}'\
-                .format(athena_query_execution_id, query_exec_state, query_state_change_reason)
-
-            logger.error(message)
-
-            message_json={
-                "AthenaQueryString": query_exec_resp['Query'],
-                "AthenaQueryExecutionId": athena_query_execution_id,
-                "AthenaQueryExecutionState": query_exec_state,
-                "AthenaQueryExecutionStateChangeReason": query_state_change_reason,
-                "AthenaQuerySubmissionDateTime": query_exec_resp['Status'].get('SubmissionDateTime', '').strftime('%x, %-I:%M %p %Z'),
-                "AthenaQueryCompletionDateTime": query_exec_resp['Status'].get('CompletionDateTime', '').strftime(
-                    '%x, %-I:%M %p %Z'),
-                "AthenaQueryEngineExecutionTimeInMillis": query_exec_resp['Statistics'].get('EngineExecutionTimeInMillis', 0),
-                "AthenaQueryDataScannedInBytes": query_exec_resp['Statistics'].get('DataScannedInBytes', 0)
-            }
-
-            sfn_resp = sfn.send_task_failure(
-                taskToken=sfn_task_token,
-                cause=json.dumps(message_json),
-                error='AthenaQueryFailedError'
-            )
-
-            # Delete item
-            resp = ddb_table.delete_item(
-                Key={
-                    'sfn_activity_arn': sfn_activity_arn,
-                    'athena_query_execution_id': athena_query_execution_id
-                }
-            )
-
-            logger.error(message)
+        try:
+            logger.debug('Polling Athena query execution status..')
+
+            # Query athena query execution status...
+            athena_resp = athena.get_query_execution(
+                QueryExecutionId=athena_query_execution_id
+            )
+
+            query_exec_resp = athena_resp['QueryExecution']
+            query_exec_state = query_exec_resp['Status']['State']
+            query_state_change_reason = query_exec_resp['Status'].get('StateChangeReason', '')
+
+            logger.debug('Query with Execution Id {} is currently in state "{}"'.format(query_exec_state,
+                                                                                        query_state_change_reason))
+
+            # If Athena query completed, return success:
+            if query_exec_state in ['SUCCEEDED']:
+
+                logger.info('Query with Execution Id {} SUCCEEDED.'.format(athena_query_execution_id))
+
+                # Build an output dict and format it as JSON
+                task_output_dict = {
+                    "AthenaQueryString": query_exec_resp['Query'],
+                    "AthenaQueryExecutionId": athena_query_execution_id,
+                    "AthenaQueryExecutionState": query_exec_state,
+                    "AthenaQueryExecutionStateChangeReason": query_state_change_reason,
+                    "AthenaQuerySubmissionDateTime": query_exec_resp['Status'].get('SubmissionDateTime', '').strftime(
+                        '%x, %-I:%M %p %Z'),
+                    "AthenaQueryCompletionDateTime": query_exec_resp['Status'].get('CompletionDateTime', '').strftime(
+                        '%x, %-I:%M %p %Z'),
+                    "AthenaQueryEngineExecutionTimeInMillis": query_exec_resp['Statistics'].get(
+                        'EngineExecutionTimeInMillis', 0),
+                    "AthenaQueryDataScannedInBytes": query_exec_resp['Statistics'].get('DataScannedInBytes', 0)
+                }
+
+                task_output_json = json.dumps(task_output_dict)
+
+                logger.info('Sending "Task Succeeded" signal to Step Functions..')
+                sfn_resp = sfn.send_task_success(
+                    taskToken=sfn_task_token,
+                    output=task_output_json
+                )
+
+                # Delete item
+                resp = ddb_table.delete_item(
+                    Key={
+                        'sfn_activity_arn': sfn_activity_arn,
+                        'athena_query_execution_id': athena_query_execution_id
+                    }
+                )
+
+                # Task succeeded, next item
+
+            elif query_exec_state in ['RUNNING', 'QUEUED']:
+                logger.debug(
+                    'Query with Execution Id {} is in state hasn\'t completed yet.'.format(athena_query_execution_id))
+
+                # Send heartbeat
+                sfn_resp = sfn.send_task_heartbeat(
+                    taskToken=sfn_task_token
+                )
+
+                logger.debug('Heartbeat sent to Step Functions.')
+
+                # Heartbeat sent, next item
+
+            elif query_exec_state in ['FAILED', 'CANCELLED']:
+
+                message = 'Athena query with Execution Id "{}" failed. Last state: {}. Error message: {}' \
+                    .format(athena_query_execution_id, query_exec_state, query_state_change_reason)
+
+                logger.error(message)
+
+                message_json = {
+                    "AthenaQueryString": query_exec_resp['Query'],
+                    "AthenaQueryExecutionId": athena_query_execution_id,
+                    "AthenaQueryExecutionState": query_exec_state,
+                    "AthenaQueryExecutionStateChangeReason": query_state_change_reason,
+                    "AthenaQuerySubmissionDateTime": query_exec_resp['Status'].get('SubmissionDateTime', '').strftime(
+                        '%x, %-I:%M %p %Z'),
+                    "AthenaQueryCompletionDateTime": query_exec_resp['Status'].get('CompletionDateTime', '').strftime(
+                        '%x, %-I:%M %p %Z'),
+                    "AthenaQueryEngineExecutionTimeInMillis": query_exec_resp['Statistics'].get(
+                        'EngineExecutionTimeInMillis', 0),
+                    "AthenaQueryDataScannedInBytes": query_exec_resp['Statistics'].get('DataScannedInBytes', 0)
+                }
+
+                sfn_resp = sfn.send_task_failure(
+                    taskToken=sfn_task_token,
+                    cause=json.dumps(message_json),
+                    error='AthenaQueryFailedError'
+                )
+
+                # Delete item
+                resp = ddb_table.delete_item(
+                    Key={
+                        'sfn_activity_arn': sfn_activity_arn,
+                        'athena_query_execution_id': athena_query_execution_id
+                    }
+                )
+
+                logger.error(message)
+
+        except Exception as e:
+            logger.error('There was a problem checking status of Athena query..')
+            logger.error('Glue job Run Id "{}"'.format(athena_query_execution_id))
+            logger.error('Reason: {}'.format(e.message))
+            logger.info('Checking next Athena query.')
 
         # Task failed, next item
````
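Note on the new `except` blocks in both runners: they log `e.message`, which exists on Python 2 exceptions (the templates pin these functions to the `python2.7` runtime). If the runtime is ever upgraded, `str(e)` is the portable spelling:

```python
import logging

logger = logging.getLogger(__name__)

try:
    raise RuntimeError('boom')
except Exception as e:
    # e.message is Python 2 only; str(e) works on Python 2 and 3.
    logger.error('Reason: {}'.format(str(e)))
```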

**lambda/gluerunner/gluerunner-config.json** (+1 −1)

````diff
@@ -1,5 +1,5 @@
 {
-  "sfn_activity_arn": "<NO-DEFAULT>",
+  "sfn_activity_arn": "arn:aws:states:<AWS-REGION>:<AWS-ACCOUNTID>:activity:GlueRunnerActivity",
   "sfn_worker_name": "gluerunner",
   "ddb_table": "GlueRunnerActiveJobs",
   "ddb_query_limit": 50,
````
"ddb_query_limit": 50,

**lambda/gluerunner/gluerunner.py** (+76 −72)

````diff
@@ -34,7 +34,7 @@ def load_config():
 def is_json(jsonstring):
     try:
         json_object = json.loads(jsonstring)
-    except ValueError, e:
+    except ValueError:
         return False
     return True
 
@@ -146,102 +146,106 @@ def check_glue_jobs(config):
 
     # For each item...
     for item in ddb_resp['Items']:
-
         glue_job_run_id = item['glue_job_run_id']
         glue_job_name = item['glue_job_name']
         sfn_task_token = item['sfn_task_token']
 
-        logger.debug('Polling Glue job run status..')
-
-        # Query glue job status...
-        glue_resp = glue.get_job_run(
-            JobName=glue_job_name,
-            RunId=glue_job_run_id,
-            PredecessorsIncluded=False
-        )
-
-        job_run_state = glue_resp['JobRun']['JobRunState']
-        job_run_error_message = glue_resp['JobRun'].get('ErrorMessage', '')
-
-        logger.debug('Job with Run Id {} is currently in state "{}"'.format(glue_job_run_id, job_run_state))
-
-        # If Glue job completed, return success:
-        if job_run_state in ['SUCCEEDED']:
-
-            logger.info('Job with Run Id {} SUCCEEDED.'.format(glue_job_run_id))
-
-            # Build an output dict and format it as JSON
-            task_output_dict = {
-                "GlueJobName": glue_job_name,
-                "GlueJobRunId": glue_job_run_id,
-                "GlueJobRunState": job_run_state,
-                "GlueJobStartedOn": glue_resp['JobRun'].get('StartedOn', '').strftime('%x, %-I:%M %p %Z'),
-                "GlueJobCompletedOn": glue_resp['JobRun'].get('CompletedOn', '').strftime('%x, %-I:%M %p %Z'),
-                "GlueJobLastModifiedOn": glue_resp['JobRun'].get('LastModifiedOn', '').strftime('%x, %-I:%M %p %Z')
-            }
-
-            task_output_json = json.dumps(task_output_dict)
-
-            logger.info('Sending "Task Succeeded" signal to Step Functions..')
-            sfn_resp = sfn.send_task_success(
-                taskToken=sfn_task_token,
-                output=task_output_json
-            )
-
-            # Delete item
-            resp = ddb_table.delete_item(
-                Key={
-                    'sfn_activity_arn': sfn_activity_arn,
-                    'glue_job_run_id': glue_job_run_id
-                }
-            )
-
-            # Task succeeded, next item
-
-        elif job_run_state in ['STARTING', 'RUNNING', 'STARTING', 'STOPPING']:
-            logger.debug('Job with Run Id {} hasn\'t succeeded yet.'.format(glue_job_run_id))
-
-            # Send heartbeat
-            sfn_resp = sfn.send_task_heartbeat(
-                taskToken=sfn_task_token
-            )
-
-            logger.debug('Heartbeat sent to Step Functions.')
-
-            # Heartbeat sent, next item
-
-        elif job_run_state in ['FAILED', 'STOPPED']:
-
-            message = 'Glue job "{}" run with Run Id "{}" failed. Last state: {}. Error message: {}'\
-                .format(glue_job_name, glue_job_run_id[:8] + "...", job_run_state, job_run_error_message)
-
-            logger.error(message)
-
-            message_json={
-                'glue_job_name': glue_job_name,
-                'glue_job_run_id': glue_job_run_id,
-                'glue_job_run_state': job_run_state,
-                'glue_job_run_error_msg': job_run_error_message
-            }
-
-            sfn_resp = sfn.send_task_failure(
-                taskToken=sfn_task_token,
-                cause=json.dumps(message_json),
-                error='GlueJobFailedError'
-            )
-
-            # Delete item
-            resp = ddb_table.delete_item(
-                Key={
-                    'sfn_activity_arn': sfn_activity_arn,
-                    'glue_job_run_id': glue_job_run_id
-                }
-            )
-
-            logger.error(message)
-
-        # Task failed, next item
+        try:
+
+            logger.debug('Polling Glue job run status..')
+
+            # Query glue job status...
+            glue_resp = glue.get_job_run(
+                JobName=glue_job_name,
+                RunId=glue_job_run_id,
+                PredecessorsIncluded=False
+            )
+
+            job_run_state = glue_resp['JobRun']['JobRunState']
+            job_run_error_message = glue_resp['JobRun'].get('ErrorMessage', '')
+
+            logger.debug('Job with Run Id {} is currently in state "{}"'.format(glue_job_run_id, job_run_state))
+
+            # If Glue job completed, return success:
+            if job_run_state in ['SUCCEEDED']:
+
+                logger.info('Job with Run Id {} SUCCEEDED.'.format(glue_job_run_id))
+
+                # Build an output dict and format it as JSON
+                task_output_dict = {
+                    "GlueJobName": glue_job_name,
+                    "GlueJobRunId": glue_job_run_id,
+                    "GlueJobRunState": job_run_state,
+                    "GlueJobStartedOn": glue_resp['JobRun'].get('StartedOn', '').strftime('%x, %-I:%M %p %Z'),
+                    "GlueJobCompletedOn": glue_resp['JobRun'].get('CompletedOn', '').strftime('%x, %-I:%M %p %Z'),
+                    "GlueJobLastModifiedOn": glue_resp['JobRun'].get('LastModifiedOn', '').strftime('%x, %-I:%M %p %Z')
+                }
+
+                task_output_json = json.dumps(task_output_dict)
+
+                logger.info('Sending "Task Succeeded" signal to Step Functions..')
+                sfn_resp = sfn.send_task_success(
+                    taskToken=sfn_task_token,
+                    output=task_output_json
+                )
+
+                # Delete item
+                resp = ddb_table.delete_item(
+                    Key={
+                        'sfn_activity_arn': sfn_activity_arn,
+                        'glue_job_run_id': glue_job_run_id
+                    }
+                )
+
+                # Task succeeded, next item
+
+            elif job_run_state in ['STARTING', 'RUNNING', 'STARTING', 'STOPPING']:
+                logger.debug('Job with Run Id {} hasn\'t succeeded yet.'.format(glue_job_run_id))
+
+                # Send heartbeat
+                sfn_resp = sfn.send_task_heartbeat(
+                    taskToken=sfn_task_token
+                )
+
+                logger.debug('Heartbeat sent to Step Functions.')
+
+                # Heartbeat sent, next item
+
+            elif job_run_state in ['FAILED', 'STOPPED']:
+
+                message = 'Glue job "{}" run with Run Id "{}" failed. Last state: {}. Error message: {}' \
+                    .format(glue_job_name, glue_job_run_id[:8] + "...", job_run_state, job_run_error_message)
+
+                logger.error(message)
+
+                message_json = {
+                    'glue_job_name': glue_job_name,
+                    'glue_job_run_id': glue_job_run_id,
+                    'glue_job_run_state': job_run_state,
+                    'glue_job_run_error_msg': job_run_error_message
+                }
+
+                sfn_resp = sfn.send_task_failure(
+                    taskToken=sfn_task_token,
+                    cause=json.dumps(message_json),
+                    error='GlueJobFailedError'
+                )
+
+                # Delete item
+                resp = ddb_table.delete_item(
+                    Key={
+                        'sfn_activity_arn': sfn_activity_arn,
+                        'glue_job_run_id': glue_job_run_id
+                    }
+                )
+
+                logger.error(message)
+            # Task failed, next item
+        except Exception as e:
+            logger.error('There was a problem checking status of Glue job "{}"..'.format(glue_job_name))
+            logger.error('Glue job Run Id "{}"'.format(glue_job_run_id))
+            logger.error('Reason: {}'.format(e.message))
+            logger.info('Checking next Glue job.')
 
 
 glue = boto3.client('glue')
````

**lambda/ons3objectcreated/ons3objectcreated.py** (−1)

````diff
@@ -6,7 +6,6 @@
 import logging, logging.config
 from botocore.client import Config
 
-s3 = boto3.client('s3')
 # Because Step Functions client uses long polling, read timeout has to be > 60 seconds
 sfn_client_config = Config(connect_timeout=50, read_timeout=70)
 sfn = boto3.client('stepfunctions', config=sfn_client_config)
````

**lambda/s3-deployment-descriptor.json** (+6 −6)

````diff
@@ -1,14 +1,14 @@
 {
   "athenarunner": {
-    "SourceS3BucketName": "<NO-DEFAULT>",
-    "SourceS3Key":"src/athenarunner.zip"
+    "ArtifactBucketName": "<NO-DEFAULT>",
+    "LambdaSourceS3Key": "src/athenarunner.zip"
   },
   "gluerunner": {
-    "SourceS3BucketName": "<NO-DEFAULT>",
-    "SourceS3Key":"src/gluerunner.zip"
+    "ArtifactBucketName": "<NO-DEFAULT>",
+    "LambdaSourceS3Key": "src/gluerunner.zip"
   },
   "ons3objectcreated": {
-    "SourceS3BucketName": "<NO-DEFAULT>",
-    "SourceS3Key":"src/ons3objectcreated.zip"
+    "ArtifactBucketName": "<NO-DEFAULT>",
+    "LambdaSourceS3Key": "src/ons3objectcreated.zip"
   }
 }
````
