diff --git a/.gitignore b/.gitignore index 0fd6b1558..d27500680 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,9 @@ +.idea __pycache__/ .vscode build/* +.venv/ +venv/ workshop/*/.ipynb_checkpoints workshop/1-Personalization/interactions.csv workshop/1-Personalization/items.csv diff --git a/README.md b/README.md index 501a75c8f..372bc1a89 100644 --- a/README.md +++ b/README.md @@ -101,6 +101,7 @@ Here you will find several workshops in a directory structure in the notebook in If you're interested in contributing enhancements, features, or fixes to the Retail Demo Store, please see the [Developer Instructions](./Developer-Instructions.md) for details on how to setup your local environment and deployment environment. + # Delivering a Demo of the Retail Demo Store Once you have deployed the Retail Demo Store, you may want to walk through the demonstration guide to learn how to show the features the Retail Demo Store provides. diff --git a/aws/cloudformation-templates/base/_template.yaml b/aws/cloudformation-templates/base/_template.yaml index eaabab314..c7fe021a8 100644 --- a/aws/cloudformation-templates/base/_template.yaml +++ b/aws/cloudformation-templates/base/_template.yaml @@ -282,6 +282,10 @@ Outputs: Description: Optimizely SDK key Parameter Value: !GetAtt SSMParameters.Outputs.ParameterOptimizelySdkKey + ParameterIVSVideoChannelMap: + Description: Retail Demo Store video file to IVS channel mapping parameter + Value: !GetAtt SSMParameters.Outputs.ParameterIVSVideoChannelMap + ParameterSegmentWriteKey: Description: Segment write key Parameter Value: !GetAtt SSMParameters.Outputs.ParameterSegmentWriteKey diff --git a/aws/cloudformation-templates/base/authentication.yaml b/aws/cloudformation-templates/base/authentication.yaml index d393654b3..dc0bc65d1 100644 --- a/aws/cloudformation-templates/base/authentication.yaml +++ b/aws/cloudformation-templates/base/authentication.yaml @@ -2,8 +2,7 @@ AWSTemplateFormatVersion: 2010-09-09 Description: > - This template deploys the Retail Demo Store Cognito Configuration. - + This template deploys the Retail Demo Store Cognito Configuration. 
Parameters: AuthName: Type: String @@ -18,20 +17,20 @@ Resources: SNSRole: Type: AWS::IAM::Role Properties: - AssumeRolePolicyDocument: + AssumeRolePolicyDocument: Version: "2012-10-17" Statement: - Effect: "Allow" - Principal: - Service: + Principal: + Service: - "cognito-idp.amazonaws.com" - Action: + Action: - "sts:AssumeRole" Policies: - PolicyName: "CognitoSNSPolicy" - PolicyDocument: + PolicyDocument: Version: "2012-10-17" - Statement: + Statement: - Effect: "Allow" Action: "sns:publish" Resource: "*" @@ -93,7 +92,7 @@ Resources: StringAttributeConstraints: MinLength: '1' MaxLength: '200' - + # Creates a User Pool Client to be used by the identity pool UserPoolClient: Type: AWS::Cognito::UserPoolClient @@ -101,14 +100,14 @@ Resources: ClientName: !Sub ${AuthName}-client GenerateSecret: false UserPoolId: !Ref UserPool - + # Creates a federated Identity pool IdentityPool: Type: AWS::Cognito::IdentityPool Properties: IdentityPoolName: !Sub ${AuthName}Identity AllowUnauthenticatedIdentities: true - CognitoIdentityProviders: + CognitoIdentityProviders: - ClientId: !Ref UserPoolClient ProviderName: !GetAtt UserPool.ProviderName @@ -116,29 +115,29 @@ Resources: CognitoUnAuthorizedRole: Type: AWS::IAM::Role Properties: - AssumeRolePolicyDocument: + AssumeRolePolicyDocument: Version: "2012-10-17" Statement: - Effect: "Allow" - Principal: + Principal: Federated: "cognito-identity.amazonaws.com" - Action: + Action: - "sts:AssumeRoleWithWebIdentity" Condition: - StringEquals: + StringEquals: "cognito-identity.amazonaws.com:aud": !Ref IdentityPool "ForAnyValue:StringLike": "cognito-identity.amazonaws.com:amr": unauthenticated Policies: - PolicyName: "CognitoUnauthorizedPolicy" - PolicyDocument: + PolicyDocument: Version: "2012-10-17" - Statement: + Statement: - Effect: "Allow" Action: - mobiletargeting:UpdateEndpoint - mobiletargeting:PutEvents - Resource: + Resource: - !Sub "arn:aws:mobiletargeting:${AWS::Region}:${AWS::AccountId}:apps/${PinpointAppId}/*" - Effect: "Allow" Action: @@ -148,7 +147,7 @@ Resources: - Effect: "Allow" Action: - "cognito-sync:*" - Resource: + Resource: - !Sub "arn:aws:cognito-sync:${AWS::Region}:${AWS::AccountId}:identitypool/${IdentityPool}" - Effect: "Allow" Action: @@ -161,29 +160,29 @@ Resources: CognitoAuthorizedRole: Type: AWS::IAM::Role Properties: - AssumeRolePolicyDocument: + AssumeRolePolicyDocument: Version: "2012-10-17" Statement: - Effect: "Allow" - Principal: + Principal: Federated: "cognito-identity.amazonaws.com" - Action: + Action: - "sts:AssumeRoleWithWebIdentity" Condition: - StringEquals: + StringEquals: "cognito-identity.amazonaws.com:aud": !Ref IdentityPool "ForAnyValue:StringLike": "cognito-identity.amazonaws.com:amr": authenticated Policies: - PolicyName: "CognitoAuthorizedPolicy" - PolicyDocument: + PolicyDocument: Version: "2012-10-17" - Statement: + Statement: - Effect: "Allow" Action: - mobiletargeting:UpdateEndpoint - mobiletargeting:PutEvents - Resource: + Resource: - !Sub "arn:aws:mobiletargeting:${AWS::Region}:${AWS::AccountId}:apps/${PinpointAppId}/*" - Effect: "Allow" Action: @@ -193,19 +192,19 @@ Resources: - Effect: "Allow" Action: - "cognito-identity:*" - Resource: + Resource: - !Sub "arn:aws:cognito-identity:${AWS::Region}:${AWS::AccountId}:identitypool/${IdentityPool}" - Effect: "Allow" Action: - "cognito-sync:*" - Resource: + Resource: - !Sub "arn:aws:cognito-sync:${AWS::Region}:${AWS::AccountId}:identitypool/${IdentityPool}" - Effect: "Allow" Action: - "lex:PostText" Resource: - !Sub
"arn:aws:lex:${AWS::Region}:${AWS::AccountId}:bot:RetailDemoStore:*" - + # Assigns the roles to the Identity Pool IdentityPoolRoleMapping: Type: "AWS::Cognito::IdentityPoolRoleAttachment" @@ -224,4 +223,4 @@ Outputs: Value: !Ref UserPoolClient IdentityPoolId: Description: Identity Pool Id - Value: !Ref IdentityPool + Value: !Ref IdentityPool \ No newline at end of file diff --git a/aws/cloudformation-templates/base/notebook.yaml b/aws/cloudformation-templates/base/notebook.yaml index 47b4d40f1..cbd140c29 100644 --- a/aws/cloudformation-templates/base/notebook.yaml +++ b/aws/cloudformation-templates/base/notebook.yaml @@ -118,7 +118,7 @@ Resources: Action: - ssm:PutParameter - ssm:GetParameter - Resource: !Sub 'arn:aws:ssm:${AWS::Region}:${AWS::AccountId}:parameter/retaildemostore-*' + Resource: !Sub 'arn:aws:ssm:${AWS::Region}:${AWS::AccountId}:parameter/retaildemostore*' - Effect: "Allow" Action: @@ -155,6 +155,15 @@ Resources: Condition: StringEquals: aws:ResourceTag/RetailDemoStoreServiceName: 'web-ui' + - + Effect: Allow + Action: # Notebook users should be able to start and stop training + - events:DescribeRule + - events:ListRules + - events:EnableRule + Resource: + - !Sub 'arn:aws:events:${AWS::Region}:${AWS::AccountId}:rule/RetailDemoStore-PersonalizePreCreateScheduledRule' + - PolicyName: "3-Experimentation" PolicyDocument: @@ -196,7 +205,7 @@ Resources: Effect: "Allow" Action: - lambda:GetFunction - Resource: !Sub 'arn:aws:lambda:${AWS::Region}:${AWS::AccountId}:function:RetailDemoStorePinpointRecommender' + Resource: !Sub 'arn:aws:lambda:${AWS::Region}:${AWS::AccountId}:function:RetailDemoStorePinpointRecommender' Outputs: NotebookInstanceId: diff --git a/aws/cloudformation-templates/base/ssm.yaml b/aws/cloudformation-templates/base/ssm.yaml index 9a7c9326b..f511e4379 100644 --- a/aws/cloudformation-templates/base/ssm.yaml +++ b/aws/cloudformation-templates/base/ssm.yaml @@ -68,6 +68,14 @@ Resources: Value: "NONE" Description: "Retail Demo Store Personalized Ranking Campaign Arn Parameter" + ParameterPersonalizeTrainConfig: + Type: "AWS::SSM::Parameter" + Properties: + Name: "retaildemostore-training-config" + Type: "String" + Value: "NONE" + Description: "Retail Demo Store Personalize Train Config" + ParameterPersonalizeFilterPurchasedArn: Type: "AWS::SSM::Parameter" Properties: @@ -108,6 +116,14 @@ Resources: Value: !If [HasOptimizelySdkKey, !Ref OptimizelySdkKey, 'NONE'] Description: "Retail Demo Store Optimizely SDK key" + ParameterIVSVideoChannelMap: + Type: "AWS::SSM::Parameter" + Properties: + Name: "retaildemostore-ivs-video-channel-map" + Type: "String" + Value: "NONE" + Description: "Retail Demo Store video file to IVS channel mapping" + ParameterSegmentWriteKey: Type: "AWS::SSM::Parameter" Properties: @@ -134,6 +150,10 @@ Outputs: Description: Personalized Ranking Campaign Arn Value: !Ref ParameterPersonalizedRankingCampaignArn + ParameterPersonalizeTrainConfig: + Description: Train config + Value: !Ref ParameterPersonalizeTrainConfig + ParameterPersonalizeFilterPurchasedArn: Description: Personalize Filter Purchased Products Arn Value: !Ref ParameterPersonalizeFilterPurchasedArn @@ -154,6 +174,11 @@ Outputs: Description: Optimizely SDK key Value: !Ref ParameterOptimizelySdkKey + ParameterIVSVideoChannelMap: + Description: Retail Demo Store video file to IVS channel mapping + Value: !Ref ParameterIVSVideoChannelMap + ParameterSegmentWriteKey: Description: Segment write key - Value: !Ref ParameterSegmentWriteKey \ No newline at end of file + Value: !Ref 
ParameterSegmentWriteKey + diff --git a/aws/cloudformation-templates/deployment-support.yaml b/aws/cloudformation-templates/deployment-support.yaml index 7f58ab16a..fde457f54 100644 --- a/aws/cloudformation-templates/deployment-support.yaml +++ b/aws/cloudformation-templates/deployment-support.yaml @@ -8,20 +8,20 @@ Parameters: ResourceBucket: Type: String Description: > - S3 bucket name where the Retail Demo Store deployment resources are staged (product images, nested CloudFormation templates, source code snapshot, + S3 bucket name where the Retail Demo Store deployment resources are staged (product images, nested CloudFormation templates, source code snapshot, notebooks, deployment Lambda code, etc). ResourceBucketRelativePath: Type: String Description: > - Optional path in the Deployment Resources Staging bucket where the deployment resources are stored (e.g. path/path2/). + Optional path in the Deployment Resources Staging bucket where the deployment resources are stored (e.g. path/path2/). Leave blank if resources are at the root of the Staging Resource Bucket. If specified, MUST end with '/'. PreIndexElasticsearch: Type: String Description: > - Automatically index the Retail Demo Store products in Elasticsearch. Otherwise, select 'No' if you would - prefer to complete this process yourself by stepping through the Search workshop included in this deployment + Automatically index the Retail Demo Store products in Elasticsearch. Otherwise, select 'No' if you would + prefer to complete this process yourself by stepping through the Search workshop included in this deployment as a Jupyter notebook in SageMaker. AllowedValues: - 'Yes' @@ -31,20 +31,20 @@ Parameters: PreCreatePersonalizeCampaign: Type: String Description: > - Automatically build solutions and launch Personalize campaigns. Otherwise, select 'No' if you would - prefer to complete this process yourself by stepping through the Personalization workshop included in this deployment - as a Jupyter notebook in SageMaker. Note that this process is done in the background after deployment finishes + Automatically build solutions and launch Personalize campaigns. Otherwise, select 'No' if you would + prefer to complete this process yourself by stepping through the Personalization workshop included in this deployment + as a Jupyter notebook in SageMaker. Note that this process is done in the background after deployment finishes and can take 2+ hours to complete. AllowedValues: - 'Yes' - 'No' Default: 'No' - + PreCreatePinpointWorkshop: Type: String Description: > - Automatically configure Pinpoint with messaging templates, segments, and campaigns. Otherwise, select 'No' if you would - prefer to complete this process yourself by stepping through the Messaging workshop included in your deployment as a Jupyter notebook + Automatically configure Pinpoint with messaging templates, segments, and campaigns. Otherwise, select 'No' if you would + prefer to complete this process yourself by stepping through the Messaging workshop included in your deployment as a Jupyter notebook in SageMaker. 
AllowedValues: - 'Yes' @@ -53,7 +53,7 @@ Parameters: Subnet1: Type: String - + Subnet2: Type: String @@ -66,6 +66,9 @@ Parameters: ElasticsearchDomainEndpoint: Type: String + ParameterIVSVideoChannelMap: + Type: String + Uid: Type: String @@ -104,12 +107,11 @@ Resources: PersonalizePreCreateLambdaFunction: Condition: DeployPreCreateCampaign - DependsOn: PersonalizeDeleteLambdaFunction # Ensure PreCreate function is deleted before Delete function so PreCreate is stopped first Type: 'AWS::Lambda::Function' Properties: Description: 'Retail Demo Store deployment utility function that uploads datasets, builds solutions, and creates campaigns in Amazon Personalize' Handler: personalize-pre-create-campaigns.lambda_handler - Role: !GetAtt + Role: !GetAtt - PersonalizePreCreateLambdaExecutionRole - Arn Code: @@ -154,11 +156,42 @@ Resources: - Effect: Allow Action: - ssm:PutParameter - - ssm:GetParameter + - ssm:GetParameter Resource: !Sub 'arn:aws:ssm:${AWS::Region}:${AWS::AccountId}:parameter/retaildemostore-*' - Effect: Allow Action: - logs:CreateLogGroup + Resource: '*' + - Effect: Allow + Action: + - iam:GetRole + - iam:PassRole + - iam:CreateRole + - iam:AttachRolePolicy + - iam:DetachRolePolicy + - iam:DeleteRole + Resource: + - !Sub 'arn:aws:iam::${AWS::AccountId}:role/${Uid}-PersonalizeS3' + - Effect: Allow + Action: + - events:ListTargetsByRule + - events:DisableRule + - events:EnableRule + Resource: !Sub 'arn:aws:events:${AWS::Region}:${AWS::AccountId}:rule/RetailDemoStore-PersonalizePreCreateScheduledRule' + - Effect: Allow + Action: + - events:PutRule + - events:PutTargets + - events:RemoveTargets + - events:DeleteRule + Resource: '*' + - Effect: Allow + Action: + - lambda:AddPermission + - lambda:RemovePermission + Resource: !Sub 'arn:aws:lambda:${AWS::Region}:${AWS::AccountId}:function:RetailDemoStorePersonalizePreCreateCampaigns' + - Effect: Allow + Action: - personalize:ListCampaigns - personalize:ListDatasetGroups - personalize:ListSolutions @@ -179,38 +212,35 @@ Resources: - personalize:DescribeDatasetGroup - personalize:DescribeDatasetImportJob - personalize:DescribeSolution - - personalize:DescribeEventTracker - personalize:CreateCampaign - personalize:CreateDataset - personalize:CreateEventTracker - personalize:CreateFilter - Resource: + - personalize:DeleteCampaign + - personalize:DeleteDataset + - personalize:DeleteDatasetGroup + - personalize:DeleteFilter + - personalize:DeleteSchema + - personalize:DeleteSolution + - personalize:ListFilters + Resource: - !Sub 'arn:aws:personalize:${AWS::Region}:${AWS::AccountId}:*/retaildemostore*' - Effect: Allow Action: - - iam:GetRole - - iam:PassRole - - iam:CreateRole - - iam:AttachRolePolicy - Resource: - - !Sub 'arn:aws:iam::${AWS::AccountId}:role/${Uid}-PersonalizeS3' - - Effect: Allow - Action: - - events:ListTargetsByRule - - events:RemoveTargets - - events:DeleteRule + - personalize:DescribeEventTracker + - personalize:DeleteEventTracker Resource: - - !Sub 'arn:aws:events:${AWS::Region}:${AWS::AccountId}:rule/RetailDemoStore-PersonalizePreCreateScheduledRule' + - !Sub 'arn:aws:personalize:${AWS::Region}:${AWS::AccountId}:*' - Effect: Allow Action: - codepipeline:ListPipelines - codepipeline:ListTagsForResource - Resource: + Resource: - !Sub 'arn:aws:codepipeline:${AWS::Region}:${AWS::AccountId}:*' - Effect: Allow Action: - codepipeline:StartPipelineExecution - Resource: + Resource: - !Sub 'arn:aws:codepipeline:${AWS::Region}:${AWS::AccountId}:*' Condition: StringEquals: @@ -221,49 +251,58 @@ Resources: Type: 'AWS::Events::Rule' 
Properties: Name: 'RetailDemoStore-PersonalizePreCreateScheduledRule' - Description: Calls Personalize pre-create Lambda function every 5 minutes until campaigns are fully created + Description: Calls Personalize pre-create Lambda function every 5 minutes until Personalize reaches desired state ScheduleExpression: rate(5 minutes) - State: ENABLED + State: DISABLED Targets: - - Arn: !GetAtt + - Arn: !GetAtt - PersonalizePreCreateLambdaFunction - Arn Id: TargetFunctionV1 - PersonalizePreCreatePermissionToInvokeLambda: + PersonalizePreCreatePermissionToInvokeLambda: Condition: DeployPreCreateCampaign Type: AWS::Lambda::Permission - Properties: - FunctionName: + Properties: + FunctionName: Ref: "PersonalizePreCreateLambdaFunction" Action: "lambda:InvokeFunction" Principal: "events.amazonaws.com" - SourceArn: - Fn::GetAtt: + SourceArn: + Fn::GetAtt: - "PersonalizePreCreateScheduledRule" - "Arn" - ####################### Personalize Resource Delete Custom Resource ####################### + CustomLaunchPersonalizePreCreateLambdaFunction: + Condition: DeployPreCreateCampaign + Type: Custom::CustomLambdaPersonalize + Properties: + ServiceToken: !GetAtt PersonalizePreCreateLambdaFunction.Arn + + ####################### Create IVS Channels ####################### - PersonalizeDeleteLambdaFunction: + IVSCreateChannelsLambdaFunction: Type: 'AWS::Lambda::Function' Properties: - Description: 'Retail Demo Store deployment utility function that deletes Personalize resources created in workshop or by pre-create Lambda' - Handler: personalize-delete-resources.lambda_handler - Role: !GetAtt - - PersonalizeDeleteLambdaExecutionRole + Description: 'Retail Demo Store deployment utility function that creates IVS channels.' + Handler: ivs-create-channels.lambda_handler + Role: !GetAtt + - IVSCreateChannelsLambdaExecutionRole - Arn Code: S3Bucket: !Ref ResourceBucket - S3Key: !Sub '${ResourceBucketRelativePath}aws-lambda/personalize-delete-resources.zip' + S3Key: !Sub '${ResourceBucketRelativePath}aws-lambda/ivs-create-channels.zip' Runtime: python3.8 - Timeout: 60 - FunctionName: RetailDemoStorePersonalizeDeleteResources + Timeout: 900 + FunctionName: RetailDemoStoreIVSCreateChannels Environment: Variables: + bucket: !Ref ResourceBucket + videos_path: !Sub '${ResourceBucketRelativePath}videos/' + ssm_video_channel_map_param: !Ref ParameterIVSVideoChannelMap Uid: !Ref Uid - PersonalizeDeleteLambdaExecutionRole: + IVSCreateChannelsLambdaExecutionRole: Type: 'AWS::IAM::Role' Properties: AssumeRolePolicyDocument: @@ -275,49 +314,56 @@ Resources: - lambda.amazonaws.com Action: - 'sts:AssumeRole' - Path: /service-role/ + Path: / Policies: - PolicyName: root PolicyDocument: Version: 2012-10-17 Statement: - - Effect: Allow - Action: - - logs:CreateLogGroup - Resource: - - !Sub 'arn:aws:logs:${AWS::Region}:${AWS::AccountId}:*' - Effect: Allow Action: - logs:CreateLogStream - logs:PutLogEvents Resource: - - !Sub 'arn:aws:logs:${AWS::Region}:${AWS::AccountId}:log-group:/aws/lambda/RetailDemoStorePersonalizeDeleteResources*' - - Effect: Allow - Action: - - personalize:* - - lambda:AddPermission - - lambda:RemovePermission - - events:PutRule - - events:DeleteRule - - events:PutTargets - - events:RemoveTargets - Resource: '*' + - !Sub 'arn:aws:logs:${AWS::Region}:${AWS::AccountId}:log-group:/aws/lambda/RetailDemoStoreIVSCreateChannels:log-stream:*' + - !Sub 'arn:aws:logs:${AWS::Region}:${AWS::AccountId}:log-group:/aws/lambda/RetailDemoStoreIVSCreateChannels' - Effect: Allow Action: - ssm:PutParameter + - ssm:GetParameter 
Resource: !Sub 'arn:aws:ssm:${AWS::Region}:${AWS::AccountId}:parameter/retaildemostore-*' - Effect: Allow Action: - - iam:DetachRolePolicy - - iam:DeleteRole - Resource: !Sub 'arn:aws:iam::${AWS::AccountId}:role/${Uid}-PersonalizeS3' + - ivs:CreateChannel + - ivs:CreateStreamKey + - ivs:ListStreamKeys + - ivs:DeleteChannel + Resource: !Sub 'arn:aws:ivs:${AWS::Region}:${AWS::AccountId}:*' + - Effect: Allow + Action: + - ivs:StopStream + - ivs:GetChannel + Resource: !Sub 'arn:aws:ivs:${AWS::Region}:${AWS::AccountId}:channel/*' + - Effect: Allow + Action: + - ivs:DeleteStreamKey + Resource: !Sub 'arn:aws:ivs:${AWS::Region}:${AWS::AccountId}:stream-key/*' + - Effect: Allow + Action: + - s3:ListBucket + Resource: + - !Sub "arn:aws:s3:::${ResourceBucket}/*" + - !Sub "arn:aws:s3:::${ResourceBucket}" + - Effect: Allow + Action: + - logs:CreateLogGroup + Resource: '*' - # Custom resource to launch elasticsearch preindex function - CustomPersonalizeDeleteLambdaFunction: - Type: Custom::CustomPersonalizeDelete + # Custom resource to launch IVS create channels function + CustomLaunchIVSCreateChannelsLambdaFunction: + Type: Custom::CustomLambdaIVS Properties: - ServiceToken: !GetAtt PersonalizeDeleteLambdaFunction.Arn - DatasetGroupName: 'retaildemostore' + ServiceToken: !GetAtt IVSCreateChannelsLambdaFunction.Arn ####################### Pre-Index Elasticsearch ####################### @@ -327,7 +373,7 @@ Resources: Properties: Description: 'Retail Demo Store deployment utility function that indexes product catalog in Amazon Elasticsearch' Handler: elasticsearch-pre-index.lambda_handler - Role: !GetAtt + Role: !GetAtt - ElasticsearchPreIndexLambdaExecutionRole - Arn Code: @@ -376,7 +422,7 @@ Resources: - logs:PutLogEvents Resource: - !Sub 'arn:aws:logs:${AWS::Region}:${AWS::AccountId}:log-group:/aws/lambda/*ElasticsearchPreIndexLambdaFunction*:log-stream:*' - - !Sub 'arn:aws:logs:${AWS::Region}:${AWS::AccountId}:log-group:/aws/lambda/*ElasticsearchPreIndexLambdaFunction*' + - !Sub 'arn:aws:logs:${AWS::Region}:${AWS::AccountId}:log-group:/aws/lambda/*ElasticsearchPreIndexLambdaFunction*' - Effect: Allow Action: - es:ESHttpDelete @@ -403,7 +449,7 @@ Resources: Properties: Description: 'Retail Demo Store deployment utility function that configures messaging templates, segments, and campaigns in Amazon Pinpoint' Handler: pinpoint-auto-workshop.lambda_handler - Role: !GetAtt + Role: !GetAtt - PinpointPreCreateLambdaExecutionRole - Arn Code: @@ -455,9 +501,9 @@ Resources: Action: - ssm:GetParameter Resource: !Sub 'arn:aws:ssm:${AWS::Region}:${AWS::AccountId}:parameter/retaildemostore-*' - - + - Effect: "Allow" - Action: + Action: - mobiletargeting:* Resource: "*" - Effect: Allow @@ -474,32 +520,32 @@ Resources: Properties: Name: 'RetailDemoStore-PinpointPreCreateRule' Description: Calls Pinpoint workshop pre-create Lambda function when the Personalize campaign ARN SSM parameter is updated - EventPattern: - source: + EventPattern: + source: - "aws.ssm" - detail-type: + detail-type: - "Parameter Store Change" - detail: - name: + detail: + name: - "retaildemostore-product-recommendation-campaign-arn" - operation: + operation: - "Update" State: ENABLED Targets: - - Arn: !GetAtt + - Arn: !GetAtt - PinpointPreCreateLambdaFunction - Arn Id: TargetFunctionV1 - PinpointPreCreatePermissionToInvokeLambda: + PinpointPreCreatePermissionToInvokeLambda: Condition: DeployPreCreatePinpointWorkshop Type: AWS::Lambda::Permission - Properties: - FunctionName: + Properties: + FunctionName: Ref: "PinpointPreCreateLambdaFunction" 
Action: "lambda:InvokeFunction" Principal: "events.amazonaws.com" - SourceArn: - Fn::GetAtt: + SourceArn: + Fn::GetAtt: - "PinpointPreCreateRule" - "Arn" diff --git a/aws/cloudformation-templates/services/_template.yaml b/aws/cloudformation-templates/services/_template.yaml index 10615ece6..7b0d8fffe 100644 --- a/aws/cloudformation-templates/services/_template.yaml +++ b/aws/cloudformation-templates/services/_template.yaml @@ -4,6 +4,11 @@ AWSTemplateFormatVersion: 2010-09-09 Description: > This template deploys the Retail Demo Store Services. +Conditions: + UseDefaultIVSStreams: !Equals + - !Ref UseDefaultIVSStreams + - 'Yes' + Parameters: ResourceBucket: Type: String @@ -103,9 +108,17 @@ Parameters: Type: String Description: SSM parameter name for the Optimizely SDK key + ParameterIVSVideoChannelMap: + Type: String + Description: SSM parameter name for video to IVS stream map + CleanupBucketLambdaArn: Type: String Description: Lambda Arn for cleanup function + + UseDefaultIVSStreams: + Type: String + Description: Whether to use created IVS streams or those hosted by AWS. ParentStackName: Type: String @@ -305,6 +318,39 @@ Resources: WebRootUrl: !Ref WebRootUrl ImageRootUrl: !Ref ImageRootUrl Uid: !Sub ${ParentStackName}-${AWS::Region} + + VideosService: + Type: AWS::CloudFormation::Stack + Properties: + TemplateURL: !Sub https://s3.amazonaws.com/${ResourceBucket}/${ResourceBucketRelativePath}cloudformation-templates/services/service/_template.yaml + Parameters: + ServiceName: videos + ServicePath: src/videos + ResourceBucket: !Ref ResourceBucket + ResourceBucketRelativePath: !Ref ResourceBucketRelativePath + SourceDeploymentType: !Ref SourceDeploymentType + GitHubRepo: !Ref GitHubRepo + GitHubBranch: !Ref GitHubBranch + GitHubToken: !Ref GitHubToken + GitHubUser: !Ref GitHubUser + UserPoolId: !Ref UserPoolId + UserPoolClientId: !Ref UserPoolClientId + IdentityPoolId: !Ref IdentityPoolId + StackBucketName: !Ref StackBucketName + Subnets: !Ref Subnets + VpcId: !Ref VpcId + ClusterName: !Ref ClusterName + ServiceDiscoveryNamespace: !Ref ServiceDiscoveryNamespace + ParameterPersonalizeEventTrackerId: !Ref ParameterPersonalizeEventTrackerId + ParameterAmplitudeApiKey: !Ref ParameterAmplitudeApiKey + ParameterOptimizelySdkKey: !Ref ParameterOptimizelySdkKey + ParameterIVSVideoChannelMap: !Ref ParameterIVSVideoChannelMap + CleanupBucketLambdaArn: !Ref CleanupBucketLambdaArn + DeleteRepositoryLambdaArn: !GetAtt DeleteRepositoryLambdaFunction.Arn + UseDefaultIVSStreams: !If [UseDefaultIVSStreams, true, false] + WebRootUrl: !Ref WebRootUrl + ImageRootUrl: !Ref ImageRootUrl + Uid: !Sub ${ParentStackName}-${AWS::Region} # Pinpoint personalized messaging customization PinpointPersonalize: @@ -425,6 +471,10 @@ Outputs: Description: Recommendations load balancer URL. Value: !GetAtt RecommendationsService.Outputs.ServiceUrl + VideosServiceUrl: + Description: Videos load balancer URL. + Value: !GetAtt VideosService.Outputs.ServiceUrl + SearchServiceUrl: Description: Search load balancer URL. 
Value: !GetAtt SearchService.Outputs.ServiceUrl diff --git a/aws/cloudformation-templates/services/pinpoint-personalize.yaml b/aws/cloudformation-templates/services/pinpoint-personalize.yaml index 9dae6c891..4cb94271f 100644 --- a/aws/cloudformation-templates/services/pinpoint-personalize.yaml +++ b/aws/cloudformation-templates/services/pinpoint-personalize.yaml @@ -44,9 +44,9 @@ Resources: - personalize:DescribeCampaign - personalize:GetRecommendations Resource: - - !Sub 'arn:aws:personalize:${AWS::Region}:${AWS::AccountId}:solution/retaildemostore-product-personalization' - - !Sub 'arn:aws:personalize:${AWS::Region}:${AWS::AccountId}:campaign/retaildemostore-product-personalization' - - !Sub 'arn:aws:personalize:${AWS::Region}:${AWS::AccountId}:filter/retaildemostore-product-personalization' + - !Sub 'arn:aws:personalize:${AWS::Region}:${AWS::AccountId}:solution/retaildemostore-product-personalization*' + - !Sub 'arn:aws:personalize:${AWS::Region}:${AWS::AccountId}:campaign/retaildemostore-product-personalization*' + - !Sub 'arn:aws:personalize:${AWS::Region}:${AWS::AccountId}:filter/retaildemostore-product-personalization*' CustomizeRecommendationsFunctionRole: Type: AWS::IAM::Role diff --git a/aws/cloudformation-templates/services/service/_template.yaml b/aws/cloudformation-templates/services/service/_template.yaml index cce669257..d1bd471c0 100644 --- a/aws/cloudformation-templates/services/service/_template.yaml +++ b/aws/cloudformation-templates/services/service/_template.yaml @@ -76,6 +76,14 @@ Parameters: ClusterName: Type: String + ContainerCpu: + Type: String + Default: 256 + + ContainerMemory: + Type: String + Default: 512 + ServiceDiscoveryNamespace: Type: String @@ -91,6 +99,10 @@ Parameters: Type: String Default: none + VideosServiceExternalUrl: + Type: String + Default: none + OrdersServiceExternalUrl: Type: String Default: none @@ -170,6 +182,11 @@ Parameters: Description: SSM parameter name for the Optimizely SDK key Default: none + ParameterIVSVideoChannelMap: + Type: String + Description: SSM parameter name for video to IVS stream map + Default: none + CleanupBucketLambdaArn: Type: String Description: Lambda Arn for cleanup function @@ -178,6 +195,10 @@ Parameters: Type: String Description: Lambda Arn for deleting ECR repository + UseDefaultIVSStreams: + Type: String + Default: false + Uid: Type: String @@ -213,6 +234,7 @@ Resources: ProductsServiceExternalUrl: !Ref ProductsServiceExternalUrl UsersServiceExternalUrl: !Ref UsersServiceExternalUrl CartsServiceExternalUrl: !Ref CartsServiceExternalUrl + VideosServiceExternalUrl: !Ref VideosServiceExternalUrl OrdersServiceExternalUrl: !Ref OrdersServiceExternalUrl RecommendationsServiceExternalUrl: !Ref RecommendationsServiceExternalUrl SearchServiceExternalUrl: !Ref SearchServiceExternalUrl @@ -233,6 +255,8 @@ Resources: ServiceName: !Ref ServiceName ClusterName: !Ref ClusterName DesiredCount: '1' + ContainerMemory: !Ref ContainerMemory + ContainerCpu: !Ref ContainerCpu TargetGroup: !GetAtt Loadbalancer.Outputs.TargetGroup SourceSecurityGroup: !GetAtt Loadbalancer.Outputs.SecurityGroup Subnets: !Ref Subnets @@ -248,6 +272,9 @@ Resources: EnvSearchServiceInternalUrl: !Ref EnvSearchServiceInternalUrl EnvSerchServiceInternalPort: !Ref EnvSerchServiceInternalPort EnvElasticsearchDomainEndpoint: !Ref EnvElasticsearchDomainEndpoint + ResourceBucket: !Ref ResourceBucket + ParameterIVSVideoChannelMap: !Ref ParameterIVSVideoChannelMap + UseDefaultIVSStreams: !Ref UseDefaultIVSStreams ProductsTable: !Ref ProductsTable 
CategoriesTable: !Ref CategoriesTable ExperimentStrategyTable: !Ref ExperimentStrategyTable diff --git a/aws/cloudformation-templates/services/service/pipeline.yaml b/aws/cloudformation-templates/services/service/pipeline.yaml index d653cca93..5a32473ab 100644 --- a/aws/cloudformation-templates/services/service/pipeline.yaml +++ b/aws/cloudformation-templates/services/service/pipeline.yaml @@ -63,6 +63,10 @@ Parameters: Type: String Default: none + VideosServiceExternalUrl: + Type: String + Default: none + OrdersServiceExternalUrl: Type: String Default: none @@ -290,6 +294,8 @@ Resources: Value: !Sub ${UsersServiceExternalUrl} - Name: CARTS_SERVICE_URL Value: !Sub ${CartsServiceExternalUrl} + - Name: VIDEOS_SERVICE_URL + Value: !Sub ${VideosServiceExternalUrl} - Name: ORDERS_SERVICE_URL Value: !Sub ${OrdersServiceExternalUrl} - Name: RECOMMENDATIONS_SERVICE_URL diff --git a/aws/cloudformation-templates/services/service/service.yaml b/aws/cloudformation-templates/services/service/service.yaml index 09ece7649..368fe1a0f 100644 --- a/aws/cloudformation-templates/services/service/service.yaml +++ b/aws/cloudformation-templates/services/service/service.yaml @@ -16,6 +16,12 @@ Parameters: Type: Number Default: 1 + ContainerCpu: + Type: Number + + ContainerMemory: + Type: Number + TargetGroup: Type: String @@ -72,6 +78,18 @@ Parameters: Type: String Default: none + ResourceBucket: + Type: String + Default: none + + ParameterIVSVideoChannelMap: + Type: String + Default: none + + UseDefaultIVSStreams: + Type: String + Default: false + WebRootUrl: Type: String Description: Public facing root URL where the Retail Demo Store web user interface is served. Used when building fully qualified URLs for the web user interface. @@ -160,14 +178,72 @@ Resources: Resource: '*' - Effect: Allow Action: + - logs:CreateLogGroup + - personalize:CreateSchema + - personalize:CreateDatasetGroup + - personalize:CreateSolutionVersion + - personalize:CreateDatasetImportJob + - personalize:CreateSolution + - personalize:DescribeDatasetGroup + - personalize:DescribeDatasetImportJob + - personalize:DescribeSolution - personalize:DescribeSolutionVersion + - personalize:DescribeEventTracker - personalize:DescribeCampaign - - personalize:GetPersonalizedRanking - - personalize:GetRecommendations - Resource: - - !Sub 'arn:aws:personalize:${AWS::Region}:${AWS::AccountId}:solution/retaildemostore-*' - - !Sub 'arn:aws:personalize:${AWS::Region}:${AWS::AccountId}:campaign/retaildemostore-*' - - !Sub 'arn:aws:personalize:${AWS::Region}:${AWS::AccountId}:filter/retaildemostore-*' + - personalize:CreateCampaign + - personalize:CreateDataset + - personalize:CreateEventTracker + - personalize:CreateFilter + - personalize:GetPersonalizedRanking + - personalize:GetRecommendations + - personalize:DeleteEventTracker + - personalize:DescribeEventTracker + Resource: + - !Sub 'arn:aws:personalize:${AWS::Region}:${AWS::AccountId}:*/retaildemostore*' + - Effect: Allow + Action: + - logs:CreateLogGroup + - personalize:ListCampaigns + - personalize:ListDatasetGroups + - personalize:ListSolutions + - personalize:ListSchemas + - personalize:ListSolutionVersions + - personalize:ListDatasetImportJobs + - personalize:ListDatasets + - personalize:ListEventTrackers + Resource: + - '*' + - Effect: Allow + Action: + - s3:GetObject + Resource: + - !Sub 'arn:aws:s3:::${ResourceBucket}' + - !Sub 'arn:aws:s3:::${ResourceBucket}/*' + - Effect: Allow + Action: + - ivs:ListStreamKeys + Resource: + - !Sub 'arn:aws:ivs:${AWS::Region}:${AWS::AccountId}:*' + - Effect: 
Allow + Action: + - ivs:GetChannel + - ivs:GetStream + - ivs:PutMetadata + Resource: + - !Sub 'arn:aws:ivs:${AWS::Region}:${AWS::AccountId}:channel/*' + - Effect: Allow + Action: + - ivs:GetStreamKey + Resource: + - !Sub 'arn:aws:ivs:${AWS::Region}:${AWS::AccountId}:stream-key/*' + - Effect: Allow + Action: + - events:DescribeRule + - events:ListRules + - events:EnableRule + Resource: + - !Sub 'arn:aws:events:${AWS::Region}:${AWS::AccountId}:rule/RetailDemoStore-PersonalizePreCreateScheduledRule' + ManagedPolicyArns: - arn:aws:iam::aws:policy/AWSCloudMapDiscoverInstanceAccess @@ -204,8 +280,8 @@ Resources: Family: !Sub ${AWS::StackName}-retaildemostore RequiresCompatibilities: - FARGATE - Memory: '512' - Cpu: '256' + Memory: !Ref ContainerMemory + Cpu: !Ref ContainerCpu NetworkMode: awsvpc ExecutionRoleArn: !Ref TaskExecutionRole TaskRoleArn: !Ref TaskRole @@ -238,6 +314,12 @@ Resources: Value: '80' - Name: STACK_BUCKET Value: none + - Name: RESOURCE_BUCKET + Value: !Ref ResourceBucket + - Name: PARAMETER_IVS_VIDEO_CHANNEL_MAP + Value: !Ref ParameterIVSVideoChannelMap + - Name: USE_DEFAULT_IVS_STREAMS + Value: !Ref UseDefaultIVSStreams - Name: ES_SEARCH_DOMAIN_HOST Value: !Ref EnvElasticsearchDomainEndpoint - Name: ES_SEARCH_DOMAIN_PORT @@ -250,6 +332,8 @@ Resources: Value: !Ref WebRootUrl - Name: IMAGE_ROOT_URL Value: !Ref ImageRootUrl + - Name: PERSONALIZE_PRECREATE_CAMPAIGNS_EVENTRULENAME + Value: 'RetailDemoStore-PersonalizePreCreateScheduledRule' PortMappings: - ContainerPort: 80 LogConfiguration: diff --git a/aws/cloudformation-templates/template.yaml b/aws/cloudformation-templates/template.yaml index ed5a98389..81cc54151 100644 --- a/aws/cloudformation-templates/template.yaml +++ b/aws/cloudformation-templates/template.yaml @@ -32,6 +32,10 @@ Metadata: - PreCreatePinpointWorkshop - PinpointEmailFromAddress - PinpointEmailFromName + - Label: + default: "Use default IVS streams" + Parameters: + - UseDefaultIVSStreams - Label: default: "AWS Partner Integrations" Parameters: @@ -65,6 +69,8 @@ Metadata: default: "Reply-To email address" PinpointEmailFromName: default: "Reply-To name" + UseDefaultIVSStreams: + default: "Use default IVS streams" AmplitudeApiKey: default: "Amplitude API Key" OptimizelySdkKey: default: @@ -173,6 +179,16 @@ Parameters: Name to use with Reply-To email address when sending emails from Pinpoint. This parameter only applies when 'Auto-Configure Pinpoint' is set to 'Yes'. Default: "AWS Retail Demo Store" + + UseDefaultIVSStreams: + Type: String + Description: > + Whether to use the default Interactive Video Service (IVS) streams hosted by AWS. If 'No', then new IVS channels will be created, with + streams generated from any videos placed in the CloudFormation resource bucket.
+ AllowedValues: + - 'Yes' + - 'No' + Default: 'Yes' AmplitudeApiKey: Type: String @@ -236,7 +252,9 @@ Resources: ParameterAmplitudeApiKey: !GetAtt Base.Outputs.ParameterAmplitudeApiKey ParameterOptimizelySdkKey: !GetAtt Base.Outputs.ParameterOptimizelySdkKey CleanupBucketLambdaArn: !GetAtt CleanupBucket.Outputs.LambdaFunctionArn + ParameterIVSVideoChannelMap: !GetAtt Base.Outputs.ParameterIVSVideoChannelMap WebRootUrl: !GetAtt Base.Outputs.WebUICDNURL + UseDefaultIVSStreams: !Ref UseDefaultIVSStreams ImageRootUrl: !Sub - '${RootURL}/images/' - RootURL: !GetAtt Base.Outputs.WebUICDNURL @@ -264,6 +282,7 @@ Resources: OrdersServiceExternalUrl: !GetAtt Services.Outputs.OrdersServiceUrl RecommendationsServiceExternalUrl: !GetAtt Services.Outputs.RecommendationsServiceUrl SearchServiceExternalUrl: !GetAtt Services.Outputs.SearchServiceUrl + VideosServiceExternalUrl: !GetAtt Services.Outputs.VideosServiceUrl PinpointAppId: !GetAtt Base.Outputs.PinpointAppId ParameterPersonalizeEventTrackerId: !GetAtt Base.Outputs.ParameterPersonalizeEventTrackerId ParameterAmplitudeApiKey: !GetAtt Base.Outputs.ParameterAmplitudeApiKey @@ -290,7 +309,7 @@ Resources: # Deployment support DeploymentSupport: - DependsOn: Services # Delay towards end of deployment so that ES domain and DNS changes become consistent + DependsOn: [Services, Base] # Delay towards end of deployment so that ES domain and DNS changes become consistent Type: AWS::CloudFormation::Stack Properties: TemplateURL: !Sub https://s3.amazonaws.com/${ResourceBucket}/${ResourceBucketRelativePath}cloudformation-templates/deployment-support.yaml @@ -304,6 +323,7 @@ Resources: ElasticsearchSecurityGroupId: !GetAtt Base.Outputs.ElasticsearchSecurityGroupId ElasticsearchDomainArn: !GetAtt Base.Outputs.ElasticsearchDomainArn ElasticsearchDomainEndpoint: !GetAtt Base.Outputs.ElasticsearchDomainEndpoint + ParameterIVSVideoChannelMap: !GetAtt Base.Outputs.ParameterIVSVideoChannelMap PreCreatePinpointWorkshop: !Ref PreCreatePinpointWorkshop Uid: !Sub ${AWS::StackName}-${AWS::Region} PinpointAppId: !GetAtt Base.Outputs.PinpointAppId @@ -368,6 +388,10 @@ Outputs: Description: Elasticsearch Endpoint Value: !GetAtt Base.Outputs.ElasticsearchDomainEndpoint + ParameterIVSVideoChannelMap: + Description: Retail Demo Store video file to IVS channel mapping parameter + Value: !GetAtt Base.Outputs.ParameterIVSVideoChannelMap + PinpointAppId: Description: Pinpoint App Id. 
Value: !GetAtt Base.Outputs.PinpointAppId diff --git a/aws/cloudformation-templates/web-ui-pipeline.yaml b/aws/cloudformation-templates/web-ui-pipeline.yaml index dd9d94765..64c37824f 100644 --- a/aws/cloudformation-templates/web-ui-pipeline.yaml +++ b/aws/cloudformation-templates/web-ui-pipeline.yaml @@ -87,6 +87,10 @@ Parameters: Type: String Default: none + VideosServiceExternalUrl: + Type: String + Default: none + PinpointAppId: Type: String Default: none @@ -343,6 +347,8 @@ Resources: Value: !Sub ${RecommendationsServiceExternalUrl} - Name: SEARCH_SERVICE_URL Value: !Sub ${SearchServiceExternalUrl} + - Name: VIDEOS_SERVICE_URL + Value: !Sub ${VideosServiceExternalUrl} - Name: DEPLOYED_REGION Value: !Ref AWS::Region - Name: PINPOINT_APP_ID diff --git a/buildspec.yml b/buildspec.yml index d747faa87..c52988778 100644 --- a/buildspec.yml +++ b/buildspec.yml @@ -24,6 +24,7 @@ phases: commands: - git clone https://github.com/aws-samples/retail-demo-store - cd retail-demo-store/ + - pip install -r generators/requirements.txt - ./stage.sh retail-demo-store-us-west-2 - ./stage.sh retail-demo-store-us-east-1 - - ./stage.sh retail-demo-store-eu-west-1 \ No newline at end of file + - ./stage.sh retail-demo-store-eu-west-1 diff --git a/generators/README.md b/generators/README.md index 50042e3be..d3df2af9a 100644 --- a/generators/README.md +++ b/generators/README.md @@ -15,3 +15,8 @@ The datagenerator library is a Python library that provides the following functi * The ability to specify a set of user behavior funnels and to then generate events that can be sent to Amazon Personalize, Segment, or Amplitude (see [./datagenerator/file.py](./datagenerator/file.py), [./datagenerator/amplitude.py](./datagenerator/amplitude.py), and [./datagenerator/segment.py](./datagenerator/segment.py)). For a working example of the event generator features, see [3.5-Amplitude-Performance-Metrics.ipynb](../workshop/3-Experimentation/3.5-Amplitude-Performance-Metrics.ipynb) + +# Interaction events + +The `generate_interactions_personalize.py` script is a standalone script that reads in the users data and products +data directly from file and generates sample interactions. diff --git a/generators/datagenerator/users.py b/generators/datagenerator/users.py index 68859322d..ce5b62074 100644 --- a/generators/datagenerator/users.py +++ b/generators/datagenerator/users.py @@ -30,11 +30,18 @@ age_dist = truncnorm((age_min - age_mean) / age_sd, (age_max - age_mean) / age_sd, loc=age_mean, scale=age_sd) # Persona combinations ordered from strongest affinity to latent interest.
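+# (Added commentary, not from the upstream file: each persona string encodes three categories ordered from +# strongest to weakest affinity - e.g. 'footwear_outdoors_apparel' means footwear is the dominant interest - +# and consumers of this data recover the ordering with persona.split('_').)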
-personas = [ +category_preference_personas = [ 'apparel_housewares_accessories', 'housewares_apparel_electronics', 'footwear_outdoors_apparel', 'outdoors_footwear_housewares', 'electronics_beauty_outdoors', 'beauty_electronics_accessories', - 'jewelry_accessories_beauty', 'accessories_jewelry_apparel' + 'jewelry_accessories_beauty', 'accessories_jewelry_apparel', + 'beauty_jewelry_accessories', 'beauty_apparel_housewares' +] + +discount_personas = [ + 'discount_indifferent', # does not care about discounts + 'all_discounts', # likes discounts all the time + 'lower_priced_products' # likes discounts on cheaper products +] class UserPool: @@ -121,7 +128,8 @@ def __init__(self, id_string=None): self.name = f'{self.first_name} {self.last_name}' self.username = f'user{self.id}' # These are hard-coded from the AWS samples Retail Demo Store workshop - self.persona = random.choice(personas) + self.persona = random.choice(category_preference_personas) + self.discount_persona = random.choice(discount_personas) self.traits = {} ios_token = fake.ios_platform_token() @@ -131,20 +139,20 @@ def __init__(self, id_string=None): self.platforms = { "ios": { - "anonymous_id": str(uuid.uuid4()), - "advertising_id": str(uuid.uuid4()), + "anonymous_id": str(fake.uuid4()), + "advertising_id": str(fake.uuid4()), "user_agent": ios_token, "model": ios_identifiers[0], "version": ios_identifiers[4] }, "android": { - "anonymous_id": str(uuid.uuid4()), - "advertising_id": str(uuid.uuid4()), + "anonymous_id": str(fake.uuid4()), + "advertising_id": str(fake.uuid4()), "user_agent": android_token, "version": android_identifiers[1] }, "web": { - "anonymous_id": str(uuid.uuid4()), + "anonymous_id": str(fake.uuid4()), "user_agent": fake.user_agent() } } diff --git a/generators/generate_interactions_personalize.py b/generators/generate_interactions_personalize.py new file mode 100644 index 000000000..c2a2d76ba --- /dev/null +++ b/generators/generate_interactions_personalize.py @@ -0,0 +1,371 @@ +""" +This script exists so that, during development or internal deployment of public commits +(to retail-demo-store-eu-west-1, retail-demo-store-us-east-1, and retail-demo-store-us-west-2), +the new Personalize training files can be generated, picked up, and uploaded. + +This script generates interactions for Amazon Personalize by heuristic simulation. It is based on the notebook +under workshop/01-Personalization where the logic is explained in more detail. +However, it has been improved in the following ways: + 1. This script is deterministic; randomness is seeded from the RANDOM_SEED constant below. + 2. Logic exists for ensuring balance across categories. + 3. Logic exists for ensuring balance across products. + 4. Discount events are also generated according to 3 different types of users: discount-likers, discount-indifferent, + and price-sensitive-discount-likers. +Item 1 allows us to re-generate data during staging, and items 2 and 3 help recommendations look appropriate in +the final demo. If there is poor balance across products and categories then one may not get recommendations +for products in the same category. This is a hotfix for the logic whereby we generate profiles and probabilistically +sample product categories according to the sampled user profile. Item 4 is necessary for training the discounts +personalization campaign.
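+ +Assuming the input files configured below are present (all paths are relative to the repository root), the +script can be run directly with no arguments, for example: + +    python generators/generate_interactions_personalize.py + +(Illustrative usage; the generated CSVs are then picked up by stage.sh, as noted below.)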
+""" +import json +import pandas as pd +import numpy as np +import time +import csv +from pathlib import Path +import gzip +import random +import yaml +import logging +from collections import defaultdict + +# Keep things deterministic +RANDOM_SEED = 0 + +# Where to put the generated data so that it is picked up by stage.sh +GENERATED_DATA_ROOT = "src/aws-lambda/personalize-pre-create-campaigns/data" + +# Interactions will be generated between these dates +FIRST_TIMESTAMP = 1591803782 # 2020-06-10, 18:43:02 +LAST_TIMESTAMP = 1599579782 # 2020-09-08, 18:43:02 + +# Users are set up with 3 product categories on their personas. If [0.6, 0.25, 0.15] it means +# 60% of the time they'll choose a product from the first category, etc. +CATEGORY_AFFINITY_PROBS = [0.6, 0.25, 0.15] + +# After interacting with a product, there are this many products within the category that a user is likely to jump to next. +# The purpose of this is to keep recommendations focused within the category if there are too many products +# in a category, because at present the user-profiles approach samples products from across a category. +PRODUCT_AFFINITY_N = 4 + +# From 0 to 1. If 0 then products in busy categories get represented less. If 1 then all products are represented the same amount. +NORMALISE_PER_PRODUCT_WEIGHT = 1.0 + +# With these probabilities a product interaction will have the product discounted. +# Here we go the other way around: what is the probability that a product the user is already interacting +# with is discounted, depending on whether the user likes discounts or not. +DISCOUNT_PROBABILITY = 0.2 +DISCOUNT_PROBABILITY_WITH_PREFERENCE = 0.5 + +IN_PRODUCTS_FILENAME = "src/products/src/products-service/data/products.yaml" +IN_USERS_FILENAME = "src/users/src/users-service/data/users.json.gz" + +PROGRESS_MONITOR_SECONDS_UPDATE = 30 + +# This is where stage.sh will pick them up from +out_items_filename = f"{GENERATED_DATA_ROOT}/items.csv" +out_users_filename = f"{GENERATED_DATA_ROOT}/users.csv" +out_interactions_filename = f"{GENERATED_DATA_ROOT}/interactions.csv" + +# The meaning of the below constants is described in the relevant notebook.
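+ +# As an illustrative sketch (not code used by this script): choosing a category for a persona with the affinity +# probabilities above amounts to something like +#     np.random.choice(['apparel', 'housewares', 'accessories'], p=[0.6, 0.25, 0.15]) +# i.e. the strongest persona category is drawn roughly 60% of the time. The real selection logic further down +# additionally re-weights these probabilities by how many products each category contains.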
+ +# Minimum number of interactions to generate +min_interactions = 675000 +# min_interactions = 50000 + +# Percentages of each event type to generate +product_added_percent = .08 +cart_viewed_percent = .05 +checkout_started_percent = .02 +order_completed_percent = .01 + + +def generate_user_items(out_users_filename, out_items_filename, in_users_filename, in_products_filename): + + Path(out_items_filename).parents[0].mkdir(parents=True, exist_ok=True) + Path(out_users_filename).parents[0].mkdir(parents=True, exist_ok=True) + + # Product info is stored in the repository + with open(in_products_filename, 'r') as f: + products = yaml.load(f, Loader=yaml.SafeLoader) + + products_df = pd.DataFrame(products) + + # User info is stored in the repository - it was automatically generated + with gzip.open(in_users_filename, 'r') as f: + users = json.load(f) + + users_df = pd.DataFrame(users) + + products_dataset_df = products_df[['id', 'category', 'style']] + products_dataset_df = products_dataset_df.rename(columns={'id': 'ITEM_ID', + 'category': 'CATEGORY', + 'style': 'STYLE'}) + products_dataset_df.to_csv(out_items_filename, index=False) + + users_dataset_df = users_df[['id', 'age', 'gender']] + users_dataset_df = users_dataset_df.rename(columns={'id': 'USER_ID', + 'age': 'AGE', + 'gender': 'GENDER'}) + + users_dataset_df.to_csv(out_users_filename, index=False) + + return users_df, products_df + + +def generate_interactions(out_interactions_filename, users_df, products_df): + """Generate interactions.csv from the users and products dataframes by simulating some + shopping behaviour.""" + + # Count of interactions generated for each event type + product_viewed_count = 0 + discounted_product_viewed_count = 0 + product_added_count = 0 + discounted_product_added_count = 0 + cart_viewed_count = 0 + discounted_cart_viewed_count = 0 + checkout_started_count = 0 + discounted_checkout_started_count = 0 + order_completed_count = 0 + discounted_order_completed_count = 0 + + Path(out_interactions_filename).parents[0].mkdir(parents=True, exist_ok=True) + + # ensure determinism + random.seed(RANDOM_SEED) + np.random.seed(RANDOM_SEED) + + start_time_progress = int(time.time()) + next_timestamp = FIRST_TIMESTAMP + seconds_increment = int((LAST_TIMESTAMP - FIRST_TIMESTAMP) / min_interactions) + next_update_progress = start_time_progress + PROGRESS_MONITOR_SECONDS_UPDATE/2 + + average_product_price = int(products_df.price.mean()) + print('Average product price: ${:.2f}'.format(average_product_price)) + + if seconds_increment <= 0: raise AssertionError(f"Should never happen: {seconds_increment} <= 0") + + print('Minimum interactions to generate: {}'.format(min_interactions)) + print('Starting timestamp: {} ({})'.format(next_timestamp, + time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(next_timestamp)))) + print('Seconds increment: {}'.format(seconds_increment)) + + print("Generating interactions... 
(this may take a few minutes)") + interactions = 0 + + subsets_cache = {} + + user_to_product = defaultdict(set) + + category_affinity_probs = np.array(CATEGORY_AFFINITY_PROBS) + + print("Writing interactions to: {}".format(out_interactions_filename)) + + with open(out_interactions_filename, 'w') as outfile: + f = csv.writer(outfile) + f.writerow(["ITEM_ID", "USER_ID", "EVENT_TYPE", "TIMESTAMP", "DISCOUNT"]) + + category_frequencies = products_df.category.value_counts() + category_frequencies /= sum(category_frequencies.values) + + interaction_product_counts = defaultdict(int) + + # Here we build up a list for each category/gender, of product + # affinities. The product affinity is keyed by one product, + # so we do not end up with exactly PRODUCT_AFFINITY_N sized + # cliques. They overlap a little over multiple users + # - that is why PRODUCT_AFFINITY_N + # can be a little bit lower than a desired clique size. + all_categories = products_df.category.unique() + product_affinities_bycatgender = {} + for category in all_categories: + for gender in ['M', 'F']: + products_cat = products_df.loc[products_df.category==category] + products_cat = products_cat.loc[ + products_cat.gender_affinity.isnull()|(products_cat.gender_affinity==gender)].id.values + # We ensure that all products have PRODUCT_AFFINITY_N products that lead into it + # and PRODUCT_AFFINITY_N products it leads to + affinity_matrix = sum([np.roll(np.identity(len(products_cat)), [0, i], [0, 1]) + for i in range(PRODUCT_AFFINITY_N)]) + np.random.shuffle(affinity_matrix) + affinity_matrix = affinity_matrix.T + np.random.shuffle(affinity_matrix) + affinity_matrix = affinity_matrix.astype(bool) # use as boolean index + affinity_matrix = affinity_matrix | np.identity(len(products_cat), dtype=bool) + + product_infinities = [products_cat[row] for row in affinity_matrix] + product_affinities_bycatgender[(category, gender)] = { + products_cat[i]: products_df.loc[products_df.id.isin(product_infinities[i])] + for i in range(len(products_cat))} + + user_category_to_first_prod = {} + + while interactions < min_interactions: + if (time.time() > next_update_progress): + rate = interactions / (time.time() - start_time_progress) + to_go = (min_interactions - interactions) / rate + print('Generated {} interactions so far (about {} seconds to go)'.format(interactions, int(to_go))) + next_update_progress += PROGRESS_MONITOR_SECONDS_UPDATE + + # Pick a random user + user = users_df.loc[random.randint(0, users_df.shape[0] - 1)] + + # Determine category affinity from user's persona + persona = user['persona'] + preferred_categories = persona.split('_') + + p_normalised = (category_affinity_probs * category_frequencies[preferred_categories].values) + p_normalised /= p_normalised.sum() + p = NORMALISE_PER_PRODUCT_WEIGHT * p_normalised + (1-NORMALISE_PER_PRODUCT_WEIGHT) * category_affinity_probs + + # Select category based on weighted preference of category order. + category = np.random.choice(preferred_categories, 1, p=p)[0] + discount_persona = user['discount_persona'] + + gender = user['gender'] + + # Here, in order to keep the number of products that are related to a product, + # we restrict the size of the set of products that are recommended to an individual + # user - in effect, the available subset for a particular category/gender + # depends on the first product selected, which is selected as per previous logic + # (looking at category affinities and gender) + usercat_key = (user['id'], category) # has this user already selected a "first" product? 
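+ +# (Illustrative aside on the affinity matrix built above: with, say, 6 products and PRODUCT_AFFINITY_N = 4, +# sum([np.roll(np.identity(6), [0, i], [0, 1]) for i in range(4)]) marks, in each row, a product plus the next +# three products (mod 6), so every product leads into and out of roughly PRODUCT_AFFINITY_N others before the +# rows and columns are shuffled.)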
+ if usercat_key in user_category_to_first_prod: + # If a first product is already selected, we use the product affinities for that product + # To provide the list of products to select from + first_prod = user_category_to_first_prod[usercat_key] + prods_subset_df = product_affinities_bycatgender[(category, gender)][first_prod] + + if not usercat_key in user_category_to_first_prod: + # If the user has not yet selected a first product for this category + # we do it according to the old logic of choosing between all products for gender + # Check if subset data frame is already cached for category & gender + prods_subset_df = subsets_cache.get(category + gender) + if prods_subset_df is None: + # Select products from selected category without gender affinity or that match user's gender + prods_subset_df = products_df.loc[(products_df['category'] == category) & ( + (products_df['gender_affinity'] == gender) | (products_df['gender_affinity'].isnull()))] + # Update cache + subsets_cache[category + gender] = prods_subset_df + + # Pick a random product from gender filtered subset + product = prods_subset_df.sample().iloc[0] + + interaction_product_counts[product.id] += 1 + + user_to_product[user['id']].add(product['id']) + # if len(user_to_product[user['id']])>8: + # import pdb;pdb.set_trace() + + if not usercat_key in user_category_to_first_prod: + user_category_to_first_prod[usercat_key] = product['id'] + + # Decide if the product the user is interacting with is discounted + if discount_persona == 'discount_indifferent': + discounted = random.random() < DISCOUNT_PROBABILITY + elif discount_persona == 'all_discounts': + discounted = random.random() < DISCOUNT_PROBABILITY_WITH_PREFERENCE + elif discount_persona == 'lower_priced_products': + if product.price < average_product_price: + discounted = random.random() < DISCOUNT_PROBABILITY_WITH_PREFERENCE + else: + discounted = random.random() < DISCOUNT_PROBABILITY + else: + raise ValueError(f'Unable to handle discount persona: {discount_persona}') + + this_timestamp = next_timestamp + random.randint(0, seconds_increment) + + num_interaction_sets_to_insert = 1 + prodcnts = list(interaction_product_counts.values()) + prodcnts_max = max(prodcnts) if len(prodcnts)>0 else 0 + prodcnts_min = min(prodcnts) if len(prodcnts) > 0 else 0 + prodcnts_avg = sum(prodcnts)/len(prodcnts) if len(prodcnts)>0 else 0 + if interaction_product_counts[product.id] * 2 < prodcnts_max: + num_interaction_sets_to_insert += 1 + if interaction_product_counts[product.id] < prodcnts_avg: + num_interaction_sets_to_insert += 1 + if interaction_product_counts[product.id] == prodcnts_min: + num_interaction_sets_to_insert += 1 + + for _ in range(num_interaction_sets_to_insert): + + discount_context = 'Yes' if discounted else 'No' + + f.writerow([product['id'], + user['id'], + 'ProductViewed', + this_timestamp, + discount_context]) + next_timestamp += seconds_increment + product_viewed_count += 1 + interactions += 1 + + if discounted: + discounted_product_viewed_count += 1 + + if product_added_count < int(product_viewed_count * product_added_percent): + this_timestamp += random.randint(0, int(seconds_increment / 2)) + f.writerow([product['id'], + user['id'], + 'ProductAdded', + this_timestamp, + discount_context]) + interactions += 1 + product_added_count += 1 + + if discounted: + discounted_product_added_count += 1 + + if cart_viewed_count < int(product_viewed_count * cart_viewed_percent): + this_timestamp += random.randint(0, int(seconds_increment / 2)) + f.writerow([product['id'], + 
user['id'], + 'CartViewed', + this_timestamp, + discount_context]) + interactions += 1 + cart_viewed_count += 1 + if discounted: + discounted_cart_viewed_count += 1 + + if checkout_started_count < int(product_viewed_count * checkout_started_percent): + this_timestamp += random.randint(0, int(seconds_increment / 2)) + f.writerow([product['id'], + user['id'], + 'CheckoutStarted', + this_timestamp, + discount_context]) + interactions += 1 + checkout_started_count += 1 + if discounted: + discounted_checkout_started_count += 1 + + if order_completed_count < int(product_viewed_count * order_completed_percent): + this_timestamp += random.randint(0, int(seconds_increment / 2)) + f.writerow([product['id'], + user['id'], + 'OrderCompleted', + this_timestamp, + discount_context]) + interactions += 1 + order_completed_count += 1 + if discounted: + discounted_order_completed_count += 1 + + print("Interactions generation done.") + print(f"Total interactions: {interactions}") + print(f"Total product viewed: {product_viewed_count} ({discounted_product_viewed_count})") + print(f"Total product added: {product_added_count} ({discounted_product_added_count})") + print(f"Total cart viewed: {cart_viewed_count} ({discounted_cart_viewed_count})") + print(f"Total checkout started: {checkout_started_count} ({discounted_checkout_started_count})") + print(f"Total order completed: {order_completed_count} ({discounted_order_completed_count})") + + globals().update(locals()) # This can be used for inspecting in the console after the script has run, or when run with IPython. + print('Generation script finished') + + +if __name__ == '__main__': + + logging.basicConfig(level=logging.INFO) + users_df, products_df = generate_user_items(out_users_filename, out_items_filename, IN_USERS_FILENAME, IN_PRODUCTS_FILENAME) + generate_interactions(out_interactions_filename, users_df, products_df) diff --git a/generators/generate_users_json.py b/generators/generate_users_json.py index bb509a3ba..7c2cc6ba6 100644 --- a/generators/generate_users_json.py +++ b/generators/generate_users_json.py @@ -12,8 +12,16 @@ file that is bundled with all Retail Demo Store deployments.
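+ +The explicit seeding added below (Faker, random, and numpy) makes user generation deterministic, so the bundled +users file is reproducible from run to run.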
""" +import datagenerator.users from datagenerator.users import UserPool +import numpy as np +import random + +datagenerator.users.Faker.seed(42) # Deterministic randomness +random.seed(42) # Deterministic randomness +np.random.seed(42) # Deterministic randomness + num_users = 6000 print('Generating {} random users...'.format(num_users)) diff --git a/generators/requirements.txt b/generators/requirements.txt index 5c3839a68..13a522809 100644 --- a/generators/requirements.txt +++ b/generators/requirements.txt @@ -12,3 +12,4 @@ six==1.15.0 text-unidecode==1.3 urllib3==1.25.9 zope.interface==5.1.0 +pandas==1.1.2 diff --git a/images/apparel/32.jpg b/images/apparel/32.jpg new file mode 100644 index 000000000..450849f0d Binary files /dev/null and b/images/apparel/32.jpg differ diff --git a/images/beauty/11.jpg b/images/beauty/11.jpg new file mode 100644 index 000000000..fc2061d57 Binary files /dev/null and b/images/beauty/11.jpg differ diff --git a/images/beauty/12.jpg b/images/beauty/12.jpg new file mode 100644 index 000000000..8bcca5c7f Binary files /dev/null and b/images/beauty/12.jpg differ diff --git a/images/beauty/13.jpg b/images/beauty/13.jpg new file mode 100644 index 000000000..f91cc019e Binary files /dev/null and b/images/beauty/13.jpg differ diff --git a/images/beauty/14.jpg b/images/beauty/14.jpg new file mode 100644 index 000000000..73137fdff Binary files /dev/null and b/images/beauty/14.jpg differ diff --git a/images/beauty/15.jpg b/images/beauty/15.jpg new file mode 100644 index 000000000..54b0f02f1 Binary files /dev/null and b/images/beauty/15.jpg differ diff --git a/images/beauty/16.jpg b/images/beauty/16.jpg new file mode 100644 index 000000000..6ec2685b4 Binary files /dev/null and b/images/beauty/16.jpg differ diff --git a/images/beauty/17.jpg b/images/beauty/17.jpg new file mode 100644 index 000000000..d6844273f Binary files /dev/null and b/images/beauty/17.jpg differ diff --git a/images/beauty/18.jpg b/images/beauty/18.jpg new file mode 100644 index 000000000..a52f5ed65 Binary files /dev/null and b/images/beauty/18.jpg differ diff --git a/images/beauty/19.jpg b/images/beauty/19.jpg new file mode 100644 index 000000000..bb2cc5a95 Binary files /dev/null and b/images/beauty/19.jpg differ diff --git a/images/beauty/20.jpg b/images/beauty/20.jpg new file mode 100644 index 000000000..44ecc281d Binary files /dev/null and b/images/beauty/20.jpg differ diff --git a/images/beauty/21.jpg b/images/beauty/21.jpg new file mode 100644 index 000000000..3a85ab8a1 Binary files /dev/null and b/images/beauty/21.jpg differ diff --git a/images/beauty/22.jpg b/images/beauty/22.jpg new file mode 100644 index 000000000..57b8ae080 Binary files /dev/null and b/images/beauty/22.jpg differ diff --git a/images/beauty/23.jpg b/images/beauty/23.jpg new file mode 100644 index 000000000..8f4a00135 Binary files /dev/null and b/images/beauty/23.jpg differ diff --git a/images/beauty/24.jpg b/images/beauty/24.jpg new file mode 100644 index 000000000..9ba7c615b Binary files /dev/null and b/images/beauty/24.jpg differ diff --git a/images/beauty/25.jpg b/images/beauty/25.jpg new file mode 100644 index 000000000..b16b8cead Binary files /dev/null and b/images/beauty/25.jpg differ diff --git a/images/beauty/26.jpg b/images/beauty/26.jpg new file mode 100644 index 000000000..39892b08b Binary files /dev/null and b/images/beauty/26.jpg differ diff --git a/images/beauty/27.jpg b/images/beauty/27.jpg new file mode 100644 index 000000000..a4137344d Binary files /dev/null and b/images/beauty/27.jpg differ diff --git 
a/images/beauty/28.jpg b/images/beauty/28.jpg new file mode 100644 index 000000000..490b301f3 Binary files /dev/null and b/images/beauty/28.jpg differ diff --git a/images/beauty/29.jpg b/images/beauty/29.jpg new file mode 100644 index 000000000..7477cb2fc Binary files /dev/null and b/images/beauty/29.jpg differ diff --git a/images/beauty/30.jpg b/images/beauty/30.jpg new file mode 100644 index 000000000..02441546e Binary files /dev/null and b/images/beauty/30.jpg differ diff --git a/images/beauty/31.jpg b/images/beauty/31.jpg new file mode 100644 index 000000000..e1e2bffce Binary files /dev/null and b/images/beauty/31.jpg differ diff --git a/images/electronics/15.jpg b/images/electronics/15.jpg new file mode 100644 index 000000000..c649cba0e Binary files /dev/null and b/images/electronics/15.jpg differ diff --git a/images/housewares/10.jpg b/images/housewares/10.jpg new file mode 100644 index 000000000..48268e13c Binary files /dev/null and b/images/housewares/10.jpg differ diff --git a/images/housewares/11.jpg b/images/housewares/11.jpg new file mode 100644 index 000000000..e3413ebeb Binary files /dev/null and b/images/housewares/11.jpg differ diff --git a/images/housewares/12.jpg b/images/housewares/12.jpg new file mode 100644 index 000000000..e9b79b76d Binary files /dev/null and b/images/housewares/12.jpg differ diff --git a/images/housewares/8.jpg b/images/housewares/8.jpg new file mode 100644 index 000000000..3eb0f69b9 Binary files /dev/null and b/images/housewares/8.jpg differ diff --git a/images/housewares/9.jpg b/images/housewares/9.jpg new file mode 100644 index 000000000..72b75c8ab Binary files /dev/null and b/images/housewares/9.jpg differ diff --git a/src/aws-lambda/ivs-create-channels/bundle.sh b/src/aws-lambda/ivs-create-channels/bundle.sh new file mode 100644 index 000000000..2ebe37098 --- /dev/null +++ b/src/aws-lambda/ivs-create-channels/bundle.sh @@ -0,0 +1,27 @@ +#!/bin/bash +set -e + +pip () { + command pip3 "$@" +} + +LAMBDA_SOURCE=ivs-create-channels.py +PACKAGE_FILE=ivs-create-channels.zip + +echo "Cleaning up intermediate files" +[ -e ${PACKAGE_FILE} ] && rm ${PACKAGE_FILE} +[ -e "package" ] && rm -rf package + +echo "Installing Lambda dependencies" +pip install -r requirements.txt --target ./package + +echo "Building Lambda deployment package" +cd package +zip -r9 ${OLDPWD}/${PACKAGE_FILE} . +cd ${OLDPWD} + +echo "Adding Lambda function source code to package" +zip -g ${PACKAGE_FILE} ${LAMBDA_SOURCE} + + +echo "Done!" \ No newline at end of file diff --git a/src/aws-lambda/ivs-create-channels/ivs-create-channels.py b/src/aws-lambda/ivs-create-channels/ivs-create-channels.py new file mode 100644 index 000000000..3227850d3 --- /dev/null +++ b/src/aws-lambda/ivs-create-channels/ivs-create-channels.py @@ -0,0 +1,243 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# SPDX-License-Identifier: MIT-0 +from crhelper import CfnResource +import boto3 +import botocore.exceptions + +import json +import logging +import os +import re +from datetime import datetime + +logger = logging.getLogger() +logger.setLevel(logging.INFO) + +helper = CfnResource() + +SSM_VIDEO_CHANNEL_MAP_PARAM = os.environ.get('ssm_video_channel_map_param') +S3_BUCKET = os.environ.get('bucket') +VIDEO_PATH = os.environ.get('videos_path') + +ivs_client = boto3.client('ivs') +ssm_client = boto3.client('ssm') +s3_client = boto3.client('s3') + + +def is_ssm_parameter_set(parameter_name): + """ + Returns boolean stating whether an SSM parameter with a given name has been set (ie. 
value is not 'NONE'). + """ + try: + response = ssm_client.get_parameter(Name=parameter_name) + return response['Parameter']['Value'] != 'NONE' + except ssm_client.exceptions.ParameterNotFound: + return False + + +def list_video_file_keys(): + """ + Returns the S3 keys of all .mkv files in the 'video path' of the staging S3 bucket. + """ + objects = s3_client.list_objects_v2(Bucket=S3_BUCKET, Prefix=VIDEO_PATH)['Contents'] + # TODO: Ensure the fact we are only handling mkv files is captured in the README + video_file_keys = [s3_object['Key'] for s3_object in objects if s3_object['Key'].endswith('.mkv')] + return video_file_keys + + +def channel_config_exists(video_s3_key): + """ + Returns a boolean stating whether a video file to IVS channel association already exists for the given + video key in the SSM_VIDEO_CHANNEL_MAP_PARAM. + """ + if not is_ssm_parameter_set(SSM_VIDEO_CHANNEL_MAP_PARAM): + return False + + video_channel_param_value = ssm_client.get_parameter(Name=SSM_VIDEO_CHANNEL_MAP_PARAM)['Parameter']['Value'] + video_channel_map = json.loads(video_channel_param_value) + + return video_s3_key in video_channel_map + + +def channel_exists(arn): + """ + Returns a boolean stating whether an IVS channel with the given ARN exists. + """ + try: + ivs_client.get_channel(arn=arn) + return True + except ivs_client.exceptions.ResourceNotFoundException: + return False + + +def remove_channel(arn): + """ + Stops and removes all resources for a given IVS channel. This is done by: + 1. Deleting all stream keys from the channel to prevent any further streaming to the channel. + 2. Calling the `stop_stream` endpoint on the channel. + 3. Deleting the channel itself. + """ + logger.info(f'Deleting channel {arn}') + + try: + logger.info(f"Retrieving stream keys for channel {arn}") + stream_keys = ivs_client.list_stream_keys(channelArn=arn)['streamKeys'] + logger.info(f"Found stream keys {stream_keys} for channel {arn}") + for stream_key in stream_keys: + logger.info(f"Deleting stream key {stream_key['arn']}") + ivs_client.delete_stream_key(arn=stream_key['arn']) + logger.info(f"Stream key {stream_key['arn']} deleted") + except ivs_client.exceptions.ResourceNotFoundException: + logger.info(f"IVS channel {arn} not found, no stream keys to delete") + + try: + logger.info(f"Stopping stream for IVS channel {arn}") + ivs_client.stop_stream(channelArn=arn) + logger.info(f"Stream stopped for IVS channel {arn}") + except ivs_client.exceptions.ResourceNotFoundException: + logger.info(f"IVS channel {arn} not found, nothing to delete") + return + except ivs_client.exceptions.ChannelNotBroadcasting: + logger.info(f"IVS channel {arn} is not broadcasting.") + + try: + logger.info(f"Deleting channel {arn}") + ivs_client.delete_channel(arn=arn) + logger.info(f"IVS channel {arn} deleted") + except ivs_client.exceptions.ResourceNotFoundException: + logger.info(f"IVS channel {arn} not found, nothing to delete") + + +def get_video_channel(video_s3_key): + """ + Returns the IVS channel ARN for a given S3 video key. + """ + if not is_ssm_parameter_set(SSM_VIDEO_CHANNEL_MAP_PARAM): + return None + + video_channel_param_value = ssm_client.get_parameter(Name=SSM_VIDEO_CHANNEL_MAP_PARAM)['Parameter']['Value'] + video_channel_map = json.loads(video_channel_param_value) + return video_channel_map.get(video_s3_key) + + +@helper.create +def create_ivs_channels(event, _): + """ + Creates IVS channels. One channel is created per video in the 'video path' of the S3 staging bucket.
A mapping + of video to channel ARN is stored in SSM. + """ + video_file_keys = list_video_file_keys() + logger.info(f"Found video file keys: {video_file_keys}") + + for video_file_key in video_file_keys: + if channel_config_exists(video_file_key): + video_channel_arn = get_video_channel(video_file_key) + if channel_exists(video_channel_arn): + logger.info(f"Video with key {video_file_key} is already associated with IVS channel {video_channel_arn}") + continue + + # Note: IVS allows duplicate channel names; strip anything outside the characters IVS accepts + channel_name = 'retail-demo-store-' + \ + re.subn(r"[^A-Za-z0-9\-]", '', video_file_key+'-'+datetime.now().isoformat())[0][:127] + logger.info(f"Creating IVS channel for video {video_file_key} with name {channel_name}") + + try: + created_channel_arn = ivs_client.create_channel(name=channel_name, latencyMode='NORMAL')['channel']['arn'] + logger.info(f"IVS channel created with ARN {created_channel_arn}") + + if is_ssm_parameter_set(SSM_VIDEO_CHANNEL_MAP_PARAM): + video_channel_param_value = ssm_client.get_parameter(Name=SSM_VIDEO_CHANNEL_MAP_PARAM)['Parameter']['Value'] + video_channel_map = json.loads(video_channel_param_value) + + video_channel_map[video_file_key] = created_channel_arn + + ssm_client.put_parameter( + Name=SSM_VIDEO_CHANNEL_MAP_PARAM, + Value=json.dumps(video_channel_map), + Type='String', + Overwrite=True + ) + + else: + ssm_client.put_parameter( + Name=SSM_VIDEO_CHANNEL_MAP_PARAM, + Value=json.dumps({video_file_key: created_channel_arn}), + Type='String', + Overwrite=True + ) + except botocore.exceptions.EndpointConnectionError as ex: + logger.error("Could not create any IVS channels - probably because IVS is not supported in region. " + f"Channel name: {channel_name}. Region: {ivs_client.meta.region_name}") + + +@helper.update +def update_channels(event, _): + """ + Updates IVS channels. If a video in the 'video path' of the S3 staging bucket does not have an associated + channel then one will be created. If a config entry is present for a video file which no longer exists, then + the channel & associated config entry will be removed. + """ + create_ivs_channels(event, _) + + video_channel_param_value = ssm_client.get_parameter(Name=SSM_VIDEO_CHANNEL_MAP_PARAM)['Parameter']['Value'] + video_channel_map = json.loads(video_channel_param_value) + video_file_keys = list_video_file_keys() + + deleted_video_keys = [] + for video_path, channel_arn in video_channel_map.items(): + if video_path not in video_file_keys: + logger.info(f"Video with key {video_path} no longer exists. Associated IVS channel {channel_arn} will be deleted") + remove_channel(channel_arn) + deleted_video_keys.append(video_path) + + if deleted_video_keys: + for video_key in deleted_video_keys: + del video_channel_map[video_key] + + ssm_client.put_parameter( + Name=SSM_VIDEO_CHANNEL_MAP_PARAM, + Value=json.dumps(video_channel_map), + Type='String', + Overwrite=True + ) + + +@helper.delete +def delete_all_channels(event, _): + """ + Deletes all IVS channels referenced in the SSM_VIDEO_CHANNEL_MAP_PARAM.
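+ Channels that are still broadcasting raise a ConflictException; their entries are kept in the map so deletion can be retried on a later invocation.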
+ """ + logger.info(f"Deleting all IVS channels in stack") + if is_ssm_parameter_set(SSM_VIDEO_CHANNEL_MAP_PARAM): + video_channel_param_value = ssm_client.get_parameter(Name=SSM_VIDEO_CHANNEL_MAP_PARAM)['Parameter']['Value'] + video_channel_map = json.loads(video_channel_param_value) + else: + logger.info("No channels to delete") + return + + new_video_channel_map = {} + for video_path, channel_arn in video_channel_map.items(): + try: + remove_channel(channel_arn) + except ivs_client.exceptions.ConflictException as ex: + new_video_channel_map[video_path] = channel_arn + logger.error(f'Could not delete {channel_arn} - probably still streaming.' + f'. Exception: {ex}') + + ssm_client.put_parameter( + Name=SSM_VIDEO_CHANNEL_MAP_PARAM, + Value="NONE" if len(new_video_channel_map)==0 else json.dumps(new_video_channel_map), + Type='String', + Overwrite=True + ) + + +def lambda_handler(event, context): + logger.info('ENVIRONMENT VARIABLES') + logger.info(os.environ) + logger.info('EVENT') + logger.info(event) + + helper(event, context) diff --git a/src/aws-lambda/ivs-create-channels/requirements.txt b/src/aws-lambda/ivs-create-channels/requirements.txt new file mode 100644 index 000000000..05fb32695 --- /dev/null +++ b/src/aws-lambda/ivs-create-channels/requirements.txt @@ -0,0 +1,3 @@ +packaging==20.4 +boto3==1.14.57 +crhelper \ No newline at end of file diff --git a/src/aws-lambda/personalize-delete-resources/stage.sh b/src/aws-lambda/ivs-create-channels/stage.sh similarity index 100% rename from src/aws-lambda/personalize-delete-resources/stage.sh rename to src/aws-lambda/ivs-create-channels/stage.sh diff --git a/src/aws-lambda/personalize-delete-resources/bundle.sh b/src/aws-lambda/personalize-delete-resources/bundle.sh deleted file mode 100644 index 6ae502aa9..000000000 --- a/src/aws-lambda/personalize-delete-resources/bundle.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/bin/bash -set -e - -LAMBDA_SOURCE=personalize-delete-resources.py -PACKAGE_FILE=personalize-delete-resources.zip - -echo "Cleaning up intermediate files" -[ -e ${PACKAGE_FILE} ] && rm ${PACKAGE_FILE} -[ -e "models" ] && rm -rf models -[ -e "package" ] && rm -rf package - -echo "Installing Lambda dependencies" -pip install -r requirements.txt --target ./package - -echo "Building Lambda deployment package" -cd package -zip -r9 ${OLDPWD}/${PACKAGE_FILE} . -cd ${OLDPWD} - -echo "Downloading Personalize SDK model files" -wget -q https://raw.githubusercontent.com/boto/botocore/e22aad94ba9f33bda7a0abea3a959fb1af56b25d/botocore/data/personalize-runtime/2018-05-22/service-2.json -P ./models/personalize-runtime/2018-05-22 -wget -q https://raw.githubusercontent.com/boto/botocore/e22aad94ba9f33bda7a0abea3a959fb1af56b25d/botocore/data/personalize/2018-05-22/service-2.json -P ./models/personalize/2018-05-22 - -echo "Adding Lambda function source code to package" -zip -g ${PACKAGE_FILE} ${LAMBDA_SOURCE} -zip -gr ${PACKAGE_FILE} models - -echo "Done!" \ No newline at end of file diff --git a/src/aws-lambda/personalize-delete-resources/personalize-delete-resources.py b/src/aws-lambda/personalize-delete-resources/personalize-delete-resources.py deleted file mode 100644 index 75e92b4b0..000000000 --- a/src/aws-lambda/personalize-delete-resources/personalize-delete-resources.py +++ /dev/null @@ -1,282 +0,0 @@ -# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
-# SPDX-License-Identifier: MIT-0 - -import json -import os -import time -import logging -import boto3 -import botocore - -from crhelper import CfnResource -from packaging import version -from botocore.exceptions import ClientError - -logger = logging.getLogger() -logger.setLevel(logging.INFO) - -min_botocore_version = '1.16.24' - -# Check if Lambda runtime needs to be patched with more recent Personalize SDK -# This must be done before creating Personalize clients from boto3. -if version.parse(botocore.__version__) < version.parse(min_botocore_version): - logger.info('Patching botocore SDK libraries for Personalize') - - dir_path = os.path.dirname(os.path.realpath(__file__)) - models_path = os.path.join(dir_path, 'models') - - aws_data_path = set(os.environ.get('AWS_DATA_PATH', '').split(os.pathsep)) - aws_data_path.add(models_path) - - os.environ.update({ - 'AWS_DATA_PATH': os.pathsep.join(aws_data_path) - }) - - logger.info(os.environ) -else: - logger.info('Patching botocore SDK for Personalize not required') - -helper = CfnResource() - -# Setup Clients -personalize = boto3.client('personalize') -ssm = boto3.client('ssm') -iam = boto3.client('iam') - -schemas_to_delete = [ - 'retaildemostore-schema-users', - 'retaildemostore-schema-items', - 'retaildemostore-schema-interactions', - 'retaildemostore-event-schema' -] - -def get_dataset_arn(dataset_group_name): - dataset_group_arn = None - - dataset_groups_paginator = personalize.get_paginator('list_dataset_groups') - for dataset_groups_page in dataset_groups_paginator.paginate(): - for dataset_group in dataset_groups_page['datasetGroups']: - if dataset_group['name'] == dataset_group_name: - dataset_group_arn = dataset_group['datasetGroupArn'] - break - - return dataset_group_arn - -def delete_filters(dataset_group_arn): - filters_response = personalize.list_filters(datasetGroupArn = dataset_group_arn, maxResults = 100) - for filter in filters_response['Filters']: - logger.info('Deleting filter: ' + filter['filterArn']) - personalize.delete_filter(filterArn = filter['filterArn']) - - return True - -def get_solutions(dataset_group_arn): - solution_arns = [] - - solutions_response = personalize.list_solutions(datasetGroupArn = dataset_group_arn, maxResults = 100) - if 'solutions' in solutions_response: - for solution in solutions_response['solutions']: - solution_arns.append(solution['solutionArn']) - - logger.info('Solutions found: ' + str(solution_arns)) - - return solution_arns - -def delete_campaigns(solution_arns): - logger.info('Clearing related products campaign arn SSM parameter') - ssm.put_parameter( - Name='retaildemostore-related-products-campaign-arn', - Description='Retail Demo Store Related Products Campaign Arn Parameter', - Value='NONE', - Type='String', - Overwrite=True - ) - logger.info('Clearing product recommendation campaign arn SSM parameter') - ssm.put_parameter( - Name='retaildemostore-product-recommendation-campaign-arn', - Description='Retail Demo Store Product Recommendation Campaign Arn Parameter', - Value='NONE', - Type='String', - Overwrite=True - ) - logger.info('Clearing personalized ranking campaign arn SSM parameter') - response = ssm.put_parameter( - Name='retaildemostore-personalized-ranking-campaign-arn', - Description='Retail Demo Store Personalized Ranking Campaign Arn Parameter', - Value='NONE', - Type='String', - Overwrite=True - ) - - campaign_count = 0 - - for solution_arn in solution_arns: - campaigns_response = personalize.list_campaigns(solutionArn = solution_arn, maxResults = 100) - - if 
'campaigns' in campaigns_response: - for campaign in campaigns_response['campaigns']: - campaign_count += 1 - if campaign['status'] == 'ACTIVE': - logger.info('Deleting campaign: ' + campaign['campaignArn']) - - personalize.delete_campaign(campaignArn = campaign['campaignArn']) - - return campaign_count == 0 - -def delete_solutions(dataset_group_arn): - solution_count = 0 - - solutions_response = personalize.list_solutions(datasetGroupArn = dataset_group_arn, maxResults = 100) - if 'solutions' in solutions_response: - for solution in solutions_response['solutions']: - solution_count += 1 - solution_arn = solution['solutionArn'] - if solution['status'] == 'ACTIVE': - logger.info('Deleting solution: ' + solution_arn) - personalize.delete_solution(solutionArn = solution_arn) - - return solution_count == 0 - -def delete_event_trackers(dataset_group_arn): - ssm.put_parameter( - Name='retaildemostore-personalize-event-tracker-id', - Description='Retail Demo Store Personalize Event Tracker ID Parameter', - Value='NONE', - Type='String', - Overwrite=True - ) - - logger.info('Deleting event trackers for dataset group') - event_tracker_count = 0 - event_trackers_paginator = personalize.get_paginator('list_event_trackers') - for event_tracker_page in event_trackers_paginator.paginate(datasetGroupArn = dataset_group_arn): - for event_tracker in event_tracker_page['eventTrackers']: - event_tracker_count += 1 - if event_tracker['status'] == 'ACTIVE': - logger.info('Deleting event tracker {}'.format(event_tracker['eventTrackerArn'])) - personalize.delete_event_tracker(eventTrackerArn = event_tracker['eventTrackerArn']) - - return event_tracker_count == 0 - -def delete_datasets(dataset_group_arn): - logger.info('Deleting datasets for dataset group') - dataset_count = 0 - dataset_paginator = personalize.get_paginator('list_datasets') - - for dataset_page in dataset_paginator.paginate(datasetGroupArn = dataset_group_arn): - for dataset in dataset_page['datasets']: - dataset_count += 1 - if dataset['status'] == 'ACTIVE': - logger.info('Deleting dataset {}'.format(dataset['datasetArn'])) - personalize.delete_dataset(datasetArn = dataset['datasetArn']) - - return dataset_count == 0 - -def delete_dataset_group(dataset_group_arn): - try: - logger.info('Deleting dataset group') - personalize.delete_dataset_group(datasetGroupArn = dataset_group_arn) - return False - except ClientError as e: - error_code = e.response['Error']['Code'] - if error_code == 'ResourceNotFoundException': - logger.info("Dataset group does not exist") - - return True - -def delete_schemas(schemas_to_delete): - schema_paginator = personalize.get_paginator('list_schemas') - for schema_page in schema_paginator.paginate(): - for schema in schema_page['schemas']: - if schema['name'] in schemas_to_delete: - try: - logger.info('Deleting schema {}'.format(schema['schemaArn'])) - personalize.delete_schema(schemaArn = schema['schemaArn']) - except ClientError as e: - error_code = e.response['Error']['Code'] - if error_code == 'ResourceNotFoundException': - logger.info("Schema does not exist") - - logger.info('Done deleting schemas') - - return True - -def delete_role(): - try: - response = iam.detach_role_policy( - RoleName=os.environ.get('Uid')+'-PersonalizeS3', - PolicyArn='arn:aws:iam::aws:policy/AmazonS3ReadOnlyAccess' - ) - except ClientError as e: - error_code = e.response['Error']['Code'] - if error_code != 'NoSuchEntity': - logger.error(e) - - try: - response = iam.delete_role( - RoleName=os.environ.get('Uid')+'-PersonalizeS3' - ) - except 
ClientError as e: - error_code = e.response['Error']['Code'] - if error_code != 'NoSuchEntity': - logger.error(e) - - return True - -@helper.create -@helper.update -def no_op(_, __): - # we only delete things here. - pass - -@helper.poll_delete -def poll_delete(event, _): - ''' Deletes resources one call at a time - - The crhelper will keep calling this function every 2 minutes until we return True. This - will ensure we complete the delete process if it takes longer than usual timeout period. - In practice, the delete process occurs pretty quickly, though. - ''' - # Name of dataset group that was created in the Personalize workshop notebook or by pre-create Lambda. - dataset_group_name = event['ResourceProperties'].get('DatasetGroupName', 'retaildemostore') - logger.info('Deleting resources for Personalize dataset group: ' + dataset_group_name) - - dataset_group_arn = get_dataset_arn(dataset_group_name) - - done = dataset_group_arn is None - - if dataset_group_arn: - # Other than the dataset group, no deps on filters so delete them first. - delete_filters(dataset_group_arn) - - # Delete rest of dataset group resources from inside out. - solution_arns = get_solutions(dataset_group_arn) - - if delete_campaigns(solution_arns): - logger.info('Campaigns fully deleted') - if delete_solutions(dataset_group_arn): - logger.info('Solutions and SolutionVersions fully deleted') - if delete_event_trackers(dataset_group_arn): - logger.info('EventTrackers fully deleted') - if delete_datasets(dataset_group_arn): - logger.info('Datasets fully deleted') - if delete_dataset_group(dataset_group_arn): - logger.info('DatasetGroup fully deleted') - - # These resources aren't children of a dataset group so are deleted - # when the dataset group is done being deleted (or doesn't exist). - if done and delete_schemas(schemas_to_delete): - logger.info('Schemas fully deleted') - delete_role() - - logger.info('IAM Role fully deleted') - - logger.info('All Personalize resources for dataset group {} deleted'.format(dataset_group_name)) - - helper.Data['Output'] = 'All Personalize resources for dataset group {} deleted'.format(dataset_group_name) - - # By returning False, we'll get called back in 2 mins. Otherwise returning True completes delete process. 
- return done - -def lambda_handler(event, context): - helper(event, context) diff --git a/src/aws-lambda/personalize-delete-resources/requirements.txt b/src/aws-lambda/personalize-delete-resources/requirements.txt deleted file mode 100644 index dc39723ce..000000000 --- a/src/aws-lambda/personalize-delete-resources/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -crhelper==2.0.5 -packaging==20.4 \ No newline at end of file diff --git a/src/aws-lambda/personalize-pre-create-campaigns/personalize-pre-create-campaigns.py b/src/aws-lambda/personalize-pre-create-campaigns/personalize-pre-create-campaigns.py index d876540b3..73df439a1 100644 --- a/src/aws-lambda/personalize-pre-create-campaigns/personalize-pre-create-campaigns.py +++ b/src/aws-lambda/personalize-pre-create-campaigns/personalize-pre-create-campaigns.py @@ -29,6 +29,8 @@ import botocore import logging import os +import uuid +from crhelper import CfnResource from packaging import version from botocore.exceptions import ClientError @@ -59,11 +61,137 @@ iam = boto3.client("iam") ssm = boto3.client('ssm') -sts = boto3.client('sts') personalize = boto3.client('personalize') personalize_runtime = boto3.client('personalize-runtime') cw_events = boto3.client('events') codepipeline = boto3.client('codepipeline') +cloudformation_helper = CfnResource() + +sts = boto3.client('sts') + +# Where our data is for training +bucket = os.environ['csv_bucket'] +bucket_path = os.environ.get('csv_path', '') + +items_filename = bucket_path + "items.csv" +users_filename = bucket_path + "users.csv" +interactions_filename = bucket_path + "interactions.csv" + +session = boto3.session.Session() +region = session.region_name +account_id = sts.get_caller_identity().get('Account') + +# Dataset group names are now dynamically generated +dataset_group_name_root = 'retaildemostore-' + +# Exactly what we want to train is stored in this SSM parameter - or we generate it ourselves with a default +# if it does not exist. +training_config_param_name = 'retaildemostore-training-config' # ParameterPersonalizeTrainConfig + +role_name = os.environ.get('Uid') + '-PersonalizeS3' +event_tracking_id_param = 'retaildemostore-personalize-event-tracker-id' +filter_purchased_arn_param = 'retaildemostore-personalize-filter-purchased-arn' + +all_campaign_types = ['retaildemostore-related-products', + 'retaildemostore-product-personalization', + 'retaildemostore-personalized-ranking'] + +campaign_type_to_event_type = { + "retaildemostore-related-products": "ProductViewed", + "retaildemostore-product-personalization": "ProductViewed", + "retaildemostore-personalized-ranking": "ProductViewed" +} + +campaign_type_to_recipe_arn = { + "retaildemostore-related-products": "arn:aws:personalize:::recipe/aws-sims", + "retaildemostore-product-personalization": "arn:aws:personalize:::recipe/aws-user-personalization", + "retaildemostore-personalized-ranking": "arn:aws:personalize:::recipe/aws-personalized-ranking" +} + +campaign_type_to_ssm_param = { + "retaildemostore-related-products": "retaildemostore-related-products-campaign-arn", + "retaildemostore-product-personalization": "retaildemostore-product-recommendation-campaign-arn", + "retaildemostore-personalized-ranking": "retaildemostore-personalized-ranking-campaign-arn" +} + +# Info on CloudWatch event rule used to repeatedly call this function.
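+# The rule keeps re-invoking this function so that long-running Personalize builds can be polled; delete_event_rule below removes it once there is nothing left to do.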
+lambda_event_rule_name = os.environ['lambda_event_rule_name'] + +items_schema = { + "type": "record", + "name": "Items", + "namespace": "com.amazonaws.personalize.schema", + "fields": [ + { + "name": "ITEM_ID", + "type": "string" + }, + { + "name": "CATEGORY", + "type": "string", + "categorical": True + }, + { + "name": "STYLE", + "type": "string", + "categorical": True + } + ], + "version": "1.0" +} + +users_schema = { + "type": "record", + "name": "Users", + "namespace": "com.amazonaws.personalize.schema", + "fields": [ + { + "name": "USER_ID", + "type": "string" + }, + { + "name": "AGE", + "type": "int" + }, + { + "name": "GENDER", + "type": "string", + "categorical": True + } + ], + "version": "1.0" +} + +interactions_schema = { + "type": "record", + "name": "Interactions", + "namespace": "com.amazonaws.personalize.schema", + "fields": [ + { + "name": "ITEM_ID", + "type": "string" + }, + { + "name": "USER_ID", + "type": "string" + }, + { + "name": "EVENT_TYPE", + "type": "string" + }, + { + "name": "TIMESTAMP", + "type": "long" + }, + { + "name": "DISCOUNT", # This is the contextual metadata - "Yes" or "No". + "type": "string", + "categorical": True + } + ], + "version": "1.0" +} + def create_schema(schema, name): """ Conditionally creates a personalize schema if it does not already exist """ @@ -87,7 +215,8 @@ def create_schema(schema, name): schema_arn = create_schema_response['schemaArn'] return schema_arn - + + def create_dataset(dataset_group_arn, dataset_name, dataset_type, schema_arn): """ Conditionally creates dataset if it doesn't already exist """ response = personalize.list_datasets(datasetGroupArn = dataset_group_arn) @@ -112,6 +241,7 @@ def create_dataset(dataset_group_arn, dataset_name, dataset_type, schema_arn): return dataset_arn + def create_import_job(job_name, dataset_arn, account_id, region, data_location, role_arn): import_job_exists=False response=str(personalize.list_dataset_import_jobs(datasetArn = dataset_arn)) @@ -134,7 +264,8 @@ def create_import_job(job_name, dataset_arn, account_id, region, data_location, import_job_arn = create_dataset_import_job_response['datasetImportJobArn'] return import_job_arn - + + def is_import_job_active(import_job_arn): import_job_response = personalize.describe_dataset_import_job( datasetImportJobArn = import_job_arn @@ -149,13 +280,15 @@ def is_import_job_active(import_job_arn): return status == "ACTIVE" + def is_ssm_parameter_set(parameter_name): try: - response = ssm.get_parameter(Name = parameter_name) + response = ssm.get_parameter(Name=parameter_name) return response['Parameter']['Value'] != 'NONE' except ssm.exceptions.ParameterNotFound: return False + def create_personalize_role(role_name): role_arn = None @@ -193,8 +326,84 @@ def create_personalize_role(role_name): return role_arn + +def delete_personalize_role(): + try: + response = iam.detach_role_policy( + RoleName=os.environ.get('Uid')+'-PersonalizeS3', + PolicyArn='arn:aws:iam::aws:policy/AmazonS3ReadOnlyAccess' + ) + except ClientError as e: + error_code = e.response['Error']['Code'] + if error_code != 'NoSuchEntity': + logger.error(e) + + try: + response = iam.delete_role( + RoleName=os.environ.get('Uid')+'-PersonalizeS3' + ) + except ClientError as e: + error_code = e.response['Error']['Code'] + if error_code != 'NoSuchEntity': + logger.error(e) + + return True + + +def enable_event_rule(rule_name): + try: + logger.info('Enabling event rule {}'.format(rule_name)) + cw_events.enable_rule(Name=rule_name) + + except cw_events.exceptions.ClientError as e: + 
error_code = e.response['Error']['Code'] + if error_code == 'ResourceNotFoundException': + logger.error('CloudWatch event rule to enable not found') + raise + else: + logger.error(e) + raise + +def disable_event_rule(rule_name): + """ + Disables the CloudWatch event rule used to trigger this lambda function on a recurring schedule. + Can be preferable to deleting the rule because it is then easy to reinstate exactly the same rule by + simply enabling it. + Args: + rule_name (str): Rule to disable + """ + try: + logger.info('Disabling event rule {}'.format(rule_name)) + cw_events.disable_rule(Name=rule_name) + + except ClientError as e: + error_code = e.response['Error']['Code'] + if error_code == 'ResourceNotFoundException': + logger.warning('CloudWatch event rule to disable not found') + else: + logger.error(e) + + +def delete_personalize_schemas(schemas_to_delete): + schema_paginator = personalize.get_paginator('list_schemas') + for schema_page in schema_paginator.paginate(): + for schema in schema_page['schemas']: + if schema['name'] in schemas_to_delete: + try: + logger.info('Deleting schema {}'.format(schema['schemaArn'])) + personalize.delete_schema(schemaArn=schema['schemaArn']) + except ClientError as e: + error_code = e.response['Error']['Code'] + if error_code == 'ResourceNotFoundException': + logger.info("Schema does not exist") + + logger.info('Done deleting schemas') + + return True + + def delete_event_rule(rule_name): - ''' Deletes CloudWatch event rule used to trigger this lambda function on a recurring schedule ''' + """Deletes CloudWatch event rule used to trigger this lambda function on a recurring schedule""" try: response = cw_events.list_targets_by_rule(Rule = rule_name) @@ -207,23 +416,23 @@ target_ids.append(target['Id']) response = cw_events.remove_targets( - Rule = rule_name, - Ids = target_ids + Rule=rule_name, + Ids=target_ids ) logger.info('Deleting event rule {}'.format(rule_name)) - cw_events.delete_rule(Name = rule_name) + cw_events.delete_rule(Name=rule_name) except ClientError as e: error_code = e.response['Error']['Code'] if error_code == 'ResourceNotFoundException': - logger.warn('CloudWatch event rule to delete not found') + logger.warning('CloudWatch event rule to delete not found') else: logger.error(e) def rebuild_webui_service(region, account_id): - ''' Initiates a build/deploy of the Web UI service so that event tracker is picked up ''' + """ Initiates a build/deploy of the Web UI service so that event tracker is picked up """ logger.info('Looking for pipeline with tag "RetailDemoStoreServiceName=web-ui" to initiate execution') @@ -259,10 +468,164 @@ break if not restarted: - logger.warn('Pipeline with tag "RetailDemoStoreServiceName=web-ui" not restarted; does pipeline and/or tag exist?') + logger.warning('Pipeline with tag "RetailDemoStoreServiceName=web-ui" not restarted; does pipeline and/or tag exist?') + + +def create_campaign_polling(dataset_group_arn, recipe_arn, + campaign_solution_name, event_type, + **kwargs): + """ + For a particular campaign name (which also serves as the solution name), build the solution, one solution version + and one campaign. This function is meant to be called repeatedly (polling) till it returns the campaign Arn.
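+ Each call advances at most one stage - solution, then solution version, then campaign - and returns None until the campaign is ACTIVE.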
+ Args: + dataset_group_arn: Where to build the campaign + recipe_arn: Which recipe to build + campaign_solution_name: What name to build it under + event_type: Which event type to build it with (see Amazon Personalize docs for details of how that works). + kwargs: Other arguments which are ignored. This is to allow training configs to be passed around. + + Returns: + Arn if desired campaign has been created or failed. + None otherwise. + """ + + # We grab the below to reconstruct Arns. There are other ways to do this but it is tested and working, so + # we do it this way. + session = boto3.session.Session() + region = session.region_name + account_id = sts.get_caller_identity().get('Account') + + # We see if the solution name is created by looking through the string of all solutions + # - this is slightly unsafe but it has been tested and is working in this codebase. + list_solutions_response = str(personalize.list_solutions(datasetGroupArn=dataset_group_arn)) + + if list_solutions_response.find(campaign_solution_name) != -1: + solution_arn="arn:aws:personalize:"+region+":"+account_id+":solution/"+campaign_solution_name + else: + logger.info("Solution " + campaign_solution_name + " to be created.") + create_solution_response = personalize.create_solution( + name=campaign_solution_name, + datasetGroupArn=dataset_group_arn, + recipeArn=recipe_arn, + eventType=event_type, + performHPO=True + ) + + solution_arn = create_solution_response['solutionArn'] + logger.info(f"Product solution {campaign_solution_name} create initiated with Arn {solution_arn}") + + return None + + # Create product solution version if it doesn't exist + response = personalize.list_solution_versions(solutionArn=solution_arn) + if response['solutionVersions']: + logger.info("Solution Version for "+campaign_solution_name+" already exists, not creating") + solution_version_arn = response['solutionVersions'][0]['solutionVersionArn'] + else: + logger.info('Creating SolutionVersion') + create_solution_version_response = personalize.create_solution_version( + solutionArn=solution_arn) + solution_version_arn = create_solution_version_response['solutionVersionArn'] + + # Make sure product recommendation solution version is active, otherwise force a re-poll + describe_solution_version_response = personalize.describe_solution_version( + solutionVersionArn=solution_version_arn + ) + status = describe_solution_version_response["solutionVersion"]["status"] + logger.info(f"SolutionVersion Status for {campaign_solution_name} is: {status}") + if status != "ACTIVE": + # logger.info(f"Recommendation solution version for {campaign_solution_name} " + # f"status is NOT active - status {status} - repoll later") + return None + + # Create related product campaign if it doesn't exist + list_campaigns_response = personalize.list_campaigns( + solutionArn=solution_arn + ) + if list_campaigns_response['campaigns']: + for campaign in list_campaigns_response['campaigns']: + status = campaign['status'] + if status != 'ACTIVE': + logger.info(f"Campaign {campaign['campaignArn']} is NOT active - status {status} - repoll later") + return None + + logger.info('Campaign ' + campaign['campaignArn'] + ' is active.') + campaign_arn = campaign['campaignArn'] + return campaign_arn + else: + logger.info('Creating campaign - will poll') + personalize.create_campaign( + name=campaign_solution_name, + solutionVersionArn=solution_version_arn, + minProvisionedTPS=1 + ) + return None + # Will not arrive here + + +def delete_campaign_polling(dataset_group_arn, 
solution_arn, **kwargs): + """ + For a particular solution Arn, remove the solution and all campaigns attached to it. + This function is meant to be called repeatedly (polling) till it returns True. + Args: + dataset_group_arn: Where to delete the campaign+solution + solution_arn: What solution to remove + kwargs: Other arguments which are ignored. This is to allow training configs to be passed around. + + Returns: + True if the desired solution and all of its campaigns have been deleted (or deletion failed terminally). + False otherwise, in which case the caller should poll again later. + """ + + finished = True + list_solutions_response = personalize.list_solutions(datasetGroupArn=dataset_group_arn) + list_campaigns_response = personalize.list_campaigns(solutionArn=solution_arn) + + for campaign in list_campaigns_response['campaigns']: + finished = False + try: + logger.info(f"We are signalling that we do not want campaign with Arn {campaign['campaignArn']}") + personalize.delete_campaign(campaignArn=campaign['campaignArn']) + + # Delete the SSM parameter if we have deleted a campaign + for ssm_param in campaign_type_to_ssm_param.values(): + test_campaign_arn = ssm.get_parameter(Name=ssm_param)['Parameter']['Value'] + if campaign['campaignArn'].strip() == test_campaign_arn.strip(): + logger.info(f"As campaign with Arn {campaign['campaignArn']} was configured in SSM parameter" + f" {ssm_param} but is to be deleted, we are removing it from SSM.") + response = ssm.put_parameter( + Name=ssm_param, + Description='Retail Demo Store Campaign Arn Parameter', + Value='NONE', + Type='String', + Overwrite=True + ) + except personalize.exceptions.ResourceInUseException as ex: + logger.info(f"Campaign with Arn {campaign['campaignArn']} is still alive - waiting for it to change status " + f"so it can disappear") + + if not finished: + return False + + for solution in list_solutions_response['solutions']: + if solution['solutionArn'] == solution_arn: + finished = False + try: + logger.info(f"We are signalling that we do not want solution with Arn {solution_arn}") + personalize.delete_solution(solutionArn=solution_arn) + except personalize.exceptions.ResourceInUseException as ex: + logger.info(f"Solution with Arn {solution['solutionArn']} is still alive " + f"- waiting for it to change status so it can disappear") + + return finished + def create_recent_purchase_filter(dataset_group_arn, ssm_parameter_name): - ''' Creates Personalize Filter that excludes recommendations for recently purchased products ''' + """Creates Personalize Filter that excludes recommendations for recently purchased products""" logger.info('Creating purchased product filter') @@ -284,526 +647,505 @@ Overwrite=True ) -def lambda_handler(event, context): - logger.debug('## ENVIRONMENT VARIABLES') - logger.debug(os.environ) - logger.debug('## EVENT') - logger.debug(event) - bucket = os.environ['csv_bucket'] - bucket_path = os.environ.get('csv_path', '') +def delete_datasets(dataset_group_arn): + """Delete all datasets in a dataset group in a polling fashion; return True once none remain.""" + logger.info('Deleting datasets for dataset group') + dataset_count = 0 + dataset_paginator = personalize.get_paginator('list_datasets') - items_filename = bucket_path + "items.csv" - users_filename = bucket_path + "users.csv" - interactions_filename = bucket_path + "interactions.csv" - - session = boto3.session.Session() - region = session.region_name -
account_id = sts.get_caller_identity().get('Account') - - dataset_group_name = 'retaildemostore' - - related_product_campaign_arn_param = 'retaildemostore-related-products-campaign-arn' - product_campaign_arn_param = 'retaildemostore-product-recommendation-campaign-arn' - rerank_campaign_arn_param = 'retaildemostore-personalized-ranking-campaign-arn' - role_name = os.environ.get('Uid')+'-PersonalizeS3' - event_tracking_id_param = 'retaildemostore-personalize-event-tracker-id' - filter_purchased_arn_param = 'retaildemostore-personalize-filter-purchased-arn' - - # Info on CloudWatch event rule used to repeatedely call this function. - lambda_event_rule_name = os.environ['lambda_event_rule_name'] - - # If SSM parameters for campaign arns are already set, we are done. - related_product_campaign_arn_set = is_ssm_parameter_set(related_product_campaign_arn_param) - product_campaign_arn_set = is_ssm_parameter_set(product_campaign_arn_param) - rerank_campaign_arn_set = is_ssm_parameter_set(rerank_campaign_arn_param) - event_tracking_id_set = is_ssm_parameter_set(event_tracking_id_param) - filter_purchased_arn_set = is_ssm_parameter_set(filter_purchased_arn_param) - - # Short-circuit rest of logic of all campaign ARNs are set as parameters. Means there's nothing to do. - if (related_product_campaign_arn_set and - product_campaign_arn_set and - rerank_campaign_arn_set and - event_tracking_id_set and - filter_purchased_arn_set): - - logger.info('ARNs for related products, user recommendations, reranking campaigns, recent purchase filter set as SSM parameters; nothing to do') - - # No need for this lambda function to be called anymore so delete CW event rule that has been calling us. - delete_event_rule(lambda_event_rule_name) - - return { - 'statusCode': 200, - 'body': json.dumps('SSM parameters for related products, user recommendations, reranking campaign, and recent purchase filter ARNs already set; nothing to do') - } - - if not related_product_campaign_arn_set: - logger.info(related_product_campaign_arn_param + ' SSM parameter is not set yet; proceeding with step verification/completion process') + for dataset_page in dataset_paginator.paginate(datasetGroupArn = dataset_group_arn): + for dataset in dataset_page['datasets']: + dataset_count += 1 + if dataset['status'] == 'ACTIVE': + logger.info('Deleting dataset {}'.format(dataset['datasetArn'])) + try: + personalize.delete_dataset(datasetArn = dataset['datasetArn']) + except personalize.exceptions.ResourceInUseException as ex: + logger.info('Cannot yet delete it.') - if not product_campaign_arn_set: - logger.info(product_campaign_arn_param + ' SSM parameter is not set yet; proceeding with step verification/completion process') + return dataset_count == 0 - if not rerank_campaign_arn_set: - logger.info(rerank_campaign_arn_param + ' SSM parameter is not set yet; proceeding with step verification/completion process') - if not filter_purchased_arn_set: - logger.info(filter_purchased_arn_param + ' SSM parameter is not set yet; proceeding with step verification/completion process') +def delete_dataset_group(dataset_group_arn): + """Delete dataset group in a polling fashion; return True once it is gone.""" + try: + logger.info('Deleting dataset group') + try: + personalize.delete_dataset_group(datasetGroupArn = dataset_group_arn) + return False + except personalize.exceptions.ResourceInUseException as ex: + logger.info('Cannot yet delete it.') + return False + except ClientError as e: + error_code = e.response['Error']['Code'] + if error_code ==
'ResourceNotFoundException': + logger.info("Dataset group does not exist") - # Create personalize role, if necessary. - role_arn = create_personalize_role(role_name) - if not role_arn: - return { - 'statusCode': 200, - 'body': json.dumps('Waiting for IAM role to be consistent') - } + return True - items_schema = { - "type": "record", - "name": "Items", - "namespace": "com.amazonaws.personalize.schema", - "fields": [ - { - "name": "ITEM_ID", - "type": "string" - }, - { - "name": "CATEGORY", - "type": "string", - "categorical": True - }, - { - "name": "STYLE", - "type": "string", - "categorical": True - } - ], - "version": "1.0" - } - - users_schema = { - "type": "record", - "name": "Users", - "namespace": "com.amazonaws.personalize.schema", - "fields": [ - { - "name": "USER_ID", - "type": "string" - }, - { - "name": "AGE", - "type": "int" - }, - { - "name": "GENDER", - "type": "string", - "categorical": True - } - ], - "version": "1.0" - } - - interactions_schema = { - "type": "record", - "name": "Interactions", - "namespace": "com.amazonaws.personalize.schema", - "fields": [ - { - "name": "ITEM_ID", - "type": "string" - }, - { - "name": "USER_ID", - "type": "string" - }, - { - "name": "EVENT_TYPE", - "type": "string" - }, - { - "name": "TIMESTAMP", - "type": "long" - } - ], - "version": "1.0" - } - - # Conditionally create schemas - items_schema_arn = create_schema(items_schema, "retaildemostore-schema-items") - users_schema_arn = create_schema(users_schema, "retaildemostore-schema-users") - interactions_schema_arn = create_schema(interactions_schema, "retaildemostore-schema-interactions") - - # Create dataset group if it doesn't exist - response=personalize.list_dataset_groups() - datasetGroups=response['datasetGroups'] - group_exists=False - - for datasetGroup in datasetGroups: - if datasetGroup['name'] == dataset_group_name: - logger.info("Dataset group "+dataset_group_name+" already exists, not creating") - group_exists=True - dataset_group_arn = datasetGroup['datasetGroupArn'] - break - - if not group_exists: - create_dataset_group_response = personalize.create_dataset_group(name = dataset_group_name) - dataset_group_arn = create_dataset_group_response['datasetGroupArn'] - describe_dataset_group_response = personalize.describe_dataset_group( - datasetGroupArn = dataset_group_arn +def delete_event_trackers(dataset_group_arn): + """Delete all event trackers in a dataset group in a polling fashion; return True once none remain.""" + ssm.put_parameter( + Name='retaildemostore-personalize-event-tracker-id', + Description='Retail Demo Store Personalize Event Tracker ID Parameter', + Value='NONE', + Type='String', + Overwrite=True ) - status = describe_dataset_group_response["datasetGroup"]["status"] - logger.info("DatasetGroup: {}".format(status)) - - if status == "CREATE FAILED": - logger.error('DatasetGroup {} create failed: {}'.format(dataset_group_name, json.dumps(describe_dataset_group_response))) - return { - 'statusCode': 200, - 'body': json.dumps('DatasetGroup create failed!
Campaign pre-create aborted.') + } + logger.info('Deleting event trackers for dataset group') + event_tracker_count = 0 + event_trackers_paginator = personalize.get_paginator('list_event_trackers') + for event_tracker_page in event_trackers_paginator.paginate(datasetGroupArn=dataset_group_arn): + for event_tracker in event_tracker_page['eventTrackers']: + event_tracker_count += 1 + if event_tracker['status'] == 'ACTIVE': + logger.info('Deleting event tracker {}'.format(event_tracker['eventTrackerArn'])) + try: + personalize.delete_event_tracker(eventTrackerArn=event_tracker['eventTrackerArn']) + except personalize.exceptions.ResourceInUseException as ex: + logger.info('Could not yet delete it') + + return event_tracker_count == 0 + + +def delete_filters(dataset_group_arn): + """Delete all filters in a dataset group in a polling fashion; return True once none remain.""" + filters_response = personalize.list_filters(datasetGroupArn = dataset_group_arn, maxResults = 100) + + for filter in filters_response['Filters']: + logger.info('Deleting filter: ' + filter['filterArn']) + try: + personalize.delete_filter(filterArn = filter['filterArn']) + except personalize.exceptions.ResourceInUseException as ex: + logger.info('Could not yet delete it') + + return len(filters_response['Filters']) == 0 + +def update(): + """ + According to the contents of the SSM variable retaildemostore-training-config, build and delete + dataset groups, datasets, solutions and campaigns to reach the desired state. + + The function runs off the training config, which has the following JSON structure: + - Dictionary with one key "steps", which represents sequential configurations to be achieved. + - Its value is a list of dictionaries, each representing a Personalize config to aspire to. + Once a step is finished, the system moves on to the next step. + - Each step Personalize config dictionary has a set of keys as the name of dataset group + to create and the value the config for that dataset group. + - Each dataset group config consists of a single key: "campaigns", with its value a dictionary + with key campaign type (there are three campaign types: user-item, item-item and reranking) + and value campaign config for that campaign type. + - The campaign config consists of a dictionary with 3 keys: + - "desired_campaign_suffixes" - a list of numerical version numbers of solutions and campaigns to create. + For example, if the campaign_type is "retaildemostore-related-products" and the desired version numbers are + [3], it will attempt to create a related products campaign with name retaildemostore-related-products-3 + - "desired_active_version_suffixes" - an int showing which of these version numbers should be activated in the UI. + This is achieved by putting the campaign Arn into the right SSM parameter so that it is + picked up by the recommendations endpoint. + - "minimum_available_campaigns" - if 0 then campaigns can get deleted even if it means there will be + no active campaigns. If 1 then this campaign is preserved till there is another campaign of this type. + + See the poll_create function below for how the default training configuration is constructed.
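+ + As a minimal example (with an illustrative dataset group name), a config asking for one dataset group with a single related-products campaign, kept available, would be: + + { + "steps": [ + { + "dataset_groups": { + "retaildemostore-example": { + "campaigns": { + "retaildemostore-related-products": { + "desired_campaign_suffixes": [0], + "desired_active_version_suffixes": 0, + "minimum_available_campaigns": 1 + } + } + } + } + } + ] + }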
+ + As another example, the following config requests a full teardown + followed by creation of a dataset group with two campaigns: - if status != "ACTIVE": - return { - 'statusCode': 200, - 'body': json.dumps('DatasetGroup NOT active yet') - } - - # Create datasets - items_dataset_arn = create_dataset(dataset_group_arn, 'retaildemostore-dataset-items', 'ITEMS', items_schema_arn) - users_dataset_arn = create_dataset(dataset_group_arn, 'retaildemostore-dataset-users', 'USERS', users_schema_arn) - interactions_dataset_arn = create_dataset(dataset_group_arn, 'retaildemostore-dataset-interactions', 'INTERACTIONS', interactions_schema_arn) - - # Create dataset import jobs - items_dataset_import_job_arn = create_import_job('retaildemostore-dataset-items-import-job', items_dataset_arn, account_id, region, "s3://{}/{}".format(bucket, items_filename), role_arn) - users_dataset_import_job_arn = create_import_job('retaildemostore-dataset-users-import-job', users_dataset_arn, account_id, region, "s3://{}/{}".format(bucket, users_filename), role_arn) - interactions_dataset_import_job_arn = create_import_job('retaildemostore-dataset-interactions-import-job', interactions_dataset_arn, account_id, region, "s3://{}/{}".format(bucket, interactions_filename), role_arn) - - # Make sure all import jobs are done/active before continuing - if not is_import_job_active(items_dataset_import_job_arn): - logger.info("Items import job is NOT active yet") - return { - 'statusCode': 200, - 'body': json.dumps('Items import job is NOT active yet') - } - - if not is_import_job_active(users_dataset_import_job_arn): - logger.info("Users import job is NOT active yet") - return { - 'statusCode': 200, - 'body': json.dumps('Users import job is NOT active yet') - } - - if not is_import_job_active(interactions_dataset_import_job_arn): - logger.info("Interactions import job is NOT active yet") - return { - 'statusCode': 200, - 'body': json.dumps('Interactions import job is NOT active yet') - } - - # Create event tracker, if necessary. - if not event_tracking_id_set: - # Either hasn't been created yet or isn't active yet. - list_event_trackers_response = personalize.list_event_trackers(datasetGroupArn = dataset_group_arn) - if len(list_event_trackers_response['eventTrackers']) == 0: - logger.info('Event Tracker does not exist; creating') - event_tracker = personalize.create_event_tracker( - datasetGroupArn=dataset_group_arn, - name='retaildemostore-event-tracker' - ) + { + "steps": [ + { + "dataset_groups": null + }, + { + "dataset_groups": { + "retaildemostore-MYDATASETGROUP": { + "campaigns": { + "retaildemostore-related-products": { + "desired_campaign_suffixes": [0], + "desired_active_version_suffixes": 0 + }, + "retaildemostore-product-personalization": { + "desired_campaign_suffixes": [0], + "desired_active_version_suffixes": 0 + } + } + } + } + } + ] + } + """ - if event_tracker.get('trackingId'): - event_tracking_id = event_tracker['trackingId'] - logger.info('Setting event tracking ID {} as SSM parameter'.format(event_tracking_id)) + # Already configured - grab that config - see it documented in the poll_create function below. 
+ train_configs = ssm.get_parameter(Name=training_config_param_name) + train_configs = json.loads(train_configs['Parameter']['Value']) + trainstep_config = train_configs['steps'][0] - ssm.put_parameter( - Name=event_tracking_id_param, - Description='Retail Demo Store Personalize Event Tracker ID Parameter', - Value='{}'.format(event_tracking_id), - Type='String', - Overwrite=True - ) + logger.info(f"Got train config: {json.dumps(trainstep_config, indent=2)}") - event_tracking_id_set = True + # Find dataset group names I control + response = personalize.list_dataset_groups() + datasetGroups = response['datasetGroups'] + datasetGroups = [datasetGroup for datasetGroup in datasetGroups + if datasetGroup['name'].startswith(dataset_group_name_root)] + all_dataset_group_names = [datasetGroup['name'] for datasetGroup in datasetGroups] - # Trigger rebuild of Web UI service so event tracker gets picked up. - rebuild_webui_service(region, account_id) + # group them into ones we want and ones we do not want + desired_dataset_group_names = [] if trainstep_config['dataset_groups'] is None else list(trainstep_config['dataset_groups'].keys()) + undesired_dataset_group_names = [name for name in all_dataset_group_names if name not in desired_dataset_group_names] - return { - 'statusCode': 200, - 'body': json.dumps('Event tracker created; waiting for it to become active') - } - else: - event_tracker = list_event_trackers_response['eventTrackers'][0] - logger.info("Event Tracker: {}".format(event_tracker['status'])) - - if event_tracker['status'] == 'CREATE FAILED': - logger.error('Event tracker create failed: {}'.format(json.dumps(event_tracker))) - return { - 'statusCode': 200, - 'body': json.dumps('Event tracker CREATE_FAILED; aborting process') - } + dataset_group_name_to_arn = {datasetGroup['name']:datasetGroup['datasetGroupArn'] for datasetGroup in datasetGroups} - # Create recent product purchase filter, if necessary - if not filter_purchased_arn_set: - create_recent_purchase_filter(dataset_group_arn, filter_purchased_arn_param) - filter_purchased_arn_set = True + all_deleted = True + all_created = True - # Create related product, product recommendation, and rerank solutions if they doesn't exist - related_recipe_arn = "arn:aws:personalize:::recipe/aws-sims" - related_solution_name = related_campaign_name = "retaildemostore-related-products" - product_recipe_arn = "arn:aws:personalize:::recipe/aws-user-personalization" - product_solution_name = product_campaign_name = "retaildemostore-product-personalization" - rerank_recipe_arn = "arn:aws:personalize:::recipe/aws-personalized-ranking" - rerank_solution_name = rerank_campaign_name = "retaildemostore-personalized-ranking" + if len(desired_dataset_group_names) > 0: + # We want to create some dataset groups so we'll be needing the schema and the role + role_arn = create_personalize_role(role_name) + if not role_arn: + logger.info('Waiting for IAM role to be consistent') + return False - solution_created = False - list_solutions_response = str(personalize.list_solutions(datasetGroupArn=dataset_group_arn)) + # Conditionally create schemas + items_schema_arn = create_schema(items_schema, "retaildemostore-schema-items") + users_schema_arn = create_schema(users_schema, "retaildemostore-schema-users") + interactions_schema_arn = create_schema(interactions_schema, "retaildemostore-schema-interactions") - if list_solutions_response.find(related_solution_name) != -1: - logger.info("Related product solution "+related_solution_name+" already exists, not creating") - 
related_solution_arn="arn:aws:personalize:"+region+":"+account_id+":solution/"+related_solution_name - else: - create_solution_response = personalize.create_solution( - name = related_solution_name, - datasetGroupArn = dataset_group_arn, - recipeArn = related_recipe_arn, - performHPO = True - ) + for dataset_group_name in desired_dataset_group_names: - logger.info("Product solution "+related_solution_name+" created") + dataset_group_arn = dataset_group_name_to_arn.get(dataset_group_name, None) + # We want to build for our dataset group - related_solution_arn = create_solution_response['solutionArn'] - solution_created = True + # Create dataset group if it doesn't exist and save the name in an SSM variable + if dataset_group_arn is None: + logger.info(f'Generating a dataset group with unique name {dataset_group_name}') + create_dataset_group_response = personalize.create_dataset_group(name=dataset_group_name) + dataset_group_arn = create_dataset_group_response['datasetGroupArn'] - if list_solutions_response.find(product_solution_name) != -1: - logger.info("Product solution "+product_solution_name+" already exists, not creating") - product_solution_arn="arn:aws:personalize:"+region+":"+account_id+":solution/"+product_solution_name - else: - create_solution_response = personalize.create_solution( - name = product_solution_name, - datasetGroupArn = dataset_group_arn, - recipeArn = product_recipe_arn, - performHPO = True + describe_dataset_group_response = personalize.describe_dataset_group( + datasetGroupArn=dataset_group_arn ) - logger.info("Product solution "+product_solution_name+" created") + status = describe_dataset_group_response["datasetGroup"]["status"] + logger.info("DatasetGroup: {}".format(status)) + + if status == "CREATE FAILED": + logger.error(f'DatasetGroup {dataset_group_name} ' + f'create failed: {json.dumps(describe_dataset_group_response)}') + return False # Everything will hang on this step + + # Go away for another poll till dataset group active. 
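+        # (Returning False here, and in the checks below, tells the caller - the crhelper
+        # CloudFormation poller or the EventBridge rule that re-invokes this function - that
+        # we are not done yet and should be polled again.)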
+        if status != "ACTIVE":
+            logger.info(f'DatasetGroup {dataset_group_name} not active yet')
+            return False
+
+        # Create datasets if not already created
+        items_dataset_arn = create_dataset(dataset_group_arn, 'retaildemostore-dataset-items',
+                                           'ITEMS', items_schema_arn)
+        users_dataset_arn = create_dataset(dataset_group_arn, 'retaildemostore-dataset-users',
+                                           'USERS', users_schema_arn)
+        interactions_dataset_arn = create_dataset(dataset_group_arn, 'retaildemostore-dataset-interactions',
+                                                  'INTERACTIONS', interactions_schema_arn)
+
+        # Create dataset import jobs
+        items_dataset_import_job_arn = create_import_job('retaildemostore-dataset-items-import-job',
+                                                         items_dataset_arn, account_id, region,
+                                                         "s3://{}/{}".format(bucket, items_filename),
+                                                         role_arn)
+        users_dataset_import_job_arn = create_import_job('retaildemostore-dataset-users-import-job',
+                                                         users_dataset_arn, account_id, region,
+                                                         "s3://{}/{}".format(bucket, users_filename),
+                                                         role_arn)
+        interactions_dataset_import_job_arn = create_import_job('retaildemostore-dataset-interactions-import-job',
+                                                                interactions_dataset_arn, account_id, region,
+                                                                "s3://{}/{}".format(bucket, interactions_filename),
+                                                                role_arn)
+
+        # Make sure all import jobs are done/active before continuing
+        for arn in [items_dataset_import_job_arn, users_dataset_import_job_arn, interactions_dataset_import_job_arn]:
+
+            if not is_import_job_active(arn):
+                logger.info(f"Import job {arn} is NOT active yet")
+                return False
+
+        # Create recent product purchase filter, if necessary
+        list_filters_response = personalize.list_filters(datasetGroupArn=dataset_group_arn)
+        if len(list_filters_response['Filters']) == 0:
+            create_recent_purchase_filter(dataset_group_arn, filter_purchased_arn_param)  # Adds the SSM param
+
+        # Create related product, product recommendation, and rerank solutions if they don't exist
+
+        # Start by working out which recipes we need, with which names and event types,
+        # and which campaigns we want active.
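+        # For illustration (names and numbers below are hypothetical), a campaigns config such as
+        #   {"retaildemostore-product-personalization": {"desired_campaign_suffixes": [0, 1],
+        #                                                "desired_active_version_suffixes": 1,
+        #                                                "minimum_available_campaigns": 0}}
+        # expands below into one entry per suffix - solutions/campaigns named
+        # "retaildemostore-product-personalization-0" and "...-1" - with "...-1" flagged
+        # (activate_please) to have its campaign ARN written to that campaign type's SSM parameter.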
+ campaigns_config = trainstep_config['dataset_groups'][dataset_group_name]['campaigns'] + augmented_train_config = [] + for campaign_type, campaign_train_config in campaigns_config.items(): + + for campaign_no in campaign_train_config['desired_campaign_suffixes']: + + config_for_campaign = dict( + recipe_arn=campaign_type_to_recipe_arn[campaign_type], + campaign_solution_name=campaign_type + '-' + str(campaign_no), + event_type=campaign_type_to_event_type[campaign_type], + activate_please=campaign_no == campaign_train_config['desired_active_version_suffixes'], + active_arn_param_name=campaign_type_to_ssm_param[campaign_type], + campaign_type=campaign_type) + + augmented_train_config += [config_for_campaign] + + # Train up any campaigns that may be missing and set their SSMs + logger.info(f"Set up to train with augmented training config: {json.dumps(augmented_train_config, indent=4)}") + for train_job in augmented_train_config: + logger.info(f'Polling training job {train_job}') + campaign_arn = create_campaign_polling(dataset_group_arn=dataset_group_arn, + **train_job) + if campaign_arn is not None and train_job['activate_please']: + logger.info(f"Setting campaignArn {campaign_arn} as system parameter" + f" for {train_job['active_arn_param_name']} which has finished") + # Finally, set the campaign arn as the system parameter expected by services + response = ssm.put_parameter( + Name=train_job['active_arn_param_name'], + Description='Retail Demo Store Campaign Arn Parameter', + Value='{}'.format(campaign_arn), + Type='String', + Overwrite=True + ) + all_created = all_created and campaign_arn is not None + # We record if we have an available campaign + + # Now we will go through the existing solutions and remove any we don't want + list_solutions_response = personalize.list_solutions(datasetGroupArn=dataset_group_arn) + all_desired_solution_names = [d['campaign_solution_name'] for d in augmented_train_config] + + # We go through the existing solutions and delete ones that are not in the list of desired solutions + for solution in sorted(list_solutions_response['solutions'], key=lambda x:x['name']): + if solution['name'] in all_desired_solution_names: + pass # We can keep this one because we have been configured to build it + else: + logger.info(f"Solution {solution['name']} with Arn {solution['solutionArn']} is unwanted. " + "We will try to remove it.") + + deleted_one = delete_campaign_polling( + dataset_group_arn=dataset_group_arn, + solution_arn=solution['solutionArn']) + all_deleted = all_deleted and deleted_one + + list_event_trackers_response = personalize.list_event_trackers(datasetGroupArn=dataset_group_arn) + if len(list_event_trackers_response['eventTrackers']) == 0 and all_created: + + # Either hasn't been created yet or isn't active yet. 
+ if len(list_event_trackers_response['eventTrackers']) == 0: + logger.info('Event Tracker does not exist; creating') + event_tracker = personalize.create_event_tracker( + datasetGroupArn=dataset_group_arn, + name='retaildemostore-event-tracker' + ) - product_solution_arn = create_solution_response['solutionArn'] - solution_created = True + if event_tracker.get('trackingId'): + event_tracking_id = event_tracker['trackingId'] + logger.info('Setting event tracking ID {} as SSM parameter'.format(event_tracking_id)) + + ssm.put_parameter( + Name=event_tracking_id_param, + Description='Retail Demo Store Personalize Event Tracker ID Parameter', + Value='{}'.format(event_tracking_id), + Type='String', + Overwrite=True + ) + # Trigger rebuild of Web UI service so event tracker gets picked up. + rebuild_webui_service(region, account_id) + + return False # Give event tracker a moment to get ready + else: + event_tracker = list_event_trackers_response['eventTrackers'][0] + logger.info("Event Tracker: {}".format(event_tracker['status'])) + + if event_tracker['status'] == 'CREATE FAILED': + logger.error('Event tracker create failed: {}'.format(json.dumps(event_tracker))) + return False + + # Now go through dataset groups getting rid of any we do not need. + for datasetGroup in datasetGroups: + if datasetGroup['name'] not in desired_dataset_group_names: - if list_solutions_response.find(rerank_solution_name) != -1: - logger.info("Rerank solution "+rerank_solution_name+" already exists, not creating") - rerank_solution_arn="arn:aws:personalize:"+region+":"+account_id+":solution/"+rerank_solution_name - else: - create_solution_response = personalize.create_solution( - name = rerank_solution_name, - datasetGroupArn = dataset_group_arn, - recipeArn = rerank_recipe_arn, - performHPO = True - ) + dataset_group_arn = datasetGroup['datasetGroupArn'] - logger.info("Product solution "+rerank_solution_name+" created") + all_deleted = False + # Note that it may not pull down if there are campaigns and solutions attached to it. + # So we can try to remove them + list_solutions_response = personalize.list_solutions(datasetGroupArn=dataset_group_arn) + for solution in list_solutions_response['solutions']: + _ = delete_campaign_polling( + dataset_group_arn=dataset_group_arn, + solution_arn=solution['solutionArn']) + + if len(list_solutions_response['solutions'])>0: + logger.info(f"We do not need this dataset group {dataset_group_arn} but it still has solutions.") + else: + logger.info(f"We do not need this dataset group {dataset_group_arn}. Let us take it down.") + # Other than the dataset group, no deps on filters so delete them first. + delete_filters(dataset_group_arn) + if delete_event_trackers(dataset_group_arn): + logger.info('EventTrackers fully deleted') + if delete_datasets(dataset_group_arn): + logger.info('Datasets fully deleted') + event_schema_name = datasetGroup['name'] + '-event-schema' + if delete_personalize_schemas([event_schema_name]): + logger.info(f"Event schema deleted {event_schema_name}") + if delete_dataset_group(dataset_group_arn): + logger.info('DatasetGroup fully deleted') + + if len(desired_dataset_group_names) == 0: + all_deleted = all_deleted and delete_personalize_schemas([ + 'retaildemostore-schema-users', + 'retaildemostore-schema-items', + 'retaildemostore-schema-interactions']) + all_deleted = all_deleted and delete_personalize_role() + + if all_created and all_deleted: + # No need for this lambda function to be called anymore so disable CW event rule that has been calling us. 
+        # If something wants this functionality again, it just re-enables the rule.
+        msg = ('Related product, Product recommendation, and Personalized reranking campaigns '
+               'fully provisioned and unwanted campaigns removed.')
+        logger.info(msg)
+        if len(train_configs['steps']) > 1:
+            train_configs['steps'] = train_configs['steps'][1:]
+            ssm.put_parameter(
+                Name=training_config_param_name,
+                Description='Retail Demo Store Training Config',
+                Value=json.dumps(train_configs, indent=3),
+                Type='String',
+                Overwrite=True
+            )
+            logger.info(f" - Popping the training config step that just succeeded. "
+                        f"New training config: {json.dumps(train_configs, indent=2)}")
+            return False
+        else:
+            logger.info("Finished polling.")
+            return True
+    else:
+        if all_created:
+            msg = 'Still trying to clean up some things.'
+        elif all_deleted:
+            msg = "Still trying to provision something."
+        else:
+            msg = "Still trying to provision and delete things."
+        logger.info(msg)
+        return False

-        rerank_solution_arn = create_solution_response['solutionArn']
-        solution_created = True

-    if solution_created:
-        logger.info("Solution(s) create initiated; waiting for next iteration to create versions")
-        return {
-            'statusCode': 200,
-            'body': json.dumps('Solution(s) create initiated; waiting for next call to create versions')

+@cloudformation_helper.poll_create
+def poll_create(event, context):
+    """
+    Called on creation by CloudFormation. Polled till creation is done.
+    Sets up the default training configuration and calls update(), which tries to pull everything
+    into shape to match the training config. By default we train up one version of each of the
+    campaign types (user-item, item-item, reranking).
+    How the config is generated can be examined below, but there is more documentation in update().
+    Args:
+        event: We ignore this - we know what we want to build
+        context: We ignore this
+
+    Returns:
+        True if polling is finished.
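+
+    For illustration, the default config written to SSM looks like this (the dataset group
+    suffix is a random 8-character uuid fragment, and the keys under "campaigns" are whatever
+    all_campaign_types holds; the values here are only an example):
+        {"steps": [{"dataset_groups": {"retaildemostore-1a2b3c4d": {"campaigns": {
+            "<campaign-type>": {"desired_campaign_suffixes": [0],
+                                "desired_active_version_suffixes": 0,
+                                "minimum_available_campaigns": 0}}}}}]}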
+ """ + + if not is_ssm_parameter_set(training_config_param_name): + + # Default training config - build one of each campaign in one dataset group and leave it at that + campaigns_to_build = {} + for campaign_type in all_campaign_types: + campaigns_to_build[campaign_type] = { + 'desired_campaign_suffixes': [0], # We want a single campaign with suffix -0 for each campaign type + 'desired_active_version_suffixes': 0, # We want the campaign with suffix -0 to be activated in SSM + 'minimum_available_campaigns': 0} # We want at least zero campaigns to be available at all times + + dataset_group_name_unique = dataset_group_name_root + str(uuid.uuid4())[:8] + + train_configs = { + "steps": [ + { + "dataset_groups": { + dataset_group_name_unique: {'campaigns': campaigns_to_build} + } + } + ] } - # Create related product solution version if it doesn't exist - response=personalize.list_solution_versions(solutionArn=related_solution_arn) - if response['solutionVersions']: - logger.info("Related product Solution Version for "+related_solution_name+" already exists, not creating") - related_solution_version_arn = response['solutionVersions'][0]['solutionVersionArn'] - else: - logger.info('Creating Related Product SolutionVersion') - create_solution_version_response = personalize.create_solution_version( - solutionArn = related_solution_arn) - related_solution_version_arn = create_solution_version_response['solutionVersionArn'] + ssm.put_parameter( + Name=training_config_param_name, + Description='Retail Demo Store Training Config', + Value=json.dumps(train_configs, indent=3), + Type='String', + Overwrite=True + ) - # Create product recommendation solution version if it doesn't exist - response=personalize.list_solution_versions(solutionArn=product_solution_arn) - if response['solutionVersions']: - logger.info("Product Solution Version for "+product_solution_name+" already exists, not creating") - product_solution_version_arn=response['solutionVersions'][0]['solutionVersionArn'] - else: - logger.info('Creating Product Recommendation SolutionVersion') - create_solution_version_response = personalize.create_solution_version( - solutionArn = product_solution_arn) - product_solution_version_arn = create_solution_version_response['solutionVersionArn'] + # We enable the rule to carry on working but return to CloudFormation because CloudFormation does time out + # while waiting, otherwise. If it had not we could have done return update() here. 
+    enable_event_rule(lambda_event_rule_name)
+    return True

-    # Create search solution version if it doesn't exist
-    response=personalize.list_solution_versions(solutionArn=rerank_solution_arn)
-    if response['solutionVersions']:
-        logger.info("Rerank Solution Version for "+rerank_solution_name+" already exists, not creating")
-        rerank_solution_version_arn=response['solutionVersions'][0]['solutionVersionArn']
-    else:
-        logger.info('Creating Rerank SolutionVersion')
-        create_solution_version_response = personalize.create_solution_version(
-            solutionArn = rerank_solution_arn)
-        rerank_solution_version_arn = create_solution_version_response['solutionVersionArn']

-    # Make sure related product solution version is active
-    describe_solution_version_response = personalize.describe_solution_version(
-        solutionVersionArn = related_solution_version_arn
-    )
-    status = describe_solution_version_response["solutionVersion"]["status"]
-    logger.info("Related product SolutionVersion Status is: {}".format(status))
-    if status != "ACTIVE":
-        logger.info("Related product solution version status is NOT active")
-        return {
-            'statusCode': 200,
-            'body': json.dumps('Related product solution version status is NOT active')
-        }

+@cloudformation_helper.poll_delete
+def poll_delete(event, context):
+    """
+    Called on deletion by CloudFormation.
+    Sets up the training configuration to pull down everything and calls update().
+    How the config is generated for tear-down can be examined below, but there is more documentation in update().
+    Args:
+        event: We ignore this - we know what we want to tear down
+        context: We ignore this

-    # Create related product campaign if it doesn't exist
-    list_campaigns_response = personalize.list_campaigns(
-        solutionArn = related_solution_arn
-    )
-    if list_campaigns_response['campaigns']:
-        for campaign in list_campaigns_response['campaigns']:
-            status = campaign['status']
-            if status != 'ACTIVE':
-                logger.info('Campaign ' + campaign['campaignArn'] + ' is NOT active; waiting')
-                return {
-                    'statusCode': 200,
-                    'body': json.dumps('Related product campaign status is NOT active')
-                }
-
-            related_campaign_arn = campaign['campaignArn']
-
-            # Finally, set the campaign arn as the system parameter expected by services
-            logger.info("Setting related product campaignArn " + related_campaign_arn + " as system parameter")
-            response = ssm.put_parameter(
-                Name=related_product_campaign_arn_param,
-                Description='Retail Demo Store Related Products Campaign Arn Parameter',
-                Value='{}'.format(related_campaign_arn),
-                Type='String',
-                Overwrite=True
-            )
-    else:
-        logger.info('Creating related product campaign')
-        personalize.create_campaign(
-            name = related_campaign_name,
-            solutionVersionArn = related_solution_version_arn,
-            minProvisionedTPS = 1
-        )
-        return {
-            'statusCode': 200,
-            'body': json.dumps('Initiated create related product campaign; will check back for status')
-        }

+    Returns:
+        True if polling is finished.
+    """

-    # Make sure product recommendation solution version is active
-    describe_solution_version_response = personalize.describe_solution_version(
-        solutionVersionArn = product_solution_version_arn

+    # Empty the training config of dataset groups to pull it all down.
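+    # In update(), a step whose dataset_groups is None produces an empty "desired" list, so every
+    # dataset group under the retaildemostore- name root - along with its campaigns, solutions,
+    # filters, event trackers, datasets and schemas - is deleted over successive polls.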
+ train_configs = {"steps": [{"dataset_groups": None}]} + ssm.put_parameter( + Name=training_config_param_name, + Description='Retail Demo Store Training Config', + Value=json.dumps(train_configs, indent=3), + Type='String', + Overwrite=True ) - status = describe_solution_version_response["solutionVersion"]["status"] - logger.info("Product recommendation SolutionVersion Status is: {}".format(status)) - if status != "ACTIVE": - logger.info("Product recommendation solution version status is NOT active") - return { - 'statusCode': 200, - 'body': json.dumps('Product recommendation solution version status is NOT active') - } + done = update() + return done - # Create product recommendation campaign if it doesn't exist - list_campaigns_response = personalize.list_campaigns( - solutionArn = product_solution_arn - ) - if list_campaigns_response['campaigns']: - for campaign in list_campaigns_response['campaigns']: - status = campaign['status'] - if status != 'ACTIVE': - logger.info('Campaign ' + campaign['campaignArn'] + ' is NOT active; waiting') - return { - 'statusCode': 200, - 'body': json.dumps('Product recommendation campaign status is NOT active') - } - - product_campaign_arn = campaign['campaignArn'] - - # Finally, set the campaign arn as the system parameter expected by services - logger.info("Setting product campaignArn " + product_campaign_arn + " as system parameter") - response = ssm.put_parameter( - Name=product_campaign_arn_param, - Description='Retail Demo Store Product Recommendation Campaign Arn Parameter', - Value='{}'.format(product_campaign_arn), - Type='String', - Overwrite=True - ) - else: - logger.info('Creating product personalization campaign') - personalize.create_campaign( - name = product_campaign_name, - solutionVersionArn = product_solution_version_arn, - minProvisionedTPS = 1 - ) - return { - 'statusCode': 200, - 'body': json.dumps('Initiated create product campaign; will check back for status') - } - # Make sure reranking solution version is active - describe_solution_version_response = personalize.describe_solution_version( - solutionVersionArn = rerank_solution_version_arn - ) - status = describe_solution_version_response["solutionVersion"]["status"] - logger.info("Reranking SolutionVersion Status is: {}".format(status)) - if status != "ACTIVE": - logger.info("Reranking solution version status is NOT active") - return { - 'statusCode': 200, - 'body': json.dumps('Reranking solution version status is NOT active') - } +@cloudformation_helper.poll_update +def poll_update(event, _): + done = update() + return done - # Create reranking campaign if it doesn't exist - list_campaigns_response = personalize.list_campaigns( - solutionArn = rerank_solution_arn - ) - if list_campaigns_response['campaigns']: - for campaign in list_campaigns_response['campaigns']: - status = campaign['status'] - if status != 'ACTIVE': - logger.info('Campaign ' + campaign['campaignArn'] + ' is NOT active; waiting') - return { - 'statusCode': 200, - 'body': json.dumps('Rerank campaign status is NOT active') - } - - rerank_arn = campaign['campaignArn'] - - # Finally, set the campaign arn as the system parameter expected by services - logger.info("Setting rerank campaignArn " + rerank_arn + " as system parameter") - response = ssm.put_parameter( - Name=rerank_campaign_arn_param, - Description='Retail Demo Store Personalized Reranking Campaign Arn Parameter', - Value='{}'.format(rerank_arn), - Type='String', - Overwrite=True - ) - else: - logger.info('Creating personalized reranking campaign') - 
personalize.create_campaign( - name = rerank_campaign_name, - solutionVersionArn = rerank_solution_version_arn, - minProvisionedTPS = 1 - ) - return { - 'statusCode': 200, - 'body': json.dumps('Initiated create personalized reranking campaign; will check back for status') - } - # If we get here... all campaigns have been successfully created! - logger.info('Related product, Product recommendation, and Personalized reranking campaigns fully provisioned!') +def lambda_handler(event, context): + """According to the contents of the SSM variable retaildemostore-training-config build and delete + dataset groups, datasets, solutions and campaigns to reach the desired state.""" + logger.debug('## ENVIRONMENT VARIABLES') + logger.debug(os.environ) + logger.debug('## EVENT') + logger.debug(event) - # No need for this lambda function to be called anymore so delete CW event rule that has been calling us. - delete_event_rule(lambda_event_rule_name) + ## Inspect the event - if it is coming from EventBridge then it is polling after reset + ## If it is coming from CloudFormation then use the helper to create or tear down + if "source" in event and event["source"] == "aws.events": + done = update() # Will pick up desired behaviour from training_config + if done: + disable_event_rule(lambda_event_rule_name) + return { + 'statusCode': 200, + 'body': json.dumps("Event rule disabled") + } + else: + return { + 'statusCode': 200, + 'body': json.dumps("Polled Personalize Create Update") + } - return { - 'statusCode': 200, - 'body': json.dumps('Related product, Product recommendation, and Personalized reranking campaigns fully provisioned!') - } \ No newline at end of file + else: + # We have a Cloud Formation Event - the event contains info about whether it wants to update, create or delete + cloudformation_helper(event, context) diff --git a/src/aws-lambda/personalize-pre-create-campaigns/requirements.txt b/src/aws-lambda/personalize-pre-create-campaigns/requirements.txt index f0ea08fbf..04fe8f43d 100644 --- a/src/aws-lambda/personalize-pre-create-campaigns/requirements.txt +++ b/src/aws-lambda/personalize-pre-create-campaigns/requirements.txt @@ -1 +1,2 @@ -packaging==20.4 \ No newline at end of file +packaging==20.4 +crhelper diff --git a/src/docker-compose.yml b/src/docker-compose.yml index cb89f6608..f52055c43 100644 --- a/src/docker-compose.yml +++ b/src/docker-compose.yml @@ -133,6 +133,23 @@ services: volumes: - ../images:/usr/share/nginx/html/images:ro + videos: + container_name: videos + build: + context: ./videos + networks: + - dev-net + environment: + - AWS_PROFILE + - AWS_DEFAULT_REGION + - RESOURCE_BUCKET + - PARAMETER_IVS_VIDEO_CHANNEL_MAP + - USE_DEFAULT_IVS_STREAMS + volumes: + - ~/.aws/:/root/.aws:ro + ports: + - "8007:80" + networks: dev-net: driver: bridge diff --git a/src/products/Dockerfile b/src/products/Dockerfile index 46a5d222b..d89a3dd2b 100644 --- a/src/products/Dockerfile +++ b/src/products/Dockerfile @@ -1,10 +1,7 @@ FROM public.ecr.aws/s5z3t2n9/golang:1.15-alpine AS build WORKDIR /src/ RUN apk add --no-cache git bash -RUN go get -u github.com/gorilla/mux -RUN go get -u gopkg.in/yaml.v2 RUN go get -u github.com/aws/aws-sdk-go/service/dynamodb/dynamodbattribute -RUN go get -u github.com/google/uuid COPY src/products-service/*.* /src/ COPY src/products-service/data/*.* /src/data/ RUN CGO_ENABLED=0 go build -o /bin/products-service diff --git a/src/products/src/products-service/data/products.yaml b/src/products/src/products-service/data/products.yaml index cb385aff7..0cc0c7af2 100644 
--- a/src/products/src/products-service/data/products.yaml +++ b/src/products/src/products-service/data/products.yaml @@ -486,4 +486,290 @@ description: Every tackle box needs a silver and red reflective fishing lure. price: 9.99 image: "12.jpg" - current_stock: 9 \ No newline at end of file + current_stock: 9 +- id: 57 + name: Eyeshadow Palette - Set of 3 Palettes + category: beauty + style: makeup + description: Perfect for trialling different shades, each palette contains 4 different shades. + price: 145.00 + image: "11.jpg" + current_stock: 19 +- id: 58 + name: Waterproof Eyeliner and Mascara + category: beauty + style: makeup + description: In stylish yellow, guaranteed to stay on in sauna and car-wash. + price: 27.00 + image: "12.jpg" + current_stock: 9 +- id: 59 + name: Divine Shine - Satin Rose Lipstick Set + category: beauty + style: makeup + description: You get not only lipstick, but that other thing with the mirror. Brilliant! + price: 35.00 + image: "13.jpg" + current_stock: 1 +- id: 60 + name: Gloss Bomb Universal Lip Luminizer + category: beauty + style: makeup + description: 100% guaranteed universal and 100% luminous. + price: 19.00 + image: "14.jpg" + current_stock: 12 +- id: 61 + name: 7-in-1 Daily Wear Palette Essentials + category: beauty + style: makeup + description: You get a whole bunch of stuff and it is super good quality too. + price: 103.00 + image: "15.jpg" + current_stock: 2 +- id: 62 + name: Eye Care Set - Sparkle Gloss Eyes and Lashes + category: beauty + style: makeup + description: Your eyes will dazzle and gloss and shine and fizz pop. + price: 95.00 + image: "16.jpg" + current_stock: 2 +- id: 63 + name: 15 Piece Makeup Brush Set with Fold Up Leather Case + category: beauty + style: makeup + description: A great selection of makeup brushes; look good just carrying it around! + price: 99.00 + image: "17.jpg" + gender_affinity: F + image_license: "Free for Commercial Use" + link: https://www.pikrepo.com/fmvtc/black-makeup-brush-set-in-bag + current_stock: 2 +- id: 64 + name: Lovely Blue Mascara + category: beauty + style: makeup + description: You will never look beyond this shade of blue. + price: 29.00 + image: "18.jpg" + gender_affinity: F + image_license: CC0 + link: https://pxhere.com/en/photo/57398 + current_stock: 2 +- id: 65 + name: Nail Varnish for Conquerors of Hearts + category: beauty + style: makeup + description: Occupy his/her heart with your talons of love! + price: 24.00 + image: "19.jpg" + gender_affinity: F + image_license: CC0 + link: https://www.needpix.com/photo/1711500/nail-varnish-nail-design-cosmetics-manicure-fingernails-paint-toe-nails-fashionable-beauty + current_stock: 3 +- id: 66 + name: Rose Pink Blush Brush + category: beauty + style: makeup + description: For those looking for a blush brush and something that is rose pink. + price: 22.00 + image: "20.jpg" + gender_affinity: F + image_license: Free for commercial use - just do not resell as a stock photo + link: https://pixabay.com/photos/rouge-brush-cosmetics-rouge-brush-2092439/ + current_stock: 4 +- id: 67 + name: "Subtle and Fresh: Palette of 15 Concealers" + category: beauty + style: makeup + description: These colours allow your inner you to shine through, but shinier. + price: 44.00 + image: "21.jpg" + gender_affinity: F + image_license: Free for commercial use + link: https://www.pxfuel.com/en/free-photo-xidzw + current_stock: 6 +- id: 68 + name: Deep Disguise Concealer + category: beauty + style: makeup + description: Pimples? Invisible. Warts? Never happened. 
No need for witchcraft when you have science! + price: 12.00 + image: "22.jpg" + gender_affinity: F + image_license: CC0 + link: https://commons.m.wikimedia.org/wiki/File:Tcsfoundationlogo.jpg + current_stock: 7 +- id: 69 + name: Classic Bombshell Lipstick + category: beauty + style: makeup + description: For the no-nonsense blow-them-away classic look. + price: 24.00 + image: "23.jpg" + gender_affinity: F + image_license: Free for commercial use - just do not resell as a stock photo + link: https://pixabay.com/photos/lipstick-lips-makeup-cosmetics-5559338/ + current_stock: 11 +- id: 70 + name: Intense Matte Lipstick + category: beauty + style: makeup + description: Inspire awe, be the mystery. + price: 28.00 + image: "24.jpg" + gender_affinity: F + image_license: Unsplash - free for commercial use + link: https://unsplash.com/photos/rjB_1MT6G18 + current_stock: 12 +- id: 71 + name: 4-Piece Makeup Brush Set + category: beauty + style: makeup + description: Get makeup done, the only way how, on your skin. + price: 26.00 + image: "25.jpg" + gender_affinity: F + image_license: Free for commercial use - just do not resell as a stock photo + link: https://pixabay.com/photos/maciag-brush-makeup-brushes-5208359/ + current_stock: 4 +- id: 72 + name: Gangster-Girl Lipstick + category: beauty + style: makeup + description: Take out his/her heart with this lipstick, or shoot him/her... whichever suits. + price: 40.00 + image: "26.jpg" + gender_affinity: F + image_license: Free for commercial use + link: https://www.pikrepo.com/fyvwn/red-and-gold-lipstick-on-white-background + current_stock: 7 +- id: 73 + name: Lip Brush + category: beauty + style: makeup + description: Delicately calibrated hairs for consistency of application + price: 32.00 + image: "27.jpg" + gender_affinity: F + image_license: Free for commercial use + link: https://unsplash.com/photos/qbo7DPBvnV0 + current_stock: 2 +- id: 74 + name: Precious Cargo Makeup Containers + category: beauty + style: makeup + description: Strong reinforced plastic containers, with bright stylings. + price: 12.00 + image: "28.jpg" + gender_affinity: F + image_license: CC0 + link: https://pixy.org/5203022/ + current_stock: 20 +- id: 75 + name: Burn! Lipstick + category: beauty + style: makeup + description: The lipstick is burn red - and so is the fiery translucent sheath. + price: 18.00 + image: "29.jpg" + gender_affinity: F + image_license: Public domain + link: https://www.pikist.com/free-photo-xvcbj + current_stock: 12 +- id: 76 + name: Grandma's Mascara + category: beauty + style: makeup + description: The 20s look that never went out of fashion, till after the 20s. + price: 18.00 + image: "30.jpg" + gender_affinity: F + image_license: Free for commercial use + link: https://www.pickpik.com/cosmetics-make-up-makeup-beauty-color-eyes-138539 + current_stock: 13 +- id: 77 + name: Camera Tripod + category: electronics + style: photography + description: Take video anywhere, anytime, like a spy. + price: 49.00 + image: "15.jpg" + gender_affinity: F + image_license: CC0 + link: https://www.needpix.com/photo/908201/gorillapod-with-camera-free-pictures-free-photos-free-images-royalty-free + current_stock: 10 +- id: 78 + name: Nice Stripy Blouse + category: apparel + style: blouse + description: Easy-wearing and comfy momfy! + price: 39.00 + image: "32.jpg" + gender_affinity: F + image_license: Made by Dae.mn + link: + current_stock: 10 +- id: 79 + name: Pocket Powder Case + category: beauty + style: makeup + description: Touch up on the go! 
Wherever, whenever! + price: 29.00 + image: "31.jpg" + gender_affinity: F + image_license: Public domain + link: https://www.pikist.com/free-photo-ixyyz + current_stock: 12 +- id: 80 + name: Freestanding Glass Makeup Mirror + category: housewares + style: bedroom + description: Do your makeup properly! Don't make a big mess like someone else we know! + price: 99.00 + image: "8.jpg" + gender_affinity: F + image_license: CC0 + link: https://www.needpix.com/photo/download/1336308/mirror-small-reflection-decoration-modern-design-frame-round-shop + current_stock: 11 +- id: 81 + name: Perfect grey sofa + category: housewares + style: salon + description: Comfy, warm, and nice. + price: 399.00 + image: "9.jpg" + image_license: Free for commercial use - just do not resell as a stock photo + link: https://pixabay.com/photos/furniture-modern-luxury-indoors-3271762/ + current_stock: 10 +- id: 82 + name: Perfect cushions + category: housewares + style: salon + description: Your guests will say "I need cushions". You will say, "There, where you are sitting!". The best day. + price: 179.00 + image: "10.jpg" + image_license: Free for commercial use - just do not resell as a stock photo + link: https://pixabay.com/fr/photos/oreillers-patron-lit-int%C3%A9rieur-4326131/ + current_stock: 11 +- id: 83 + name: Classic coat-rack + category: housewares + style: salon + description: All the functionality of coatracks with the style of coatracks + price: 167.00 + image: "11.jpg" + image_license: Free for commercial use - just do not resell as a stock photo + link: https://pixabay.com/photos/hat-coat-rack-wing-pet-fashion-2176837/ + current_stock: 3 +- id: 84 + name: Spare bookshelves + category: housewares + style: salon + description: With that spare Scandinavian look. + price: 239.00 + image: "12.jpg" + image_license: CC0 + link: https://www.needpix.com/photo/download/1856333/shelf-white-living-world-bookshelf-books-bookshelves-set-up-living-room-book + current_stock: 3 diff --git a/src/products/src/products-service/go.mod b/src/products/src/products-service/go.mod new file mode 100644 index 000000000..96d6d74de --- /dev/null +++ b/src/products/src/products-service/go.mod @@ -0,0 +1,7 @@ +module products + +go 1.14 + +require github.com/gorilla/mux master +require gopkg.in/yaml.v2 master +require github.com/google/uuid master \ No newline at end of file diff --git a/src/recommendations/README.md b/src/recommendations/README.md index 59b5fa7dd..d357f9066 100644 --- a/src/recommendations/README.md +++ b/src/recommendations/README.md @@ -1,6 +1,8 @@ # Retail Demo Store Recommendations Service -The Recommendations web service provides a RESTful API for retrieving personalized product recommendations and related products (powered by Amazon Personalize). The [Web UI](../web-ui) makes calls to this service when a user is viewing the home view (recommended products), product detail view (related products), or the category view (personalized ranking of products). If Amazon Personalize campaigns have been created for these use-cases (either by the deployment Lambda option or by stepping through the [Personalization](../../workshop/1-Personalization/1.1-Personalize.ipynb) workshop), then those campaigns will be called by the Recommendations service. Otherwise, the service will call the [Products](../products) service to provide a suitable default behavior such as displaying featured products or products from the same category as the displayed product. 
+The Recommendations web service provides a RESTful API for retrieving personalized product recommendations, + related products, product reranking, and suggested discounts (powered by Amazon Personalize). + The [Web UI](../web-ui) makes calls to this service when a user is viewing the home view (recommended products), product detail view (related products), or the category view (personalized ranking of products). If Amazon Personalize campaigns have been created for these use-cases (either by the deployment Lambda option or by stepping through the [Personalization](../../workshop/1-Personalization/1.1-Personalize.ipynb) workshop), then those campaigns will be called by the Recommendations service. Otherwise, the service will call the [Products](../products) service to provide a suitable default behavior such as displaying featured products or products from the same category as the displayed product. This service also provides support for running experiments for personalization approaches using techniques such as A/B testing, interleaving results testing, and multi-armed bandit testing. The [Experimentation](../../workshop/3-Experimentation/3.1-Overview.ipynb) workshops are designed to walk you through how to setup, run, and evaluate experiments. diff --git a/src/recommendations/src/recommendations-service/app.py b/src/recommendations/src/recommendations-service/app.py index 70fa79b7a..daf9b657a 100644 --- a/src/recommendations/src/recommendations-service/app.py +++ b/src/recommendations/src/recommendations-service/app.py @@ -3,18 +3,26 @@ from flask import Flask, jsonify, Response from flask import request + from flask_cors import CORS from experimentation.experiment_manager import ExperimentManager -from experimentation.resolvers import DefaultProductResolver, PersonalizeRecommendationsResolver, PersonalizeRankingResolver, RankingProductsNoOpResolver +from experimentation.resolvers import DefaultProductResolver, PersonalizeRecommendationsResolver, \ + PersonalizeRankingResolver, RankingProductsNoOpResolver, PersonalizeContextComparePickResolver, RandomPickResolver from experimentation.utils import CompatEncoder from expiring_dict import ExpiringDict import json -import os, sys +import os import pprint import boto3 -import logging +import uuid import requests +import logging + +NUM_DISCOUNTS = 2 + +EXPERIMENTATION_LOGGING = True +DEBUG_LOGGING = True # Since the DescribeCampaign API easily throttles and we just need # the recipe from the campaign and it won't change often (if at all), @@ -24,10 +32,16 @@ servicediscovery = boto3.client('servicediscovery') personalize = boto3.client('personalize') ssm = boto3.client('ssm') +codepipeline = boto3.client('codepipeline') +sts = boto3.client('sts') +cw_events = boto3.client('events') # SSM parameter name for the Personalize filter for purchased items filter_purchased_param_name = 'retaildemostore-personalize-filter-purchased-arn' +training_config_param_name = 'retaildemostore-training-config' # ParameterPersonalizeTrainConfig +dataset_group_name_root = 'retaildemostore-' + # -- Shared Functions def get_recipe(campaign_arn): @@ -110,6 +124,7 @@ def get_products(feature, user_id, current_item_id, num_results, campaign_arn_pa items = [] resp_headers = {} experiment = None + exp_manager = None # Get active experiment if one is setup for feature and we have a user. 
if feature and user_id: @@ -131,7 +146,7 @@ def get_products(feature, user_id, current_item_id, num_results, campaign_arn_pa resp_headers['X-Experiment-Type'] = experiment.type resp_headers['X-Experiment-Id'] = experiment.id else: - # Fallback to default behavior of checking for campaign ARN parameter and + # Fallback to default behavior of checking for campaign ARN parameter and # then the default product resolver. values = get_parameter_values([ campaign_arn_param_name, filter_purchased_param_name ]) @@ -156,7 +171,9 @@ def get_products(feature, user_id, current_item_id, num_results, campaign_arn_pa for item in items: itemId = item['itemId'] url = f'http://{products_service_host}:{products_service_port}/products/id/{itemId}?fullyQualifyImageUrls={fully_qualify_image_urls}' + app.logger.debug(f"Asking for product info from {url}") response = requests.get(url) + app.logger.debug(f"Got product info: {response}") if response.ok: product = response.json() @@ -218,6 +235,7 @@ def to_dict(self): # -- Handlers app = Flask(__name__) +logger = app.logger corps = CORS(app, expose_headers=['X-Experiment-Name', 'X-Experiment-Type', 'X-Experiment-Id', 'X-Personalize-Recipe']) @app.errorhandler(BadRequest) @@ -305,7 +323,7 @@ def recommendations(): fully_qualify_image_urls = request.args.get('fullyQualifyImageUrls', '0').lower() in [ 'true', 't', '1'] try: - return get_products( + response = get_products( feature = feature, user_id = user_id, current_item_id = current_item_id, @@ -313,114 +331,336 @@ def recommendations(): campaign_arn_param_name = 'retaildemostore-product-recommendation-campaign-arn', fully_qualify_image_urls = fully_qualify_image_urls ) + app.logger.debug(f"Recommendations response to be returned: {response}") + return response except Exception as e: app.logger.exception('Unexpected error generating recommendations', e) raise BadRequest(message = 'Unhandled error', status_code = 500) -@app.route('/rerank', methods=['GET', 'POST']) + +def ranking_request_params(): + """ + Utility function which grabs a JSON body and extracts the UserID, item list and feature name. + Returns: + 3-tuple of user ID, item list and feature name + """ + + content = request.json + app.logger.info(f"JSON payload: {content}") + + user_id = content.get('userID') + if not user_id: + raise BadRequest('userID is required') + + items = content.get('items') + if not items: + raise BadRequest('items is required') + + # Determine name of feature where reranked items are being displayed + feature = content.get('feature') + if not feature: + feature = request.args.get('feature') + + app.logger.info(f"Items pulled from json: {items}") + + return user_id, items, feature + + +def get_ranking(user_id, items, feature, + default_campaign_arn_param_name='retaildemostore-personalized-ranking-campaign-arn', + top_n=None, context=None): + """ + Re-ranks a list of items using personalized reranking. + Or delegates to experiment manager if there is an active experiment. + + Args: + user_id (int): + items (list[dict]): e.g. [{"itemId":"33", "url":"path_to_product33"}, + {"itemId":"22", "url":"path_to_product22"}] + feature: Used to lookup the currently active experiment. + default_campaign_arn_param_name: For discounts this would be different. + top_n (Optional[int]): Only return the top N ranked if not None. + context (Optional[dict]): If available, passed to the reranking Personalization recipe. + + Returns: + Items as passed in, but ordered according to reranker - also might have experimentation metadata added. 
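+
+    Example (values are illustrative only): given items [{"itemId": "22"}, {"itemId": "33"}],
+    the function might return ([{"itemId": "33"}, {"itemId": "22"}], {"X-Personalize-Recipe": "..."}) -
+    the same item objects reordered, plus any response headers to pass back to the caller.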
+ """ + + app.logger.info(f"Items given for ranking: {items}") + + # Extract item IDs from items supplied by caller. Note that unranked items + # can be specified as a list of objects with just an 'itemId' key or as a + # list of fully defined items/products (i.e. with an 'id' key). + item_map = {} + unranked_items = [] + for item in items: + item_id = item.get('itemId') if item.get('itemId') else item.get('id') + item_map[item_id] = item + unranked_items.append(item_id) + + app.logger.info(f"Unranked items: {unranked_items}") + + resp_headers = {} + experiment = None + exp_manager = None + + # Get active experiment if one is setup for feature. + if feature: + exp_manager = ExperimentManager() + experiment = exp_manager.get_active(feature) + + if experiment: + app.logger.info('Using experiment: ' + experiment.name) + + # Get ranked items from experiment. + tracker = exp_manager.default_tracker() + + ranked_items = experiment.get_items( + user_id=user_id, + item_list=unranked_items, + tracker=tracker, + context=context + ) + + app.logger.debug(f"Experiment ranking resolver gave us this ranking: {ranked_items}") + + resp_headers['X-Experiment-Name'] = experiment.name + resp_headers['X-Experiment-Type'] = experiment.type + resp_headers['X-Experiment-Id'] = experiment.id + else: + # Fallback to default behavior of checking for campaign ARN parameter and + # then the default product resolver. + values = get_parameter_values([default_campaign_arn_param_name, filter_purchased_param_name]) + app.logger.info(f'Falling back to Personalize: {values}') + + campaign_arn = values[0] + filter_arn = values[1] + + if campaign_arn: + resolver = PersonalizeRankingResolver(campaign_arn=campaign_arn, filter_arn=filter_arn) + resp_headers['X-Personalize-Recipe'] = get_recipe(campaign_arn) + else: + app.logger.info(f'Falling back to No-op: {values}') + resolver = RankingProductsNoOpResolver() + + ranked_items = resolver.get_items( + user_id=user_id, + product_list=unranked_items, + context=context + ) + + response_items = [] + if top_n is not None: + # We may not want to return them all - for example in a "pick the top N" scenario. + ranked_items = ranked_items[:top_n] + + for ranked_item in ranked_items: + # Unlike with /recommendations and /related we are not hitting the products API to get product info back + # The caller may have left that info in there so in case they have we want to leave it in. + item = item_map.get(ranked_item.get('itemId')) + + if 'experiment' in ranked_item: + + item['experiment'] = ranked_item['experiment'] + + if 'url' in item: + # Append the experiment correlation ID to the product URL so it gets tracked if used by client. + product_url = item.get('url') + if '?' in product_url: + product_url += '&' + else: + product_url += '?' + + product_url += 'exp=' + ranked_item['experiment']['correlationId'] + + item['url'] = product_url + + response_items.append(item) + + return response_items, resp_headers + + +@app.route('/rerank', methods=['POST']) def rerank(): - """ Re-ranks a list of items using personalized reranking """ - if request.method == 'POST': - try: - content = request.json - app.logger.info(content) - - user_id = content.get('userID') - if not user_id: - raise BadRequest('userID is required') - - items = content.get('items') - if not items: - raise BadRequest('items is required') - - # Determine name of feature where reranked items are being displayed - feature = request.args.get('feature') - - app.logger.info(items) - - # Extract item IDs from items supplied by caller. 
Note that unranked items
-            # can be specified as a list of objects with just an 'itemId' key or as a
-            # list of fully defined items/products (i.e. with an 'id' key).
-            item_map = {}
-            unranked_items = []
-            for item in items:
-                item_id = item.get('itemId') if item.get('itemId') else item.get('id')
-                item_map[item_id] = item
-                unranked_items.append(item_id)
-
-            app.logger.info(unranked_items)
-
-            ranked_items = []
-            resp_headers = {}
-            experiment = None
-
-            # Get active experiment if one is setup for feature.
-            if feature:
-                exp_manager = ExperimentManager()
-                experiment = exp_manager.get_active(feature)
-
-            if experiment:
-                app.logger.info('Using experiment: ' + experiment.name)
-
-                # Get ranked items from experiment.
-                tracker = exp_manager.default_tracker()
-
-                ranked_items = experiment.get_items(
-                    user_id = user_id,
-                    item_list = unranked_items,
-                    tracker = tracker
-                )
-
-                resp_headers['X-Experiment-Name'] = experiment.name
-                resp_headers['X-Experiment-Type'] = experiment.type
-                resp_headers['X-Experiment-Id'] = experiment.id
-            else:
-                # Fallback to default behavior of checking for campaign ARN parameter and
-                # then the default product resolver.
-                values = get_parameter_values([ 'retaildemostore-personalized-ranking-campaign-arn', filter_purchased_param_name ])
+    """
+    Gets user ID, items list and feature and gets ranking of items according to reranking campaign.
+    """
+    items = []
+    try:
+        user_id, items, feature = ranking_request_params()
+        app.logger.debug(f"Items received for reranking: {items}")
+        response_items, resp_headers = get_ranking(user_id, items, feature)
+        app.logger.debug(f"Response items for reranking: {response_items}")
+        resp = Response(json.dumps(response_items, cls=CompatEncoder), content_type='application/json',
+                        headers=resp_headers)
+        return resp
+    except Exception as e:
+        app.logger.exception('Unexpected error reranking items', e)
+        return json.dumps(items)
+
+
+def get_top_n(user_id, items, feature, top_n,
+              default_campaign_arn_param_name='retaildemostore-personalized-ranking-campaign-arn'):
+    """
+    Gets Top N items using provided campaign.
+    Or delegates to experiment manager if there is an active experiment.
+
+    Args:
+        user_id (int): User to get the top N for
+        items (list[dict]): e.g. [{"itemId":"33", "url":"path_to_product33"},
+                                  {"itemId":"22", "url":"path_to_product22"}]
+        feature: Used to look up the currently active experiment.
+        top_n (int): Only return the top N ranked if not None.
+        default_campaign_arn_param_name: Change this to use a different campaign.
+
+    Returns:
+        Items as passed in, but truncated according to picker - also might have experimentation metadata added.
+    """
+
+    app.logger.info(f"Items given for top-n: {items}")
+
+    # Extract item IDs from items supplied by caller. Note that unranked items
+    # can be specified as a list of objects with just an 'itemId' key or as a
+    # list of fully defined items/products (i.e. with an 'id' key).
+    item_map = {}
+    unranked_items = []
+    for item in items:
+        item_id = item.get('itemId') if item.get('itemId') else item.get('id')
+        item_map[item_id] = item
+        unranked_items.append(item_id)
+
+    app.logger.info(f"Pre-selection items: {unranked_items}")
+
+    resp_headers = {}
+    experiment = None
+    exp_manager = None
+
+    # Get active experiment if one is setup for feature.
+    if feature:
+        exp_manager = ExperimentManager()
+        experiment = exp_manager.get_active(feature)
+
+    if experiment:
+        app.logger.info('Using experiment: ' + experiment.name)
+
+        # Get ranked items from experiment.
+        tracker = exp_manager.default_tracker()
+
+        topn_items = experiment.get_items(
+            user_id=user_id,
+            item_list=unranked_items,
+            tracker=tracker,
+            num_results=top_n
+        )
+
+        app.logger.debug(f"Experiment ranking resolver gave us this ranking: {topn_items}")
+
+        resp_headers['X-Experiment-Name'] = experiment.name
+        resp_headers['X-Experiment-Type'] = experiment.type
+        resp_headers['X-Experiment-Id'] = experiment.id
+    else:
+        # Fallback to default behavior of checking for campaign ARN parameter and
+        # then the default product resolver.
+        values = get_parameter_values([default_campaign_arn_param_name, filter_purchased_param_name])
+        app.logger.info(f'Falling back to Personalize: {values}')
+
+        campaign_arn = values[0]
+        filter_arn = values[1]
+
+        if campaign_arn:
+            resolver = PersonalizeContextComparePickResolver(campaign_arn=campaign_arn, filter_arn=filter_arn,
+                                                             with_context={'Discount': 'Yes'},
+                                                             without_context={})
+            resp_headers['X-Personalize-Recipe'] = get_recipe(campaign_arn)
+        else:
+            app.logger.info(f'Falling back to No-op: {values}')
+            resolver = RandomPickResolver()
+
+        topn_items = resolver.get_items(
+            user_id=user_id,
+            product_list=unranked_items,
+            num_results=top_n
+        )
+
+    logger.info(f"Sorted items returned from resolver: {topn_items}")
+
+    response_items = []
-            campaign_arn = values[0]
-            filter_arn = values[1]
+    for top_item in topn_items:
+        # Unlike with /recommendations and /related we are not hitting the products API to get product info back.
+        # The caller may have left that info in there so in case they have we want to leave it in.
+        item_id = top_item['itemId']
+        item = item_map[item_id]
-            if campaign_arn:
-                resolver = PersonalizeRankingResolver(campaign_arn = campaign_arn, filter_arn = filter_arn)
-                resp_headers['X-Personalize-Recipe'] = get_recipe(campaign_arn)
+        if 'experiment' in top_item:
+
+            item['experiment'] = top_item['experiment']
+
+            if 'url' in item:
+                # Append the experiment correlation ID to the product URL so it gets tracked if used by client.
+                product_url = item.get('url')
+                if '?' in product_url:
+                    product_url += '&'
                else:
-                resolver = RankingProductsNoOpResolver()
+                    product_url += '?'
-            ranked_items = resolver.get_items(
-                user_id = user_id,
-                product_list = unranked_items
-            )
+                product_url += 'exp=' + top_item['experiment']['correlationId']
-            response_items = []
-            for ranked_item in ranked_items:
-                item = item_map.get(ranked_item.get('itemId'))
+                item['url'] = product_url
-                if 'experiment' in ranked_item and 'url' in item:
-                    # Append the experiment correlation ID to the product URL so it gets tracked if used by client.
-                    product_url = item.get('url')
-                    if '?' in product_url:
-                        product_url += '&'
-                    else:
-                        product_url += '?'
+        response_items.append(item)
-                    product_url += 'exp=' + ranked_item['experiment']['correlationId']
+    logger.info(f"Top-N response with details added back in: {response_items}")
-                    item['url'] = product_url
+    return response_items, resp_headers
-                    response_items.append(item)
-            resp = Response(json.dumps(response_items, cls=CompatEncoder), content_type = 'application/json', headers = resp_headers)
-            return resp
-
-        except Exception as e:
-            app.logger.exception('Unexpected error reranking items', e)
-            return json.dumps(items)
+
+
+@app.route('/choose_discounted', methods=['POST'])
+def choose_discounted():
+    """
+    Gets user ID, items list and feature and chooses which items to discount according to the
+    reranking campaign. Gets a ranking with discount applied and without (using contextual metadata)
+    and looks at the difference. The products are ordered according to how the response is expected
+    to improve after applying discount.
+
+    The items that are not chosen for discount will be returned as-is but with the "discounted" key set to False.
+    The items that are chosen for discount will have the "discounted" key set to True.
+
+    If there is an experiment active for this feature, the request for ranking for choosing discounts will have been
+    routed through the experiment resolver and discounts chosen according to whichever approach is active. The
+    items will have experiment information recorded against them, and if URLs were provided for products these will be
+    suffixed with an experiment tracking correlation ID. That way, different approaches to discounting can be compared,
+    as with different approaches to recommendations and reranking in other campaigns.
+    """
+    items = []
+    try:
+        user_id, items, feature = ranking_request_params()
+        response_items, resp_headers = get_top_n(user_id, items, feature, NUM_DISCOUNTS)
+        discount_item_map = {item['itemId']: item for item in response_items}
+
+        return_items = []
+        for item in items:
+            item_id = item['itemId']
+            if item_id in discount_item_map:
+                # This was picked for discount so we flag it as a discounted item. It may also have experiment
+                # information recorded against it by get_top_n() if an experiment is active.
+                discounted_item = discount_item_map[item_id]
+                discounted_item['discounted'] = True
+                return_items.append(discounted_item)
+            else:
+                # This was not picked for discount, so is not participating in any experiment comparing
+                # discount approaches and we also do not flag it as a discounted item
+                item['discounted'] = False
+                return_items.append(item)
+
+        resp = Response(json.dumps(return_items, cls=CompatEncoder), content_type='application/json',
+                        headers=resp_headers)
+        return resp
+    except Exception as e:
+        app.logger.exception('Unexpected error calculating discounted items', e)
+        return json.dumps(items)

-    if request.method == 'GET':
-        app.logger.info("Request Received, Processing")

 @app.route('/experiment/outcome', methods=['POST'])
 def experiment_outcome():
@@ -452,16 +692,95 @@
         user_id = correlation_bits[1]
         variation_index = int(correlation_bits[2])
         result_rank = int(correlation_bits[3])
-        experiment.track_conversion(user_id = user_id, variation_index = variation_index, result_rank = result_rank)
+        experiment.track_conversion(user_id=user_id, variation_index=variation_index, result_rank=result_rank)

         return jsonify(success=True)

     except Exception as e:
         app.logger.exception('Unexpected error logging outcome', e)
-        raise BadRequest(message = 'Unhandled error', status_code = 500)
+        raise BadRequest(message='Unhandled error', status_code=500)
+
+
+@app.route('/reset/realtime', methods=['POST'])
+def reset_realtime():
+    """
+    Sets the training configuration in SSM so that the polling lambda trains a new campaign and then deletes
+    the old one. Logic is in that lambda; here we configure the Lambda.
+    Also re-enables that Lambda's polling rule if it has been disabled.
+    Returns:
+        HTTP 200 if all is well.
+    """
+
+    logger.info("Will do a full tear-down by inserting a config step for a tear-down. The existing config will"
+                " be copied but all the names changed as the second step, so that the campaigns will then be"
+                " rebuilt with new names")
+    train_configs = ssm.get_parameter(Name=training_config_param_name)
+    train_configs = json.loads(train_configs['Parameter']['Value'])
+
+    new_steps = []
+    for train_config_step in train_configs['steps']:
+
+        new_train_config_step = {'dataset_groups': {}}
+        # Let us change the dataset group suffix to avoid any silliness with carrying over events
+        if train_config_step['dataset_groups'] is not None:
+            for dataset_group_name, dataset_group_config in train_config_step['dataset_groups'].items():
+
+                new_dataset_group_name = dataset_group_name_root + str(uuid.uuid4())[:8]
+                # Let us also bump the solution number in case some things are stored by name there
+                for campaign_type, campaign_config in dataset_group_config['campaigns'].items():
+                    campaign_config['desired_campaign_suffixes'] = [v + 1 for v in campaign_config['desired_campaign_suffixes']]
+                    campaign_config['desired_active_version_suffixes'] = campaign_config['desired_active_version_suffixes'] + 1
+
+                new_train_config_step['dataset_groups'][new_dataset_group_name] = dataset_group_config
+
+        new_steps.append(new_train_config_step)
+    train_configs['steps'] = new_steps
+
+    # Insert a step to delete the existing dataset groups first, then rebuild under the new names
+    train_configs['steps'] = [{"dataset_groups": None}] + train_configs['steps']
+    logger.info(f"Putting back to SSM: {train_configs} to key {training_config_param_name}")
+    ssm.put_parameter(
+        Name=training_config_param_name,
+        Description='Retail Demo Store Training Config',
+        Value=json.dumps(train_configs),
+        Type='String',
+        Overwrite=True
+    )
+
+    # Enable polling
+    rule_name = os.environ['PERSONALIZE_PRECREATE_CAMPAIGNS_EVENTRULENAME']
+    try:
+        logger.info('Enabling event rule {}'.format(rule_name))
+        cw_events.enable_rule(Name=rule_name)
+
+    except cw_events.exceptions.ClientError as e:
+        error_code = e.response['Error']['Code']
+        if error_code == 'ResourceNotFoundException':
+            logger.error('CloudWatch event rule to enable not found')
+            raise
+        else:
+            logger.error(e)
+            raise
+
+    return ('Rebuild of Amazon Personalize initiated. 
+ + # Enable polling + rule_name = os.environ['PERSONALIZE_PRECREATE_CAMPAIGNS_EVENTRULENAME'] + try: + logger.info('Enabling event rule {}'.format(rule_name)) + cw_events.enable_rule(Name=rule_name) + + except cw_events.exceptions.ClientError as e: + error_code = e.response['Error']['Code'] + if error_code == 'ResourceNotFoundException': + logger.error('CloudWatch event rule to enable not found') + raise + else: + logger.error(e) + raise + + return ('Rebuild of Amazon Personalize initiated. Check the Amazon Personalize console ' + 'or PersonalizePreCreateLambdaFunction lambda function logs for details'), 200 + if __name__ == '__main__': - logging.getLogger('exerimentation').setLevel(level = logging.DEBUG) + + if DEBUG_LOGGING: + level = logging.DEBUG + else: + level = logging.INFO + app.logger.setLevel(level) + if EXPERIMENTATION_LOGGING: + logging.getLogger('experimentation').setLevel(level=level) + logging.getLogger('experimentation.experiment_manager').setLevel(level=level) + for handler in app.logger.handlers: + logging.getLogger('experimentation').addHandler(handler) + logging.getLogger('experimentation.experiment_manager').addHandler(handler) + handler.setLevel(level) # this will get the main app logs to CloudWatch + app.wsgi_app = LoggingMiddleware(app.wsgi_app) - app.run(debug=True,host='0.0.0.0', port=80) \ No newline at end of file + app.run(debug=True, host='0.0.0.0', port=80) diff --git a/src/recommendations/src/recommendations-service/experimentation/experiment.py b/src/recommendations/src/recommendations-service/experimentation/experiment.py index 21edf2dc2..a9c208c63 100644 --- a/src/recommendations/src/recommendations-service/experimentation/experiment.py +++ b/src/recommendations/src/recommendations-service/experimentation/experiment.py @@ -31,7 +31,7 @@ def __init__(self, table, **data): self.variations.append(Variation(**v)) @abstractmethod - def get_items(self, user_id, current_item_id = None, item_list = None, num_results = 10, tracker = None): + def get_items(self, user_id, current_item_id=None, item_list=None, num_results=10, tracker=None, context=None): """ For a given user, returns item recommendations for this experiment along with experiment tracking/correlation information """ pass diff --git a/src/recommendations/src/recommendations-service/experimentation/experiment_ab.py b/src/recommendations/src/recommendations-service/experimentation/experiment_ab.py index bc958a037..08df6fbac 100644 --- a/src/recommendations/src/recommendations-service/experimentation/experiment_ab.py +++ b/src/recommendations/src/recommendations-service/experimentation/experiment_ab.py @@ -16,7 +16,7 @@ class ABExperiment(Experiment): def __init__(self, table, **data): super(ABExperiment, self).__init__(table, **data) - def get_items(self, user_id, current_item_id = None, item_list = None, num_results = 10, tracker = None): + def get_items(self, user_id, current_item_id=None, item_list=None, num_results=10, tracker=None, context=None): if not user_id: raise Exception('user_id is required') if len(self.variations) < 2: @@ -36,7 +36,8 @@ def get_items(self, user_id, current_item_id = None, item_list = None, num_resul 'user_id': user_id, 'product_id': current_item_id, 'product_list': item_list, - 'num_results': num_results + 'num_results': num_results, + 'context': context } items = variation.resolver.get_items(**resolve_params) diff --git a/src/recommendations/src/recommendations-service/experimentation/experiment_interleaving.py b/src/recommendations/src/recommendations-service/experimentation/experiment_interleaving.py index 608ff10ce..3fdbdc8be 100644 --- a/src/recommendations/src/recommendations-service/experimentation/experiment_interleaving.py +++ b/src/recommendations/src/recommendations-service/experimentation/experiment_interleaving.py @@ -21,7 +21,7 @@ def __init__(self, table, **data): super(InterleavingExperiment, self).__init__(table, **data) self.method = data.get('method', InterleavingExperiment.METHOD_BALANCED) - def get_items(self, user_id, current_item_id = 
None, item_list = None, num_results = 10, tracker = None): + def get_items(self, user_id, current_item_id=None, item_list=None, num_results=10, tracker=None, context=None): if not user_id: raise Exception('user_id is required') if len(self.variations) < 2: @@ -34,7 +34,8 @@ def get_items(self, user_id, current_item_id = None, item_list = None, num_resul 'user_id': user_id, 'product_id': current_item_id, 'product_list': item_list, - 'num_results': num_results * 3 # account for overlaps + 'num_results': num_results * 3, # account for overlaps + 'context': context } # Get recomended items for each variation diff --git a/src/recommendations/src/recommendations-service/experimentation/experiment_mab.py b/src/recommendations/src/recommendations-service/experimentation/experiment_mab.py index 6892e3051..bb8a20f16 100644 --- a/src/recommendations/src/recommendations-service/experimentation/experiment_mab.py +++ b/src/recommendations/src/recommendations-service/experimentation/experiment_mab.py @@ -17,7 +17,7 @@ class MultiArmedBanditExperiment(Experiment): def __init__(self, table, **data): super(MultiArmedBanditExperiment, self).__init__(table, **data) - def get_items(self, user_id, current_item_id = None, item_list = None, num_results = 10, tracker = None): + def get_items(self, user_id, current_item_id=None, item_list=None, num_results=10, tracker=None, context=None): if not user_id: raise Exception('user_id is required') if len(self.variations) < 2: @@ -37,7 +37,8 @@ def get_items(self, user_id, current_item_id = None, item_list = None, num_resul 'user_id': user_id, 'product_id': current_item_id, 'product_list': item_list, - 'num_results': num_results + 'num_results': num_results, + 'context': context } items = variation.resolver.get_items(**resolve_params) diff --git a/src/recommendations/src/recommendations-service/experimentation/experiment_optimizely.py b/src/recommendations/src/recommendations-service/experimentation/experiment_optimizely.py index 41a05655c..e9f836a44 100644 --- a/src/recommendations/src/recommendations-service/experimentation/experiment_optimizely.py +++ b/src/recommendations/src/recommendations-service/experimentation/experiment_optimizely.py @@ -11,7 +11,7 @@ optimizely_sdk = optimizely.Optimizely(sdk_key=os.environ.get('OPTIMIZELY_SDK_KEY')) class OptimizelyFeatureTest(experiment.Experiment): - def get_items(self, user_id, current_item_id = None, item_list = None, num_results = 10, tracker = None): + def get_items(self, user_id, current_item_id=None, item_list=None, num_results=10, tracker=None, context=None): assert user_id, "`user_id` is required" # All the kwargs that are passed to ResolverFactory.get will be stored as a JSON feature variable. 
@@ -22,7 +22,8 @@ def get_items(self, user_id, current_item_id = None, item_list = None, num_resul items = resolver.get_items(user_id=user_id, product_id=current_item_id, product_list=item_list, - num_results=num_results) + num_results=num_results, + context=context) config = optimizely_sdk.get_optimizely_config() diff --git a/src/recommendations/src/recommendations-service/experimentation/resolvers.py b/src/recommendations/src/recommendations-service/experimentation/resolvers.py index 33a7cc7b3..a7a1a584f 100644 --- a/src/recommendations/src/recommendations-service/experimentation/resolvers.py +++ b/src/recommendations/src/recommendations-service/experimentation/resolvers.py @@ -9,6 +9,8 @@ import urllib.parse import logging +from random import shuffle + log = logging.getLogger(__name__) servicediscovery = boto3.client('servicediscovery') @@ -77,10 +79,12 @@ def get_items(self, **kwargs): # Lookup product to determine if it belongs to a category url = f'http://{self.products_service_host}:{self.products_service_port}/products/id/{product_id}' log.debug('DefaultProductResolver - getting product details ' + url) - response = requests.get(url) - - if response.ok: - category = response.json()['category'] + try: + response = requests.get(url) + if response.ok: + category = response.json()['category'] + except requests.ConnectionError as e: + log.error(f"Could not pull product information from URL {url} - error: {e}") if category: # Product belongs to a category so get list of products in same category @@ -163,6 +167,7 @@ def get_items(self, **kwargs): return items + class PersonalizeRecommendationsResolver(Resolver): """ Provides recommendations from an Amazon Personalize campaign """ __personalize_runtime = boto3.client('personalize-runtime') @@ -205,6 +210,9 @@ def get_items(self, **kwargs): elif self.filter_arn: params['filterArn'] = self.filter_arn + # Pass request-time context through when provided (contextual metadata is not supported by the related-items recipe) + if 'context' in kwargs and kwargs['context'] is not None: params['context'] = kwargs['context'] + if item_id: params['itemId'] = item_id @@ -275,9 +283,10 @@ def get_items(self, **kwargs): return items + class PersonalizeRankingResolver(Resolver): """ Provides personalized ranking of products from an Amazon Personalize campaign - + The campaign must be trained using the Personalized-Ranking recipe """ __personalize_runtime = boto3.client('personalize-runtime') @@ -291,9 +300,11 @@ def __init__(self, **params): # Optionally support filter specified at resolver creation.
self.filter_arn = params.get('filter_arn') + self.context = params.get('context', None) + + def get_items(self, **kwargs): """ Returns reranking items from an Amazon Personalize campaign trained with Personalized-Ranking recipe - + Arguments: user_id - ID for the user for which to rerank items (required for Personalized-Ranking recipe) product_list - list of product IDs to rerank for the user @@ -313,6 +324,11 @@ def get_items(self, **kwargs): 'inputList': input_list } + if 'context' in kwargs and kwargs['context'] is not None: + params['context'] = kwargs['context'] + elif self.context is not None: + params['context'] = self.context + filter_arn = kwargs.get('filter_arn') if filter_arn: params['filterArn'] = filter_arn @@ -325,16 +341,22 @@ return response['personalizedRanking']
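The `context` plumbing above ends in the Personalize runtime call. A minimal sketch of that call; the ARN, user ID, item IDs, and the 'DISCOUNT' context key are placeholders, not values taken from this patch:

```python
# Sketch only: all literal values below are placeholders.
import boto3

personalize_runtime = boto3.client('personalize-runtime')

response = personalize_runtime.get_personalized_ranking(
    campaignArn='arn:aws:personalize:us-east-1:123456789012:campaign/example-ranking',
    userId='42',
    inputList=['p-101', 'p-205', 'p-442'],
    context={'DISCOUNT': 'Yes'}  # request-time contextual metadata, string-to-string map
)
reranked = response['personalizedRanking']  # [{'itemId': '...', 'score': ...}, ...]
```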
+ + class RankingProductsNoOpResolver(Resolver): - """ Simply returns the provided items in unchanged order; a dummy or no-op resolver for ranking use-cases + """ Simply returns the provided items in unchanged order; a dummy or no-op resolver for ranking use-cases This class is intended to provide a no-op experience for item/product ranking - use-cases. In other words, if you want the default behavior. The returned items + use-cases. In other words, if you want the default behavior. The returned items are formatted the same as Personalize to support consistent handling for clients. """ + + def __init__(self, **params): + """The resolver factory expects arguments in the constructor""" + pass + def get_items(self, **kwargs): """ Returns reranking items from an Amazon Personalize campaign trained with Personalized-Ranking recipe - + Arguments: user_id - ID for the user for which to rerank items (required for Personalized-Ranking recipe) product_list - list of product IDs to rerank for the user @@ -351,6 +373,80 @@ return echo_items + +class PersonalizeContextComparePickResolver(Resolver): + """ Compares personalized rankings from an Amazon Personalize campaign computed with and without + request-time context, and picks the items whose scores improve most when the context is applied + + The campaign must be trained using the Personalized-Ranking recipe + """ + + def __init__(self, **params): + with_context = params.get('with_context') + without_context = params.get('without_context') + self.with_resolver = PersonalizeRankingResolver(**params, context=with_context) + self.without_resolver = PersonalizeRankingResolver(**params, context=without_context) + + def get_items(self, **kwargs): + """ Returns the items whose Personalize ranking score improves most when the context is applied + + Arguments: + user_id - ID for the user for which to rerank items (required for Personalized-Ranking recipe) + product_list - list of product IDs to rerank for the user + """ + top_n = kwargs.get('num_results') + + if top_n is None: + raise Exception('num_results is required') + + log.debug('PersonalizeContextComparePickResolver - comparing personalized rankings...') + with_ranked = self.with_resolver.get_items(**kwargs) + without_ranked = self.without_resolver.get_items(**kwargs) + without_id_to_item = {item['itemId']: item for item in without_ranked} + with_id_to_item = {item['itemId']: item for item in with_ranked} + score_increases_with_discount = {item_id: with_id_to_item[item_id]['score'] / (0.01 + without_id_to_item[item_id]['score']) + for item_id in with_id_to_item} + # Sort so that the items whose score improves most with the context come first: + discount_improve_sorted_item_ids = sorted(score_increases_with_discount.keys(), + key=lambda item_id: score_increases_with_discount[item_id], + reverse=True) + + discount_improve_sorted_items = [with_id_to_item[item_id] for item_id in discount_improve_sorted_item_ids] + + return discount_improve_sorted_items[:top_n] + + +class RandomPickResolver(Resolver): + """ Picks N products at random. + """ + + def __init__(self, **params): + pass + + def get_items(self, **kwargs): + """ Picks N products at random. + + Arguments: + product_list - list of product IDs to rerank for the user + """ + input_list = kwargs.get('product_list') + top_n = kwargs.get('num_results') + + if not top_n: + raise Exception('num_results is required') + + if not input_list: + raise Exception('product_list is required') + + ranked_items = input_list.copy() + shuffle(ranked_items) + ranked_items = ranked_items[:top_n] + + echo_items = [] + for item_id in ranked_items: + echo_items.append({'itemId': item_id}) + + return echo_items + + class ResolverFactory: """ Provides resolver instance given a type and initialization arguments """ TYPE_HTTP = 'http' @@ -359,6 +455,8 @@ class ResolverFactory: TYPE_PERSONALIZE_RECOMMENDATIONS = 'personalize-recommendations' TYPE_PERSONALIZE_RANKING = 'personalize-ranking' TYPE_RANKING_NO_OP = 'ranking-no-op' + TYPE_PERSONALIZE_PICK = 'personalize-pick' + TYPE_RANDOM_PICK = 'random-pick' __resolvers = {} @@ -386,3 +484,7 @@ def get(type, **params): # These resolvers are used with product reranking use-cases ResolverFactory.register_resolver(ResolverFactory.TYPE_PERSONALIZE_RANKING, PersonalizeRankingResolver) ResolverFactory.register_resolver(ResolverFactory.TYPE_RANKING_NO_OP, RankingProductsNoOpResolver) +# These resolvers are used with top-N discount-picking use-cases +ResolverFactory.register_resolver(ResolverFactory.TYPE_PERSONALIZE_PICK, PersonalizeContextComparePickResolver) +ResolverFactory.register_resolver(ResolverFactory.TYPE_RANDOM_PICK, RandomPickResolver) + 
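A sketch of how the two new resolver types registered above might be obtained inside the service. The constructor kwargs other than `with_context`/`without_context` are assumptions (the real parameter set comes from experiment/feature configuration not shown in this patch):

```python
# Illustrative usage only; ARN and context key/values are hypothetical.
from experimentation.resolvers import ResolverFactory

compare_resolver = ResolverFactory.get(
    ResolverFactory.TYPE_PERSONALIZE_PICK,
    campaign_arn='arn:aws:personalize:us-east-1:123456789012:campaign/example-ranking',  # assumed kwarg
    with_context={'DISCOUNT': 'Yes'},
    without_context={'DISCOUNT': 'No'}
)

random_resolver = ResolverFactory.get(ResolverFactory.TYPE_RANDOM_PICK)

# Both expose the common Resolver interface and return Personalize-style dicts:
picked = random_resolver.get_items(product_list=['p-1', 'p-2', 'p-3'], num_results=2)
# e.g. [{'itemId': 'p-3'}, {'itemId': 'p-1'}]
```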
diff --git a/src/users/src/users-service/data/users.json.gz b/src/users/src/users-service/data/users.json.gz index b523d486d..08822d9f0 100644 Binary files a/src/users/src/users-service/data/users.json.gz and b/src/users/src/users-service/data/users.json.gz differ diff --git a/src/users/src/users-service/user.go b/src/users/src/users-service/user.go index 87538d80a..6c3964c6b 100644 --- a/src/users/src/users-service/user.go +++ b/src/users/src/users-service/user.go @@ -18,6 +18,7 @@ type User struct { Age int `json:"age" yaml:"age"` Gender string `json:"gender" yaml:"gender"` Persona string `json:"persona" yaml:"persona"` + DiscountPersona string `json:"discount_persona" yaml:"discount_persona"` SignUpDate *time.Time `json:"sign_up_date,omitempty"` LastSignInDate *time.Time `json:"last_sign_in_date,omitempty"` IdentityId string `json:"identity_id,omitempty"` diff --git a/src/videos/Dockerfile b/src/videos/Dockerfile new file mode 100644 index 000000000..9082d8908 --- /dev/null +++ b/src/videos/Dockerfile @@ -0,0 +1,14 @@ +FROM python:3.8-slim + +WORKDIR /app + +RUN apt-get update && apt-get install -y g++ ffmpeg procps +COPY /src/videos-service/requirements.txt /app/requirements.txt +RUN pip install -r requirements.txt + +RUN mkdir /app/video-files/ + +COPY /src/videos-service /app + +ENTRYPOINT ["python"] +CMD ["app.py"] diff --git a/src/videos/README.md b/src/videos/README.md new file mode 100644 index 000000000..a01ef6df0 --- /dev/null +++ b/src/videos/README.md @@ -0,0 +1,36 @@ +# Retail Demo Store Videos Service + +The Videos service streams product videos and synchronised metadata to [Amazon Interactive Video Service](https://aws.amazon.com/ivs/) and provides stream metadata (stream endpoints and products contained within the stream) via a Flask API. The [Web UI](../web-ui) makes calls to the service when a user views the 'Live' view. The endpoint provides a list of stream playback endpoints, each with a list of their associated products, allowing the UI to present all products from the video before they appear in the stream. + +When deployed to AWS, CodePipeline is used to build and deploy the Videos service as a Docker container to Amazon ECS behind an Application Load Balancer. The Videos service can also be run locally in a Docker container. This makes it easier to iterate on and test changes locally before committing. + +## Deploying Channels & Streaming Video + +IVS channels are created and managed by the CloudFormation template. The default CloudFormation settings do not create any new IVS streams - instead the demo directs the UI to four externally hosted IVS streams. + +To create and use IVS channels hosted in your own account, the option 'Use default IVS streams' should be set to 'No' when deploying CloudFormation. In this case, one IVS channel will be created for each '.mkv' video found in the `videos/` path of the staging S3 bucket. These videos should be uploaded by running the provided staging script - any videos in the local `videos/` directory will be uploaded. + +## Custom Videos & Metadata +To enable full UI integration with custom videos, metadata must be embedded into the .mkv file. + +Metadata must be created in the `.srt` format, with each timestamped entry containing data in the form: +`{"productId": "<product id>"}`. The Videos service sends the metadata at the start time of each timestamp range; the end time is not used. The file can either be edited manually or using an SRT editor (either software or online). An example metadata file can be seen [here](../../videos/sample.srt). + +This metadata can then be combined with a video file to create an encoded `.mkv` file with embedded metadata by running the following command: +``` +ffmpeg -i <video file> -i <metadata file>.srt -vf scale=640x360 -c:v libx264 \ +-pix_fmt yuv420p -profile:v main -tune fastdecode -x264opts "nal-hrd=cbr:no-scenecut" -minrate 3000 \ +-maxrate 3000 -g 60 -c:a aac -b:a 160k -ac 2 -ar 44100 <output file>.mkv +``` +An `.mkv` file created with this command is ready to be staged and should provide optimal UI integration. +The command also pre-encodes the video in a format designed to reduce the CPU & memory requirements of the Videos service. + +## Local Development + +The Videos service can be built and run locally (in Docker) using Docker Compose. See the [local development instructions](../) for details. **From the `../src` directory**, run the following command to build and deploy the service locally. + +```console
foo@bar:~$ docker-compose up --build videos
``` + +Once the container is up and running, you can access it in your browser or with a utility such as [Postman](https://www.postman.com/) at [http://localhost:8007](http://localhost:8007). \ No newline at end of file
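For reference, a minimal metadata file in the `.srt` form described in the README above might look like the following (timestamps and product IDs are placeholders):

```
1
00:00:05,000 --> 00:00:10,000
{"productId": "8bffb5fb-624f-48a8-a99f-b8e9c64bbe29"}

2
00:00:20,000 --> 00:00:25,000
{"productId": "f6231107-7050-44ea-ac6a-dcb09f4a0b33"}
```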
diff --git a/src/videos/buildspec.yml b/src/videos/buildspec.yml new file mode 100644 index 000000000..095906cfd --- /dev/null +++ b/src/videos/buildspec.yml @@ -0,0 +1,17 @@ +version: 0.2 +phases: + pre_build: + commands: + - $(aws ecr get-login --no-include-email) + - TAG="$(echo $CODEBUILD_RESOLVED_SOURCE_VERSION | head -c 8)" + - IMAGE_URI="${REPOSITORY_URI}:${TAG}" + build: + commands: + - cd $SERVICE_PATH + - docker build --tag "$IMAGE_URI" . + post_build: + commands: + - docker push "$IMAGE_URI" + - printf '[{"name":"%s","imageUri":"%s"}]' "$SERVICE_NAME" "$IMAGE_URI" > ../../images.json +artifacts: + files: images.json diff --git a/src/videos/src/videos-service/app.py b/src/videos/src/videos-service/app.py new file mode 100644 index 000000000..96d9e362d --- /dev/null +++ b/src/videos/src/videos-service/app.py @@ -0,0 +1,334 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# SPDX-License-Identifier: MIT-0 + +import logging +import json +import os +import pathlib +import pprint +import subprocess +import threading +import time + +import boto3 +import srt +from flask import Flask, jsonify, Response +from flask_cors import CORS + + +# -- Environment variables - defined by CloudFormation when deployed +VIDEO_BUCKET = os.environ.get('RESOURCE_BUCKET') +SSM_VIDEO_CHANNEL_MAP_PARAM = os.environ.get('PARAMETER_IVS_VIDEO_CHANNEL_MAP', 'retaildemostore-ivs-video-channel-map') + +USE_DEFAULT_IVS_STREAMS = os.environ.get('USE_DEFAULT_IVS_STREAMS') == 'true' + +DEFAULT_THUMB_FNAME = 'default_thumb.png' +STATIC_FOLDER = '/app/static' +STATIC_URL_PATH = '/static' +SUBTITLE_FORMAT = 'srt' +LOCAL_VIDEO_DIR = '/app/video-files/' +DEFAULT_STREAMS_CONFIG_S3_PATH = 'videos/default_streams/default_streams.json' + +# -- Parameterised ffmpeg commands +FFMPEG_STREAM_CMD = """ffmpeg -loglevel panic -hide_banner -re -stream_loop -1 -i \"{video_filepath}\" \ + -r 30 -c:v copy -f flv rtmps://{ingest_endpoint}:443/app/{stream_key} -map 0:s -f {subtitle_format} -""" +FFMPEG_SUBS_COMMAND = "ffmpeg -i \"{video_filepath}\" \"{subtitle_path}\"" + + +# Globally accessed variable to store stream metadata (URLs & associated product IDs). Returned via the `/stream_details` +# endpoint +stream_details = {} + +ivs_client = boto3.client('ivs') +ssm_client = boto3.client('ssm') +s3_client = boto3.client('s3') + + +# -- Load default streams config +def load_default_streams_config(): + app.logger.info(f"Downloading default streams config from bucket {VIDEO_BUCKET} with key {DEFAULT_STREAMS_CONFIG_S3_PATH}.") + + config_response = s3_client.get_object(Bucket=VIDEO_BUCKET, Key=DEFAULT_STREAMS_CONFIG_S3_PATH) + config = json.loads(config_response['Body'].read().decode('utf-8')) + for (key, entry) in config.items(): + app.logger.info(f"{key}, {entry}") + config[key] = {**entry, 'thumb_url': STATIC_URL_PATH + '/' + entry['thumb_fname']} + config[key].pop('thumb_fname', None) + + app.logger.info("Pulled config:") + app.logger.info(config) + + return config
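An assumed sketch of `default_streams.json`, inferred from how this loader and the `/stream_details` endpoint consume it; only `thumb_fname` is directly evidenced by the code, and the URLs and IDs are placeholders:

```json
{
  "0": {
    "playback_url": "https://example.playback.live-video.net/api/video/v1/example.m3u8",
    "thumb_fname": "default_stream0_thumb.png",
    "products": ["8bffb5fb-624f-48a8-a99f-b8e9c64bbe29"]
  }
}
```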
+ """ + local_path = LOCAL_VIDEO_DIR + s3_key.split('/')[-1] + app.logger.info(f"Downloading file {s3_key} from bucket {VIDEO_BUCKET} to {local_path}.") + s3_client.download_file(Bucket=VIDEO_BUCKET, Key=s3_key, Filename=local_path) + app.logger.info(f"File {s3_key} downloaded from bucket {VIDEO_BUCKET} to {local_path}.") + + thumbnail_path = None + thumbnail_key = '.'.join(s3_key.split('.')[:-1]) + '.png' + try: + local_thumbnail_fname = thumbnail_key.split('/')[-1] + local_thumbnail_path = app.static_folder + '/' + local_thumbnail_fname + s3_client.download_file(Bucket=VIDEO_BUCKET, Key=thumbnail_key, Filename=local_thumbnail_path) + app.logger.info(f"File {thumbnail_key} downloaded from bucket {VIDEO_BUCKET} to {local_thumbnail_path}.") + thumbnail_path = app.static_url_path + '/' + local_thumbnail_fname + except Exception as e: + app.logger.warning(f'No thumbnail available for {VIDEO_BUCKET}/{s3_key} as {VIDEO_BUCKET}/{thumbnail_key} - ' + f'exception: {e}') + return local_path, thumbnail_path + + +def get_ffmpeg_stream_cmd(video_filepath, ingest_endpoint, stream_key, subtitle_format): + """ + Returns the command to start streaming a video using ffmpeg. + """ + return FFMPEG_STREAM_CMD.format(**locals()) + + +def get_ffmpeg_subs_cmd(video_filepath, subtitle_path): + """ + Returns the ffmpeg command to rip subtitles (ie. metadata) from a video file. + """ + return FFMPEG_SUBS_COMMAND.format(**locals()) + + +def get_featured_products(video_filepath, channel_id): + """ + Extracts a list of product IDs from the metadata attached to a video file. The values are saved in the global + `stream_details` dict. + """ + subtitle_path = pathlib.Path(video_filepath).with_suffix('.srt') + get_subs_command = get_ffmpeg_subs_cmd(video_filepath, subtitle_path) + process = subprocess.run( + get_subs_command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, universal_newlines=True, shell=True) + with open(subtitle_path) as f: + subtitle_content = srt.parse(f) + for line in subtitle_content: + product_id = json.loads(line.content)['productId'] + if 'products' not in stream_details[channel_id]: + stream_details[channel_id]['products'] = [product_id] + else: + if product_id not in stream_details[channel_id]['products']: + stream_details[channel_id]['products'].append(product_id) + + +def is_ssm_parameter_set(parameter_name): + """ + Returns whether an SSM parameter with a given name has been set (ie. value is not 'NONE') + """ + try: + response = ssm_client.get_parameter(Name=parameter_name) + return response['Parameter']['Value'] != 'NONE' + except ssm_client.exceptions.ParameterNotFound: + return False + + +def put_ivs_metadata(channel_arn, line): + """ + Sends metadata to a given IVS stream. Metadata can be any string, but the AWS Retail Demo Store UI expects + metadata of the format {"productId":""} + """ + try: + app.logger.info(f'Sending metadata to stream: {line}') + ivs_client.put_metadata( + channelArn=channel_arn, + metadata=line + ) + except ivs_client.exceptions.ChannelNotBroadcasting: + app.logger.warning(f'Channel not broadcasting. Waiting for 5 seconds.') + app.logger.info('Running ffmpeg processes:') + app.logger.info(os.system("ps aux|grep 'PID\|ffmpeg'")) + time.sleep(5) + + +def get_stream_state(channel_arn): + """ + Returns the state of a stream given it's ARN. One of 'LIVE', 'OFFLINE' (from API response) + or 'NOT_BROADCASTING' (inferred). 
+ """ + try: + stream_response = ivs_client.get_stream(channelArn=channel_arn)['stream'] + stream_state = stream_response['state'] + except ivs_client.exceptions.ChannelNotBroadcasting: + stream_state = "NOT_BROADCASTING" + return stream_state + + +def start_streams(): + """ + Initiates all IVS streams based on environment variables. If the SSM_VIDEO_CHANNEL_MAP_PARAM (map of videos in + S3 to IVS channels) is set and the user has not requested to use the default IVS streams + (USE_DEFAULT_IVS_STREAMS, defined by CloudFormation input) then one stream will be started per video described + in the video to IVS channel map. Each stream runs in a separate thread. + + If streams are not started, then `stream_details` will be set to the details of a collection of existing streams + """ + if is_ssm_parameter_set(SSM_VIDEO_CHANNEL_MAP_PARAM) and not USE_DEFAULT_IVS_STREAMS: + video_channel_param_value = ssm_client.get_parameter(Name=SSM_VIDEO_CHANNEL_MAP_PARAM)['Parameter']['Value'] + app.logger.info(f"Found IVS channel map: {video_channel_param_value}") + video_channel_map = json.loads(video_channel_param_value) + + for idx, (s3_video_key, ivs_channel_arn) in enumerate(video_channel_map.items()): + threading.Thread(target=stream, args=(s3_video_key, ivs_channel_arn, idx)).start() + + else: + global stream_details + stream_details = load_default_streams_config() + + +def stream(s3_video_key, ivs_channel_arn, channel_id): + """ + Starts the stream for a given video file and IVS channel. The video file is streamed on a loop using ffmpeg, and + any attached metadata (from the subtitles embedded in the video file) is sent to the channel's `put_metadata` + endpoint. + """ + video_filepath, thumb_url = download_video_file(s3_video_key) + if thumb_url is None: + thumb_url = app.static_url_path + '/' + DEFAULT_THUMB_FNAME + + channel_response = ivs_client.get_channel(arn=ivs_channel_arn)['channel'] + ingest_endpoint = channel_response['ingestEndpoint'] + playback_endpoint = channel_response['playbackUrl'] + stream_details[channel_id] = {'playback_url': playback_endpoint, + 'thumb_url': thumb_url} + + get_featured_products(video_filepath, channel_id) + + stream_state = get_stream_state(ivs_channel_arn) + stream_arn = ivs_client.list_stream_keys(channelArn=ivs_channel_arn)['streamKeys'][0]['arn'] + stream_key = ivs_client.get_stream_key(arn=stream_arn)['streamKey']['value'] + app.logger.info(f"Stream details:\nIngest endpoint: {ingest_endpoint}\nStream state: {stream_state}") + + if SUBTITLE_FORMAT == 'srt': + while True: + if stream_state != "NOT_BROADCASTING": + app.logger.info(f"Stream {stream_arn} is currently in state {stream_state}. 
Waiting for state NOT_BROADCASTING") + sleep_time = 20 + app.logger.info(f"Waiting for {sleep_time} seconds") + time.sleep(sleep_time) + stream_state = get_stream_state(ivs_channel_arn) + continue + + app.logger.info('Starting video stream') + ffmpeg_stream_cmd = get_ffmpeg_stream_cmd(video_filepath, ingest_endpoint, stream_key, SUBTITLE_FORMAT) + app.logger.info(f'ffmpeg command: {ffmpeg_stream_cmd}') + + process = subprocess.Popen( + ffmpeg_stream_cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, universal_newlines=True, shell=True) + app.logger.info('Running ffmpeg processes:') + app.logger.info(os.system("ps aux|grep 'PID\|ffmpeg'")) + + lines = iter(process.stdout) + app.logger.info('Starting event stream') + while True: + try: + int(next(lines).strip()) + time_range = next(lines).strip() + if not '-->' in time_range: + raise ValueError(f'Expected a time range instead of {time_range}') + send_text = '' + while True: + text = next(lines).strip() + if len(text) == 0: break + if len(send_text)>0: send_text+='\n' + send_text += text + put_ivs_metadata(ivs_channel_arn, send_text) + except StopIteration: + app.logger.warning('Video iteration has stopped unexpectedly. Attempting restart in 10 seconds.') + time.sleep(10) + break + else: + raise NotImplementedError(f'{SUBTITLE_FORMAT} is not currently supported by this demo.') +# -- End Video streaming + + +# -- Logging +class LoggingMiddleware(object): + def __init__(self, app): + self._app = app + + def __call__(self, environ, resp): + errorlog = environ['wsgi.errors'] + pprint.pprint(('REQUEST', environ), stream=errorlog) + + def log_response(status, headers, *args): + pprint.pprint(('RESPONSE', status, headers), stream=errorlog) + return resp(status, headers, *args) + + return self._app(environ, log_response) +# -- End Logging + + +# -- Exceptions +class BadRequest(Exception): + status_code = 400 + + def __init__(self, message, status_code=None, payload=None): + Exception.__init__(self) + self.message = message + if status_code is not None: + self.status_code = status_code + self.payload = payload + + def to_dict(self): + rv = dict(self.payload or ()) + rv['message'] = self.message + return rv + + +# -- Handlers +app = Flask(__name__, + static_folder=STATIC_FOLDER, + static_url_path=STATIC_URL_PATH) +corps = CORS(app) + + +@app.errorhandler(BadRequest) +def handle_bad_request(error): + response = jsonify(error.to_dict()) + response.status_code = error.status_code + return response + + +@app.route('/') +def index(): + return 'Videos Service' + + +@app.route('/stream_details') +def streams(): + response_data = [] + for value in stream_details.values(): + response_data.append(value) + response = { + "streams": response_data + } + return Response(json.dumps(response), content_type = 'application/json') + + +@app.route('/health') +def health(): + return 'OK' + + +if __name__ == '__main__': + app.wsgi_app = LoggingMiddleware(app.wsgi_app) + app.logger.setLevel(level=logging.INFO) + + app.logger.info(f"VIDEO_BUCKET: {VIDEO_BUCKET}") + app.logger.info(f"SSM_VIDEO_CHANNEL_MAP_PARAM: {SSM_VIDEO_CHANNEL_MAP_PARAM}") + app.logger.info(f"USE_DEFAULT_IVS_STREAMS: {USE_DEFAULT_IVS_STREAMS}") + + app.logger.info("Starting video streams") + start_streams() + + app.logger.info("Starting API") + app.run(debug=False, host='0.0.0.0', port=80) diff --git a/src/videos/src/videos-service/requirements.txt b/src/videos/src/videos-service/requirements.txt new file mode 100644 index 000000000..5622a41b1 --- /dev/null +++ 
diff --git a/src/videos/src/videos-service/requirements.txt b/src/videos/src/videos-service/requirements.txt new file mode 100644 index 000000000..5622a41b1 --- /dev/null +++ b/src/videos/src/videos-service/requirements.txt @@ -0,0 +1,4 @@ +Flask==1.0.2 +flask-cors==3.0.8 +boto3==1.14.57 +srt \ No newline at end of file diff --git a/src/videos/src/videos-service/run_tests.py b/src/videos/src/videos-service/run_tests.py new file mode 100644 index 000000000..23957e196 --- /dev/null +++ b/src/videos/src/videos-service/run_tests.py @@ -0,0 +1,8 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# SPDX-License-Identifier: MIT-0 + +import os, sys, unittest + +sys.argv += ['discover', os.path.dirname(sys.argv[0]), 'test_*.py'] + +unittest.main(module=None) diff --git a/src/videos/src/videos-service/static/default_stream0_thumb.png b/src/videos/src/videos-service/static/default_stream0_thumb.png new file mode 100644 index 000000000..d0f6bcd35 Binary files /dev/null and b/src/videos/src/videos-service/static/default_stream0_thumb.png differ diff --git a/src/videos/src/videos-service/static/default_stream1_thumb.png b/src/videos/src/videos-service/static/default_stream1_thumb.png new file mode 100644 index 000000000..da572d9d3 Binary files /dev/null and b/src/videos/src/videos-service/static/default_stream1_thumb.png differ diff --git a/src/videos/src/videos-service/static/default_stream2_thumb.png b/src/videos/src/videos-service/static/default_stream2_thumb.png new file mode 100644 index 000000000..e785caef6 Binary files /dev/null and b/src/videos/src/videos-service/static/default_stream2_thumb.png differ diff --git a/src/videos/src/videos-service/static/default_stream3_thumb.png b/src/videos/src/videos-service/static/default_stream3_thumb.png new file mode 100644 index 000000000..98d07b98f Binary files /dev/null and b/src/videos/src/videos-service/static/default_stream3_thumb.png differ diff --git a/src/videos/src/videos-service/static/default_thumb.png b/src/videos/src/videos-service/static/default_thumb.png new file mode 100644 index 000000000..9eade1919 Binary files /dev/null and b/src/videos/src/videos-service/static/default_thumb.png differ diff --git a/src/web-ui/.env b/src/web-ui/.env index 1ceb3eb03..15d4fa1f6 100644 --- a/src/web-ui/.env +++ b/src/web-ui/.env @@ -14,6 +14,8 @@ VUE_APP_ORDERS_SERVICE_DOMAIN=http://localhost VUE_APP_ORDERS_SERVICE_PORT=8004 VUE_APP_RECOMMENDATIONS_SERVICE_DOMAIN=http://localhost VUE_APP_RECOMMENDATIONS_SERVICE_PORT=8005 +VUE_APP_VIDEOS_SERVICE_DOMAIN=http://localhost +VUE_APP_VIDEOS_SERVICE_PORT=8007 VUE_APP_SEARCH_SERVICE_DOMAIN=http://localhost VUE_APP_SEARCH_SERVICE_PORT=9200 diff --git a/src/web-ui/gen_env.sh b/src/web-ui/gen_env.sh index cbcd2f612..40ef7f83c 100755 --- a/src/web-ui/gen_env.sh +++ b/src/web-ui/gen_env.sh @@ -21,6 +21,8 @@ printf 'VUE_APP_RECOMMENDATIONS_SERVICE_DOMAIN=%s\n' "$RECOMMENDATIONS_SERVICE_U printf 'VUE_APP_RECOMMENDATIONS_SERVICE_PORT=%s\n' "80" >> .env printf 'VUE_APP_SEARCH_SERVICE_DOMAIN=%s\n' "$SEARCH_SERVICE_URL" >> .env printf 'VUE_APP_SEARCH_SERVICE_PORT=80\n' >> .env +printf 'VUE_APP_VIDEOS_SERVICE_DOMAIN=%s\n' "$VIDEOS_SERVICE_URL" >> .env +printf 'VUE_APP_VIDEOS_SERVICE_PORT=80\n' >> .env printf 'VUE_APP_AWS_REGION=%s\n' "$DEPLOYED_REGION" >> .env printf 'VUE_APP_AWS_IDENTITY_POOL_ID=%s\n' "$COGNITO_IDENTITY_POOL_ID" >> .env printf 'VUE_APP_AWS_USER_POOL_ID=%s\n' "$COGNITO_USER_POOL_ID" >> .env diff --git a/src/web-ui/src/analytics/AnalyticsHandler.js b/src/web-ui/src/analytics/AnalyticsHandler.js index d8d6e5862..758e81d15 100644 --- a/src/web-ui/src/analytics/AnalyticsHandler.js +++ b/src/web-ui/src/analytics/AnalyticsHandler.js @@ -210,7 +210,8 @@ export const 
AnalyticsHandler = { eventType: 'ProductAdded', userId: user ? user.id : AmplifyStore.state.provisionalUserID, properties: { - itemId: product.id + itemId: product.id, + discount: "No" } }, 'AmazonPersonalize') AmplifyStore.commit('incrementSessionEventsRecorded'); @@ -331,7 +332,8 @@ export const AnalyticsHandler = { eventType: 'ProductQuantityUpdated', userId: user ? user.id : AmplifyStore.state.provisionalUserID, properties: { - itemId: cartItem.product_id + itemId: cartItem.product_id, + discount: "No" } }, 'AmazonPersonalize') AmplifyStore.commit('incrementSessionEventsRecorded'); @@ -353,7 +355,7 @@ export const AnalyticsHandler = { } }, - productViewed(user, product, feature, experimentCorrelationId) { + productViewed(user, product, feature, experimentCorrelationId, discount) { if (user) { AmplifyAnalytics.record({ name: 'ProductViewed', @@ -376,7 +378,8 @@ export const AnalyticsHandler = { eventType: 'ProductViewed', userId: user ? user.id : AmplifyStore.state.provisionalUserID, properties: { - itemId: product.id + itemId: product.id, + discount: discount ? "Yes" : "No" } }, 'AmazonPersonalize'); AmplifyStore.commit('incrementSessionEventsRecorded'); @@ -433,7 +436,8 @@ export const AnalyticsHandler = { eventType: 'CartViewed', userId: user ? user.id : AmplifyStore.state.provisionalUserID, properties: { - itemId: cart.items[item].product_id + itemId: cart.items[item].product_id, + discount: "No" } }, 'AmazonPersonalize') AmplifyStore.commit('incrementSessionEventsRecorded'); @@ -476,7 +480,8 @@ export const AnalyticsHandler = { eventType: 'CheckoutStarted', userId: user ? user.id : AmplifyStore.state.provisionalUserID, properties: { - itemId: cart.items[item].product_id + itemId: cart.items[item].product_id, + discount: "No" } }, 'AmazonPersonalize') AmplifyStore.commit('incrementSessionEventsRecorded'); @@ -537,7 +542,8 @@ export const AnalyticsHandler = { eventType: 'OrderCompleted', userId: user ? 
user.id : AmplifyStore.state.provisionalUserID, properties: { - itemId: orderItem.product_id + itemId: orderItem.product_id, + discount: "No" } }, 'AmazonPersonalize') AmplifyStore.commit('incrementSessionEventsRecorded'); diff --git a/src/web-ui/src/authenticated/Profile.vue b/src/web-ui/src/authenticated/Profile.vue index 1534eede7..c9e9c6fd8 100644 --- a/src/web-ui/src/authenticated/Profile.vue +++ b/src/web-ui/src/authenticated/Profile.vue @@ -74,6 +74,7 @@ + diff --git a/src/web-ui/src/components/ProductPrice/ProductPrice.vue b/src/web-ui/src/components/ProductPrice/ProductPrice.vue index 635340fc3..38c20da13 100644 --- a/src/web-ui/src/components/ProductPrice/ProductPrice.vue +++ b/src/web-ui/src/components/ProductPrice/ProductPrice.vue @@ -1,10 +1,11 @@ @@ -27,4 +36,9 @@ export default { .grey { color: var(--grey-600); } + +.discounted { + text-decoration: line-through; + color: red; +} \ No newline at end of file diff --git a/src/web-ui/src/components/RecommendedProductsSection/ProductCarousel/ProductCarousel.vue b/src/web-ui/src/components/RecommendedProductsSection/ProductCarousel/ProductCarousel.vue index af2c7b549..fdbfebb63 100644 --- a/src/web-ui/src/components/RecommendedProductsSection/ProductCarousel/ProductCarousel.vue +++ b/src/web-ui/src/components/RecommendedProductsSection/ProductCarousel/ProductCarousel.vue @@ -11,7 +11,7 @@ params: { id: recommendation.product.id }, query: { exp: getExperimentCorrelationId(recommendation.experiment), feature }, }" - class="product p-3 d-flex flex-column justify-content-between" + class="featured-product p-3 d-flex flex-column justify-content-between" >
@@ -98,7 +98,7 @@ export default { \ No newline at end of file diff --git a/src/web-ui/src/public/Main.vue b/src/web-ui/src/public/Main.vue index a1453b8e6..f26f54728 100644 --- a/src/web-ui/src/public/Main.vue +++ b/src/web-ui/src/public/Main.vue @@ -28,7 +28,7 @@ import { RepositoryFactory } from '@/repositories/RepositoryFactory'; import { AnalyticsHandler } from '@/analytics/AnalyticsHandler'; import Layout from '@/components/Layout/Layout'; -import RecommendedProductsSection from '@/components//RecommendedProductsSection/RecommendedProductsSection'; +import RecommendedProductsSection from '@/components/RecommendedProductsSection/RecommendedProductsSection'; const ProductsRepository = RepositoryFactory.get('products'); const RecommendationsRepository = RepositoryFactory.get('recommendations'); diff --git a/src/web-ui/src/public/ProductDetail.vue b/src/web-ui/src/public/ProductDetail.vue index 9e94d8b33..17371edec 100644 --- a/src/web-ui/src/public/ProductDetail.vue +++ b/src/web-ui/src/public/ProductDetail.vue @@ -9,7 +9,7 @@
- +
@@ -78,6 +78,7 @@ import Layout from '@/components/Layout/Layout'; import ProductPrice from '@/components/ProductPrice/ProductPrice'; import FiveStars from '@/components/FiveStars/FiveStars'; import RecommendedProductsSection from '@/components/RecommendedProductsSection/RecommendedProductsSection'; +import {discountProductPrice} from "@/util/discountProductPrice"; const RecommendationsRepository = RepositoryFactory.get('recommendations'); const MAX_RECOMMENDATIONS = 6; @@ -92,6 +93,13 @@ export default { RecommendedProductsSection, }, mixins: [product], + props: { + discount: { + type: Boolean, + required: false, + default: false + } + }, data() { return { quantity: 1, @@ -143,7 +151,10 @@ export default { }, async addProductToCart() { await this.addToCart({ - product: this.product, + product: { + ...this.product, + price: this.discount ? discountProductPrice(this.product.price) : this.product.price + }, quantity: this.quantity, feature: this.$route.query.feature, exp: this.$route.query.exp, @@ -160,7 +171,7 @@ export default { this.relatedProducts = null; this.getRelatedProducts(); - this.recordProductViewed(this.$route.query.feature, this.$route.query.exp); + this.recordProductViewed(this.$route.query.feature, this.$route.query.exp, this.$route.query.di); }, async getRelatedProducts() { const response = await RecommendationsRepository.getRelatedProducts( diff --git a/src/web-ui/src/public/components/CartItem.vue b/src/web-ui/src/public/components/CartItem.vue index 335ee2187..d8fc1474f 100644 --- a/src/web-ui/src/public/components/CartItem.vue +++ b/src/web-ui/src/public/components/CartItem.vue @@ -1,7 +1,6 @@