From 3e97cbf85962b3e2e0debe17908be79be6861910 Mon Sep 17 00:00:00 2001 From: jordanbreen28 Date: Tue, 30 Jan 2024 17:22:36 +0000 Subject: [PATCH] (bug) - add retries on failed dsc invocation --- .rubocop_todo.yml | 14 +++--- .../dsc_base_provider/dsc_base_provider.rb | 47 ++++++++++++++++++- .../dsc_base_provider_spec.rb | 38 +++++++++++++++ 3 files changed, 91 insertions(+), 8 deletions(-) diff --git a/.rubocop_todo.yml b/.rubocop_todo.yml index 886b2c95..11f40c40 100644 --- a/.rubocop_todo.yml +++ b/.rubocop_todo.yml @@ -1,6 +1,6 @@ # This configuration was generated by # `rubocop --auto-gen-config` -# on 2024-01-23 11:29:18 UTC using RuboCop version 1.50.2. +# on 2024-01-30 16:36:55 UTC using RuboCop version 1.50.2. # The point is for the user to remove these configuration records # one by one as the offenses are removed from the code base. # Note that changes in the inspected code, or installation of new @@ -46,14 +46,14 @@ Metrics/BlockLength: # Offense count: 2 # Configuration parameters: CountComments, CountAsOne. Metrics/ClassLength: - Max: 526 + Max: 553 # Offense count: 12 # Configuration parameters: AllowedMethods, AllowedPatterns. Metrics/CyclomaticComplexity: Max: 24 -# Offense count: 22 +# Offense count: 23 # Configuration parameters: CountComments, CountAsOne, AllowedMethods, AllowedPatterns. Metrics/MethodLength: Max: 42 @@ -118,12 +118,12 @@ RSpec/DescribeClass: - 'spec/acceptance/dsc/complex.rb' - 'spec/unit/pwsh_spec.rb' -# Offense count: 31 +# Offense count: 32 # Configuration parameters: CountAsOne. RSpec/ExampleLength: Max: 70 -# Offense count: 102 +# Offense count: 105 # Configuration parameters: . # SupportedStyles: have_received, receive RSpec/MessageSpies: @@ -134,7 +134,7 @@ RSpec/MultipleDescribes: Exclude: - 'spec/unit/pwsh_spec.rb' -# Offense count: 147 +# Offense count: 151 RSpec/MultipleExpectations: Max: 15 @@ -156,7 +156,7 @@ RSpec/NoExpectationExample: - 'spec/unit/pwsh/windows_powershell_spec.rb' - 'spec/unit/pwsh_spec.rb' -# Offense count: 55 +# Offense count: 56 RSpec/StubbedMock: Exclude: - 'spec/unit/puppet/provider/dsc_base_provider/dsc_base_provider_spec.rb' diff --git a/lib/puppet/provider/dsc_base_provider/dsc_base_provider.rb b/lib/puppet/provider/dsc_base_provider/dsc_base_provider.rb index 602edf00..3b3dec17 100644 --- a/lib/puppet/provider/dsc_base_provider/dsc_base_provider.rb +++ b/lib/puppet/provider/dsc_base_provider/dsc_base_provider.rb @@ -5,7 +5,7 @@ require 'pathname' require 'json' -class Puppet::Provider::DscBaseProvider +class Puppet::Provider::DscBaseProvider # rubocop:disable Metrics/ClassLength # Initializes the provider, preparing the instance variables which cache: # - the canonicalized resources across calls # - query results @@ -244,6 +244,7 @@ def invoke_dsc_resource(context, name_hash, props, method) script_content = ps_script_content(resource) context.debug("Script:\n #{redact_secrets(script_content)}") output = ps_manager.execute(remove_secret_identifiers(script_content))[:stdout] + if output.nil? context.err('Nothing returned') return nil @@ -256,8 +257,10 @@ def invoke_dsc_resource(context, name_hash, props, method) return nil end context.debug("raw data received: #{data.inspect}") + collision_error_matcher = /The Invoke-DscResource cmdlet is in progress and must return before Invoke-DscResource can be invoked/ error = data['errormessage'] + unless error.nil? || error.empty? # NB: We should have a way to stop processing this resource *now* without blowing up the whole Puppet run # Raising an error stops processing but blows things up while context.err alerts but continues to process @@ -267,6 +270,11 @@ def invoke_dsc_resource(context, name_hash, props, method) @logon_failures << name_hash[:dsc_psdscrunascredential].dup # This is a hack to handle the query cache to prevent a second lookup @cached_query_results << name_hash # if fetch_cached_hashes(@cached_query_results, [data]).empty? + elsif error.match?(collision_error_matcher) + context.notice('Invoke-DscResource collision detected: Please stagger the timing of your Puppet runs as this can lead to unexpected behaviour.') + retry_invoke_dsc_resource(context, 5, 60, collision_error_matcher) do + data = ps_manager.execute(remove_secret_identifiers(script_content))[:stdout] + end else context.err(error) end @@ -276,6 +284,43 @@ def invoke_dsc_resource(context, name_hash, props, method) data end + # Retries Invoke-DscResource when returned error matches error regex supplied as param. + # @param context [Object] the Puppet runtime context to operate in and send feedback to + # @param max_retry_count [Int] max number of times to retry Invoke-DscResource + # @param retry_wait_interval_secs [Int] Time delay between retries + # @param error_matcher [String] the regex pattern to match with error + def retry_invoke_dsc_resource(context, max_retry_count, retry_wait_interval_secs, error_matcher) + try = 0 + while try < max_retry_count + try += 1 + # notify and wait for retry interval + context.notice("Sleeping for #{retry_wait_interval_secs} seconds.") + sleep retry_wait_interval_secs + # notify and retry + context.notice("Retrying: attempt #{try} of #{max_retry_count}.") + data = JSON.parse(yield) + # if no error, break + if data['errormessage'].nil? + break + # check if error matches error matcher supplied + elsif data['errormessage'].match?(error_matcher) + # if last attempt, return error + if try == max_retry_count + context.notice("Attempt #{try} of #{max_retry_count} failed. No more retries.") + # all attempts failed, raise error + return context.err(data['errormessage']) + end + # if not last attempt, notify, continue and retry + context.notice("Attempt #{try} of #{max_retry_count} failed.") + next + else + # if we get an unexpected error, return + return context.err(data['errormessage']) + end + end + data + end + # Determine if the DSC Resource is in the desired state, invoking the `Test` method unless it's # already been run for the resource, in which case reuse the result instead of checking for each # property. This behavior is only triggered if the validation_mode is set to resource; by default diff --git a/spec/unit/puppet/provider/dsc_base_provider/dsc_base_provider_spec.rb b/spec/unit/puppet/provider/dsc_base_provider/dsc_base_provider_spec.rb index 2f49d9f1..8b9ce1b6 100644 --- a/spec/unit/puppet/provider/dsc_base_provider/dsc_base_provider_spec.rb +++ b/spec/unit/puppet/provider/dsc_base_provider/dsc_base_provider_spec.rb @@ -794,6 +794,44 @@ end end + context 'when the invocation script errors with a collision' do + it 'writes a notice via context and applies successfully on retry' do + expect(ps_manager).to receive(:execute).and_return({ stdout: '{"errormessage": "The Invoke-DscResource cmdlet is in progress and must return before Invoke-DscResource can be invoked"}' }) + expect(context).to receive(:notice).with(/Invoke-DscResource collision detected: Please stagger the timing of your Puppet runs as this can lead to unexpected behaviour./).once + expect(context).to receive(:notice).with('Sleeping for 60 seconds.').twice + expect(context).to receive(:notice).with(/Retrying: attempt [1-2] of 5/).twice + expect(ps_manager).to receive(:execute).and_return({ stdout: '{"errormessage": "The Invoke-DscResource cmdlet is in progress and must return before Invoke-DscResource can be invoked"}' }) + expect(context).to receive(:notice).with('Attempt 1 of 5 failed.') + allow(provider).to receive(:sleep) + expect(ps_manager).to receive(:execute).and_return({ stdout: '{"errormessage": null}' }) + expect { result }.not_to raise_error + end + + it 'writes a error via context and fails to apply when all retry attempts used' do + expect(ps_manager).to receive(:execute).and_return({ stdout: '{"errormessage": "The Invoke-DscResource cmdlet is in progress and must return before Invoke-DscResource can be invoked"}' }) + .exactly(5).times + expect(context).to receive(:notice).with(/Invoke-DscResource collision detected: Please stagger the timing of your Puppet runs as this can lead to unexpected behaviour./).once + expect(context).to receive(:notice).with('Sleeping for 60 seconds.').exactly(5).times + expect(context).to receive(:notice).with(/Retrying: attempt [1-6] of 5/).exactly(5).times + expect(ps_manager).to receive(:execute).and_return({ stdout: '{"errormessage": "The Invoke-DscResource cmdlet is in progress and must return before Invoke-DscResource can be invoked"}' }) + expect(context).to receive(:notice).with(/Attempt [1-6] of 5 failed/).exactly(5).times + expect(context).to receive(:err).with(/The Invoke-DscResource cmdlet is in progress and must return before Invoke-DscResource can be invoked/) + allow(provider).to receive(:sleep) + expect(result).to be_nil + end + + it 'writes an error via context and fails to apply when encountering an unexpected error' do + expect(ps_manager).to receive(:execute).and_return({ stdout: '{"errormessage": "The Invoke-DscResource cmdlet is in progress and must return before Invoke-DscResource can be invoked"}' }) + expect(context).to receive(:notice).with(/Invoke-DscResource collision detected: Please stagger the timing of your Puppet runs as this can lead to unexpected behaviour./).once + expect(context).to receive(:notice).with('Sleeping for 60 seconds.').once + expect(context).to receive(:notice).with(/Retrying: attempt 1 of 5/).once + allow(provider).to receive(:sleep) + expect(ps_manager).to receive(:execute).and_return({ stdout: '{"errormessage": "Some unexpected error"}' }).once + expect(context).to receive(:err).with(/Some unexpected error/) + expect(result).to be_nil + end + end + context 'when the invocation script returns data without errors' do it 'filters for the correct properties to invoke and returns the results' do expect(ps_manager).to receive(:execute).with("Script: #{apply_props}").and_return({ stdout: '{"in_desired_state": true, "errormessage": null}' })