openzeppelin_relayer/jobs/
status_check_context.rs

1//! Status check context for circuit breaker decisions.
2//!
3//! This module provides the `StatusCheckContext` struct which carries failure tracking
4//! information to network handlers, enabling them to make intelligent decisions about
5//! when to force-finalize transactions that have exceeded retry limits.
6//!
7//! Two thresholds are used for circuit breaker decisions:
8//! - **Consecutive failures**: Triggers when RPC is completely down
9//! - **Total failures**: Safety net for flaky RPC that succeeds occasionally but keeps failing
10
11use crate::constants::{EVM_MAX_CONSECUTIVE_STATUS_FAILURES, EVM_MAX_TOTAL_STATUS_FAILURES};
12use crate::models::NetworkType;
13
/// Metadata key for tracking consecutive status check failures.
/// Resets to 0 on successful status check (even if transaction not final).
///
/// The value stored under this key is text; [`read_counter_from_metadata`] parses it
/// as a `u32` and yields 0 when the key is absent or the text is not a valid `u32`.
pub const META_CONSECUTIVE_FAILURES: &str = "consecutive_failures";
17
/// Metadata key for tracking total status check failures.
/// Never resets - useful for monitoring, alerting, and as a safety net circuit breaker.
///
/// The value stored under this key is text; [`read_counter_from_metadata`] parses it
/// as a `u32` and yields 0 when the key is absent or the text is not a valid `u32`.
pub const META_TOTAL_FAILURES: &str = "total_failures";
21
22/// Context for status check circuit breaker decisions.
23///
24/// This struct is passed to network handlers during status checks to provide
25/// failure tracking information. Handlers can use this to decide whether to
26/// force-finalize a transaction that has exceeded the maximum retry attempts.
27///
28/// The circuit breaker triggers when EITHER threshold is exceeded:
29/// - `consecutive_failures >= max_consecutive_failures` (RPC completely down)
30/// - `total_failures >= max_total_failures` (flaky RPC, safety net)
31///
32/// # Example
33///
34/// ```ignore
35/// let context = StatusCheckContext::new(
36///     consecutive_failures,
37///     total_failures,
38///     total_retries,
39///     max_consecutive_failures,
40///     max_total_failures,
41///     NetworkType::Stellar,
42/// );
43///
44/// if context.should_force_finalize() {
45///     // Mark transaction as Failed with appropriate reason
46/// }
47/// ```
48#[derive(Debug, Clone)]
49pub struct StatusCheckContext {
50    /// Number of consecutive failures since last successful status check.
51    /// Resets to 0 when a status check succeeds (even if transaction not final).
52    pub consecutive_failures: u32,
53
54    /// Total number of failures across all status check attempts.
55    /// Never resets - serves as safety net for flaky RPC connections.
56    pub total_failures: u32,
57
58    /// Total number of retries (from Apalis attempt counter).
59    /// Includes both successful and failed attempts.
60    pub total_retries: u32,
61
62    /// Maximum consecutive failures allowed before forcing finalization.
63    /// Network-specific value from constants.
64    pub max_consecutive_failures: u32,
65
66    /// Maximum total failures allowed before forcing finalization.
67    /// Safety net for flaky RPC that occasionally succeeds (resetting consecutive counter).
68    pub max_total_failures: u32,
69
70    /// The network type for this transaction.
71    pub network_type: NetworkType,
72}
73
74impl Default for StatusCheckContext {
75    fn default() -> Self {
76        Self {
77            consecutive_failures: 0,
78            total_failures: 0,
79            total_retries: 0,
80            max_consecutive_failures: EVM_MAX_CONSECUTIVE_STATUS_FAILURES,
81            max_total_failures: EVM_MAX_TOTAL_STATUS_FAILURES,
82            network_type: NetworkType::Evm,
83        }
84    }
85}
86
87impl StatusCheckContext {
88    /// Creates a new `StatusCheckContext` with the specified failure counts and limits.
89    ///
90    /// # Arguments
91    ///
92    /// * `consecutive_failures` - Current count of consecutive failures
93    /// * `total_failures` - Total count of all failures
94    /// * `total_retries` - Total Apalis retry attempts (includes successes)
95    /// * `max_consecutive_failures` - Network-specific consecutive max before force-finalization
96    /// * `max_total_failures` - Network-specific total max (safety net)
97    /// * `network_type` - The blockchain network type
98    pub fn new(
99        consecutive_failures: u32,
100        total_failures: u32,
101        total_retries: u32,
102        max_consecutive_failures: u32,
103        max_total_failures: u32,
104        network_type: NetworkType,
105    ) -> Self {
106        Self {
107            consecutive_failures,
108            total_failures,
109            total_retries,
110            max_consecutive_failures,
111            max_total_failures,
112            network_type,
113        }
114    }
115
116    /// Determines if the circuit breaker should force-finalize the transaction.
117    ///
118    /// Returns `true` if EITHER threshold is exceeded:
119    /// - Consecutive failures reached the network-specific maximum (RPC completely down)
120    /// - Total failures reached the network-specific maximum (flaky RPC safety net)
121    pub fn should_force_finalize(&self) -> bool {
122        self.consecutive_failures >= self.max_consecutive_failures
123            || self.total_failures >= self.max_total_failures
124    }
125
126    /// Returns true if triggered by consecutive failures threshold.
127    pub fn triggered_by_consecutive(&self) -> bool {
128        self.consecutive_failures >= self.max_consecutive_failures
129    }
130
131    /// Returns true if triggered by total failures threshold (safety net).
132    pub fn triggered_by_total(&self) -> bool {
133        self.total_failures >= self.max_total_failures
134    }
135}
136
/// Looks up a failure counter stored as text in job metadata.
///
/// # Arguments
///
/// * `metadata` - Optional metadata map attached to the job
/// * `key` - The metadata key whose value should be read
///
/// # Returns
///
/// The stored value parsed as `u32`. Falls back to 0 when there is no metadata,
/// the key is absent, or the stored text is not a valid `u32`.
pub fn read_counter_from_metadata(
    metadata: &Option<std::collections::HashMap<String, String>>,
    key: &str,
) -> u32 {
    // Step 1: find the raw text, if both the map and the entry exist.
    let raw = match metadata {
        Some(entries) => entries.get(key),
        None => None,
    };

    // Step 2: parse it; anything missing or malformed counts as zero.
    raw.map_or(0, |text| text.parse::<u32>().unwrap_or(0))
}
157
/// Unit tests for the circuit breaker thresholds and the metadata counter reader.
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashMap;

    #[test]
    fn test_status_check_context_default() {
        let ctx = StatusCheckContext::default();
        assert_eq!(ctx.consecutive_failures, 0);
        assert_eq!(ctx.total_failures, 0);
        assert_eq!(ctx.total_retries, 0);
        assert_eq!(
            ctx.max_consecutive_failures,
            EVM_MAX_CONSECUTIVE_STATUS_FAILURES
        );
        assert_eq!(ctx.max_total_failures, EVM_MAX_TOTAL_STATUS_FAILURES);
        assert_eq!(ctx.network_type, NetworkType::Evm);
    }

    #[test]
    fn test_status_check_context_new() {
        let ctx = StatusCheckContext::new(5, 10, 20, 15, 45, NetworkType::Stellar);
        assert_eq!(ctx.consecutive_failures, 5);
        assert_eq!(ctx.total_failures, 10);
        assert_eq!(ctx.total_retries, 20);
        assert_eq!(ctx.max_consecutive_failures, 15);
        assert_eq!(ctx.max_total_failures, 45);
        assert_eq!(ctx.network_type, NetworkType::Stellar);
    }

    #[test]
    fn test_should_force_finalize_below_both_thresholds() {
        // consecutive: 5 < 15, total: 10 < 45
        let ctx = StatusCheckContext::new(5, 10, 20, 15, 45, NetworkType::Evm);
        assert!(!ctx.should_force_finalize());
    }

    #[test]
    fn test_should_force_finalize_one_below_each_threshold() {
        // Boundary: consecutive 14 < 15 and total 44 < 45, so neither limit is reached yet.
        let ctx = StatusCheckContext::new(14, 44, 60, 15, 45, NetworkType::Evm);
        assert!(!ctx.should_force_finalize());
        assert!(!ctx.triggered_by_consecutive());
        assert!(!ctx.triggered_by_total());
    }

    #[test]
    fn test_should_force_finalize_consecutive_at_threshold() {
        // consecutive: 15 >= 15 (triggers), total: 20 < 45
        let ctx = StatusCheckContext::new(15, 20, 30, 15, 45, NetworkType::Evm);
        assert!(ctx.should_force_finalize());
        assert!(ctx.triggered_by_consecutive());
        assert!(!ctx.triggered_by_total());
    }

    #[test]
    fn test_should_force_finalize_total_at_threshold() {
        // consecutive: 5 < 15, total: 45 >= 45 (triggers - safety net)
        let ctx = StatusCheckContext::new(5, 45, 50, 15, 45, NetworkType::Evm);
        assert!(ctx.should_force_finalize());
        assert!(!ctx.triggered_by_consecutive());
        assert!(ctx.triggered_by_total());
    }

    #[test]
    fn test_should_force_finalize_both_exceeded() {
        // Both thresholds exceeded
        let ctx = StatusCheckContext::new(20, 50, 60, 15, 45, NetworkType::Evm);
        assert!(ctx.should_force_finalize());
        assert!(ctx.triggered_by_consecutive());
        assert!(ctx.triggered_by_total());
    }

    #[test]
    fn test_flaky_rpc_scenario() {
        // Simulates flaky RPC: consecutive keeps resetting but total grows
        // consecutive: 3 < 15, total: 100 >= 45 (triggers safety net)
        let ctx = StatusCheckContext::new(3, 100, 150, 15, 45, NetworkType::Evm);
        assert!(ctx.should_force_finalize());
        assert!(!ctx.triggered_by_consecutive());
        assert!(ctx.triggered_by_total());
    }

    #[test]
    fn test_read_counter_from_metadata_present() {
        let mut metadata = HashMap::new();
        metadata.insert(META_CONSECUTIVE_FAILURES.to_string(), "5".to_string());
        let result = read_counter_from_metadata(&Some(metadata), META_CONSECUTIVE_FAILURES);
        assert_eq!(result, 5);
    }

    #[test]
    fn test_read_counter_from_metadata_keys_are_independent() {
        // Each counter lives under its own key; reading one must not return the other.
        let mut metadata = HashMap::new();
        metadata.insert(META_CONSECUTIVE_FAILURES.to_string(), "3".to_string());
        metadata.insert(META_TOTAL_FAILURES.to_string(), "40".to_string());
        let metadata = Some(metadata);
        assert_eq!(
            read_counter_from_metadata(&metadata, META_CONSECUTIVE_FAILURES),
            3
        );
        assert_eq!(read_counter_from_metadata(&metadata, META_TOTAL_FAILURES), 40);
    }

    #[test]
    fn test_read_counter_from_metadata_missing() {
        let metadata: HashMap<String, String> = HashMap::new();
        let result = read_counter_from_metadata(&Some(metadata), META_CONSECUTIVE_FAILURES);
        assert_eq!(result, 0);
    }

    #[test]
    fn test_read_counter_from_metadata_none() {
        let result = read_counter_from_metadata(&None, META_CONSECUTIVE_FAILURES);
        assert_eq!(result, 0);
    }

    #[test]
    fn test_read_counter_from_metadata_invalid() {
        let mut metadata = HashMap::new();
        metadata.insert(
            META_CONSECUTIVE_FAILURES.to_string(),
            "not_a_number".to_string(),
        );
        let result = read_counter_from_metadata(&Some(metadata), META_CONSECUTIVE_FAILURES);
        assert_eq!(result, 0);
    }

    #[test]
    fn test_read_counter_from_metadata_negative() {
        // A negative number is not a valid u32, so it falls back to 0.
        let mut metadata = HashMap::new();
        metadata.insert(META_TOTAL_FAILURES.to_string(), "-1".to_string());
        let result = read_counter_from_metadata(&Some(metadata), META_TOTAL_FAILURES);
        assert_eq!(result, 0);
    }

    #[test]
    fn test_read_counter_from_metadata_overflow() {
        // u32::MAX still parses; one past it does not fit and falls back to 0.
        let mut metadata = HashMap::new();
        metadata.insert(
            META_CONSECUTIVE_FAILURES.to_string(),
            "4294967295".to_string(),
        );
        metadata.insert(META_TOTAL_FAILURES.to_string(), "4294967296".to_string());
        let metadata = Some(metadata);
        assert_eq!(
            read_counter_from_metadata(&metadata, META_CONSECUTIVE_FAILURES),
            u32::MAX
        );
        assert_eq!(read_counter_from_metadata(&metadata, META_TOTAL_FAILURES), 0);
    }
}
263}