Skip to main content

Mountain/RunTime/Shutdown/
ShutdownCocoonWithRetry.rs

1#![allow(non_snake_case)]
2
3//! Send `$shutdown` over gRPC to Cocoon (3 attempts), then SIGKILL the child
4//! regardless of gRPC outcome. The hard-kill (Atom I6) is critical: a gRPC
5//! failure (transport error, broken pipe) used to leave the child orphaned,
6//! holding port 50052, and the next Mountain launch hit EADDRINUSE with the
7//! extension host stuck in degraded mode.
8
9use std::sync::Arc;
10
11use CommonLibrary::{Environment::Requires::Requires, Error::CommonError::CommonError, IPC::IPCProvider::IPCProvider};
12
13use crate::{RunTime::ApplicationRunTime::ApplicationRunTime, dev_log};
14
15impl ApplicationRunTime {
16	pub async fn ShutdownCocoonWithRetry(&self) -> Result<(), CommonError> {
17		let IPCProvider:Arc<dyn IPCProvider> = self.Environment.Require();
18
19		let MaximumAttempts = 3;
20
21		let mut Attempts = 0;
22
23		let mut GracefulOk = false;
24
25		let mut LastError:Option<CommonError> = None;
26
27		while Attempts < MaximumAttempts {
28			match IPCProvider
29				.SendNotificationToSideCar("cocoon-main".to_string(), "$shutdown".to_string(), serde_json::Value::Null)
30				.await
31			{
32				Ok(()) => {
33					tokio::time::sleep(tokio::time::Duration::from_millis(500)).await;
34
35					GracefulOk = true;
36
37					break;
38				},
39
40				Err(Error) => {
41					Attempts += 1;
42
43					LastError = Some(Error.clone());
44
45					if Attempts < MaximumAttempts {
46						dev_log!(
47							"lifecycle",
48							"warn: [ApplicationRunTime] Cocoon shutdown attempt {} failed: {}. Retrying...",
49							Attempts,
50							Error
51						);
52
53						tokio::time::sleep(tokio::time::Duration::from_millis(1000)).await;
54					}
55				},
56			}
57		}
58
59		// Mark the Vine gRPC client shutting down BEFORE the SIGKILL so any
60		// background tokio task firing `SendNotification` after this flips
61		// short-circuits to `Ok(())` instead of attempting a TCP connect to
62		// the dead socket and logging a false-positive `Connection refused`.
63		crate::Vine::Client::MarkShutdown::Fn();
64
65		// Atom I6: always reap the child after the graceful attempt. No-op if
66		// the child already exited from $shutdown.
67		crate::ProcessManagement::CocoonManagement::HardKillCocoon().await;
68
69		if GracefulOk {
70			Ok(())
71		} else {
72			Err(LastError.unwrap_or_else(|| {
73				CommonError::Unknown { Description:"Failed to shutdown Cocoon after maximum retries".to_string() }
74			}))
75		}
76	}
77}