// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at https://mozilla.org/MPL/2.0/.
//! Common helper functions for instance-related sagas.
use std::net::{IpAddr, Ipv6Addr};
use crate::Nexus;
use nexus_db_model::{
ByteCount, ExternalIp, InstanceState, IpAttachState, Ipv4NatEntry,
SledReservationConstraints, SledResource, VmmState,
};
use nexus_db_queries::authz;
use nexus_db_queries::db::lookup::LookupPath;
use nexus_db_queries::{authn, context::OpContext, db, db::DataStore};
use omicron_common::api::external::{Error, NameOrId};
use omicron_uuid_kinds::{GenericUuid, InstanceUuid, PropolisUuid, SledUuid};
use serde::{Deserialize, Serialize};
use steno::ActionError;
use super::NexusActionContext;
/// The port propolis-server listens on inside the propolis zone.
const DEFAULT_PROPOLIS_PORT: u16 = 12400;
#[derive(Clone, Debug, Serialize, Deserialize)]
pub(super) struct VmmAndSledIds {
pub(super) vmm_id: PropolisUuid,
pub(super) sled_id: SledUuid,
}
/// Reserves resources for a new VMM whose instance has `ncpus` guest logical
/// processors and `guest_memory` bytes of guest RAM. The selected sled is
/// random within the set of sleds allowed by the supplied `constraints`.
///
/// This function succeeds idempotently if called repeatedly with the same
/// `propolis_id`.
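///
/// # Example
///
/// A minimal sketch of a call site, assuming a saga action where `nexus`,
/// `propolis_id`, and `constraints` are already in hand (the CPU and memory
/// values are illustrative):
///
/// ```ignore
/// let resource = reserve_vmm_resources(
///     &nexus,
///     propolis_id,
///     4,                                          // guest logical processors
///     ByteCount::try_from(8_i64 << 30).unwrap(),  // 8 GiB of guest RAM
///     constraints,
/// )
/// .await?;
/// ```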
pub async fn reserve_vmm_resources(
nexus: &Nexus,
propolis_id: PropolisUuid,
ncpus: u32,
guest_memory: ByteCount,
constraints: SledReservationConstraints,
) -> Result<SledResource, ActionError> {
// ALLOCATION POLICY
//
// NOTE: This policy can - and should! - be changed.
//
// See https://rfd.shared.oxide.computer/rfd/0205 for a more complete
// discussion.
//
// Right now, allocate an instance to any random sled agent. This has a few
// problems:
//
// - There's no consideration for "health of the sled" here, other than
// "time_deleted = Null". If the sled is rebooting, in a known unhealthy
// state, etc., we would still provision to it here. I don't think this is
// a trivial fix, but it's work we'll need to account for eventually.
//
// - This is selecting a random sled from all sleds in the cluster. For
// multi-rack, this is going to fling the instance to an arbitrary system.
// Maybe that's okay, but worth knowing about explicitly.
//
// - This doesn't take into account anti-affinity - users will want to
// schedule instances that belong to a cluster on different failure
// domains. See https://github.com/oxidecomputer/omicron/issues/1705.
let resources = db::model::Resources::new(
ncpus,
ByteCount::try_from(0i64).unwrap(),
guest_memory,
);
let resource = nexus
.reserve_on_random_sled(
propolis_id.into_untyped_uuid(),
nexus_db_model::SledResourceKind::Instance,
resources,
constraints,
)
.await
.map_err(ActionError::action_failed)?;
Ok(resource)
}
/// Creates a new VMM record from the supplied IDs and stores it in the supplied
/// datastore.
///
/// This function succeeds idempotently if called repeatedly with the same
/// parameters, provided that the VMM record was not mutated by some other actor
/// after the calling saga inserted it.
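///
/// # Example
///
/// A sketch of the allocate-then-insert flow as a saga might run it, assuming
/// `opctx`, `datastore`, and the various IDs come from the saga context:
///
/// ```ignore
/// // Allocate an IPv6 address for propolis-server on the chosen sled, then
/// // record the new VMM.
/// let propolis_ip = allocate_vmm_ipv6(&opctx, datastore, sled_id).await?;
/// let vmm = create_and_insert_vmm_record(
///     datastore,
///     &opctx,
///     instance_id,
///     propolis_id,
///     sled_id,
///     propolis_ip,
/// )
/// .await?;
/// ```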
pub async fn create_and_insert_vmm_record(
datastore: &DataStore,
opctx: &OpContext,
instance_id: InstanceUuid,
propolis_id: PropolisUuid,
sled_id: SledUuid,
propolis_ip: Ipv6Addr,
) -> Result<db::model::Vmm, ActionError> {
let vmm = db::model::Vmm::new(
propolis_id,
instance_id,
sled_id,
IpAddr::V6(propolis_ip).into(),
DEFAULT_PROPOLIS_PORT,
);
let vmm = datastore
.vmm_insert(&opctx, vmm)
.await
.map_err(ActionError::action_failed)?;
Ok(vmm)
}
/// Allocates a new IPv6 address for a propolis instance that will run on the
/// supplied sled.
pub(super) async fn allocate_vmm_ipv6(
opctx: &OpContext,
datastore: &DataStore,
sled_uuid: SledUuid,
) -> Result<Ipv6Addr, ActionError> {
datastore
.next_ipv6_address(opctx, sled_uuid)
.await
.map_err(ActionError::action_failed)
}
/// External IP state needed for IP attach/detach.
///
/// This holds a record of the mid-processing external IP, where possible.
/// There are cases where this might not be known (e.g., double detach of an
/// ephemeral IP).
///
/// In particular, we need to explicitly no-op if `do_saga` is false, to
/// prevent failures caused by instance state changes from knocking out a
/// valid IP binding.
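///
/// For example, a double detach of an ephemeral IP might be represented by a
/// no-op marker like this (a sketch; the struct is normally produced by a
/// preceding database action):
///
/// ```ignore
/// // No IP record was found mid-detach, so downstream steps should no-op.
/// let noop = ModifyStateForExternalIp { external_ip: None, do_saga: false };
/// ```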
#[derive(Debug, Deserialize, Serialize)]
pub struct ModifyStateForExternalIp {
pub external_ip: Option<ExternalIp>,
pub do_saga: bool,
}
/// Move an external IP from one state to another as a saga operation,
/// returning `Ok(true)` if the record was successfully moved and `Ok(false)`
/// if the record was lost.
///
/// Returns `Err` if the requested state transition is illegal or if more than
/// one row was updated; both are programmer errors.
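///
/// # Example
///
/// A sketch of how a calling saga action might use the result, assuming
/// `sagactx`, `serialized_authn`, and `ip` come from the saga context:
///
/// ```ignore
/// let moved = instance_ip_move_state(
///     &sagactx,
///     &serialized_authn,
///     IpAttachState::Attaching,
///     IpAttachState::Attached,
///     &ip,
/// )
/// .await?;
/// if !moved {
///     // The record was deleted or changed under us; the caller decides
///     // whether that is fatal or an acceptable no-op.
/// }
/// ```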
pub async fn instance_ip_move_state(
sagactx: &NexusActionContext,
serialized_authn: &authn::saga::Serialized,
from: IpAttachState,
to: IpAttachState,
new_ip: &ModifyStateForExternalIp,
) -> Result<bool, ActionError> {
let osagactx = sagactx.user_data();
let datastore = osagactx.datastore();
let opctx =
crate::context::op_context_for_saga_action(&sagactx, serialized_authn);
if !new_ip.do_saga {
return Ok(true);
}
let Some(new_ip) = new_ip.external_ip.as_ref() else {
return Err(ActionError::action_failed(Error::internal_error(
"tried to `do_saga` without valid external IP",
)));
};
match datastore
.external_ip_complete_op(&opctx, new_ip.id, new_ip.kind, from, to)
.await
.map_err(ActionError::action_failed)?
{
0 => Ok(false),
1 => Ok(true),
_ => Err(ActionError::action_failed(Error::internal_error(
"ip state change affected > 1 row",
))),
}
}
/// Yields the sled on which an instance is found to be running so that IP
/// attachment and detachment operations can be propagated there.
///
/// # Preconditions
///
/// To synchronize correctly with other concurrent operations on an instance,
/// the calling saga must have placed the IP it is attaching or detaching into
/// the Attaching or Detaching state so that concurrent attempts to start the
/// instance will notice that the IP state is in flux and ask the caller to
/// retry.
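///
/// A sketch of the expected call order in an attach saga (context values are
/// assumed):
///
/// ```ignore
/// // 1. Park the IP in a transient state so a concurrent instance-start
/// //    saga will see it and bail out.
/// instance_ip_move_state(
///     &sagactx,
///     &serialized_authn,
///     IpAttachState::Detached,
///     IpAttachState::Attaching,
///     &ip,
/// )
/// .await?;
///
/// // 2. Only now is it safe to ask where the instance is running.
/// let ids = instance_ip_get_instance_state(
///     &sagactx,
///     &serialized_authn,
///     &authz_instance,
///     "attach",
/// )
/// .await?;
/// ```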
pub(super) async fn instance_ip_get_instance_state(
sagactx: &NexusActionContext,
serialized_authn: &authn::saga::Serialized,
authz_instance: &authz::Instance,
verb: &str,
) -> Result<Option<VmmAndSledIds>, ActionError> {
// XXX: With the current design we can get instance state (but not sled ID)
// in the same transaction as attach (but not detach). We need to re-query
// for the sled ID anyhow, so keep attach and detach consistent.
let osagactx = sagactx.user_data();
let datastore = osagactx.datastore();
let opctx =
crate::context::op_context_for_saga_action(&sagactx, serialized_authn);
let inst_and_vmm = datastore
.instance_fetch_with_vmm(&opctx, authz_instance)
.await
.map_err(ActionError::action_failed)?;
let found_vmm_state =
inst_and_vmm.vmm().as_ref().map(|vmm| vmm.runtime.state);
let found_instance_state =
inst_and_vmm.instance().runtime_state.nexus_state;
let mut propolis_and_sled_id =
inst_and_vmm.vmm().as_ref().map(|vmm| VmmAndSledIds {
vmm_id: PropolisUuid::from_untyped_uuid(vmm.id),
sled_id: SledUuid::from_untyped_uuid(vmm.sled_id),
});
slog::debug!(
osagactx.log(), "evaluating instance state for IP attach/detach";
"instance_state" => ?found_instance_state,
"vmm_state" => ?found_vmm_state
);
// Arriving here means we started in a correct state (running/stopped).
// We need to consider how we interact with the other sagas/ops:
// - stopping: this is not sagaized, and the propolis/sled-agent might
// go away. Act as though stopped if we catch it here,
// otherwise convert OPTE ensure to 'service unavailable'
// and undo.
// - deleting: can only be called from stopped -- we won't push to dpd
// or sled-agent, and IP record might be deleted or forcibly
// detached. Catch here just in case.
// - starting: see below.
match (found_instance_state, found_vmm_state) {
// If there's no VMM, the instance is definitely not on any sled.
(InstanceState::NoVmm, _) | (_, Some(VmmState::SagaUnwound)) => {
propolis_and_sled_id = None;
}
// If the instance is running normally or rebooting, it's resident on
// the sled given by its VMM record.
(
InstanceState::Vmm,
Some(VmmState::Running) | Some(VmmState::Rebooting),
) => {}
// If the VMM is in the Creating, Starting, Migrating, Stopping, or
// Stopped states, its sled assignment is in doubt, so report a transient
// state error and ask the caller to retry.
//
// Although an instance with a Starting (or Creating) VMM has a sled
// assignment, there's no way to tell at this point whether or not
// there's a concurrent instance-start saga that has passed the point
// where it sends IP assignments to the instance's new sled:
//
// - If the start saga is still in progress and hasn't pushed any IP
// information to the instance's new sled yet, then either of two
// things can happen:
// - This function's caller can finish modifying IPs before the start
// saga propagates IP information to the sled. In this case the
// calling saga should do nothing--the start saga will send the
// right IP set to the sled.
// - If the start saga "wins" the race, it will see that the instance
// still has an attaching/detaching IP and bail out.
// - If the start saga is already done, and Nexus is just waiting for
// the VMM to report that it's Running, the calling saga needs to
// send the IP change to the instance's sled.
//
// There's no way to distinguish these cases, so if a VMM is Starting,
// block the attach/detach.
(
InstanceState::Vmm,
Some(state @ VmmState::Starting)
| Some(state @ VmmState::Migrating)
| Some(state @ VmmState::Stopping)
| Some(state @ VmmState::Stopped)
| Some(state @ VmmState::Creating),
) => {
return Err(ActionError::action_failed(Error::unavail(&format!(
"can't {verb} in transient state {state}"
))));
}
(InstanceState::Destroyed, _) => {
return Err(ActionError::action_failed(Error::not_found_by_id(
omicron_common::api::external::ResourceType::Instance,
&authz_instance.id(),
)))
}
(InstanceState::Creating, _) => {
return Err(ActionError::action_failed(Error::invalid_request(
"cannot modify instance IPs, instance is still being created",
)))
}
(InstanceState::Failed, _)
| (InstanceState::Vmm, Some(VmmState::Failed)) => {
return Err(ActionError::action_failed(Error::invalid_request(
"cannot modify instance IPs, instance is in unhealthy state",
)))
}
// This case represents an inconsistency in the database. It should
// never happen, but don't blow up Nexus if it somehow does.
(InstanceState::Vmm, None) => {
return Err(ActionError::action_failed(Error::internal_error(
&format!(
"instance {} is in the 'VMM' state but has no VMM ID",
authz_instance.id(),
),
)));
}
(InstanceState::Vmm, Some(VmmState::Destroyed)) => {
return Err(ActionError::action_failed(Error::internal_error(
&format!(
"instance {} points to destroyed VMM",
authz_instance.id(),
),
)));
}
}
Ok(propolis_and_sled_id)
}
/// Adds a NAT entry to DPD, routing packets bound for `target_ip` to a
/// target sled.
///
/// This call is a no-op if `sled_uuid` is `None` or the saga is explicitly
/// set to be inactive in the event of a double attach/detach
/// (`!target_ip.do_saga`).
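///
/// A sketch of a call site; `Some` carries the NAT entry that a later undo
/// step would need to remove (context values are assumed):
///
/// ```ignore
/// let nat_entry: Option<Ipv4NatEntry> = instance_ip_add_nat(
///     &sagactx,
///     &serialized_authn,
///     &authz_instance,
///     ids.as_ref().map(|ids| ids.sled_id),
///     target_ip,
/// )
/// .await?;
/// ```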
pub async fn instance_ip_add_nat(
sagactx: &NexusActionContext,
serialized_authn: &authn::saga::Serialized,
authz_instance: &authz::Instance,
sled_uuid: Option<SledUuid>,
target_ip: ModifyStateForExternalIp,
) -> Result<Option<Ipv4NatEntry>, ActionError> {
let osagactx = sagactx.user_data();
let datastore = osagactx.datastore();
let opctx =
crate::context::op_context_for_saga_action(&sagactx, serialized_authn);
// No physical sled? Don't push NAT.
let Some(sled_uuid) = sled_uuid else {
return Ok(None);
};
if !target_ip.do_saga {
return Ok(None);
}
let Some(target_ip) = target_ip.external_ip else {
return Err(ActionError::action_failed(Error::internal_error(
"tried to `do_saga` without valid external IP",
)));
};
// Querying sleds requires fleet access; use the instance allocator context
// for this.
let (.., sled) = LookupPath::new(&osagactx.nexus().opctx_alloc, &datastore)
.sled_id(sled_uuid.into_untyped_uuid())
.fetch()
.await
.map_err(ActionError::action_failed)?;
osagactx
.nexus()
.instance_ensure_dpd_config(
&opctx,
InstanceUuid::from_untyped_uuid(authz_instance.id()),
&sled.address(),
Some(target_ip.id),
)
.await
.and_then(|v| {
v.into_iter().next().map(Some).ok_or_else(|| {
Error::internal_error(
"NAT RPW failed to return concrete NAT entry",
)
})
})
.map_err(ActionError::action_failed)
}
/// Remove a single NAT entry from DPD, dropping packets bound for `target_ip`.
///
/// This call is a no-op if `sled_uuid` is `None` or the saga is explicitly
/// set to be inactive in the event of a double attach/detach
/// (`!target_ip.do_saga`).
pub async fn instance_ip_remove_nat(
sagactx: &NexusActionContext,
serialized_authn: &authn::saga::Serialized,
sled_uuid: Option<SledUuid>,
target_ip: ModifyStateForExternalIp,
) -> Result<(), ActionError> {
let osagactx = sagactx.user_data();
let opctx =
crate::context::op_context_for_saga_action(&sagactx, serialized_authn);
// No physical sled? Don't push NAT.
if sled_uuid.is_none() {
return Ok(());
};
if !target_ip.do_saga {
return Ok(());
}
let Some(target_ip) = target_ip.external_ip else {
return Err(ActionError::action_failed(Error::internal_error(
"tried to `do_saga` without valid external IP",
)));
};
osagactx
.nexus()
.external_ip_delete_dpd_config(&opctx, &target_ip)
.await
.map_err(ActionError::action_failed)?;
Ok(())
}
/// Inform the OPTE port for a running instance that it should start
/// sending/receiving traffic on a given IP address.
///
/// This call is a no-op if the instance is not active (`vmm_and_sled` is
/// `None`) or the calling saga is explicitly set to be inactive in the event
/// of a double attach/detach (`!target_ip.do_saga`).
pub(super) async fn instance_ip_add_opte(
sagactx: &NexusActionContext,
vmm_and_sled: Option<VmmAndSledIds>,
target_ip: ModifyStateForExternalIp,
) -> Result<(), ActionError> {
let osagactx = sagactx.user_data();
// No physical sled? Don't inform OPTE.
let Some(VmmAndSledIds { vmm_id: propolis_id, sled_id }) = vmm_and_sled
else {
return Ok(());
};
if !target_ip.do_saga {
return Ok(());
}
let Some(target_ip) = target_ip.external_ip else {
return Err(ActionError::action_failed(Error::internal_error(
"tried to `do_saga` without valid external IP",
)));
};
let sled_agent_body =
target_ip.try_into().map_err(ActionError::action_failed)?;
osagactx
.nexus()
.sled_client(&sled_id)
.await
.map_err(|_| {
ActionError::action_failed(Error::unavail(
"sled agent client went away mid-attach/detach",
))
})?
.vmm_put_external_ip(&propolis_id, &sled_agent_body)
.await
.map_err(|e| {
ActionError::action_failed(match e {
progenitor_client::Error::CommunicationError(_) => {
Error::unavail(
"sled agent client went away mid-attach/detach",
)
}
e => Error::internal_error(&format!("{e}")),
})
})?;
Ok(())
}
/// Inform the OPTE port for a running instance that it should cease
/// sending/receiving traffic on a given IP address.
///
/// This call is a no-op if the instance is not active (`propolis_and_sled` is
/// `None`) or the calling saga is explicitly set to be inactive in the event
/// of a double attach/detach (`!target_ip.do_saga`).
pub(super) async fn instance_ip_remove_opte(
sagactx: &NexusActionContext,
propolis_and_sled: Option<VmmAndSledIds>,
target_ip: ModifyStateForExternalIp,
) -> Result<(), ActionError> {
let osagactx = sagactx.user_data();
// No physical sled? Don't inform OPTE.
let Some(VmmAndSledIds { vmm_id: propolis_id, sled_id }) =
propolis_and_sled
else {
return Ok(());
};
if !target_ip.do_saga {
return Ok(());
}
let Some(target_ip) = target_ip.external_ip else {
return Err(ActionError::action_failed(Error::internal_error(
"tried to `do_saga` without valid external IP",
)));
};
let sled_agent_body =
target_ip.try_into().map_err(ActionError::action_failed)?;
osagactx
.nexus()
.sled_client(&sled_id)
.await
.map_err(|_| {
ActionError::action_failed(Error::unavail(
"sled agent client went away mid-attach/detach",
))
})?
.vmm_delete_external_ip(&propolis_id, &sled_agent_body)
.await
.map_err(|e| {
ActionError::action_failed(match e {
progenitor_client::Error::CommunicationError(_) => {
Error::unavail(
"sled agent client went away mid-attach/detach",
)
}
e => Error::internal_error(&format!("{e}")),
})
})?;
Ok(())
}
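/// The kind of external IP to attach to an instance: a new ephemeral IP
/// allocated from an (optionally specified) pool, or an existing floating IP.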
#[derive(Clone, Debug, Deserialize, Serialize)]
pub enum ExternalIpAttach {
Ephemeral { pool: Option<NameOrId> },
Floating { floating_ip: authz::FloatingIp },
}