diff --git a/chord/topology.go b/chord/topology.go
index 4f90ca3..5ec7407 100644
--- a/chord/topology.go
+++ b/chord/topology.go
@@ -113,6 +113,53 @@ func (n *Node) SetLocal(k ID, v string) bool {
 	return false
 }
 
+// SuccessorFailed marks the node's current successor as having failed, e.g.
+// during stabilization, and picks a replacement. A successor list with more
+// than one entry is shifted down; otherwise the first usable finger is taken.
+// If that leaves the successor unchanged, the predecessor is used when valid,
+// and failing that the node itself. n.mu is held for the duration of the call.
+func (n *Node) SuccessorFailed() {
+	// TODO(branden): probably most of this will need to be rewritten once
+	// replication and fingers are implemented
+	n.mu.Lock()
+	defer n.mu.Unlock()
+	old := n.succ[0]
+	// If there are only two nodes in the network, the predecessor is also the
+	// successor, which means it too has failed. Clear it in that case.
+	if old.addr == n.pred.addr {
+		n.pred = Peer{}
+	}
+	if len(n.succ) > 1 {
+		// The successor list has replication. Just shift it down.
+		copy(n.succ, n.succ[1:])
+		n.succ = n.succ[:len(n.succ)-1]
+	} else {
+		// Check the finger table.
+		for _, f := range n.fingers {
+			// Skip invalid entries, ourselves, and the node that just failed.
+			// Without the last check a finger that still points at the old
+			// successor would end the search before later fingers are tried.
+			if !f.IsValid() || f.addr == n.self.addr || f.addr == old.addr {
+				continue
+			}
+			// TODO(branden): ensure the list stays sorted
+			n.succ[0] = f
+			break
+		}
+	}
+	if n.succ[0] != old {
+		return
+	}
+	// We couldn't find a new successor in either the successor list or the
+	// finger table. The only other candidate we have is our predecessor.
+	// Stabilization will eventually work us out even if that's wrong.
+	if n.pred.IsValid() {
+		n.succ[0] = n.pred
+		return
+	}
+	n.succ[0] = n.self
+}
+
 // Peer is the ID and address of a node.
 type Peer struct {
 	id ID
diff --git a/main.go b/main.go
index d82691a..2e4b965 100644
--- a/main.go
+++ b/main.go
@@ -181,6 +181,7 @@ func cliJoin(ctx context.Context, cmd *cli.Command) error {
 		for range t.C {
 			if err := chord.Stabilize(ctx, cl, node); err != nil {
 				slog.ErrorContext(ctx, "stabilize", slog.Any("err", err))
+				node.SuccessorFailed()
 			}
 		}
 	}()