philipp / rpms / dhcp

Forked from rpms/dhcp 4 years ago
Clone

Blame SOURCES/dhcp-4.2.5-failover-potential-conflict.patch

45d60a
diff --git a/includes/dhcpd.h b/includes/dhcpd.h
45d60a
index 7e756e0..52ba677 100644
45d60a
--- a/includes/dhcpd.h
45d60a
+++ b/includes/dhcpd.h
45d60a
@@ -3347,6 +3347,7 @@ isc_result_t dhcp_failover_state_signal (omapi_object_t *,
45d60a
 isc_result_t dhcp_failover_state_transition (dhcp_failover_state_t *,
45d60a
 					     const char *);
45d60a
 isc_result_t dhcp_failover_set_service_state (dhcp_failover_state_t *state);
45d60a
+void dhcp_failover_rescind_updates (dhcp_failover_state_t *);
45d60a
 isc_result_t dhcp_failover_set_state (dhcp_failover_state_t *,
45d60a
 				      enum failover_state);
45d60a
 isc_result_t dhcp_failover_peer_state_changed (dhcp_failover_state_t *,
45d60a
diff --git a/server/failover.c b/server/failover.c
45d60a
index 8944102..6083672 100644
45d60a
--- a/server/failover.c
45d60a
+++ b/server/failover.c
45d60a
@@ -1520,8 +1520,16 @@ isc_result_t dhcp_failover_state_transition (dhcp_failover_state_t *state,
45d60a
 		      /* In these situations, we remain in the current
45d60a
 		       * state, or if in startup enter those states.
45d60a
 		       */
45d60a
-		      case communications_interrupted:
45d60a
 		      case conflict_done:
45d60a
+			/* As the peer may not have received or may have
45d60a
+			 * lost track of updates we sent previously we
45d60a
+			 * rescind them, causing us to retransmit them
45d60a
+			 * on an update request.
45d60a
+			 */
45d60a
+			dhcp_failover_rescind_updates(state);
45d60a
+			/* fall through */
45d60a
+
45d60a
+		      case communications_interrupted:
45d60a
 		      case partner_down:
45d60a
 		      case paused:
45d60a
 		      case recover:
45d60a
@@ -1704,6 +1712,52 @@ isc_result_t dhcp_failover_set_service_state (dhcp_failover_state_t *state)
45d60a
 	return ISC_R_SUCCESS;
45d60a
 }
45d60a
 
45d60a
+/*!
45d60a
+ * \brief Return any leases on the ack queue back to the update queue
45d60a
+ *
45d60a
+ * Re-schedule any pending updates by moving them from the ack queue
45d60a
+ * (update sent awaiting response) back to the update queue (need to
45d60a
+ * send an update for this lease).  This will result in a retransmission
45d60a
+ * of the update.
45d60a
+ *
45d60a
+ * \param state is the state block for the failover connection we are
45d60a
+ * updating.
45d60a
+ */
45d60a
+
45d60a
+void dhcp_failover_rescind_updates (dhcp_failover_state_t *state)
45d60a
+{
45d60a
+    struct lease *lp;
45d60a
+
45d60a
+    if (state->ack_queue_tail == NULL)
45d60a
+	    return;
45d60a
+
45d60a
+    /* Zap the flags. */
45d60a
+    for (lp = state->ack_queue_head; lp; lp = lp->next_pending)
45d60a
+	    lp->flags = ((lp->flags & ~ON_ACK_QUEUE) | ON_UPDATE_QUEUE);
45d60a
+
45d60a
+    /* Now hook the ack queue to the beginning of the update queue. */
45d60a
+    if (state->update_queue_head) {
45d60a
+	    lease_reference(&state->ack_queue_tail->next_pending,
45d60a
+			    state->update_queue_head, MDL);
45d60a
+	    lease_dereference(&state->update_queue_head, MDL);
45d60a
+    }
45d60a
+    lease_reference(&state->update_queue_head, state->ack_queue_head, MDL);
45d60a
+
45d60a
+    if (!state->update_queue_tail) {
45d60a
+#if defined (POINTER_DEBUG)
45d60a
+	    if (state->ack_queue_tail->next_pending) {
45d60a
+		    log_error("next pending on ack queue tail.");
45d60a
+		    abort();
45d60a
+	    }
45d60a
+#endif
45d60a
+	    lease_reference(&state->update_queue_tail,
45d60a
+			    state->ack_queue_tail, MDL);
45d60a
+    }
45d60a
+    lease_dereference(&state->ack_queue_tail, MDL);
45d60a
+    lease_dereference(&state->ack_queue_head, MDL);
45d60a
+    state->cur_unacked_updates = 0;
45d60a
+}
45d60a
+
45d60a
 isc_result_t dhcp_failover_set_state (dhcp_failover_state_t *state,
45d60a
 				      enum failover_state new_state)
45d60a
 {
45d60a
@@ -1724,37 +1778,9 @@ isc_result_t dhcp_failover_set_state (dhcp_failover_state_t *state,
45d60a
       case normal:
45d60a
       case potential_conflict:
45d60a
       case partner_down:
45d60a
-	if (state -> ack_queue_tail) {
45d60a
-	    struct lease *lp;
45d60a
-		
45d60a
-	    /* Zap the flags. */
45d60a
-	    for (lp = state -> ack_queue_head; lp; lp = lp -> next_pending)
45d60a
-		    lp -> flags = ((lp -> flags & ~ON_ACK_QUEUE) |
45d60a
-				   ON_UPDATE_QUEUE);
45d60a
-
45d60a
-	    /* Now hook the ack queue to the beginning of the update
45d60a
-	       queue. */
45d60a
-	    if (state -> update_queue_head) {
45d60a
-		lease_reference (&state -> ack_queue_tail -> next_pending,
45d60a
-				 state -> update_queue_head, MDL);
45d60a
-		lease_dereference (&state -> update_queue_head, MDL);
45d60a
-	    }
45d60a
-	    lease_reference (&state -> update_queue_head,
45d60a
-			     state -> ack_queue_head, MDL);
45d60a
-	    if (!state -> update_queue_tail) {
45d60a
-#if defined (POINTER_DEBUG)
45d60a
-		if (state -> ack_queue_tail -> next_pending) {
45d60a
-		    log_error ("next pending on ack queue tail.");
45d60a
-		    abort ();
45d60a
-		}
45d60a
-#endif
45d60a
-		lease_reference (&state -> update_queue_tail,
45d60a
-				 state -> ack_queue_tail, MDL);
45d60a
-	    }
45d60a
-	    lease_dereference (&state -> ack_queue_tail, MDL);
45d60a
-	    lease_dereference (&state -> ack_queue_head, MDL);
45d60a
-	    state -> cur_unacked_updates = 0;
45d60a
-	}
45d60a
+	/* Move the ack queue to the update queue */
45d60a
+	dhcp_failover_rescind_updates(state);
45d60a
+
45d60a
 	/* We will re-queue a timeout later, if applicable. */
45d60a
 	cancel_timeout (dhcp_failover_keepalive, state);
45d60a
 	break;
45d60a
@@ -1858,7 +1884,9 @@ isc_result_t dhcp_failover_set_state (dhcp_failover_state_t *state,
45d60a
 	    break;
45d60a
 
45d60a
 	  case potential_conflict:
45d60a
-	    if (state -> i_am == primary)
45d60a
+	    if ((state->i_am == primary) ||
45d60a
+		((state->i_am == secondary) &&
45d60a
+		 (state->partner.state == conflict_done)))
45d60a
 		    dhcp_failover_send_update_request (state);
45d60a
 	    break;
45d60a
 
45d60a
@@ -1961,7 +1989,18 @@ isc_result_t dhcp_failover_peer_state_changed (dhcp_failover_state_t *state,
45d60a
 	if (state -> partner.state == new_state && state -> me.state) {
45d60a
 		switch (state -> me.state) {
45d60a
 		      case startup:
45d60a
-			dhcp_failover_set_state (state, state -> saved_state);
45d60a
+			/*
45d60a
+			 * If we have a peer state we must be connected.
45d60a
+			 * If so we should move to potential_conflict
45d60a
+			 * instead of resolution_interrupted, otherwise
45d60a
+			 * back to wherever we were before we stopped.
45d60a
+			 */
45d60a
+			if (state->saved_state == resolution_interrupted)
45d60a
+				dhcp_failover_set_state(state,
45d60a
+							potential_conflict);
45d60a
+			else 
45d60a
+				dhcp_failover_set_state(state,
45d60a
+							state->saved_state);
45d60a
 			return ISC_R_SUCCESS;
45d60a
 
45d60a
 		      case unknown_state:
45d60a
@@ -2179,6 +2218,17 @@ isc_result_t dhcp_failover_peer_state_changed (dhcp_failover_state_t *state,
45d60a
 			dhcp_failover_set_state(state, new_state);
45d60a
 			break;
45d60a
 
45d60a
+		      case potential_conflict:
45d60a
+		      case resolution_interrupted:
45d60a
+			/*
45d60a
+			 * This can happen when the connection is lost and 
45d60a
+			 * recovered after the primary has moved to 
45d60a
+			 * conflict-done but the secondary is still in 
45d60a
+			 * potential-conflict.  In that case, we have to 
45d60a
+			 * remain in conflict-done.
45d60a
+			 */
45d60a
+			break;
45d60a
+
45d60a
 		      default:
45d60a
 			log_fatal("Peer %s: Invalid attempt to move from %s "
45d60a
 				"to %s while local state is conflict-done.",
45d60a
@@ -4867,16 +4917,17 @@ isc_result_t dhcp_failover_send_update_request (dhcp_failover_state_t *state)
45d60a
 	if (!link -> outer || link -> outer -> type != omapi_type_connection)
45d60a
 		return DHCP_R_INVALIDARG;
45d60a
 
45d60a
-	if (state -> curUPD)
45d60a
-		return ISC_R_ALREADYRUNNING;
45d60a
+	/* We allow an update to be restarted in case we requested an update
45d60a
+	 * and were interrupted by something. If we had an ALL going we need
45d60a
+	 * to restart that.  Otherwise we simply continue with the request */
45d60a
+	if (state -> curUPD == FTM_UPDREQALL) {
45d60a
+		return (dhcp_failover_send_update_request_all(state));
45d60a
+	}
45d60a
 
45d60a
-	status = (dhcp_failover_put_message
45d60a
-		  (link, link -> outer,
45d60a
-		   FTM_UPDREQ, link->xid++,
45d60a
-		   (failover_option_t *)0));
45d60a
+	status = (dhcp_failover_put_message(link, link -> outer, FTM_UPDREQ,
45d60a
+					    link -> xid++, NULL));
45d60a
 
45d60a
-	if (status == ISC_R_SUCCESS)
45d60a
-		state -> curUPD = FTM_UPDREQ;
45d60a
+	state -> curUPD = FTM_UPDREQ;
45d60a
 
45d60a
 #if defined (DEBUG_FAILOVER_MESSAGES)
45d60a
 	if (status != ISC_R_SUCCESS)
45d60a
@@ -4886,7 +4937,12 @@ isc_result_t dhcp_failover_send_update_request (dhcp_failover_state_t *state)
45d60a
 		log_debug ("%s", obuf);
45d60a
 	}
45d60a
 #endif
45d60a
-	log_info ("Sent update request message to %s", state -> name);
45d60a
+	if (status == ISC_R_SUCCESS) {
45d60a
+		log_info("Sent update request message to %s", state -> name);
45d60a
+	} else {
45d60a
+		log_error("Failed to send update request all message to %s: %s",
45d60a
+			 state -> name, isc_result_totext(status));
45d60a
+	}
45d60a
 	return status;
45d60a
 }
45d60a
 
45d60a
@@ -4913,17 +4969,14 @@ isc_result_t dhcp_failover_send_update_request_all (dhcp_failover_state_t
45d60a
 	if (!link -> outer || link -> outer -> type != omapi_type_connection)
45d60a
 		return DHCP_R_INVALIDARG;
45d60a
 
45d60a
-	/* If there is an UPDREQ in progress, then upgrade to UPDREQALL. */
45d60a
-	if (state -> curUPD && (state -> curUPD != FTM_UPDREQ))
45d60a
-		return ISC_R_ALREADYRUNNING;
45d60a
+	/* We allow an update to be restarted in case we requested an update
45d60a
+	 * and were interrupted by something.
45d60a
+	 */
45d60a
 
45d60a
-	status = (dhcp_failover_put_message
45d60a
-		  (link, link -> outer,
45d60a
-		   FTM_UPDREQALL, link->xid++,
45d60a
-		   (failover_option_t *)0));
45d60a
+	status = (dhcp_failover_put_message(link, link -> outer, FTM_UPDREQALL,
45d60a
+					    link -> xid++, NULL));
45d60a
 
45d60a
-	if (status == ISC_R_SUCCESS)
45d60a
-		state -> curUPD = FTM_UPDREQALL;
45d60a
+	state -> curUPD = FTM_UPDREQALL;
45d60a
 
45d60a
 #if defined (DEBUG_FAILOVER_MESSAGES)
45d60a
 	if (status != ISC_R_SUCCESS)
45d60a
@@ -4933,7 +4986,12 @@ isc_result_t dhcp_failover_send_update_request_all (dhcp_failover_state_t
45d60a
 		log_debug ("%s", obuf);
45d60a
 	}
45d60a
 #endif
45d60a
-	log_info ("Sent update request all message to %s", state -> name);
45d60a
+	if (status == ISC_R_SUCCESS) {
45d60a
+		log_info("Sent update request all message to %s", state -> name);
45d60a
+	} else {
45d60a
+		log_error("Failed to send update request all message to %s: %s",
45d60a
+			 state -> name, isc_result_totext(status));
45d60a
+	}
45d60a
 	return status;
45d60a
 }
45d60a