Skip to content

Commit 46695be

Browse files
shuijing198799 authored and weekface committed on Jun 1, 2019
Fix stability tidb pause case (#542)
1 parent 37d6933 commit 46695be

File tree

3 files changed

+89
-64
lines changed

3 files changed

+89
-64
lines changed
 

‎tests/actions.go

+78 -64
Original file line number | Diff line number | Diff line change
@@ -173,6 +173,9 @@ type OperatorActions interface {
173173
GetTidbMemberAssignedNodesOrDie(info *TidbClusterConfig) map[string]string
174174
CheckTidbMemberAssignedNodes(info *TidbClusterConfig, oldAssignedNodes map[string]string) error
175175
CheckTidbMemberAssignedNodesOrDie(info *TidbClusterConfig, oldAssignedNodes map[string]string)
176+
SetPartitionAnnotation(tcName string, nameSpace string, ordinal int) error
177+
CheckManualPauseTiDB(info *TidbClusterConfig) error
178+
CheckManualPauseTiDBOrDie(info *TidbClusterConfig)
176179
}
177180

178181
type operatorActions struct {
@@ -659,7 +662,7 @@ func (oa *operatorActions) CheckTidbClusterStatus(info *TidbClusterConfig) error
659662

660663
ns := info.Namespace
661664
tcName := info.ClusterName
662-
if err := wait.Poll(oa.pollInterval, 35*time.Minute, func() (bool, error) {
665+
if err := wait.Poll(oa.pollInterval, 30*time.Minute, func() (bool, error) {
663666
var tc *v1alpha1.TidbCluster
664667
var err error
665668
if tc, err = oa.cli.PingcapV1alpha1().TidbClusters(ns).Get(tcName, metav1.GetOptions{}); err != nil {
@@ -855,7 +858,7 @@ func (oa *operatorActions) CheckScaledCorrectly(info *TidbClusterConfig, podUIDs
855858
})
856859
}
857860

858-
func setPartitionAnnotation(tcName string, nameSpace string, ordinal int) error {
861+
func (oa *operatorActions) SetPartitionAnnotation(tcName string, nameSpace string, ordinal int) error {
859862
// add annotation to pause statefulset upgrade process
860863
cmd := fmt.Sprintf("kubectl annotate tc %s -n %s tidb.pingcap.com/tidb-partition=%d --overwrite",
861864
tcName, nameSpace, ordinal)
@@ -875,19 +878,6 @@ func (oa *operatorActions) UpgradeTidbCluster(info *TidbClusterConfig) error {
875878
}
876879
oa.EmitEvent(info, "UpgradeTidbCluster")
877880

878-
// get tidbSet from apiserver
879-
tidbSetName := controller.TiDBMemberName(info.ClusterName)
880-
tidbSet, err := oa.kubeCli.AppsV1beta1().StatefulSets(info.Namespace).Get(tidbSetName, metav1.GetOptions{})
881-
if err != nil {
882-
return pingcapErrors.Wrapf(err, "failed to get stateful set [%s/%s] setName %s", info.Namespace, info.ClusterName, tidbSetName)
883-
}
884-
885-
// add annotation to pause statefulset upgrade process
886-
err = setPartitionAnnotation(info.ClusterName, info.Namespace, int(tidbSet.Status.Replicas-1))
887-
if err != nil {
888-
return pingcapErrors.Wrapf(err, "failed to add annotation to [%s/%s]", info.Namespace, info.ClusterName)
889-
}
890-
891881
cmd := oa.getHelmUpgradeClusterCmd(info, nil)
892882
glog.Info("[UPGRADE] " + cmd)
893883
res, err := exec.Command("/bin/sh", "-c", cmd).CombinedOutput()
@@ -1056,38 +1046,6 @@ func (oa *operatorActions) tidbMembersReadyFn(tc *v1alpha1.TidbCluster) (bool, e
10561046
tcName := tc.GetName()
10571047
ns := tc.GetNamespace()
10581048
tidbSetName := controller.TiDBMemberName(tcName)
1059-
tidbUpgradeAnnotationStr, ok := tc.Annotations[label.AnnTiDBPartition]
1060-
if !ok {
1061-
tidbUpgradeAnnotationStr = "0"
1062-
}
1063-
1064-
tidbUpgradeAnnotation, err := strconv.ParseInt(tidbUpgradeAnnotationStr, 10, 32)
1065-
if err != nil {
1066-
return false, nil
1067-
}
1068-
1069-
pauseCorrect := func(set *v1beta1.StatefulSet) bool {
1070-
return (*set.Spec.UpdateStrategy.RollingUpdate.Partition) >= int32(tidbUpgradeAnnotation)
1071-
}
1072-
1073-
upgradePaused := func() bool {
1074-
1075-
podName := fmt.Sprintf("%s-%d", controller.TiDBMemberName(tc.Name), tidbUpgradeAnnotation)
1076-
1077-
tidbPod, err := oa.kubeCli.CoreV1().Pods(ns).Get(podName, metav1.GetOptions{})
1078-
if err != nil {
1079-
glog.Errorf("fail to get tidb po name %s namespace %s ", podName, ns)
1080-
return false
1081-
}
1082-
if tidbPod.Labels[v1beta1.ControllerRevisionHashLabelKey] == tc.Status.TiDB.StatefulSet.UpdateRevision &&
1083-
tc.Status.TiDB.Phase == v1alpha1.UpgradePhase {
1084-
if member, ok := tc.Status.TiDB.Members[tidbPod.Name]; ok && member.Health {
1085-
return true
1086-
}
1087-
}
1088-
1089-
return false
1090-
}
10911049

10921050
tidbSet, err := oa.kubeCli.AppsV1beta1().StatefulSets(ns).Get(tidbSetName, metav1.GetOptions{})
10931051
if err != nil {
@@ -1122,23 +1080,6 @@ func (oa *operatorActions) tidbMembersReadyFn(tc *v1alpha1.TidbCluster) (bool, e
11221080
return false, nil
11231081
}
11241082

1125-
if upgradePaused() {
1126-
1127-
time.Sleep(30 * time.Second)
1128-
1129-
if !pauseCorrect(tidbSet) {
1130-
return false, fmt.Errorf("pause partition is not correct in upgrade phase [%s/%s] partition %d annotation %d",
1131-
ns, tidbSetName, (*tidbSet.Spec.UpdateStrategy.RollingUpdate.Partition), tidbUpgradeAnnotation)
1132-
}
1133-
1134-
err := setPartitionAnnotation(tcName, ns, 0)
1135-
if err != nil {
1136-
glog.Errorf("fail to set annotation for [%s/%s]", ns, tidbSetName)
1137-
return false, nil
1138-
}
1139-
return false, nil
1140-
}
1141-
11421083
if c, ok := getMemberContainer(oa.kubeCli, ns, tidbSetName); !ok || tc.Spec.TiDB.Image != c.Image {
11431084
glog.Infof("statefulset: %s/%s .spec.template.spec.containers[name=tidb].image(%s) != %s",
11441085
ns, tidbSetName, c.Image, tc.Spec.TiDB.Image)
@@ -2431,3 +2372,76 @@ func (oa *operatorActions) getHelmUpgradeClusterCmd(info *TidbClusterConfig, set
24312372

24322373
return cmd
24332374
}
2375+
2376+
func (oa *operatorActions) CheckManualPauseTiDB(info *TidbClusterConfig) error {
2377+
2378+
var tc *v1alpha1.TidbCluster
2379+
var tidbSet *v1beta1.StatefulSet
2380+
var err error
2381+
ns := info.Namespace
2382+
2383+
// set partition annotation to protect tidb pod
2384+
if err = oa.SetPartitionAnnotation(info.ClusterName, ns, 1); err != nil {
2385+
return fmt.Errorf("failed to SetPartitionAnnotation: [%s/%s], %v", ns, info.ClusterName, err)
2386+
}
2387+
2388+
fn := func() (bool, error) {
2389+
2390+
if tc, err = oa.cli.PingcapV1alpha1().TidbClusters(ns).Get(info.ClusterName, metav1.GetOptions{}); err != nil {
2391+
glog.Infof("failed to get tidbcluster: [%s/%s], %v", ns, info.ClusterName, err)
2392+
return false, nil
2393+
}
2394+
2395+
podName := fmt.Sprintf("%s-%d", controller.TiDBMemberName(tc.Name), 1)
2396+
2397+
tidbPod, err := oa.kubeCli.CoreV1().Pods(ns).Get(podName, metav1.GetOptions{})
2398+
if err != nil {
2399+
glog.Infof("fail to get pod in CheckManualPauseTiDB [%s/%s]", ns, podName)
2400+
return false, nil
2401+
}
2402+
2403+
if tidbPod.Labels[v1beta1.ControllerRevisionHashLabelKey] == tc.Status.TiDB.StatefulSet.UpdateRevision &&
2404+
tc.Status.TiDB.Phase == v1alpha1.UpgradePhase {
2405+
if member, ok := tc.Status.TiDB.Members[tidbPod.Name]; !ok || !member.Health {
2406+
glog.Infof("wait for tidb pod [%s/%s] ready member health %t ok %t", ns, podName, member.Health, ok)
2407+
} else {
2408+
return true, nil
2409+
}
2410+
} else {
2411+
glog.Infof("tidbset is not in upgrade phase or pod is not upgrade done [%s/%s]", ns, podName)
2412+
}
2413+
2414+
return false, nil
2415+
}
2416+
2417+
// wait for the tidb statefulset is upgrade to the protect one
2418+
if err = wait.Poll(DefaultPollInterval, DefaultPollTimeout, fn); err != nil {
2419+
return fmt.Errorf("fail to upgrade to annotation TiDB pod : %v", err)
2420+
}
2421+
2422+
time.Sleep(30 * time.Second)
2423+
2424+
tidbSetName := controller.TiDBMemberName(info.ClusterName)
2425+
if tidbSet, err = oa.kubeCli.AppsV1beta1().StatefulSets(ns).Get(tidbSetName, metav1.GetOptions{}); err != nil {
2426+
return fmt.Errorf("failed to get statefulset: [%s/%s], %v", ns, tidbSetName, err)
2427+
}
2428+
2429+
if (*tidbSet.Spec.UpdateStrategy.RollingUpdate.Partition) < 1 {
2430+
return fmt.Errorf("pause partition is not correct in upgrade phase [%s/%s] partition %d annotation %d",
2431+
ns, tidbSetName, (*tidbSet.Spec.UpdateStrategy.RollingUpdate.Partition), 1)
2432+
}
2433+
2434+
if err = oa.SetPartitionAnnotation(tc.Name, ns, 0); err != nil {
2435+
return fmt.Errorf("fail to set annotation for [%s/%s]", ns, tidbSetName)
2436+
}
2437+
2438+
return nil
2439+
}
2440+
2441+
func (oa *operatorActions) CheckManualPauseTiDBOrDie(info *TidbClusterConfig) {
2442+
// add annotation to pause statefulset upgrade process and check
2443+
err := oa.CheckManualPauseTiDB(info)
2444+
if err != nil {
2445+
slack.NotifyAndPanic(err)
2446+
}
2447+
}

‎tests/cmd/e2e/main.go

+6
Original file line number | Diff line number | Diff line change
@@ -232,6 +232,12 @@ func main() {
232232
glog.Fatal(err)
233233
}
234234
}
235+
236+
// only check manual pause for 1 cluster
237+
if len(clusterInfos) >= 1 {
238+
oa.CheckManualPauseTiDBOrDie(clusterInfos[0])
239+
}
240+
235241
for _, clusterInfo := range clusterInfos {
236242
if err = oa.CheckTidbClusterStatus(clusterInfo); err != nil {
237243
glog.Fatal(err)

‎tests/cmd/stability/main.go

+5
Original file line number | Diff line number | Diff line change
@@ -254,8 +254,13 @@ func run(oa tests.OperatorActions,
254254
cluster2.UpgradeAll(firstUpgradeVersion)
255255
oa.UpgradeTidbClusterOrDie(cluster1)
256256
oa.UpgradeTidbClusterOrDie(cluster2)
257+
258+
// check pause upgrade feature in cluster2
259+
oa.CheckManualPauseTiDBOrDie(cluster2)
260+
257261
oa.CheckTidbClusterStatusOrDie(cluster1)
258262
oa.CheckTidbClusterStatusOrDie(cluster2)
263+
259264
oa.CheckTidbMemberAssignedNodesOrDie(cluster1, assignedNodes1)
260265
oa.CheckTidbMemberAssignedNodesOrDie(cluster2, assignedNodes2)
261266

0 commit comments

Comments
 (0)