Skip to content

Commit 5c58f00

Browse files
committedApr 14, 2023
assorted kubeadm configmap compatibility issues
There is an upstream issue in kubeadm (affecting at least up to 1.24.4) where, if the "certSANs" field of the kubeadm configmap contains unquoted IPv6 addresses starting with colons in "flow style", kubeadm will choke while parsing it.

The problematic formatting looks like this:

    ClusterConfiguration: |
      apiServer:
        certSANs: [::1, 192.168.206.1, 127.0.0.1, 10.20.7.3]

While this is fine:

    ClusterConfiguration: |
      apiServer:
        certSANs:
        - ::1
        - 192.168.206.1
        - 127.0.0.1
        - 10.20.7.3

It also works to wrap each IPv6 address in quotes.

It's not clear what causes the certSANs field to be formatted in flow style, but it was seen in testing after a platform upgrade followed by a k8s upgrade.

The workaround is to modify the "upgrade first control plane" code to update the configmap 'certSANs' field to block style if it's in flow style and contains IPv6 addresses. I've opened an upstream issue: kubernetes/kubeadm#2858

We'll hit the same error in _get_kubernetes_join_cmd(), but since that code is run more frequently, rather than reformatting the configmap we modify the code to explicitly set the certificate key instead of passing in the whole kubeadm config file. This is arguably how it should have been done originally.

In StarlingX 7 by default we set the "HugePageStorageMediumSize=true" feature gate in the kube-apiserver section of the kubeadm configmap. In k8s 1.24 it's no longer supported. In StarlingX 8 we remove it from various locations (kubelet config, service parameters, etc.) but we also need to remove it from the kubeadm configmap.

Test Plan:
PASS: platform upgrade from StarlingX 7 to 8, then K8s upgrade to 1.24
PASS: add "::1" address to certSANs in configmap then upgrade k8s
PASS: set HugePageStorageMediumSize in cm then upgrade k8s to 1.24

Change-Id: I45e9e22585a5b2912a339ad5905d011e3adc29ab
Closes-Bug: 2016041
Signed-off-by: Chris Friesen <[email protected]>
1 parent 3659adc commit 5c58f00

File tree

5 files changed

+222
-35
lines changed

5 files changed

+222
-35
lines changed
 

‎sysinv/sysinv/sysinv/sysinv/common/kubernetes.py

+59
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,13 @@
1111

1212
from __future__ import absolute_import
1313
from distutils.version import LooseVersion
14+
from ipaddress import ip_address
15+
from ipaddress import IPv4Address
1416
import json
1517
import os
1618
import re
19+
import ruamel.yaml as yaml
20+
from ruamel.yaml.compat import StringIO
1721
import time
1822
import tsconfig.tsconfig as tsc
1923

@@ -1293,3 +1297,58 @@ def get_cert_secret(self, name, namespace, max_retries=60):
12931297
return secret
12941298
time.sleep(1)
12951299
return None
1300+
1301+
def kubeadm_configmap_reformat(self, target_version):
    """Rewrite the kubeadm configmap's certSANs in block style if needed.

    There is an upstream issue in kubeadm (affecting at least up to 1.24.4)
    where if the "certSANs" field of the kubeadm configmap contains unquoted
    IPv6 addresses in "flow style" it will choke while parsing. The
    problematic formatting looks like this:

    ClusterConfiguration: |
      apiServer:
        certSANs: [::1, 192.168.206.1, 127.0.0.1, 10.20.7.3]

    While this is fine:

    ClusterConfiguration: |
      apiServer:
        certSANs:
        - ::1
        - 192.168.206.1
        - 127.0.0.1
        - 10.20.7.3

    The configmap is only patched when certSANs is in flow style and at
    least one entry is not an IPv4 address (or cannot be parsed as an IP
    address at all).

    :param target_version: not used by this method.
    :returns: 0 on success (including when nothing had to change),
              1 if reading, parsing or patching the configmap raised.
    """
    configmap_name = 'kubeadm-config'
    namespace = 'kube-system'
    try:
        configmap = self.kube_read_config_map(configmap_name, namespace)
        newyaml = yaml.YAML()
        info = newyaml.load(StringIO(configmap.data['ClusterConfiguration']))

        # A missing or empty certSANs cannot trigger the kubeadm parsing
        # problem, so there is nothing to reformat.
        api_server = info.get('apiServer') or {}
        cert_sans = api_server.get('certSANs')
        if not cert_sans:
            return 0

        # Block style is parsed correctly by kubeadm; leave it alone.
        if not cert_sans.fa.flow_style():
            return 0

        # It's using flow style, so we need to check if any addresses
        # are IPv6.
        try:
            need_reformat = any(
                not isinstance(ip_address(addr), IPv4Address)
                for addr in cert_sans)
        except ValueError:
            # Shouldn't happen if addresses are well-formed.
            # If it does then reformat to be safe.
            need_reformat = True

        if not need_reformat:
            return 0

        LOG.info('Converting kubeadm configmap certSANs to block style.')
        cert_sans.fa.set_block_style()
        outstream = StringIO()
        newyaml.dump(info, outstream)
        patch = {'data': {'ClusterConfiguration': outstream.getvalue()}}
        self.kube_patch_config_map(configmap_name, namespace, patch)
        LOG.info('Successfully reformatted kubeadm configmap.')
    except Exception as e:
        # Callers treat a return value of 1 as failure, so report any
        # error that way instead of letting it propagate.
        LOG.exception("Unable to patch kubeadm config_map: %s", e)
        return 1

    return 0

‎sysinv/sysinv/sysinv/sysinv/conductor/manager.py

+9-1
Original file line numberDiff line numberDiff line change
@@ -14958,6 +14958,11 @@ def kube_upgrade_control_plane(self, context, host_uuid):
1495814958
LOG.error("Problem sanitizing kubelet configmap feature gates.")
1495914959
rc = 1
1496014960

14961+
# Work around upstream kubeadm configmap parsing issue.
14962+
if self._kube.kubeadm_configmap_reformat(target_version) == 1:
14963+
LOG.error("Problem reformatting kubelet configmap.")
14964+
rc = 1
14965+
1496114966
if rc == 1:
1496214967
kube_upgrade_obj.state = fail_state
1496314968
kube_upgrade_obj.save()
@@ -16592,7 +16597,10 @@ def sanitize_feature_gates_kubeadm_configmap(self, target_version):
1659216597
try:
1659316598
feature_gates = sanitize_feature_gates(feature_gates,
1659416599
'RemoveSelfLink=false')
16595-
if target_version == 'v1.25.3':
16600+
if target_version == 'v1.24.4':
16601+
feature_gates = sanitize_feature_gates(feature_gates,
16602+
'HugePageStorageMediumSize=true')
16603+
elif target_version == 'v1.25.3':
1659616604
feature_gates = sanitize_feature_gates(feature_gates,
1659716605
'TTLAfterFinished=true')
1659816606
if not feature_gates:

‎sysinv/sysinv/sysinv/sysinv/puppet/kubernetes.py

+6-34
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,8 @@
99
import json
1010
import keyring
1111
import netaddr
12-
import os
1312
import random
1413
import re
15-
import tempfile
1614

1715
from oslo_log import log as logging
1816
from sysinv.common import constants
@@ -244,41 +242,15 @@ def _get_kubernetes_join_cmd(self, host):
244242
try:
245243
join_cmd_additions = ''
246244
if host.personality == constants.CONTROLLER:
247-
# Upload the certificates used during kubeadm join
248-
# The cert key will be printed in the last line of the output
249-
250-
# We will create a temp file with the kubeadm config
251-
# We need this because the kubeadm config could have changed
252-
# since bootstrap. Reading the kubeadm config each time
253-
# it is needed ensures we are not using stale data
254-
255-
fd, temp_kubeadm_config_view = tempfile.mkstemp(
256-
dir='/tmp', suffix='.yaml')
257-
with os.fdopen(fd, 'w') as f:
258-
cmd = ['kubectl', 'get', 'cm', '-n', 'kube-system',
259-
'kubeadm-config', '-o=jsonpath={.data.ClusterConfiguration}',
260-
KUBECONFIG]
261-
subprocess.check_call(cmd, stdout=f) # pylint: disable=not-callable
262-
263-
# We will use a custom key to encrypt kubeadm certificates
264-
# to make sure all hosts decrypt using the same key
265-
245+
# Upload the certificates used during kubeadm join.
266246
key = str(keyring.get_password(CERTIFICATE_KEY_SERVICE,
267-
CERTIFICATE_KEY_USER))
268-
269-
with open(temp_kubeadm_config_view, "a") as f:
270-
f.write("---\r\napiVersion: kubeadm.k8s.io/v1beta2\r\n"
271-
"kind: InitConfiguration\r\ncertificateKey: "
272-
"{}".format(key))
273-
247+
CERTIFICATE_KEY_USER))
274248
cmd = ['kubeadm', 'init', 'phase', 'upload-certs',
275-
'--upload-certs', '--config',
276-
temp_kubeadm_config_view]
277-
249+
'--upload-certs', '--certificate-key', key]
278250
subprocess.check_call(cmd) # pylint: disable=not-callable
279-
join_cmd_additions = \
280-
" --control-plane --certificate-key %s" % key
281-
os.unlink(temp_kubeadm_config_view)
251+
252+
# Now add the key to the join command.
253+
join_cmd_additions = " --control-plane --certificate-key %s" % key
282254

283255
# Configure the IP address of the API Server for the controller host.
284256
# If not set the default network interface will be used, which does not

‎sysinv/sysinv/sysinv/sysinv/tests/common/test_kubernetes.py

+73
Original file line numberDiff line numberDiff line change
@@ -1249,6 +1249,79 @@ def test_kube_list_custom_resource_pretty(self, mock_list_cluster_custom_object)
12491249
watch=False
12501250
)
12511251

1252+
def test_kubeadm_configmap_reformat(self):
    """Check certSANs is rewritten only for flow style containing IPv6.

    Four configmaps are fed to kubeadm_configmap_reformat():
    IPv4-only block style, IPv4-only flow style, mixed IPv6/IPv4 block
    style (none of which may be patched) and mixed IPv6/IPv4 flow style
    (which must be patched to block style).
    """
    mock_kube_patch_config_map = mock.MagicMock()
    p2 = mock.patch(
        'sysinv.common.kubernetes.KubeOperator.kube_patch_config_map',
        mock_kube_patch_config_map)
    p2.start().return_value = None
    self.addCleanup(p2.stop)

    def set_cluster_configuration(cluster_configuration):
        # Stage the configmap handed back by the next configmap read.
        self.read_namespaced_config_map_result = kubernetes.client.V1ConfigMap(
            api_version="v1",
            data={"ClusterConfiguration": cluster_configuration},
            metadata=kubernetes.client.V1ObjectMeta(
                name="kubeadm-config",
                namespace="kube-system"),
        )

    not_patched = [
        # Test IPv4 only in block style
        "apiServer:\n"
        "  certSANs:\n"
        "  - 127.0.0.1\n"
        "  - 192.168.206.2\n",
        # Test IPv4 only in flow style
        "apiServer:\n"
        "  certSANs: [127.0.0.1, 192.168.206.2]\n",
        # Test IPv6 and IPv4 in block style
        "apiServer:\n"
        "  certSANs:\n"
        "  - ::1\n"
        "  - 192.168.206.2\n",
    ]
    for cluster_configuration in not_patched:
        set_cluster_configuration(cluster_configuration)
        rc = self.kube_operator.kubeadm_configmap_reformat('dummy')
        # The method reports internal errors by returning 1 rather than
        # raising, so check the return code as well; otherwise a
        # swallowed exception would satisfy assert_not_called().
        self.assertEqual(rc, 0)
        mock_kube_patch_config_map.assert_not_called()

    # Test IPv6 and IPv4 in flow style
    set_cluster_configuration(
        "apiServer:\n"
        "  certSANs: [::1, 127.0.0.1]\n")
    rc = self.kube_operator.kubeadm_configmap_reformat('dummy')
    self.assertEqual(rc, 0)
    patch_config_map_arg = {
        'data': {
            'ClusterConfiguration':
                'apiServer:\n  certSANs:\n  - ::1\n  - 127.0.0.1\n'}}
    mock_kube_patch_config_map.assert_called_with(
        'kubeadm-config', 'kube-system', patch_config_map_arg)
1324+
12521325

12531326
class TestKubernetesUtilities(base.TestCase):
12541327
def test_is_kube_version_supported(self):

‎sysinv/sysinv/sysinv/sysinv/tests/conductor/test_manager.py

+75
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,11 @@ def __init__(self, db_api):
7373
self.update_secure_system_config = mock.MagicMock()
7474

7575

76+
class FakeKubeOperator(object):
    """Test double assigned to the conductor service's ``_kube`` attribute."""

    def __init__(self):
        # Stub out the configmap reformat call. A bare MagicMock returns
        # another MagicMock, which does not compare equal to 1, so the
        # conductor's "== 1" failure check is not triggered.
        self.kubeadm_configmap_reformat = mock.MagicMock()
79+
80+
7681
class FakePopen(object):
7782

7883
def __init__(self, **kwargs):
@@ -221,6 +226,43 @@ def setUp(self):
221226
ttl_patch += 'v1.42.1\nscheduler: {}\n'
222227
self.kubeadm_config_map_patch_ttlafterfinished = {'data': {'ClusterConfiguration': ttl_patch}}
223228

229+
self.kubeadm_config_read_HugePageStorageMediumSize = kubernetes.client.V1ConfigMap(
230+
api_version='v1',
231+
data={'ClusterConfiguration': 'apiServer:\n'
232+
' certSANs:\n'
233+
' - 192.168.206.1\n'
234+
' - 127.0.0.1\n'
235+
' - 10.10.6.3\n'
236+
' extraArgs:\n'
237+
' event-ttl: 24h\n'
238+
' feature-gates: HugePageStorageMediumSize=true,Foo=false\n'
239+
' extraVolumes:\n'
240+
' - hostPath: '
241+
'/etc/kubernetes/encryption-provider.yaml\n'
242+
'apiVersion: kubeadm.k8s.io/v1beta3\n'
243+
'controllerManager:\n'
244+
' extraArgs:\n'
245+
' pod-eviction-timeout: 30s\n'
246+
' feature-gates: HugePageStorageMediumSize=true\n'
247+
' extraVolumes:\n'
248+
'kind: ClusterConfiguration\n'
249+
'kubernetesVersion: v1.23.1\n'
250+
'scheduler: {}\n'},
251+
252+
metadata=kubernetes.client.V1ObjectMeta(
253+
name='kubeadm-config',
254+
namespace='kube-system'),
255+
)
256+
ttl_patch = 'apiServer:\n certSANs: [192.168.206.1, 127.0.0.1, '
257+
ttl_patch += '10.10.6.3]\n extraArgs: {event-ttl: 24h, feature-gates: '
258+
ttl_patch += 'Foo=false}\n extraVolumes:\n - '
259+
ttl_patch += '{hostPath: /etc/kubernetes/encryption-provider.yaml}\n'
260+
ttl_patch += 'apiVersion: kubeadm.k8s.io/v1beta3\ncontrollerManager:\n '
261+
ttl_patch += 'extraArgs: {pod-eviction-timeout: 30s}\n extraVolumes: '
262+
ttl_patch += 'null\nkind: ClusterConfiguration\nkubernetesVersion: '
263+
ttl_patch += 'v1.23.1\nscheduler: {}\n'
264+
self.kubeadm_config_map_patch_HugePageStorageMediumSize = {'data': {'ClusterConfiguration': ttl_patch}}
265+
224266
self.kubeadm_config_map_read_image_repository = kubernetes.client.V1ConfigMap(
225267
api_version='v1',
226268
data={'ClusterConfiguration': 'apiServer:\n'
@@ -1272,6 +1314,8 @@ def test_kube_upgrade_control_plane_first_master(self):
12721314
p2.start().return_value = 0
12731315
self.addCleanup(p2.stop)
12741316

1317+
self.service._kube = FakeKubeOperator()
1318+
12751319
# Speed up the test
12761320
kubernetes.MANIFEST_APPLY_INTERVAL = 1
12771321
kubernetes.POD_START_INTERVAL = 1
@@ -1349,6 +1393,8 @@ def test_kube_upgrade_control_plane_first_master_manifest_timeout(self):
13491393
p2.start().return_value = 0
13501394
self.addCleanup(p2.stop)
13511395

1396+
self.service._kube = FakeKubeOperator()
1397+
13521398
# Speed up the test
13531399
kubernetes.MANIFEST_APPLY_INTERVAL = 1
13541400
kubernetes.MANIFEST_APPLY_TIMEOUT = 1
@@ -1425,6 +1471,8 @@ def test_kube_upgrade_control_plane_first_master_upgrade_fail(self):
14251471
p2.start().return_value = 0
14261472
self.addCleanup(p2.stop)
14271473

1474+
self.service._kube = FakeKubeOperator()
1475+
14281476
# Speed up the test
14291477
kubernetes.MANIFEST_APPLY_INTERVAL = 1
14301478
kubernetes.POD_START_INTERVAL = 1
@@ -1788,6 +1836,33 @@ def test_sanitize_feature_gates_kubeadm_configmap_with_RemoveSelfLink(self):
17881836
mock_kube_patch_config_map.assert_called_with(
17891837
'kubeadm-config', 'kube-system', self.kubeadm_config_map_patch_RemoveSelfLink)
17901838

1839+
def test_sanitize_feature_gates_kubeadm_configmap_with_HugePageStorageMediumSize(self):
    """
    Sanitizing for v1.24.4 must drop "HugePageStorageMediumSize=true".

    Use cases covered:
    1. a component whose 'extraArgs' 'feature-gates' holds only a
       "HugePageStorageMediumSize=true" entry
    2. a component whose 'extraArgs' 'feature-gates' holds a
       "HugePageStorageMediumSize=true" entry alongside others
    """
    # Reads of the kubeadm configmap return the prepared fixture.
    read_config_map = mock.MagicMock()
    read_patcher = mock.patch(
        'sysinv.common.kubernetes.KubeOperator.kube_read_config_map',
        read_config_map)
    read_patcher.start()
    read_config_map.return_value = \
        self.kubeadm_config_read_HugePageStorageMediumSize
    self.addCleanup(read_patcher.stop)

    # Capture the patch call so its arguments can be verified.
    patch_config_map = mock.MagicMock()
    patch_patcher = mock.patch(
        'sysinv.common.kubernetes.KubeOperator.kube_patch_config_map',
        patch_config_map)
    patch_patcher.start()
    patch_config_map.return_value = None
    self.addCleanup(patch_patcher.stop)

    self.service.start()
    self.service.sanitize_feature_gates_kubeadm_configmap('v1.24.4')

    expected_patch = self.kubeadm_config_map_patch_HugePageStorageMediumSize
    patch_config_map.assert_called_with(
        'kubeadm-config', 'kube-system', expected_patch)
1865+
17911866
def test_sanitize_feature_gates_kubeadm_configmap_with_ttlafterfinished(self):
17921867
mock_kube_read_config_map = mock.MagicMock()
17931868
p = mock.patch(

0 commit comments

Comments
 (0)
Please sign in to comment.