30 from typing
import List, Dict, Any
31 from cortx.utils.conf_store
import Conf
32 from cortx.utils.cortx
import Const
34 MOTR_SERVER_SCRIPT_PATH =
"/usr/libexec/cortx-motr/motr-start" 35 MOTR_MKFS_SCRIPT_PATH =
"/usr/libexec/cortx-motr/motr-mkfs" 36 MOTR_FSM_SCRIPT_PATH =
"/usr/libexec/cortx-motr/motr-free-space-monitor" 37 MOTR_CONFIG_SCRIPT =
"/opt/seagate/cortx/motr/libexec/motr_cfg.sh" 38 MOTR_MINI_PROV_LOGROTATE_SCRIPT =
"/opt/seagate/cortx/motr/libexec/motr_mini_prov_logrotate.sh" 39 CROND_DIR =
"/etc/cron.hourly" 40 LNET_CONF_FILE =
"/etc/modprobe.d/lnet.conf" 41 LIBFAB_CONF_FILE =
"/etc/libfab.conf" 42 SYS_CLASS_NET_DIR =
"/sys/class/net/" 43 MOTR_SYS_CFG =
"/etc/sysconfig/motr" 44 MOTR_WORKLOAD_DIR =
"/opt/seagate/cortx/motr/workload" 46 LOGFILE =
"/var/log/seagate/motr/mini_provisioner" 47 LOGDIR =
"/var/log/seagate/motr" 48 LOGGER =
"mini_provisioner" 49 IVT_DIR =
"/var/log/seagate/motr/ivt" 50 MOTR_LOG_DIR =
"/var/motr" 53 MOTR_LOG_DIRS = [LOGDIR, MOTR_LOG_DIR]
54 BE_LOG_SZ = 4*1024*1024*1024
55 BE_SEG0_SZ = 128 * 1024 *1024
57 MACHINE_ID_FILE =
"/etc/machine-id" 58 TEMP_FID_FILE =
"/opt/seagate/cortx/motr/conf/service_fid.yaml" 60 MEM_THRESHOLD = 4*1024*1024*1024
61 CVG_COUNT_KEY =
"num_cvg" 64 """ Generic Exception with error code and output """ 68 self.
_desc = message % (args)
71 return f
"error[{self._rc}]: {self._desc}" 74 verbose = False, retries = 1, stdin = None, logging=False):
75 ps = subprocess.Popen(cmd, stdin=subprocess.PIPE,
76 stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
79 ps.stdin.write(stdin.encode())
80 stdout, stderr = ps.communicate(timeout=timeout_secs);
81 stdout = str(stdout,
'utf-8')
84 if ps.returncode != 0:
85 raise MotrError(ps.returncode, f
"\"{cmd}\" command execution failed")
91 logger = logging.getLogger(
"console")
92 if not os.path.exists(LOGDIR):
94 os.makedirs(LOGDIR, exist_ok=
True)
95 with open(f
'{self.logfile}',
'w'):
pass 97 raise MotrError(errno.EINVAL, f
"{self.logfile} creation failed\n")
99 if not os.path.exists(self.logfile):
101 with open(f
'{self.logfile}',
'w'):
pass 103 raise MotrError(errno.EINVAL, f
"{self.logfile} creation failed\n")
104 logger.setLevel(logging.DEBUG)
106 fh = logging.FileHandler(self.logfile)
107 fh.setLevel(logging.DEBUG)
109 ch = logging.StreamHandler()
110 ch.setLevel(logging.INFO)
111 formatter = logging.Formatter(
'%(asctime)s - %(message)s')
112 fh.setFormatter(formatter)
113 ch.setFormatter(formatter)
114 logger.addHandler(fh)
115 logger.addHandler(ch)
116 logger.info(f
"executing command {command}")
118 process = subprocess.Popen(command, stdin=subprocess.PIPE,
119 stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
121 except Exception
as e:
122 logger.error(
"ERROR {} when running {} with exception {}".
format(sys.exc_info()[1],
126 stdout = process.stdout.readline()
127 if process.poll()
is not None:
130 logger.info(stdout.strip().
decode())
135 def execute_command(self, cmd, timeout_secs = TIMEOUT_SECS, verbose = False,
136 retries = 1, stdin = None, logging=True):
142 for i
in range(retries):
144 self.logger.info(f
"Retry: {i}. Executing cmd: '{cmd}'")
146 ps = subprocess.Popen(cmd, stdin=subprocess.PIPE,
147 stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
150 ps.stdin.write(stdin.encode())
151 stdout, stderr = ps.communicate(timeout=timeout_secs);
152 stdout = str(stdout,
'utf-8')
155 self.logger.info(f
"ret={ps.returncode}\n")
157 if (self._debug
or verbose)
and (logging ==
True):
158 self.logger.debug(f
"[CMD] {cmd}\n")
159 self.logger.debug(f
"[OUT]\n{stdout}\n")
160 self.logger.debug(f
"[RET] {ps.returncode}\n")
161 if ps.returncode == 0:
164 if ps.returncode != 0:
165 raise MotrError(ps.returncode, f
"\"{cmd}\" command execution failed")
166 return stdout, ps.returncode
170 def execute_command_verbose(self, cmd, timeout_secs = TIMEOUT_SECS, verbose = False, set_timeout=True, retry_count = CMD_RETRY_COUNT):
171 self.logger.info(f
"Executing cmd : '{cmd}' \n")
173 if set_timeout ==
False:
177 for cmd_retry_count
in range(retry_count):
178 ps = subprocess.run(cmd, stdin=subprocess.PIPE,
179 stdout=subprocess.PIPE, timeout=timeout_secs,
180 stderr=subprocess.PIPE, shell=
True)
181 self.logger.info(f
"ret={ps.returncode}")
182 self.logger.debug(f
"Executing {cmd_retry_count} time")
183 stdout = ps.stdout.decode(
'utf-8')
184 self.logger.debug(f
"[OUT]{stdout}")
185 self.logger.debug(f
"[ERR]{ps.stderr.decode('utf-8')}")
186 self.logger.debug(f
"[RET] {ps.returncode}")
187 if ps.returncode != 0:
188 time.sleep(cmd_retry_delay)
190 return stdout, ps.returncode
194 for i
in range(retries):
195 self.logger.info(f
"Retry: {i}. Executing cmd : '{cmd}'\n")
196 ps = subprocess.run(
list(cmd.split(
' ')), timeout=timeout_secs)
197 self.logger.info(f
"ret={ps.returncode}\n")
198 if ps.returncode == 0:
204 if not isinstance(var, vtype):
205 raise MotrError(errno.EINVAL, f
"Invalid {msg} type. Expected: {vtype}")
207 raise MotrError(errno.EINVAL, f
"Empty {msg}.")
211 self.machine_id = Conf.machine_id
212 if not os.path.exists(f
"{MACHINE_ID_FILE}"):
214 with open(f
"{MACHINE_ID_FILE}",
"w")
as fp:
215 fp.write(f
"{self.machine_id}\n")
217 op =
execute_command(self, f
"cat {MACHINE_ID_FILE}", logging=
False)[0].strip(
"\n")
218 if op != self.machine_id:
219 raise MotrError(errno.EINVAL,
"machine id does not match")
221 raise MotrError(errno.ENOENT,
"machine id not available in conf")
224 """Get current node name using machine-id.""" 226 machine_id = self.machine_id;
227 server_node = Conf.get(self._index,
'node')[machine_id]
229 raise MotrError(errno.EINVAL, f
"MACHINE_ID {machine_id} does not exist in ConfStore")
236 suffixes = [
'K',
'Ki',
'Kib',
'M',
'Mi',
'Mib',
'G',
'Gi',
'Gib']
238 "K": 1024,
"M": 1024*1024,
"G": 1024*1024*1024,
239 "Ki": 1024,
"Mi": 1024*1024,
"Gi": 1024*1024*1024,
240 "Kib": 1024,
"Mib": 1024*1024,
"Gib": 1024*1024*1024 }
246 num_sz = re.sub(
r'[^0-9]',
'', sz)
247 map_val = sz_map[suffix]
248 ret =
int(num_sz) *
int(map_val)
251 self.logger.
error(f
"Invalid format of mem limit: {sz}\n")
252 self.logger.
error(
"Please use valid format Ex: 1024, 1Ki, 1Mi, 1Gi etc..\n")
257 sevices_limits = Conf.get(self._index,
'cortx>motr>limits')[
'services']
260 self.setup_size =
"small" 264 if service
not in [
"ioservice",
"ios",
"io",
"all",
"confd"]:
265 self.setup_size =
"small" 266 self.logger.info(f
"service is {service}. So seting setup size to {self.setup_size}\n")
272 if service
in [
"io",
"ioservice"]:
276 for arr_elem
in sevices_limits:
278 if arr_elem[
'name'] == svc:
279 min_mem = arr_elem[
'memory'][
'min']
281 if min_mem.isnumeric():
286 self.logger.info(f
"mem limit in config is {min_mem} i.e. {sz}\n")
293 elif sz > MEM_THRESHOLD:
294 self.setup_size =
"large" 295 self.logger.info(f
"setup_size set to {self.setup_size}\n")
299 self.setup_size =
"small" 300 self.logger.info(f
"setup_size set to {self.setup_size}\n")
304 raise MotrError(errno.EINVAL, f
"Setup size is not set properly for service {service}." 305 f
"Please update valid mem limits for {service}")
307 self.logger.info(f
"service={service} and setup_size={self.setup_size}\n")
313 val = Conf.get(self._index, key)
315 raise MotrError(errno.EINVAL,
"{key} does not exist in ConfStore")
321 """Get logical_node_class.""" 323 logical_node_class = self.cluster[
'logical_node_class']
325 raise MotrError(errno.EINVAL, f
"{logical_node_class} does not exist in ConfStore")
326 check_type(logical_node_class, list,
"logical_node_class")
327 return logical_node_class
330 for service
in services:
331 self.logger.info(f
"Restarting {service} service\n")
332 cmd = f
"systemctl stop {service}" 334 cmd = f
"systemctl start {service}" 336 cmd = f
"systemctl status {service}" 340 if not os.path.exists(file):
341 raise MotrError(errno.ENOENT, f
"{file} does not exist")
346 if not os.path.exists(file):
347 raise MotrError(errno.ENOENT, f
"{file} does not exist")
352 if not os.path.exists(entry):
353 cmd = f
"mkdir -p {entry}" 358 node_type = self.server_node[
'type']
360 raise MotrError(errno.EINVAL,
"node_type not found")
363 if node_type ==
"HW":
370 1. check m0tr.ko exists in current kernel modules 371 2. check /etc/sysconfig/motr 376 kernel_ver = op.replace(
'\n',
'')
379 kernel_module = f
"/lib/modules/{kernel_ver}/kernel/fs/motr/m0tr.ko" 380 self.logger.info(f
"Checking for {kernel_module}\n")
383 self.logger.info(f
"Checking for {MOTR_SYS_CFG}\n")
389 with open(f
"{MOTR_SYS_CFG}",
"r") as fp: 392 num_lines = len(lines)
393 self.logger.info(f
"Before update, in file {fname}, num_lines={num_lines}\n")
396 for (k, v)
in kv_list:
398 for lno
in range(num_lines):
400 if lines[lno].startswith(f
"{k}="):
401 lines[lno] = f
"{k}={v}\n" 406 lines.append(f
"{k}={v}\n")
409 num_lines = len(lines)
410 self.logger.info(f
"After update, in file {fname}, num_lines={num_lines}\n")
413 with open(f
"{MOTR_SYS_CFG}",
"w+")
as fp:
418 local_path = self.local_path
419 log_path = self.log_path
420 machine_id = self.machine_id
422 MOTR_M0D_DATA_DIR = f
"{local_path}/motr" 423 if not os.path.exists(MOTR_M0D_DATA_DIR):
425 MOTR_LOCAL_SYSCONFIG_DIR = f
"{MOTR_M0D_DATA_DIR}/sysconfig" 426 if not os.path.exists(MOTR_LOCAL_SYSCONFIG_DIR):
429 MOTR_M0D_CONF_DIR = f
"{MOTR_LOCAL_SYSCONFIG_DIR}/{machine_id}" 430 MOTR_M0D_CONF_XC = f
"{MOTR_M0D_CONF_DIR}/confd.xc" 431 MOTR_M0D_ADDB_STOB_DIR = f
"{log_path}/motr/{machine_id}/addb" 432 MOTR_M0D_TRACE_DIR = f
"{log_path}/motr/{machine_id}/trace" 434 dirs = [MOTR_M0D_DATA_DIR, MOTR_M0D_ADDB_STOB_DIR, MOTR_M0D_TRACE_DIR, MOTR_M0D_CONF_DIR]
438 config_kvs = [(
"MOTR_M0D_CONF_DIR", f
"{MOTR_M0D_CONF_DIR}"),
439 (
"MOTR_M0D_DATA_DIR", f
"{MOTR_M0D_DATA_DIR}"),
440 (
"MOTR_M0D_CONF_XC", f
"{MOTR_M0D_CONF_XC}"),
441 (
"MOTR_M0D_ADDB_STOB_DIR", f
"{MOTR_M0D_ADDB_STOB_DIR}"),
442 (
"MOTR_M0D_TRACE_DIR", f
"{MOTR_M0D_TRACE_DIR}")]
447 cmd = f
"cp {MOTR_SYS_CFG} {MOTR_M0D_CONF_DIR}" 457 cvg_count = node_info[CVG_COUNT_KEY]
458 cvg = node_info[
'cvg']
459 for i
in range(cvg_count):
461 if temp_cvg[
'devices'][
'metadata']:
462 md_disks_lists.append(temp_cvg[
'devices'][
'metadata'])
463 self.logger.info(f
"md_disks lists on node = {md_disks_lists}\n")
464 return md_disks_lists
474 md_len_outer = len(md_lists)
475 for i
in range(md_len_outer):
476 md_len_innner = len(md_lists[i])
477 for j
in range(md_len_innner):
478 md_disks.append(md_lists[i][j])
479 self.logger.info(f
"md_disks on node = {md_disks}\n")
484 ncvgs = len(md_disks)
485 for i
in range(ncvgs):
488 for j
in range(len_md):
490 self.logger.info(f
"setting key server>{machine_id}>cvg[{i}]>m0d[{j}]>md_seg1" 491 f
" with value {md_disk} in {url}")
492 Conf.set(index, f
"server>{machine_id}>cvg[{i}]>m0d[{j}]>md_seg1",f
"{md_disk}")
497 machines: Dict[str,Any] = self.nodes
498 storage_nodes: List[str] = []
499 services = Conf.search(self._index,
'node',
'services', Const.SERVICE_MOTR_IO.value)
500 for machine_id
in machines.keys():
501 result = [svc
for svc
in services
if machine_id
in svc]
504 storage_nodes.append(machine_id)
509 for machine_id
in self.storage_nodes:
510 node_info = nodes.get(machine_id)
512 update_to_file(self, self._index_motr_hare, self._url_motr_hare, machine_id, md_disks_lists)
526 MOTR_M0D_DATA_DIR = f
"{self.local_path}/motr" 528 mini_prov_conf_file = f
"{MOTR_M0D_DATA_DIR}/mini_prov_logrotate.conf" 531 lines=[
"{a} {b}\n".
format(a=self.logfile, b=
'{'),
533 f
"{indent}size 10M\n",
534 f
"{indent}rotate 4\n",
535 f
"{indent}delaycompress\n",
536 f
"{indent}copytruncate\n",
538 with open(f
"{mini_prov_conf_file}",
'w+')
as fp:
544 raise MotrError(errno.EINVAL,
"libfabric is not up.")
549 if self.machine_id
not in self.storage_nodes:
557 if self.setup_size ==
"large":
558 cmd =
"{} {}".
format(MOTR_CONFIG_SCRIPT,
" -c")
574 transport_type = self.server_node[
'network'][
'data'][
'transport_type']
576 raise MotrError(errno.EINVAL,
"transport_type not found")
578 check_type(transport_type, str,
"transport_type")
580 if transport_type ==
"lnet":
582 raise MotrError(errno.EINVAL,
"lent is not up.")
583 elif transport_type ==
"libfabric":
585 raise MotrError(errno.EINVAL,
"libfabric is not up.")
589 self.logger.info(f
"Executing {MOTR_CONFIG_SCRIPT}")
593 """Wrapper function to detect lnet/libfabric transport.""" 595 transport_type = Conf.get(self._index,
'cortx>motr>transport_type')
597 raise MotrError(errno.EINVAL,
"transport_type not found")
599 check_type(transport_type, str,
"transport_type")
601 if transport_type ==
"lnet":
603 elif transport_type ==
"libfab":
606 raise MotrError(errno.EINVAL,
"Unknown data transport type\n")
610 Get iface and /etc/modprobe.d/lnet.conf params from 611 conf store. Configure lnet. Start lnet service 614 iface = self.server_node[
'network'][
'data'][
'private_interfaces'][0]
616 raise MotrError(errno.EINVAL,
"private_interfaces[0] not found\n")
618 self.logger.info(f
"Validate private_interfaces[0]: {iface}\n")
619 cmd = f
"ip addr show {iface}" 623 iface_type = self.server_node[
'network'][
'data'][
'interface_type']
625 raise MotrError(errno.EINVAL,
"interface_type not found\n")
627 lnet_config = (f
"options lnet networks={iface_type}({iface}) " 628 f
"config_on_load=1 lnet_peer_discovery_disabled=1\n")
629 self.logger.info(f
"lnet config: {lnet_config}")
631 with open(LNET_CONF_FILE,
"w")
as fp:
632 fp.write(lnet_config)
638 self.logger.info(
"Doing ping to nids\n")
641 raise MotrError(errno.EINVAL,
"lent self ping failed\n")
662 1. check swap entry found in /etc/fstab 663 2. if found, do nothing 664 3. if not found, add swap entry in /etc/fstab 666 swap_entry = f
"{dev_name} swap swap defaults 0 0\n" 671 with open(FSTAB,
"r") as fp: 672 lines = fp.readlines() 674 ret = line.find(dev_name)
677 self.logger.info(f
"Swap entry found: {swap_entry}\n")
680 raise MotrError(errno.EINVAL, f
"Cant read f{FSTAB}\n")
684 with open(FSTAB,
"a")
as fp:
686 self.logger.info(f
"Swap entry added: {swap_entry}\n")
688 raise MotrError(errno.EINVAL, f
"Cant append f{FSTAB}\n")
695 cmd = f
"sed -i '/{vg_name}/d' {FSTAB}" 701 self.logger.info(f
"Make swap of {swap_dev}\n")
702 cmd = f
"mkswap -f {swap_dev}" 705 self.logger.info(f
"Test {swap_dev} swap device\n")
706 cmd = f
"test -e {swap_dev}" 709 self.logger.info(f
"Adding {swap_dev} swap device to {FSTAB}\n")
715 1. validate /etc/fstab 716 2. validate metadata device file 717 3. check requested volume group exist 718 4. if exist, remove volume group and swap related with it. 719 because if user request same volume group with different device. 720 5. If not exist, create volume group and lvm 721 6. create swap from lvm 724 cmd = f
"fdisk -l {metadata_dev}2" 729 metadata_dev = f
"{metadata_dev}2" 732 cmd = f
"pvdisplay {metadata_dev}" 737 self.logger.warning(f
"Volumes are already created on {metadata_dev}\n{out[0]}\n")
741 node_name = self.server_node[
'name']
742 vg_name = f
"vg_{node_name}_md{index}" 743 lv_swap_name = f
"lv_main_swap{index}" 744 lv_md_name = f
"lv_raw_md{index}" 745 swap_dev = f
"/dev/{vg_name}/{lv_swap_name}" 747 self.logger.info(f
"metadata device: {metadata_dev}\n")
749 self.logger.info(f
"Checking for {FSTAB}\n")
752 self.logger.info(f
"Checking for {metadata_dev}\n")
755 cmd = f
"fdisk -l {metadata_dev}" 759 cmd = f
"vgs {vg_name}" 764 self.logger.info(f
"Removing {vg_name} volume group\n")
768 cmd = f
"vgchange -an {vg_name}" 771 cmd = f
"vgremove {vg_name} -ff" 774 self.logger.info(f
"Creating physical volume from {metadata_dev}\n")
775 cmd = f
"pvcreate {metadata_dev} --yes" 778 self.logger.info(f
"Creating {vg_name} volume group from {metadata_dev}\n")
779 cmd = f
"vgcreate {vg_name} {metadata_dev}" 782 self.logger.info(f
"Adding {node_name} tag to {vg_name} volume group\n")
783 cmd = f
"vgchange --addtag {node_name} {vg_name}" 786 self.logger.info(
"Scanning volume group\n")
787 cmd =
"vgscan --cache" 790 self.logger.info(f
"Creating {lv_swap_name} lvm from {vg_name}\n")
791 cmd = f
"lvcreate -n {lv_swap_name} {vg_name} -l 51%VG --yes" 794 self.logger.info(f
"Creating {lv_md_name} lvm from {vg_name}\n")
795 cmd = f
"lvcreate -n {lv_md_name} {vg_name} -l 100%FREE --yes" 798 swap_check_cmd =
"free -m | grep Swap | awk '{print $2}'" 800 allocated_swap_size_before =
int(
float(free_swap_op[0].strip(
' \n')))
803 allocated_swap_size_after =
int(
float(allocated_swap_op[0].strip(
' \n')))
804 if allocated_swap_size_before >= allocated_swap_size_after:
805 raise MotrError(errno.EINVAL, f
"swap size before allocation" 806 f
"({allocated_swap_size_before}M) must be less than " 807 f
"swap size after allocation({allocated_swap_size_after}M)\n")
809 self.logger.info(f
"swap size before allocation ={allocated_swap_size_before}M\n")
810 self.logger.info(f
"swap_size after allocation ={allocated_swap_size_after}M\n")
814 cmd = f
"lsblk --noheadings --bytes {lv_path} | " "awk '{print $4}'" 816 lv_size = res[0].rstrip(
"\n")
817 lv_size =
int(lv_size)
818 self.logger.info(f
"{lv_path} size = {lv_size} \n")
819 if lvm_min_size
is None:
820 lvm_min_size = lv_size
822 lvm_min_size =
min(lv_size, lvm_min_size)
827 cvg_cnt = self.server_node[CVG_COUNT_KEY]
829 raise MotrError(errno.EINVAL,
"cvg_cnt not found\n")
834 cvg = self.server_node[
'cvg']
836 raise MotrError(errno.EINVAL,
"cvg not found\n")
843 raise MotrError(errno.EINVAL,
"cvg is empty\n")
850 for key, val
in elem.items():
857 if key==
"metadata_devices":
862 check_type(val[i], str, f
"metadata_devices[{i}]")
863 if key==
"data_devices":
871 return (
int(val/size) * size)
879 md_len = len(md_disks)
880 for i
in range(md_len):
884 self.logger.info(f
"setting MOTR_M0D_IOS_BESEG_SIZE to {lvm_min_size}\n")
885 cmd = f
'sed -i "/MOTR_M0D_IOS_BESEG_SIZE/s/.*/MOTR_M0D_IOS_BESEG_SIZE={lvm_min_size}/" {MOTR_SYS_CFG}' 895 for i
in range(
int(cvg_cnt)):
898 metadata_devices = cvg_item[
"metadata_devices"]
900 raise MotrError(errno.EINVAL,
"metadata devices not found\n")
901 check_type(metadata_devices, list,
"metadata_devices")
902 self.logger.info(f
"\nlvm metadata_devices: {metadata_devices}\n\n")
904 for device
in metadata_devices:
909 lv_md_name = f
"lv_raw_md{dev_count}" 910 cmd = f
"lvs -o lv_path | grep {lv_md_name}" 912 lv_path = res[0].rstrip(
"\n")
915 self.logger.info(f
"setting MOTR_M0D_IOS_BESEG_SIZE to {lvm_min_size}\n")
916 cmd = f
'sed -i "/MOTR_M0D_IOS_BESEG_SIZE/s/.*/MOTR_M0D_IOS_BESEG_SIZE={lvm_min_size}/" {MOTR_SYS_CFG}' 920 """Get lnet interface.""" 923 with open(LNET_CONF_FILE,
'r') as f: 925 for line
in f.readlines():
926 if len(line.strip()) <= 0:
continue 927 tokens = re.split(
r'\W+', line)
929 lnet_xface = tokens[4]
932 raise MotrError(errno.EINVAL, f
"Cant parse {LNET_CONF_FILE}")
934 if lnet_xface ==
None:
936 f
"Cant obtain iface details from {LNET_CONF_FILE}")
937 if lnet_xface
not in os.listdir(SYS_CLASS_NET_DIR):
939 f
"Invalid iface {lnet_xface} in lnet.conf")
943 """Check rpm packages.""" 946 cmd = f
"rpm -q {pkg}" 955 self.logger.info(f
"rpm found: {pkg}\n")
957 raise MotrError(errno.ENOENT, f
"Missing rpm: {pkg}")
960 """Get lnet nids of all available nodes in cluster.""" 962 myhostname = self.server_node[
"hostname"]
965 if (myhostname == node):
966 cmd =
"lctl list_nids" 972 nids.append(op[0].rstrip(
"\n"))
977 nodes_info = Conf.get(self._index,
'server_node')
979 for value
in nodes_info.values():
980 nodes.append(value[
"hostname"])
984 """Lnet lctl ping on all available nodes in cluster.""" 988 self.logger.info(
"lnet pinging on all nodes in cluster\n")
990 cmd = f
"lctl ping {nid}" 991 self.logger.info(f
"lctl ping on: {nid}\n")
997 2. validate lnet interface which was configured in init 998 3. ping on lnet interface 999 4. lctl ping on all nodes in cluster. motr_setup post_install and prepare 1000 MUST be performed on all nodes before executing this step. 1002 self.logger.info(
"post_install and prepare phases MUST be performed " 1003 "on all nodes before executing test phase\n")
1004 search_lnet_pkgs = [
"kmod-lustre-client",
"lustre-client"]
1008 self.logger.info(f
"lnet interface found: {lnet_xface}\n")
1010 cmd = f
"ip addr show {lnet_xface}" 1012 ip_addr = cmd_res[0]
1015 ip_addr = ip_addr.split(
"inet ")[1].
split(
"/")[0]
1016 self.logger.info(f
"lnet interface ip: {ip_addr}\n")
1018 raise MotrError(errno.EINVAL, f
"Cant parse {lnet_xface} ip addr")
1020 self.logger.info(f
"ping on: {ip_addr}\n")
1021 cmd = f
"ping -c 3 {ip_addr}" 1027 search_libfabric_pkgs = [
"libfabric"]
1033 for i
in range(
int(cvg_cnt)):
1036 metadata_devices = cvg_item[
"metadata_devices"]
1038 raise MotrError(errno.EINVAL,
"metadata devices not found\n")
1039 check_type(metadata_devices, list,
"metadata_devices")
1040 self.logger.info(f
"\nlvm metadata_devices: {metadata_devices}\n\n")
1042 for device
in metadata_devices:
1048 node_name = self.server_node[
'name']
1051 lv_list =
execute_command(self,
"lvdisplay | grep \"LV Path\" | awk \'{ print $3 }\'")[0].
split(
'\n')
1052 lv_list = lv_list[0:len(lv_list)-1]
1056 for i
in range(1, metadata_disks_count+1):
1057 md_lv_path = f
'/dev/vg_{node_name}_md{i}/lv_raw_md{i}' 1058 swap_lv_path = f
'/dev/vg_{node_name}_md{i}/lv_main_swap{i}' 1060 if md_lv_path
in lv_list:
1061 if swap_lv_path
in lv_list:
1064 self.logger.warning(f
"{swap_lv_path} does not exist. Need to create lvm\n")
1067 self.logger.warning(f
"{md_lv_path} does not exist. Need to create lvm\n")
1072 cmd =
'/usr/bin/hctl status' 1073 self.logger.info(f
"Executing cmd : '{cmd}'\n")
1081 cmd = f
'/usr/bin/yum list installed {pkg}' 1084 self.logger.info(f
"{pkg} is installed\n")
1087 self.logger.info(f
"{pkg} is not installed\n")
1091 mix_workload_path = f
"{MOTR_WORKLOAD_DIR}/mix_workload.yaml" 1092 m0worklaod_path = f
"{MOTR_WORKLOAD_DIR}/m0workload" 1093 m0crate_path = f
"{MOTR_WORKLOAD_DIR}/m0crate_workload_batch_1_file1.yaml" 1095 os.path.isfile(m0worklaod_path)
and 1096 os.path.isfile(mix_workload_path)
and 1097 os.path.isfile(m0crate_path)
1099 cmd = f
"{m0worklaod_path} -t {mix_workload_path}" 1101 self.logger.info(f
"{out[0]}\n")
1103 self.logger.
error(
"workload files are missing\n")
1111 logger = logging.getLogger(LOGGER)
1112 if not os.path.exists(LOGDIR):
1114 os.makedirs(LOGDIR, exist_ok=
True)
1115 with open(f
'{self.logfile}',
'w'):
pass 1117 raise MotrError(errno.EINVAL, f
"{self.logfile} creation failed\n")
1119 if not os.path.exists(self.logfile):
1121 with open(f
'{self.logfile}',
'w'):
pass 1123 raise MotrError(errno.EINVAL, f
"{self.logfile} creation failed\n")
1124 logger.setLevel(logging.DEBUG)
1126 fh = logging.FileHandler(self.logfile)
1127 fh.setLevel(logging.DEBUG)
1129 ch = logging.StreamHandler()
1130 ch.setLevel(logging.ERROR)
1131 formatter = logging.Formatter(
'%(asctime)s - %(levelname)s - %(message)s')
1132 fh.setFormatter(formatter)
1133 ch.setFormatter(formatter)
1134 logger.addHandler(fh)
1135 logger.addHandler(ch)
1139 if not os.path.exists(os.path.dirname(log_dir)):
1140 self.logger.warning(f
"{log_dir} does not exist")
1143 if len(patterns) == 0:
1144 self.logger.info(f
"Removing {log_dir}")
1148 for pattern
in patterns:
1155 search_pat =
"{}/{}*".
format(log_dir, pattern)
1156 for dname
in glob.glob(search_pat, recursive=
True):
1157 removed_dirs.append(dname)
1159 if len(removed_dirs) > 0:
1160 self.logger.info(f
"Removed below directories of pattern {pattern} from {log_dir}.\n{removed_dirs}")
1163 for log_dir
in MOTR_LOG_DIRS:
1164 if os.path.exists(log_dir):
1167 self.logger.warning(f
"{log_dir} does not exist")
1168 if os.path.exists(IVT_DIR):
1169 self.logger.info(f
"Removing {IVT_DIR}")
1173 for service
in services:
1174 self.logger.info(f
"Checking status of {service} service\n")
1175 cmd = f
"systemctl status {service}" 1183 self.logger.info(
"Doing ping to nids.\n")
1189 self.logger.info(
"lnet is not up. Restaring lnet.\n")
1191 self.logger.info(
"Doing ping to nids after 5 seconds.\n")
1193 self.logger.warning(
"lnet is up. Doing ping to nids after 5 seconds.\n")
1202 nids.append(op[0].strip(
"\n"))
1203 self.logger.info(f
"nids= {nids}\n")
1205 cmd = f
"lctl ping {nid}" 1206 self.logger.info(f
"lctl ping on: {nid}\n")
1213 hostname = self.server_node[
"hostname"]
1214 nodes_info = Conf.get(self._index,
'server_node')
1217 for value
in nodes_info.values():
1218 host = value[
"hostname"]
1219 cvg_count = value[CVG_COUNT_KEY]
1220 name = value[
"name"]
1221 self.logger.info(f
"update_motr_hare_keys for {host}\n")
1222 for i
in range(
int(cvg_count)):
1224 lv_md_name = f
"lv_raw_md{i + 1}" 1226 if (hostname == value[
"hostname"]):
1227 cmd = (
"lvs -o lv_path")
1229 r = re.compile(f
".*{lv_md_name}")
1232 lv_path = lvm_find[0].strip()
1233 except Exception
as e:
1234 self.logger.info(f
"exception pass {e}\n")
1236 cmd = (f
"ssh {host}" 1237 f
" \"lvs -o lv_path\"")
1238 for retry
in range(1, retry_count):
1239 self.logger.info(f
"Getting LVM data for {host}, attempt: {retry}\n")
1241 r = re.compile(f
".*{lv_md_name}")
1244 lv_path = lvm_find[0].strip()
1245 except Exception
as e:
1246 self.logger.info(f
"exception pass {e}\n")
1248 self.logger.info(f
"found lvm {lv_path} after {retry} count")
1251 time.sleep(retry_delay)
1253 raise MotrError(res[1], f
"[ERR] {lv_md_name} not found on {host}\n")
1254 self.logger.info(f
"setting key server>{name}>cvg[{i}]>m0d[0]>md_seg1" 1255 f
" with value {lv_path} in {self._motr_hare_conf}")
1256 Conf.set(self._index_motr_hare,f
"server>{name}>cvg[{i}]>m0d[0]>md_seg1",f
"{lv_path.strip()}")
1257 Conf.save(self._index_motr_hare)
1259 for value
in nodes_info.values():
1260 if (hostname == value[
"hostname"]):
1263 host = value[
"hostname"]
1264 cmd = (f
"scp {self._motr_hare_conf}" 1265 f
" {host}:{self._motr_hare_conf}")
1273 for i
in range(
int(cvg_cnt)):
1276 metadata_devices = cvg_item[
"metadata_devices"]
1278 raise MotrError(errno.EINVAL,
"metadata devices not found\n")
1279 check_type(metadata_devices, list,
"metadata_devices")
1280 self.logger.info(f
"lvm metadata_devices: {metadata_devices}")
1282 for device
in metadata_devices:
1283 cmd = f
"pvs | grep {device} " "| awk '{print $2}'" 1286 vol_grps.append(ret[0].strip())
1290 self.logger.info(
"Removing cortx lvms")
1292 if (len(vol_grps) == 0):
1293 self.logger.info(
"No cortx volume groups (e.g. vg_srvnode-1_md1) are found \n")
1295 self.logger.info(f
"Volume groups found: {vol_grps}")
1296 self.logger.info(
"Executing swapoff -a")
1298 self.logger.info(f
"Removing cortx LVM entries from {FSTAB}")
1301 cmd = f
"pvs|grep {vg} |" "awk '{print $1}'" 1303 cmd = f
"lvs|grep {vg} |" "awk '{print $1}'" 1308 lv_path = f
"/dev/{vg}/{lv}" 1309 self.logger.info(f
"Executing lvchange -an {lv_path}")
1311 self.logger.info(f
"Executing lvremove {lv_path}")
1313 if os.path.exists(lv_path):
1314 self.logger.info(
"Removing dmsetup entries using cmd " 1315 f
"\'dmsetup remove {lv_path}\'")
1319 self.logger.info(f
"Executing vgchange -an {vg}")
1321 self.logger.info(f
"Executing vgremove {vg}")
1326 self.logger.info(f
"Executing pvremove {pv}")
1328 self.logger.info(f
"Executing wipefs -a {pv}")
1336 cmd =
"ls -l /dev/vg_srvnode*/* | awk '{print $9}'" 1338 for lv_path
in lv_paths:
1342 if os.path.exists(lv_path):
1343 self.logger.info(f
"dmsetup remove {lv_path}")
1347 cmd = f
"fdisk -l {device} |" f
"grep {device}:" "| awk '{print $5}'" 1349 return ret[0].strip()
1352 fp = open(file,
"r") 1353 file_data = fp.read() 1355 for line
in file_data.splitlines():
1356 if line.startswith(
'#')
or (len(line.strip()) == 0):
1358 entry = line.split(
'=',1)
1359 config_dict[entry[0]] = entry[1]
1366 for i
in range(
int(cvg_cnt)):
1369 metadata_devices = cvg_item[
"metadata_devices"]
1371 raise MotrError(errno.EINVAL,
"metadata devices not found\n")
1372 check_type(metadata_devices, list,
"metadata_devices")
1373 self.logger.info(f
"\nlvm metadata_devices: {metadata_devices}\n\n")
1374 for device
in metadata_devices:
1384 fname = os.path.split(device)
1385 cmd = f
"lsblk -o name | grep -c {fname}" 1392 if total_parts == 0:
1393 self.logger.info(f
"No partitions found on {device}")
1395 self.logger.info(f
"No. of partitions={total_parts} on {device}")
1396 for i
in range(
int(total_parts)):
1400 self.logger.
error(f
"Deletion of partition({part_num}) failed on {device}")
1405 cmd = f
"fdisk {device}" 1406 stdin_str = str(
"d\n"+f
"{part_num}"+
"\n" +
"w\n")
1412 len_fids_list = len(fids)
1415 for i
in range(len_fids_list):
1416 if fids[i][
"name"] == service:
1417 fids_list.append(fids[i][
"fid"])
1419 num_fids = len(fids_list)
1426 return fids_list[idx]
1428 self.logger.
error(f
"Invalid index({idx}) of service({service})" 1429 f
"Valid index should be in range [0-{num_fids-1}]." 1433 self.logger.
error(f
"No fids for service({service}). Returning -1.")
1440 hare_lib_path = f
"{self.local_path}/hare/config/{self.machine_id}" 1441 cmd = f
"hctl fetch-fids --conf-dir {hare_lib_path}" 1443 self.logger.info(f
"Available fids:\n{out[0]}\n")
1444 fp = open(TEMP_FID_FILE,
"w")
1447 fp = open(TEMP_FID_FILE,
"r") 1448 fids = yaml.safe_load(fp) 1450 self.logger.
error(
"No fids returned by 'hctl fetch-fids'. Returning -1.\n")
1452 fid =
get_fid(self, fids, service, idx)
1457 Get list of running m0d process 1461 for proc
in psutil.process_iter():
1464 pinfo = proc.as_dict(attrs=[
'pid',
'name',
'username'])
1465 if pinfo.get(
'name') ==
"m0d":
1467 listOfProc.append(pinfo);
1468 except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
1474 cmd=f
"KILL -SIGTERM {proc.get('pid')}" 1482 self.logger.info(f
"service={service}\nidx={idx}\n")
1484 if service
in [
"fsm",
"client",
"motr_client"]:
1485 cmd = f
"{MOTR_FSM_SCRIPT_PATH}" 1490 cmd = f
"cp {MOTR_MINI_PROV_LOGROTATE_SCRIPT} {CROND_DIR}" 1493 cmd =
"/usr/sbin/crond start" 1498 confd_path = f
"{self.local_path}/motr/sysconfig/{self.machine_id}/confd.xc" 1501 cmd = f
"cp -f {confd_path} /etc/motr/" 1504 cmd = f
"cp -v {self.local_path}/motr/sysconfig/{self.machine_id}/motr /etc/sysconfig/" 1512 cmd = f
"{MOTR_SERVER_SCRIPT_PATH} m0d-{fid}"
def receiveSigTerm(signalNumber, frame)
def add_swap_fstab(self, dev_name)
def create_lvm(self, index, metadata_dev)
static void split(m0_bindex_t offset, int nr, bool commit)
static struct m0_list list
def update_config_file(self, fname, kv_list)
def verify_libfabric(self)
def get_disk_size(self, device)
def check_type(var, vtype, msg)
def get_fid(self, fids, service, idx)
def update_copy_motr_config_file(self)
def execute_command_console(self, command)
def update_bseg_size(self)
static void decode(struct m0_xcode_obj *obj)
def calc_lvm_min_size(self, lv_path, lvm_min_size)
def get_md_disks_lists(self, node_info)
def execute_command_without_exception(self, cmd, timeout_secs=TIMEOUT_SECS, retries=1)
def configure_machine_id(self, phase)
def restart_services(self, services)
def validate_storage_schema(storage)
def delete_parts(self, dev_count, device)
def check_pkgs(self, pkgs)
def execute_command_verbose(self, cmd, timeout_secs=TIMEOUT_SECS, verbose=False, set_timeout=True, retry_count=CMD_RETRY_COUNT)
def delete_part(self, device, part_num)
def validate_motr_rpm(self)
def create_dirs(self, dirs)
def get_cvg_cnt_and_cvg(self)
def __init__(self, rc, message, args)
def create_swap(self, swap_dev)
def remove_logs(self, patterns)
def get_metadata_disks_count(self)
def set_setup_size(self, service)
def validate_files(files)
def get_part_count(self, device)
def remove_dirs(self, log_dir, patterns)
def get_nids(self, nodes)
def fetch_fid(self, service, idx)
def update_motr_hare_keys_for_all_nodes(self)
static long long min(long long a, long long b)
def execute_command(self, cmd, timeout_secs=TIMEOUT_SECS, verbose=False, retries=1, stdin=None, logging=True)
def del_swap_fstab_by_vg_name(self, vg_name)
def execute_command_without_log(cmd, timeout_secs=TIMEOUT_SECS, verbose=False, retries=1, stdin=None, logging=False)
def pkg_installed(self, pkg)
def get_server_node(self)
def update_to_file(self, index, url, machine_id, md_disks)
def add_entry_to_logrotate_conf_file(self)
def get_value(self, key, key_type)
def configure_libfabric(self)
def start_service(self, service, idx)
def get_mdisks_from_list(self, md_lists)
def remove_dm_entries(self)
def check_services(self, services)
def get_logical_node_class(self)
def update_motr_hare_keys(self, nodes)
def getListOfm0dProcess()