Set open file limit for dcmanager services

The current open file limit of 1024 is not enough when executing
operations that involve a large number of subclouds in parallel
(e.g. patching orchestration). dcmanager-audit already has its limit
increased through the existing global option 'worker_rlimit_nofile',
but the other services are not using this option.

This commit moves the function that sets the limit to the utils.py
module and makes the other dcmanager services use the defined limit.

Test Plan:
1. PASS - Verify that the open file limit gets updated to the defined
          value by checking the service log file and by verifying the
          /proc/<pid>/limits file;
2. PASS - Verify that the new limit is enough to support DC patch
          orchestration with MAX_SUBCLOUD_GROUP_MAX_PARALLEL_SUBCLOUDS
          subclouds;
3. PASS - Verify that the limit can be changed by modifying the
          /etc/dcmanager/dcmanager.conf file with worker_rlimit_nofile
          option;
4. PASS - Verify that it's not possible to increase the soft limit past
          the hard limit.

Story: 2010584
Task: 47751

Signed-off-by: Gustavo Herzmann <gustavo.herzmann@windriver.com>
Change-Id: Ia52e532fd14cd2e61714477a561674d16ca45195
This commit is contained in:
Gustavo Herzmann 2023-03-30 11:36:46 -03:00
parent 46c0c59a9c
commit d2b8556aa6
5 changed files with 29 additions and 25 deletions

View File

@ -14,7 +14,6 @@
#
import functools
import resource
import six
from oslo_config import cfg
@ -30,6 +29,7 @@ from dcmanager.common import exceptions
from dcmanager.common.i18n import _
from dcmanager.common import messaging as rpc_messaging
from dcmanager.common import scheduler
from dcmanager.common import utils
CONF = cfg.CONF
LOG = logging.getLogger(__name__)
@ -66,6 +66,7 @@ class DCManagerAuditService(service.Service):
self.subcloud_audit_manager = None
def start(self):
utils.set_open_file_limit(cfg.CONF.worker_rlimit_nofile)
target = oslo_messaging.Target(version=self.rpc_api_version,
server=self.host,
topic=self.topic)
@ -183,8 +184,8 @@ class DCManagerAuditWorkerService(service.Service):
self.subcloud_audit_worker_manager = None
def start(self):
utils.set_open_file_limit(cfg.CONF.worker_rlimit_nofile)
self.init_tgm()
self.set_resource_limit()
self.init_audit_managers()
target = oslo_messaging.Target(version=self.rpc_api_version,
server=self.host,
@ -200,29 +201,6 @@ class DCManagerAuditWorkerService(service.Service):
def init_audit_managers(self):
    """Instantiate the subcloud audit worker manager for this service."""
    worker_manager = SubcloudAuditWorkerManager()
    self.subcloud_audit_worker_manager = worker_manager
@staticmethod
def set_resource_limit():
    """Adjust the maximum number of open files for this process (soft limit).

    The requested value is read from ``cfg.CONF.worker_rlimit_nofile``. A
    request above the current hard limit is clamped to the hard limit.
    Any failure is logged and swallowed rather than raised.
    """
    try:
        (current_soft,
         current_hard) = resource.getrlimit(resource.RLIMIT_NOFILE)
        new_soft = cfg.CONF.worker_rlimit_nofile

        # An unlimited hard limit is reported as RLIM_INFINITY, which is
        # -1 on some platforms. A plain '>' comparison would then flag
        # every request as exceeding it and set the soft limit to
        # unlimited, so that case is excluded explicitly.
        hard_is_unlimited = current_hard == resource.RLIM_INFINITY
        if not hard_is_unlimited and new_soft > current_hard:
            LOG.error('New process open file soft limit [%s] '
                      'exceeds the hard limit [%s]. '
                      'Setting to hard limit instead.',
                      new_soft, current_hard)
            new_soft = current_hard

        if new_soft != current_soft:
            LOG.info('Setting process open file limit to %s (from %s)',
                     new_soft, current_soft)
            resource.setrlimit(resource.RLIMIT_NOFILE,
                               (new_soft, current_hard))
    except Exception as ex:
        # Deliberately broad: failing to raise the limit is logged but
        # must not propagate out of this helper.
        LOG.exception(
            'Failed to set the audit worker NOFILE resource limit: %s',
            ex)
def _stop_rpc_server(self):
# Stop RPC connection to prevent new requests
LOG.debug(_("Attempting to stop audit-worker RPC service..."))

View File

@ -22,6 +22,7 @@ import netaddr
import os
import pwd
import re
import resource as sys_resource
import six.moves
import subprocess
import tsconfig.tsconfig as tsc
@ -880,3 +881,22 @@ def get_management_gateway_address(payload):
if payload.get('admin_gateway_address', None):
return payload.get('admin_gateway_address')
return payload.get('management_gateway_address', '')
def set_open_file_limit(new_soft_limit: int):
    """Adjust the maximum number of open files for this process (soft limit).

    A request above the current hard limit is clamped to the hard limit.
    Any failure is logged and swallowed rather than raised, so the limit
    already in effect is kept.

    :param new_soft_limit: desired RLIMIT_NOFILE soft limit for the
        current process.
    """
    try:
        current_soft, current_hard = sys_resource.getrlimit(
            sys_resource.RLIMIT_NOFILE)

        # An unlimited hard limit is reported as RLIM_INFINITY, which is
        # -1 on some platforms. A plain '>' comparison would then flag
        # every request as exceeding it and set the soft limit to
        # unlimited, so that case is excluded explicitly.
        hard_is_unlimited = current_hard == sys_resource.RLIM_INFINITY
        if not hard_is_unlimited and new_soft_limit > current_hard:
            LOG.error('New process open file soft limit [%s] exceeds the '
                      'hard limit [%s]. Setting to hard limit instead.',
                      new_soft_limit, current_hard)
            new_soft_limit = current_hard

        if new_soft_limit != current_soft:
            LOG.info('Setting process open file limit to %s (from %s)',
                     new_soft_limit, current_soft)
            sys_resource.setrlimit(sys_resource.RLIMIT_NOFILE,
                                   (new_soft_limit, current_hard))
    except Exception as ex:
        # Deliberately broad: failing to raise the limit is logged but
        # must not propagate out of this helper.
        LOG.exception('Failed to set NOFILE resource limit: %s', ex)

View File

@ -29,6 +29,7 @@ from dcmanager.common import context
from dcmanager.common import exceptions
from dcmanager.common.i18n import _
from dcmanager.common import messaging as rpc_messaging
from dcmanager.common import utils
from dcmanager.manager.subcloud_manager import SubcloudManager
CONF = cfg.CONF
@ -76,6 +77,7 @@ class DCManagerService(service.Service):
self.subcloud_manager = SubcloudManager()
def start(self):
utils.set_open_file_limit(cfg.CONF.worker_rlimit_nofile)
self.dcmanager_id = uuidutils.generate_uuid()
self.init_managers()
target = oslo_messaging.Target(version=self.rpc_api_version,

View File

@ -27,6 +27,7 @@ from dcmanager.common import context
from dcmanager.common import exceptions
from dcmanager.common import messaging as rpc_messaging
from dcmanager.common import scheduler
from dcmanager.common import utils
from dcmanager.orchestrator.sw_update_manager import SwUpdateManager
CONF = cfg.CONF
@ -63,6 +64,7 @@ class DCManagerOrchestratorService(service.Service):
self.sw_update_manager = None
def start(self):
utils.set_open_file_limit(cfg.CONF.worker_rlimit_nofile)
self.init_tgm()
self.init_manager()
target = oslo_messaging.Target(version=self.rpc_api_version,

View File

@ -33,6 +33,7 @@ from dcmanager.common import context
from dcmanager.common import exceptions
from dcmanager.common.i18n import _
from dcmanager.common import messaging as rpc_messaging
from dcmanager.common import utils
from dcmanager.state.subcloud_state_manager import SubcloudStateManager
LOG = logging.getLogger(__name__)
@ -79,6 +80,7 @@ class DCManagerStateService(service.Service):
def start(self):
LOG.info("Starting %s", self.__class__.__name__)
utils.set_open_file_limit(cfg.CONF.worker_rlimit_nofile)
self._init_managers()
target = oslo_messaging.Target(version=self.rpc_api_version,
server=self.host,