Hello,
When I start up the Zato servers (quickstart setup), they always take at
least two minutes to start, and sometimes there is a WORKER TIMEOUT and
the whole boot process starts again.
2015-05-04 09:37:02,147 - INFO - 3472:Dummy-1 - gunicorn.main:22 - Starting gunicorn 18.0
2015-05-04 09:37:02,169 - INFO - 3472:Dummy-1 - gunicorn.main:22 - Listening at: http://127.0.0.1:17010 (3472)
2015-05-04 09:37:02,169 - INFO - 3472:Dummy-1 - gunicorn.main:22 - Using worker: gevent
2015-05-04 09:37:02,176 - INFO - 3500:Dummy-1 - gunicorn.main:22 - Booting worker with pid: 3500
2015-05-04 09:37:02,247 - INFO - 3501:Dummy-1 - gunicorn.main:22 - Booting worker with pid: 3501
2015-05-04 09:41:02,361 - CRITICAL - 3472:Dummy-1 - gunicorn.main:204 - WORKER TIMEOUT (pid:3500)
2015-05-04 09:41:02,362 - CRITICAL - 3472:Dummy-1 - gunicorn.main:204 - WORKER TIMEOUT (pid:3501)
2015-05-04 09:41:02,371 - CRITICAL - 3472:Dummy-1 - gunicorn.main:204 - WORKER TIMEOUT (pid:3500)
2015-05-04 09:41:02,371 - CRITICAL - 3472:Dummy-1 - gunicorn.main:204 - WORKER TIMEOUT (pid:3501)
2015-05-04 09:41:02,375 - CRITICAL - 3472:Dummy-1 - gunicorn.main:204 - WORKER TIMEOUT (pid:3501)
2015-05-04 09:41:02,377 - INFO - 3567:Dummy-1 - gunicorn.main:22 - Booting worker with pid: 3567
2015-05-04 09:41:03,384 - INFO - 3569:Dummy-1 - gunicorn.main:22 - Booting worker with pid: 3569
2015-05-04 09:44:02,814 - ERROR - 3567:Dummy-1 - gunicorn.main:22 - Exception in worker process:
Traceback (most recent call last):
  File "/home/altux/zato-2.0.3/code/eggs/gunicorn-18.0-py2.7.egg/gunicorn/arbiter.py", line 494, in spawn_worker
    self.cfg.post_fork(self, worker)
  File "/home/altux/zato-2.0.3/code/zato-server/src/zato/server/base/parallel.py", line 834, in post_fork
    ParallelServer.start_server(worker.app.zato_wsgi_app, arbiter.zato_deployment_key)
  File "/home/altux/zato-2.0.3/code/zato-server/src/zato/server/base/parallel.py", line 796, in start_server
    is_first = parallel_server._after_init_common(server, zato_deployment_key)
  File "/home/altux/zato-2.0.3/code/zato-server/src/zato/server/base/parallel.py", line 328, in _after_init_common
    is_first = self.maybe_on_first_worker(server, self.kvdb.conn, deployment_key)
  File "/home/altux/zato-2.0.3/code/zato-server/src/zato/server/base/parallel.py", line 227, in maybe_on_first_worker
    with Lock(lock_name, self.deployment_lock_expires, self.deployment_lock_timeout, redis_conn):
  File "/home/altux/zato-2.0.3/code/eggs/retools-0.3-py2.7.egg/retools/lock.py", line 73, in __enter__
    raise LockTimeout("Timeout while waiting for lock")
LockTimeout: Timeout while waiting for lock
2015-05-04 09:44:02,816 - INFO - 3567:Dummy-1 - gunicorn.main:22 - Worker exiting (pid: 3567)
2015-05-04 09:44:04,142 - ERROR - 3569:Dummy-1 - gunicorn.main:22 - Exception in worker process:
Traceback (most recent call last):
  File "/home/altux/zato-2.0.3/code/eggs/gunicorn-18.0-py2.7.egg/gunicorn/arbiter.py", line 494, in spawn_worker
    self.cfg.post_fork(self, worker)
  File "/home/altux/zato-2.0.3/code/zato-server/src/zato/server/base/parallel.py", line 834, in post_fork
    ParallelServer.start_server(worker.app.zato_wsgi_app, arbiter.zato_deployment_key)
  File "/home/altux/zato-2.0.3/code/zato-server/src/zato/server/base/parallel.py", line 796, in start_server
    is_first = parallel_server._after_init_common(server, zato_deployment_key)
  File "/home/altux/zato-2.0.3/code/zato-server/src/zato/server/base/parallel.py", line 328, in _after_init_common
    is_first = self.maybe_on_first_worker(server, self.kvdb.conn, deployment_key)
  File "/home/altux/zato-2.0.3/code/zato-server/src/zato/server/base/parallel.py", line 227, in maybe_on_first_worker
    with Lock(lock_name, self.deployment_lock_expires, self.deployment_lock_timeout, redis_conn):
  File "/home/altux/zato-2.0.3/code/eggs/retools-0.3-py2.7.egg/retools/lock.py", line 73, in __enter__
    raise LockTimeout("Timeout while waiting for lock")
LockTimeout: Timeout while waiting for lock
2015-05-04 09:44:04,144 - INFO - 3569:Dummy-1 - gunicorn.main:22 - Worker exiting (pid: 3569)
2015-05-04 09:44:04,271 - INFO - 3472:Dummy-1 - gunicorn.main:22 - Shutting down: Master
2015-05-04 09:44:04,272 - INFO - 3472:Dummy-1 - gunicorn.main:22 - Reason: Worker failed to boot.
After that, I have to delete the PID files and start the servers again.
This behaviour looks quite random to me: sometimes everything works on the
first startup, sometimes it works after a first timeout, and sometimes I
have to reboot the server (occasionally more than once).
That’s quite a pain.
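Given the LockTimeout in the tracebacks, I also tried looking for leftover
lock keys in Redis with roughly the snippet below. Note that the '*lock*'
pattern is a guess on my part; I do not know the exact key names Zato and
retools actually use.

import redis

# The quickstart KVDB is assumed here to be the local Redis on the default port
conn = redis.StrictRedis(host='localhost', port=6379, db=0)

# '*lock*' is a guessed pattern, not a documented prefix
for key in conn.keys('*lock*'):
    # A lock key with no expiry (TTL == -1) left behind by a killed worker
    # would block every subsequent startup until deleted by hand,
    # e.g. with conn.delete(key)
    print('%s ttl: %s' % (key, conn.ttl(key)))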
I assume it is a gunicorn issue. Does anyone know of any settings that
could help?
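For reference, judging from the traceback, the retools lock that Zato takes
at startup (fed with its deployment_lock_expires and deployment_lock_timeout
values) seems to behave roughly like the sketch below. This is only my
simplified reading, not the actual retools code, but it would explain the
symptom: a stale lock key left behind by a previously killed worker makes
every new worker poll for the whole timeout and then die with LockTimeout.

import time

class LockTimeout(Exception):
    """Raised when the lock cannot be acquired within `timeout` seconds."""

class Lock(object):
    # Simplified reading of retools.lock.Lock, for illustration only
    def __init__(self, key, expires=60, timeout=10, redis=None):
        self.key = key
        self.expires = expires
        self.timeout = timeout
        self.redis = redis

    def __enter__(self):
        timeout = self.timeout
        while timeout >= 0:
            expires = time.time() + self.expires + 1
            # SETNX succeeds only if nobody currently holds the key
            if self.redis.setnx(self.key, expires):
                return
            current = self.redis.get(self.key)
            # Take over the lock if its previous holder let it expire
            if current and float(current) < time.time():
                self.redis.delete(self.key)
                continue
            # Otherwise poll once a second until we run out of patience
            timeout -= 1
            time.sleep(1)
        raise LockTimeout("Timeout while waiting for lock")

    def __exit__(self, exc_type, exc_value, traceback):
        self.redis.delete(self.key)

It is used as "with Lock(lock_name, expires, timeout, redis_conn): ...",
which matches the call in parallel.py in the traceback above.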
Andrea.