Dealing with accented characters on service input params


#1

Hey there!

So I was coding a new upsert service to check the status of the classic INSERT OR UPDATE statement in SQLAlchemy (specifically, INSERT INTO … ON CONFLICT DO UPDATE from PostgreSQL) and ran into this error when trying to insert a guest with the surname Heidkrüger:

2019-02-22 16:53:33,151 DEBG 'zato-server1' stdout output:
2019-02-22 16:53:33,151 - INFO - 135:DummyThread-9 - guest.upsert:504 - Result to be saved in the cache: {'surname': u'Heidkr\xfcger', 'name': u'Diane', 'gender': <Gender.Female: '2'>, 'id': 814, 'email': u'dkruger@gmail.com'}

2019-02-22 16:53:33,154 DEBG 'zato-server1' stdout output:
2019-02-22 16:53:33,153 - WARNING - 135:DummyThread-10 - zato.server.connection.cache:860 - Could not run `SET` after_state_changed in cache `guests`, data:`{u'key': u'id:814', u'msg_type': '0002', 'expires_at': 0.0, u'expiry': 0.0, u'source_worker_id': u'1.1.135.bad3879b7ed483dece88f242', u'value': "(dp1\nS'surname'\np2\nVHeidkr\xfcger\np3\nsS'name'\np4\nVDiane\np5\nsS'gender'\np6\ncgenesisng.schema.guest\nGender\np7\n(S'2'\ntRp8\nsS'id'\np9\nI814\nsS'email'\np10\nVdkruger@gmail.com\np11\ns.", 'orig_now': 1550854413.15134, u'action': '106420', u'is_value_pickled': True, u'cache_name': u'guests', u'is_key_pickled': False}`, e:`Traceback (most recent call last):
  File "/opt/zato/3.0/code/zato-server/src/zato/server/connection/cache.py", line 857, in after_state_changed
    self.server.broker_client.publish(data)
  File "/opt/zato/3.0/code/zato-broker/src/zato/broker/client.py", line 163, in publish
    self.pub_client.publish(topic, dumps(msg))
  File "/opt/zato/3.0/code/local/lib/python2.7/site-packages/anyjson/__init__.py", line 141, in dumps
    return implementation.dumps(value, *args, **kwargs)
  File "/opt/zato/3.0/code/local/lib/python2.7/site-packages/anyjson/__init__.py", line 87, in dumps
    return self._encode(data, *args, **kwargs)
  File "/opt/zato/3.0/code/local/lib/python2.7/site-packages/simplejson/__init__.py", line 354, in dumps
    return _default_encoder.encode(obj)
  File "/opt/zato/3.0/code/local/lib/python2.7/site-packages/simplejson/encoder.py", line 262, in encode
    chunks = self.iterencode(o, _one_shot=True)
  File "/opt/zato/3.0/code/local/lib/python2.7/site-packages/simplejson/encoder.py", line 340, in iterencode
    return _iterencode(o, 0)
UnicodeDecodeError: 'utf8' codec can't decode byte 0xfc in position 26: invalid start byte

Now, I am not sure whether I had previously tested names or surnames with accented characters in any of the services I have developed so far, but this came as a surprise. This is the code of the service:

class Upsert(Service):
    """Insert a guest, or update the existing guest that has the same email.

    After committing, the stored record is written to the ``guests`` built-in
    cache and returned as the response payload.
    """

    class SimpleIO:
        # Only name, surname and email are mandatory; the remaining guest
        # attributes may be omitted by the caller.
        input_required = ('name', 'surname', 'email')
        input_optional = ('gender', 'passport', Date('birthdate'), 'address1',
                          'address2', 'locality', 'postcode', 'province',
                          'country', 'home_phone', 'mobile_phone')
        output_optional = ('id', 'name', 'surname', 'gender', 'email',
                           'passport', Date('birthdate'), 'address1',
                           'address2', 'locality', 'postcode', 'province',
                           'country', 'home_phone', 'mobile_phone')
        skip_empty_keys = True

    def handle(self):
        """Upsert the guest described by the request input.

        The guest is looked up by email. If a row exists it is updated from
        the input values, otherwise a new ``Guest`` is added. The committed
        record (minus ``None`` values) is stored in the cache under
        ``id:<id>`` and returned as the payload, together with ``Location``
        and ``Cache-Control`` headers.
        """
        conn = self.user_config.genesisng.database.connection
        p = self.request.input

        params = {
            'name': p.name,
            'surname': p.surname,
            'gender': p.gender,
            'email': p.email,
            'passport': p.passport,
            'birthdate': p.birthdate,
            'address1': p.address1,
            'address2': p.address2,
            'locality': p.locality,
            'postcode': p.postcode,
            'province': p.province,
            'country': p.country,
            'home_phone': p.home_phone,
            'mobile_phone': p.mobile_phone,
            'deleted': None,
        }

        # Remove empty strings from params. A new dict is built instead of
        # deleting while iterating over keys(), which only works on Python 2
        # (on Python 3 it raises RuntimeError).
        params = {k: v for k, v in params.items() if v != ''}

        with closing(self.outgoing.sql.get(conn).session()) as session:
            # INSERT .. ON CONFLICT DO UPDATE is not well supported by the
            # current version of SQLAlchemy (1.3), so we do it manually.
            query = session.query(Guest).filter(Guest.email == p.email)
            result = query.one_or_none()

            if result:
                # Update the record
                result.fromdict(params)
            else:
                # Add a new record
                result = Guest().fromdict(params)
                session.add(result)
            session.commit()

            # Save the record in the cache
            cache_key = 'id:%s' % result.id
            cache = self.cache.get_cache('builtin', 'guests')
            result = result.asdict()
            # Iterate over a snapshot of the keys so that entries can be
            # deleted safely on both Python 2 and Python 3.
            for k in list(result):
                if result[k] is None:
                    del result[k]
            self.logger.info('Result to be saved in the cache: %s', result)
            cache.set(cache_key, result)

            # Return the result
            self.response.status_code = OK
            self.response.payload = result
            url = self.user_config.genesisng.location.guests
            self.response.headers['Location'] = url.format(id=result['id'])
            self.response.headers['Cache-Control'] = 'no-cache'

What am I missing? Should I be decoding the parameters myself? Should I be typing them with Unicode()?