From 09afab1c6208b52689ed7a5dd1f88f4867c20701 Mon Sep 17 00:00:00 2001 From: Mike Bonnet Date: Oct 20 2009 18:39:28 +0000 Subject: backport significant improvements to utf-8/unicode handling from upstream --- diff --git a/cheetah-2.2.2-unicode-fixes.patch b/cheetah-2.2.2-unicode-fixes.patch new file mode 100644 index 0000000..7fac5ff --- /dev/null +++ b/cheetah-2.2.2-unicode-fixes.patch @@ -0,0 +1,191 @@ +diff -uNr Cheetah-2.2.2.orig/cheetah/DummyTransaction.py Cheetah-2.2.2/cheetah/DummyTransaction.py +--- Cheetah-2.2.2.orig/cheetah/DummyTransaction.py 2009-09-14 19:04:00.000000000 -0400 ++++ Cheetah-2.2.2/cheetah/DummyTransaction.py 2009-10-19 09:41:12.568528482 -0400 +@@ -25,6 +25,23 @@ + def flush(self): + pass + ++ def safeConvert(self, chunk): ++ # Exceptionally gross, but the safest way ++ # I've found to ensure I get a legit unicode object ++ if not chunk: ++ return u'' ++ if isinstance(chunk, unicode): ++ return chunk ++ try: ++ return chunk.decode('utf-8', 'strict') ++ except UnicodeDecodeError: ++ try: ++ return chunk.decode('latin-1', 'strict') ++ except UnicodeDecodeError: ++ return chunk.decode('ascii', 'ignore') ++ except AttributeError: ++ return unicode(chunk) ++ + def write(self, value): + self._outputChunks.append(value) + +@@ -35,17 +52,9 @@ + def getvalue(self, outputChunks=None): + chunks = outputChunks or self._outputChunks + try: +- return ''.join(chunks) ++ return u''.join(chunks) + except UnicodeDecodeError, ex: +- nonunicode = [c for c in chunks if not isinstance(c, unicode)] +- raise DummyResponseFailure('''Looks like you're trying to mix encoded strings with Unicode strings +- (most likely utf-8 encoded ones) +- +- This can happen if you're using the `EncodeUnicode` filter, or if you're manually +- encoding strings as utf-8 before passing them in on the searchList (possible offenders: +- %s) +- (%s)''' % (nonunicode, ex)) +- ++ return u''.join((self.safeConvert(c) for c in chunks)) + + def writelines(self, *lines): + ## not used +diff -uNr Cheetah-2.2.2.orig/cheetah/Filters.py Cheetah-2.2.2/cheetah/Filters.py +--- Cheetah-2.2.2.orig/cheetah/Filters.py 2009-09-14 19:04:00.000000000 -0400 ++++ Cheetah-2.2.2/cheetah/Filters.py 2009-10-19 09:36:17.594517096 -0400 +@@ -29,40 +29,19 @@ + if val is None: + return u'' + if isinstance(val, unicode): +- if encoding: +- return val.encode(encoding) +- else: +- return val ++ # ignore the encoding and return the unicode object ++ return val + else: + try: +- return str(val) +- except UnicodeEncodeError: + return unicode(val) +- return u'' ++ except UnicodeDecodeError: ++ # we could put more fallbacks here, but we'll just pass the str ++ # on and let DummyTransaction worry about it ++ return str(val) + + RawOrEncodedUnicode = Filter + +-class EncodeUnicode(Filter): +- def filter(self, val, +- encoding='utf8', +- str=str, +- **kw): +- """Encode Unicode strings, by default in UTF-8. +- +- >>> import Cheetah.Template +- >>> t = Cheetah.Template.Template(''' +- ... $myvar +- ... ${myvar, encoding='utf16'} +- ... ''', searchList=[{'myvar': u'Asni\xe8res'}], +- ... filter='EncodeUnicode') +- >>> print t +- """ +- if isinstance(val, unicode): +- return val.encode(encoding) +- if val is None: +- return '' +- return str(val) +- ++EncodeUnicode = Filter + + class Markdown(EncodeUnicode): + ''' +diff -uNr Cheetah-2.2.2.orig/cheetah/Template.py Cheetah-2.2.2/cheetah/Template.py +--- Cheetah-2.2.2.orig/cheetah/Template.py 2009-09-14 19:04:00.000000000 -0400 ++++ Cheetah-2.2.2/cheetah/Template.py 2009-10-19 09:35:18.135768927 -0400 +@@ -1001,22 +1001,45 @@ + mainMethName = getattr(concreteTemplateClass,mainMethNameAttr, None) + if mainMethName: + def __str__(self): ++ rc = getattr(self, mainMethName)() ++ if isinstance(rc, unicode): ++ return rc.encode('utf-8') ++ return rc ++ def __unicode__(self): + return getattr(self, mainMethName)() + elif (hasattr(concreteTemplateClass, 'respond') + and concreteTemplateClass.respond!=Servlet.respond): + def __str__(self): ++ rc = self.respond() ++ if isinstance(rc, unicode): ++ return rc.encode('utf-8') ++ return rc ++ def __unicode__(self): + return self.respond() + else: + def __str__(self): ++ rc = None ++ if hasattr(self, mainMethNameAttr): ++ rc = getattr(self,mainMethNameAttr)() ++ elif hasattr(self, 'respond'): ++ rc = self.respond() ++ else: ++ rc = super(self.__class__, self).__str__() ++ if isinstance(rc, unicode): ++ return rc.encode('utf-8') ++ return rc ++ def __unicode__(self): + if hasattr(self, mainMethNameAttr): + return getattr(self,mainMethNameAttr)() + elif hasattr(self, 'respond'): + return self.respond() + else: +- return super(self.__class__, self).__str__() ++ return super(self.__class__, self).__unicode__() + + __str__ = new.instancemethod(__str__, None, concreteTemplateClass) ++ __unicode__ = new.instancemethod(__unicode__, None, concreteTemplateClass) + setattr(concreteTemplateClass, '__str__', __str__) ++ setattr(concreteTemplateClass, '__unicode__', __unicode__) + + _addCheetahPlumbingCodeToClass = classmethod(_addCheetahPlumbingCodeToClass) + +diff -uNr Cheetah-2.2.2.orig/cheetah/Tests/Unicode.py Cheetah-2.2.2/cheetah/Tests/Unicode.py +--- Cheetah-2.2.2.orig/cheetah/Tests/Unicode.py 2009-09-14 19:04:01.000000000 -0400 ++++ Cheetah-2.2.2/cheetah/Tests/Unicode.py 2009-10-20 14:08:48.674155892 -0400 +@@ -150,6 +150,22 @@ + a = unicode(template).encode("utf-8") + self.assertEquals("Bébé", a) + ++class EncodeUnicodeCompatTest(unittest.TestCase): ++ """ ++ Taken initially from Red Hat's bugzilla #529332 ++ https://bugzilla.redhat.com/show_bug.cgi?id=529332 ++ """ ++ def runTest(self): ++ t = Template("""Foo ${var}""", filter='EncodeUnicode') ++ t.var = u"Text with some non-ascii characters: åäö" ++ ++ rc = t.respond() ++ assert isinstance(rc, unicode), ('Template.respond() should return unicode', rc) ++ ++ rc = str(t) ++ assert isinstance(rc, str), ('Template.__str__() should return a UTF-8 encoded string', rc) ++ ++ + class Unicode_in_SearchList_Test(CommandLineTest): + def test_BasicASCII(self): + source = '''This is $adjective''' +@@ -168,14 +184,14 @@ + 'adjective' : u'\u0e22\u0e34\u0e19\u0e14\u0e35\u0e15\u0e49\u0e2d\u0e19\u0e23\u0e31\u0e1a'}]) + assert template.respond() + +- def test_ErrorReporting(self): ++ def test_Thai_utf8(self): + utf8 = '\xe0\xb8\xa2\xe0\xb8\xb4\xe0\xb8\x99\xe0\xb8\x94\xe0\xb8\xb5\xe0\xb8\x95\xe0\xb9\x89\xe0\xb8\xad\xe0\xb8\x99\xe0\xb8\xa3\xe0\xb8\xb1\xe0\xb8\x9a' + + source = '''This is $adjective''' + template = self.createAndCompile(source) + assert template and issubclass(template, Template) + template = template(searchList=[{'adjective' : utf8}]) +- self.failUnlessRaises(DummyTransaction.DummyResponseFailure, template.respond) ++ assert template.respond() + + + diff --git a/python-cheetah.spec b/python-cheetah.spec index 6763e31..8089dd8 100644 --- a/python-cheetah.spec +++ b/python-cheetah.spec @@ -2,13 +2,14 @@ Name: python-cheetah Version: 2.2.2 -Release: 1%{?dist} +Release: 2%{?dist} Summary: Template engine and code generator Group: Development/Libraries License: MIT URL: http://cheetahtemplate.org/ Source: http://download.sourceforge.net/cheetahtemplate/Cheetah-%{version}.tar.gz +Patch0: cheetah-2.2.2-unicode-fixes.patch BuildRoot: %(mktemp -ud %{_tmppath}/%{name}-%{version}-%{release}-XXXXXX) @@ -27,6 +28,7 @@ code, Java, sql, form emails and even Python code. %prep %setup -q -n Cheetah-%{version} +%patch0 -p1 %build %{__python} setup.py build @@ -76,6 +78,9 @@ rm -rf %{buildroot} %{python_sitearch}/Cheetah-%{version}-*.egg-info/*.txt %changelog +* Tue Oct 20 2009 Mike Bonnet - 2.2.2-2 +- backport significant improvements to utf-8/unicode handling from upstream + * Mon Sep 14 2009 Mike Bonnet - 2.2.2-1 - update to the 2.2.2 release - add dependency on python-markdown for consistency with the egg-info