Blob Blame History Raw
From 307ecf22162f58af85d7b4fe571e3d5b8bdf44c7 Mon Sep 17 00:00:00 2001
From: Mike Dalessio <mike.dalessio@gmail.com>
Date: Wed, 17 Aug 2022 10:54:37 -0400
Subject: [PATCH] tests: handle libxml 2.10.0 incorrectly-opened comment
 parsing

Related, see:

- https://github.com/sparklemotion/nokogiri/pull/2625
- https://gitlab.gnome.org/GNOME/libxml2/-/issues/380
---
 test/sanitizer_test.rb | 21 ++++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

diff --git a/test/sanitizer_test.rb b/test/sanitizer_test.rb
index e3ce218..e83c54d 100644
--- a/test/sanitizer_test.rb
+++ b/test/sanitizer_test.rb
@@ -54,7 +54,7 @@ def test_remove_xpaths_called_with_enumerable_xpaths
 
   def test_strip_tags_with_quote
     input = '<" <img src="trollface.gif" onload="alert(1)"> hi'
-    expected = libxml_2_9_14_recovery? ? %{&lt;"  hi} : %{ hi}
+    expected = libxml_2_9_14_recovery_lt? ? %{&lt;"  hi} : %{ hi}
     assert_equal(expected, full_sanitize(input))
   end
 
@@ -77,19 +77,19 @@ def test_strip_tags_multiline
 
   def test_remove_unclosed_tags
     input = "This is <-- not\n a comment here."
-    expected = libxml_2_9_14_recovery? ? %{This is &lt;-- not\n a comment here.} : %{This is }
+    expected = libxml_2_9_14_recovery_lt? ? %{This is &lt;-- not\n a comment here.} : %{This is }
     assert_equal(expected, full_sanitize(input))
   end
 
   def test_strip_cdata
     input = "This has a <![CDATA[<section>]]> here."
-    expected = libxml_2_9_14_recovery? ? %{This has a &lt;![CDATA[]]&gt; here.} : %{This has a ]]&gt; here.}
+    expected = libxml_2_9_14_recovery_lt_bang? ? %{This has a &lt;![CDATA[]]&gt; here.} : %{This has a ]]&gt; here.}
     assert_equal(expected, full_sanitize(input))
   end
 
   def test_strip_unclosed_cdata
     input = "This has an unclosed <![CDATA[<section>]] here..."
-    expected = libxml_2_9_14_recovery? ? %{This has an unclosed &lt;![CDATA[]] here...} : %{This has an unclosed ]] here...}
+    expected = libxml_2_9_14_recovery_lt_bang? ? %{This has an unclosed &lt;![CDATA[]] here...} : %{This has an unclosed ]] here...}
     assert_equal(expected, full_sanitize(input))
   end
 
@@ -464,13 +464,13 @@ def test_should_sanitize_img_vbscript
 
   def test_should_sanitize_cdata_section
     input = "<![CDATA[<span>section</span>]]>"
-    expected = libxml_2_9_14_recovery? ? %{&lt;![CDATA[<span>section</span>]]&gt;} : %{section]]&gt;}
+    expected = libxml_2_9_14_recovery_lt_bang? ? %{&lt;![CDATA[<span>section</span>]]&gt;} : %{section]]&gt;}
     assert_sanitized(input, expected)
   end
 
   def test_should_sanitize_unterminated_cdata_section
     input = "<![CDATA[<span>neverending..."
-    expected = libxml_2_9_14_recovery? ? %{&lt;![CDATA[<span>neverending...</span>} : %{neverending...}
+    expected = libxml_2_9_14_recovery_lt_bang? ? %{&lt;![CDATA[<span>neverending...</span>} : %{neverending...}
     assert_sanitized(input, expected)
   end
 
@@ -663,10 +663,17 @@ def convert_to_css_hex(string, escape_parens=false)
     end.join
   end
 
-  def libxml_2_9_14_recovery?
+  def libxml_2_9_14_recovery_lt?
+    # recovery of a stray "<" changed in libxml2 2.9.14 (it now survives sanitizing as "&lt;"), see https://github.com/sparklemotion/nokogiri/releases/tag/v1.13.5
     Nokogiri.method(:uses_libxml?).arity == -1 && Nokogiri.uses_libxml?(">= 2.9.14")
   end
 
+  def libxml_2_9_14_recovery_lt_bang?
+    # recovery of a stray "<!" changed in libxml2 2.9.14 (it survives sanitizing as "&lt;!"), see https://github.com/sparklemotion/nokogiri/releases/tag/v1.13.5
+    # then reverted in 2.10.0, see https://gitlab.gnome.org/GNOME/libxml2/-/issues/380
+    Nokogiri.method(:uses_libxml?).arity == -1 && Nokogiri.uses_libxml?("= 2.9.14")
+  end
+
   def html5_mode?
     ::Loofah.respond_to?(:html5_mode?) && ::Loofah.html5_mode?
   end