From 307ecf22162f58af85d7b4fe571e3d5b8bdf44c7 Mon Sep 17 00:00:00 2001
From: Mike Dalessio <mike.dalessio@gmail.com>
Date: Wed, 17 Aug 2022 10:54:37 -0400
Subject: [PATCH] tests: handle libxml 2.10.0 incorrectly-opened comment parsing

Related, see:
- https://github.com/sparklemotion/nokogiri/pull/2625
- https://gitlab.gnome.org/GNOME/libxml2/-/issues/380
---
test/sanitizer_test.rb | 21 ++++++++++++++-------
1 file changed, 14 insertions(+), 7 deletions(-)
diff --git a/test/sanitizer_test.rb b/test/sanitizer_test.rb
index e3ce218..e83c54d 100644
--- a/test/sanitizer_test.rb
+++ b/test/sanitizer_test.rb
@@ -54,7 +54,7 @@ def test_remove_xpaths_called_with_enumerable_xpaths
def test_strip_tags_with_quote
input = '<" <img src="trollface.gif" onload="alert(1)"> hi'
- expected = libxml_2_9_14_recovery? ? %{<" hi} : %{ hi}
+ expected = libxml_2_9_14_recovery_lt? ? %{<" hi} : %{ hi}
assert_equal(expected, full_sanitize(input))
end
@@ -77,19 +77,19 @@ def test_strip_tags_multiline
def test_remove_unclosed_tags
input = "This is <-- not\n a comment here."
- expected = libxml_2_9_14_recovery? ? %{This is <-- not\n a comment here.} : %{This is }
+ expected = libxml_2_9_14_recovery_lt? ? %{This is <-- not\n a comment here.} : %{This is }
assert_equal(expected, full_sanitize(input))
end
def test_strip_cdata
input = "This has a <![CDATA[<section>]]> here."
- expected = libxml_2_9_14_recovery? ? %{This has a <![CDATA[]]> here.} : %{This has a ]]> here.}
+ expected = libxml_2_9_14_recovery_lt_bang? ? %{This has a <![CDATA[]]> here.} : %{This has a ]]> here.}
assert_equal(expected, full_sanitize(input))
end
def test_strip_unclosed_cdata
input = "This has an unclosed <![CDATA[<section>]] here..."
- expected = libxml_2_9_14_recovery? ? %{This has an unclosed <![CDATA[]] here...} : %{This has an unclosed ]] here...}
+ expected = libxml_2_9_14_recovery_lt_bang? ? %{This has an unclosed <![CDATA[]] here...} : %{This has an unclosed ]] here...}
assert_equal(expected, full_sanitize(input))
end
@@ -464,13 +464,13 @@ def test_should_sanitize_img_vbscript
def test_should_sanitize_cdata_section
input = "<![CDATA[<span>section</span>]]>"
- expected = libxml_2_9_14_recovery? ? %{<![CDATA[<span>section</span>]]>} : %{section]]>}
+ expected = libxml_2_9_14_recovery_lt_bang? ? %{<![CDATA[<span>section</span>]]>} : %{section]]>}
assert_sanitized(input, expected)
end
def test_should_sanitize_unterminated_cdata_section
input = "<![CDATA[<span>neverending..."
- expected = libxml_2_9_14_recovery? ? %{<![CDATA[<span>neverending...</span>} : %{neverending...}
+ expected = libxml_2_9_14_recovery_lt_bang? ? %{<![CDATA[<span>neverending...</span>} : %{neverending...}
assert_sanitized(input, expected)
end
@@ -663,10 +663,17 @@ def convert_to_css_hex(string, escape_parens=false)
end.join
end
- def libxml_2_9_14_recovery?
+ def libxml_2_9_14_recovery_lt?
+ # changed in 2.9.14, see https://github.com/sparklemotion/nokogiri/releases/tag/v1.13.5
Nokogiri.method(:uses_libxml?).arity == -1 && Nokogiri.uses_libxml?(">= 2.9.14")
end
+ def libxml_2_9_14_recovery_lt_bang?
+ # changed in 2.9.14, see https://github.com/sparklemotion/nokogiri/releases/tag/v1.13.5
+ # then reverted in 2.10.0, see https://gitlab.gnome.org/GNOME/libxml2/-/issues/380
+ Nokogiri.method(:uses_libxml?).arity == -1 && Nokogiri.uses_libxml?("= 2.9.14")
+ end
+
def html5_mode?
::Loofah.respond_to?(:html5_mode?) && ::Loofah.html5_mode?
end