from unittest import TestCase

from .html_cleaner import HtmlCleaner


class HtmlCleanerTestCase(TestCase):
    """Unit tests for ``HtmlCleaner``.

    The class carries two fixture tables as class attributes:

    * ``single_tag_cases`` -- inputs built around one tag name, with the
      expected tag positions and the expected cleaned text.
    * ``multi_tag_cases`` -- inputs mixing several tags, with the text
      expected back from ``HtmlCleaner.remove_all_dangerous_tags``.
    """

    # Each entry has the keys 'tag', 'input_text', 'expected_tag_positions'
    # and 'expected_cleaned_text'.
    # NOTE(review): no test method in this class reads this table; it was
    # presumably meant for a per-tag removal test -- confirm, then either add
    # that test or drop the fixture.
    # NOTE(review): 'expected_tag_positions' looks like (index where the
    # opening tag starts, index just past the closing tag), with -1 where the
    # tag is missing -- TODO confirm against HtmlCleaner.
    single_tag_cases = [
        {  # tag name written with mixed upper and lower case
            'tag': 'taG',
            'input_text': 'Begin of sample data <TAG> </tAg>\nEnd of sample data\n',
            'expected_tag_positions': (21, 33),
            'expected_cleaned_text': 'Begin of sample data \nEnd of sample data\n'
        },
        {  # extra characters after the tag name, inside the tag
            'tag': 'tag',
            'input_text': 'Begin of sample data <tag id=""> </tag  >\nEnd of sample data',
            'expected_tag_positions': (21, 41),
            'expected_cleaned_text': 'Begin of sample data \nEnd of sample data'
        },
        {  # opening tag without its closing '>' symbol
            'tag': 'tag',
            'input_text': 'Begin of sample data<tag  </tag>\n',
            'expected_tag_positions': (20, 32),
            'expected_cleaned_text': 'Begin of sample data\n'
        },
        {  # closing tag without its closing '>' symbol
            'tag': 'tag',
            'input_text': 'Begin of sample data <tag> </tag',
            'expected_tag_positions': (21, 32),
            'expected_cleaned_text': 'Begin of sample data '
        },
        {  # closing tag only, no opening tag
            # NOTE(review): the expected text drops the space that precedes
            # the tag in the input, unlike the other cases -- confirm this is
            # intended.
            'tag': 'tag',
            'input_text': 'Begin of sample data </tag>\n',
            'expected_tag_positions': (-1, 27),
            'expected_cleaned_text': 'Begin of sample data\n'
        },
        {  # opening tag only, no closing tag
            'tag': 'tag',
            'input_text': 'Begin of sample data <tag>',
            'expected_tag_positions': (21, -1),
            'expected_cleaned_text': 'Begin of sample data '
        },
        {  # inverted order: closing tag first, opening tag afterwards
            'tag': 'tag',
            'input_text': 'Begin of sample data </tag >123<tag id="">\nEnd of sample data',
            'expected_tag_positions': (21, -1),
            'expected_cleaned_text': 'Begin of sample data 123\nEnd of sample data'
        },
    ]

    # Each entry has the keys 'input_text' and 'expected_cleaned_text'.
    multi_tag_cases = [
        {   # text containing multiple dangerous tags, some of them repeated
            'input_text': (
                'Row -> <html> CONTENT 1 </html>(1)'
                'Row -> <head> CONTENT 2 </head>(2)'
                'Row -> <body> CONTENT 3 </body>(3)'
                'Row -> <link> CONTENT 4 (4)'
                'Row -> <script> CONTENT 5 </script>(5)'
                'Row -> <iframe> CONTENT 6 </iframe>(6)'
                'Row -> <frameset> CONTENT 7 </frameset>(7)'
                'Row -> <frame> CONTENT 8 (8)'
                'Row -> <body> CONTENT 9 </body>(9)'
                'Row -> <head> CONTENT 10 </head>(10)'
                'Row -> <script> CONTENT 11 </script>(11)'
            ),

            # In the expected text the '>' of every "->" appears as '&gt;'.
            # Rows 4 and 8 (<link>, <frame>) have no closing tag in the input
            # and keep their content; every other row loses the whole
            # open-tag..close-tag span.
            'expected_cleaned_text': (
                'Row -&gt; (1)'
                'Row -&gt; (2)'
                'Row -&gt; (3)'
                'Row -&gt;  CONTENT 4 (4)'
                'Row -&gt; (5)'
                'Row -&gt; (6)'
                'Row -&gt; (7)'
                'Row -&gt;  CONTENT 8 (8)'
                'Row -&gt; (9)'
                'Row -&gt; (10)'
                'Row -&gt; (11)'
            ),
        },
    ]

    def setUp(self):
        """Create a fresh ``HtmlCleaner`` instance before each test."""
        self.cleaner = HtmlCleaner()

    def test_method__remove_all_dangerous_tags(self):
        """Check ``remove_all_dangerous_tags`` against every multi-tag case.

        Each row runs inside its own ``subTest`` so that a failing row is
        reported with its index and does not stop the remaining rows from
        being checked.
        """
        for index, row in enumerate(self.multi_tag_cases):
            with self.subTest(case=index):
                cleaned_text = self.cleaner.remove_all_dangerous_tags(
                    text=row['input_text'],
                )
                self.assertEqual(cleaned_text, row['expected_cleaned_text'])
