masto-image-bot/TootHTMLParser.py

from html.parser import HTMLParser

class TootHTMLParser(HTMLParser):
    """HTML parser that gathers the text content of the markup fed to it.

    Each chunk of character data reported by the base parser is stripped of
    leading and trailing whitespace, lower-cased, and appended to ``txt``
    followed by a single space.  Tags themselves are not recorded.
    """

    def __init__(self):
        """Set up the base parser and start with an empty ``txt`` buffer."""
        super().__init__()
        # Running concatenation of every normalised text chunk seen so far.
        self.txt = ""

    def handle_data(self, data):
        """Normalise one text chunk and append it to ``txt``.

        A whitespace-only chunk normalises to the empty string, so it still
        contributes the single separator space.
        """
        normalised = str(data).strip().lower()
        self.txt += f"{normalised} "
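#
# Example usage (disabled): feed each unique line of the file "input" through
# the parser and write the collected text to the file "output".
#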
# content = ""
# with open("input") as f:
#     content = f.readlines()
# content = set(content)
# parser = TootHTMLParser()
# for word in content:
#     parser.feed(word)
# with open("output", "w+") as f:
#     f.write(parser.txt)