This repository has been archived on 2021-02-17. You can view files and clone it, but cannot push or open issues or pull requests.
masto-image-bot/TootHTMLParser.py

21 lines
475 B
Python

from html.parser import HTMLParser
class TootHTMLParser(HTMLParser):
def __init__(self):
super().__init__()
self.txt = ""
def handle_data(self, data):
self.txt += str(data).lstrip().rstrip().lower() + " "
#
#
# content = ""
# with open("input") as f:
# content = f.readlines()
# content = set(content)
# parser = TootHTMLParser()
# for word in content:
# parser.feed(word)
# with open("output", "w+") as f:
# f.write(parser.txt)