#!/usr/bin/env python3
"""Print the rows of the ``<table class="datatable">`` found in HTML on stdin.

Every data row is written to stdout as: the text of the second cell,
left-justified in a 12-character field, a tab, and the text of the first
cell with its whitespace normalized by :func:`sanitize_as_name`.
"""

import html
import html.parser
import sys

# Elements that never have an end tag.  They must not be tracked as open
# elements, otherwise they stay on the stack forever and hide the enclosing
# <td> from handle_data().
_VOID_ELEMENTS = frozenset({
    'area', 'base', 'basefont', 'br', 'col', 'embed', 'frame', 'hr', 'img',
    'input', 'isindex', 'keygen', 'link', 'meta', 'param', 'source', 'track',
    'wbr',
})


def sanitize_as_name(s: str) -> str:
    """Collapse each run of whitespace in *s* to one space and trim the ends."""
    return ' '.join(s.split())


class KISAASListExtractor(html.parser.HTMLParser):
    """Streaming parser that prints the rows of a ``class="datatable"`` table.

    Only text whose innermost open element is the ``<td>`` itself is
    collected; text nested in further elements inside a cell is ignored.
    The ``class`` attribute must be exactly ``datatable``.

    NOTE(review): a ``<table>`` nested inside the data table ends extraction
    at its own ``</table>``; nested tables are not supported.
    """

    def reset(self) -> None:
        """Reset the parser and all extraction state.

        ``HTMLParser.__init__`` calls ``reset()``, so the state below is
        created per instance instead of being shared through class
        attributes.
        """
        super().reset()
        self._table_started = False          # inside the data table?
        self._td: list[str] = []             # cell texts of the current row
        self._tag_stack: list[str] = []      # currently open (non-void) tags

    def handle_starttag(self, tag, attrs):
        """Track the open element and react to ``<table>``/``<tr>``/``<td>``."""
        if tag not in _VOID_ELEMENTS:
            self._tag_stack.append(tag)

        if not self._table_started:
            if tag == 'table' and dict(attrs).get('class') == 'datatable':
                self._table_started = True
        elif tag == 'tr':
            # A previous row whose optional </tr> was omitted is still
            # pending here; emit it before starting the new row.
            self._emit_row()
        elif tag == 'td':
            # One slot per cell keeps the column positions stable even when
            # a cell is empty or its text arrives in several chunks.
            self._td.append('')

    def handle_endtag(self, tag):
        """Emit the row on ``</tr>``, stop on ``</table>``, close the tag."""
        if self._table_started:
            if tag == 'tr':
                self._emit_row()
            elif tag == 'table':
                self._emit_row()  # last row without an explicit </tr>
                self._table_started = False
        self._close_tag(tag)

    def handle_data(self, data):
        """Collect text that sits directly inside a ``<td>`` of the table."""
        if not (self._table_started and self._tag_stack
                and self._tag_stack[-1] == 'td'):
            return
        if self._td:
            self._td[-1] += data
        else:
            # The row was reset while this cell was still open (e.g. by a
            # nested <tr>); keep the text instead of raising IndexError.
            self._td.append(data)

    def _emit_row(self) -> None:
        """Print the pending row if it is usable, then clear it."""
        cells = self._td
        # Rows need both columns; rows without any text (e.g. header rows
        # made only of <th>) are skipped.
        if len(cells) >= 2 and any(cells):
            print(f"{cells[1]:<12}\t{sanitize_as_name(cells[0])}")
        # Clearing prevents the same row from being printed twice.
        cells.clear()

    def _close_tag(self, tag: str) -> None:
        """Pop the stack back to the innermost open *tag*.

        Elements left open inside it (omitted optional end tags) are closed
        implicitly.  End tags with no matching open element, including
        those of void elements, are ignored.
        """
        stack = self._tag_stack
        for index in range(len(stack) - 1, -1, -1):
            if stack[index] == tag:
                del stack[index:]
                return


def main() -> None:
    """Parse the HTML document on stdin and print the extracted rows."""
    doc_parser = KISAASListExtractor()
    doc_parser.feed(sys.stdin.read())
    doc_parser.close()  # flush any text still buffered by the parser


if __name__ == '__main__':
    main()