blob: b4b1c1073040b08e59c1127325b41005649738ce (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
|
#!/bin/env python3
import html
import html.parser
import sys
def sanitize_as_name(s: str) -> str:
    """Return *s* with every run of whitespace collapsed to one space.

    The string is split on whitespace and the resulting words are joined
    with single spaces, so leading and trailing whitespace disappears too.
    """
    words = s.split()
    return ' '.join(words)
class KISAASListExtractor(html.parser.HTMLParser):
    """Extract rows from the HTML table whose class list has ``datatable``.

    While the parser is inside such a table, the direct text of every
    ``<td>`` is collected into one string per cell.  When a ``</tr>`` is
    seen, a row with at least two cells is handed to :meth:`handle_row`,
    which prints it.  Rows with fewer than two data cells (for example
    header rows made of ``<th>``) are skipped.
    """

    # Elements that never have an end tag.  They must not go on the
    # open-element stack because no end tag would ever pop them again.
    _VOID_ELEMENTS = frozenset((
        'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input',
        'link', 'meta', 'param', 'source', 'track', 'wbr',
    ))

    def __init__(self, *, convert_charrefs=True):
        """Create a parser with its own (not class-shared) parsing state."""
        super().__init__(convert_charrefs=convert_charrefs)
        self._table_started = False       # inside the data table?
        self._td: list[str] = []          # cell texts of the current row
        self._tag_stack: list[str] = []   # currently open non-void elements

    def handle_starttag(self, tag, attrs):
        """Track open elements, table start, row start and cell start."""
        if tag in self._VOID_ELEMENTS:
            # A line break directly inside a cell separates words; keep
            # the words apart instead of fusing them together.
            if tag == 'br' and self._directly_in_cell() and self._td:
                self._td[-1] += ' '
            return
        self._tag_stack.append(tag)
        if self._table_started:
            if tag == 'tr':
                self._td = []
            elif tag == 'td':
                # One entry per cell, even if the cell turns out empty,
                # so that list indices correspond to table columns.
                self._td.append('')
        elif tag == 'table':
            # ``class`` is a space-separated token list and may be absent
            # or value-less (None).
            class_names = (dict(attrs).get('class') or '').split()
            if 'datatable' in class_names:
                self._table_started = True
                self._td = []

    def handle_endtag(self, tag):
        """Emit the finished row on ``</tr>`` and unwind the tag stack."""
        if tag in self._VOID_ELEMENTS:
            # Reached for ``<br/>`` (via handle_startendtag) or a stray
            # ``</br>``; nothing was pushed, so nothing may be popped.
            return
        if self._table_started:
            if tag == 'tr':
                self._flush_row()
            elif tag == 'table':
                self._table_started = False
                # Drop leftovers so a later ``</tr>`` cannot re-emit them.
                self._td = []
        self._pop_through(tag)

    def handle_data(self, data):
        """Append text that sits directly inside a ``<td>`` to that cell."""
        if not self._directly_in_cell():
            return
        if self._td:
            self._td[-1] += data
        else:
            # A ``<td>`` opened before the current row was reset.
            self._td.append(data)

    def handle_row(self, cells):
        """Print one table row; override to consume rows differently.

        *cells* holds the text of each ``<td>`` of the row (at least two
        entries).  The second cell is printed left-justified in a
        12-character field, followed by a tab and the first cell with its
        whitespace normalized by ``sanitize_as_name``.
        """
        print("%-12s\t%s" % (cells[1], sanitize_as_name(cells[0])))

    def _directly_in_cell(self):
        """Return True if the innermost open element is a data-table td."""
        return (self._table_started
                and bool(self._tag_stack)
                and self._tag_stack[-1] == 'td')

    def _flush_row(self):
        """Hand the collected row to handle_row and start a fresh one."""
        row, self._td = self._td, []
        if len(row) >= 2:
            self.handle_row(row)

    def _pop_through(self, tag):
        """Close the innermost open *tag* and everything opened inside it.

        End tags without a matching open element are ignored, and an end
        tag other than ``</table>`` never closes elements opened outside
        the innermost open table.
        """
        stack = self._tag_stack
        for index in range(len(stack) - 1, -1, -1):
            if stack[index] == tag:
                del stack[index:]
                return
            if stack[index] == 'table':
                return
# Script body (runs at import time; there is no ``__main__`` guard):
# read the whole HTML document from standard input and feed it to the
# extractor, which prints rows as a side effect of parsing.
doc_parser = KISAASListExtractor()
doc_parser.feed(sys.stdin.read())
|