aboutsummaryrefslogtreecommitdiff
path: root/get-kisa-asmap
diff options
context:
space:
mode:
Diffstat (limited to 'get-kisa-asmap')
-rwxr-xr-x get-kisa-asmap 41
1 files changed, 41 insertions, 0 deletions
diff --git a/get-kisa-asmap b/get-kisa-asmap
new file mode 100755
index 0000000..b4b1c10
--- /dev/null
+++ b/get-kisa-asmap
@@ -0,0 +1,41 @@
+#!/bin/env python3
+import html
+import html.parser
+import sys
+
def sanitize_as_name(s: str):
    """Return *s* with whitespace normalised.

    Every run of whitespace (spaces, tabs, newlines) becomes one space, and
    leading/trailing whitespace is removed.
    """
    words = s.split()
    return ' '.join(words)
+
class KISAASListExtractor(html.parser.HTMLParser):
    """Print the rows of a ``<table class="datatable">`` found in HTML input.

    While inside such a table, the text placed directly inside each ``<td>``
    is collected per row.  When a ``</tr>`` is seen and the row has at least
    two cells, one line is written to stdout: the second cell left-justified
    to 12 columns, a tab, then the first cell passed through
    ``sanitize_as_name``.  Rows with fewer than two ``<td>`` cells (for
    example header rows made of ``<th>``) are skipped.

    Known limitation kept from the original: the first ``</table>`` seen
    while extracting ends extraction, even if it closes a nested table.
    """

    # Void elements never get an end tag, so pushing them onto the open-tag
    # stack would leave entries that nothing pops ('param' is legacy but is
    # likewise never closed).
    _VOID_ELEMENTS = frozenset((
        'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input',
        'link', 'meta', 'param', 'source', 'track', 'wbr',
    ))

    def reset(self):
        """Reset parser and extraction state.

        ``HTMLParser`` calls this at instantiation time, so the lists below
        are created per instance instead of being shared class attributes.
        """
        super().reset()
        self._table_started = False
        self._td: list[str] = []         # cell texts of the current row
        self._tag_stack: list[str] = []  # currently open (non-void) tags

    def handle_starttag(self, tag, attrs):
        """Track open tags, row/cell starts and the start of the data table."""
        if tag not in self._VOID_ELEMENTS:
            self._tag_stack.append(tag)

        if self._table_started:
            if tag == 'tr':
                # A new row begins: forget the cells of the previous one.
                self._td.clear()
            elif tag == 'td':
                # One entry per cell keeps column positions stable even when
                # a cell is empty or its text arrives in several chunks.
                self._td.append('')
        elif tag == 'table' and dict(attrs).get('class') == 'datatable':
            self._table_started = True

    def handle_endtag(self, tag):
        """Emit a finished row and unwind the open-tag stack."""
        if self._table_started:
            if tag == 'table':
                self._table_started = False
                self._td.clear()
            elif tag == 'tr':
                if len(self._td) >= 2:
                    print("%-12s\t%s" % (self._td[1], sanitize_as_name(self._td[0])))
                # Clear so a duplicated or later </tr> cannot reprint the row.
                self._td.clear()

        # Pop up to and including the nearest matching open tag; an end tag
        # with no matching start tag is ignored instead of raising IndexError.
        for depth in range(len(self._tag_stack) - 1, -1, -1):
            if self._tag_stack[depth] == tag:
                del self._tag_stack[depth:]
                break

    def handle_data(self, data):
        """Append text found directly inside a ``<td>`` to the current cell."""
        if (self._table_started and self._td
                and self._tag_stack and self._tag_stack[-1] == 'td'):
            self._td[-1] += data
+
+
# Read the whole HTML document from stdin; extracted rows are printed to
# stdout by the parser's handlers as they are encountered.
doc_parser = KISAASListExtractor()
doc_parser.feed(sys.stdin.read())
# close() makes the parser process any input it is still buffering (for
# example trailing text that is not followed by another tag).
doc_parser.close()