-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathopencongress.py
71 lines (59 loc) · 1.71 KB
/
opencongress.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import re
import json
import lxml.html
import encode
import dump
from pprint import pprint
import legislators_current as leg
import cache
import dump
"""
Extract OpenCongress wiki links for the current legislators and record them.
"""
import getopt, sys
# Module-level flag read by wikilink(): when true, every matched wiki page
# name is printed together with the record it is about to be stored in.
verbose = False
def wikilink(f_link, obj):
    """Record the OpenCongress wiki page name found in a link, if any.

    f_link -- URL string to inspect.
    obj    -- dict updated in place: when f_link contains
              ``http://www.opencongress.org/wiki/<name>``, ``<name>`` is
              stored under the key 'opencongwiki'.

    Links that do not match are ignored. Returns None. When the module-level
    ``verbose`` flag is true, the matched name and ``obj`` (as it was before
    the update) are printed.
    """
    # Raw string so the regex backslashes are never interpreted as string
    # escapes; ':' and '/' need no escaping inside a regular expression.
    match = re.search(r"http://www\.opencongress\.org/wiki/(.+)$", f_link)
    if not match:
        return
    val = match.group(1)
    if verbose:
        # Single-argument print: same output under Python 2 and Python 3.
        print("%s %s" % (val, obj))
    obj['opencongwiki'] = val
def parse(htmlstr, obj):
    """Scan an HTML document for links and hand each href to wikilink().

    htmlstr -- HTML source, parsed with lxml.html.document_fromstring.
    obj     -- passed through unchanged to wikilink() for every href link.
    """
    document = lxml.html.document_fromstring(htmlstr)
    # iterlinks() yields (element, attribute, link, pos); only links that
    # come from an 'href' attribute are forwarded.
    href_targets = (
        target
        for (_element, attribute, target, _pos) in document.iterlinks()
        if attribute == 'href'
    )
    for target in href_targets:
        wikilink(target, obj)
def load():
    """Fetch OpenCongress pages for the loaded legislators and dump the result.

    Takes the data returned by leg.load(), walks the entries under its 'wp'
    key in sorted key order, and for every entry whose 'id' dict has a
    'govtrack' value requests two OpenCongress URLs through cache.cacheweb.
    The HTML of the second one is passed to parse() together with the entry's
    'id' dict. Finally the whole structure is handed to dump.dump().
    """
    legislators = leg.load()
    entries = legislators['wp']
    for key in sorted(entries):
        ids = entries[key]['id']
        if 'govtrack' not in ids:
            continue
        govtrack_id = ids['govtrack']
        # The API response is requested but its return value is not used here.
        cache.cacheweb('http://api.opencongress.org/people?person_id=%d' % govtrack_id)
        page_html = cache.cacheweb('http://www.opencongress.org/people/show/%d' % govtrack_id)
        parse(page_html, ids)
    # NOTE(review): placed after the loop (one dump for the whole run);
    # confirm against the original file's indentation.
    dump.dump(legislators)
def usage():
    """Print the supported command-line options to standard output."""
    # Single-argument print: identical output under Python 2 and Python 3.
    print("--help --verbose")
def main():
    """Parse the command line and run the extraction.

    Options:
      -h, --help     print the usage line and exit
      -v, --verbose  turn on the module-level ``verbose`` flag

    Exits with status 2 (after printing the error and the usage line) when
    getopt rejects the command line. Otherwise calls load().
    """
    # wikilink() reads ``verbose`` at module scope, so the flag must be
    # rebound globally; a local assignment would never be visible there.
    global verbose
    try:
        opts, _args = getopt.getopt(sys.argv[1:], "hv", ["help", "verbose"])
    except getopt.GetoptError as err:
        # Prints something like "option -a not recognized".
        print(str(err))
        usage()
        sys.exit(2)
    verbose = False
    for opt, _value in opts:
        # Membership test: comparing against a bare tuple literal would be
        # always true and would hide the --help branch.
        if opt in ("-v", "--verbose"):
            verbose = True
        elif opt in ("-h", "--help"):
            usage()
            sys.exit()
        else:
            # getopt only returns declared options, so this is a pure
            # internal-consistency check; raised explicitly so it is not
            # stripped when running with -O.
            raise AssertionError("unhandled option")
    load()
# Run the command-line entry point only when executed as a script,
# not when this module is imported.
if __name__ == "__main__":
    main()