]> jfr.im git - z_archive/twitter.git/blob - utils/update.py
bandaid unicode/str.encode-related crash bug
[z_archive/twitter.git] / utils / update.py
1 #!/usr/bin/env python
2
3 '''
4 This is a development script, intended for development use only.
5
6 This script generates the POST_ACTIONS variable
7 for placement in the twitter_globals.py
8
9 Example Usage:
10
11 %prog >twitter/twitter_globals.py
12
13 Dependencies:
14
15 (easy_install) BeautifulSoup
16 '''
17
18 import sys
19 from urllib import urlopen as _open
20 from BeautifulSoup import BeautifulSoup
21 from htmlentitydefs import codepoint2name
22
def uni2html(u):
    '''
    Reduce a string to printable ASCII plus named HTML entities.

    Each character is handled according to its code point:

    * 32..127  -- kept unchanged
    * 0..31    -- dropped (control characters such as ^M; note this also
                  removes newlines and tabs)
    * >= 128   -- replaced by `&name;` when `codepoint2name` knows a name
                  for it, otherwise dropped
    '''
    pieces = []
    for char in u:
        code = ord(char)
        if code >= 128:
            name = codepoint2name.get(code)
            if name is not None:
                pieces.append('&%s;' % name)
            # else: character unknown, skip it
        elif code > 31:
            pieces.append(char)
    return ''.join(pieces)
43
def print_fw(iterable, joins=', ', prefix='', indent=0, width=79, trail=False):
    '''
    PPrint an iterable (of stringable elements) to stdout.

    Entries are joined using `joins`
    A fixed_width (fw) is maintained of `width` chars per line
    Each line is indented with `indent`*4 spaces
    Lines are then prefixed with `prefix` string
    if `trail` the `joins` string is also written after the last entry
    A newline is written after all is printed.

    `iterable` may be any iterable, including a generator; it is consumed
    once. An entry that is by itself wider than `width` is written on a
    line of its own rather than being split.
    '''
    shift_width = 4
    # Materialize first: the position of the last entry must be known, and
    # generators have no len().
    entries = list(iterable)
    last = len(entries) - 1
    preline = '%s%s' % (' ' * (shift_width * indent), prefix)
    linew = len(preline)
    sys.stdout.write(preline)
    for i, entry in enumerate(entries):
        if not trail and i == last:
            sentry = str(entry)
        else:
            sentry = '%s%s' % (str(entry), joins)
        # Wrap only when something has already been written on this line;
        # otherwise an over-long entry would leave a line holding just the
        # prefix.
        if linew > len(preline) and linew + len(sentry) > width:
            sys.stdout.write('\n%s' % (preline))
            linew = len(preline)
        sys.stdout.write(sentry)
        linew += len(sentry)
    sys.stdout.write('\n')
70
def main_with_options(options, files):
    '''
    Main function that prints twitter's POST_ACTIONS module to stdout.

    The REST API documentation page is downloaded, passed through
    `uni2html`, and parsed with BeautifulSoup. Methods whose "Method"
    entry contains 'POST' are collected and written out as the source of a
    Python module defining the POST_ACTIONS list.

    `options` and `files` are accepted as returned by the option parser;
    neither is currently consulted.

    TODO: look at possibly dividing up this function
    '''

    page = _open('http://apiwiki.twitter.com/REST+API+Documentation')
    try:
        apihtml = uni2html(page.read())
    finally:
        page.close()

    ## Parsing the ApiWiki Page

    apidoc = BeautifulSoup(apihtml)
    toc = apidoc.find('div', {'class': 'toc'})
    # Map each "... Methods" table-of-contents heading to the hrefs of the
    # links listed with it.
    method_links = {}
    for entry in toc.findAll('li', text=lambda text: 'Methods' in text):
        anchors = entry.parent.parent.findAll('a')
        method_links[anchors[0].string] = [a['href'] for a in anchors[1:]]

    # Map each POST method name to the tuple of method types it appears under
    post_actions = {}
    for method_type, hrefs in method_links.items():
        for href in hrefs:
            # Strip the hash (#) mark from the method id/name
            anchor_name = href[1:]
            method_body = apidoc.find('a', {'name': anchor_name})
            label = method_body.findNext(
                'b', text=lambda text: 'Method' in text)
            value = list(label.parent.parent.childGenerator())[-1]
            if 'POST' not in value:
                continue
            method_name = method_body.findNext('h3').string
            post_actions[method_name] = (
                post_actions.get(method_name, ()) + (method_type,))

    # Invert the mapping (method types -> method names)
    # this is done to allow generation of nice comment strings
    methods_by_types = {}
    for method_name, method_types in post_actions.items():
        methods_by_types.setdefault(method_types, []).append(method_name)

    ## Print the POST_ACTIONS to stdout as a Python List
    sys.stdout.write("""'''
This module is automatically generated using `update.py`

.. data:: POST_ACTIONS
List of twitter method names that require the use of POST
'''
""" + '\n')
    sys.stdout.write('POST_ACTIONS = [\n' + '\n')
    for method_types, method_names in methods_by_types.items():
        # A comment line naming the method types, then the quoted names
        print_fw(method_types, prefix='# ', indent=1)
        print_fw([repr(str(name)) for name in method_names],
                 indent=1, trail=True)
        sys.stdout.write('\n')
    sys.stdout.write(']\n')
134
def main():
    '''
    Command-line entry point: parse the arguments and run the generator.

    The module docstring is used as the help description. A custom
    formatter indents it line by line instead of using optparse's default
    description reformatting.
    '''
    import optparse

    class CustomFormatter(optparse.IndentedHelpFormatter):
        """formatter that overrides description reformatting."""

        def format_description(self, description):
            ''' Return `description` with every line indented one level. '''
            pad = " " * ((self.level + 1) * self.indent_increment)
            indented = [pad + line for line in description.splitlines()]
            return "\n".join(indented) + "\n"

    parser = optparse.OptionParser(
        formatter=CustomFormatter(),
        description=__doc__,
    )
    options, files = parser.parse_args()
    main_with_options(options, files)
153
154
# Run the generator only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()