--- /dev/null
+"""USAGE
+ twitter-archiver [options] <-|user> [<user> ...]
+
+DESCRIPTION
+ Archive tweets of users, sorted by date from oldest to newest, in
+ the following format: <id> <date> <<screen_name>> <tweet_text>
+ Date format is: YYYY-MM-DD HH:MM:SS TZ. Tweet <id> is used to
+ resume archiving on next run. Archive file name is the user name.
+ Provide "-" instead of users to read users from standard input.
+
+OPTIONS
+ -o --oauth authenticate to Twitter using OAuth (default no)
+ -s --save-dir <path> directory to save archives (default: current dir)
+ -a --api-rate see current API rate limit status
+ -t --timeline <file> archive own timeline into given file name (requires
+ OAuth, max 800 statuses).
+
+AUTHENTICATION
+ Authenticate to Twitter using OAuth to archive tweets of private profiles
+ and have higher API rate limits. OAuth authentication tokens are stored
+ in ~/.twitter-archiver_oauth.
+"""
+
+from __future__ import print_function
+
+import os, sys, time, calendar, urllib2, httplib
+from getopt import gnu_getopt as getopt, GetoptError
+
+# T-Archiver (Twitter-Archiver) application registered by @stalkr_
+CONSUMER_KEY='d8hIyfzs7ievqeeZLjZrqQ'
+CONSUMER_SECRET='AnZmK0rnvaX7BoJ75l6XlilnbyMv7FoiDXWVmPD8'
+
+from .api import Twitter, TwitterError
+from .oauth import OAuth, read_token_file
+from .oauth_dance import oauth_dance
+from .auth import NoAuth
+from .util import Fail, err
+from .follow import lookup
+
+def parse_args(args, options):
+ """Parse arguments from command-line to set options."""
+ long_opts = ['help', 'oauth', 'save-dir=', 'api-rate', 'timeline=']
+ short_opts = "hos:at:"
+ opts, extra_args = getopt(args, short_opts, long_opts)
+
+ for opt, arg in opts:
+ if opt in ('-h', '--help'):
+ print(__doc__)
+ raise SystemExit(0)
+ elif opt in ('-o', '--oauth'):
+ options['oauth'] = True
+ elif opt in ('-s', '--save-dir'):
+ options['save-dir'] = arg
+ elif opt in ('-a', '--api-rate'):
+ options['api-rate' ] = True
+ elif opt in ('-t', '--timeline'):
+ options['timeline'] = arg
+
+ options['extra_args'] = extra_args
+
+def load_tweets(filename):
+ """Load tweets from file into dict, see save_tweets()."""
+ try:
+ archive = open(filename,"r")
+ except IOError: # no archive (yet)
+ return {}
+
+ tweets = {}
+ for line in archive.readlines():
+ tid, text = line.strip().split(" ", 1)
+ tweets[int(tid)] = text.decode("utf-8")
+
+ archive.close()
+ return tweets
+
+def save_tweets(filename, tweets):
+ """Save tweets from dict to file.
+
+ Save tweets from dict to UTF-8 encoded file, one per line:
+ <tweet id (number)> <tweet text>
+ Tweet text is:
+ <date> <<user>> [RT @<user>: ]<text>
+
+ Args:
+ filename: A string representing the file name to save tweets to.
+ tweets: A dict mapping tweet-ids (int) to tweet text (str).
+ """
+ if len(tweets) == 0:
+ return
+
+ try:
+ archive = open(filename,"w")
+ except IOError as e:
+ err("Cannot save tweets: %s" % str(e))
+ return
+
+ for k in sorted(tweets.keys()):
+ archive.write("%i %s\n" % (k, tweets[k].encode('utf-8')))
+
+ archive.close()
+
+def format_date(utc, to_localtime=True):
+ """Parse Twitter's UTC date into UTC or local time."""
+ u = time.strptime(utc.replace('+0000','UTC'), '%a %b %d %H:%M:%S %Z %Y')
+ if to_localtime and time.timezone != 0:
+ t = time.localtime(calendar.timegm(u))
+ return time.strftime("%Y-%m-%d %H:%M:%S", t) + " " + time.tzname[1]
+ else:
+ return time.strftime("%Y-%m-%d %H:%M:%S UTC", u)
+
+def format_text(text):
+ """Transform special chars in text to have only one line."""
+ return text.replace('\n','\\n').replace('\r','\\r')
+
+def timeline_resolve_uids(twitter, tl):
+ """Resolve user ids to screen names from a timeline."""
+ # get all user ids that needs a lookup (no screen_name key)
+ user_ids = []
+ for t in tl:
+ rt = t.get('retweeted_status')
+ if rt and not rt['user'].get('screen_name'):
+ user_ids.append(rt['user']['id'])
+ if not t['user'].get('screen_name'):
+ user_ids.append(t['user']['id'])
+
+ # resolve all of them at once
+ names = lookup(twitter, list(set(user_ids)))
+
+ # build new timeline with resolved uids
+ new_tl = []
+ for t in tl:
+ rt = t.get('retweeted_status')
+ if rt and not rt['user'].get('screen_name'):
+ name = names[rt['user']['id']]
+ t['retweeted_status']['user']['screen_name'] = name
+ if not t['user'].get('screen_name'):
+ name = names[t['user']['id']]
+ t['user']['screen_name'] = name
+ new_tl.append(t)
+
+ return new_tl
+
+def timeline_portion(twitter, screen_name, max_id=None):
+ """Get a portion of the timeline of a screen name."""
+ kwargs = dict(count=200, include_rts=1, screen_name=screen_name)
+ if max_id:
+ kwargs['max_id'] = max_id
+
+ tweets = {}
+ if screen_name:
+ tl = twitter.statuses.user_timeline(**kwargs)
+ else: # self
+ tl = twitter.statuses.home_timeline(**kwargs)
+
+ # some tweets do not provide screen name but user id, resolve those
+ for t in timeline_resolve_uids(twitter, tl):
+ text = t['text']
+ rt = t.get('retweeted_status')
+ if rt:
+ text = "RT @%s: %s" % (rt['user']['screen_name'], rt['text'])
+ tweets[t['id']] = "%s <%s> %s" % (format_date(t['created_at']),
+ t['user']['screen_name'],
+ format_text(text))
+
+ return tweets
+
+def timeline(twitter, screen_name, tweets):
+ """Get the entire timeline of tweets for a screen name."""
+ max_id = None
+ fail = Fail()
+ # get portions of timeline, incrementing max id until no new tweets appear
+ while True:
+ try:
+ portion = timeline_portion(twitter, screen_name, max_id)
+ except TwitterError as e:
+ if e.e.code == 401:
+ err("Fail: %i Unauthorized (tweets of that user are protected)"
+ % e.e.code)
+ break
+ elif e.e.code == 400:
+ err("Fail: %i API rate limit exceeded" % e.e.code)
+ rate = twitter.account.rate_limit_status()
+ reset = rate['reset_time_in_seconds']
+ reset = time.asctime(time.localtime(reset))
+ delay = int(rate['reset_time_in_seconds']
+ - time.time()) + 5 # avoid race
+ err("Hourly limit of %i requests reached, next reset on %s: "
+ "going to sleep for %i secs" % (rate['hourly_limit'],
+ reset, delay))
+ fail.wait(delay)
+ continue
+ elif e.e.code == 502:
+ err("Fail: %i Service currently unavailable, retrying..."
+ % e.e.code)
+ else:
+ err("Fail: %s\nRetrying..." % str(e)[:500])
+ fail.wait(3)
+ except urllib2.URLError as e:
+ err("Fail: urllib2.URLError %s - Retrying..." % str(e))
+ fail.wait(3)
+ except httplib.error as e:
+ err("Fail: httplib.error %s - Retrying..." % str(e))
+ fail.wait(3)
+ except KeyError as e:
+ err("Fail: KeyError %s - Retrying..." % str(e))
+ fail.wait(3)
+ else:
+ new = -len(tweets)
+ tweets.update(portion)
+ new += len(tweets)
+ err("Browsing %s timeline, new tweets: %i"
+ % (screen_name if screen_name else "home", new))
+ if new < 190:
+ break
+ max_id = min(portion.keys()) # browse backwards
+ fail = Fail()
+
+def rate_limit_status(twitter):
+ """Print current Twitter API rate limit status."""
+ r = twitter.account.rate_limit_status()
+ print("Remaining API requests: %i/%i (hourly limit)"
+ % (r['remaining_hits'], r['hourly_limit']))
+ print("Next reset in %is (%s)"
+ % (int(r['reset_time_in_seconds'] - time.time()),
+ time.asctime(time.localtime(r['reset_time_in_seconds']))))
+
+def main(args=sys.argv[1:]):
+ options = {
+ 'oauth': False,
+ 'save-dir': ".",
+ 'api-rate': False,
+ 'timeline': ""
+ }
+ try:
+ parse_args(args, options)
+ except GetoptError as e:
+ err("I can't do that, %s." % e)
+ raise SystemExit(1)
+
+ # exit if no user given
+ # except if asking for API rate or archive of timeline
+ if not options['extra_args'] and not (options['api-rate'] or
+ options['timeline']):
+ print(__doc__)
+ return
+
+ # authenticate using OAuth, asking for token if necessary
+ if options['oauth']:
+ oauth_filename = (os.getenv("HOME", "") + os.sep
+ + ".twitter-archiver_oauth")
+ if not os.path.exists(oauth_filename):
+ oauth_dance("Twitter-Archiver", CONSUMER_KEY, CONSUMER_SECRET,
+ oauth_filename)
+ oauth_token, oauth_token_secret = read_token_file(oauth_filename)
+ auth = OAuth(oauth_token, oauth_token_secret, CONSUMER_KEY,
+ CONSUMER_SECRET)
+ else:
+ auth = NoAuth()
+
+ twitter = Twitter(auth=auth, api_version='1', domain='api.twitter.com')
+
+ if options['api-rate']:
+ rate_limit_status(twitter)
+ return
+
+ # save own timeline (the user used in OAuth)
+ if options['timeline']:
+ if isinstance(auth, NoAuth):
+ err("You must be authenticated to save timeline.")
+ raise SystemExit(1)
+
+ filename = options['save-dir'] + os.sep + options['timeline']
+ print("* Archiving own timeline in %s" % filename)
+
+ tweets = {}
+ try:
+ tweets = load_tweets(filename)
+ except Exception, e:
+ err("Error when loading saved tweets: %s - continuing without"
+ % str(e))
+
+ try:
+ # no screen_name means we want home_timeline, not user_timeline
+ timeline(twitter, "", tweets)
+ except KeyboardInterrupt:
+ err()
+ err("Interrupted")
+ raise SystemExit(1)
+
+ save_tweets(filename, tweets)
+ print("Total tweets in own timeline: %i" % len(tweets))
+
+ # read users from command-line or stdin
+ users = options['extra_args']
+ if len(users) == 1 and users[0] == "-":
+ users = [line.strip() for line in sys.stdin.readlines()]
+
+ # save tweets for every user
+ total, total_new = 0, 0
+ for user in users:
+ filename = options['save-dir'] + os.sep + user
+ print("* Archiving %s tweets in %s" % (user, filename))
+
+ tweets = {}
+ try:
+ tweets = load_tweets(filename)
+ except Exception, e:
+ err("Error when loading saved tweets: %s - continuing without"
+ % str(e))
+
+ new = 0
+ before = len(tweets)
+ try:
+ timeline(twitter, user, tweets)
+ except KeyboardInterrupt:
+ err()
+ err("Interrupted")
+ raise SystemExit(1)
+
+ save_tweets(filename, tweets)
+ total += len(tweets)
+ new = len(tweets) - before
+ total_new += new
+ print("Total tweets for %s: %i (%i new)" % (user, len(tweets), new))
+
+ print("Total: %i tweets (%i new) for %i users"
+ % (total, total_new, len(users)))
--- /dev/null
+"""USAGE
+ twitter-follow [options] <user>
+
+DESCRIPTION
+ Display all following/followers of a user, one user per line.
+
+OPTIONS
+ -o --oauth authenticate to Twitter using OAuth (default no)
+ -r --followers display followers of the given user (default)
+ -g --following display users the given user is following
+ -a --api-rate see your current API rate limit status
+
+AUTHENTICATION
+ Authenticate to Twitter using OAuth to see following/followers of private
+ profiles and have higher API rate limits. OAuth authentication tokens
+ are stored in the file .twitter-follow_oauth in your home directory.
+"""
+
+from __future__ import print_function
+
+import os, sys, time, calendar, urllib2, httplib
+from getopt import gnu_getopt as getopt, GetoptError
+
+# T-Follow (Twitter-Follow) application registered by @stalkr_
+CONSUMER_KEY='USRZQfvFFjB6UvZIN2Edww'
+CONSUMER_SECRET='AwGAaSzZa5r0TDL8RKCDtffnI9H9mooZUdOa95nw8'
+
+from .api import Twitter, TwitterError
+from .oauth import OAuth, read_token_file
+from .oauth_dance import oauth_dance
+from .auth import NoAuth
+from .util import Fail, err
+
+def parse_args(args, options):
+ """Parse arguments from command-line to set options."""
+ long_opts = ['help', 'oauth', 'followers', 'following', 'api-rate']
+ short_opts = "horga"
+ opts, extra_args = getopt(args, short_opts, long_opts)
+
+ for opt, arg in opts:
+ if opt in ('-h', '--help'):
+ print(__doc__)
+ raise SystemExit(1)
+ elif opt in ('-o', '--oauth'):
+ options['oauth'] = True
+ elif opt in ('-r', '--followers'):
+ options['followers'] = True
+ elif opt in ('-g', '--following'):
+ options['followers'] = False
+ elif opt in ('-a', '--api-rate'):
+ options['api-rate' ] = True
+
+ options['extra_args'] = extra_args
+
+def lookup_portion(twitter, user_ids):
+ """Resolve a limited list of user ids to screen names."""
+ users = {}
+ kwargs = dict(user_id=",".join(map(str, user_ids)), skip_status=1)
+ for u in twitter.users.lookup(**kwargs):
+ users[int(u['id'])] = u['screen_name']
+ return users
+
+def lookup(twitter, user_ids):
+ """Resolve an entire list of user ids to screen names."""
+ users = {}
+ api_limit = 100
+ for i in range(0, len(user_ids), api_limit):
+ fail = Fail()
+ while True:
+ try:
+ portion = lookup_portion(twitter, user_ids[i:][:api_limit])
+ except TwitterError as e:
+ if e.e.code == 400:
+ err("Fail: %i API rate limit exceeded" % e.e.code)
+ rate = twitter.account.rate_limit_status()
+ reset = rate['reset_time_in_seconds']
+ reset = time.asctime(time.localtime(reset))
+ delay = int(rate['reset_time_in_seconds']
+ - time.time()) + 5 # avoid race
+ err("Hourly limit of %i requests reached, next reset on "
+ "%s: going to sleep for %i secs"
+ % (rate['hourly_limit'], reset, delay))
+ fail.wait(delay)
+ continue
+ elif e.e.code == 502:
+ err("Fail: %i Service currently unavailable, retrying..."
+ % e.e.code)
+ else:
+ err("Fail: %s\nRetrying..." % str(e)[:500])
+ fail.wait(3)
+ except urllib2.URLError as e:
+ err("Fail: urllib2.URLError %s - Retrying..." % str(e))
+ fail.wait(3)
+ except httplib.error as e:
+ err("Fail: httplib.error %s - Retrying..." % str(e))
+ fail.wait(3)
+ except KeyError as e:
+ err("Fail: KeyError %s - Retrying..." % str(e))
+ fail.wait(3)
+ else:
+ users.update(portion)
+ err("Resolving user ids to screen names: %i/%i"
+ % (len(users), len(user_ids)))
+ break
+ return users
+
+def follow_portion(twitter, screen_name, cursor=-1, followers=True):
+ """Get a portion of followers/following for a user."""
+ kwargs = dict(screen_name=screen_name, cursor=cursor)
+ if followers:
+ t = twitter.followers.ids(**kwargs)
+ else: # following
+ t = twitter.friends.ids(**kwargs)
+ return t['ids'], t['next_cursor']
+
+def follow(twitter, screen_name, followers=True):
+ """Get the entire list of followers/following for a user."""
+ user_ids = []
+ cursor = -1
+ fail = Fail()
+ while True:
+ try:
+ portion, cursor = follow_portion(twitter, screen_name, cursor,
+ followers)
+ except TwitterError as e:
+ if e.e.code == 401:
+ reason = ("follow%s of that user are protected"
+ % ("ers" if followers else "ing"))
+ err("Fail: %i Unauthorized (%s)" % (e.e.code, reason))
+ break
+ elif e.e.code == 400:
+ err("Fail: %i API rate limit exceeded" % e.e.code)
+ rate = twitter.account.rate_limit_status()
+ reset = rate['reset_time_in_seconds']
+ reset = time.asctime(time.localtime(reset))
+ delay = int(rate['reset_time_in_seconds']
+ - time.time()) + 5 # avoid race
+ err("Hourly limit of %i requests reached, next reset on %s: "
+ "going to sleep for %i secs" % (rate['hourly_limit'],
+ reset, delay))
+ fail.wait(delay)
+ continue
+ elif e.e.code == 502:
+ err("Fail: %i Service currently unavailable, retrying..."
+ % e.e.code)
+ else:
+ err("Fail: %s\nRetrying..." % str(e)[:500])
+ fail.wait(3)
+ except urllib2.URLError as e:
+ err("Fail: urllib2.URLError %s - Retrying..." % str(e))
+ fail.wait(3)
+ except httplib.error as e:
+ err("Fail: httplib.error %s - Retrying..." % str(e))
+ fail.wait(3)
+ except KeyError as e:
+ err("Fail: KeyError %s - Retrying..." % str(e))
+ fail.wait(3)
+ else:
+ new = -len(user_ids)
+ user_ids = list(set(user_ids + portion))
+ new += len(user_ids)
+ what = "follow%s" % ("ers" if followers else "ing")
+ err("Browsing %s %s, new: %i" % (screen_name, what, new))
+ if cursor == 0:
+ break
+ fail = Fail()
+ return user_ids
+
+
+def rate_limit_status(twitter):
+ """Print current Twitter API rate limit status."""
+ r = twitter.account.rate_limit_status()
+ print("Remaining API requests: %i/%i (hourly limit)"
+ % (r['remaining_hits'], r['hourly_limit']))
+ print("Next reset in %is (%s)"
+ % (int(r['reset_time_in_seconds'] - time.time()),
+ time.asctime(time.localtime(r['reset_time_in_seconds']))))
+
+def main(args=sys.argv[1:]):
+ options = {
+ 'oauth': False,
+ 'followers': True,
+ 'api-rate': False
+ }
+ try:
+ parse_args(args, options)
+ except GetoptError as e:
+ err("I can't do that, %s." % e)
+ raise SystemExit(1)
+
+ # exit if no user or given, except if asking for API rate
+ if not options['extra_args'] and not options['api-rate']:
+ print(__doc__)
+ raise SystemExit(1)
+
+ # authenticate using OAuth, asking for token if necessary
+ if options['oauth']:
+ oauth_filename = (os.getenv("HOME", "") + os.sep
+ + ".twitter-follow_oauth")
+ if not os.path.exists(oauth_filename):
+ oauth_dance("Twitter-Follow", CONSUMER_KEY, CONSUMER_SECRET,
+ oauth_filename)
+ oauth_token, oauth_token_secret = read_token_file(oauth_filename)
+ auth = OAuth(oauth_token, oauth_token_secret, CONSUMER_KEY,
+ CONSUMER_SECRET)
+ else:
+ auth = NoAuth()
+
+ twitter = Twitter(auth=auth, api_version='1', domain='api.twitter.com')
+
+ if options['api-rate']:
+ rate_limit_status(twitter)
+ return
+
+ # obtain list of followers (or following) for every given user
+ for user in options['extra_args']:
+ user_ids, users = [], {}
+ try:
+ user_ids = follow(twitter, user, options['followers'])
+ users = lookup(twitter, user_ids)
+ except KeyboardInterrupt as e:
+ err()
+ err("Interrupted.")
+ raise SystemExit(1)
+
+ for uid in user_ids:
+ print(users[uid].encode("utf-8"))
+
+ # print total on stderr to separate from user list on stdout
+ if options['followers']:
+ err("Total followers for %s: %i" % (user, len(user_ids)))
+ else:
+ err("Total users %s is following: %i" % (user, len(user_ids)))