-v --favorites archive user's favorites instead of timeline
-f --follow-redirects follow redirects of urls
-r --redirect-sites follow redirects for this comma separated list of hosts
+ -d --dms <file> archive own direct messages (both received and
+ sent) into given file name.
+ -i --isoformat store dates in ISO format (specifically RFC 3339)
AUTHENTICATION
Authenticate to Twitter using OAuth to archive tweets of private profiles
from __future__ import print_function
-import os, sys, time, calendar, functools
+import os, sys, time as _time, calendar, functools
+from datetime import time, date, datetime
from getopt import gnu_getopt as getopt, GetoptError
try:
from .auth import NoAuth
from .util import Fail, err, expand_line, parse_host_list
from .follow import lookup
+from .timezones import utc as UTC, Local
def parse_args(args, options):
"""Parse arguments from command-line to set options."""
- long_opts = ['help', 'oauth', 'save-dir=', 'api-rate', 'timeline=', 'mentions=', 'favorites', 'follow-redirects',"redirect-sites="]
- short_opts = "hos:at:m:vfr:"
+ long_opts = ['help', 'oauth', 'save-dir=', 'api-rate', 'timeline=', 'mentions=', 'favorites', 'follow-redirects',"redirect-sites=", 'dms=', 'isoformat']
+ short_opts = "hos:at:m:vfr:d:i"
opts, extra_args = getopt(args, short_opts, long_opts)
for opt, arg in opts:
options['follow-redirects'] = True
elif opt in ('-r', '--redirect-sites'):
options['redirect-sites'] = arg
+ elif opt in ('-d', '--dms'):
+ options['dms'] = arg
+ elif opt in ('-i', '--isoformat'):
+ options['isoformat'] = True
options['extra_args'] = extra_args
tweets = {}
for line in archive.readlines():
- tid, text = line.strip().split(" ", 1)
- tweets[int(tid)] = text.decode("utf-8")
+ try:
+ tid, text = line.strip().split(" ", 1)
+ tweets[int(tid)] = text.decode("utf-8")
+ except Exception as e:
+ err("loading tweet %s failed due to %s" % (line, unicode(e)))
archive.close()
return tweets
return
for k in sorted(tweets.keys()):
- archive.write("%i %s\n" % (k, tweets[k].encode('utf-8')))
+ try:
+ archive.write("%i %s\n" % (k, tweets[k].encode('utf-8')))
+ except Exception as ex:
+ err("archiving tweet %s failed due to %s" % (k, unicode(ex)))
archive.close()
-def format_date(utc, to_localtime=True):
+def format_date(utc, isoformat=False):
"""Parse Twitter's UTC date into UTC or local time."""
- u = time.strptime(utc.replace('+0000','UTC'), '%a %b %d %H:%M:%S %Z %Y')
- if to_localtime and time.timezone != 0:
- t = time.localtime(calendar.timegm(u))
- return time.strftime("%Y-%m-%d %H:%M:%S", t) + " " + time.tzname[1]
+ u = datetime.strptime(utc.replace('+0000','UTC'), '%a %b %d %H:%M:%S %Z %Y')
+ # strptime() returns a naive datetime even though '%Z' is parsed, so
+ # rebuild the value with an explicit UTC tzinfo to make it
+ # timezone-aware. (u.replace(tzinfo=UTC) would be a shorter equivalent.)
+ unew = datetime.combine(u.date(), time(u.time().hour,
+ u.time().minute, u.time().second, tzinfo=UTC))
+
+ # Convert from UTC to the local timezone
+ unew = unew.astimezone(Local)
+
+ if isoformat:
+ return unew.isoformat()
else:
- return time.strftime("%Y-%m-%d %H:%M:%S UTC", u)
+ return unew.strftime('%Y-%m-%d %H:%M:%S %Z')
def expand_format_text(hosts, text):
"""Following redirects in links."""
return new_tl
-def statuses_portion(twitter, screen_name, max_id=None, mentions=False, favorites=False):
+def statuses_portion(twitter, screen_name, max_id=None, mentions=False, favorites=False, received_dms=None, isoformat=False):
"""Get a portion of the statuses of a screen name."""
kwargs = dict(count=200, include_rts=1, screen_name=screen_name)
if max_id:
tweets = {}
if mentions:
- tl = twitter.statuses.mentions(**kwargs)
+ tl = twitter.statuses.mentions_timeline(**kwargs)
elif favorites:
- tl = twitter.favorites(**kwargs) # API v1, favorites.list() in v1.1
+ tl = twitter.favorites.list(**kwargs)
+ elif received_dms != None:
+ if received_dms:
+ tl = twitter.direct_messages(**kwargs)
+ else: # sent DMs
+ tl = twitter.direct_messages.sent(**kwargs)
else: # timeline
if screen_name:
tl = twitter.statuses.user_timeline(**kwargs)
tl = twitter.statuses.home_timeline(**kwargs)
# some tweets do not provide screen name but user id, resolve those
- for t in statuses_resolve_uids(twitter, tl):
+ # this isn't a valid operation for DMs, so special-case them
+ if received_dms == None:
+ newtl = statuses_resolve_uids(twitter, tl)
+ else:
+ newtl = tl
+ for t in newtl:
text = t['text']
rt = t.get('retweeted_status')
if rt:
text = "RT @%s: %s" % (rt['user']['screen_name'], rt['text'])
- tweets[t['id']] = "%s <%s> %s" % (format_date(t['created_at']),
- t['user']['screen_name'],
- format_text(text))
+ # DMs don't include mentions by default, so in order to show who
+ # the recipient was, we synthesise a mention. If we're not
+ # operating on DMs, behave as normal
+ if received_dms == None:
+ tweets[t['id']] = "%s <%s> %s" % (format_date(t['created_at'], isoformat=isoformat),
+ t['user']['screen_name'],
+ format_text(text))
+ else:
+ tweets[t['id']] = "%s <%s> @%s %s" % (format_date(t['created_at'], isoformat=isoformat),
+ t['sender_screen_name'],
+ t['recipient']['screen_name'],
+ format_text(text))
return tweets
-def statuses(twitter, screen_name, tweets, mentions=False, favorites=False):
+def statuses(twitter, screen_name, tweets, mentions=False, favorites=False, received_dms=None, isoformat=False):
"""Get all the statuses for a screen name."""
max_id = None
fail = Fail()
# get portions of statuses, incrementing max id until no new tweets appear
while True:
try:
- portion = statuses_portion(twitter, screen_name, max_id, mentions, favorites)
+ portion = statuses_portion(twitter, screen_name, max_id, mentions, favorites, received_dms, isoformat)
except TwitterError as e:
if e.e.code == 401:
err("Fail: %i Unauthorized (tweets of that user are protected)"
% e.e.code)
break
- elif e.e.code == 400:
+ elif e.e.code == 429:
err("Fail: %i API rate limit exceeded" % e.e.code)
- rate = twitter.account.rate_limit_status()
- reset = rate['reset_time_in_seconds']
- reset = time.asctime(time.localtime(reset))
- delay = int(rate['reset_time_in_seconds']
- - time.time()) + 5 # avoid race
- err("Hourly limit of %i requests reached, next reset on %s: "
- "going to sleep for %i secs" % (rate['hourly_limit'],
+ rls = twitter.application.rate_limit_status()
+ reset = rls.rate_limit_reset
+ reset = _time.asctime(_time.localtime(reset))
+ delay = int(rls.rate_limit_reset
+ - _time.time()) + 5 # avoid race
+ err("Interval limit of %i requests reached, next reset on %s: "
+ "going to sleep for %i secs" % (rls.rate_limit_limit,
reset, delay))
fail.wait(delay)
continue
def rate_limit_status(twitter):
"""Print current Twitter API rate limit status."""
- r = twitter.account.rate_limit_status()
- print("Remaining API requests: %i/%i (hourly limit)"
- % (r['remaining_hits'], r['hourly_limit']))
+ rls = twitter.application.rate_limit_status()
+ print("Remaining API requests: %i/%i (interval limit)"
+ % (rls.rate_limit_remaining, rls.rate_limit_limit))
print("Next reset in %is (%s)"
- % (int(r['reset_time_in_seconds'] - time.time()),
- time.asctime(time.localtime(r['reset_time_in_seconds']))))
+ % (int(rls.rate_limit_reset - _time.time()),
+ _time.asctime(_time.localtime(rls.rate_limit_reset))))
def main(args=sys.argv[1:]):
options = {
'api-rate': False,
'timeline': "",
'mentions': "",
+ 'dms': "",
'favorites': False,
'follow-redirects': False,
'redirect-sites': None,
+ 'isoformat': False,
}
try:
parse_args(args, options)
# except if asking for API rate, or archive of timeline or mentions
if not options['extra_args'] and not (options['api-rate'] or
options['timeline'] or
- options['mentions']):
+ options['mentions'] or
+ options['dms']):
print(__doc__)
return
# authenticate using OAuth, asking for token if necessary
if options['oauth']:
- oauth_filename = (os.getenv("HOME", "") + os.sep
- + ".twitter-archiver_oauth")
+ oauth_filename = (os.environ.get('HOME',
+ os.environ.get('USERPROFILE', ''))
+ + os.sep
+ + '.twitter-archiver_oauth')
+
if not os.path.exists(oauth_filename):
oauth_dance("Twitter-Archiver", CONSUMER_KEY, CONSUMER_SECRET,
oauth_filename)
else:
auth = NoAuth()
- twitter = Twitter(auth=auth, api_version='1', domain='api.twitter.com')
+ twitter = Twitter(auth=auth, api_version='1.1', domain='api.twitter.com')
if options['api-rate']:
rate_limit_status(twitter)
format_text = functools.partial(expand_format_text, hosts)
else:
format_text = direct_format_text
-
+
# save own timeline or mentions (the user used in OAuth)
if options['timeline'] or options['mentions']:
if isinstance(auth, NoAuth):
% str(e))
try:
- statuses(twitter, "", tweets, options['mentions'], options['favorites'])
+ statuses(twitter, "", tweets, options['mentions'], options['favorites'], isoformat=options['isoformat'])
except KeyboardInterrupt:
err()
err("Interrupted")
elif options['mentions']:
print("Total mentions: %i" % len(tweets))
+ if options['dms']:
+ if isinstance(auth, NoAuth):
+ err("You must be authenticated to save DMs.")
+ raise SystemExit(1)
+
+ filename = options['save-dir'] + os.sep + options['dms']
+ print("* Archiving own DMs in %s" % filename)
+
+ dms = {}
+ try:
+ dms = load_tweets(filename)
+ except Exception as e:
+ err("Error when loading saved DMs: %s - continuing without"
+ % str(e))
+
+ try:
+ statuses(twitter, "", dms, received_dms=True, isoformat=options['isoformat'])
+ statuses(twitter, "", dms, received_dms=False, isoformat=options['isoformat'])
+ except KeyboardInterrupt:
+ err()
+ err("Interrupted")
+ raise SystemExit(1)
+
+ save_tweets(filename, dms)
+ print("Total DMs sent and received: %i" % len(dms))
+
+
# read users from command-line or stdin
users = options['extra_args']
if len(users) == 1 and users[0] == "-":
new = 0
before = len(tweets)
try:
- statuses(twitter, user, tweets, options['mentions'], options['favorites'])
+ statuses(twitter, user, tweets, options['mentions'], options['favorites'], isoformat=options['isoformat'])
except KeyboardInterrupt:
err()
err("Interrupted")