Provide "-" instead of users to read users from standard input.
OPTIONS
- -o --oauth authenticate to Twitter using OAuth (default no)
+ -o --oauth authenticate to Twitter using OAuth (default: no)
-s --save-dir <path> directory to save archives (default: current dir)
-a --api-rate see current API rate limit status
-t --timeline <file> archive own timeline into given file name (requires
- OAuth, max 800 statuses).
+ OAuth, max 800 statuses)
+ -m --mentions <file> archive own mentions instead of timeline into
+ given file name (requires OAuth, max 800 statuses)
+ -v --favorites archive user's favorites instead of timeline
+ -f --follow-redirects follow redirects of URLs
+ -r --redirect-sites follow redirects for this comma-separated list of hosts
AUTHENTICATION
Authenticate to Twitter using OAuth to archive tweets of private profiles
from __future__ import print_function
-import os, sys, time, calendar, urllib2, httplib
+import os, sys, time, calendar, functools
from getopt import gnu_getopt as getopt, GetoptError
+try:
+ import urllib.request as urllib2
+ import http.client as httplib
+except ImportError:
+ import urllib2
+ import httplib
+
+
# T-Archiver (Twitter-Archiver) application registered by @stalkr_
CONSUMER_KEY='d8hIyfzs7ievqeeZLjZrqQ'
CONSUMER_SECRET='AnZmK0rnvaX7BoJ75l6XlilnbyMv7FoiDXWVmPD8'
from .oauth import OAuth, read_token_file
from .oauth_dance import oauth_dance
from .auth import NoAuth
-from .util import Fail, err
+from .util import Fail, err, expand_line, parse_host_list
from .follow import lookup
def parse_args(args, options):
"""Parse arguments from command-line to set options."""
- long_opts = ['help', 'oauth', 'save-dir=', 'api-rate', 'timeline=']
- short_opts = "hos:at:"
+ long_opts = ['help', 'oauth', 'save-dir=', 'api-rate', 'timeline=', 'mentions=', 'favorites', 'follow-redirects',"redirect-sites="]
+ short_opts = "hos:at:m:vfr:"
opts, extra_args = getopt(args, short_opts, long_opts)
for opt, arg in opts:
options['api-rate' ] = True
elif opt in ('-t', '--timeline'):
options['timeline'] = arg
+ elif opt in ('-m', '--mentions'):
+ options['mentions'] = arg
+ elif opt in ('-v', '--favorites'):
+ options['favorites'] = True
+ elif opt in ('-f', '--follow-redirects'):
+ options['follow-redirects'] = True
+ elif opt in ('-r', '--redirect-sites'):
+ options['redirect-sites'] = arg
options['extra_args'] = extra_args
else:
return time.strftime("%Y-%m-%d %H:%M:%S UTC", u)
-def format_text(text):
+def expand_format_text(hosts, text):
+ """Follow redirects in links, then transform special chars to have only one line."""
+ return direct_format_text(expand_line(text, hosts))
+
+def direct_format_text(text):
"""Transform special chars in text to have only one line."""
return text.replace('\n','\\n').replace('\r','\\r')
-def timeline_resolve_uids(twitter, tl):
- """Resolve user ids to screen names from a timeline."""
+def statuses_resolve_uids(twitter, tl):
+ """Resolve user ids to screen names from statuses."""
# get all user ids that needs a lookup (no screen_name key)
user_ids = []
for t in tl:
# resolve all of them at once
names = lookup(twitter, list(set(user_ids)))
- # build new timeline with resolved uids
+ # build new statuses with resolved uids
new_tl = []
for t in tl:
rt = t.get('retweeted_status')
return new_tl
-def timeline_portion(twitter, screen_name, max_id=None):
- """Get a portion of the timeline of a screen name."""
+def statuses_portion(twitter, screen_name, max_id=None, mentions=False, favorites=False):
+ """Get a portion of the statuses of a screen name."""
kwargs = dict(count=200, include_rts=1, screen_name=screen_name)
if max_id:
kwargs['max_id'] = max_id
tweets = {}
- if screen_name:
- tl = twitter.statuses.user_timeline(**kwargs)
- else: # self
- tl = twitter.statuses.home_timeline(**kwargs)
+ if mentions:
+ tl = twitter.statuses.mentions(**kwargs)
+ elif favorites:
+ tl = twitter.favorites(**kwargs) # API v1, favorites.list() in v1.1
+ else: # timeline
+ if screen_name:
+ tl = twitter.statuses.user_timeline(**kwargs)
+ else: # self
+ tl = twitter.statuses.home_timeline(**kwargs)
# some tweets do not provide screen name but user id, resolve those
- for t in timeline_resolve_uids(twitter, tl):
+ for t in statuses_resolve_uids(twitter, tl):
text = t['text']
rt = t.get('retweeted_status')
if rt:
tweets[t['id']] = "%s <%s> %s" % (format_date(t['created_at']),
t['user']['screen_name'],
format_text(text))
-
return tweets
-def timeline(twitter, screen_name, tweets):
- """Get the entire timeline of tweets for a screen name."""
+def statuses(twitter, screen_name, tweets, mentions=False, favorites=False):
+ """Get all the statuses for a screen name."""
max_id = None
fail = Fail()
- # get portions of timeline, incrementing max id until no new tweets appear
+ # get portions of statuses, incrementing max id until no new tweets appear
while True:
try:
- portion = timeline_portion(twitter, screen_name, max_id)
+ portion = statuses_portion(twitter, screen_name, max_id, mentions, favorites)
except TwitterError as e:
if e.e.code == 401:
err("Fail: %i Unauthorized (tweets of that user are protected)"
new = -len(tweets)
tweets.update(portion)
new += len(tweets)
- err("Browsing %s timeline, new tweets: %i"
+ err("Browsing %s statuses, new tweets: %i"
% (screen_name if screen_name else "home", new))
if new < 190:
break
'oauth': False,
'save-dir': ".",
'api-rate': False,
- 'timeline': ""
+ 'timeline': "",
+ 'mentions': "",
+ 'favorites': False,
+ 'follow-redirects': False,
+ 'redirect-sites': None,
}
try:
parse_args(args, options)
raise SystemExit(1)
# exit if no user given
- # except if asking for API rate or archive of timeline
+ # except if asking for API rate, or archive of timeline or mentions
if not options['extra_args'] and not (options['api-rate'] or
- options['timeline']):
+ options['timeline'] or
+ options['mentions']):
print(__doc__)
return
rate_limit_status(twitter)
return
- # save own timeline (the user used in OAuth)
- if options['timeline']:
+ global format_text
+ if options['follow-redirects'] or options['redirect-sites'] :
+ if options['redirect-sites']:
+ hosts = parse_host_list(options['redirect-sites'])
+ else:
+ hosts = None
+ format_text = functools.partial(expand_format_text, hosts)
+ else:
+ format_text = direct_format_text
+
+ # save own timeline or mentions (the user used in OAuth)
+ if options['timeline'] or options['mentions']:
if isinstance(auth, NoAuth):
- err("You must be authenticated to save timeline.")
+ err("You must be authenticated to save timeline or mentions.")
raise SystemExit(1)
- filename = options['save-dir'] + os.sep + options['timeline']
- print("* Archiving own timeline in %s" % filename)
+ if options['timeline']:
+ filename = options['save-dir'] + os.sep + options['timeline']
+ print("* Archiving own timeline in %s" % filename)
+ elif options['mentions']:
+ filename = options['save-dir'] + os.sep + options['mentions']
+ print("* Archiving own mentions in %s" % filename)
tweets = {}
try:
tweets = load_tweets(filename)
- except Exception, e:
+ except Exception as e:
err("Error when loading saved tweets: %s - continuing without"
% str(e))
try:
- # no screen_name means we want home_timeline, not user_timeline
- timeline(twitter, "", tweets)
+ statuses(twitter, "", tweets, options['mentions'], options['favorites'])
except KeyboardInterrupt:
err()
err("Interrupted")
raise SystemExit(1)
save_tweets(filename, tweets)
- print("Total tweets in own timeline: %i" % len(tweets))
+ if options['timeline']:
+ print("Total tweets in own timeline: %i" % len(tweets))
+ elif options['mentions']:
+ print("Total mentions: %i" % len(tweets))
# read users from command-line or stdin
users = options['extra_args']
total, total_new = 0, 0
for user in users:
filename = options['save-dir'] + os.sep + user
+ if options['favorites']:
+ filename = filename + "-favorites"
print("* Archiving %s tweets in %s" % (user, filename))
tweets = {}
try:
tweets = load_tweets(filename)
- except Exception, e:
+ except Exception as e:
err("Error when loading saved tweets: %s - continuing without"
% str(e))
new = 0
before = len(tweets)
try:
- timeline(twitter, user, tweets)
+ statuses(twitter, user, tweets, options['mentions'], options['favorites'])
except KeyboardInterrupt:
err()
err("Interrupted")