#!/usr/bin/python -tt
# google-reader-shared-items v1.0 - Retrieve all shared items for an account
# Copyright (c) 2011, John Morrissey
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, 5th Floor, Boston, MA 02110-1301
# USA.

# This script is a bit of a hack; we imitate a web browser and scrape
# the result. It would be nice if Google Reader shared items could be
# downloaded through some Google Reader API.

# Credentials used to log in; normally supplied with -u/-p on the command
# line, which override these defaults.
GOOGLE_USERNAME = ''
GOOGLE_PASSWORD = ''
# First page of shared items to fetch.
# NOTE(review): nothing in this script substitutes the GOOGLEUSERNAME path
# component; presumably it has to be edited by hand -- confirm.
SHARED_ITEMS_URL = 'https://www.google.com/reader/shared/GOOGLEUSERNAME'
# When True, trace mechanize's HTTP traffic and every URL fetched to stdout.
DEBUG = False

import getopt
import logging
import os
import re
import sys
import urllib2

import BeautifulSoup
import mechanize


def usage():
    """Print the program banner and option summary to stdout."""
    print('google-reader-shared-items v1.0')
    print('Usage: %s [-h] [-u USER] [-p PASSWORD]' %
          os.path.basename(sys.argv[0]))
    print('')
    print(' -h, --help display this help and exit')
    print(' -u USER, --user=USER log in as USER')
    print(' -p PASSWORD, --password=PASSWORD use password PASSWORD')


# --- Command-line handling -------------------------------------------------

try:
    # 'help' takes no argument, so it must not carry a trailing '='.
    opts, args = getopt.gnu_getopt(
        sys.argv[1:], 'hu:p:', ['help', 'user=', 'password='])
except getopt.GetoptError as e:
    print('%s: %s' % (os.path.basename(sys.argv[0]), str(e)))
    usage()
    sys.exit(2)

# Positional arguments are not accepted.
if args:
    usage()
    sys.exit(2)

for opt_name, opt_value in opts:
    if opt_name in ('-h', '--help'):
        usage()
        sys.exit(0)
    elif opt_name in ('-u', '--user'):
        GOOGLE_USERNAME = opt_value
    elif opt_name in ('-p', '--password'):
        GOOGLE_PASSWORD = opt_value

# Both credentials are required, from the defaults above or the options.
if not GOOGLE_USERNAME or not GOOGLE_PASSWORD:
    usage()
    sys.exit(2)

# --- Browser setup ---------------------------------------------------------

br = mechanize.Browser()
br.set_handle_robots(False)
br.set_handle_refresh(True, 10, True)
br.set_handle_redirect(True)
br.addheaders = [
    ('User-agent', 'google-reader-shared-items v1.0'),
]

if DEBUG:
    br.set_debug_http(True)
    br.set_debug_responses(True)
    br.set_debug_redirects(True)
    logger = logging.getLogger('mechanize')
    logger.addHandler(logging.StreamHandler(sys.stdout))
    logger.setLevel(logging.DEBUG)

# --- Log in ----------------------------------------------------------------

try:
    br.open('https://www.google.com/accounts/ServiceLogin')
except urllib2.HTTPError as e:
    sys.exit('%d %s' % (e.code, e.msg))
if not br.viewing_html():
    # The original interpolated an undefined name here, which raised
    # NameError instead of reporting the problem.
    sys.exit('Unable to retrieve HTML for login page, '
             'has google.com changed?')


def find_login_form(form):
    """Return True if *form* has both an 'Email' and a 'Passwd' control.

    Used as the predicate for ``br.select_form`` to pick out the login form.
    """
    try:
        form.find_control('Email')
        form.find_control('Passwd')
    except mechanize.ControlNotFoundError:
        return False
    return True


try:
    br.select_form(predicate=find_login_form)
except mechanize.FormNotFoundError:
    sys.exit('Unable to locate login form, has google.com changed?')

br['Email'] = GOOGLE_USERNAME
br['Passwd'] = GOOGLE_PASSWORD
try:
    br.submit()
except urllib2.HTTPError as e:
    sys.exit('%d %s' % (e.code, e.msg))

# --- Fetch and print the shared items --------------------------------------


def getPage(url):
    """Fetch *url* with the logged-in browser and parse it.

    The element with id 'sidebar', if present, is removed from the parsed
    document. The element with id 'more', if present, is detached from the
    document and returned separately.

    Returns a ``(more, soup)`` tuple where *more* is the detached 'more'
    element (or None when the page has none) and *soup* is the parsed page.
    Exits the program with an error message on an HTTP error.
    """
    if DEBUG:
        print('Retrieving %s' % url)

    try:
        r = br.open(url)
    except urllib2.HTTPError as e:
        sys.exit('Unable to fetch Google Reader shared items for %s: %s' %
                 (GOOGLE_USERNAME, str(e)))

    soup = BeautifulSoup.BeautifulSoup(r.get_data())

    sidebar = soup.find(id='sidebar')
    if sidebar is not None:
        sidebar.extract()

    more = soup.find(id='more')
    if more is not None:
        more = more.extract()
    return (more, soup)


def _print_items(soup):
    """Print the element with id 'items' from *soup*, or exit if missing."""
    items = soup.find(id='items')
    if items is None:
        sys.exit('Unable to locate shared items in the page, '
                 'did the login fail or has Google Reader changed?')
    print(items.extract())


more, soup = getPage(SHARED_ITEMS_URL)
_print_items(soup)
# Follow the link inside each page's 'more' element until a page has none.
while more is not None:
    link = more.find('a', attrs={'href': True})
    if link is None:
        # A 'more' element without a link gives us nowhere further to go.
        break
    more, soup = getPage(link['href'])
    _print_items(soup)