[PATCH 3/3] add PubSubHubbub-Publisher 1.0 to planet/vendor/, use it to publish at the end of each run
Matt Domsch
Matt_Domsch at dell.com
Wed Jun 16 02:31:21 EST 2010
From 91fc1c00ace9245ea69b79c3e3f776d860e71eb5 Mon Sep 17 00:00:00 2001
From: Matt Domsch <Matt_Domsch at dell.com>
Date: Tue, 15 Jun 2010 11:21:41 -0500
Subject: [PATCH 3/3] add PubSubHubbub-Publisher 1.0 to planet/vendor/, use it to publish at the end of each run
---
planet.py | 8 ++
planet/__init__.py | 2 +
planet/publish.py | 15 ++++
planet/vendor/pubsubhubbub_publisher/PKG-INFO | 10 +++
planet/vendor/pubsubhubbub_publisher/__init__.py | 2 +
.../pubsubhubbub_publisher/pubsubhubbub_publish.py | 77 ++++++++++++++++++++
6 files changed, 114 insertions(+), 0 deletions(-)
create mode 100644 planet/publish.py
create mode 100644 planet/vendor/pubsubhubbub_publisher/PKG-INFO
create mode 100644 planet/vendor/pubsubhubbub_publisher/__init__.py
create mode 100644 planet/vendor/pubsubhubbub_publisher/pubsubhubbub_publish.py
diff --git a/planet.py b/planet.py
index 881f756..c08949d 100755
--- a/planet.py
+++ b/planet.py
@@ -23,6 +23,7 @@ if __name__ == "__main__":
only_if_new = 0
expunge = 0
debug_splice = 0
+ no_publish = 0
for arg in sys.argv[1:]:
if arg == "-h" or arg == "--help":
@@ -34,6 +35,7 @@ if __name__ == "__main__":
print " -h, --help Display this help message and exit"
print " -n, --only-if-new Only spider new feeds"
print " -x, --expunge Expunge old entries from cache"
+ print " --no-publish Do not publish feeds using PubSubHubbub"
print
sys.exit(0)
elif arg == "-v" or arg == "--verbose":
@@ -46,6 +48,8 @@ if __name__ == "__main__":
expunge = 1
elif arg == "-d" or arg == "--debug-splice":
debug_splice = 1
+ elif arg == "--no-publish":
+ no_publish = 1
elif arg.startswith("-"):
print >>sys.stderr, "Unknown option:", arg
sys.exit(1)
@@ -84,6 +88,10 @@ if __name__ == "__main__":
splice.apply(doc.toxml('utf-8'))
+ if not no_publish:
+ from planet import publish
+ publish.publish(config)
+
if expunge:
from planet import expunge
expunge.expungeCache
diff --git a/planet/__init__.py b/planet/__init__.py
index 3f4bb7f..61c2cb1 100644
--- a/planet/__init__.py
+++ b/planet/__init__.py
@@ -38,3 +38,5 @@ sys.path.insert(1, os.path.join(os.path.dirname(__file__),'vendor'))
import feedparser
feedparser.SANITIZE_HTML=1
feedparser.RESOLVE_RELATIVE_URIS=0
+
+import publish
diff --git a/planet/publish.py b/planet/publish.py
new file mode 100644
index 0000000..ce88cd3
--- /dev/null
+++ b/planet/publish.py
@@ -0,0 +1,15 @@
+import os, sys
+import urlparse
+import pubsubhubbub_publisher as PuSH
+
+def publish(config):
+ hub = config.pubsubhubbub_hub()
+ link = config.link()
+ if hub and link:
+ for root, dirs, files in os.walk(config.output_dir()):
+ xmlfiles = [urlparse.urljoin(link, f) for f in files if f.endswith('.xml')]
+ try:
+ PuSH.publish(hub, xmlfiles)
+ except PuSH.PublishError, e:
+ sys.stderr.write("PubSubHubbub publishing error: %s\n" % e)
+ break
diff --git a/planet/vendor/pubsubhubbub_publisher/PKG-INFO b/planet/vendor/pubsubhubbub_publisher/PKG-INFO
new file mode 100644
index 0000000..072227a
--- /dev/null
+++ b/planet/vendor/pubsubhubbub_publisher/PKG-INFO
@@ -0,0 +1,10 @@
+Metadata-Version: 1.0
+Name: PubSubHubbub_Publisher
+Version: 1.0
+Summary: Publisher client for PubSubHubbub
+Home-page: http://code.google.com/p/pubsubhubbub/
+Author: Brett Slatkin
+Author-email: bslatkin at gmail.com
+License: Apache 2.0
+Description: A simple, open, server-to-server web-hook-based pubsub (publish/subscribe) protocol as a simple extension to Atom. Parties (servers) speaking the PubSubHubbub protocol can get near-instant notifications (via webhook callbacks) when a topic (Atom URL) they're interested in is updated.
+Platform: UNKNOWN
diff --git a/planet/vendor/pubsubhubbub_publisher/__init__.py b/planet/vendor/pubsubhubbub_publisher/__init__.py
new file mode 100644
index 0000000..d9dbb68
--- /dev/null
+++ b/planet/vendor/pubsubhubbub_publisher/__init__.py
@@ -0,0 +1,2 @@
+from pubsubhubbub_publish import *
+
diff --git a/planet/vendor/pubsubhubbub_publisher/pubsubhubbub_publish.py b/planet/vendor/pubsubhubbub_publisher/pubsubhubbub_publish.py
new file mode 100644
index 0000000..9ae6e66
--- /dev/null
+++ b/planet/vendor/pubsubhubbub_publisher/pubsubhubbub_publish.py
@@ -0,0 +1,77 @@
+#!/usr/bin/env python
+#
+# Copyright 2009 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""Simple Publisher client for PubSubHubbub.
+
+Example usage:
+
+ from pubsubhubbub_publish import *
+ try:
+ publish('http://pubsubhubbub.appspot.com',
+ 'http://example.com/feed1/atom.xml',
+ 'http://example.com/feed2/atom.xml',
+ 'http://example.com/feed3/atom.xml')
+ except PublishError, e:
+ # handle exception...
+
+Set the 'http_proxy' environment variable on *nix or Windows to use an
+HTTP proxy.
+"""
+
+__author__ = 'bslatkin at gmail.com (Brett Slatkin)'
+
+import urllib
+import urllib2
+
+
+class PublishError(Exception):
+ """An error occurred while trying to publish to the hub."""
+
+
+URL_BATCH_SIZE = 100
+
+
+def publish(hub, *urls):
+ """Publishes an event to a hub.
+
+ Args:
+ hub: The hub to publish the event to.
+ *urls: One or more URLs to publish to. If only a single URL argument is
+ passed and that item is an iterable that is not a string, the contents of
+ that iterable will be used to produce the list of published URLs. If
+ more than URL_BATCH_SIZE URLs are supplied, this function will batch them
+ into chunks across multiple requests.
+
+ Raises:
+ PublishError if anything went wrong during publishing.
+ """
+ if len(urls) == 1 and not isinstance(urls[0], basestring):
+ urls = list(urls[0])
+
+ for i in xrange(0, len(urls), URL_BATCH_SIZE):
+ chunk = urls[i:i+URL_BATCH_SIZE]
+ data = urllib.urlencode(
+ {'hub.url': chunk, 'hub.mode': 'publish'}, doseq=True)
+ try:
+ response = urllib2.urlopen(hub, data)
+ except (IOError, urllib2.HTTPError), e:
+ if hasattr(e, 'code') and e.code == 204:
+ continue
+ error = ''
+ if hasattr(e, 'read'):
+ error = e.read()
+ raise PublishError('%s, Response: "%s"' % (e, error))
--
1.7.0.1
More information about the devel mailing list