[PATCH 3/3] add PubSubHubbub-Publisher 1.0 to planet/vendor/, use it to publish at the end of each run

Matt Domsch Matt_Domsch at dell.com
Wed Jun 16 02:31:21 EST 2010


From 91fc1c00ace9245ea69b79c3e3f776d860e71eb5 Mon Sep 17 00:00:00 2001
From: Matt Domsch <Matt_Domsch at dell.com>
Date: Tue, 15 Jun 2010 11:21:41 -0500
Subject: [PATCH 3/3] add PubSubHubbub-Publisher 1.0 to planet/vendor/, use it to publish at the end of each run

---
 planet.py                                          |    8 ++
 planet/__init__.py                                 |    2 +
 planet/publish.py                                  |   15 ++++
 planet/vendor/pubsubhubbub_publisher/PKG-INFO      |   10 +++
 planet/vendor/pubsubhubbub_publisher/__init__.py   |    2 +
 .../pubsubhubbub_publisher/pubsubhubbub_publish.py |   77 ++++++++++++++++++++
 6 files changed, 114 insertions(+), 0 deletions(-)
 create mode 100644 planet/publish.py
 create mode 100644 planet/vendor/pubsubhubbub_publisher/PKG-INFO
 create mode 100644 planet/vendor/pubsubhubbub_publisher/__init__.py
 create mode 100644 planet/vendor/pubsubhubbub_publisher/pubsubhubbub_publish.py

diff --git a/planet.py b/planet.py
index 881f756..c08949d 100755
--- a/planet.py
+++ b/planet.py
@@ -23,6 +23,7 @@ if __name__ == "__main__":
     only_if_new = 0
     expunge = 0
     debug_splice = 0
+    no_publish = 0
 
     for arg in sys.argv[1:]:
         if arg == "-h" or arg == "--help":
@@ -34,6 +35,7 @@ if __name__ == "__main__":
             print " -h, --help          Display this help message and exit"
             print " -n, --only-if-new   Only spider new feeds"
             print " -x, --expunge       Expunge old entries from cache"
+            print " --no-publish        Do not publish feeds using PubSubHubbub"
             print
             sys.exit(0)
         elif arg == "-v" or arg == "--verbose":
@@ -46,6 +48,8 @@ if __name__ == "__main__":
             expunge = 1
         elif arg == "-d" or arg == "--debug-splice":
             debug_splice = 1
+        elif arg == "--no-publish":
+            no_publish = 1
         elif arg.startswith("-"):
             print >>sys.stderr, "Unknown option:", arg
             sys.exit(1)
@@ -84,6 +88,10 @@ if __name__ == "__main__":
 
     splice.apply(doc.toxml('utf-8'))
 
+    if not no_publish:
+        from planet import publish
+        publish.publish(config)
+
     if expunge:
         from planet import expunge
         expunge.expungeCache
diff --git a/planet/__init__.py b/planet/__init__.py
index 3f4bb7f..61c2cb1 100644
--- a/planet/__init__.py
+++ b/planet/__init__.py
@@ -38,3 +38,5 @@ sys.path.insert(1, os.path.join(os.path.dirname(__file__),'vendor'))
 import feedparser
 feedparser.SANITIZE_HTML=1
 feedparser.RESOLVE_RELATIVE_URIS=0
+
+import publish
diff --git a/planet/publish.py b/planet/publish.py
new file mode 100644
index 0000000..ce88cd3
--- /dev/null
+++ b/planet/publish.py
@@ -0,0 +1,15 @@
+import os, sys
+import urlparse
+import pubsubhubbub_publisher as PuSH
+
+def publish(config):
+    hub = config.pubsubhubbub_hub()
+    link = config.link()
+    if hub and link:
+        for root, dirs, files in os.walk(config.output_dir()):
+            xmlfiles = [urlparse.urljoin(link, f) for f in files if f.endswith('.xml')]
+            try:
+                PuSH.publish(hub, xmlfiles)
+            except PuSH.PublishError, e:
+                sys.stderr.write("PubSubHubbub publishing error: %s\n" % e)
+            break
diff --git a/planet/vendor/pubsubhubbub_publisher/PKG-INFO b/planet/vendor/pubsubhubbub_publisher/PKG-INFO
new file mode 100644
index 0000000..072227a
--- /dev/null
+++ b/planet/vendor/pubsubhubbub_publisher/PKG-INFO
@@ -0,0 +1,10 @@
+Metadata-Version: 1.0
+Name: PubSubHubbub_Publisher
+Version: 1.0
+Summary: Publisher client for PubSubHubbub
+Home-page: http://code.google.com/p/pubsubhubbub/
+Author: Brett Slatkin
+Author-email: bslatkin at gmail.com
+License: Apache 2.0
+Description: A simple, open, server-to-server web-hook-based pubsub (publish/subscribe) protocol as a simple extension to Atom. Parties (servers) speaking the PubSubHubbub protocol can get near-instant notifications (via webhook callbacks) when a topic (Atom URL) they're interested in is updated.
+Platform: UNKNOWN
diff --git a/planet/vendor/pubsubhubbub_publisher/__init__.py b/planet/vendor/pubsubhubbub_publisher/__init__.py
new file mode 100644
index 0000000..d9dbb68
--- /dev/null
+++ b/planet/vendor/pubsubhubbub_publisher/__init__.py
@@ -0,0 +1,2 @@
+from pubsubhubbub_publish import *
+
diff --git a/planet/vendor/pubsubhubbub_publisher/pubsubhubbub_publish.py b/planet/vendor/pubsubhubbub_publisher/pubsubhubbub_publish.py
new file mode 100644
index 0000000..9ae6e66
--- /dev/null
+++ b/planet/vendor/pubsubhubbub_publisher/pubsubhubbub_publish.py
@@ -0,0 +1,77 @@
+#!/usr/bin/env python
+#
+# Copyright 2009 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""Simple Publisher client for PubSubHubbub.
+
+Example usage:
+
+  from pubsubhubbub_publish import *
+  try:
+    publish('http://pubsubhubbub.appspot.com',
+            'http://example.com/feed1/atom.xml',
+            'http://example.com/feed2/atom.xml',
+            'http://example.com/feed3/atom.xml')
+  except PublishError, e:
+    # handle exception...
+
+Set the 'http_proxy' environment variable on *nix or Windows to use an
+HTTP proxy.
+"""
+
+__author__ = 'bslatkin at gmail.com (Brett Slatkin)'
+
+import urllib
+import urllib2
+
+
+class PublishError(Exception):
+  """An error occurred while trying to publish to the hub."""
+
+
+URL_BATCH_SIZE = 100
+
+
+def publish(hub, *urls):
+  """Publishes an event to a hub.
+
+  Args:
+    hub: The hub to publish the event to.
+    *urls: One or more URLs to publish to. If only a single URL argument is
+      passed and that item is an iterable that is not a string, the contents of
+      that iterable will be used to produce the list of published URLs. If
+      more than URL_BATCH_SIZE URLs are supplied, this function will batch them
+      into chunks across multiple requests.
+
+  Raises:
+    PublishError if anything went wrong during publishing.
+  """
+  if len(urls) == 1 and not isinstance(urls[0], basestring):
+    urls = list(urls[0])
+
+  for i in xrange(0, len(urls), URL_BATCH_SIZE):
+    chunk = urls[i:i+URL_BATCH_SIZE]
+    data = urllib.urlencode(
+        {'hub.url': chunk, 'hub.mode': 'publish'}, doseq=True)
+    try:
+      response = urllib2.urlopen(hub, data)
+    except (IOError, urllib2.HTTPError), e:
+      if hasattr(e, 'code') and e.code == 204:
+        continue
+      error = ''
+      if hasattr(e, 'read'):
+        error = e.read()
+      raise PublishError('%s, Response: "%s"' % (e, error))
-- 
1.7.0.1



More information about the devel mailing list