X-Git-Url: https://review.fuel-infra.org/gitweb?a=blobdiff_plain;f=eventlet%2Fexamples%2Fproducer_consumer.py;fp=eventlet%2Fexamples%2Fproducer_consumer.py;h=0000000000000000000000000000000000000000;hb=358bd9258c2b6d2ee74de4dfd07a5123107abad4;hp=214ed3ffbebd41fa3f007847c8c8a0930ab87c7a;hpb=376ff3bfe7071cc0793184a378c4e74508fb0d97;p=packages%2Ftrusty%2Fpython-eventlet.git

diff --git a/eventlet/examples/producer_consumer.py b/eventlet/examples/producer_consumer.py
deleted file mode 100644
index 214ed3f..0000000
--- a/eventlet/examples/producer_consumer.py
+++ /dev/null
@@ -1,57 +0,0 @@
-"""This is a recursive web crawler. Don't go pointing this at random sites;
-it doesn't respect robots.txt and it is pretty brutal about how quickly it
-fetches pages.
-
-This is a kind of "producer/consumer" example; the fetch function produces
-jobs, and the GreenPool itself is the consumer, farming out work concurrently.
-It's easier to write it this way rather than writing a standard consumer loop;
-GreenPool handles any exceptions raised and arranges so that there's a set
-number of "workers", so you don't have to write that tedious management code
-yourself.
-"""
-from __future__ import with_statement
-
-from eventlet.green import urllib2
-import eventlet
-import re
-
-# http://daringfireball.net/2009/11/liberal_regex_for_matching_urls
-url_regex = re.compile(r'\b(([\w-]+://?|www[.])[^\s()<>]+(?:\([\w\d]+\)|([^[:punct:]\s]|/)))')
-
-
-def fetch(url, outq):
-    """Fetch a url and push any urls found into a queue."""
-    print("fetching", url)
-    data = ''
-    with eventlet.Timeout(5, False):
-        data = urllib2.urlopen(url).read()
-    for url_match in url_regex.finditer(data):
-        new_url = url_match.group(0)
-        outq.put(new_url)
-
-
-def producer(start_url):
-    """Recursively crawl starting from *start_url*. Returns a set of
-    urls that were found."""
-    pool = eventlet.GreenPool()
-    seen = set()
-    q = eventlet.Queue()
-    q.put(start_url)
-    # keep looping if there are new urls, or workers that may produce more urls
-    while True:
-        while not q.empty():
-            url = q.get()
-            # limit requests to eventlet.net so we don't crash all over the internet
-            if url not in seen and 'eventlet.net' in url:
-                seen.add(url)
-                pool.spawn_n(fetch, url, q)
-        pool.waitall()
-        if q.empty():
-            break
-
-    return seen
-
-
-seen = producer("http://eventlet.net")
-print("I saw these urls:")
-print("\n".join(seen))
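
Note: the deleted example targets Python 2 only (eventlet.green.urllib2, from __future__ import with_statement). For orientation, below is a minimal sketch of the same producer/consumer pattern on Python 3, using eventlet.monkey_patch() so the standard urllib becomes cooperative; the simplified URL regex, the decode step, and the __main__ guard are assumptions of this sketch, not part of the original file or of eventlet's shipped examples.

import re

import eventlet
eventlet.monkey_patch()  # make sockets (and thus urllib) cooperative

from urllib.request import urlopen  # green after monkey_patch()

# Simplified pattern; the original example used the "liberal URL regex" instead.
url_regex = re.compile(r'https?://[^\s()<>"\']+')


def fetch(url, outq):
    """Producer: fetch *url* and push any URLs found in its body onto *outq*."""
    print("fetching", url)
    body = ""
    with eventlet.Timeout(5, False):      # give up silently after 5 seconds
        body = urlopen(url).read().decode("utf-8", "replace")
    for match in url_regex.finditer(body):
        outq.put(match.group(0))


def producer(start_url):
    """Crawl from *start_url*; the GreenPool acts as the consumer of jobs."""
    pool = eventlet.GreenPool()
    seen = set()
    q = eventlet.Queue()
    q.put(start_url)
    while True:
        while not q.empty():              # drain everything produced so far
            url = q.get()
            # stay on eventlet.net, as the original example does
            if url not in seen and 'eventlet.net' in url:
                seen.add(url)
                pool.spawn_n(fetch, url, q)
        pool.waitall()                    # let the current batch of fetchers finish
        if q.empty():                     # no fetcher produced new URLs: done
            break
    return seen


if __name__ == '__main__':
    print("\n".join(producer("http://eventlet.net")))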