Made a collection of Twitter API solutions
[twitter-api-cdsw-solutions] / solution-topics-3.py
1 # Do the previous step but eliminate retweets [hint: look at the tweet object!]
2
3 import encoding_fix
4 import tweepy
5 from twitter_authentication import CONSUMER_KEY, CONSUMER_SECRET, ACCESS_TOKEN, ACCESS_TOKEN_SECRET
6 import time
7
# Authenticate with the application and user credentials, then build the
# API client used for every search request below.
auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)

api = tweepy.API(auth)

# Running total of tweets received so far. Retweets are included in this
# total even though they are not printed.
counter = 0
for batch in tweepy.Cursor(api.search, "community data", count=100).pages():
    counter += len(batch)

    for status in batch:
        # A tweet object carries a "retweeted_status" attribute only when
        # it is a retweet, so skip anything that has one.
        if hasattr(status, 'retweeted_status'):
            continue

        # One tab-separated line per tweet: author, timestamp, text.
        fields = [status.user.screen_name, str(status.created_at), status.text]
        print("\t".join(fields))

    # Stop paging once at least 1000 tweets have been received.
    if counter >= 1000:
        break

    # This page suggests we can do one request every 5 seconds:
    # https://dev.twitter.com/rest/reference/get/search/tweets
    time.sleep(5)
28
29
30     

Benjamin Mako Hill || Want to submit a patch?