--- /dev/null
+## Followers
+
+1. Write a program to find out how many people a particular user follows.
+2. For each of your followers, find out how many followers they have.
+3. Make a "famous ratio" for a given user, which I'll define as the number of followers a person has divided by the number of people they follow. Try out @makoshark and @pontifex (the Pope). Who is higher?
+4. [SKIPPED] Identify the follower you have that also follows the most of your followers (see the sketch after this list).
+5. [SKIPPED] How many users follow you but none of your followers?
+6. [SKIPPED] Repeat these analyses for people you follow, rather than people who follow you.
+7. Identify the "famous ratio" for every one of your followers or friends. Who has the highest one?
+
+## Topics and Trends
+
+1. Modify twitter3.py to produce a list of 1000 tweets about a topic of your choice.
+2. Look at those tweets. How does Twitter interpret a two-word query like "data science"?
+3. Do the previous step but eliminate retweets [hint: look at the tweet object!]
+4. For each original tweet, list the number of times you see it retweeted.
+5. Get a list of the URLs that are associated with your topic using Twitter.
+
+## Geolocation
+
+1. Alter the streaming code to include a "locations" filter. You need to use the order sw_lng, sw_lat, ne_lng, ne_lat for the four coordinates.
+2. What are people tweeting about in Times Square today?
+3. Set up a bounding box around Times Square and around NYC as a whole.
+4. Do "static" (i.e., not using the streaming API) geolocation search using code like this: d = api.search(geocode='37.781157,-122.398720,1mi')
+# Write a program to find out how many people a particular user follows.
+
import encoding_fix
import tweepy
from twitter_authentication import CONSUMER_KEY, CONSUMER_SECRET, ACCESS_TOKEN, ACCESS_TOKEN_SECRET
user = api.get_user('makoshark')
-print(user.screen_name + " has " + str(user.followers_count) + " followers.")
-
-print("They include these 100 people:")
+print(user.screen_name + " follows " + str(user.friends_count) + " accounts.")
-for follower in user.followers(count=100):
- print(follower.screen_name)
--- /dev/null
+# For each of your followers, find out how many followers they have.
+
+import encoding_fix
+import tweepy
+from twitter_authentication import CONSUMER_KEY, CONSUMER_SECRET, ACCESS_TOKEN, ACCESS_TOKEN_SECRET
+import time
+
+auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
+auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)
+
+api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)
+
+user = api.get_user("makoshark")
+
+# I found the list of functions in Tweepy here:
+# https://tweepy.readthedocs.org/en/v3.2.0/api.html
+
+# I found the idea of how to use the Cursor here:
+# https://tweepy.readthedocs.org/en/v3.2.0/cursor_tutorial.html
+
+follower_ids = []
+for page in tweepy.Cursor(api.followers_ids, screen_name="makoshark").pages():
+ for follower in page:
+ follower_ids.append(follower)
+
+
+# The answer uses api.lookup_users(). Unfortunately, it only accepts
+# 100 user IDs at a time, so the following code processes the list in
+# batches of 100.
+counter = 0
+tmp_ids = []
+users = []
+for follower in follower_ids:
+ tmp_ids.append(follower)
+ counter = counter + 1
+
+ # if we've hit 100, we grab data and then reset things and keep going
+ if counter == 100:
+ tmp_users = api.lookup_users(user_ids=tmp_ids)
+ users = users + tmp_users
+
+ counter = 0
+ tmp_ids = []
+
+# run once more when we're done to pick up any leftover IDs (skipping
+# the call entirely if the list is empty)
+if tmp_ids:
+    tmp_users = api.lookup_users(user_ids=tmp_ids)
+    users = users + tmp_users
+
+# run through and print each follower and their follower count
+for user in users:
+ print("%s : %s" % (user.screen_name, user.followers_count))
+
+
--- /dev/null
+# For each of your followers, find out how many followers they have.
+#
+# This simpler version uses user.followers(), which only returns the
+# first page of followers.
+
+import encoding_fix
+import tweepy
+from twitter_authentication import CONSUMER_KEY, CONSUMER_SECRET, ACCESS_TOKEN, ACCESS_TOKEN_SECRET
+import time
+
+auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
+auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)
+
+api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)
+
+user = api.get_user("makoshark")
+
+for follower in user.followers():
+ print("%s : %s" % (follower.screen_name, follower.followers_count))
+
+    # According to this page, we can make 180 requests for user
+    # information in each 15-minute window, or one every 5 seconds:
+ #
+ # https://dev.twitter.com/rest/reference/get/users/show
+ time.sleep(5)
+
--- /dev/null
+# Make a "famous ratio" for a given user which I'll define as 'number
+# of followers a person has divided by number of people they
+# follow. Try out @makoshark, and @pontifex (the Pope). Who is higher?
+
+import encoding_fix
+import tweepy
+from twitter_authentication import CONSUMER_KEY, CONSUMER_SECRET, ACCESS_TOKEN, ACCESS_TOKEN_SECRET
+
+auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
+auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)
+
+api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)
+
+def famous_ratio(username):
+ user = api.get_user(username)
+    return user.followers_count / user.friends_count
+
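+# Expect the Pope to win by a wide margin: @pontifex has millions of
+# followers but follows only a handful of accounts.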
+print("mako: %s" % famous_ratio('makoshark'))
+print("the pope: %s" % famous_ratio('pontifex'))
+
--- /dev/null
+# Make a "famous ratio" for a given user which I'll define as 'number
+# of followers a person has divided by number of people they
+# follow. Try out @makoshark, and @pontifex (the Pope). Who is higher?
+#
+# This works for all users in my follower list.
+
+import encoding_fix
+import tweepy
+from twitter_authentication import CONSUMER_KEY, CONSUMER_SECRET, ACCESS_TOKEN, ACCESS_TOKEN_SECRET
+
+auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
+auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)
+
+api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)
+
+user = api.get_user("makoshark")
+
+# I found the list of functions in Tweepy here:
+# https://tweepy.readthedocs.org/en/v3.2.0/api.html
+
+# I found the idea of how to use the Cursor here:
+# https://tweepy.readthedocs.org/en/v3.2.0/cursor_tutorial.html
+
+follower_ids = []
+for page in tweepy.Cursor(api.followers_ids, screen_name="makoshark").pages():
+ for follower in page:
+ follower_ids.append(follower)
+
+
+# The answer uses api.lookup_users(). Unfortunately, it only accepts
+# 100 user IDs at a time, so the following code processes the list in
+# batches of 100.
+counter = 0
+tmp_ids = []
+users = []
+for follower in follower_ids:
+ tmp_ids.append(follower)
+ counter = counter + 1
+
+ # if we've hit 100, we grab data and then reset things and keep going
+ if counter == 100:
+ tmp_users = api.lookup_users(user_ids=tmp_ids)
+ users = users + tmp_users
+
+ counter = 0
+ tmp_ids = []
+
+# run once more when we're done to pick up any leftover IDs (skipping
+# the call entirely if the list is empty)
+if tmp_ids:
+    tmp_users = api.lookup_users(user_ids=tmp_ids)
+    users = users + tmp_users
+
+# compute the famous ratios for the users
+famous_ratios = {}
+for user in users:
+    # skip users who follow no one to avoid dividing by zero
+    if user.friends_count == 0:
+        continue
+    famous_ratios[user.screen_name] = user.followers_count / user.friends_count
+
+for user in sorted(famous_ratios, key=famous_ratios.get, reverse=True):
+ print(user, famous_ratios[user])
+
+# Alter the streaming code to include a "locations" filter. You need
+# to use the order sw_lng, sw_lat, ne_lng, ne_lat for the four
+# coordinates.
+
+# Note: to answer this, I used this website to find a good box:
+# http://boundingbox.klokantech.com/
+
import encoding_fix
import tweepy
from twitter_authentication import CONSUMER_KEY, CONSUMER_SECRET, ACCESS_TOKEN, ACCESS_TOKEN_SECRET
print(tweet.author.screen_name + "\t" + tweet.text)
def on_error(self, status_code):
- print( 'Error: ' + repr(status_code))
+ print('Error: ' + repr(status_code))
return False
l = StreamListener()
streamer = tweepy.Stream(auth=auth, listener=l)
-keywords = ['python', 'perl']
-streamer.filter(track = keywords)
+# This should grab tweets within Seattle:
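+# (the stream only returns geotagged tweets: those whose coordinates or
+# whose place bounding box fall inside the box)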
+streamer.filter(locations=[-122.459696, 47.481002, -122.224433, 47.734136])
+
+# What are people tweeting about in Times Square today?
+
+# Note: to answer this, I used this website to find a good box:
+# http://boundingbox.klokantech.com/
+
import encoding_fix
import tweepy
from twitter_authentication import CONSUMER_KEY, CONSUMER_SECRET, ACCESS_TOKEN, ACCESS_TOKEN_SECRET
l = StreamListener()
streamer = tweepy.Stream(auth=auth, listener=l)
-streamer.sample()
+# This should grab tweets in Times Square
+streamer.filter(locations=[-73.9864799803,40.7575460197,-73.9837820197,40.7602439803])
+
--- /dev/null
+# Set up a bounding box around Times Square and around NYC as a whole.
+
+# Alter the streaming code to include a "locations" filter. You need
+# to use the order sw_lng, sw_lat, ne_lng, ne_lat for the four
+# coordinates.
+
+# Note: to answer this, I used this website to find a good box:
+# http://boundingbox.klokantech.com/
+
+import encoding_fix
+import tweepy
+from twitter_authentication import CONSUMER_KEY, CONSUMER_SECRET, ACCESS_TOKEN, ACCESS_TOKEN_SECRET
+
+auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
+auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)
+
+api = tweepy.API(auth)
+
+class StreamListener(tweepy.StreamListener):
+ def on_status(self, tweet):
+ print(tweet.author.screen_name + "\t" + tweet.text)
+
+ def on_error(self, status_code):
+ print('Error: ' + repr(status_code))
+ return False
+
+l = StreamListener()
+streamer = tweepy.Stream(auth=auth, listener=l)
+
+# This should grab tweets in Times Square /and/ NYC as a whole
+streamer.filter(locations=[-73.9864799803,40.7575460197,-73.9837820197,40.7602439803,
+ -74.25909,40.477399,-73.700171,40.917577])
+
--- /dev/null
+# Do "static" (i.e., not using the streaming API) geolocation search
+# using code like this: d = api.search(geocode='37.781157,-122.398720,1mi')
+
+import encoding_fix
+import tweepy
+from twitter_authentication import CONSUMER_KEY, CONSUMER_SECRET, ACCESS_TOKEN, ACCESS_TOKEN_SECRET
+import time
+
+auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
+auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)
+
+api = tweepy.API(auth)
+
+# 100 is the maximum number that can be returned according to:
+# https://dev.twitter.com/rest/reference/get/search/tweets
+
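+# the geocode parameter is "latitude,longitude,radius", with the radius
+# ending in "mi" or "km"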
+counter = 0
+for page in tweepy.Cursor(api.search, "party", geocode='37.781157,-122.398720,1mi', count=100).pages():
+ counter = counter + len(page)
+ for tweet in page:
+ print(tweet.user.screen_name + "\t" + str(tweet.created_at) + "\t" + tweet.text)
+    # end this loop once we've gotten at least 1000 (a page may push the
+    # counter past 1000 exactly, so use >= rather than ==)
+    if counter >= 1000:
+ break
+
+ # This page suggests we can do one request every 5 seconds:
+ # https://dev.twitter.com/rest/reference/get/search/tweets
+ time.sleep(5)
--- /dev/null
+# Modify twitter3.py to produce a list of 1000 tweets about a topic of
+# your choice.
+
+# Note: I've changed it to search for "community data" instead of "data science."
+
+import encoding_fix
+import tweepy
+from twitter_authentication import CONSUMER_KEY, CONSUMER_SECRET, ACCESS_TOKEN, ACCESS_TOKEN_SECRET
+import time
+
+auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
+auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)
+
+api = tweepy.API(auth)
+
+# 100 is the maximum number that can be returned according to:
+# https://dev.twitter.com/rest/reference/get/search/tweets
+
+counter = 0
+for page in tweepy.Cursor(api.search, "community data", count=100).pages():
+ counter = counter + len(page)
+ for tweet in page:
+ print(tweet.user.screen_name + "\t" + str(tweet.created_at) + "\t" + tweet.text)
+    # end this loop once we've gotten at least 1000 (a page may push the
+    # counter past 1000 exactly, so use >= rather than ==)
+    if counter >= 1000:
+ break
+
+ # This page suggests we can do one request every 5 seconds:
+ # https://dev.twitter.com/rest/reference/get/search/tweets
+ time.sleep(5)
+
--- /dev/null
+# 2. Look at those tweets. How does Twitter interpret a two-word query
+# like "data science"?
+
+# For a two-word query, it seems to search for either word: "community
+# data" matches tweets containing community *or* data.
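+
+# A quick way to check (my addition, not part of the original answer):
+# wrapping the query in double quotes asks the search API for the exact
+# phrase rather than either word.
+
+import encoding_fix
+import tweepy
+from twitter_authentication import CONSUMER_KEY, CONSUMER_SECRET, ACCESS_TOKEN, ACCESS_TOKEN_SECRET
+
+auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
+auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)
+
+api = tweepy.API(auth)
+
+# compare these exact-phrase results against the unquoted query
+for tweet in api.search('"community data"', count=20):
+    print(tweet.user.screen_name + "\t" + tweet.text)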
--- /dev/null
+# Do the previous step but eliminate retweets [hint: look at the tweet object!]
+
+import encoding_fix
+import tweepy
+from twitter_authentication import CONSUMER_KEY, CONSUMER_SECRET, ACCESS_TOKEN, ACCESS_TOKEN_SECRET
+import time
+
+auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
+auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)
+
+api = tweepy.API(auth)
+
+counter = 0
+for page in tweepy.Cursor(api.search, "community data", count=100).pages():
+ counter = counter + len(page)
+ for tweet in page:
+ # use the "hasattr()" function to determine if a tweet is a retweet
+ if not hasattr(tweet, 'retweeted_status'):
+ print(tweet.user.screen_name + "\t" + str(tweet.created_at) + "\t" + tweet.text)
+
+ # end this loop if we've gotten 1000
+ if counter >= 1000:
+ break
+
+ # This page suggests we can do one request every 5 seconds:
+ # https://dev.twitter.com/rest/reference/get/search/tweets
+ time.sleep(5)
+
+
+
--- /dev/null
+# For each original tweet, list the number of times you see it retweeted.
+
+import encoding_fix
+import tweepy
+from twitter_authentication import CONSUMER_KEY, CONSUMER_SECRET, ACCESS_TOKEN, ACCESS_TOKEN_SECRET
+import time
+
+auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
+auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)
+
+api = tweepy.API(auth)
+
+counter = 0
+for page in tweepy.Cursor(api.search, "community data", count=100).pages():
+ counter = counter + len(page)
+ for tweet in page:
+ # use the "hasattr()" function to determine if a tweet is a retweet
+ if not hasattr(tweet, 'retweeted_status'):
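+            # note: retweet_count is Twitter's global count for the
+            # tweet, not just the retweets that show up in our results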
+ print("%s : %s " % (tweet.text, tweet.retweet_count))
+
+ # end this loop if we've gotten 1000
+ if counter >= 1000:
+ break
+
+ # This page suggests we can do one request every 5 seconds:
+ # https://dev.twitter.com/rest/reference/get/search/tweets
+ time.sleep(5)
+
+
+
--- /dev/null
+# Get a list of the URLs that are associated with your topic using
+# Twitter.
+
+import encoding_fix
+import tweepy
+from twitter_authentication import CONSUMER_KEY, CONSUMER_SECRET, ACCESS_TOKEN, ACCESS_TOKEN_SECRET
+import time
+
+auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
+auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)
+
+api = tweepy.API(auth)
+
+counter = 0
+for page in tweepy.Cursor(api.search, "community data", count=100).pages():
+ counter = counter + len(page)
+ for tweet in page:
+
+ # urls seem to be stored in tweet.entities["urls"]
+ for url in tweet.entities["urls"]:
+ print(url["expanded_url"])
+
+ # end this loop if we've gotten 1000
+ if counter >= 1000:
+ break
+
+ # This page suggests we can do one request every 5 seconds:
+ # https://dev.twitter.com/rest/reference/get/search/tweets
+ time.sleep(5)
+++ /dev/null
-import encoding_fix
-import json
-import tweepy
-from twitter_authentication import CONSUMER_KEY, CONSUMER_SECRET, ACCESS_TOKEN, ACCESS_TOKEN_SECRET
-
-auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
-auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)
-
-api = tweepy.API(auth, parser=tweepy.parsers.RawParser())
-
-@classmethod
-def parse(cls, api, raw):
- status = cls.first_parse(api, raw)
- setattr(status, 'json', json.dumps(raw))
- return status
-
-tweepy.models.Status.first_parse = tweepy.models.Status.parse
-tweepy.models.Status.parse = parse
-
-class StreamListener(tweepy.StreamListener):
- def on_status(self, tweet):
- print(tweet.json)
-
- def on_error(self, status_code):
- print('Error: ' + repr(status_code))
- return False
-
-l = StreamListener()
-streamer = tweepy.Stream(auth=auth, listener=l)
-
-streamer.sample()
+++ /dev/null
-import encoding_fix
-import tweepy
-from twitter_authentication import CONSUMER_KEY, CONSUMER_SECRET, ACCESS_TOKEN, ACCESS_TOKEN_SECRET
-import time
-
-auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
-auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)
-
-api = tweepy.API(auth)
-
-# I found the idea of how to the user the Cursor here:
-# https://tweepy.readthedocs.org/en/v3.2.0/cursor_tutorial.html
-for page in tweepy.Cursor(api.home_timeline, count=200).pages():
- for tweet in page:
- print(tweet.text)
- time.sleep(1)
+++ /dev/null
-import encoding_fix
-import tweepy
-from twitter_authentication import CONSUMER_KEY, CONSUMER_SECRET, ACCESS_TOKEN, ACCESS_TOKEN_SECRET
-
-auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
-auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)
-
-api = tweepy.API(auth)
-
-public_tweets = api.home_timeline(count=100)
-
-for tweet in public_tweets:
- print(tweet.text)
+++ /dev/null
-import encoding_fix
-import tweepy
-from twitter_authentication import CONSUMER_KEY, CONSUMER_SECRET, ACCESS_TOKEN, ACCESS_TOKEN_SECRET
-
-auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
-auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)
-
-api = tweepy.API(auth)
-
-public_tweets = api.search("data science", count=20)
-
-for tweet in public_tweets:
- print(tweet.user.screen_name + "\t" + str(tweet.created_at) + "\t" + tweet.text)
+++ /dev/null
-import encoding_fix
-import tweepy
-from twitter_authentication import CONSUMER_KEY, CONSUMER_SECRET, ACCESS_TOKEN, ACCESS_TOKEN_SECRET
-
-auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
-auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)
-
-api = tweepy.API(auth)
-
-# code to write the file
-output_file = open("MY_DATA.tsv", "w", encoding="utf-8")
-
-public_tweets = api.search("data science", count=10)
-
-for tweet in public_tweets:
- print(tweet.user.screen_name + "\t" + str(tweet.created_at) + "\t" + tweet.text, file=output_file)
-