Import of GitHub code used for the hackathon
[github-barcamp-201407] / parse_json.py
1 #!/usr/bin/env python
2
3 import os
4 import json
5 import re
6
# Flags and whitespace pattern copied from the standard library's
# json/decoder.py, so whitespace between documents is skipped the same way
# the stock decoder skips it.
FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL
WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS)


class ConcatJSONDecoder(json.JSONDecoder):
    """JSON decoder for a string holding several documents back to back.

    The documents may be separated by any amount of whitespace (or none).
    Pass it as ``cls=ConcatJSONDecoder`` to ``json.loads`` to get a list of
    decoded values instead of a single value.
    """

    def decode(self, s, _w=WHITESPACE.match):
        """Return every JSON document found in *s*, in order, as a list.

        s   -- the text to decode.
        _w  -- whitespace matcher, called as ``_w(s, pos)``; its ``.end()``
               is taken as the position of the next non-whitespace character.

        Empty or whitespace-only input yields an empty list.  Anything that
        ``raw_decode`` cannot parse raises the exception ``raw_decode``
        raises (a ``ValueError``).
        """
        s_len = len(s)
        objs = []

        # Skip leading whitespace before testing the loop condition, so that
        # whitespace-only input ends here instead of asking raw_decode to
        # parse a value at end-of-string.
        end = _w(s, 0).end()
        while end != s_len:
            obj, end = self.raw_decode(s, idx=end)
            # Step over the separator so `end` sits on the next document,
            # or on s_len when nothing is left.
            end = _w(s, end).end()
            objs.append(obj)
        return objs
22
def _event_row(event):
    """Return the eight output columns for one decoded event, in print order."""
    row = [event['actor'], event['type'], event['created_at'], event['url']]

    if 'repository' in event:
        repo = event['repository']
        row += [repo['name'], repo['owner'],
                str(repo['watchers']), str(repo['forks'])]
    else:
        # Keep the row at full width when there is no repository record.
        row += ["", "", "", ""]
    return row


def print_json_file(filename):
    """Print one tab-separated line per repository event found in *filename*.

    The file is decompressed by running ``zcat`` on it; the output is parsed
    as a sequence of concatenated JSON documents with ConcatJSONDecoder.
    Events whose ``type`` is ``GistEvent`` are skipped.  Every other event is
    written to standard output as UTF-8, with the columns: actor, type,
    created_at, url, then the repository's name, owner, watchers and forks
    (four empty columns when the event has no ``repository`` key).

    Raises OSError if ``zcat`` cannot be started, ValueError if its output
    is not valid JSON, and KeyError if an event lacks an expected key.
    """
    import subprocess
    import sys

    # An argument list (no shell) keeps file names containing spaces or shell
    # metacharacters intact, and communicate() closes the pipe when done.
    proc = subprocess.Popen(["zcat", filename], stdout=subprocess.PIPE)
    raw, _ = proc.communicate()
    # The exit status is deliberately not checked: when zcat produces no
    # output the decoder returns an empty list and no rows are printed.
    events = json.loads(raw.decode("utf8"), cls=ConcatJSONDecoder)

    # Write encoded bytes directly so the output is UTF-8 regardless of the
    # terminal/pipe encoding.  Python 3 exposes the byte stream as
    # sys.stdout.buffer; Python 2's sys.stdout accepts bytes itself.
    sys.stdout.flush()  # keep earlier text-layer output ahead of these bytes
    out = getattr(sys.stdout, "buffer", sys.stdout)

    for event in events:
        # remove events which are not done to repositories
        if event['type'] in ["GistEvent"]:
            continue
        out.write(u"\t".join(_event_row(event)).encode("utf8") + b"\n")
58
# Column header, emitted once ahead of any event rows.  A single
# parenthesised argument prints the same text whether `print` is a statement
# or a function.
print("\t".join([
    "actor", "type", "date", "url",
    "name", "repo.owner", "repo.watchers", "repo.forks",
]))

# Run every entry of the data/ directory through print_json_file.
for filename in os.listdir("data/"):
    print_json_file(os.path.join("data/", filename))

Benjamin Mako Hill || Want to submit a patch?