changed it to build dataset with a few more variables
[babynames-cdsw] / ssadata.py
1 import glob
2 import re
3
def import_yob_file(filename):
    """Read one names file and return per-gender name counts.

    Each non-blank line of *filename* is expected to have the form
    ``name,gender,count``. Names are lower-cased before being used as
    dictionary keys. Lines whose gender field is neither "F" nor "M" are
    parsed but otherwise ignored.

    Args:
        filename: Path of the comma-separated file to read.

    Returns:
        A ``(boys, girls)`` tuple of dicts, each mapping a lower-cased name
        to its integer count.

    Raises:
        ValueError: If a non-blank line does not split into exactly three
            comma-separated fields, or its count field is not an integer.
    """
    boys = {}
    girls = {}

    with open(filename, "r") as f:
        # Iterate the file object directly instead of loading every line
        # into a list with readlines().
        for line in f:
            line = line.strip()
            if not line:
                # A blank (typically trailing) line would split into a
                # single empty field and fail the three-way unpack below.
                continue
            name, gender, count = line.split(",")
            count = int(count)
            if gender == "F":
                girls[name.lower()] = count
            elif gender == "M":
                boys[name.lower()] = count

    return boys, girls
18
## load every yob<YYYY>.txt file in the current directory; years maps the
## four-digit year string to {'girls': {...}, 'boys': {...}}
years = {}
for filename in glob.glob('yob*.txt'):
    year_match = re.match(r'yob(\d{4})\.txt$', filename)
    if year_match is None:
        ## the glob is broader than the regex (e.g. "yobs.txt" matches the
        ## glob but has no four-digit year); skip such files rather than
        ## failing on None.group()
        continue
    year = year_match.group(1)
    tmp_boys, tmp_girls = import_yob_file(filename)
    years[year] = {'girls' : tmp_girls,
                   'boys' : tmp_boys}

## resort by year
years = dict(sorted(years.items()))

## convenience aliases for the 2021 data; raises KeyError if yob2021.txt
## was not found above
girls = years["2021"]["girls"]
boys = years["2021"]["boys"]
31
32
33     
34     

Benjamin Mako Hill || Want to submit a patch?