updated version from nate
[babynames-cdsw] / BabyNames.ipynb
diff --git a/BabyNames.ipynb b/BabyNames.ipynb
new file mode 100644 (file)
index 0000000..8e3d040
--- /dev/null
@@ -0,0 +1,259 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "NAMES_LIST = \"yob2018.txt\"  # each line is parsed as name,gender,count\n",
+    "\n",
+    "boys = {}   # lowercased name -> count, filled from the \"M\" rows\n",
+    "girls = {}  # lowercased name -> count, filled from the \"F\" rows\n",
+    "\n",
+    "with open(NAMES_LIST, 'r') as names_file:  # 'with' closes the file once the loop is done\n",
+    "    for line in names_file:  # read one line at a time instead of loading the whole file\n",
+    "        name, gender, count = line.strip().split(\",\")\n",
+    "        count = int(count)\n",
+    "        if gender == \"F\":\n",
+    "            girls[name.lower()] = count\n",
+    "        elif gender == \"M\":\n",
+    "            boys[name.lower()] = count"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "for name in boys.keys():\n",
+    "    if name == \"mako\":  # keys were lowercased when the file was loaded\n",
+    "        print(\"There were \" + str(boys[name]) + \" boys named \" + name)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Print all the boys' names that are also girls' names"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "for name in boys:  # iterating a dict walks its keys\n",
+    "    if name in girls:  # 'in' on a dict tests its keys\n",
+    "        print(name)\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### How many boys are named 'King'? "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "for name in boys.keys():\n",
+    "    if 'king' == name:  # compare in lowercase; keys were lowercased on load\n",
+    "        print(name + \" \" + str(boys[name]))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### How many girls are named 'Queen'?"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "for name in girls.keys():\n",
+    "    if 'queen' == name:  # compare in lowercase; keys were lowercased on load\n",
+    "        print(name + \" \" + str(girls[name]))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Now practice your new skills. Here are some examples of questions you might ask. If you think of other questions that would be interesting to answer, try to answer those as well!\n",
+    "\n",
+    "\n",
+    "Search for your own name. Are there both boys and girls that have your name? Is it more popular for one group than for the other?\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "\n",
+    "Are there more boys' names or girls' names? What about for particular first letters? What about for *every* first letter?\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "What is the longest name in the dataset?\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "What is the most common name and how often does it occur?"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "What is the least common name and how often does it occur? Does that concern you?"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "How many boys and girls are described in the dataset (i.e., how many boys and girls born in 2018 have names given to at least four others)?\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "How many boys' names are also girls' names? How many girls' names are also boys'\n",
+    "names?\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "How many names are subsets of other names?"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "\n",
+    "What is the most popular girls' name that is also a boys' name?\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Take a prefix as input and print the number of boys and girls whose names start with that prefix (e.g., \"m\" would count babies whose names start with \"m\" and \"ma\" would count babies whose names start with \"ma\", etc.).\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

Benjamin Mako Hill || Want to submit a patch?