Commit

Blacken notebooks
thatbudakguy committed Jul 17, 2023
1 parent 0a93873 commit 079d5fe
Showing 3 changed files with 96 additions and 76 deletions.
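This commit runs the Black formatter over the repository's notebooks: single-quoted strings are normalized to double quotes, and calls that exceed Black's default 88-character line length are wrapped. The exact command used isn't recorded in the commit, but Black formats .ipynb files directly when installed with its jupyter extra (pip install 'black[jupyter]', then black notebooks/). A minimal sketch of the same normalization via Black's Python API, applied to a plain source string rather than a notebook:

    import black

    # Two lines in the pre-commit style: single-quoted keys.
    src = (
        "matplotlib.rcParams['font.family'] = \"Heiti TC\"\n"
        "matplotlib.rcParams.update({'font.size': 14})\n"
    )

    # format_str applies Black's rules in memory: quotes are normalized
    # to double quotes and long calls are wrapped at 88 characters.
    print(black.format_str(src, mode=black.Mode()))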
138 changes: 78 additions & 60 deletions notebooks/annotations.ipynb

Large diffs are not rendered by default.

15 changes: 9 additions & 6 deletions notebooks/corpus_curation.ipynb
@@ -51,8 +51,8 @@
"from scripts.recipes.spancat import doc_spans_jdsw, span_rels_jdsw, SPAN_LABELS\n",
"\n",
"# set the default font for everything so chinese characters display correctly\n",
"matplotlib.rcParams['font.family'] = \"Heiti TC\"\n",
"matplotlib.rcParams.update({'font.size': 14})\n",
"matplotlib.rcParams[\"font.family\"] = \"Heiti TC\"\n",
"matplotlib.rcParams.update({\"font.size\": 14})\n",
"\n",
"# load the data\n",
"df = pd.read_csv(\"../assets/annotations.csv\")\n",
@@ -65,16 +65,19 @@
"\n",
"# limit to annotations more than 2 std and less than 10 deviations longer than the mean\n",
"limited_df = df[(df[\"length_std\"] > 2) & (df[\"length_std\"] < 10)]\n",
"print(f\"Found {len(limited_df)} annotations more than 2 and less than 10 std deviations from the mean.\")\n",
"print(\n",
" f\"Found {len(limited_df)} annotations more than 2 and less than 10 std deviations from the mean.\"\n",
")\n",
"\n",
"# plot the distribution of texts the annotations are from vs. the overall distribution\n",
"total_counts = df[\"title\"].value_counts().values\n",
"long_counts = limited_df[\"title\"].value_counts().values\n",
"index = df[\"title\"].value_counts().keys()\n",
"cmp_df = pd.DataFrame({\"total\": total_counts, \"long\": long_counts}, index=index)\n",
"cmp_df.plot.bar(figsize=(20, 10), rot=45, title=\"Distribution of Annotations by Text\", logy=True)\n",
"df = limited_df\n",
"\n"
"cmp_df.plot.bar(\n",
" figsize=(20, 10), rot=45, title=\"Distribution of Annotations by Text\", logy=True\n",
")\n",
"df = limited_df\n"
]
},
{
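The filter in the hunk above keeps annotations whose length is between 2 and 10 standard deviations above the mean. The derivation of the length_std column happens earlier in the notebook and isn't shown in this diff; a hypothetical sketch of how a z-score column like it could be computed:

    import pandas as pd

    df = pd.read_csv("../assets/annotations.csv")

    # Hypothetical: z-score of each annotation's length relative to the corpus.
    lengths = df["annotation"].str.len()
    df["length_std"] = (lengths - lengths.mean()) / lengths.std()

    # Keep only unusually long annotations (between 2 and 10 std above the mean).
    limited_df = df[(df["length_std"] > 2) & (df["length_std"] < 10)]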
19 changes: 9 additions & 10 deletions notebooks/ent_detect.ipynb
@@ -16,11 +16,11 @@
"import re\n",
"\n",
"# load the annotation data file\n",
"df = pd.read_csv('../assets/annotations.csv')\n",
"df = pd.read_csv(\"../assets/annotations.csv\")\n",
"\n",
"# set the default font for everything so chinese characters display correctly\n",
"matplotlib.rcParams['font.family'] = \"Heiti TC\"\n",
"matplotlib.rcParams.update({'font.size': 14})"
"matplotlib.rcParams[\"font.family\"] = \"Heiti TC\"\n",
"matplotlib.rcParams.update({\"font.size\": 14})\n"
]
},
{
@@ -30,7 +30,7 @@
"source": [
"# Entity Detection\n",
"\n",
"After preprocessing using heuristics, what are the most common spans that occur just before the markers \"\" and \"\"?"
"After preprocessing using heuristics, what are the most common spans that occur just before the markers \"\" and \"\"?\n"
]
},
{
@@ -931,17 +931,17 @@
"\n",
"ents = Counter()\n",
"\n",
"for annotation in df['annotation']:\n",
"for annotation in df[\"annotation\"]:\n",
" spans = doc_to_spans(annotation)\n",
" for i, span in enumerate(spans):\n",
" if \"\" in span.text or span.label == \"GRAPHIC\":\n",
" if not spans[i - 1].label:\n",
" print(annotation)\n",
" ents[spans[i-1].text] += 1\n",
" ents[spans[i - 1].text] += 1\n",
"\n",
"patterns = defaultdict(list)\n",
"for pattern in srsly.read_jsonl('../assets/ner_patterns.jsonl'):\n",
" patterns[pattern['label']].append(pattern['pattern'])\n",
"for pattern in srsly.read_jsonl(\"../assets/ner_patterns.jsonl\"):\n",
" patterns[pattern[\"label\"]].append(pattern[\"pattern\"])\n",
"\n",
"nlp = spacy.blank(\"zh\")\n",
"matcher = PhraseMatcher(nlp.vocab)\n",
@@ -956,8 +956,7 @@
" hits += 1\n",
"\n",
"print(len(ents))\n",
"print(ents.most_common(20))\n",
"\n"
"print(ents.most_common(20))\n"
]
}
],
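The code elided between the matcher construction and the hit counting isn't shown in this diff; a hedged sketch of the PhraseMatcher setup it implies, where each label's pattern strings become pattern Docs (the ents Counter is the one built in the cell above):

    import srsly
    import spacy
    from collections import defaultdict
    from spacy.matcher import PhraseMatcher

    # Rebuild the label -> pattern strings mapping from the cell above.
    patterns = defaultdict(list)
    for pattern in srsly.read_jsonl("../assets/ner_patterns.jsonl"):
        patterns[pattern["label"]].append(pattern["pattern"])

    nlp = spacy.blank("zh")
    matcher = PhraseMatcher(nlp.vocab)

    # Assumed: register each label's pattern strings as pattern Docs.
    for label, strings in patterns.items():
        matcher.add(label, [nlp.make_doc(s) for s in strings])

    # Assumed: count candidate entity texts that match at least one pattern.
    hits = 0
    for text in ents:
        if matcher(nlp.make_doc(text)):
            hits += 1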
