diff --git a/France/Paris/Paris data collection.ipynb b/France/Paris/Paris data collection.ipynb index a17728b..dd7c4b2 100644 --- a/France/Paris/Paris data collection.ipynb +++ b/France/Paris/Paris data collection.ipynb @@ -9,7 +9,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 3, "metadata": {}, "outputs": [ { @@ -17,7 +17,7 @@ "output_type": "stream", "text": [ "importing Jupyter notebook from pandas2quickstatements.ipynb\n", - "191124_192347\n", + "191124_193203\n", "Python v 3.6.5\n" ] } @@ -31,7 +31,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 5, "metadata": { "scrolled": false }, @@ -186,7 +186,7 @@ "4 False False no match " ] }, - "execution_count": 2, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -213,7 +213,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -234,12 +234,13 @@ "nearest_has_water_type False\n", "match_found unclear\n", "Name: 1094, dtype: object\n", - "wrote 'quickstatement_commands_Paris_drink_191124_192508.txt' with 5582 lines\n" + "wrote 'quickstatement_commands_Paris_drink_191124_193259.txt' with 4161 lines\n" ] } ], "source": [ - "write_query(paris_fountain_data, location)" + "statedId = \"\\tS248\\tQ76424180\"\n", + "write_query(paris_fountain_data, location, statedId)" ] }, { diff --git a/France/Paris/pandas2quickstatements.ipynb b/France/Paris/pandas2quickstatements.ipynb index e301073..ff5a82c 100644 --- a/France/Paris/pandas2quickstatements.ipynb +++ b/France/Paris/pandas2quickstatements.ipynb @@ -16,14 +16,14 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "191124_130125\n", + "191124_192953\n", "Python v 3.6.5\n" ] } @@ -47,7 +47,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -66,7 +66,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -83,7 +83,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -111,7 +111,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -252,7 +252,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -279,7 +279,7 @@ " return '\"Brunnen ({})\"'.format(text)\n", " \n", "\n", - "def createline(lines, item, prop, value, qualifiers=[]):\n", + "def createline(lines, item, prop, value, ref, qualifiers=[]):\n", " # general function to create Quickstatement v1 commands\n", " if value != '' and value != '\"\"':\n", " statement = '{}\\t{}\\t{}'.format(item, prop, value)\n", @@ -287,6 +287,7 @@ " # append qualifiers if applicable\n", " for q in qualifiers:\n", " statement += '\\t{}\\t{}'.format(q['prop'], q['value'])\n", + " statement += ref\n", " statement += '\\n'\n", " lines.append(statement)\n", " return lines " @@ -294,11 +295,11 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ - "def write_query(df, location):\n", + "def write_query(df, location, ref):\n", " lines = []\n", "\n", " for index, row in df.iterrows():\n", @@ -320,12 +321,10 @@ " # Add this basic information only if creating a new entity\n", " if item == 'LAST':\n", " # instance of drinking fountain\n", - " lines = createline(lines, item, 'P31', 'Q1630622')\n", + " lines = createline(lines, item, 'P31', 'Q1630622', ref)\n", "\n", " # coordinates\n", - " lines = createline(lines, item, 'P625', process_coordinates(row['X'], row['Y']))\n", - "\n", - " # TODO put reference \"https://opendata.swiss/dataset/bade-trinkwasser-und-zierbrunnen-in-basel\"\n", + " lines = createline(lines, item, 'P625', process_coordinates(row['X'], row['Y']),ref)\n", "\n", "\n", " # For other properties, add information if the entity is new or if property does not yet exist\n", @@ -338,8 +337,8 @@ " #if item == 'LAST' or not row['nearest_has_date']:\n", " # lines = createline(lines, item, 'P571', process_year(row['date']))\n", "\n", - " # operated by IWB \n", - " lines = createline(lines, item, 'P137', 'Q72936279')\n", + " # operated by t.b.d. operator per location \n", + " # lines = createline(lines, item, 'P137', 'Q72936279')\n", "\n", " # catalog number can always be added (it is hard to check for)\n", " #lines = createline(lines, item, 'P528', '\"{}\"'.format(row['operator_id']), [{\n",