-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Paris dataset, Paris preprocessing and first preprocessing tool, pari…
…sh quickstatement
- Loading branch information
unknown
committed
Nov 24, 2019
1 parent
744965a
commit 0fe788c
Showing
4 changed files
with
7,669 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,274 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"The goal of these modules is to streamline checking water fountains as items in Wikidata. The input should be a pandas DataFrame containing the columns 'X' (longitude), 'Y' (latitude), and 'name' (unique name of the item)." | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 1, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"importing Jupyter notebook from pandas2quickstatements.ipynb\n", | ||
"191124_192347\n", | ||
"Python v 3.6.5\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"import pandas as pd\n", | ||
"import import_ipynb\n", | ||
"import warnings; warnings.simplefilter('ignore')\n", | ||
"from pandas2quickstatements import write_query, identify_nearest_fountains" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 2, | ||
"metadata": { | ||
"scrolled": false | ||
}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
" ID Geo Point A_BOIRE\n", | ||
"0 1032.0 48.8455274608,2.25550660889 1\n", | ||
"1 1019.0 48.8381521871,2.25870518213 0\n", | ||
"2 1036.0 48.8572651165,2.26686423645 1\n", | ||
"3 986.0 48.8530939586,2.2705187223 1\n", | ||
"4 856.0 48.8409169077,2.27577864693 1\n" | ||
] | ||
}, | ||
{ | ||
"data": { | ||
"text/html": [ | ||
"<div>\n", | ||
"<style scoped>\n", | ||
" .dataframe tbody tr th:only-of-type {\n", | ||
" vertical-align: middle;\n", | ||
" }\n", | ||
"\n", | ||
" .dataframe tbody tr th {\n", | ||
" vertical-align: top;\n", | ||
" }\n", | ||
"\n", | ||
" .dataframe thead th {\n", | ||
" text-align: right;\n", | ||
" }\n", | ||
"</style>\n", | ||
"<table border=\"1\" class=\"dataframe\">\n", | ||
" <thead>\n", | ||
" <tr style=\"text-align: right;\">\n", | ||
" <th></th>\n", | ||
" <th>ID</th>\n", | ||
" <th>Geo Point</th>\n", | ||
" <th>A_BOIRE</th>\n", | ||
" <th>Y</th>\n", | ||
" <th>X</th>\n", | ||
" <th>nearest_qid</th>\n", | ||
" <th>nearest_has_label_de</th>\n", | ||
" <th>nearest_has_date</th>\n", | ||
" <th>nearest_has_operator</th>\n", | ||
" <th>nearest_has_code</th>\n", | ||
" <th>nearest_has_water_type</th>\n", | ||
" <th>match_found</th>\n", | ||
" </tr>\n", | ||
" </thead>\n", | ||
" <tbody>\n", | ||
" <tr>\n", | ||
" <th>0</th>\n", | ||
" <td>1032.0</td>\n", | ||
" <td>48.8455274608,2.25550660889</td>\n", | ||
" <td>1</td>\n", | ||
" <td>48.845527</td>\n", | ||
" <td>2.255507</td>\n", | ||
" <td>Q3076303</td>\n", | ||
" <td>False</td>\n", | ||
" <td>False</td>\n", | ||
" <td>False</td>\n", | ||
" <td>False</td>\n", | ||
" <td>False</td>\n", | ||
" <td>no match</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>1</th>\n", | ||
" <td>1019.0</td>\n", | ||
" <td>48.8381521871,2.25870518213</td>\n", | ||
" <td>0</td>\n", | ||
" <td>48.838152</td>\n", | ||
" <td>2.258705</td>\n", | ||
" <td>Q3076303</td>\n", | ||
" <td>False</td>\n", | ||
" <td>False</td>\n", | ||
" <td>False</td>\n", | ||
" <td>False</td>\n", | ||
" <td>False</td>\n", | ||
" <td>no match</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>2</th>\n", | ||
" <td>1036.0</td>\n", | ||
" <td>48.8572651165,2.26686423645</td>\n", | ||
" <td>1</td>\n", | ||
" <td>48.857265</td>\n", | ||
" <td>2.266864</td>\n", | ||
" <td>Q3076299</td>\n", | ||
" <td>False</td>\n", | ||
" <td>False</td>\n", | ||
" <td>False</td>\n", | ||
" <td>False</td>\n", | ||
" <td>False</td>\n", | ||
" <td>no match</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>3</th>\n", | ||
" <td>986.0</td>\n", | ||
" <td>48.8530939586,2.2705187223</td>\n", | ||
" <td>1</td>\n", | ||
" <td>48.853094</td>\n", | ||
" <td>2.270519</td>\n", | ||
" <td>Q3076299</td>\n", | ||
" <td>False</td>\n", | ||
" <td>False</td>\n", | ||
" <td>False</td>\n", | ||
" <td>False</td>\n", | ||
" <td>False</td>\n", | ||
" <td>no match</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>4</th>\n", | ||
" <td>856.0</td>\n", | ||
" <td>48.8409169077,2.27577864693</td>\n", | ||
" <td>1</td>\n", | ||
" <td>48.840917</td>\n", | ||
" <td>2.275779</td>\n", | ||
" <td>Q23459533</td>\n", | ||
" <td>False</td>\n", | ||
" <td>False</td>\n", | ||
" <td>False</td>\n", | ||
" <td>False</td>\n", | ||
" <td>False</td>\n", | ||
" <td>no match</td>\n", | ||
" </tr>\n", | ||
" </tbody>\n", | ||
"</table>\n", | ||
"</div>" | ||
], | ||
"text/plain": [ | ||
" ID Geo Point A_BOIRE Y X \\\n", | ||
"0 1032.0 48.8455274608,2.25550660889 1 48.845527 2.255507 \n", | ||
"1 1019.0 48.8381521871,2.25870518213 0 48.838152 2.258705 \n", | ||
"2 1036.0 48.8572651165,2.26686423645 1 48.857265 2.266864 \n", | ||
"3 986.0 48.8530939586,2.2705187223 1 48.853094 2.270519 \n", | ||
"4 856.0 48.8409169077,2.27577864693 1 48.840917 2.275779 \n", | ||
"\n", | ||
" nearest_qid nearest_has_label_de nearest_has_date nearest_has_operator \\\n", | ||
"0 Q3076303 False False False \n", | ||
"1 Q3076303 False False False \n", | ||
"2 Q3076299 False False False \n", | ||
"3 Q3076299 False False False \n", | ||
"4 Q23459533 False False False \n", | ||
"\n", | ||
" nearest_has_code nearest_has_water_type match_found \n", | ||
"0 False False no match \n", | ||
"1 False False no match \n", | ||
"2 False False no match \n", | ||
"3 False False no match \n", | ||
"4 False False no match " | ||
] | ||
}, | ||
"execution_count": 2, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"#Take the fountain data from csv file, convert to pandas\n", | ||
"df = pd.read_csv(\"fontaines-a-boire.csv\", sep = \";\")\n", | ||
"location = \"Paris\"\n", | ||
"\n", | ||
"paris_fountain_data = df[['ID', 'Geo Point', 'A_BOIRE']]\n", | ||
"\n", | ||
"print(paris_fountain_data.head())\n", | ||
"\n", | ||
"#Split the geolocation into X and Y coordinates\n", | ||
"geo_split = paris_fountain_data['Geo Point'].str.split(\",\", n = 1, expand = True)\n", | ||
"paris_fountain_data['Y'] = geo_split[0].apply(lambda x: float(x))\n", | ||
"paris_fountain_data['X'] = geo_split[1].apply(lambda x: float(x))\n", | ||
"paris_fountain_data.drop(columns = 'Geo Point')\n", | ||
"\n", | ||
"paris_fountain_data = identify_nearest_fountains(paris_fountain_data, location)\n", | ||
"\n", | ||
"paris_fountain_data.head()" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 3, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"unclear match\n", | ||
"ID 296\n", | ||
"Geo Point 48.8673321136,2.31791349622\n", | ||
"A_BOIRE 0\n", | ||
"Y 48.8673\n", | ||
"X 2.31791\n", | ||
"nearest_qid Q3076190\n", | ||
"nearest_has_label_de False\n", | ||
"nearest_has_date True\n", | ||
"nearest_has_operator False\n", | ||
"nearest_has_code False\n", | ||
"nearest_has_water_type False\n", | ||
"match_found unclear\n", | ||
"Name: 1094, dtype: object\n", | ||
"wrote 'quickstatement_commands_Paris_drink_191124_192508.txt' with 5582 lines\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"write_query(paris_fountain_data, location)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.6.5" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |
Oops, something went wrong.