dataiku · alexbourret · May 17, 2023 · May 17, 2023 · May 17, 2023 · May 19, 2023
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,9 @@
 # Changelog
 
+## [Version 1.1.0](https://github.com/dataiku/dss-plugin-sharepoint-online/releases/tag/v1.1.0) - Feature release - 2023-05-17
+
+- Added a dataset for retrieving documents metadata
+
 ## [Version 1.0.14](https://github.com/dataiku/dss-plugin-sharepoint-online/releases/tag/v1.0.14) - Bugfix release - 2023-04-18
 
 - Updated code-env descriptor for DSS 12

diff --git a/plugin.json b/plugin.json
@@ -1,6 +1,6 @@
 {
     "id": "sharepoint-online",
-    "version": "1.0.14",
+    "version": "1.1.0",
     "meta": {
         "label": "SharePoint Online",
         "description": "Read and write data from/to your SharePoint Online account",

diff --git a/python-connectors/sharepoint-online_documents-metadata/connector.json b/python-connectors/sharepoint-online_documents-metadata/connector.json
@@ -0,0 +1,78 @@
+{
+    "meta" : {
+        "label": "Documents' metadata",
+        "description": "Retrieve metadata for all documents stored on your SharePoint server",
+        "icon": "icon-cloud"
+    },
+    "readable": true,
+    "writable": false,
+    "params": [
+        {
+            "name": "auth_type",
+            "label": "Type of authentication",
+            "type": "SELECT",
+            "selectChoices": [
+                {
+                    "value": "login",
+                    "label": "User name / password"
+                },
+                {
+                    "value": "oauth",
+                    "label": "Azure Single Sign On"
+                },
+                {
+                    "value": "site-app-permissions",
+                    "label": "Site App Permissions"
+                }
+            ]
+        },
+        {
+            "name": "sharepoint_oauth",
+            "label": "Azure preset",
+            "type": "PRESET",
+            "parameterSetId": "oauth-login",
+            "visibilityCondition": "model.auth_type == 'oauth'"
+        },
+        {
+            "name": "sharepoint_sharepy",
+            "label": "SharePoint preset",
+            "type": "PRESET",
+            "parameterSetId": "sharepoint-login",
+            "visibilityCondition": "model.auth_type == 'login'"
+        },
+        {
+            "name": "site_app_permissions",
+            "label": "Site App preset",
+            "type": "PRESET",
+            "parameterSetId": "site-app-permissions",
+            "visibilityCondition": "model.auth_type == 'site-app-permissions'"
+        },
+        {
+            "name": "advanced_parameters",
+            "label": "Show advanced parameters",
+            "description": "",
+            "type": "BOOLEAN",
+            "defaultValue": false
+        },
+        {
+            "name": "sharepoint_site_overwrite",
+            "label": "Site path preset overwrite",
+            "type": "STRING",
+            "description": "sites/site_name/subsite...",
+            "visibilityCondition": "model.advanced_parameters == true"
+        },
+        {
+            "name": "sharepoint_root_overwrite",
+            "label": "Root directory preset overwrite",
+            "type": "STRING",
+            "description": "",
+            "visibilityCondition": "model.advanced_parameters == true"
+        },
+        {
+            "name": "search_path",
+            "label": "Path to folder",
+            "type": "STRING",
+            "description": "Path to your folder of interest..."
+        }
+    ]
+}
diff --git a/python-connectors/sharepoint-online_documents-metadata/connector.py b/python-connectors/sharepoint-online_documents-metadata/connector.py
@@ -0,0 +1,55 @@
+from sharepoint_client import SharePointClient
+from common import ItemsLimit
+from dataiku.connector import Connector
+from safe_logger import SafeLogger
+from dss_constants import DSSConstants
+
+
+logger = SafeLogger("sharepoint-online plugin", DSSConstants.SECRET_PARAMETERS_KEYS)  # presumably masks the listed secret keys in log output - TODO confirm
+
+
+class SharePointDocumentsMetadataConnector(Connector):
+    """Read-only dataset connector yielding SharePoint documents metadata rows."""
+
+    def __init__(self, config, plugin_config):
+        Connector.__init__(self, config, plugin_config)
+        logger.info('SharePoint Online plugin metadata dataset v{}'.format(DSSConstants.PLUGIN_VERSION))
+        self.client = SharePointClient(config)
+        self.search_path = config.get("search_path")
+
+    def get_read_schema(self):
+        return None  # no explicit schema is returned by this connector
+
+    def generate_rows(self, dataset_schema=None, dataset_partitioning=None, partition_id=None, records_limit=-1):
+        """Yield each metadata row; the records limit is checked after a row is emitted."""
+        limit = ItemsLimit(records_limit)
+        for document in self.client.get_documents_medatada(search_path=self.search_path):
+            yield document
+            if limit.is_reached():
+                return
+
+    def get_writer(self, dataset_schema=None, dataset_partitioning=None, partition_id=None):
+        raise NotImplementedError()  # writing is not implemented for this dataset
+
+    def get_partitioning(self):
+        raise NotImplementedError()
+
+    def list_partitions(self, partitioning):
+        return list()
+
+    def partition_exists(self, partitioning, partition_id):
+        raise NotImplementedError()
+
+    def get_records_count(self, partitioning=None, partition_id=None):
+        raise NotImplementedError()
+
+
+class CustomDatasetWriter(object):
+    def __init__(self):
+        """Stub writer with nothing to set up: write_row always raises, close does nothing."""
+
+    def write_row(self, row):
+        raise NotImplementedError()
+
+    def close(self):
+        return None  # nothing to release
diff --git a/python-lib/dss_constants.py b/python-lib/dss_constants.py
@@ -28,6 +28,7 @@ class DSSConstants(object):
         "sharepoint_oauth": "The access token is missing"
     }
     PATH = 'path'
+    PLUGIN_VERSION = '1.1.0'  # keep in sync with the "version" field of plugin.json
     SECRET_PARAMETERS_KEYS = ["Authorization", "sharepoint_username", "sharepoint_password", "client_secret"]
     SITE_APP_DETAILS = {
         "sharepoint_tenant": "The tenant name is missing",

diff --git a/python-lib/sharepoint_client.py b/python-lib/sharepoint_client.py
@@ -324,6 +324,30 @@ def get_list_items(self, list_title, params=None):
         self.assert_response_ok(response, calling_method="get_list_items")
         return response.json().get("ListData", {})
 
+    def get_documents_medatada(self, search_path=None):
+        """Yield every row of the site's Documents listing, following the next page links.
+
+        :param search_path: optional folder path, relative to the root directory, used to filter on Path
+        :return: generator over the items of each page's "results" entry
+        """
+        next_page_url = "{}/{}/_vti_bin/listdata.svc/Documents".format(self.sharepoint_origin, self.sharepoint_site)
+        results_container = SharePointConstants.RESULTS_CONTAINER_V2
+        params = {"Query": "*"}
+        if search_path:
+            folder = "/{}/{}/{}".format(self.sharepoint_site, self.sharepoint_root, search_path.strip("/"))
+            # OData string literals escape a single quote by doubling it
+            params["$filter"] = "Path eq '{}'".format(folder.replace("'", "''"))
+        while next_page_url:
+            response = self.session.get(url=next_page_url, headers=DSSConstants.JSON_HEADERS, params=params)
+            params = None  # the query parameters are only sent with the first request
+            self.assert_response_ok(response, calling_method="get_documents_medatada")
+            json_response = response.json()
+            next_page_url = get_value_from_path(json_response, [results_container, SharePointConstants.NEXT_PAGE])
+            rows = get_value_from_path(json_response, [results_container, "results"])
+            # A page without results must not break the iteration (assumes the helper returns None then - TODO confirm)
+            for row in rows or []:
+                yield row
+
     def create_list(self, list_name):
         headers = DSSConstants.JSON_HEADERS
         data = {