Commit
Merge branch 'google-drive-retrieve-permissions-for-users-with-read-only-access' of github.com:elastic/connectors-python into google-drive-retrieve-permissions-for-users-with-read-only-access
Showing 18 changed files with 532 additions and 348 deletions.
@@ -0,0 +1,136 @@
#
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
# or more contributor license agreements. Licensed under the Elastic License 2.0;
# you may not use this file except in compliance with the Elastic License 2.0.
#

from elasticsearch import (
    NotFoundError as ElasticNotFoundError,
)
from elasticsearch.helpers import async_scan

from connectors.es.client import ESClient
from connectors.es.settings import TIMESTAMP_FIELD, Mappings, Settings
from connectors.logger import logger


class ESManagementClient(ESClient):
    """
    Elasticsearch client with methods to manage connector-related indices.

    In addition to the regular methods of ESClient, this client provides methods to work
    with arbitrary indices, for example listing indices, deleting indices, wiping data
    from indices and so on.

    ESClient should be used to provide rich clients that operate on "domains", such as:
        - specific connector
        - specific job

    This client, on the contrary, is used to manage a number of indices outside of
    connector protocol operations.
    """

    def __init__(self, config):
        logger.debug(f"ESManagementClient connecting to {config['host']}")
        # initialize the underlying ESClient with the provided configuration
        super().__init__(config)

    async def ensure_exists(self, indices=None):
        if indices is None:
            indices = []

        for index in indices:
            logger.debug(f"Checking index {index}")
            if not await self.client.indices.exists(index=index):
                await self.client.indices.create(index=index)
                logger.debug(f"Created index {index}")

    async def create_content_index(self, search_index_name, language_code):
        settings = Settings(language_code=language_code, analysis_icu=False).to_hash()
        mappings = Mappings.default_text_fields_mappings(is_connectors_index=True)

        return await self.client.indices.create(
            index=search_index_name, mappings=mappings, settings=settings
        )

    async def ensure_content_index_mappings(self, index, mappings):
        # open = Match open, non-hidden indices. Also matches any non-hidden data stream.
        # Content indices are always non-hidden.
        response = await self.client.indices.get_mapping(index=index)

        existing_mappings = response[index].get("mappings", {})
        if len(existing_mappings) == 0:
            if mappings:
                logger.debug(
                    "Index %s has no mappings or it's empty. Adding mappings...", index
                )
                await self.client.indices.put_mapping(
                    index=index,
                    dynamic=mappings.get("dynamic", False),
                    dynamic_templates=mappings.get("dynamic_templates", []),
                    properties=mappings.get("properties", {}),
                )
                logger.debug("Successfully added mappings for index %s", index)
            else:
                logger.debug(
                    "Index %s has no mappings and no mappings were provided, skipping mappings creation",
                    index,
                )
        else:
            logger.debug(
                "Index %s already has mappings, skipping mappings creation", index
            )

    async def ensure_ingest_pipeline_exists(
        self, pipeline_id, version, description, processors
    ):
        try:
            await self.client.ingest.get_pipeline(id=pipeline_id)
        except ElasticNotFoundError:
            # the pipeline does not exist yet, so create it
            await self.client.ingest.put_pipeline(
                id=pipeline_id,
                version=version,
                description=description,
                processors=processors,
            )

    async def delete_indices(self, indices):
        await self.client.indices.delete(index=indices, ignore_unavailable=True)

    async def clean_index(self, index_name):
        # remove all documents but keep the index, its mappings and its settings
        return await self.client.delete_by_query(
            index=index_name, body={"query": {"match_all": {}}}, ignore_unavailable=True
        )

    async def list_indices(self):
        return await self.client.indices.stats(index="search-*")

    async def index_exists(self, index_name):
        return await self.client.indices.exists(index=index_name)

    async def upsert(self, _id, index_name, doc):
        # indexing with an explicit id creates the document or overwrites an existing one
        await self.client.index(
            id=_id,
            index=index_name,
            document=doc,
        )

    async def yield_existing_documents_metadata(self, index):
        """Returns an iterator on the `id` and `_timestamp` fields of all documents in an index.

        WARNING

        This function will load all ids in memory -- on very large indices,
        depending on the id length, it can be quite large.

        300,000 ids will be around 50MiB
        """
        logger.debug(f"Scanning existing index {index}")
        if not await self.index_exists(index):
            return

        async for doc in async_scan(
            client=self.client, index=index, _source=["id", TIMESTAMP_FIELD]
        ):
            source = doc["_source"]
            doc_id = source.get("id", doc["_id"])
            timestamp = source.get(TIMESTAMP_FIELD)

            yield doc_id, timestamp
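
A minimal usage sketch of the new client, for orientation only; it is not part of this diff. The import path, the config keys other than "host", the index names, and the close() call are assumptions about how ESClient is configured elsewhere in connectors-python, so adjust them to the real module layout.

# Sketch: exercising ESManagementClient from an asyncio program.
# The module path and most config keys below are assumptions, not part of this change.
import asyncio

from connectors.es.management_client import ESManagementClient  # assumed module path


async def main():
    config = {
        "host": "http://localhost:9200",  # __init__ reads config["host"]
        "username": "elastic",            # illustrative credentials, assumed keys
        "password": "changeme",
    }
    es_client = ESManagementClient(config)

    try:
        # create system and content indices if they are missing
        await es_client.ensure_exists(indices=[".elastic-connectors"])
        if not await es_client.index_exists("search-my-index"):
            await es_client.create_content_index("search-my-index", "en")

        # stream (id, _timestamp) pairs of documents already in the index
        async for doc_id, timestamp in es_client.yield_existing_documents_metadata(
            "search-my-index"
        ):
            print(doc_id, timestamp)
    finally:
        await es_client.close()  # assumed: ESClient exposes an async close()


if __name__ == "__main__":
    asyncio.run(main())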