Fix for RM 41527, Issue EnterpriseDB#41
Problem Statement:
Hive and Spark both support HiveQL and are compatible except for
the behaviour of the ANALYZE command: in Hive, ANALYZE is a
utility command and does not return a result set, whereas in
Spark it returns a result set.
For example:
In Hive we get this output:
--------------------------
0: jdbc:hive2://localhost:10000/testdb>  analyze table names_tab compute statistics;
INFO  : Number of reduce tasks is set to 0 since there's no reduce operator
INFO  : number of splits:1
INFO  : Submitting tokens for job: job_1488090103001_0007
INFO  : The url to track the job: http://localhost:8088/proxy/application_1488090103001_0007/
INFO  : Starting Job = job_1488090103001_0007, Tracking URL = http://localhost:8088/proxy/application_1488090103001_0007/
INFO  : Kill Command = /home/abbasbutt/Projects/hadoop_fdw/hadoop/bin/hadoop job  -kill job_1488090103001_0007
INFO  : Hadoop job information for Stage-0: number of mappers: 1; number of reducers: 0
INFO  : 2017-08-22 19:08:11,328 Stage-0 map = 0%,  reduce = 0%
No rows affected (11.949 seconds)
INFO  : 2017-08-22 19:08:15,465 Stage-0 map = 100%,  reduce = 0%, Cumulative CPU 0.93 sec
INFO  : MapReduce Total cumulative CPU time: 930 msec
INFO  : Ended Job = job_1488090103001_0007
INFO  : Table testdb.names_tab stats: [numFiles=2, numRows=12, totalSize=76, rawDataSize=64]
0: jdbc:hive2://localhost:10000/testdb> [abbasbutt@localhost bin]$

In Spark we get this output:
---------------------------
0: jdbc:hive2://localhost:10000/my_spark_db> analyze table junk_table compute statistics;
+---------+--+
| Result  |
+---------+--+
+---------+--+
No rows selected (1.462 seconds)

Solution:
The CREATE SERVER command already has a client_type option,
which so far supported only the value 'hiveserver2'.
To support ANALYZE against Spark, client_type can now also be
set to 'spark'.
If client_type is not specified, it defaults to hiveserver2 and
the ANALYZE command will fail when the remote server is Spark.
With client_type set to 'spark', ANALYZE works correctly.

For example:
postgres=# CREATE EXTENSION hdfs_fdw;
CREATE EXTENSION
postgres=# CREATE SERVER hdfs_svr FOREIGN DATA WRAPPER hdfs_fdw OPTIONS (host '127.0.0.1',port '10000',client_type 'spark');
CREATE SERVER
postgres=# CREATE USER MAPPING FOR abbasbutt server hdfs_svr OPTIONS (username 'ldapadm', password 'ldapadm');
CREATE USER MAPPING
postgres=# CREATE FOREIGN TABLE fnt( a int, name varchar(255)) SERVER hdfs_svr OPTIONS (dbname 'my_spark_db', table_name 'junk_table');
CREATE FOREIGN TABLE
postgres=# ANALYZE fnt;
ANALYZE
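
If the server was created without client_type, the option can be
added afterwards using the standard ALTER SERVER syntax. A
minimal sketch, reusing the hdfs_svr server and fnt table from
above:

postgres=# ALTER SERVER hdfs_svr OPTIONS (ADD client_type 'spark');
ALTER SERVER
postgres=# ANALYZE fnt;
ANALYZE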
gabbasb committed Aug 23, 2017
1 parent 6a6dc6d commit 917bc84
Showing 4 changed files with 20 additions and 14 deletions.
5 changes: 0 additions & 5 deletions hdfs_fdw.h
@@ -47,11 +47,6 @@ static const char* DEFAULT_HOST = "localhost";
 static const char* DEFAULT_PORT = "10000";
 
 
-typedef enum CLIENT_TYPE
-{
-    HIVESERVER1,
-    HIVESERVER2
-} CLIENT_TYPE;
 
 typedef struct hdfs_col
 {
15 changes: 11 additions & 4 deletions hdfs_option.c
@@ -218,10 +218,17 @@ hdfs_get_options(Oid foreigntableid)
         if (strcasecmp(defGetString(def), "hiveserver2") == 0)
             opt->client_type = HIVESERVER2;
         else
-            ereport(ERROR,
-                (errcode(ERRCODE_FDW_INVALID_OPTION_NAME),
-                errmsg("invalid option \"%s\"", defGetString(def)),
-                errhint("Valid client_type is hiveserver2, this option will be deprecated soon")));
+        {
+            if (strcasecmp(defGetString(def), "spark") == 0)
+                opt->client_type = SPARKSERVER;
+            else
+            {
+                ereport(ERROR,
+                    (errcode(ERRCODE_FDW_INVALID_OPTION_NAME),
+                    errmsg("invalid option \"%s\"", defGetString(def)),
+                    errhint("Valid client_type values are hiveserver2 and spark")));
+            }
+        }
     }
 
     if (strcmp(def->defname, "auth_type") == 0)
5 changes: 4 additions & 1 deletion hdfs_query.c
@@ -50,7 +50,10 @@ hdfs_analyze(int con_index, hdfs_opt *opt)
 
     initStringInfo(&sql);
     hdfs_deparse_analyze(&sql, opt);
-    hdfs_query_execute_utility(con_index, opt, sql.data);
+    if (opt->client_type == SPARKSERVER)
+        hdfs_query_execute(con_index, opt, sql.data);
+    else
+        hdfs_query_execute_utility(con_index, opt, sql.data);
     hdfs_close_result_set(con_index, opt);
 }
 
9 changes: 5 additions & 4 deletions libhive/jdbc/hiveclient.h
@@ -33,10 +33,11 @@
 extern "C" {
 #endif // __cplusplus
 
-typedef enum HIVE_SERVER_TYPE {
-    HIVE_SERVER1 = 0,
-    HIVE_SERVER2 = 1
-} HIVE_SERVER_TYPE;
+typedef enum CLIENT_TYPE
+{
+    HIVESERVER2 = 0,
+    SPARKSERVER
+} CLIENT_TYPE;
 
 typedef enum AUTH_TYPE
 {
