Merge pull request #42 from gabbasb/master
Issue - (#41, #43) - Analyze command fix for Spark server.
ibrarahmad authored Aug 23, 2017
2 parents e8f2e3a + 830d3b2 commit a360f75
Showing 5 changed files with 28 additions and 23 deletions.
17 changes: 8 additions & 9 deletions README.md
@@ -46,7 +46,8 @@ While creating the foreign server object for HDFS FDW the following can be specified

* `host`: IP address or hostname of the Hive Thrift Server or Spark Thrift Server. Defaults to `127.0.0.1`
* `port`: Port number of the Hive Thrift Server or Spark Thrift Server. Defaults to `10000`
- * `client_type`: HiveServer2. HiveServer1 is not supported. This option will be deprecated soon.
+ * `client_type`: hiveserver2 or spark. Hive and Spark both support HiveQL and are largely compatible, but the ANALYZE command behaves differently on each. The default is hiveserver2, which also works with Spark except for the ANALYZE command.
* `auth_type`: NOSASL or LDAP. Specifies the authentication type to use when connecting to the Hive or Spark server. By default it is unspecified, and the FDW infers the auth_type from the username option in the user mapping: if the username is empty or not specified it uses NOSASL, otherwise LDAP (see the sketch after this list).
* `connect_timeout`: Connection timeout, default value is 300 seconds.
* `query_timeout`: Query timeout is not supported by the Hive JDBC driver.
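
A minimal sketch pulling these options together (the server name is hypothetical; the role and credentials match the examples further down). The non-empty username is what makes the FDW infer LDAP when auth_type is left unspecified:

```sql
CREATE SERVER hdfs_svr FOREIGN DATA WRAPPER hdfs_fdw
    OPTIONS (host '127.0.0.1', port '10000',
             client_type 'hiveserver2', connect_timeout '300');

-- Non-empty username => inferred auth_type is LDAP; an explicit
-- auth_type 'NOSASL' or 'LDAP' server option makes the choice explicit.
CREATE USER MAPPING FOR postgres SERVER hdfs_svr
    OPTIONS (username 'ldapadm', password 'ldapadm');
```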

@@ -114,10 +115,9 @@ Step 5: Create Table in Hive
these steps.

```sql
-- export LD_LIBRARY_PATH before starting the server, for example
export LD_LIBRARY_PATH=/home/edb/Projects/hadoop_fdw/jdk1.8.0_111/jre/lib/amd64/server/:/usr/local/edb95/lib/postgresql/
- -- set the GUC class path variable
+ -- set the GUC variables
hdfs_fdw.jvmpath='/home/edb/Projects/hadoop_fdw/jdk1.8.0_111/jre/lib/amd64/server/'
hdfs_fdw.classpath='/usr/local/edb95/lib/postgresql/HiveJdbcClient-1.0.jar:
/home/edb/Projects/hadoop_fdw/hadoop/share/hadoop/common/hadoop-common-2.6.4.jar:
/home/edb/Projects/hadoop_fdw/apache-hive-1.0.1-bin/lib/hive-jdbc-1.0.1-standalone.jar'
@@ -242,9 +242,8 @@ Using HDFS FDW with Apache Spark on top of Hadoop
1. Install PPAS 9.5 and hdfs_fdw using the installer.
2. Export LD_LIBRARY_PATH before starting the server, for example
export LD_LIBRARY_PATH=/home/edb/Projects/hadoop_fdw/jdk1.8.0_111/jre/lib/amd64/server/:/usr/local/edb95/lib/postgresql/
3. Set the GUC JVM path variable
hdfs_fdw.jvmpath='/home/edb/Projects/hadoop_fdw/jdk1.8.0_111/jre/lib/amd64/server/'
4. Set the GUC class path variable
hdfs_fdw.classpath='/usr/local/edb95/lib/postgresql/HiveJdbcClient-1.0.jar:
/home/edb/Projects/hadoop_fdw/hadoop/share/hadoop/common/hadoop-common-2.6.4.jar:
@@ -254,13 +253,13 @@ Using HDFS FDW with Apache Spark on top of Hadoop
```sql
CREATE EXTENSION hdfs_fdw;
CREATE SERVER hdfs_svr FOREIGN DATA WRAPPER hdfs_fdw
- OPTIONS (host '127.0.0.1',port '10000',client_type 'hiveserver2');
+ OPTIONS (host '127.0.0.1',port '10000',client_type 'spark');
CREATE USER MAPPING FOR postgres server hdfs_svr OPTIONS (username 'ldapadm', password 'ldapadm');
CREATE FOREIGN TABLE f_names_tab( a int, name varchar(255)) SERVER hdfs_svr
OPTIONS (dbname 'testdb', table_name 'my_names_tab');
```
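
Once the foreign table exists it can be queried like any local table, and ANALYZE, the subject of this fix, can be run against it. A quick smoke test, assuming `my_names_tab` is populated on the Hive/Spark side:

```sql
SELECT a, name FROM f_names_tab;

-- With client_type 'spark' this statement now goes through the
-- regular query path rather than the utility path (see hdfs_query.c below).
ANALYZE f_names_tab;
```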
- Please note that we are using the same port and client_type while creating foreign server because Spark Thrift Server is compatible with Hive Thrift Server. Applications using Hiveserver2 would work with Spark without any code changes.
+ Please note that we are using the same port when creating the foreign server because the Spark Thrift Server is compatible with the Hive Thrift Server. Applications written for HiveServer2 work with Spark without code changes, except for the ANALYZE command. If Hive is replaced with Spark, use ALTER SERVER to change the client_type option, as sketched below.
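
For example, a server originally created against Hive could be repointed with standard foreign-server option syntax (reusing hdfs_svr from above):

```sql
ALTER SERVER hdfs_svr OPTIONS (SET client_type 'spark');
```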
5. Download & install Apache Spark in local mode
5 changes: 0 additions & 5 deletions hdfs_fdw.h
@@ -47,11 +47,6 @@ static const char* DEFAULT_HOST = "localhost";
static const char* DEFAULT_PORT = "10000";


- typedef enum CLIENT_TYPE
- {
- 	HIVESERVER1,
- 	HIVESERVER2
- } CLIENT_TYPE;

typedef struct hdfs_col
{
15 changes: 11 additions & 4 deletions hdfs_option.c
@@ -218,10 +218,17 @@ hdfs_get_options(Oid foreigntableid)
if (strcasecmp(defGetString(def), "hiveserver2") == 0)
opt->client_type = HIVESERVER2;
else
- ereport(ERROR,
- 		(errcode(ERRCODE_FDW_INVALID_OPTION_NAME),
- 		 errmsg("invalid option \"%s\"", defGetString(def)),
- 		 errhint("Valid client_type is hiveserver2, this option will be deprecated soon")));
+ {
+ 	if (strcasecmp(defGetString(def), "spark") == 0)
+ 		opt->client_type = SPARKSERVER;
+ 	else
+ 	{
+ 		ereport(ERROR,
+ 				(errcode(ERRCODE_FDW_INVALID_OPTION_NAME),
+ 				 errmsg("invalid option \"%s\"", defGetString(def)),
+ 				 errhint("Valid client_type values are hiveserver2 and spark")));
+ 	}
+ }
}

if (strcmp(def->defname, "auth_type") == 0)
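With this validation, a misspelled client_type, or the previously accepted hiveserver1, is rejected as soon as the options are parsed. A hypothetical failure, assuming the parse happens when a foreign table on that server is first used (hdfs_get_options takes a foreign table OID):

```sql
-- Hypothetical server name; client_type value is no longer accepted.
CREATE SERVER typo_svr FOREIGN DATA WRAPPER hdfs_fdw
    OPTIONS (host '127.0.0.1', port '10000', client_type 'hiveserver1');
-- On first use of a foreign table bound to typo_svr:
-- ERROR:  invalid option "hiveserver1"
-- HINT:  Valid client_type values are hiveserver2 and spark
```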
5 changes: 4 additions & 1 deletion hdfs_query.c
@@ -50,7 +50,10 @@ hdfs_analyze(int con_index, hdfs_opt *opt)

initStringInfo(&sql);
hdfs_deparse_analyze(&sql, opt);
- hdfs_query_execute_utility(con_index, opt, sql.data);
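/* ANALYZE behaves differently on Spark (see the README note above):
 * run it there as a regular query, and on Hive as a utility statement. */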
+ if (opt->client_type == SPARKSERVER)
+ 	hdfs_query_execute(con_index, opt, sql.data);
+ else
+ 	hdfs_query_execute_utility(con_index, opt, sql.data);
hdfs_close_result_set(con_index, opt);
}

9 changes: 5 additions & 4 deletions libhive/jdbc/hiveclient.h
@@ -33,10 +33,11 @@
extern "C" {
#endif // __cplusplus

- typedef enum HIVE_SERVER_TYPE {
- 	HIVE_SERVER1 = 0,
- 	HIVE_SERVER2 = 1
- } HIVE_SERVER_TYPE;
+ typedef enum CLIENT_TYPE
+ {
+ 	HIVESERVER2 = 0,
+ 	SPARKSERVER
+ } CLIENT_TYPE;

typedef enum AUTH_TYPE
{
