SAS, STATISTICAL ANALYSIS SYSTEM.
根据个人习惯,以下采用叙事格式为:
code
# output
修订如下:
C000 ;
语句简易,DATA皆PROC;
玩味一下,丰富天池代码;
Score: 0.8525
相关教程可咨:
配置数据路径
/* 00. Session setup: compression, path macro variables, library */

/* Compress the data sets created in this session. */
OPTIONS COMPRESS = YES;

/* CP: code folder (the run log is written here and the score macros are included from here). */
%LET CP = .\code\Hy_round1;
/* DP: folder of the per-vessel training CSV files read by %R. */
%LET DP = .\data\hy_round1\hy_round1_train_20200102\hy_round1_train_20200102;
/* TP: folder of the per-vessel test CSV files read by %T. */
%LET TP = .\data\hy_round1\hy_round1_testA_20200102\hy_round1_testA_20200102;
/* PN: run number, used as a suffix for ANA.R<PN> and result<PN>.csv. */
%LET PN = 000001;

/* ANA: permanent library that stores the stacked and derived tables. */
LIBNAME ANA ".\data\hy_round1";
csv数据读取,并打印日志
/* 01. Data loading */
/* Send the SAS log to RUN.log in the code folder for the duration of the load. */
PROC PRINTTO
    LOG = "&CP.\RUN.log"
    NEW;
RUN;
/*
 * %R - stack the training CSV files into ANA.RESULT.
 *
 * Reads &DP.\0.csv through &DP.\6999.csv (UTF-8), imports each file without
 * column names (so the header line arrives as a data row in VAR1-VAR7),
 * appends it to ANA.RESULT, and finally removes the header rows by dropping
 * every row whose VAR7 equals "type".
 *
 * Takes no parameters. Side effects: rebuilds ANA.RESULT, uses the work table
 * DEMO and the fileref INPT as scratch.
 */
%MACRO R();
    /* Keep the loop counter private so a global macro variable I is never overwritten. */
    %LOCAL I;

    /* Start from an empty base table with seven $30 character columns. */
    PROC DELETE DATA = ANA.RESULT;RUN;
    DATA ANA.RESULT;
        FORMAT VAR1 VAR2 VAR3 VAR4 VAR5 VAR6 VAR7 $30.;
        STOP;
    RUN;

    %DO I = 0 %TO 6999;
        %PUT &I.;
        FILENAME INPT "&DP.\&I..csv" ENCODING = "UTF-8";
        PROC IMPORT
            DATAFILE = INPT
            OUT = DEMO
            DBMS = CSV
            REPLACE;
            GETNAMES = NO;
            /* Scan the whole file when sizing columns: with the default of 20 rows,
               a value longer than anything in the first rows would be truncated. */
            GUESSINGROWS = 32767;
        RUN;
        PROC APPEND DATA = DEMO BASE = ANA.RESULT;RUN;
        PROC DELETE DATA = DEMO;RUN;
        /* Release the fileref so the file handle does not outlive the iteration. */
        FILENAME INPT CLEAR;
    %END;

    /* Every file contributed its header line as a data row; remove them. */
    DATA ANA.RESULT;
        SET ANA.RESULT;
        WHERE VAR7 ^= "type";
    RUN;
%MEND;
/*
 * %T - load the test CSV files, derive the model features, and stack them
 * into ANA.TEST.
 *
 * Reads &TP.\7000.csv through &TP.\8999.csv (UTF-8). Each file is imported
 * without column names (VAR1-VAR6), the header row (VAR6 = "time") is
 * filtered out, and the features below are computed before the rows are
 * appended to ANA.TEST:
 *   ID                 vessel id (VAR1)
 *   X_1, X_2           numeric VAR2 / VAR3
 *   X_1V, X_2V         X_1 / X_2 passed through the Mercator-to-lon/lat formula
 *   X_3, X_4           numeric VAR4 / VAR5, with >0 flags (X_3_1, X_4_1, X_3_4)
 *                      and scaled copies (X_3/100, X_4/360)
 *   X_5_*              month/day/hour/minute/second parsed from the digits of
 *                      VAR6, the datetime built with year 2019, and its weekday
 *
 * Takes no parameters. Side effects: rebuilds ANA.TEST, uses the work table
 * DEMO and the fileref INPT as scratch.
 */
%MACRO T();
    /* Keep the loop counter private so a global macro variable I is never overwritten. */
    %LOCAL I;

    PROC DELETE DATA = ANA.TEST;RUN;

    %DO I = 7000 %TO 8999;
        %PUT &I.;
        FILENAME INPT "&TP.\&I..csv" ENCODING = "UTF-8";
        PROC IMPORT
            DATAFILE = INPT
            OUT = DEMO
            DBMS = CSV
            REPLACE;
            GETNAMES = NO;
            /* Scan the whole file when sizing columns: with the default of 20 rows,
               a value longer than anything in the first rows would be truncated. */
            GUESSINGROWS = 32767;
        RUN;

        DATA DEMO;
            SET DEMO(WHERE = (VAR6 ^= "time"));
            FORMAT
                ID $4.
                X_1 X_2 X_1V X_2V 24.12
                X_3 X_3_1 8.2 X_3_2 8.4
                X_4 X_4_1 8.2 X_4_2 8.4 X_3_4 8.2
                X_5_MON X_5_DAY X_5_HOR X_5_MIS X_5_SEC 8. X_5_DMY DATETIME20.
                X_5_DMYW 2.
            ;
            ID = VAR1;

            /* "+ 0" forces the character column to numeric. */
            X_1 = VAR2 + 0;
            X_2 = VAR3 + 0;
            /* Mercator metres to degrees. */
            X_1V = X_1/20037508.34*180;
            X_2V = 180/CONSTANT("PI")*(2*ATAN(EXP(X_2/20037508.34*180*CONSTANT("PI")/180))-CONSTANT("PI")/2);

            X_3 = VAR4 + 0;
            X_3_1 = X_3 > 0;
            X_3_2 = X_3/100;
            X_4 = VAR5 + 0;
            X_4_1 = X_4 > 0;
            X_4_2 = X_4/360;
            X_3_4 = X_3 > 0 OR X_4 > 0;

            /* Digits of the timestamp, laid out as MMDDHHMMSS; computed once and
               not kept (the KEEP list only retains ID and X_ variables). */
            _TS = COMPRESS(VAR6,"0123456789","K");
            X_5_MON = SUBSTR(_TS,1,2)+0;
            X_5_DAY = SUBSTR(_TS,3,2)+0;
            X_5_HOR = SUBSTR(_TS,5,2)+0;
            X_5_MIS = SUBSTR(_TS,7,2)+0;
            X_5_SEC = SUBSTR(_TS,9,2)+0;
            /* The source timestamp carries no year; 2019 is hard-coded. */
            X_5_DMY = DHMS(MDY(X_5_MON,X_5_DAY,2019),X_5_HOR,X_5_MIS,X_5_SEC);
            X_5_DMYW = WEEKDAY(DATEPART(X_5_DMY));
            KEEP ID X_:;
        RUN;

        PROC APPEND DATA = DEMO BASE = ANA.TEST;RUN;
        PROC DELETE DATA = DEMO;RUN;
        /* Release the fileref so the file handle does not outlive the iteration. */
        FILENAME INPT CLEAR;
    %END;
%MEND;
/* Build ANA.RESULT (training rows) and ANA.TEST (test rows). */
%R();
%T();

/* Loading is done: route the log back to its default destination. */
PROC PRINTTO
    LOG = LOG;
RUN;
变量如下:
X_1 X_2 X_1V X_2V 24.12
X_3 X_3_1 8.2 X_3_2 8.4
X_4 X_4_1 8.2 X_4_2 8.4 X_3_4 8.2
X_5_MON X_5_DAY X_5_HOR X_5_MIS X_5_SEC 8.
X_5_DMY DATETIME20.
X_5_DMYW 2.
其中,
/* //墨卡托转经纬度*/
/* public Vector2D Mercator2lonLat(Vector2D mercator)*/
/* {*/
/* Vector2D lonLat = new Vector2D();*/
/* double x = mercator.X / 20037508.34 * 180;*/
/* double y = mercator.Y / 20037508.34 * 180;*/
/* y = 180 / Math.PI * (2 * Math.Atan(Math.Exp(y * Math.PI / 180)) - Math.PI / 2);*/
/* lonLat.X = x;*/
/* lonLat.Y = y;*/
/* return lonLat;*/
/* }*/
转化为SAS代码
/* DATA _T;*/
/* SET ANA.TEST(KEEP = X_1 X_2);*/
/* X_1V = X_1/20037508.34*180;*/
/* X_2V = 180/CONSTANT("PI")*(2*ATAN(EXP(X_2/20037508.34*180*CONSTANT("PI")/180))-CONSTANT("PI")/2);*/
/* RUN;*/
同样的,我们对测试集也处理相同变量
/* Rebuild the derived feature columns of ANA.TEST in place from X_1..X_4 and X_5_DMY. */
DATA ANA.TEST;
    SET ANA.TEST;

    /* Display formats, declared in the same column order as before. */
    FORMAT X_1V X_2V 24.12;
    FORMAT X_3_1 8.2 X_3_2 8.4;
    FORMAT X_4_1 8.2 X_4_2 8.4 X_3_4 8.2;
    FORMAT X_5_DMYW 2.;

    /* Mercator metres to degrees. */
    X_1V = X_1/20037508.34*180;
    X_2V = 180/CONSTANT("PI")*(2*ATAN(EXP(X_2/20037508.34*180*CONSTANT("PI")/180))-CONSTANT("PI")/2);

    /* 0/1 flags: X_3 positive, X_4 positive, either positive. */
    X_3_1 = (X_3 > 0);
    X_4_1 = (X_4 > 0);
    X_3_4 = (X_3 > 0) | (X_4 > 0);

    /* Scaled copies of X_3 and X_4. */
    X_3_2 = X_3/100;
    X_4_2 = X_4/360;

    /* Day of week of the timestamp (1 = Sunday in SAS). */
    X_5_DMYW = WEEKDAY(DATEPART(X_5_DMY));
RUN;
按刺网、围网、拖网三类,以一对多(one-vs-rest)方式分别构造三个二分类训练集(每个数据集都包含全部样本,仅TARGET的定义不同)
/* 02. Training data sets */
/* Frequency table of the label column VAR7; the counts are also saved to WORK.DEMO. */
PROC FREQ DATA = ANA.RESULT;
    TABLES VAR7 / OUT = DEMO;
RUN;
/*
 * %BUILD_OVR - build one one-vs-rest training table from ANA.RESULT.
 *
 *   OUT=  name of the data set to create
 *   POS=  VAR7 label that becomes TARGET = 1; the other two known labels
 *         become TARGET = 0. Rows whose VAR7 is none of the three known
 *         labels keep a missing TARGET.
 *
 * Every row of ANA.RESULT is kept; only ID, TARGET and the X_ feature columns
 * are written. The three training tables previously repeated this DATA step
 * verbatim and differed only in which label was the positive class.
 */
%MACRO BUILD_OVR(OUT=, POS=);
    DATA &OUT.;
        SET ANA.RESULT;
        FORMAT
            ID $4.
            TARGET 1. X_1 X_2 X_1V X_2V 24.12
            X_3 X_3_1 8.2 X_3_2 8.4
            X_4 X_4_1 8.2 X_4_2 8.4 X_3_4 8.2
            X_5_MON X_5_DAY X_5_HOR X_5_MIS X_5_SEC 8. X_5_DMY DATETIME20.
            X_5_DMYW 2.
        ;

        /* One-vs-rest label: 1 for the positive class, 0 for the other two. */
        IF VAR7 IN ("刺网", "围网", "拖网") THEN TARGET = (VAR7 = "&POS.");

        ID = VAR1;

        /* "+ 0" forces the character column to numeric. */
        X_1 = VAR2 + 0;
        X_2 = VAR3 + 0;
        /* Mercator metres to degrees. */
        X_1V = X_1/20037508.34*180;
        X_2V = 180/CONSTANT("PI")*(2*ATAN(EXP(X_2/20037508.34*180*CONSTANT("PI")/180))-CONSTANT("PI")/2);

        X_3 = VAR4 + 0;
        X_3_1 = X_3 > 0;
        X_3_2 = X_3/100;
        X_4 = VAR5 + 0;
        X_4_1 = X_4 > 0;
        X_4_2 = X_4/360;
        X_3_4 = X_3 > 0 OR X_4 > 0;

        /* Digits of the timestamp, laid out as MMDDHHMMSS; computed once and
           not kept (the KEEP list only retains ID, TARGET and X_ variables). */
        _TS = COMPRESS(VAR6,"0123456789","K");
        X_5_MON = SUBSTR(_TS,1,2)+0;
        X_5_DAY = SUBSTR(_TS,3,2)+0;
        X_5_HOR = SUBSTR(_TS,5,2)+0;
        X_5_MIS = SUBSTR(_TS,7,2)+0;
        X_5_SEC = SUBSTR(_TS,9,2)+0;
        /* The source timestamp carries no year; 2019 is hard-coded. */
        X_5_DMY = DHMS(MDY(X_5_MON,X_5_DAY,2019),X_5_HOR,X_5_MIS,X_5_SEC);
        X_5_DMYW = WEEKDAY(DATEPART(X_5_DMY));

        KEEP ID X_: TARGET;
    RUN;
%MEND BUILD_OVR;

/* Model 1: 刺网 vs rest. */
%BUILD_OVR(OUT = ANA.DATA1, POS = 刺网);

/* Quick look at the first rows of the first training table. */
PROC PRINT DATA = ANA.DATA1(OBS=10);
RUN;

/* Model 2: 围网 vs rest. */
%BUILD_OVR(OUT = ANA.DATA2, POS = 围网);

/* Model 3: 拖网 vs rest. */
%BUILD_OVR(OUT = ANA.DATA3, POS = 拖网);
FREQ过程输出:
VAR7 | 频数 | 百分比 | 累积频数 | 累积百分比 |
---|---|---|---|---|
刺网 | 391806 | 14.51 | 391806 | 14.51 |
拖网 | 1681225 | 62.28 | 2073031 | 76.79 |
围网 | 626607 | 23.21 | 2699638 | 100.00 |
二分类分别建模
模型效果
拟合统计量
选择模型基于: 验证: 误分类率 (_VMISC_)
验证: 误 训练: 平均平 训练: 误 验证: 平均平
选定模型 模型节点 模型说明 分类率 方误差 分类率 方误差
Y MBR MBR 0.028503 0.017774 0.025565 0.020405
MBR2 MBR (2) 0.055410 0.033583 0.048880 0.038736
MBR3 MBR (3) 0.058785 0.036211 0.052174 0.041722
拟合统计量表
目标: TARGET
数据角色=Train
统计量 MBR MBR2 MBR3
训练: 基于箱的双因子 Kolmogorov-Smirnov 概率截止值 0.22 0.25 0.75
训练: Kolmogorov-Smirnov 统计量 0.93 0.90 0.90
训练: Akaike 信息准则 118690.80 223132.74 241682.76
训练: 平均平方误差 0.02 0.03 0.04
训练: ROC 索引 1.00 0.99 0.99
训练: 平均误差函数 0.05 0.10 0.11
训练: 累积捕获响应百分比 68.90 43.08 16.06
训练: 捕获响应百分比 34.45 21.54 8.03
选择准则: 验证: 误分类率 0.03 0.06 0.06
训练: 误差自由度 1079839.00 1079839.00 1079839.00
训练: 模型自由度 16.00 16.00 16.00
训练: 总自由度 1079855.00 1079855.00 1079855.00
训练: ASE 的除数 2159710.00 2159710.00 2159710.00
训练: 误差函数 118658.80 223100.74 241650.76
训练: 最终预测误差 0.02 0.03 0.04
训练: 增益 589.03 330.83 60.58
训练: Gini 系数 0.99 0.98 0.98
训练: 基于箱的双因子 Kolmogorov-Smirnov 统计量 0.93 0.89 0.90
训练: Kolmogorov-Smirnov 概率截止值 0.13 0.19 0.57
训练: 累积提升度 6.89 4.31 1.61
训练: 提升度 6.89 4.31 1.61
训练: 最大绝对误差 0.94 0.94 0.94
训练: 误分类率 0.03 0.05 0.05
训练: 均方误差 0.02 0.03 0.04
训练: 频数总和 1079855.00 1079855.00 1079855.00
训练: 估计权重数 16.00 16.00 16.00
训练: 平均平方根误差 0.13 0.18 0.19
训练: 累积响应百分比 100.00 100.00 100.00
训练: 响应百分比 100.00 100.00 100.00
训练: 根最终预测误差 0.13 0.18 0.19
训练: 均方根误差 0.13 0.18 0.19
训练: Schwarz Bayesian 准则 118881.08 223323.02 241873.04
训练: 误差平方和 38386.91 72529.92 78204.80
训练: 观测权重和乘以频数 2159710.00 2159710.00 2159710.00
训练: 错误分类数 27606.00 52783.00 56340.00
数据角色=Valid
统计量 MBR MBR2 MBR3
验证: Kolmogorov-Smirnov 统计量 0.92 0.88 0.88
验证: 平均平方误差 0.02 0.04 0.04
验证: ROC 索引 0.99 0.98 0.99
验证: 平均误差函数 0.07 0.13 0.14
验证: 基于箱的双因子 Kolmogorov-Smirnov 概率截止值 0.22 0.25 0.75
验证: 累积捕获响应百分比 68.62 42.75 15.98
验证: 捕获响应百分比 34.31 21.38 7.99
验证: VASE 的除数 1619782.00 1619778.00 1619780.00
验证: 误差函数 113834.20 213722.09 231084.72
验证: 增益 586.16 327.53 59.79
验证: Gini 系数 0.98 0.96 0.97
验证: 基于箱的双因子 Kolmogorov-Smirnov 统计量 0.91 0.88 0.88
验证: Kolmogorov-Smirnov 概率截止值 0.13 0.19 0.63
验证: 累积提升度 6.86 4.28 1.60
验证: 提升度 6.86 4.28 1.60
验证: 最大绝对误差 1.00 1.00 1.00
验证: 误分类率 0.03 0.06 0.06
验证: 均方误差 0.02 0.04 0.04
验证: 频数总和 809891.00 809889.00 809890.00
验证: 平均平方根误差 0.14 0.20 0.20
验证: 累积响应百分比 99.58 99.23 99.51
验证: 响应百分比 99.58 99.23 99.51
验证: 均方根误差 0.14 0.20 0.20
验证: 误差平方和 33052.06 62744.19 67580.98
验证: 观测权重和乘以频数 1619782.00 1619778.00 1619780.00
验证: 错误分类数 23084.00 44876.00 47609.00
数据角色=Test
统计量 MBR MBR2 MBR3
测试: Kolmogorov-Smirnov 统计量 0.92 0.88 0.88
检验: 平均平方误差 0.02 0.04 0.04
测试: ROC 索引 0.99 0.98 0.99
检验: 平均误差函数 0.07 0.13 0.14
测试: 基于箱的双因子 Kolmogorov-Smirnov 概率截止值 0.22 0.25 0.75
测试: 累积捕获响应百分比 68.63 42.74 15.98
测试: 捕获响应百分比 34.31 21.37 7.99
检验: TASE 的除数 1619784.00 1619788.00 1619786.00
检验: 误差函数 113559.61 214377.91 230062.07
测试: 增益 586.27 327.41 59.79
测试: Gini 系数 0.98 0.96 0.97
测试: 基于箱的双因子 Kolmogorov-Smirnov 统计量 0.91 0.87 0.88
测试: Kolmogorov-Smirnov 概率截止值 0.13 0.19 0.63
测试: 累积提升度 6.86 4.27 1.60
测试: 提升度 6.86 4.27 1.60
检验: 最大绝对误差 1.00 1.00 1.00
检验: 误分类率 0.03 0.06 0.06
检验: TMISC 的 95% 置信下限 0.03 0.05 0.06
检验: TMISC 的 95% 置信上限 0.03 0.06 0.06
检验: 均方误差 0.02 0.04 0.04
检验: 频数总和 809892.00 809894.00 809893.00
检验: 平均平方根误差 0.14 0.20 0.20
测试: 累积响应百分比 99.60 99.21 99.51
测试: 响应百分比 99.60 99.21 99.51
检验: 均方根误差 0.14 0.20 0.20
检验: 误差平方和 32982.50 62796.87 67318.38
检验: 观测权重和乘以频数 1619784.00 1619788.00 1619786.00
检验: 错误分类数 23039.00 44903.00 47477.00
事件分类表
选择模型基于: 验证: 误分类率 (_VMISC_)
模型节点 模型说明 数据角色 目标 目标标签 漏报 特异 误报 命中
MBR MBR TRAIN TARGET 18107 913634 9499 138615
MBR MBR VALIDATE TARGET 14865 684130 8219 102677
MBR2 MBR (2) TRAIN TARGET 30006 806435 22777 220637
MBR2 MBR (2) VALIDATE TARGET 25167 602199 19709 162814
MBR3 MBR (3) TRAIN TARGET 25946 376971 30394 646544
MBR3 MBR (3) VALIDATE TARGET 22262 280176 25347 482105
输出模型宏,详见附录
导入模型宏
/* 03. Load the model macros */
/* One %INCLUDE statement pulling in the three score-macro files in order
   (they are expected to define %S1, %S2 and %S3, which are called later). */
%INCLUDE
    "&CP.\02.MacroS1.sas"
    "&CP.\02.MacroS2.sas"
    "&CP.\02.MacroS3.sas"
;
计算模型预测概率,并在不同cut off阈值下计算F1分数
/* 04. F1 score calculation */
/* Score each one-vs-rest training table with its own model macro and keep only
   the predicted probability and the true label for the threshold scan below.
   NOTE(review): %S1/%S2/%S3 come from the included 02.MacroS*.sas files and are
   presumably the exported score code that creates P_TARGET1 - confirm there. */

/* Model 1 (ANA.DATA1) scored by %S1. */
DATA DATA1;
SET ANA.DATA1;
%S1();
KEEP P_TARGET1 TARGET;
RUN;
/* Model 2 (ANA.DATA2) scored by %S2. */
DATA DATA2;
SET ANA.DATA2;
%S2();
KEEP P_TARGET1 TARGET;
RUN;
/* Model 3 (ANA.DATA3) scored by %S3. */
DATA DATA3;
SET ANA.DATA3;
%S3();
KEEP P_TARGET1 TARGET;
RUN;
/*
 * %F1 - scan cut-off thresholds 0.01 .. 1.00 and compute precision, recall
 * and F1 for each.
 *
 *   DT=  name of a data set holding P_TARGET1 (predicted probability) and
 *        TARGET (0/1 true label).
 *
 * Output: data set &DT._F1 with one row per threshold and the columns
 *   T         threshold; a row is predicted positive when P_TARGET1 > T
 *   P, R, F1  precision, recall and F1 at that threshold (missing when the
 *             corresponding denominator is zero)
 *   MAXF1     the largest F1 over all thresholds
 *   IS_MAXF1  1 on the row(s) whose F1 equals MAXF1, else 0
 * PROC UNIVARIATE is then run on F1 for a quick distribution summary.
 *
 * Scratch work tables: _T1 and _F1ALL.
 */
%MACRO F1(DT=);
    /* Keep the loop counter and threshold private to this macro. */
    %LOCAL I T;

    /* Clear a leftover accumulator from an interrupted run, if any. */
    %IF %SYSFUNC(EXIST(_F1ALL)) %THEN %DO;
        PROC DELETE DATA = _F1ALL;RUN;
    %END;

    %DO I = 1 %TO 100;
        %LET T = %SYSEVALF(&I./100);
        %PUT &T.;
        PROC SQL;
            CREATE TABLE _T1 AS
            SELECT
                &T. AS T,
                /* Precision: true positives / predicted positives.
                   Guarded so a threshold with no predicted positives yields
                   a missing value instead of a division by zero. */
                CASE WHEN SUM(CASE WHEN P_TARGET1 > &T. THEN 1 ELSE 0 END) > 0
                     THEN SUM(CASE WHEN P_TARGET1 > &T. AND TARGET = 1 THEN 1 ELSE 0 END)/
                          SUM(CASE WHEN P_TARGET1 > &T. THEN 1 ELSE 0 END)
                     ELSE . END AS P,
                /* Recall: true positives / actual positives. */
                CASE WHEN SUM(CASE WHEN TARGET = 1 THEN 1 ELSE 0 END) > 0
                     THEN SUM(CASE WHEN P_TARGET1 > &T. AND TARGET = 1 THEN 1 ELSE 0 END)/
                          SUM(CASE WHEN TARGET = 1 THEN 1 ELSE 0 END)
                     ELSE . END AS R,
                /* F1 is the harmonic mean of P and R; missing when P + R is
                   zero or either component is missing. */
                CASE WHEN (CALCULATED P) + (CALCULATED R) > 0
                     THEN 2 * (CALCULATED P) * (CALCULATED R)/((CALCULATED P) + (CALCULATED R))
                     ELSE . END AS F1
            FROM &DT.;
        QUIT;
        PROC APPEND DATA = _T1 BASE = _F1ALL;RUN;
        PROC DELETE DATA = _T1;RUN;
    %END;

    /* Flag the best threshold(s). Reading from the accumulator rather than
       from &DT._F1 itself avoids a CREATE TABLE that references its own target. */
    PROC SQL;
        CREATE TABLE &DT._F1 AS
        SELECT
            *,
            MAX(F1) AS MAXF1,
            CASE WHEN F1 = CALCULATED MAXF1 THEN 1 ELSE 0 END AS IS_MAXF1
        FROM _F1ALL;
    QUIT;
    PROC DELETE DATA = _F1ALL;RUN;

    PROC UNIVARIATE DATA = &DT._F1;
        VAR F1;
    RUN;
%MEND F1;
/* Threshold scan for each of the three scored training tables;
   results land in DATA1_F1, DATA2_F1 and DATA3_F1. */
%F1(DT = DATA1);
%F1(DT = DATA2);
%F1(DT = DATA3);
将各模型取得最大F1分数时对应的cut off阈值(T)记录至宏变量,并汇总最大F1所在的记录
/* For each model, store in TDATA1..TDATA3 the largest threshold T among the
   rows flagged IS_MAXF1 = 1, then save all flagged rows to ANA.R<PN>. */
PROC SQL NOPRINT;
    SELECT MAX(IFN(IS_MAXF1 = 1, T, .))
        INTO :TDATA1
        FROM DATA1_F1;

    SELECT MAX(IFN(IS_MAXF1 = 1, T, .))
        INTO :TDATA2
        FROM DATA2_F1;

    SELECT MAX(IFN(IS_MAXF1 = 1, T, .))
        INTO :TDATA3
        FROM DATA3_F1;

    /* Best-threshold rows of the three models, stacked in model order. */
    CREATE TABLE ANA.R&PN. AS
        SELECT ALLF1.*
        FROM (
            SELECT * FROM DATA1_F1
            UNION ALL
            SELECT * FROM DATA2_F1
            UNION ALL
            SELECT * FROM DATA3_F1
        ) AS ALLF1
        WHERE ALLF1.IS_MAXF1 = 1;
QUIT;

/* Echo the chosen thresholds to the log and list the saved rows. */
%PUT &TDATA1. &TDATA2. &TDATA3.;

PROC PRINT DATA = ANA.R&PN.;
RUN;
根据最大F1的cut off概率,预测为0/1
/* 05. Model results */
/* Score ANA.TEST with each of the three models. Every step adds:
     MTYPE  number of the model that produced the row (1, 2 or 3)
     FID    32-character hex MD5 of ID concatenated with the "|"-joined X_
            variables, used as the row key for de-duplication and the merge
   and then keeps one row per FID.
   NOTE(review): %S1/%S2/%S3 are defined in the included score-macro files and
   are presumably what creates P_TARGET1 - confirm there. */

/* Model 1. */
DATA TEST1;
    SET ANA.TEST;
    %S1();
    FORMAT MTYPE 1. FID $32.;
    MTYPE = 1;
    /* MD5 returns a character digest, so the character format $HEX32. is the matching one. */
    FID = PUT(MD5(CAT(ID, CATX("|", OF X_:))), $HEX32.);
RUN;
PROC SORT DATA = TEST1 NODUPKEY;
    BY FID;
RUN;

/* Model 2. */
DATA TEST2;
    SET ANA.TEST;
    %S2();
    FORMAT MTYPE 1. FID $32.;
    /* Was 1 in every step; each table now carries its own model number. */
    MTYPE = 2;
    FID = PUT(MD5(CAT(ID, CATX("|", OF X_:))), $HEX32.);
RUN;
PROC SORT DATA = TEST2 NODUPKEY;
    BY FID;
RUN;

/* Model 3. */
DATA TEST3;
    SET ANA.TEST;
    %S3();
    FORMAT MTYPE 1. FID $32.;
    MTYPE = 3;
    FID = PUT(MD5(CAT(ID, CATX("|", OF X_:))), $HEX32.);
RUN;
PROC SORT DATA = TEST3 NODUPKEY;
    BY FID;
RUN;
/* Join the three scored test tables row by row on FID and turn each model's
   probability into a 0/1 prediction using that model's best-F1 threshold. */
DATA TEST123;
    MERGE
        TEST1(KEEP = FID ID P_TARGET1 RENAME = (P_TARGET1 = P1) IN = T1)
        TEST2(KEEP = FID P_TARGET1    RENAME = (P_TARGET1 = P2) IN = T2)
        TEST3(KEEP = FID P_TARGET1    RENAME = (P_TARGET1 = P3) IN = T3)
    ;
    BY FID;

    /* Drop a row only if it came from none of the three inputs. */
    IF NOT T1 AND NOT T2 AND NOT T3 THEN DELETE;

    /* 1 when the probability exceeds the model's cut-off, else 0. */
    TP1 = (P1 > &TDATA1.);
    TP2 = (P2 > &TDATA2.);
    TP3 = (P3 > &TDATA3.);
RUN;
根据模型结果,整合3个模型
/* Per vessel ID: number of rows and, for each model, the share of rows it
   predicted as positive. */
PROC SQL;
    CREATE TABLE RESULT AS
        SELECT
            ID,
            COUNT(*) AS CNT,
            AVG(TP1) AS TP1,
            AVG(TP2) AS TP2,
            AVG(TP3) AS TP3
        FROM TEST123
        GROUP BY ID;
QUIT;
/* Pick the final label per vessel: the model with the highest positive share
   wins; ties go to the lower-numbered model. */
DATA RESULT;
    SET RESULT;
    FORMAT T $6.;

    /* Model 1 = 刺网, model 2 = 围网, model 3 = 拖网 */
    _TOP = MAX(TP1, TP2, TP3);

    IF TP1 = _TOP THEN T = "刺网";
    ELSE IF TP2 = _TOP THEN T = "围网";
    /* Everything else is 拖网, whether or not TP3 equals the maximum. */
    ELSE T = "拖网";

    /* Helper only; the output keeps the same columns as before. */
    DROP _TOP;
RUN;
/* Write the submission file: ID and predicted label, UTF-8, no header row. */
FILENAME EXPT ".\data\hy_round1\result&PN..csv" ENCODING="UTF-8";

PROC EXPORT
    DATA = RESULT(KEEP = ID T)
    OUTFILE = EXPT
    DBMS = CSV
    REPLACE;
    PUTNAMES = NO;
RUN;
相关链接,https://github.com/IvanaXu/TianChiProj/tree/master/HOcean_Round1