SAS By Dr Yang YiChiang 2005 01 INPUT
SAS基礎篇 By Dr. Yang Yi-Chiang 2005. 01
INPUT 指令 讀入資料 INPUT 指令的目的 是將原始資料解釋成 變項值 INPUT 撰寫方式 1. List input 2. Column input 3. Formatted input
FORMATTED INPUT 撰寫方式 INPUT #1 name $ 10. /@1 income E 7. #3 tax 6. 2 ; INPUT name $ @12 sex $ 1. +3 addrs $ 13 -25 +3 age 2. @30 income COMMA 7. 2 ; INPUT name $ 10. (q 1 -q 20)(1. ) ;
建構SAS資料檔的其它方法 多筆記錄整合成一個觀測體 (1) / / input name $ 10. / income E 7. / tax 6. 2; (2)# # input #3 tax 6. 2 #2 income E 7. #1 name $ 10. ; (3); ; ; input name $ 10. ; input income E 7. ; input tax 6. 2;
建構SAS資料檔的其它方法 單一記錄中讀取多個觀測體 DATA a; input group $ score @@; datalines; 1 60 1 70 1 80 2 95 2 55 2 65 3 79 3 89 3 99 ;
建構SAS資料檔的其它方法 依據記錄內涵 區分資料 DATA a; input type $ 1. @; if type=‘c’ then input name $ pop comma 10. 2; else if type=‘r’ then input incomec 10. 2 tax 6. 2; datalines; c sinchuang 8000 c sanchong 19520 r 79389 3520 r 65000 20000
建構SAS資料檔的其它方法 MISSOVER選項的用途 DATA miss; INFILE DATALINES MISSOVER; INPUT name $ item 1 -item 5; DATALINES; A 1 2 3 B 5 5 5 C 2 2 2 ;
使用/* */將SAS程式文件化 DATA abc; /* a SAS data set abc is created */ total=SUM(OF q 1 q 2 q 4); /* a new variable is the total of 3 variables */
使用 * ; 將SAS程式文件化 * this is my first SAS program;
LABEL指令 詮釋變項 DATA a; INPUT (att 1 -att 20) (1. ); avg=MEAN(OF att 1 -att 20); IF avg >3. 5 THEN attitude=‘+’; ELSE attitude=‘-’; LABEL att 1=‘attitude item 1’. .
LABEL指令 變項標名 LABEL att 1=‘attitude item 1’. . att 10=‘attitude item 10’ avg=‘average attitude’ attitude=‘attitude toward…. ’; 通常變項名稱都只 8個字元 故常須標名 標名可達 40個字元
FORMAT指令 變項值標名 PROC FORMAT ; VALUE $gender 'f'='female' 'm'='male' ; VALUE status 1='freshman' 2='sophomore' 3='junior' 4='senior' ; DATA study; INFILE 'C:\data\a.fat'; INPUT name $ sex $ stand quiz 1 -quiz 5 ; FORMAT sex $gender. stand status. ;
OPTIONS指令 輸出報表格式 LS= /* 64 ~ 256 */ PS= /* 15 ~ 32767 */ PAGENO= /* 從 1 開始 */ CENTER DATE LABEL NOCENTER NODATE NOLABEL
SAS資料檔之建構&重整 指令 SET MERGE UPDATE FILE/PUT LIST OUTPUT /* 要 sort */
OUTPUT 由 1筆記錄產生數個觀測體 DATA Q; INPUT name $ quiz 1 quiz 2 quiz 3; quiz=1; score=quiz 1; output; quiz=2; score=quiz 2; output; quiz=3; score=quiz 3; output; DROP quiz 1 -quiz 3; DATALINES; A 75 85 95 B 60 70 90 C 59 69 79 ;
OUTPUT 由 1筆記錄產生數個資料檔 DATA college hischool; INPUT name $ educ; if educ >12 then OUTPUT college; if educ <=12 then OUTPUT hischool; DATALINES; A 16 B 9 C 6 D 10 ;
FILE 建立外部數據檔 DATA A; INFILE 'C:\data\b.dat'; INPUT name $ quiz 1 quiz 2 quiz 3; avg=MEAN(OF quiz 1 -quiz 3); FILE 'C:\data\c.dat'; /*建立外部檔案*/ PUT name $ 10. @ 12 avg 4. ; /*請用formatted output*/ RUN;
LIST 在 LOG看觀測體 DATA A; INFILE 'C:\data\b.dat'; INPUT name $ quiz 1 quiz 2 quiz 3; avg=MEAN(OF quiz 1 -quiz 3); IF avg< 60 THEN DO; LIST; delete; END;
SET 複製(1) DATA a; INFILE 'C:\data\b.dat'; INPUT name $ sex $ ht wt; DATA c; SET a; IF sex='m'; PROC PRINT DATA=c; RUN;
SET 複製(2) DATA a; INFILE 'C:\data\b.dat'; INPUT name $ sex $ ht wt; DATA d; SET a; IF sex='f'; PROC PRINT DATA=d; RUN;
SET 合併=(複製 1&2) DATA both; SET c d; /*按序合併*/ PROC PRINT DATA=both; RUN;
SET垂直連結=(sort後複製 1&2) DATA a; INFILE 'C:\data\b.dat'; INPUT name $ sex $ ht wt; DATA c; SET a; IF sex='m';
SET垂直連結=(sort後複製 1&2) DATA d; SET a; IF sex=‘f’;
SET垂直連結=(sort後複製 1&2) PROC SORT DATA=c; by name ; RUN; PROC SORT DATA=d; by name ; RUN; DATA bothsort; SET c d ; BY name ; PROC PRINT DATA=bothsort; RUN;
SET 用迴路複製(1) DATA a; INPUT name $ sex $ ht wt; DATALINES; Fly m 167 68 Jima f 146 50 Ken m 171. Anna f 156 61 Green f 159 57 ;
SET 用迴路複製(2) DATA b; DO index=2 to 5 by 2; SET a POINT=index; OUTPUT; END; STOP; PROC PRINT DATA=b; RUN;
MERGE 平行連接(1) DATA a; INFILE 'C:\data\b.dat'; INPUT name $ q 1 q 4 q 5;
MERGE 平行連接(2) DATA c; INFILE 'C:\data\d.dat'; INPUT name $ q 2 q 3;
MERGE 平行連接 PROC SORT DATA=a; BY name; RUN; PROC SORT DATA=c; BY name; RUN; DATA new; MERGE a(IN=x) c(IN=y); BY name; IF x AND y; PROC PRINT DATA=new; RUN;
UPDATE更新主檔案(1) DATA a; INPUT name $ sex $ ht wt; DATALINES; Fly m 167 68 Jima f 146 50 Ken m 171. Anna f 156 61 Green f 159 57 ;
UPDATE更新主檔案(2) DATA b; INPUT name $ wt; DATALINES; Ken 80 ;
UPDATE更新主檔案(3) PROC SORT DATA=a; BY name; RUN; DATA current; UPDATE a b(IN=r); BY name; IF r; PROC PRINT DATA=current; RUN;
UPDATE更新主檔案(3’) PROC SORT DATA=a; BY name; RUN; PROC SORT DATA=b; BY name; RUN; DATA current; UPDATE a b; BY name; PROC PRINT DATA=current; RUN;
IF 條件指令 選取觀測體 DATA a; INFILE 'C:\data\b.dat'; INPUT name $ sex $ ht wt; IF sex='m'; PROC PRINT;
IF THEN 選取觀測體 DATA a; INFILE 'C:\data\b.dat'; INPUT name $ sex $ ht wt; IF ht<155 THEN DELETE; PROC PRINT;
IF THEN/ELSE 選取觀測體 DATA high common; INFILE 'C:\data\b.dat'; INPUT name $ sex $ ht wt; IF ht>=165 THEN OUTPUT high; ELSE OUTPUT common; PROC PRINT DATA=high; PROC PRINT DATA=common;
WHERE 條件指令 選取觀測體 DATA a; INFILE 'C:\data\b.dat'; INPUT name $ sex $ ht wt; DATA fat; set a; WHERE (wt/ht/ht)>25; PROC PRINT DATA=fat;
WHERE BETWEEN AND DATA a; INFILE 'C:\data\b.dat'; INPUT name $ sex $ ht wt; DATA normal; set a; where (wt/ht/ht) between 19 and 24; PROC PRINT DATA=normal;
WHERE CONTAINS DATA a; INFILE 'C:\data\b.dat'; INPUT name $ sex $ ht wt; DATA normal; set a; where (wt/ht/ht) between 19 and 24; PROC PRINT DATA=normal; WHERE name CONTAINS '楊';
WHERE IS NULL(or is missing) DATA a; INFILE 'C:\data\b.dat'; INPUT name $ sex $ ht wt; DATA normal; set a; where (wt/ht/ht) between 19 and 24; PROC PRINT DATA=normal; WHERE name IS NULL ; /*缺漏值者*/
KEEP保留變項 DROP排除變項 KEEP指令 DROP指令 DATA步驟中 單一指令 eg. SET a; KEEP x y; DROP z; KEEP=選項 DROP=選項 資料檔的選項之一 DATA b (KEEP=x y); DATA c (DROP=z); SET a;
KEEP保留變項 DROP排除變項 KEEP=選項 DROP=選項 資料檔的選項之一 DATA d; DATA e; SET a (KEEP=x y); SET a (DROP=z); PROC PRINT DATA=a (KEEP=x y); PROC PRINT DATA=a (DROP=z);
POINT=選項 指定觀測體 DATA f; DO index=2 to 5 by 2; SET a POINT=index; OUTPUT; END; STOP;
WHERE=選項 選取觀測體 DATA a; INFILE 'C:\data\b.dat'; INPUT name $ sex $ ht wt; DATA fat; set a (WHERE=((wt/ht/ht)>25)); PROC PRINT DATA=fat;
FIRSTOBS=選項 選取觀察體 DATA a; INFILE 'C:\data\b.dat'; INPUT name $ sex $ ht wt; DATA fat; set a (FIRSTOBS=7 OBS=10); PROC PRINT DATA=fat;
ARRAY 向量指令 ARRAY q {3} q 1 q 2 q 3 (0 0 0); ARRAY q {*} _NUMERIC_; ARRAY q {*} _CHARACTER_;
DO/END指令 巡迴式 轉換 (1’) DATA a; INPUT name $ q 1 -q 10; ARRAY q {*} q 1 -q 10; DO i= 1, 3, 5, 7, 9; /* i=1 to 10 by 2*/ q(i)=6 -q(i); /* 轉換 群組變項 */ END; DROP i; /*排除注標變項*/
DO WHILE/END指令 巡迴轉換 DATA a; INPUT x 1 -x 5 y; ARRAY x {5} x 1 -x 5; n=1; DO WHILE (x {n} LE y); /*LESS THAN OR EQUAL*/ PUT x(n)= y= ; n=n+1; END; DATALINES; 123413 023676 ;
DO UNTIL/END指令 巡迴轉換 DATA a; INPUT x 1 -x 5 y; ARRAY x {5} x 1 -x 5; n=1; DO UNTIL (x {n} GT y); /*GREATER THAN*/ PUT x(n)= y= ; n=n+1; END; DATALINES; 123413 023676 ;
PRINT程序 列印資料 DATA a ; INFILE ; INPUT ; LABEL ; PROC SORT DATA=a; BY sex; RUN; PROC PRINT DATA=a (FIRSTOBS=3 OBS=10) DOUBLE; /*兩倍的列印間距*/ /*若只想列印幾筆而已可插入( )*/ BY sex; /*使用BY前 要先SORT 成 2個子集*/ SUM quiz 1 -quiz 10; SUMBY sex; RUN;
MEANS程序 列印資料 DATA a ; INFILE ; INPUT ; LABEL ; PROC SORT DATA=a; BY sex; RUN; PROC MEANS DATA=a MAXDEC=4 ; VAR quiz 1 -quiz 3; CLASS grade; BY sex; RUN;
MEANS程序 可列印之資料 DATA a ; INFILE ; INPUT ; LABEL ; PROC SORT DATA=a; BY sex; RUN; PROC MEANS DATA=a MAXDEC=4 N MIN MAX RANGE MEAN CSS VAR STDERR SKEWNESS KURTOSIS; VAR quiz 1 -quiz 3; RUN;
SORT程序 重新編排資料 DATA a ; INFILE ; INPUT ; avg=MEAN(OF quiz 1 -quiz 10); PROC SORT DATA=a; BY DESCENDING avg; RUN; PROC PRINT DATA=a; RUN;
SORT程序 重新編排資料 DATA a ; INFILE ; INPUT ; avg=MEAN(OF quiz 1 -quiz 10); PROC SORT DATA=a; BY name sex ; RUN; PROC PRINT DATA=a; RUN;
RANK程序 資料等級化 DATA a ; INFILE ; INPUT ; avg=MEAN(OF quiz 1 -quiz 10); PROC RANK DATA=a OUT=a 1; VAR avg ; RANKS rankavg; RUN; PROC PRINT DATA=a 1; RUN;
RANK程序 資料由高至低等級 DATA a ; INFILE ; INPUT ; avg=MEAN(OF quiz 1 -quiz 10); PROC RANK DATA=a OUT=a 2 DESCENDING; VAR avg ; RANKS rankavg; RUN; PROC PRINT DATA=a 2; RUN;
RANK程序 資料由低至高等級 DATA a ; INFILE ; INPUT ; avg=MEAN(OF quiz 1 -quiz 10); PROC RANK DATA=a OUT=a 3; VAR quiz 1 avg ; RANKS rankquiz 1 rankavg; RUN; PROC PRINT DATA=a 3; RUN;
RANK程序 資料由低至高等級 DATA a ; INFILE ; INPUT ; avg=MEAN(OF quiz 1 -quiz 10); PROC RANK DATA=a OUT=a 3; VAR quiz 1 avg ; RANKS rankquiz 1 rankavg; by …; /*子集 此var要先sort */ RUN; PROC PRINT DATA=a 3; RUN;
- Slides: 75