SAS9.3中ProcSQL的应用与提高缘梦缘Email: moonlight2006h@163.comg@Structured Query Language (SQL)‐‐结构化查询语言SQL语言是一种用于查询和更新数据的标准语言的标准语言主要特点不需要采用SORT预处理就可以(点对点)连接(或称合并)多个数据库连接多个数据库时不需要变量名相同一条SQL语句的输出可以作为另一条SQL语句的条SQL语句的输出可以作为另条SQL语句的输入,所以SQL语句可以嵌套,使其具有极大的灵活性和强大的功能2标准的SQL语言构成(4部分)数据定义语言(DDL):例如:CREATE、DROP、ALTER等语句。数据操作语言(DML):例如:INSERTUPDATEDELETE语句数据操作语言(DML):例如:INSERT、UPDATE、DELETE语句。数据查询语言(DQL):例如:SELECT语句。数据控制语言()数据控制语言(DCL):例如::GRANT(授予用户访问权限)、REVOKE(解除用户访问权限)、COMMIT(结束当前事务)、ROLLBACK(中止当前事务)等语句。)SAS中的PROC SQL过程并没有特定的数据控制语句过的外部序来实数据句,而是通过PROC SQL的外部程序来实现数据控制。SAS的PROC SQL突出的是其数据查询功能。3数据查询语句(SELECT语句)的基本语法介绍SELECT DISTINCT object‐item , ...object‐itemFROM from‐list WHERE sql‐expression GROUP BY group‐by‐item , ... group‐by‐item HAVINGsql‐expressionHAVING sql‐expression ORDER BY order‐by‐item , ... order‐by‐item; SELECT指定被选择的列DISTINCT删除重复记录SELECT:指定被选择的列DISTINCT: 删除重复记录FROM:指定被查询的表名WHERE:子数据集的条件WHERE:子数据集的条件GROUP BY:将数据集通过GROUP变量来进行分类HAVING:根据GROUP BY的变量得到子数据集ORDER BY:对数据集进行排序4第一章SQL的基本功能DATA preteen;SETsashelpclass;SET sashelp.class;WHERE age13;LABEL name = 'First Name';RENAMEname=FName;RENAME name = FName;FORMAT height weight 5.1;RUN;5昀简单的SQL语言PROC SQL;SELECT*SELECT*FROM preteen;QUIT;QUIT;6SQL语言的基本功能相当于有选择的PROC PRINT过程PROC SQL;SELECTfname, agegFROM preteen;;QUIT;/*等同于以下语句*/PROC PRINT NOOBS LABEL DATA=preteen;VAR fnameage;RUN;7利用SQL创建新表(create table …as)PROC SQL;CREATETABLEnewASCREATETABLE new ASSELECT *FROM preteen;;QUIT;8利用列创建子数据集PROC SQL;CREATE TABLE subset ASSELECT fname, sex, ageFROM preteen;;QUIT;/*等同于以下语句*/PROC SQL;CREATE TABLE subset(DROP=height weight) ASSELECT *FROMpreteenFROM preteen;QUIT;9利用列创建新变量PROC SQL;CREATETABLEratiosASCREATE TABLE ratios ASSELECT *,weight / height ASRatioFORMAT=52LABEL='体重:身高比'FORMAT=5.2 LABEL=体重:身高比FROM preteen;QUIT;Q;10利用SQL进行简单的数据统计(类似Proc summary)PROC SQL;CREATE TABLE overall_averagesASSELECT MIN (age) AS Youngest,MAX (age) AS Oldest,MEAN(height) AS Avg_HeightFORMAT=5.1,MEAN(weight) AS Avg_WeightFORMAT=5.1FROM preteen;QUIT; SQL常用函数/*等同于以下语句*/PROC SUMMARY DATA=preteen;VAR age height weight;SQL常用函数MEAN或AVG:均值COUNT或N或FREQ:非缺失值个数ggg;OUTPUT OUT=overall_averages(DROP = _type_ _freq_)MIN (age )=YoungestMAX (age )=Oldest非缺值个数MAX:最大值MIN:最小值NMISS:缺失值个数STD标准差MEAN(height)=Avg_HeightMEAN(weight)=Avg_Weight;RUN;11STD:标准差SUM:求和VAR:方差利用SQL进行分组统计PROC SQL;CREATE TABLE group_averagesASSELECT sex,MIN(age)ASYoungest,MIN (age) AS Youngest,MAX (age) AS Oldest,MEAN(height) AS Avg_HeightFORMAT=5.1,MEAN(weight) AS Avg_WeightFORMAT=5.1FROM preteenGROUP BY sex;QUIT;QUIT; /*等同于以下语句*/PROC SUMMARY DATA=preteen NWAY;CLASS sex;VAR age height weight;OUTPUT OUT=overall_averages(DROP = _type_ _freq_)MIN (age )=YoungestMAX(age)=OldestMAX (age )=OldestMEAN(height)=Avg_HeightMEAN(weight)=Avg_Weight;RUN;12根据条件语句创建数据集PROC SQL;CREATE TABLE trip_listASSELECTfname,;QUIT;SELECT fname,age,sex,CASEWHENage=11THEN'Zoo'/*等同于以下语句*/DATA trip_list;SETpreteen;CASE WHEN age11 THENZooWHENsex='F' THEN'Museum'ELSE'[None]'ENDSET preteen;IF age=11 THEN Trip = 'Zoo ';ELSE IF sex='F' THEN trip = 'Museum';ELSEtrip='[None]';ENDAS TripFROM preteenELSE trip = [None];KEEP fnameage sex trip;RUN;CASE语句通常来实现条件处理,语法:SELECTcolumn…CASEcase-operandWHENwhenconditionTHENresultexpression13WHENwhen-conditionTHENresult-expressionWHENwhen-conditionTHENresult-expressionELSEresult-expressionEND;利用WHERE从句选择子数据集PROC SQL;CREATETABLEgirlsASWHERE从句中的运算符(1)比较运算符:LT小于CREATE TABLE girls ASSELECT *LT小于GT大于EQ=等于LE=小于或等于FROM preteenWHEREsex=‘女'LE=小于或等于GE=大于或等于NE^=不等于WHERE sex=女;(2)逻辑运算符OR|或AND&是QUIT;AND&是NOT^非14利用HAVING从句替代WHERE从句PROC SQL;CREATE TABLE hiloASQUIT;/*等同于以下语句*/SELECT sex,age,MAX(height)ASTallestPROC SUMMARY DATA=preteen NWAY;CLASS sex age;OUTPUTMAX(height)=TallestMAX(height) AS Tallest,MIN(height) AS ShortestFROM preteenOUTPUT MAX(height)=Tallest MIN(height)=ShortestOUT=hilo(WHERE = (tallest ‐shortest GROUP BY sex, ageHAVING tallest ‐shortest 44) DROP = _type_ _freq_ );RUN;;HAVING从句与WHERE从句的主要区别15HAVING从句与WHERE从句的主要区别HAVING从句WHERE从句典型应用是根据条件从表中选出(或排除)分组的行典型应用是根据条件从表中选出(或排除)单个的行必须跟在GROUPBY之后,没有GROUPBY,HAVING等于WHERE如有GROUPBY,必须在GROUPBY之前利用ORDER BY对数据进行排序PROC SQL;CREATE TABLE age_sortAS/*等同于以下语句*/PROC SORT DATA=preteen g_SELECT *FROM preteenpOUT=age_sort;BY DESCENDING age pORDER BY age DESCENDING, fnamefname;RUN;;QUIT;16利用DISTINCT剔除重复记录PROC SQL;CREATE TABLE sex_ageASSELECT sex, ageFROM preteen;;PROC SQL;CREATE TABLE sex_age_distinctASSELECT DISTINCT *FROM sex_age;QUIT;QUIT;17第二章SQL语言的初步提升PROC SQL;CREATETABLEteensASCREATE TABLE teens ASSELECT name AS FName,ageFROMsashelpclassFROM sashelp.classWHERE age12;QUIT;Q;18将原始变量和二次分析生成的变将原析成量进行合并PROC SQL;CREATE TABLE detail_and_countsAS__SELECT fname,age,g,COUNT(*) AS ManyFROM teensGROUP BY ageORDERBYfnameORDER BY fname;QUIT;QUIT;19COUNT(*),N(*),或FREQ(*):返回非缺失值个数COUNT(DISTINCTvaria