· 6 years ago · Apr 29, 2019, 05:00 PM
1/**************************************************************
2Program: Code_For_SQL_How_SGF_2019 Lavery
3programmer: r lavery
4date: 2018/02/28
5Purpose: code for attendees to run and modify in SQL How
6**************************************************************/
7/*
8Topics are:
9Section A 1 OF 11) THE SAS DATA ENGINE `
10Section B 2 OF 11) INDEXING
11Section B 3 OF 11) THE SQL OPTIMIZER AND IMPROVING PERFORMANCE
12Section D 4 OF 11) SUB-QUERIES: CORRELATED AND UNCORRELATED
13Section E 5 OF 11) PLACEMENT OF SUB-QUERIES IN SQL SYNTAX
14Section F 6 OF 11) VIEWS
15Section G 7 OF 11) FUZZY MERGING
16Section H 8 OF 11) COALESCING
17Section I 9 OF 11) FINDING DUPLICATES
18Section J 10 of 11) REFLEXIVE JOINS
19Section K 11 of 11) USE SQL DICTOINARY TABLES TO DOCUMENT DATA SETS IN HYPERLINKED EXCEL WORKBOOK
20*/
21
22
23/****Section: Data Set Creation *************************/
24Options msglevel=i;
25ODS Listing;
26ODS HTML close;
27options nocenter MCompilenote = all;
28
29
30PROC SQL; create table MySchool
31 ( Name Char(8) , Sex Char(1) , age num
32 , Height num , weight num
33 );
34insert into MySchool
35values('Joy' ,'F', 11 , 51.3 , 50.5)
36values('Jane' ,'F', 12 , 59.8 , 84.5)
37values('Jim' ,'M', 12 , 57.3 , 83.0)
38values('Alice' ,'F', 13 , 56.5 , 84.0)
39values('Jeff' ,'M', 13 , 62.5 , 84.0)
40values('Bob' ,'M', 14 , 64.2 , 90.0)
41values('Philip' ,'M', 16 , 72.0 ,150.0);
42
43PROC SQL; create table ExtraInfo
44 ( Name Char(8) , age num, Sex Char(1)
45 , Allerg Char(4), Diet Char(6) , sport num , ResDay Char(8)
46 );
47insert into ExtraInfo
48values('Joy' , 11 ,'F', 'None', 'Vegan' , 3 ,'Resident')
49values('Jane' , 12 ,'F', 'Nuts', 'Meat' , 1 ,'Day')
50values('Jim' , 12 ,'M', 'None', 'Meat' , 0 ,'Resident')
51values('Alice' , 13 ,'F', 'Nuts', 'NoMeat' , 1 ,'Resident')
52values('Jeff' , 13 ,'M', 'Dust', 'Vegan' , 1 ,'Day')
53values('Philip', 16 ,'M', 'None', 'NoMeat' , 2 ,'Resident');
54quit;
55
56/*******************************************************************************************/
57/** Section A- 1 of 11: THE SAS DATA ENGINE ******/
58/* The fact that different numbers of observations were read supports the idea
59 that a data engine exists that is close to the hard drive.*/
60/*******************************************************************************************/
61/*Example A_1*/
62Data A01_IF_example ;
63 /*Reads 19 obs*/
64 set sashelp.class ;
65 if sex NE "F";
66 run;
67
68
69Data A02_Where_Example ;
70 set sashelp.class ;
71 /*Reads 10 obs*/
72 Where sex NE "F";
73 run;
74
75
76/*******************************************************************************************/
77/** Section B - 2 of 11: INDEXING**/
78/*******************************************************************************************/
79/*Example B_1 run in small steps and look for notes in the log*/
80PROC SQL;
81/*Create a dataset on which you can create an index*/
82Create table B01_MyClass
83 as select *
84 from SASHelp.class;
85/*Data set B01 has an index created by SQL, B02 will create the index using a data step*/
86Create index name on B01_MyClass(name);
87
88PROC Contents data=B01_MyClass centiles;
89Title "Example B_1 The PROC Contents tells you about indexes";
90TITLE2 "but it does not tell you much - read Mike Raithal's paper";
91run;
92title "";
93
94
95PROC SQL;
96Select *
97 /*B_01_A: No note in log*/
98 from B01_MyClass;
99
100Select *
101 from B01_MyClass
102/*B_01_B: There is a note in the log
103 INFO: Index Name selected for WHERE clause optimization*/
104 where name="Jane";
105
106
107Select *
108 from B01_MyClass
109/*B_01_C: There is a note in the log
110 INFO: Index Name selected for WHERE clause optimization*/
111 where substr(name,1,1)="J";
112
113 Select *
114 from B01_MyClass
115/*B_01_D: There is a note in the log
116 INFO: Index Name selected for WHERE clause optimization*/
117 where substr(name,2,1)="o";
118
119 Select *
120 from B01_MyClass
121 /*B_01_E: No Note in log (violates the 10% rule?)*/
122 where substr(name,2,1)in ("a","e","i","o","u");
123
124 Select *
125 from B01_MyClass
126 /*B_01_F: No note in log - we did not have an index on age*/
127 where age=13;
128
129/*Only Difference between B01 and B02 is that the index for Data B02 was created in a data step - B01 used SQL*/
130/*Example B_2 Run code in steps and look for notes in the log - resuilts will match B01*/
131Data B02_My_Class(index=(name));
132 set SASHelp.class;
133 run;
134
135PROC Contents data=B02_My_Class centiles;
136Title "Example B_2 The PROC Contents tells you about indexes";
137TITLE2 "but it does not tell you much - read Mike Raithal's paper";
138run;
139title "";
140
141Data _null_;
142 set B02_My_Class;
143 /*No Where -- No INFO in log. SAS Reads ALL 19 obs*/
144 run;
145
146Data _null_;
147 set B02_My_Class;
148 /*INFO: Index Name selected for WHERE clause optimization. is in log*/
149 where name="Jane";
150 run;
151
152 Data _null_;
153 set B02_My_Class;
154 /*INFO: Index Name selected for WHERE clause optimization. is in log*/
155 where substr(name,1,1)="J";
156 run;
157
158 Data _null_;
159 set B02_My_Class;
160 /*INFO: Index Name selected for WHERE clause optimization. is in log*/
161 where substr(name,2,1)="o";
162 run;
163
164Data _null_;
165 set B02_My_Class;
166 /*No Note in log (violates the 10% rule?)*/
167 where substr(name,2,1)in ("a","e","i","o","u");
168 run;
169
170Data _null_;
171 set B02_My_Class;
172 /*NO INFO in log, but reads 3 obs*/
173 where age=13;
174 run;
175title "";
176
177
178/*******************************************************************************************/
179/** Section C - 3 of 11: THE SQL OPTIMIZER AND IMPROVING PERFORMANCE **/
180/*******************************************************************************************/
181/*Example C_1 run this and look in the log.
182 The log will show a summary for the program that the optimizer plans to run*/
183PROC SQL _method _tree;
184title "Example C_1 Look in Log to see the explain plan. The output from _method and _Tree";
185 select name from sashelp.class
186 where sex="M"
187 order by Age;
188title "";
189
190/*******************************************************************************************/
191/** Section D - 4 OF 11) SUB-QUERIES: CORRELATED AND UNCORRELATED*/
192/*******************************************************************************************/
193/*Example D_1*/
194PROC SQL _method _tree;
195 Title "Example D_1 UNcorrelated Sub query in the FROM clause";
196 Select *
197 From (Select name , sex , height
198 from MySchool as I
199 where I.sex='F');
200title "";
201
202/*Example D_2*/
203PROC SQL _method _tree;
204 Title "Example D_2 UNcorretated Sub query in the WHERE clause";
205 Select name , sex, age
206 From MySchool as O
207 Where O.Sex=
208 (select sex
209 from ExtraInfo
210 having Sport=max(Sport)
211 );
212title "";
213
214/*Example D_3*/
215PROC Sql;
216 Title "Example D_3 use join to replace UNcorretated Sub query in the FROM clause";
217 select name , Outer.sex, age
218 from
219 MySchool as Outer
220 Inner join
221 (select sex
222 From ExtraInfo
223 having Sport=max(Sport)
224 ) as sub
225 on Outer.sex=sub.sex;
226
227/*Example D_4*/
228PROC SQL;
229 Title "Example D_4 Correlated Sub query in the Where clause";
230 select name , sex, age
231 from MySchool as O
232 where O.Age =
233 (select Age from ExtraInfo as I
234 Where I.sex EQ O.Sex
235 Having Sport=Max(sport)
236 );
237title "";
238
239/*Example D_5*/
240/*Correlated query vs Join*/
241PROC SQL;
242/*This is a correlated query in the WHERE clause ** note I.sex NE O.Sex */
243 Create table D05_Corr_Sub_Q as
244 Select name , sex, age
245 From MySchool as O
246 Where EXISTS
247 (select * from ExtraInfo as I
248 having I.age = O.age
249 and I.sex NE O.Sex
250 ) ;
251
252PROC SQL;
253 /*This is a join query" ** note inner join */
254 Create table D05_Equivolent_Join as
255 Select O.name , O.sex, O.age
256 From
257 MySchool as O
258 Inner join
259 (select name, sex, age
260 from ExtraInfo
261 ) as WasSub
262 On O.Age=WasSub.Age
263 and O.sex NE WasSub.sex;
264
265 PROC compare base=D05_Corr_Sub_Q compare =D05_Equivolent_Join;
266 title "Example D_5 This compares the resuolts from the correlated query and the join";
267 run;
268title "";
269
270/*******************************************************************************************/
271/** Section E 5 OF 11) PLACEMENT OF SUB-QUERIES IN SQL SYNTAX **/
272/*******************************************************************************************/
273/*Example E_1*/
274PROC SQL;
275 title "Example E_1 Subquery returns a 1 BY 1 to one 'cell' as SQL processes EACH ROW in the Select";
276 title2 "Exploring what shapes of data can be returned to what parts of a SQL Query";
277 select O.name, O.age
278 ,(select I.age
279 from ExtraInfo I
280 where Name='Joy') as Age_of_Joy
281 , O.sex
282 From MySchool as O;
283title "";
284
285/*Example E_2 INTENTIONAL FAIL*/
286PROC SQL;
287 title "Example E_2 Subquery returns A COLUMN OF DATA to one 'cell' as SQL processes EACH ROW in the Select";
288 title2 "Exploring what shapes of data can be returned to what parts of a SQL Query";
289 select O.name
290 , O.age
291 ,(select DISTINCT I.age
292 from ExtraInfo I) as Returns_col
293 , O.sex
294 From MySchool as O;
295title "";
296
297/*Example E_3 INTENTIONAL FAIL*/
298PROC SQL;
299 title "Example E_3 Subquery returns MULTIPLE ROWS to one 'cell' as SQL processes EACH ROW in the Select";
300 title2 "Exploring what shapes of data can be returned to what parts of a SQL Query";
301 select O.name
302 , O.age
303 /*Subquery returns multiple vars TO EACH ROW - in the Select*/
304 ,(select I.name, I.Sex, I.sport
305 from ExtraInfo as I
306 where name='Joy') as Mult_Vars
307 , O.sex
308From MySchool as O;
309title "";
310
311/*Example E_4 INTENTIONAL FAIL*/
312PROC SQL;
313 title "Example E_4 Subquery returns A TABLE to one 'cell' as SQL processes EACH ROW in the Select";
314 title2 "Exploring what shapes of data can be returned to what parts of a SQL Query";
315 select O.name
316 ,O.age
317 /*Subquery returns a table - in the select*/
318 ,(select I.age , I.diet , I.sport
319 from ExtraInfo as I)
320 , O.sex
321 From MySchool as O;
322title "";
323
324/*Example E_5*/
325/* Shapes allowed in the Where and having*/
326PROC SQL;
327 title "Example E_5 UNcorrelated Subquery returns A 1 by 1 to the Where";
328 title2 "Exploring what shapes of data can be returned to what parts of a SQL Query";
329 select *
330 From MySchool as O
331 /*UNCoirrelated Subquery returns 1 by 1 in the Where or Having*/
332 where O.Age LE
333 (Select MAX(I.age)
334 from ExtraInfo as I
335 );
336title "";
337
338/*Example E_6*/
339PROC SQL;
340 title "Example E_6 UNcorrelated Subquery returns A COLUMN of data to the Where";
341 title2 "Exploring what shapes of data can be returned to what parts of a SQL Query";
342 select O.*
343 From MySchool as O
344 /*Subquery returns column - in the Where or Having*/
345 where O.Age IN
346 (Select distinct I.age
347 from ExtraInfo as I
348 );
349title "";
350
351/*Example E_7*/
352PROC SQL;
353 title "Example E_7 Correlated Subquery returns A ROW of data to the Where";
354 title2 "Exploring what shapes of data can be returned to what parts of a SQL Query";
355 title3 "if you are in MySchool AND in ExraINFO, you show up ";
356 title4 "We lose Bob";
357 select O.*
358 From MySchool as O
359 /*Subquery returns column - in the Where or Having*/
360 where Exists
361 (Select I.Name, I.sex, I.sport
362 from ExtraInfo as I
363 where O.name=I.name
364 );
365title "";
366
367/*Example E_8*/
368Proc SQL;
369 title1 "Example E_8 Correlated Subquery returns A TABLE of data to the Where";
370 title2 "Exploring what shapes of data can be returned to what parts of a SQL Query";
371 title3 "There is one or more people in ExtraInfo of the same sex - you show up in the report";
372 select *
373 From MySchool as O
374 /*Subquery returns table - in the Where or Having*/
375 where Exists
376 (Select I.*
377 from ExtraInfo as I
378 where I.sex= O.sex
379 );
380title "";
381
382/*******************************************************************************************/
383/** Section F - 6 OF 11) Views**/
384/*******************************************************************************************/
385/*Example F_1 Show Views being used*/
386Proc SQL;
387Create View F1_Old_guys as
388select name, sex, age
389 , Weight/2.2 as Wt_KG
390from SAShelp.class as c
391where sex='M' and age > 13;
392quit;
393
394proc print data=F1_Old_guys; run;
395title "xample F_1 Show Views being used";
396run;
397title "";
398
399/*Example F_2 Show Views being used*/
400PROC Gchart data=F1_Old_guys;
401title "Example F_2 Show Views being used";
402pie age /discrete; run;
403
404PROC Univariate data=F1_Old_guys;
405var age; run;
406title "";
407
408/*Example F_3 Views being chained*/
409PROC SQL;
410 create VIEW F3_boys as
411 select * from sashelp.class
412 where sex='M';
413
414PROC SQL;
415 create VIEW F4_Old_boys as
416 select * from F3_boys
417 where age GE 14;
418
419PROC SQL;
420 CREATE TABLE F05_Chained_Views as
421/*"This is the result of chained views";*/
422/*"Note that the log only shows one data set being created";*/
423select
424 'number of old boys' as tag
425 ,count(*) as Nmbr_old_boys
426from F4_Old_boys
427;
428QUIT;
429
430PROC PRINT data=F05_Chained_Views;
431title "Example F_3 Views being chained This is the result of chained views";
432title2 "Note that the log only showed one data set being created when we created F01_Chained_Views";
433run;
434
435/*Example F_4 table vs view*/
436/*Show the diifference between a table of data and a view of data*/
437PROC SQL;
438 Create table F06_Class_table as
439 select *
440 from SASHelp.class;
441
442 /*Use the SQL command "Describe" to see "what is in" F02_Class_table..Data */
443 Describe table F06_Class_table;
444
445
446PROC SQL;
447 Create view F06_Class_View as
448 select *
449 from SASHelp.class;
450
451 /*Use the SQL command "Describe" to see "what is in" F02_Class_View..INSTRUCTIONS */
452 DESCRIBE VIEW F06_Class_View;
453
454
455
456/*Example F_5 Making a perm View - handling the libname issue*/
457/*The View ITSELF must contain a libname because not evey user uses the same libnames*/
458options noxsync noxwait;
459x "mkdir C:\temp"; /*Make a dir*/
460x "mkdir C:\Perm_Views"; /*Make a dir*/
461
462Libname OneSpot "C:\temp"; /*define libraries to the session*/
463Libname ViewLoc "C:\Perm_Views";
464
465/*Place some data in C:\temp
466 at a place that the SAS session thinks of as OneSpot*/
467Data OneSpot.F03_Class_data;
468set SASHelp.Class; run;
469
470
471Proc SQL ;
472/*"This is a Permanent view so has a two part name";*/
473 Create view ViewLoc.F04_Eng_2_Met as
474 select name
475 ,Weight/2.2 as Wt_Kg
476 ,height*2.25 as Hgt_cm
477 from PermLoc.F03_Class_data
478 /*Get some data From C:\temp
479 A place that the View thinks of as PermLoc*/
480 using libname PermLoc 'C:\TEMP';
481quit;
482
483PROC Print data= ViewLoc.F04_Eng_2_Met;
484title "Example F_5 Making a perm View This is a Permanent view so has a two part name";
485run;
486title "";
487
488
489
490
491
492/*******************************************************************************************/
493/** Section G - 7 OF 11) FUZZY MERGING */
494/*******************************************************************************************/
495/* Misspelling of names is very common
496 We have purchased physican information from two different sources (two conferences)
497 and want to find peopel who attended both conferences.
498 We will match on name and other characteristics */
499
500/*Example G_1 Fuzzy merging is common when you have to match up names and addresses*/
501Data G01_Doctors_2010;
502Infile datalines truncover firstobs=4;
503input @1 name $Char15.
504 @17 City $Char6.
505 @25 BrdCert $Char3.; /*BrdCert = board certified*/
506datalines;
507Name CITY Board Cert.
508 1 2 3
509123456789012345678901234567890
510Dr. Sebastian Paris YES
511Dr. O'Banion Dublin YES
512Dr. Jaramillo Madrid YES
513Dr. Mafume Tokyo YES
514Dr. Lu Boston NO
515;run;
516
517Data G02_Doctors_2011;
518Infile datalines truncover firstobs=1;
519input @1 name $Char15.
520 @17 City $Char6.
521 @25 BrdCert $Char3.;
522datalines;
523Dr. Sebastian Paris YES
524Dr. O'Banionn Dublin YES
525Dr. Jaramillo Madr d YES
526Dr. Mafumee T kyo yES
527Dr. Lu Boston NO
528;run;
529
530Proc SQL;
531 title "Example G_1 Fuzzy merging";
532 select ( (O.name=N.name) *9
533 +(O.City=N.City) *5
534 +(O.BrdCert=N.BrdCert) *3 /*BrdCert = board certified*/
535 ) as score
536 /* O stands for Old=2010 and N stands for New=2011*/
537 ,O.name as Old_N , O.city as Old_C , O.BrdCert as OBC
538 ,N.name as New_N , N.city as New_C , N.BrdCert as NBC
539
540
541 from G01_Doctors_2010 as O , G02_Doctors_2011 as N
542 having score >= 2 order by score desc ,O.name ;
543title "";
544quit;
545
546/*Example G_2 use a format to make the Fuzzy merging results more readable*/
547Proc format ;
548value Mtching
54917="Name & City & Board"
55012="Name & Board"
5518="City & Board"
5529="Name"
5535="City"
5543="Board only"
5550="no Match";run;
556
557Proc SQL;
558 title "Example G_2 Fuzzy merging w format";
559select ((O.name=N.name)*9
560 +(O.City=N.City)*5
561 +(O.BrdCert=N.BrdCert)*3)
562 as score format=Mtching.
563 /* stands for Old-2010 and N stands for New=2011*/
564 ,O.name as ON , O.city as OC ,O.BrdCert as OBC
565 ,N.name as NN , N.city as NC ,N.BrdCert as NBC
566 from G01_Doctors_2010 as O , G02_Doctors_2011 as N
567 having score >= 2 order by score desc ,O.name;
568quit;
569title "";
570
571/*******************************************************************************************/
572/** Section H - 8 OF 11) COALESCING **/
573/*******************************************************************************************/
574/*Example H_1 missing values protagate and make report look bad
575 Using a coalesce to replace missings with zeros*/
576Proc SQL;
577create table H01_Nm_money
578( name char(4)
579 ,balance num );
580
581insert into H01_Nm_money
582 values('russ' , . )
583 values('joe' ,10000)
584 values('Chi' ,60000)
585;
586
587Proc SQL;/*INTENTIONAL ERROR*/
588 create table H02_interest1 as
589 select
590 name
591 , coalesce(balance,0) as balance2
592 , balance2*.05 as interest1 /*INTENTIONAL ERROR*/
593
594 From H01_Nm_money;
595quit;/*INTENTIONAL ERROR*/
596
597/*Example H_2 missing values propagate and make report look bad
598 Using a Calculated to fix problem*/
599Proc SQL;
600 create table H02_interest3 as
601 select
602 name
603 , coalesce(balance,0) as balance2
604 , calculated balance2*.05 as interest
605
606 From H01_Nm_money;
607quit;
608
609PROC Print data=H02_interest3;
610 title "Example H_1 Using a coalesce to replace missings with zeros";
611run;
612title "" ;
613
614/*Example H_3 A Coalescing issue Russ is spelled differently and the coalesce does not work*/
615PROC SQL;
616 create table H05_Nm_job_Mismatch
617 ( name char(4)
618 ,job char(5) );
619
620insert into H05_Nm_job_Mismatch
621 values('Russ','Geek')
622 values('Joe' ,'Prgmr')
623 values('Chi' ,'Mgr.')
624;
625
626PROC SQL;
627 create table H06_Nm_Time
628 ( name char(4)
629 ,Time_W_Co num );
630
631insert into H06_Nm_Time
632 values('russ',6)
633 values('Chi' ,8)
634;
635
636Proc SQL;
637 Create table H07_name_Job_Time_Mismatch as
638 Select
639 coalesce (J.name,T.name) as name
640 ,j.job as job
641 ,coalesce(T.Time_W_Co, 0) as time_w_co
642
643 From
644 H05_Nm_job_Mismatch as J
645 left join
646 H06_Nm_Time as T
647 On j.name=t.name;
648 ;
649
650Proc print data=H07_name_Job_Time_Mismatch;
651 title "Example H_2 A Coalescing issue Russ is spelled differently and the coalesce does not work";
652run;
653
654/************************************************************/
655/*Example H_4 Using a coalesce to get the most recent data*/
656/*We have three Years of donation data and want most recent info (people have an ID*/
657Data H08_Yr2006;
658 infile datalines truncover
659 firstobs=3;
660 input @1 ID
661 @5 Name $char6.
662 @15 state $char6.;
663Datalines;
664ID Name State
66512345678901234567890
666001 Robert TN2006
667002 Calvin NH2006
668005 Carl NJ2006
669007 Earl NY2006
670008 Ell DE2006
671025 Ted WI2006
672;
673run;
674
675data H09_Yr2005;
676infile datalines truncover
677 firstobs=3;
678input @1 ID
679 @5 Name $char6.
680 @15 state $char6.;
681Datalines;
682ID Name State
68312345678901234567890
684001 Bob PA2005
685002 Cal NH2005
686005 Carl NJ2005
687006 Errol CA2005
688020 Sue NJ2005
689;
690run;
691
692data H10_Yr2004;
693infile datalines truncover
694 firstobs=3;
695input @1 ID
696 @5 Name $char6.
697 @15 state $char6.;
698Datalines;
699ID Name State
70012345678901234567890
701001 Bob PA2004
702005 Carl NJ2004
703010 Fan DE2004
704011 Mike PA2004
705;
706
707Proc SQL;
708create table H11_current as
709 select
710 coalesce(six.ID ,Five.ID ,Four.ID) as Coalesced_ID
711 ,coalesce(six.name ,Five.name ,Four.name) as Most_recent_name
712 ,coalesce(six.State,Five.State ,Four.State) as Most_recent_Add
713
714from H08_yr2006 as six
715 full join
716 H09_yr2005 as five on six.id=five.ID
717 full join
718 H10_yr2004 as four on (four.ID=six.ID or four.id=five.id)
719 ORDER BY Coalesced_ID;
720
721PROC Print data=H11_Current;
722 title "Example H_3 Using a coalesce to get the most recent data";
723run;
724title "";
725
726/*******************************************************************************************/
727/** Section I - 9 OF 11) FINDING DUPLICATES **/
728/*******************************************************************************************/
729/* This is a very useful, and fairly simple, bit of SAS code */
730/*Example I_1 Flexible way of finding and a easily understood reporting on duplicates*/
731Data I01_DataW_duplicates;
732infile datalines truncover firstobs=4 ;
733input @1 name $Char6.
734 @9 Sex $char1.
735 @13 Age 2.
736 @17 height 4.1
737 @25 Weight 5.1
738 ;
739datalines ;
740Name Sex Age Height Weight
741 1 2 3 4 5
74212345678901234567890123456789012345678901234567890
743Jane F . 59.8 84.5
744Alfred M 14 69.0 112.5
745Carol F 14 62.8 102.5
746Fred M 12 57.3 83.0
747Jane F 12 . .5
748Alfred M 99 69.0 112.5
749Louise F 12 56.3 77.0
750Jane F 12 59.8 84.5
751;
752run;
753
754PROC SQL;
755title "Example I_1 Flexible way of finding and a easily understood reporting on duplicates";
756 select I01_in.Number_Of_Dupes
757 ,I01_Out.*
758 from
759 I01_DataW_duplicates as I01_Out
760 inner join
761 (select name, sex, count(*) as Number_Of_Dupes
762 from I01_DataW_duplicates
763 group by name, sex
764 having Number_Of_Dupes >1) as I01_in
765 on I01_in.name=I01_Out.name
766 and I01_in.sex=I01_out.sex
767 order by Number_Of_Dupes desc , name, sex;
768title "";
769
770/*******************************************************************************************/
771/** Section J - 10 of 11) REFLEXIVE JOINS **/
772/*******************************************************************************************/
773/*This is the classic example of using a reflexive join to find a person's boss*/
774/*Example J_1 The common reflexive join*/
775PROC SQL;
776/*Here is the data set - the ida is to */
777create table J01_employees
778 (EmpNo num
779 ,job char(15)
780 ,name char(15)
781 ,SupervisorEmpNo num
782 );
783insert into J01_employees
784values(1, "1_Pres" ,"Goodnight" ,.)
785values(4, "2_V.P. Sales" ,"Kurd" ,1)
786values(6, '2_V.P. R&D' ,"Church" ,1)
787values(8, "2_CFO" ,"Lee" ,1)
788values(14, "3_Salesman" ,"Wang" ,4)
789values(18, "3_Salesman" ,"Rama" ,4)
790values(26, "3_Chemist" ,"Levin" ,6)
791values(28, "3_Metalurgist" ,"Klien" ,6)
792values(31, "3_Acntg. Mgr" ,"Dowd" ,8)
793values(36, "3_Acntg. Mgr" ,"Shu" ,8)
794;
795
796Proc SQl;
797Select empl.EmpNo
798 , empl.job
799 , empl.name
800 , Rpt2.name as supervisor
801 , Rpt2.job as supv_job
802from J01_employees as empl
803 inner join
804 J01_employees as Rpt2
805 on empl.supervisorEmpNo=Rpt2.EmpNo
806order by supv_job;
807
808
809/*Example J_2 Finding connecting flights to get you home********************/
810proc SQl;
811Title "Example J_2 Finding connecting flights to get you from LA home to Phila";
812Title2 "Use a reflexive join to find the fastest epath through a network";
813create table J02_Flights
814 (origin Char(3)
815 ,flight num
816 ,Destination char(3)
817 ,time num);
818insert into J02_Flights
819values("SFO",111,"CHI",240) /*San Fran to Chicago*/
820values("LAX",111,"CHI",210) /*LA to to All Chicago*/
821values("LAX",121,"NOH",220) /*LA to Just O'Hare*/
822values("LAX",131,"CAK",266) /*LA to Akron */
823values("CHI",241,"PHL",145) /*All Chicago to Philadelphia*/
824values("NOH",201,"PHL",167) /*O'Hare to Phila*/
825values("CAK",201,"PHL",145) /*Akron to Phila*/
826values("CAK",201,"EWK",145); /*Akron to Newark*/
827;
828
829proc SQL;
830select wc.origin as WCStart /*West Coast Start airport*/
831 , wc.flight as WCFlight /*West Coast Flight*/
832 , wc.time as WCTime /*West Coast time*/
833 , wc.Destination as WCEnd /*West Coast ending airport*/
834 , "->" as spacer label="#"
835 ,ec.origin as ReStart /*East coast Start Airport*/
836 ,ec.flight as ECFlight /*East Coast Flight*/
837 ,ec.time as ECTime /*East Coast time*/
838 ,ec.Destination as ECEnd /*East Coast ending airport*/
839 , (ec.time+wc.time) as TotalTime /*flying time*/
840 from J02_Flights as wc inner join J02_Flights as ec
841 on wc.Destination=ec.origin and WC.origin="LAX" and EC.Destination="PHL"
842 order by totalTime desc;
843 quit;
844title "";
845
846
847/*******************************************************************************************/
848/** Section K 11 of 11) USE SQL DICTIONARY TABLES TO DOCUMENT YOUR DATA SETS IN A HYPERLINKED EXCEL WORKBOOK*/
849/*******************************************************************************************/
850/********ONLY WORKS FOR SAS 9.4 tm3 - tm4 and tm5 need slightly different coding *********************/
851%MACRO Check_by_VDG_V94_TM3(LibName=SASHelp /*<-- only use upper case letters*/
852 );
853/*excel limit 1,048,576 rows by 16,384 columns*/
854%local lp Libname FileList DateList NObsList SizeList Lenlist NOfVarslist;
855%local ThisFile ThisDate ThisNObs ThisSize ThisLen ThisNVar;
856%let Libname=%UpCase(&LibName);
857%put Libname=&Libname;
858
859ods _all_ close;
860%let path2lib = %sysfunc(pathname(&LibName));
861ODS Excel File="C:\HOW\Lavery\Contents_of_&Libname..xlsx "
862 nogtitle nogfootnote style=HTMLBlue ;
863ODS Excel options(embedded_titles='yes' embedded_footnotes='yes');
864
865Proc SQL noprint /*inobs=10*/;
866select memname , modate, nobs, filesize, obslen , NVar
867 into :filelist separated by " "
868 ,:DateList separated by " "
869 ,:NObsList separated by " "
870 ,:SizeList separated by " "
871 ,:Lenlist separated by " "
872 ,:NOfVarslist separated by " "
873 from Dictionary.tables
874 /*Below will eliminate views and graphic data types*/
875 where libname="&Libname" & MEMTYPE="DATA" and typemem ="DATA" and nobs GE 0;
876%put filelist = &filelist ;
877%put DateList = &DateList ;
878%put NObsList = &NObsList ;
879%put SizeList = &SizeList ;
880%put Lenlist = &Lenlist ;
881%put NOfVarslist=&NOfVarslist;
882
883 /*this is the list of all the tables and goes on the first tab*/
884ods Excel options(sheet_name="List_of_tables_in_lib" );
885Proc report data=sashelp.vtable /*(obs=10)*/ nowd;
886title "Tables in this library (&Libname) and workbook";
887title2 "S=sorted ** SK= sorted with no duplicate key values ** SR - sorted with no duplicate records";
888title3 "YES | CHAR= compresses (variable-length records) by SAS using RLE (Run Length Encoding). Compresses repeated comsiecutive characters.";
889
890title4 "Binary=obs. compresses (variable-length records) by SAS using RLE (Run Length Encoding).";
891 Column ('Click MemName to see desired data' libname memname MEMTYPE modate typemem nobs filesize obslen NVar
892 indxtype sorttype sortchar compress pcompress);
893 compute memname;
894 urlstring="#'"||strip(memname)||"'!A1";
895 call define(_col_,'url',urlstring);
896 call define(_col_,'style', 'style=[textdecoration=underline]');
897 endcomp;
898 where libname="&Libname" & MEMTYPE="DATA" and typemem ="DATA" and nobs GE 0;
899run;quit;
900
901/***/
902ods Excel options(sheet_name="List_of_indexes_lib" );
903Proc Report data=sashelp.vindex(obs=10) ;
904title "Indeces in this workbook";
905where libname="&Libname" & MEMTYPE="DATA" ;
906run;quit;
907
908
909/*Title link="#'List_of_tables_in_lib'!A1" '(Click to return to list of tables (first tab))';*/
910/*Footnote link="#'List_of_tables_in_lib'!A1" '(Click to return to list of tables (first tab))';*/
911
912%let lp=1;
913 %do %while(%scan(&filelist,&Lp) NE);
914 %let ThisFile = %scan(&filelist,&Lp);
915 %let ThisDate = %scan(&DateList,&Lp);
916 %let ThisNObs = %scan(&NObsList,&Lp);
917 %let ThisSize = %scan(&SizeList,&Lp);
918 %let ThisLen = %scan(&Lenlist,&Lp);
919 %let ThisNVar = %scan(&NOfVarslist,&Lp);
920
921 ods excel options(sheet_interval='table');
922 ods exclude all;
923 data _null_;
924 declare odsout obj();
925 run;
926
927 ods select all;
928 ods excel options(sheet_interval='none' sheet_name="&ThisFile" );
929
930
931 title "&Libname &ThisFile: rows=&ThisNObs NVars=&ThisNVar ModDate=&ThisDate Size=&ThisSize Obslen=&ThisLen";
932 Title link="#'List_of_tables_in_lib'!A1" '(Click to return to list of tables (first tab))';
933 Footnote link="#'List_of_tables_in_lib'!A1" '(Click to return to list of tables (first tab))';
934 *footnote2 "&libname is: &path2lib and workbook" ;
935 Proc Report data=sashelp.VColumn nowd;
936 Column
937 libname memname memtype name type length npos varnum label format informat
938 idxusage sortedby xtype notnull precision scale transcode diagnostic ;
939 where libname="&Libname" & MemName="&ThisFile";
940 run;quit;
941
942 title "&Libname &ThisFile: rows=&ThisNObs NVars=&ThisNVar :: SHOW ten obs" ;
943 Proc print data=&Libname..&ThisFile(obs=10) ;
944 run;quit;
945 title "";
946
947 %let Lp = %eval(&Lp+1);
948%end;
949ods Excel Close;
950%MEND Check_by_VDG_V94_TM3;
951
952%Check_by_VDG_V94_TM3(Libname=SASHELP /*<-- only use upper case letters*/
953 );