#!/afs/isis/pkg/perl-582/bin/perl #tower_strip.perl #Time-stamp: <2006-06-12 11:52:14 lstearns> # #Abstract: # This script is designed to parse existing tower data for a given # time range and save the output as a netcdf file containing the # converted data values as well as ancillary information. # # ./tower_parse.perl -help # For more information on input arguments # #Created: 2003.09?.?? maybe #Author: Luke Stearns #Modifications: started back up 2004.03.?? still going.... # # Luke Stearns 2006.03.15 - going back to improve some of the comments # ###################################################################### # Set up libraries and paths. use lib '/opt/local/seacoos/bin/'; use lib '/opt/local/seacoos/bin/meta_data/'; use meta_data; $bin_dir = '/opt/local/seacoos/bin/meta_data/'; #meta_data.pm is a module I wrote to access some data tables in #/opt/local/seacoos/bin/meta_data/ #I intended these tables as a first step towards designing a relational #database to store instrument configuration and deployment information. #Development paths and libraries: # use lib '/afs/isis.unc.edu/depts/marine/workspace/hseim/sablam4/'. # 'lstearns/perl/test_nemo/'; # use lib '/afs/isis.unc.edu/depts/marine/workspace/hseim/sablam4/'. # 'lstearns/perl/test_nemo/meta_data/'; # use meta_data; # $bin_dir = '/afs/isis.unc.edu/depts/marine/workspace/hseim/sablam4/'. # 'lstearns/perl/test_nemo/meta_data/'; #Other included modules: use NetCDF; use Date::Manip; use Statistics::Basic::StdDev; use Math::BigFloat; ###################################################################### # Keep track of the time it takes to run the script $starttime = time; ###################################################################### # Handle input arguments, namely a date string and a path, if given # I'd like to handle a variety of input arguments &handle_input; ###################################################################### # Read information about variables from the tables: parsing info, # conversions, and meta-data #This is now handled by the 'use meta_data;' line above. ###################################################################### # Read in the data files and parse each line of data! # This subroutine drives the bulk of the script! if ($debug) {print "\nReading data files:\n";} $first_one = 1; &read_data; ###################################################################### # Finally, write the data to the NetCDF file. This is where it # crashes more often than not. #&test_data; &write_netcdf; ###################################################################### # Let us know how long it took! $cummtime = time - $starttime; if ($debug) { print "Closing NetCDF files:\nALL DONE!"; print "\nTotal time $cummtime sec\n"; } if ($nemo) { print "Attempting to scp file to nemo... (will require passwd)\n"; system("scp $out_data_dir$out_file_name nemo.isis.unc.edu:/seacoos/". "data/nc-coos/roof/"); } ###################################################################### ####################### BEGIN SUBROUTINES ############################ ###################################################################### ###################################################################### ### Read Data ######################################################## sub read_data { #Set the starting time index (will be incremented to 0 first) $time_index = -1; $file_index = -1; #Set the fill value in case of missing data. $fill_value = -9999; #Get all of the filenames (including path) to the files in the #directory of data that satisfy the matching pattern. @tower_files = (); #For each of the dates determined by the user input (&handle_input #takes us to &handle_date that sets the array @dates) find the files #for that date and store them in the array @tower_files foreach $date_string (@dates) { $data_sub_dir = $month_dirs{$date_string}; @t_files = glob("${d_data_dir}/$data_sub_dir/${tower_id}_${date_string}*.tx*"); push(@tower_files,@t_files); } if ($debug) { print "... Found ".eval($#tower_files +1)." files.\n"; } #I'm not sure where else to put this... I want to hang on to the #sample loop time, and this is where I put it... $expected_vars{time} = "SAMPLE_LOOP"; #Loop through the filenames loading, then reading each file... foreach $file (@tower_files) { $file_time = $file; #$file_time =~m/([0-9_]+).txt/; $file_time =~m/([0-9]{4})([0-9]{2})([0-9]{2})_([0-9]{2})([0-9]{2})[0-9]+.txt/; $file_time_str = "$1/$2/$3 $4:$5:00"; #Convert to number of seconds since epoch $file_u_date = &UnixDate(&ParseDate("$file_time_str UTC"),"%s"); #1970/01/01 00:00:00 as a datenum $epoch_dn = 719529; #Convert the seconds since epoch to a datenum $file_datenum = $file_u_date/86400 + $epoch_dn; $file_index++; ${$Vars{'file_time'}}[$file_index] = $file_datenum; open(DATA_FILE,$file); #Loop through all lines of data while ($data_line=) { #Here's the work! Seperate all of the data and store it in #a way that we can easily stuff it into the NetCDF files at #the end of it all &seperate_sensors; } #Done reading this file close(DATA_FILE); } #Done looping through the files #Just a thought here... perhaps all parsing and conversions should #occur here, operating on the arrays of data, rather than for each #time... That might also be a better way to handle derived #variables like pir, density, etc. Maybe later. } ###################################################################### ### Test Data ######################################################## sub test_data { print "Trying all keys of data array\n"; foreach $param (@vars) { print "...$param,"; @temp_array = @{$Vars{$param}}; print "Array: $#temp_array, $temp_array[0]\n"; } } ###################################################################### ### Write Netcdf ##################################################### sub write_netcdf { #I think I'd like to pull this out into another script. The point #of this subroutine is to initialize and populate the NetCDF file #that will contain the data. #################################################################### # Create a NetCDF file to work with! if ($debug) { print "\nCreating NetCDF files:\n"; } $nc_name = "${out_data_dir}$out_file_name"; $nc_sname = "${out_data_dir}$out_file_sname"; unless ($#platform_time>0) { print "Exiting Now! \nFATAL ERROR:\n"; die " There must be at least one valid record!\n". " Please try another date range!\n"; } $ncid = NetCDF::create($nc_name,NetCDF::CLOBBER); die "Couldn't open $nc_name\n" if $ncid<0; $ncid_system = NetCDF::create($nc_sname,NetCDF::CLOBBER); die "Couldn't open $nc_sname\n" if $ncid_system<0; #################################################################### # Put Global Attributes... if ($debug) { print "... Putting global variables\n"; } #I had this, but it puts them in random order, so I'll use the order #direct from globals.table foreach $global_att (@globals) { $g_a_value = eval("$global_value{$global_att}"); $g_a_format = $global_format{$global_att}; unless ($g_a_value =~/N\/A/) { if ($debug>2) { print "$global_att is $g_a_value\n" } $attid = NetCDF::attput($ncid,NetCDF::GLOBAL,"$global_att", eval('NetCDF::'.$g_a_format), "$g_a_value"); $attid = NetCDF::attput($ncid_system,NetCDF::GLOBAL,"$global_att", eval('NetCDF::'.$g_a_format), "$g_a_value"); $attid != -1 || die "\n Couldn't write global attribute $global_att\n"; } } #################################################################### # Define dimensions, fixed ones first @{$Vars{LONGITUDE}} = ($tower_lon{$tower_id}); @{$Vars{LATITUDE}} = ($tower_lat{$tower_id}); #I did it this way because I was starting from Brians script (which #was generally extremely helpful) %dims = ( LONGITUDE => 1, LATITUDE => 1, TOWER_FILES => $#tower_files+1, CHARACTERS => 50, time => $#platform_time+1 ); %dim_names = ( 'a' => 'LATITUDE', 'o' => 'LONGITUDE', 'f' => 'TOWER_FILES', 'c' => 'CHARACTERS', 't' => 'time' ); foreach $dim (keys %dims) { if ($dimensions{$dim}=~/[fc]/){ $ncid{$dim} = $ncid_system; } elsif ($dimensions{$dim}=~/[tao]/) { $ncid{$dim} = $ncid; } unless ($dim =~/time/) { $dimid{$dim} = NetCDF::dimdef($ncid{$dim},$dim,$dims{$dim}); die "Couldn't define $dim dimension\n" if $dimid{$dim}<0; } } #################################################################### # Define time dimension $dimid{time} = NetCDF::dimdef($ncid,"time",NetCDF::UNLIMITED); die "Couldn't define record dimension\n" if $dimid{time}<0; #################################################################### # Define variables and put attributes, fixed ones first if ($debug) { print "... Defining variables and putting attributes\n"; } #################################################################### # Define variables and put attributes, now time dependant ones #Dimension variables like lat and lon. @all_attributes = ('long_name', 'standard_name', 'units', 'description', 'range', 'precision' ); @zeros = (0,0,0); #Variables are dependant at most on 4 dimensions #foreach $param (keys %Vars) { foreach $param (@vars) { if ($param =~/CLOCK/ || $param =~/BBAD/) { #We'll skip these for now... } elsif (defined $Vars{$param}) { &vardef_attput; } } #################################################################### # End Definition period! $status = NetCDF::endef($ncid); die "Couldn't end definition\n" if $status<0; $status = NetCDF::endef($ncid_system); die "Couldn't end definition\n" if $status<0; #################################################################### # Start putting variables, first the fixed variables if ($debug) { print "... Putting variables:\n\n"; } #foreach $param (keys %Vars) { foreach $param (@vars) { if ($param =~/CLOCK/ || $param =~/BBAD/) { #We'll skip these for now... } elsif (defined $Vars{$param}) { &varput; } } #################################################################### # All done, let's close up! $status = NetCDF::close($ncid); $status = NetCDF::close($ncid_system); } ###################################################################### ### Seperate Sensors ################################################# sub seperate_sensors { #This subroutine parses each line of data in a data file: determines #what the format of the line is, and sends the results onward to #compute derived variables, parse complicated data strings, and #convert to scientific units. The organization of this one is #pretty confusing... @data_line = split(', ',$data_line); @va = (); #If there is at least one comma in the data line it is of the usual #data format (not ADCP!) and we'll treat it normally. if ($#data_line) { #Everything other than the CTD and ADCP should fit this... if ($#data_line == 5) { ($time,$param_id,$type,$value,$units,$error_string) = @data_line; } #The CTD is still a special case becuase data is comma delimited. elsif ($#data_line == 8) { ($time,$param_id,$type,$va[0],$va[1],$va[2],$va[3],$units, $error_string) = @data_line; $value = join(',',@va); $value =~s/\s+//g; } #The CTD is still a special case becuase data is comma delimited. elsif ($#data_line == 9) { #print "9\n@data_line\n"; ($time,$param_id,$type,$va[0],$va[1],$va[2],$va[3],$va[4], $units,$error_string) = @data_line; $value = join(',',@va); $value =~s/\s+//g; } #The CTD is still a special case becuase data is comma delimited. elsif ($#data_line == 10) { #print "10\n@data_line\n"; ($time,$param_id,$type,$va[0],$va[1],$va[2],$va[3],$va[4], $va[5],$units,$error_string) = @data_line; $value = join(',',@va); $value =~s/\s+//g; } else { #print "WARNING: Data line not parsed correctly!\n@data_line\n"; #This is so a failed parse doesn't end up going to the previous #param_id ($time,$param_id,$type,@junk) = @data_line; #print "!!!!$param_id\n"; #$param_id = 'failed'; $value = "$fill_value,$fill_value,$fill_value,$fill_value"; } if($param_id=~m/CTD1/) { #$press = 0; #print "HEY! @data_line"; #print "$sal = $temp = $press = $cond = $fill_value\n"; } #If it has failed the previous two options, than something is wrong. #In fact, the second choice shouldn't ever occur, because Sara has #implemented a change that puts quotes around data values that need #special parsing. #elsif ($debug>2) { #print "Error suspected for data line:\n$data_line\n"; #} $big_type = $type; $big_type =~s/\([0-9]\)//g; #This is a first attempt at dealing with variables that contain #a number of sub variables. For example the CTD output string #contains temparature conductivity salinity and density. #our (@sub_vars,%sub_vars); @sub_vars = @{$sub_vars{$param_id}}; &compute_sub_vars; #If we are at the start of a file, keep track of the settings #listed, especially the param_id's that we expect to be in #each record, so that we can insert fill values where needed. if ($type=~/SETTING/ && $param_id=~/ACTIVE/) { #The following pattern is used in several of these cases. It #should probably be a subroutine... #1. Get rid of double quotes in the data string #2. Seperate the string into an array (each character is an entry) #3. If it is empty put in a dash as the first character (not sure # why I chose that... #4. Fill the unused portion of the 50 character array with spaces #5. Add the result to the master hash array for this param_id #1 $value =~s/\"//g; #2 @value = split("","$value"); @this_val = @value; #3 unless ($#value+1) { @value = ("-"); } #4 for ($ii=$#value+1; $ii<50; $ii++) { @this_val[$ii] = " "; } #5 push(@{$Vars{$param_id}},@this_val); $expected_vars = $value; $expected_vars=~s/\"//g; @expected_vars = split('\s',$expected_vars); my($t_type) = $param_id; $t_type =~s/^ACTIVE\_//; @{$expected_vars{$t_type}} = @expected_vars; } elsif ($type=~/CPU/) { #see if ($type=~/SETTING/ && $param_id=~/ACTIVE/) { $value =~s/\"//g; @value = split("","$value"); @this_val = @value; unless ($#value+1) { @value = (" "); } for ($ii=$#value+1; $ii<50; $ii++) { @this_val[$ii] = " "; } push(@{$Vars{$param_id}},@this_val); } elsif ($type=~/CLOCK/) { #see if ($type=~/SETTING/ && $param_id=~/ACTIVE/) { $value =~s/\"//g; @value = split("","$value"); @this_val = @value; unless ($#value+1) { @value = (" "); } for ($ii=$#value+1; $ii<50; $ii++) { @this_val[$ii] = " "; } push(@{$Vars{$param_id}},@this_val); } #The next little clause here is sort of weird. There are some #computed or parsed variables that are not in the raw file as #a param_id. For example PIRA PIRB and PIRC are all parameters #but we really want to know what PIR is and we don't care about #the raw data that goes into it... I'm just letting the script #know to expect these variables to be included later. elsif ($#{$expected_vars{$big_type}}) { push(@{$loop_vars{$big_type}},$param_id); if ($param_id=~/LINK$/) { if ($first_one) { @expected_vars = ('LINK','LINK_AMPS','LINK_VOLTS','LINK_AMP_HRS', 'LINK_TEMP'); push(@{$expected_vars{$big_type}},@expected_vars); @expected_vars = ('SAMPLE_LOOP','BBAD_SAMPLE_LOOP'); push(@{$expected_vars{TIME}},@expected_vars); @expected_vars = ('PIR'); push(@{$expected_vars{RLCAD}},@expected_vars); $first_one = 0; } push(@{$loop_vars{$big_type}}, 'LINK','LINK_AMPS','LINK_VOLTS', 'LINK_AMP_HRS','LINK_TEMP'); } } #If we are at the start of a data record, use this time for the #rest of the samples in this loop (may want to change this...) if ($param_id=~/ATEMP/) { $time_index++; #Here I'm trying to work in some logic to deal with times when #not every expected variable gets reported in the loop, for #example when the BBAD has a time-out error. This leads to #the system not reporting WSPDA or WDIRA, which is otherwise #not noticed, and the empty values lead to a segmentation fault #when the netcdf file is written. foreach $expected_type (keys %expected_vars) { $num_expected = $#{$expected_vars{$expected_type}}; $num_recieved = $#{$loop_vars{$expected_type}}; #Check to see if the number expected is greater than the number #recieved (and also greater than 0, to deal with system info that #doesn't describe variables in the loop!) if ($num_expected>$num_recieved && $num_expected && $time_index>=0) { #For every parameter that we are expecting, we should put in a #fill value. Actually this all should probably go outside the #loop as part of the initialization process. foreach $expected_var (@{$expected_vars{$expected_type}}) { #write fill value! ${$Vars{$expected_var}}[$time_index] = $fill_value; } #End Foreach expected var #Okay, this one is a bit wierd. Clock (sample_loop) is not #yet included in RLCAD, BBAD, or SERIAL, so it isn't expected. #Thus if it isn't reported, we need to insert a fill_value #In the current version, this IS fixed. In fact CLOCK isn't used #at all. It has been replaced by SAMPLE_LOOP. unless(${$Vars{"CLOCK"}}[$time_index]) { ${$Vars{"CLOCK"}}[$time_index] = $fill_value; } #end unless clock thing... }#end if still expecting more... }#end foreach expected type %loop_vars = (); #Now that is out of the way, time is a bit more normal, but still #needs to be dealt with seperately as it occurs only once per #sample loop and is not explicitly a loop variable. $platform_time[$time_index] = $time; $u_date = &UnixDate(&ParseDate("$time"),"%Y,%m,%d,%H,%M,%S"); ($year,$month,$day,$hour,$minute,$second) = split(',',$u_date); $u_date = &UnixDate(&ParseDate("$time UTC"),"%s"); $um1_date = &UnixDate(&ParseDate("$platform_time[$time_index -1] UTC"), "%s"); $sample_interval = $u_date - $um1_date; $hrs_per_sample = $sample_interval/3600; #Seconds/hr unless ($time_index) { $hrs_per_sample = 1/30; } #1970/01/01 00:00:00 as a datenum $epoch_dn = 719529; #Convert the seconds since epoch to a datenum $datenum = $u_date/86400 + $epoch_dn; ${$Vars{'year'}}[$time_index] = $year; ${$Vars{'month'}}[$time_index] = $month; ${$Vars{'day'}}[$time_index] = $day; ${$Vars{'hour'}}[$time_index] = $hour; ${$Vars{'minute'}}[$time_index] = $minute; ${$Vars{'second'}}[$time_index] = $second; ${$Vars{'time'}}[$time_index] = $datenum; }#end if ATEMP #Apply information from the parse table to split up the output &parse_vars; if ($param_id=~/GPS1/) { $param_id = 'GPSLON'; &convert_vars; $param_id = 'GPSLAT'; &convert_vars; $param_id = 'GPS1'; } #Use the coefficients and formulas from the conversions table to #apply any necessary conversions &convert_vars; #I'm not sure of a good way to get it to automatically convert #and derive pir from the case temperature and the thermistor... #This way is definitely a hack... #If we have data for PIRA and PIRC, then we can compute PIR if ($param_id =~/PIRC/ | $param_id =~/PIRA/) { if ($Vars{PIRA}[-1]!=~/$fill_value/ && $Vars{PIRC}[-1]!=~/$fill_value/){ $temporary_param = $param_id; $param_id = 'PIR'; $pir_ct = $Vars{PIRA}[-1]; $pir_th = $Vars{PIRC}[-1]; &convert_vars; $param_id = $temporary_param; } else { $Vars{PIR} = $fill_value; } } $value = undef; } #Otherwise the data line has no commas, so it must be an adcp line else { #I haven't put any logic in here yet #&parse_adcp; } } ###################################################################### ### Parse Vars ####################################################### sub parse_vars { #Seperate out the output from matching conditions #The hash array parse_output comes from the meta_data perl module that #looks into the data tables. It explains the string format for each #variable and how to assign the variables given that format. $output = $parse_output{$param_id}; @output = split(',',$output); if ($units{$param_id}=~/string/) { #It was complaining about the gps checksum until I added '' eval("$output[0] = '$value';"); } else { #Apply matching conditions from the parse table $old_v = $value; #Okay, this is hard wired... I know it's better to avoid, but I wanted #a quick fix... This handles wind, which is an array of values separated #by spaces. An average value should be presented for later conversion. if ($parse_info{$param_id}=~/\@avg/) { #Spit up space delimited values and ignore fill values $old_v =~s/\ \-9999//g; @value = split(' ',$old_v); eval("$output[0]".' = @value;'); } elsif ($parse_info{$param_id}=~/\@gva/) { #This is for the compass pitch and roll which is weird... @comp = @pitch = @roll = (); @value = split('\$',$old_v); foreach $vals (@value) { $vals =~m/$C([0-9\.\-]*)P([0-9\.\-]*)R([0-9\.\-]*)T.*/; push(@comp,$1); push(@pitch,$2); push(@roll,$3); #print "so $1, $2, $3\n"; } #eval("$output[0]".' = @value;'); } #If the output is one parameter (applies to almost all sensors) elsif ($#output==0) { #Assign the output value to the parse_info matching condition #Here $1 signifies the part that matches what is inside the #first set of parenthesis in a matching condition. #Example: if $value is "PS=+14.693" # then $value =~m/"PS\=\+(.*)"/; # sets $1 to be 14.693 $value =~m/$parse_info{$param_id}/; @value = $1; #As long as there is a defined value, set the output to that if (defined $value[0]) { eval("$output[0]".' = $value[0];'); } else { eval("$output[0] = $fill_value;"); } } #Applies to the ctd and the link, in particular elsif ($#output>0) { if ($param_id =~m/CTD/) { #As long as the string begins with '"#0' proceed. if ($value=~m/^\"\#0/) { $pi = $parse_info{$param_id}; } else { $pi = 'NOBAD!!'; } } else { $pi = $parse_info{$param_id}; } if ($param_id=~/LINK/) { #print "$value\n$pi\n"; } elsif ($param_id=~/GPS1/) { #print "$value\n"; @v = split('',$value); $mygps_lat = join('',@v[11..17]); $mygps_lon = join('',@v[19..25]); } $value =~m/$pi/; #Loop through each output parameter and assign the value #For example suppose there were three variables in the matching #condition '$value =~m/$pi/;', then they will be assigned to #$1,$2, and $3, thus you want to assign them as #eval("$output[0] = $1;"); #so if @output = ($temp,$humidity,$pressure) #then you have $temp set to the contents of $1. Clear??? for ($out_num=1; $out_num<=$#output+1; $out_num++) { eval("$output[$out_num-1] =\$$out_num;"); } if ($param_id=~/LINK/) { #print ": $link_time, $link_volts, \n"; } elsif ($param_id=~/GPS1/) { if ($mygps_lat+0) {$gps_lat = $mygps_lat;} else {$gps_lat = $fill_value;} if ($mygps_lon+0) {$gps_lon = $mygps_lon;} else {$gps_lon = $fill_value;} } } } } ###################################################################### ### Convert Vars ##################################################### sub convert_vars { our($fill_value); #Again, these coefficients and formulas are coming from the #meta_data perl module, and the corresponding data tables. #Since conversion coefficients are different for the different #platforms it looks like this $R4_conversion_coeff{PSP} #Get conversion coefficients and formula from hash array for #each tower id and each parameter id. eval ('$c_coeff = $'.$tower_id.'_conversion_coeff{$param_id};'); eval ('$c_formula = $'.$tower_id.'_conversion_formula{$param_id};'); #Split up conversion coefficients from the converstions table @v = split(',',$c_coeff); #If there is any set of conversion coefficients (and hence a #conversion to be applied), apply them! if ($#v+1) { $temp_var = ''; if ($c_formula=~/^\&/) { #If the conversion formula is a subroutine call, evaluate! eval("$c_formula"); } else { #As long as the output wasn't the fill value, convert it. my ($out) = eval("$output[0]"); unless($out=~/$fill_value/) { $temp_var = eval("$c_formula"); } #Otherwise, we don't want to convert a fill value to #something else! else { $temp_var = $fill_value; } #Set the array index based upon the record dimension &set_index; #This should go somewhere else... if ($param_id=~/RAIN/) { #print "Hrs per : $hrs_per_sample\n"; $temp_var = $temp_var/$hrs_per_sample; } #Finally, set this value of the Vars super duper array #for this time to the value that we've computed unless ($index +1) { print "Warning. $param_id has index $index. Adjusting...\n"; ${$Vars{$param_id}}[$index+1] = $temp_var; } else { ${$Vars{$param_id}}[$index] = $temp_var; } } } else { #print "No conversion info available for $param_id at $tower_id\n"; } } ###################################################################### ### Handle Input ##################################################### sub handle_input { #By putting default values in a file, the script should easily switch #back and forth between the afs version and the local nemo version! open(DEFAULTS,"${bin_dir}tower_parse.defaults"); while () { #Get rid of the newline chop; #Ignore comments starting with # $_=~s/#.*//; #If the line is non-empty if ($_) { my($var_name, $default_value) = split(','); eval("$var_name = $default_value;"); } } ($date_string,%varargin) = @ARGV; #Associate each input argument pair with the appropriate variable %varargout = ( '-out' => '$out_file_name', '-d' => '$debug', '-t' => '$tower_id', '-p' => '$package', ); #Set each variable to the value of the associated input argument #Example 1: '-d 1' sets the debug flag to true #Example 2: '-out 2004_06.nc' sets the output file name to 2004_06.nc foreach $argin (keys %varargin) { eval("$varargout{$argin} = '$varargin{$argin}';"); } $d_data_dir = "$d_data_dir$tower_id/"; $out_data_dir = "$out_data_dir$tower_id/"; #Since there are a few different types of date string inputs, we need #to figure out how to deal with this one! if ($date_string) { &handle_date; } #If there's no date string, then we don't know what to look for! else { die "Must at least input a date string!\n". "./tower_parse.perl '200406' -out 2004_06.nc -d 1\n"; } #Helps the user chooose an appropriate filename unless ($out_file_name=~/.nc$/) { if ($out_file_name) { $out_file_sname = "${out_file_name}_sys.nc"; $out_file_name = "${out_file_name}.nc"; } else { $out_file_name = $d_out_file_name; @do = split('',$d_out_file_name); $out_file_sname = join('',@do[0..$#do-3],'_sys.nc'); } } else { unless ($out_file_sname) { @do = split('',$out_file_name); $out_file_sname = join('',@do[0..$#do-3],'_sys.nc'); } } #This will come up many times throughout the script. Another way of #looking at the debug flag is that it turns on comments (verbose). if ($debug) { print "\n\nData dir : $d_data_dir\nOutput dir : $out_data_dir\n". "Outfile name : $out_file_name and $out_file_sname\n"; if ($#dates) { print "\nLooking for data between $dates[0] and $dates[-1]\n"; } } } ###################################################################### ### Handle Date ###################################################### sub handle_date { #Deal with date input. if ($date_string=~/\ /) { ($st_date,$en_date) = split(' ',$date_string); @st_date = split('',$st_date); @en_date = split('',$en_date); #print "So $st_date, $en_date is $#st_date or @st_date\n"; $st_date = join('',(@st_date[0..3],' ',@st_date[4..5],' ',@st_date[6..7])); $en_date = join('',(@en_date[0..3],' ',@en_date[4..5],' ',@en_date[6..7])); #print "So $st_date, $en_date is @st_date\n"; $this_date = $st_date; $this_parsed = &ParseDate($st_date); $en_parsed = &ParseDate($en_date); @dates = (); %month_dirs = (); while ($this_parsed <= $en_parsed) { $ymd = &UnixDate($this_parsed,"%Y%m%d"); push(@dates,$ymd); $month_dirs{$ymd} = &UnixDate($this_parsed,"%Y_%m"); $this_date = &UnixDate(&DateCalc($this_date,"+ 1 day"), "%Y %m %d"); $this_parsed = &ParseDate($this_date); } #print "$#dates Dates! @dates\n"; } elsif ($date_string=~/latest/) { $today = &ParseDate("today"); $date1 = &UnixDate(&DateCalc("$today","- 2 day"),"%Y%m%d"); $date2 = &UnixDate(&DateCalc("$today","- 1 day"),"%Y%m%d"); $date3 = &UnixDate("$today","%Y%m%d"); $mond1 = &UnixDate(&DateCalc("$today","- 2 day"),"%Y_%m"); $mond2 = &UnixDate(&DateCalc("$today","- 1 day"),"%Y_%m"); $mond3 = &UnixDate("$today","%Y_%m"); @dates = ($date1,$date2,$date3); %month_dirs = ($date1,$mond1,$date2,$mond2,$date3,$mond3); #print "@dates\n"; } elsif ($date_string =~/help/) { #oops this isn't a date string at all! # I should probably put this somewhere else... print "\nUsage: ./tower_parse.perl date_string\n\n". "Optional argument pairs:\n". " -out output file name\n". " -d debug\n". # " -dir directory path to data\n". " -t tower id\n". " -nemo scp data file to nemo\n\n"; print "Date string can have any of the following formats:\n". " yyyymm Finds all data for the month\n". " yyyymmdd Finds data just for one day\n". " 'yyyymmdd yyyymmdd' Finds data within the date range\n". " latest Finds today, yesterday and the previous day\n"; die "\n"; } else { @dates = ($date_string); @date_str = split('',$date_string); $fn = join('',(@date_str[0..3],'_',@date_str[4..5])); unless($out_file_name) { #Unless it has been input by the user! $out_file_name = "${tower_id}_${fn}.nc"; $out_file_sname = "${tower_id}_${fn}_sys.nc"; } #Also change default data directory! $month_dirs{$date_string} = $fn; } @month_dirs = values (%month_dirs); } ###################################################################### ### Parse ADCP ####################################################### sub parse_adcp { #print "We have ADCP data!\n"; #$data = join('',@data); } ###################################################################### ### Vardef Attput #################################################### sub vardef_attput { #This is the generic variable definition and attribute put used by #write_netcdf. $format = 'NetCDF::'.$format{$param}; if ($debug>2) { print "$param is of format $format\n"; } @dimensions = split('\s',$dimensions{$param}); @dim_names = @dim_names{@dimensions}; @dim_ids = @dimid{@dim_names}; $dimensions = join('',@dimensions); #Set the record dimension to be time or file number based on #contents of variables.table if ($dimensions=~/f/) { $ncid{$param} = $ncid_system; } elsif ($dimensions=~/t/) { $ncid{$param} = $ncid; } $varid{$param} = NetCDF::vardef($ncid{$param},$param, eval("$format"), [@dim_ids]); die "Couldn't define variable $param\n" if $varid{$param}<0; #print "$param should have $#all_attributes\n"; foreach $attribute (@all_attributes) { $att_val = eval('\$'.$attribute.'{$param}'); if (defined $att_val && $att_val!=~/^N\/A/) { $attid = NetCDF::attput($ncid{$param},$varid{$param},"$attribute", NetCDF::CHAR, eval('\$'.$attribute.'{$param}')); }#end if }#end foreach $attribute } ###################################################################### ### Varput ########################################################### sub varput { $format = 'NetCDF::'.$format{$param}; @dimensions = split('\s',$dimensions{$param}); @dim_names = @dim_names{@dimensions}; #@dim_ids = @dimid{@dim_names}; @dim_counts = @dims{@dim_names}; @dim_starts = @zeros[0..$#dim_names]; unless ($param =~/CLOCK/ || $param =~/BBAD/ || $param =~/GPS$/ || $param =~/LINK$/) { #No matter how good it looks, test the arrray for undefined values! #(This is painfully slow and really should be replaced by initializing #the arrays at the start...) @temp_array = @{$Vars{$param}}; for ($ii=0; $ii<=$#temp_array; $ii++) { unless (defined($temp_array[$ii])) { ${$Vars{$param}}[$ii] = $fill_value; } } if ($debug>1) { print "$param length ".eval($#{$Vars{$param}}+1). ", id $varid{$param}\n"; } unless ($format=~/CHAR/) { #Put the variable into the netcdf file! $status = NetCDF::varput($ncid{$param},$varid{$param}, \@dim_starts,\@dim_counts, \@{$Vars{$param}}); } else { #Test to make sure it is as long as we expected... if ($#{$Vars{$param}}+1==($#tower_files+1)*50) { $status = NetCDF::varput($ncid{$param},$varid{$param}, \@dim_starts,\@dim_counts, \@{$Vars{$param}}); } elsif ($debug>1) { print "Didn't have enough values for $param\n"; } } }#End unless that ignores clock,bbad,gps and link strings else { print "$param being ignored\n"; } } ###################################################################### ### Compute Sub Vars ################################################# sub compute_sub_vars { if ($#sub_vars +1) { $orig_param = $param_id; foreach $sub_param (@sub_vars) { $param_id = $sub_param; &convert_vars; } $param_id = $orig_param; } } ###################################################################### ### Set Index ######################################################## sub set_index { #If the record dimension is time, use the time index if ($dimensions{$param_id}=~/t/) { $index = $time_index; } #If the record dimension is file number use that index elsif ($dimensions{$param_id}=~/f/) { $index = $file_index; } } ###################################################################### ### Avg Wind ######################################################### sub avg_wind { #1. Check for all necessary components: # a. 'spda' -> WSPDA # b. 'dira' -> WDIRA # c. 'spdb' -> WSPDB (Optional) [alternative is 'spdn' if empty] # d. 'dirb' -> WDIRB (Optional) [alternative is 'dirn' if empty] # e. 'cmp1' -> COMP1 (Optional) [alternative is an offset, which # defaults to 0 unless otherwise set... Does not requre pkgb] # f. 'cmp2' -> COMP2 [Not Used!] # #2. Loop through times: # a. Replace out of range values or empty package with fill_value # b. Apply direction offset # c. Compute vector components (u,v) # #3. Compute statistics # #4. Store data values my(@junk) = (); my(@input) = (@_); %names = ('spda'=>'wi_spd_a', 'dira'=>'wi_dir_a', 'spdb'=>'wi_spd_b', 'dirb'=>'wi_dir_b', 'spdn'=>'junk', 'dirn'=>'junk', 'cmp1'=>'comp_1', 'cmp2'=>'comp_2'); #1. Check for all necessary components: if ($input[0]=~/spda/) { #First variable on the list, initialize temporary data arrays #If we've already been through the loop once... if ($dira[0]) { $real_last_var = $last_var; if ($#cmp1<0) { #No compass data. Nor is there an available offset. @cmp1 = (0); } } else { #for the first one, so it doesn't confuse the real_last_var $real_last_var = 'NOPE'; } #Now initialize the arrays @spda = @dira = @spdb = @dirb = @cmp1 = @cmp2 = (); } elsif ($input[0]=~/dir/) { if ($#input==1) { #Then we have been given a directional offset and should #apply it! $offset = $input[1]; } else { $offset = 0; } } #print("\@$input[0] = \@$names{$input[0]};\n"); eval("\@$input[0] = \@$names{$input[0]};"); if ($input[0]=~/$real_last_var/) { #Write the data from this sample. &handle_wind; &write_wind; } $last_var = $input[0]; } ###################################################################### ### Handle Wind ###################################################### sub handle_wind { #This subroutine works for average_wind: once data arrays are #collected it preforms all the necessary operations on them. my($time); our($u_mean_a,$u_max_a,$u_std_a,$u_mean_b,$u_max_b,$u_std_b, $v_mean_a,$v_max_a,$v_std_a,$v_mean_b,$v_max_b,$v_std_b, $mean_spd_a,$mean_dir_a,$mean_spd_b,$mean_dir_b,$spd_std_a, $dir_std_a,$spd_std_b,$dir_std_b,$gust_a,$gust_b) = $fill_value; my(@u_a,@v_a,@u_b,@v_b,@temp_cmp,@comp_off) = (); #Loop through samples... for ($time=0; $time<=$#spda; $time++) { #Make sure there is some directional offset to apply! if ($#cmp1<1) { #If compass data wasn't parsed correctly, fix it! if ($cmp1[0]=~/\ /) { @temp_cmp = split(' ',$cmp1[0]); @cmp1 = @temp_cmp; $comp_off[$time] = &volts2comp($cmp1[$time]) + $offset; } #No compass, using fixed offset else { $comp_off[$time] = &volts2comp($cmp1[0]) + $offset; } } else{ #There is a compass reading so use that (buoy) $comp_off[$time] = &volts2comp($cmp1[$time]) + $offset; } #Apply simple, static conversion from voltage to scientific values ($spda[$time],$dira[$time]) = &volts2spddir($spda[$time],$dira[$time]); ($spdb[$time],$dirb[$time]) = &volts2spddir($spdb[$time],$dirb[$time]); #Make sure speeds are reasonable and if speed or direction are #bad, both are badflagged $bada = ($spda[$time]>100 | $spda[$time]==$fill_value | $dira[$time]==$fill_value); if ($bada) { $spda[$time] = $dira[$time] = $u_a[$time] = $v_a[$time] = $fill_value; } else { #Take the direction mod 360 $dira[$time] = &modulo($dira[$time] + $comp_off[$time], 360); #Convert Spd and Dir to U and V components ($u_a[$time],$v_a[$time]) = &spddir2uv($spda[$time],$dira[$time]); } #Ditto for the b package! (as long as there is a b package...) unless ($#spdb<0) { $badb = ($spdb[$time]>100 | $spdb[$time]==$fill_value | $dirb[$time]==$fill_value); if ($badb) { $spdb[$time] = $dirb[$time] = $u_b[$time] = $v_b[$time] = $fill_value; } else { $dirb[$time] = &modulo($dirb[$time] + $comp_off[$time], 360); ($u_b[$time],$v_b[$time]) = &spddir2uv($spdb[$time],$dirb[$time]); } } } #a package stats #($mean,$std,$max) = &fill_stats(@array); ($u_mean_a,$u_std_a,$u_max_a) = &fill_stats(@u_a); ($v_mean_a,$v_std_a,$v_max_a) = &fill_stats(@v_a); #In terms of speed, mean speed is wrong (undef,$spd_std_a,$gust_a) = &fill_stats(@spda); #For direction, only std dev is right... (undef,$dir_std_a,undef) = &fill_stats(@dira); ($mean_spd_a,$mean_dir_a) = &uv2spddir($u_mean_a,$v_mean_a); ($comp_mean,$comp_std,undef) = &fill_stats(@comp_off); #b package stats unless ($#u_b<0) { ($u_mean_b,$u_std_b,$u_max_b) = &fill_stats(@u_b); ($v_mean_b,$v_std_b,$v_max_b) = &fill_stats(@v_b); #In terms of speed, mean speed is wrong (undef,$spd_std_b,$gust_b) = &fill_stats(@spdb); #For direction, only std dev is right... (undef,$dir_std_b,undef) = &fill_stats(@dirb); ($mean_spd_b,$mean_dir_b) = &uv2spddir($u_mean_b,$v_mean_b); } } ###################################################################### ### Write_Wind ######################################################## sub write_wind { unless ($mean_dir_a == $fill_value || $mean_spd_a == $fill_value) { #put variables into array!!! ${$Vars{WSPDA}}[$index] = $mean_spd_a; ${$Vars{WDIRA}}[$index] = $mean_dir_a; ${$Vars{WEASTA}}[$index] = $u_mean_a; ${$Vars{WNORTHA}}[$index] = $v_mean_a; ${$Vars{WSPDSTDA}}[$index] = $spd_std_a; ${$Vars{WDIRSTDA}}[$index] = $dir_std_a; ${$Vars{WGSTA}}[$index] = $gust_a; } else { ${$Vars{WSPDA}}[$index] = $fill_value; ${$Vars{WDIRA}}[$index] = $fill_value; ${$Vars{WEASTA}}[$index] = $fill_value; ${$Vars{WNORTHA}}[$index] = $fill_value; ${$Vars{WSPDSTDA}}[$index] = $fill_value; ${$Vars{WDIRSTDA}}[$index] = $fill_value; ${$Vars{WGSTA}}[$index] = $fill_value; } unless ($mean_dir_b == $fill_value || $mean_spd_b == $fill_value) { #put variables into array!!! ${$Vars{WSPDB}}[$index] = $mean_spd_b; ${$Vars{WDIRB}}[$index] = $mean_dir_b; ${$Vars{WEASTB}}[$index] = $u_mean_b; ${$Vars{WNORTHB}}[$index] = $v_mean_b; ${$Vars{WSPDSTDB}}[$index] = $spd_std_b; ${$Vars{WDIRSTDB}}[$index] = $dir_std_b; ${$Vars{WGSTB}}[$index] = $gust_b; } else { ${$Vars{WSPDB}}[$index] = $fill_value; ${$Vars{WDIRB}}[$index] = $fill_value; ${$Vars{WEASTB}}[$index] = $fill_value; ${$Vars{WNORTHB}}[$index] = $fill_value; ${$Vars{WSPDSTDB}}[$index] = $fill_value; ${$Vars{WDIRSTDB}}[$index] = $fill_value; ${$Vars{WGSTB}}[$index] = $fill_value; } unless ($comp_std == 0 || $comp_mean == -9999) { ${$Vars{COMP1}}[$index] = $comp_mean; } else { ${$Vars{COMP1}}[$index] = $fill_value; } } ###################################################################### ### Handle Comp ###################################################### sub handle_comp { #This subroutine works for average_wind: once data arrays are #collected it preforms all the necessary operations on them. #Trying to figure out what to do with hourly compass measurements #including pitch and roll. Need mean and std #print "HANDLE COMP!$#comp $#pitch $#roll\n"; our($compd_mean,$compd_std,$compd_max,$pitch_mean,$pitch_std, $pitch_max,$roll_mean,$roll_std,$roll_max) = $fill_value; my(@u_c,@v_c,@u_p,@v_p,@u_r,@v_r) = (); #print "Okay? $comp[0] $pitch[0] $roll[0]?\n"; if ($#comp>0) { #print "ARRAY: @comp\n"; #print "Looping compass...\n"; #Loop through samples... for ($time=0; $time<=$#comp; $time++) { #Since we don't have a speed, we will use the unit vector for #the averaging... #print "C: $comp[$time] P: $pitch[$time] R: $roll[$time] at 1\n"; #Take the direction mod 360 $comp[$time] = &modulo($comp[$time], 360); $pitch[$time] = &modulo($pitch[$time], 360); $roll[$time] = &modulo($roll[$time], 360); #print "C: $comp[$time] P: $pitch[$time] R: $roll[$time] at 2\n"; #Convert Spd and Dir to U and V components ($u_c[$time],$v_c[$time]) = &spddir2uv(1,$comp[$time]); ($u_p[$time],$v_p[$time]) = &spddir2uv(1,$pitch[$time]); ($u_r[$time],$v_r[$time]) = &spddir2uv(1,$roll[$time]); #print "C: $u_c[$time] $v_c[$time] P: $u_p[$time] $v_p[$time]". #" R: $u_r[$time] $v_r[$time] at 3\n"; } #($mean,$std,$max) = &fill_stats(@array); ($u_c_mean,$u_c_std,$u_c_max) = &fill_stats(@u_c); ($u_p_mean,$u_p_std,$u_p_max) = &fill_stats(@u_p); ($u_r_mean,$u_r_std,$u_r_max) = &fill_stats(@u_r); ($v_c_mean,$v_c_std,$v_c_max) = &fill_stats(@v_c); ($v_p_mean,$v_p_std,$v_p_max) = &fill_stats(@v_p); ($v_r_mean,$v_r_std,$v_r_max) = &fill_stats(@v_r); #Now get back to the direction! (undef,$compd_mean) = &uv2spddir($u_c_mean,$v_c_mean); (undef,$compd_std) = ($u_c_std**2+$v_c_std**2)**(1/2); (undef,$compd_max) = &uv2spddir($u_c_max,$v_c_max); (undef,$pitch_mean) = &uv2spddir($u_p_mean,$v_p_mean); (undef,$pitch_std) = ($u_p_std**2+$v_p_std**2)**(1/2); (undef,$pitch_max) = &uv2spddir($u_p_max,$v_p_max); (undef,$roll_mean) = &uv2spddir($u_r_mean,$v_r_mean); (undef,$roll_std) = ($u_r_std**2+$v_r_std**2)**(1/2); (undef,$roll_max) = &uv2spddir($u_r_max,$v_r_max); #print "Compass: $comp_mean, $comp_std, $comp_max\n@comp\n"; #print "Pitch: $pitch_mean, $pitch_std, $pitch_max\n@pitch\n"; #print "Roll: $roll_mean, $roll_std, $roll_max\n@roll\n"; ${$Vars{COMPMEAN}}[$file_index] = $compd_mean; ${$Vars{COMPSTD}}[$file_index] = $compd_std; ${$Vars{COMPMAX}}[$file_index] = $compd_max; ${$Vars{PITCHMEAN}}[$file_index] = $pitch_mean; ${$Vars{PITCHSTD}}[$file_index] = $pitch_std; ${$Vars{PITCHMAX}}[$file_index] = $pitch_max; ${$Vars{ROLLMEAN}}[$file_index] = $roll_mean; ${$Vars{ROLLSTD}}[$file_index] = $roll_std; ${$Vars{ROLLMAX}}[$file_index] = $roll_max; } else { #print "Nope...\n"; ${$Vars{COMPMEAN}}[$file_index] = $fill_value; ${$Vars{COMPSTD}}[$file_index] = $fill_value; ${$Vars{COMPMAX}}[$file_index] = $fill_value; ${$Vars{PITCHMEAN}}[$file_index] = $fill_value; ${$Vars{PITCHSTD}}[$file_index] = $fill_value; ${$Vars{PITCHMAX}}[$file_index] = $fill_value; ${$Vars{ROLLMEAN}}[$file_index] = $fill_value; ${$Vars{ROLLSTD}}[$file_index] = $fill_value; ${$Vars{ROLLMAX}}[$file_index] = $fill_value; } } ###################################################################### ### Volts2SpdDir ##################################################### sub volts2spddir { my($volts_spd,$volts_dir) = @_; my($wind_spd,$wind_dir) = undef; unless($volts_spd=~/$fill_value/) { #0->1 V : 0 60 m/s $wind_spd = $volts_spd*60; } else { $wind_spd = $fill_value; } unless($volts_dir=~/$fill_value/) { #0->1 V : 0 360 deg $wind_dir = $volts_dir*360; } else { $wind_dir = $fill_value; } return ($wind_spd,$wind_dir); } ###################################################################### ### Volts2Comp ####################################################### sub volts2comp { my($volts_comp) = @_; unless($volts_comp=~/$fill_value/) { #0->1 V : 0 360 deg return $volts_comp*360; } else { return $fill_value; } } ###################################################################### ### SpdDir2UV ######################################################## sub spddir2uv { #convert speed and direction to u and v my($wind_spd,$wind_dir) = @_; my($deg2rad) = .0174533; my($wind_u,$wind_v) = undef; unless($wind_spd=~/$fill_value/ || $wind_dir=~/$fill_value/) { #okay this is terrible but I'm at a loss what else to do. $wind_v = ($wind_spd)*cos($wind_dir*$deg2rad); $wind_u = ($wind_spd)*sin($wind_dir*$deg2rad); return ($wind_u,$wind_v); } else { #print "no!?"; return ($fill_value,$fill_value); } } ###################################################################### ### UV2SpdDir ######################################################## sub uv2spddir { #convert speed and direction to u and v my($wind_u,$wind_v) = @_; my($deg2rad) = .0174533; my($wind_spd,$wind_dir) = undef; unless($wind_u=~/$fill_value/ || $wind_v=~/$fill_value/) { $wind_spd = ($wind_u**2 + $wind_v**2)**(1/2); $wind_dir = &modulo(atan2($wind_u, $wind_v)/$deg2rad,360); return ($wind_spd,$wind_dir); } else { return ($fill_value,$fill_value); } } ###################################################################### ### Avg Rain ######################################################### sub avg_rain { #Convert cumulative rainfall readings for self siphoning rain #guage into a rainfall amount over the sampling period. #I haven't figured this out yet... #print "RAIN???\n"; #First I need to check the voltage -> volume. #Then, make sure it isn't in the process of dumping (look at diff, #confirm that it isn't largely negative) #Finally look at the change since the previous sample loop to get #the cumulative rain (and if it is negative, add the dumping #volume) #print "Testing rain:\n"; #my ($ind,$raindiff); #for($ind=1;$ind<$#cumrain;$ind++) { # $raindiff = $cumrain[$ind]-$cumrain[$ind-1]; # if ($raindiff<-.05) { # print "Dumping???\n"; # } # elsif ($raindiff>.05) { # print "Raining???\n"; # } #} my($maxrain); $maxrain = &max(@cumrain); unless ($maxrain=~/$fill_value/) { $maxrain = $maxrain*10; } else { $maxrain = $fill_value; } ${$Vars{RAIN}}[$index] = $maxrain; } ###################################################################### ### Fill Stats ####################################################### sub fill_stats(@array) { my(@array,$mean_value); @array = @_; $array = join(',',@array); $array =~s/$fill_value//g; $array =~s/\,\,/\,/g; @array = split(',',$array); unless ($#array<0) { ($mean) = &mean(@array); ($std) = &std_dev(@array); ($max) = &max(@array); return ($mean,$std,$max); } else { return ($fill_value,$fill_value,$fill_value); } } ###################################################################### ### Mean ############################################################# sub mean(@array) { my(@array,$mean_value); @array = @_; $sum_array = join('+',@array); if ($sum_array =~m/\+\+/) { return $fill_value; } #print "Averaging!\n$sum_array\n"; eval('$mean_value = ('."$sum_array".')/($#array+1);'); return $mean_value; } ###################################################################### ### Std Dev ########################################################## sub std_dev(@array) { my(@array,$std_dev); @array = @_; my $temp_std = new Statistics::Basic::StdDev(\@array); $std_dev = $temp_std -> query; return $std_dev; } ###################################################################### ### Max ############################################################## sub max { #From perldoc.com my $max = shift(@_); foreach $foo (@_) { $max = $foo if $max < $foo; } return $max; } ###################################################################### ### Modulo ########################################################### sub modulo($element,$modulus) { #Do modulus of the element, but don't throw away the remainder my($element,$modulus,$el,$floor) = @_; if ($element < 0) { return $element + $modulus; } elsif ($element <= $modulus) { return $element; } elsif ($element > 3*$modulus) { #print "Wow $element is HUGE!\n"; return $fill_value; } elsif ($element > 2*$modulus) { return $element - 2*$modulus; } elsif ($element > $modulus) { return $element - $modulus; } }