BESCache.cc

Go to the documentation of this file.
00001 // BESCache.cc
00002 
00003 // This file is part of bes, A C++ back-end server implementation framework
00004 // for the OPeNDAP Data Access Protocol.
00005 
00006 // Copyright (c) 2004-2009 University Corporation for Atmospheric Research
00007 // Author: Patrick West <pwest@ucar.edu> and Jose Garcia <jgarcia@ucar.edu>
00008 //
00009 // This library is free software; you can redistribute it and/or
00010 // modify it under the terms of the GNU Lesser General Public
00011 // License as published by the Free Software Foundation; either
00012 // version 2.1 of the License, or (at your option) any later version.
00013 // 
00014 // This library is distributed in the hope that it will be useful,
00015 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00016 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00017 // Lesser General Public License for more details.
00018 // 
00019 // You should have received a copy of the GNU Lesser General Public
00020 // License along with this library; if not, write to the Free Software
00021 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00022 //
00023 // You can contact University Corporation for Atmospheric Research at
00024 // 3080 Center Green Drive, Boulder, CO 80301
00025  
00026 // (c) COPYRIGHT University Corporation for Atmospheric Research 2004-2005
00027 // Please read the full copyright statement in the file COPYRIGHT_UCAR.
00028 //
00029 // Authors:
00030 //      pwest       Patrick West <pwest@ucar.edu>
00031 //      jgarcia     Jose Garcia <jgarcia@ucar.edu>
00032 
00033 #include "config.h"
00034 
00035 #include <unistd.h>  // for unlink
00036 #include <sys/types.h>
00037 #include <sys/stat.h>
00038 #include <dirent.h>
00039 #include <fcntl.h>
00040 
00041 #include <cstring>
00042 #include <cerrno>
00043 #include <map>
00044 #include <iostream>
00045 #include <sstream>
00046 
00047 using std::multimap ;
00048 using std::pair ;
00049 using std::greater ;
00050 using std::endl ;
00051 
00052 #include "BESCache.h"
00053 #include "TheBESKeys.h"
00054 #include "BESSyntaxUserError.h"
00055 #include "BESInternalError.h"
00056 #include "BESDebug.h"
00057 
00058 #define BES_CACHE_CHAR '#'
00059 
/** @brief Bookkeeping record for one file found in the cache directory
 *
 * Used by BESCache::purge() to remember which files may be deleted and how
 * many bytes each deletion frees.
 */
struct cache_entry
{
    string name ;   // full path of the cached file
    off_t size ;    // size in bytes; off_t matches stat's st_size so that
                    // large files are not truncated into a (possibly
                    // negative) int
} ;
00065 
00066 void 
00067 BESCache::check_ctor_params()
00068 {
00069     if( _cache_dir.empty() )
00070     {
00071         string err = "The cache directory was not specified, must be non-empty";
00072         throw BESSyntaxUserError( err, __FILE__, __LINE__ ) ;
00073     }
00074 
00075     struct stat buf;
00076     int statret = stat( _cache_dir.c_str(), &buf ) ;
00077     if( statret != 0 || ! S_ISDIR(buf.st_mode) )
00078     {
00079         string err = "The cache directory " + _cache_dir + " does not exist" ;
00080         throw BESSyntaxUserError( err, __FILE__, __LINE__ ) ;
00081     }
00082 
00083     if( _prefix.empty() )
00084     {
00085         string err = "The cache file prefix was not specified, must be non-empty" ;
00086         throw BESSyntaxUserError( err, __FILE__, __LINE__ ) ;
00087     }
00088 
00089     if( _cache_size == 0 )
00090     {
00091         string err = "The cache size was not specified, must be non-zero" ;
00092         throw BESSyntaxUserError( err, __FILE__, __LINE__ ) ;
00093     }
00094     // the cache size is specified in megabytes. When calculating
00095     // the size of the cache we convert to bytes, which is 1048576
00096     // bytes per meg. The max unsigned int allows for only 4095
00097     // megabytes.
00098     if( _cache_size > 4095 ) _cache_size = 4095 ;
00099 
00100     BESDEBUG( "bes", "BES Cache: directory " << _cache_dir
00101                      << ", prefix " << _prefix
00102                      << ", max size " << _cache_size << endl ) ;
00103 }
00104 
00114 BESCache::BESCache( const string &cache_dir,
00115                     const string &prefix,
00116                     unsigned int size )
00117     : _cache_dir( cache_dir ),
00118       _prefix( prefix ),
00119       _cache_size( size ),
00120       _lock_fd( -1 )
00121 {
00122     check_ctor_params(); // Throws BESSyntaxUserError on error.
00123 }
00124 
00139 BESCache::BESCache( BESKeys &keys,
00140                     const string &cache_dir_key,
00141                     const string &prefix_key,
00142                     const string &size_key )
00143     : _cache_size( 0 ),
00144       _lock_fd( -1 )
00145 {
00146     bool found = false ;
00147     keys.get_value( cache_dir_key, _cache_dir, found ) ;
00148     if( !found )
00149     {
00150         string err = "The cache directory key " + cache_dir_key
00151                      + " was not found in the BES configuration file" ;
00152         throw BESSyntaxUserError( err, __FILE__, __LINE__ ) ;
00153     }
00154 
00155     found = false ;
00156     keys.get_value( prefix_key, _prefix, found ) ;
00157     if( !found )
00158     {
00159         string err = "The prefix key " + prefix_key
00160                      + " was not found in the BES configuration file" ;
00161         throw BESSyntaxUserError( err, __FILE__, __LINE__ ) ;
00162     }
00163 
00164     found = false ;
00165     string cache_size_str ;
00166     keys.get_value( size_key, cache_size_str, found ) ;
00167     if( !found )
00168     {
00169         string err = "The size key " + size_key
00170                      + " was not found in the BES configuration file" ;
00171         throw BESInternalError( err, __FILE__, __LINE__ ) ;
00172     }
00173 
00174     std::istringstream is( cache_size_str ) ;
00175     is >> _cache_size ;
00176 
00177     check_ctor_params(); // Throws BESSyntaxUserError on error.
00178 }
00179 
00186 bool
00187 BESCache::lock( unsigned int retry, unsigned int num_tries )
00188 {
00189     // make sure we aren't retrying too many times
00190     if( num_tries > MAX_LOCK_TRIES )
00191         num_tries = MAX_LOCK_TRIES ;
00192     if( retry > MAX_LOCK_RETRY_MS )
00193         retry = MAX_LOCK_RETRY_MS ;
00194 
00195     bool got_lock = true ;
00196     if( _lock_fd == -1 )
00197     {
00198         string lock_file = _cache_dir + "/lock" ;
00199         unsigned int tries = 0 ;
00200         _lock_fd = open( lock_file.c_str(),
00201                          O_CREAT | O_EXCL,
00202                          S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH ) ;
00203         while( _lock_fd < 0 && got_lock )
00204         {
00205             tries ++ ;
00206             if( tries > num_tries )
00207             {
00208                 _lock_fd = -1 ;
00209                 got_lock = false ;
00210             }
00211             else
00212             {
00213                 usleep( retry ) ;
00214                 _lock_fd = open( lock_file.c_str(),
00215                                  O_CREAT | O_EXCL,
00216                                  S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH ) ;
00217             }
00218         }
00219     }
00220     else
00221     {
00222         // This would be a programming error, or we've gotten into a
00223         // situation where the lock is lost. Lock has been called on the
00224         // same cache object twice in a row without an unlock being called.
00225         string err = "The cache dir " + _cache_dir + " is already locked" ;
00226         throw BESInternalError( err, __FILE__, __LINE__ ) ;
00227     }
00228 
00229     return got_lock ;
00230 }
00231 
00238 bool
00239 BESCache::unlock()
00240 {
00241     // if we call unlock twice in a row, does it matter? I say no, just say
00242     // that it is unlocked.
00243     bool unlocked = true ;
00244     if( _lock_fd != -1 )
00245     {
00246         string lock_file = _cache_dir + "/lock" ;
00247         close( _lock_fd ) ;
00248         (void)unlink( lock_file.c_str() ) ;
00249     }
00250 
00251     _lock_fd = -1 ;
00252 
00253     return unlocked ;
00254 }
00255 
00269 bool
00270 BESCache::is_cached( const string &src, string &target )
00271 {
00272     bool is_it = true ;
00273     string tmp_target = src ;
00274 
00275     // Create the file that would be created in the cache directory
00276     //echo ${infile} | sed 's/^\///' | sed 's/\//#/g' | sed 's/\(.*\)\..*$/\1/g'
00277     if( tmp_target.at(0) == '/' )
00278     {
00279         tmp_target = src.substr( 1, tmp_target.length() - 1 ) ;
00280     }
00281     string::size_type slash = 0 ;
00282     while( ( slash = tmp_target.find( '/' ) ) != string::npos )
00283     {
00284         tmp_target.replace( slash, 1, 1, BES_CACHE_CHAR ) ;
00285     }
00286     string::size_type last_dot = tmp_target.rfind( '.' ) ;
00287     if( last_dot != string::npos )
00288     {
00289         tmp_target = tmp_target.substr( 0, last_dot ) ;
00290     }
00291 
00292     target = _cache_dir + "/" + _prefix + BES_CACHE_CHAR + tmp_target ;
00293 
00294     // Determine if the target file is already in the cache or not
00295     struct stat buf;
00296     int statret = stat( target.c_str(), &buf ) ;
00297     if( statret != 0 )
00298     {
00299         is_it = false ;
00300     }
00301 
00302     return is_it ;
00303 }
00304 
00313 void
00314 BESCache::purge( )
00315 {
00316     unsigned int max_size = _cache_size * 1048576 ; // Bytes/Meg
00317     struct stat buf;
00318     unsigned int size = 0 ; // total size of all cached files
00319     unsigned int avg_size = 0 ;
00320     unsigned int num_files_in_cache = 0 ;
00321     time_t curr_time = time( NULL ) ; // grab the current time so we can
00322                                       // determine the oldest file
00323     // map of time,entry values
00324     multimap<double,cache_entry,greater<double> > contents ;
00325 
00326     // the prefix is actually the specified prefix plus the cache char '#'
00327     string match_prefix = _prefix + BES_CACHE_CHAR ;
00328 
00329     // go through the cache directory and collect all of the files that
00330     // start with the matching prefix
00331     DIR *dip = opendir( _cache_dir.c_str() ) ;
00332     if( dip != NULL )
00333     {
00334         struct dirent *dit;
00335         while( ( dit = readdir( dip ) ) != NULL )
00336         {
00337             string dirEntry = dit->d_name ;
00338             if( dirEntry.compare( 0, match_prefix.length(), match_prefix ) == 0)
00339             {
00340                 // Now that we have found a match we want to get the size of
00341                 // the file and the last access time from the file.
00342                 string fullPath = _cache_dir + "/" + dirEntry ;
00343                 int statret = stat( fullPath.c_str(), &buf ) ;
00344                 if( statret == 0 )
00345                 {
00346                     size += buf.st_size ;
00347 
00348                     // Find out how old the file is
00349                     time_t file_time = buf.st_atime ;
00350                     // I think we can use the access time without the diff,
00351                     // since it's the relative ages that determine when to
00352                     // delete a file. Good idea to use the access time so
00353                     // recently used (read) files will linger. jhrg 5/9/07
00354                     double time_diff = difftime( curr_time, file_time ) ;
00355                     cache_entry entry ;
00356                     entry.name = fullPath ;
00357                     entry.size = buf.st_size ;
00358                     contents.insert( pair<double,cache_entry>( time_diff, entry ) );
00359                 }
00360                 num_files_in_cache++ ;
00361             }
00362         }
00363 
00364         // We're done looking in the directory, close it
00365         closedir( dip ) ;
00366 
00367         if( num_files_in_cache ) avg_size = size / num_files_in_cache ;
00368 
00369         BESDEBUG( "bes", "cache size = " << size << endl ) ;
00370         BESDEBUG( "bes", "avg size = " << avg_size << endl ) ;
00371         BESDEBUG( "bes", "num files in cache = "
00372                          << num_files_in_cache << endl ) ;
00373         if( BESISDEBUG( "bes" ) )
00374         {
00375             BESDEBUG( "bes", endl << "BEFORE" << endl ) ;
00376             multimap<double,cache_entry,greater<double> >::iterator ti = contents.begin() ;
00377             multimap<double,cache_entry,greater<double> >::iterator te = contents.end() ;
00378             for( ; ti != te; ti++ )
00379             {
00380                 BESDEBUG( "bes", (*ti).first << ": " << (*ti).second.name << ": size " << (*ti).second.size << endl ) ;
00381             }
00382             BESDEBUG( "bes", endl ) ;
00383         }
00384 
00385         // if the size of files is greater than max allowed then we need to
00386         // purge the cache directory. Keep going until the size is less than
00387         // the max.
00388         multimap<double,cache_entry,greater<double> >::iterator i ;
00389         if( (size+avg_size) > max_size )
00390         {
00391             // Maybe change this to size + (fraction of max_size) > max_size?
00392             // jhrg 5/9/07
00393             while( (size+avg_size) > max_size )
00394             {
00395                 i = contents.begin() ;
00396                 if( i == contents.end() )
00397                 {
00398                     // if we've reached the end of the cache directory,
00399                     // there are no more elements in the cache, then set
00400                     // the size and avg_size to 0 so that we can get out
00401                     // of this loop.
00402                     size = 0 ;
00403                     avg_size = 0 ;
00404                 }
00405                 else
00406                 {
00407                     BESDEBUG( "bes", "BESCache::purge - removing "
00408                                      << (*i).second.name << endl ) ;
00409                     if( remove( (*i).second.name.c_str() ) != 0 )
00410                     {
00411                         char *s_err = strerror( errno ) ;
00412                         string err = "Unable to remove the file "
00413                                      + (*i).second.name
00414                                      + " from the cache: " ;
00415                         if( s_err )
00416                         {
00417                             err.append( s_err ) ;
00418                         }
00419                         else
00420                         {
00421                             err.append( "Unknown error" ) ;
00422                         }
00423                         throw BESInternalError( err, __FILE__, __LINE__ ) ;
00424                     }
00425                     size -= (*i).second.size ;
00426                     contents.erase( i ) ;
00427                 }
00428             }
00429         }
00430 
00431         if( BESISDEBUG( "bes" ) )
00432         {
00433             BESDEBUG( "bes", endl << "AFTER" << endl ) ;
00434             multimap<double,cache_entry,greater<double> >::iterator ti = contents.begin() ;
00435             multimap<double,cache_entry,greater<double> >::iterator te = contents.end() ;
00436             for( ; ti != te; ti++ )
00437             {
00438                 BESDEBUG( "bes", (*ti).first << ": " << (*ti).second.name << ": size " << (*ti).second.size << endl ) ;
00439             }
00440         }
00441     }
00442     else
00443     {
00444         string err = "Unable to open cache directory " + _cache_dir ;
00445         throw BESInternalError( err, __FILE__, __LINE__ ) ;
00446     }
00447 }
00448 
00456 void
00457 BESCache::dump( ostream &strm ) const
00458 {
00459     strm << BESIndent::LMarg << "BESCache::dump - ("
00460                              << (void *)this << ")" << endl ;
00461     BESIndent::Indent() ;
00462     strm << BESIndent::LMarg << "cache dir: " << _cache_dir << endl ;
00463     strm << BESIndent::LMarg << "prefix: " << _prefix << endl ;
00464     strm << BESIndent::LMarg << "size: " << _cache_size << endl ;
00465     BESIndent::UnIndent() ;
00466 }
00467 

Generated on Thu Sep 16 15:20:28 2010 for OPeNDAP Hyrax Back End Server (BES) by  doxygen 1.4.7