BESCache.cc

Go to the documentation of this file.
00001 // BESCache.cc
00002 
00003 // This file is part of bes, A C++ back-end server implementation framework
00004 // for the OPeNDAP Data Access Protocol.
00005 
00006 // Copyright (c) 2007 University Corporation for Atmospheric Research
00007 // Author: Patrick West <pwest@ucar.edu> and Jose Garcia <jgarcia@ucar.edu>
00008 //
00009 // This library is free software; you can redistribute it and/or
00010 // modify it under the terms of the GNU Lesser General Public
00011 // License as published by the Free Software Foundation; either
00012 // version 2.1 of the License, or (at your option) any later version.
00013 // 
00014 // This library is distributed in the hope that it will be useful,
00015 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00016 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00017 // Lesser General Public License for more details.
00018 // 
00019 // You should have received a copy of the GNU Lesser General Public
00020 // License along with this library; if not, write to the Free Software
00021 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00022 //
00023 // You can contact University Corporation for Atmospheric Research at
00024 // 3080 Center Green Drive, Boulder, CO 80301
00025  
00026 // (c) COPYRIGHT University Corporation for Atmospheric Research 2004-2005
00027 // Please read the full copyright statement in the file COPYRIGHT_UCAR.
00028 //
00029 // Authors:
00030 //      pwest       Patrick West <pwest@ucar.edu>
00031 //      jgarcia     Jose Garcia <jgarcia@ucar.edu>
00032 
00033 #include <unistd.h>  // for unlink
00034 #include <sys/types.h>
00035 #include <sys/stat.h>
00036 #include <dirent.h>
00037 #include <fcntl.h>
00038 
00039 #include <cstring>
00040 #include <cerrno>
00041 #include <map>
00042 #include <iostream>
00043 #include <sstream>
00044 
00045 using std::multimap ;
00046 using std::pair ;
00047 using std::greater ;
00048 using std::endl ;
00049 
00050 #include "BESCache.h"
00051 #include "TheBESKeys.h"
00052 #include "BESInternalError.h"
00053 #include "BESDebug.h"
00054 
00055 #define BES_CACHE_CHAR '#'
00056 
/** @brief Bookkeeping record for one cached file.
 *
 * BESCache::purge() fills one of these for every matching file it finds
 * in the cache directory and stores it in its age-ordered multimap.
 */
typedef struct _cache_entry
{
    string name ;   ///< name of the cached file (purge stores the full path)
    off_t size ;    ///< size of the file in bytes; off_t is the type of
                    ///< stat's st_size, so sizes of 2 GiB or more are not
                    ///< truncated the way they were with a plain int
} cache_entry ;
00062 
00063 void 
00064 BESCache::check_ctor_params()
00065 {
00066     if( _cache_dir.empty() )
00067     {
00068         string err = "The cache dir was not specified, must be non-empty" ;
00069         throw BESInternalError( err, __FILE__, __LINE__ ) ;
00070     }
00071 
00072     struct stat buf;
00073     int statret = stat( _cache_dir.c_str(), &buf ) ;
00074     if( statret != 0 || ! S_ISDIR(buf.st_mode) )
00075     {
00076         string err = "The cache dir " + _cache_dir + " does not exist" ;
00077         throw BESInternalError( err, __FILE__, __LINE__ ) ;
00078     }
00079 
00080     if( _prefix.empty() )
00081     {
00082         string err = "The prefix was not specified, must be non-empty" ;
00083         throw BESInternalError( err, __FILE__, __LINE__ ) ;
00084     }
00085 
00086     if( _cache_size == 0 )
00087     {
00088         string err = "The cache size was not specified, must be non-zero" ;
00089         throw BESInternalError( err, __FILE__, __LINE__ ) ;
00090     }
00091 }
00092 
00102 BESCache::BESCache( const string &cache_dir,
00103                     const string &prefix,
00104                     unsigned int size )
00105     : _cache_dir( cache_dir ),
00106       _prefix( prefix ),
00107       _cache_size( size ),
00108       _lock_fd( -1 )
00109 {
00110     check_ctor_params(); // Throws BESInternalError on error.
00111 }
00112 
00127 BESCache::BESCache( BESKeys &keys,
00128                     const string &cache_dir_key,
00129                     const string &prefix_key,
00130                     const string &size_key )
00131     : _cache_size( 0 ),
00132       _lock_fd( -1 )
00133 {
00134     bool found = false ;
00135     _cache_dir = keys.get_key( cache_dir_key, found ) ;
00136     if( !found )
00137     {
00138         string err = "The cache dir key " + cache_dir_key
00139                      + " was not found" ;
00140         throw BESInternalError( err, __FILE__, __LINE__ ) ;
00141     }
00142 
00143     found = false ;
00144     _prefix = keys.get_key( prefix_key, found ) ;
00145     if( !found )
00146     {
00147         string err = "The prefix key " + prefix_key
00148                      + " was not found" ;
00149         throw BESInternalError( err, __FILE__, __LINE__ ) ;
00150     }
00151 
00152     found = false ;
00153     string _cache_size_str = keys.get_key( size_key, found ) ;
00154     if( !found )
00155     {
00156         string err = "The size key " + size_key
00157                      + " was not found" ;
00158         throw BESInternalError( err, __FILE__, __LINE__ ) ;
00159     }
00160 
00161 
00162     std::istringstream is( _cache_size_str ) ;
00163     is >> _cache_size ;
00164 
00165     check_ctor_params(); // Throws BESInternalError on error.
00166 }
00167 
00174 bool
00175 BESCache::lock( unsigned int retry, unsigned int num_tries )
00176 {
00177     // make sure we aren't retrying too many times
00178     if( num_tries > MAX_LOCK_TRIES )
00179         num_tries = MAX_LOCK_TRIES ;
00180     if( retry > MAX_LOCK_RETRY_MS )
00181         retry = MAX_LOCK_RETRY_MS ;
00182 
00183     bool got_lock = true ;
00184     if( _lock_fd == -1 )
00185     {
00186         string lock_file = _cache_dir + "/lock" ;
00187         unsigned int tries = 0 ;
00188         _lock_fd = open( lock_file.c_str(),
00189                          O_CREAT | O_EXCL,
00190                          S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH ) ;
00191         while( _lock_fd < 0 && got_lock )
00192         {
00193             tries ++ ;
00194             if( tries > num_tries )
00195             {
00196                 _lock_fd = -1 ;
00197                 got_lock = false ;
00198             }
00199             else
00200             {
00201                 usleep( retry ) ;
00202                 _lock_fd = open( lock_file.c_str(),
00203                                  O_CREAT | O_EXCL,
00204                                  S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH ) ;
00205             }
00206         }
00207     }
00208     else
00209     {
00210         // This would be a programming error, or we've gotten into a
00211         // situation where the lock is lost. Lock has been called on the
00212         // same cache object twice in a row without an unlock being called.
00213         string err = "The cache dir " + _cache_dir + " is already locked" ;
00214         throw BESInternalError( err, __FILE__, __LINE__ ) ;
00215     }
00216 
00217     return got_lock ;
00218 }
00219 
00226 bool
00227 BESCache::unlock()
00228 {
00229     // if we call unlock twice in a row, does it matter? I say no, just say
00230     // that it is unlocked.
00231     bool unlocked = true ;
00232     if( _lock_fd != -1 )
00233     {
00234         string lock_file = _cache_dir + "/lock" ;
00235         close( _lock_fd ) ;
00236         unlink( lock_file.c_str() ) ;
00237     }
00238 
00239     _lock_fd = -1 ;
00240 
00241     return unlocked ;
00242 }
00243 
00257 bool
00258 BESCache::is_cached( const string &src, string &target )
00259 {
00260     bool is_it = true ;
00261     string tmp_target = src ;
00262 
00263     // Create the file that would be created in the cache directory
00264     //echo ${infile} | sed 's/^\///' | sed 's/\//#/g' | sed 's/\(.*\)\..*$/\1/g'
00265     if( tmp_target.at(0) == '/' )
00266     {
00267         tmp_target = src.substr( 1, tmp_target.length() - 1 ) ;
00268     }
00269     string::size_type slash = 0 ;
00270     while( ( slash = tmp_target.find( '/' ) ) != string::npos )
00271     {
00272         tmp_target.replace( slash, 1, 1, BES_CACHE_CHAR ) ;
00273     }
00274     string::size_type last_dot = tmp_target.rfind( '.' ) ;
00275     if( last_dot != string::npos )
00276     {
00277         tmp_target = tmp_target.substr( 0, last_dot ) ;
00278     }
00279 
00280     target = _cache_dir + "/" + _prefix + BES_CACHE_CHAR + tmp_target ;
00281 
00282     // Determine if the target file is already in the cache or not
00283     struct stat buf;
00284     int statret = stat( target.c_str(), &buf ) ;
00285     if( statret != 0 )
00286     {
00287         is_it = false ;
00288     }
00289 
00290     return is_it ;
00291 }
00292 
00301 void
00302 BESCache::purge( )
00303 {
00304     int max_size = _cache_size * 1048576 ; // Bytes/Meg
00305     struct stat buf;
00306     int size = 0 ; // total size of all cached files
00307     time_t curr_time = time( NULL ) ; // grab the current time so we can
00308                                       // determine the oldest file
00309     // map of time,entry values
00310     multimap<double,cache_entry,greater<double> > contents ;
00311 
00312     // the prefix is actually the specified prefix plus the cache char '#'
00313     string match_prefix = _prefix + BES_CACHE_CHAR ;
00314 
00315     // go through the cache directory and collect all of the files that
00316     // start with the matching prefix
00317     DIR *dip = opendir( _cache_dir.c_str() ) ;
00318     if( dip != NULL )
00319     {
00320         struct dirent *dit;
00321         while( ( dit = readdir( dip ) ) != NULL )
00322         {
00323             string dirEntry = dit->d_name ;
00324             if( dirEntry.compare( 0, match_prefix.length(), match_prefix ) == 0)
00325             {
00326                 // Now that we have found a match we want to get the size of
00327                 // the file and the last access time from the file.
00328                 string fullPath = _cache_dir + "/" + dirEntry ;
00329                 int statret = stat( fullPath.c_str(), &buf ) ;
00330                 if( statret == 0 )
00331                 {
00332                     size += buf.st_size ;
00333 
00334                     // Find out how old the file is
00335                     time_t file_time = buf.st_atime ;
00336                     // I think we can use the access time without the diff,
00337                     // since it's the relative ages that determine when to
00338                     // delete a file. Good idea to use the access time so
00339                     // recently used (read) files will linger. jhrg 5/9/07
00340                     double time_diff = difftime( curr_time, file_time ) ;
00341                     cache_entry entry ;
00342                     entry.name = fullPath ;
00343                     entry.size = buf.st_size ;
00344                     contents.insert( pair<double,cache_entry>( time_diff, entry ) );
00345                 }
00346             }
00347         }
00348 
00349         // We're done looking in the directory, close it
00350         closedir( dip ) ;
00351 
00352         if( BESISDEBUG( "bes" ) )
00353         {
00354             BESDEBUG( "bes", endl << "BEFORE" << endl )
00355             multimap<double,cache_entry,greater<double> >::iterator ti = contents.begin() ;
00356             multimap<double,cache_entry,greater<double> >::iterator te = contents.end() ;
00357             for( ; ti != te; ti++ )
00358             {
00359                 BESDEBUG( "bes", (*ti).first << ": " << (*ti).second.name << ": size " << (*ti).second.size << endl )
00360             }
00361             BESDEBUG( "bes", endl )
00362         }
00363 
00364         // if the size of files is greater than max allowed then we need to
00365         // purge the cache directory. Keep going until the size is less than
00366         // the max.
00367         multimap<double,cache_entry,greater<double> >::iterator i ;
00368         if( size > max_size )
00369         {
00370             // Maybe change this to size + (fraction of max_size) > max_size?
00371             // jhrg 5/9/07
00372             while( size > max_size )
00373             {
00374                 i = contents.begin() ;
00375                 BESDEBUG( "bes", "BESCache::purge - removing " << (*i).second.name << endl )
00376                 if( remove( (*i).second.name.c_str() ) != 0 )
00377                 {
00378                     char *s_err = strerror( errno ) ;
00379                     string err = "Unable to remove the file "
00380                                  + (*i).second.name + " from the cache: " ;
00381                     if( s_err )
00382                     {
00383                         err.append( s_err ) ;
00384                     }
00385                     else
00386                     {
00387                         err.append( "Unknown error" ) ;
00388                     }
00389                     throw BESInternalError( err, __FILE__, __LINE__ ) ;
00390                 }
00391                 size -= (*i).second.size ;
00392                 contents.erase( i ) ;
00393             }
00394         }
00395 
00396         if( BESISDEBUG( "bes" ) )
00397         {
00398             BESDEBUG( "bes", endl << "AFTER" << endl )
00399             multimap<double,cache_entry,greater<double> >::iterator ti = contents.begin() ;
00400             multimap<double,cache_entry,greater<double> >::iterator te = contents.end() ;
00401             for( ; ti != te; ti++ )
00402             {
00403                 BESDEBUG( "bes", (*ti).first << ": " << (*ti).second.name << ": size " << (*ti).second.size << endl )
00404             }
00405         }
00406     }
00407     else
00408     {
00409         string err = "Unable to open cache directory " + _cache_dir ;
00410         throw BESInternalError( err, __FILE__, __LINE__ ) ;
00411     }
00412 }
00413 
00421 void
00422 BESCache::dump( ostream &strm ) const
00423 {
00424     strm << BESIndent::LMarg << "BESCache::dump - ("
00425                              << (void *)this << ")" << endl ;
00426     BESIndent::Indent() ;
00427     strm << BESIndent::LMarg << "cache dir: " << _cache_dir << endl ;
00428     strm << BESIndent::LMarg << "prefix: " << _prefix << endl ;
00429     strm << BESIndent::LMarg << "size: " << _cache_size << endl ;
00430     BESIndent::UnIndent() ;
00431 }
00432 

Generated on Tue Mar 4 23:13:33 2008 for OPeNDAP Back End Server (BES) by  doxygen 1.5.1