00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026 #include "config.h"
00027
00028
00029
00030
00031
00032 #include <pthread.h>
00033 #include <limits.h>
00034 #include <unistd.h>
00035 #include <sys/types.h>
00036 #include <sys/stat.h>
00037
00038 #include <cstring>
00039 #include <iostream>
00040 #include <sstream>
00041 #include <algorithm>
00042 #include <iterator>
00043 #include <set>
00044
00045 #include "Error.h"
00046 #include "InternalErr.h"
00047 #include "ResponseTooBigErr.h"
00048 #ifndef WIN32
00049 #include "SignalHandler.h"
00050 #endif
00051 #include "HTTPCacheInterruptHandler.h"
00052 #include "HTTPCacheTable.h"
00053
00054 #include "util_mit.h"
00055 #include "debug.h"
00056
00057 #ifdef WIN32
00058 #include <direct.h>
00059 #include <time.h>
00060 #include <fcntl.h>
00061 #define MKDIR(a,b) _mkdir((a))
00062 #define REMOVE(a) remove((a))
00063 #define MKSTEMP(a) _open(_mktemp((a)),_O_CREAT,_S_IREAD|_S_IWRITE)
00064 #define DIR_SEPARATOR_CHAR '\\'
00065 #define DIR_SEPARATOR_STR "\\"
00066 #else
00067 #define MKDIR(a,b) mkdir((a), (b))
00068 #define REMOVE(a) remove((a))
00069 #define MKSTEMP(a) mkstemp((a))
00070 #define DIR_SEPARATOR_CHAR '/'
00071 #define DIR_SEPARATOR_STR "/"
00072 #endif
00073
00074 #define CACHE_META ".meta"
00075 #define CACHE_INDEX ".index"
00076 #define CACHE_EMPTY_ETAG "@cache@"
// Expiration constants, in seconds. The bodies are parenthesized so the
// macros expand safely inside larger expressions (e.g. x / NO_LM_EXPIRATION).
#define NO_LM_EXPIRATION (24*3600) // 24 hours
#define MAX_LM_EXPIRATION (48*3600) // Max expiration from LM
// If using LM to find the expiration then take 10% and no more than
// MAX_LM_EXPIRATION.
#ifndef LM_EXPIRATION
#define LM_EXPIRATION(t) (min((MAX_LM_EXPIRATION), static_cast<int>((t) / 10)))
#endif
const int CACHE_TABLE_SIZE = 1499;
using namespace std;
namespace libdap {
00080 int
get_hash(const string &url)
{
int hash = 0;
for (const char *ptr = url.c_str(); *ptr; ptr++)
hash = (int)((hash * 3 + (*(unsigned char *)ptr)) % CACHE_TABLE_SIZE);
return hash;
}
/**
 * Build a cache table for the cache rooted at \c cache_root.
 *
 * Records the root and block size, zeroes the size and new-entry counters,
 * derives the index file name (cache_root + CACHE_INDEX), allocates an
 * array of CACHE_TABLE_SIZE empty slots and finally loads any existing
 * index file through cache_index_read().
 *
 * @param cache_root Prefix used for the index file and hash directories.
 * @param block_size Block size used when accounting for entry disk usage.
 */
HTTPCacheTable::HTTPCacheTable(const string &cache_root, int block_size) :
    d_cache_root(cache_root),
    d_block_size(block_size),
    d_current_size(0),
    d_new_entries(0)
{
    d_cache_index = cache_root + CACHE_INDEX;

    // Allocate the slot array and mark every slot as empty.
    d_cache_table = new CacheEntries*[CACHE_TABLE_SIZE];
    fill(d_cache_table, d_cache_table + CACHE_TABLE_SIZE,
         static_cast<CacheEntries*>(0));

    cache_index_read();
}
00084 static inline void
delete_cache_entry(HTTPCacheTable::CacheEntry *e)
{
DBG2(cerr << "Deleting CacheEntry: " << e << endl);
00085 delete e;
00086 }
00087
00088 HTTPCacheTable::~HTTPCacheTable() {
00089 for (int i = 0; i < CACHE_TABLE_SIZE; ++i) {
00090 HTTPCacheTable::CacheEntries *cp = get_cache_table()[i];
00091 if (cp) {
00092
00093 for_each(cp->begin(), cp->end(), delete_cache_entry);
00094
00095
00096 delete get_cache_table()[i];
00097 get_cache_table()[i] = 0;
00098 }
00099 }
00100
00101 delete[] d_cache_table;
00102 }
00103
00111 class DeleteExpired : public unary_function<HTTPCacheTable::CacheEntry *&, void> {
00112 time_t d_time;
00113 HTTPCacheTable &d_table;
00114
00115 public:
00116 DeleteExpired(HTTPCacheTable &table, time_t t) :
00117 d_time(t), d_table(table) {
00118 if (!t)
00119 d_time = time(0);
00120 }
00121
00122 void operator()(HTTPCacheTable::CacheEntry *&e) {
00123 if (e && !e->readers && (e->freshness_lifetime
00124 < (e->corrected_initial_age + (d_time - e->response_time)))) {
00125 DBG(cerr << "Deleting expired cache entry: " << e->url << endl);
00126 d_table.remove_cache_entry(e);
00127 delete e; e = 0;
00128 }
00129 }
00130 };
00131
00132
00133 void HTTPCacheTable::delete_expired_entries(time_t time) {
00134
00135 for (int cnt = 0; cnt < CACHE_TABLE_SIZE; cnt++) {
00136 HTTPCacheTable::CacheEntries *slot = get_cache_table()[cnt];
00137 if (slot) {
00138 for_each(slot->begin(), slot->end(), DeleteExpired(*this, time));
00139 slot->erase(remove(slot->begin(), slot->end(),
00140 static_cast<HTTPCacheTable::CacheEntry *>(0)), slot->end());
00141 }
00142 }
00143 }
00144
00151 class DeleteByHits : public unary_function<HTTPCacheTable::CacheEntry *&, void> {
00152 HTTPCacheTable &d_table;
00153 int d_hits;
00154
00155 public:
00156 DeleteByHits(HTTPCacheTable &table, int hits) :
00157 d_table(table), d_hits(hits) {
00158 }
00159
00160 void operator()(HTTPCacheTable::CacheEntry *&e) {
00161 if (e && !e->readers && e->hits <= d_hits) {
00162 DBG(cerr << "Deleting cache entry: " << e->url << endl);
00163 d_table.remove_cache_entry(e);
00164 delete e; e = 0;
00165 }
00166 }
00167 };
00168
00169 void
00170 HTTPCacheTable::delete_by_hits(int hits) {
00171 for (int cnt = 0; cnt < CACHE_TABLE_SIZE; cnt++) {
00172 if (get_cache_table()[cnt]) {
00173 HTTPCacheTable::CacheEntries *slot = get_cache_table()[cnt];
00174 for_each(slot->begin(), slot->end(), DeleteByHits(*this, hits));
00175 slot->erase(remove(slot->begin(), slot->end(),
00176 static_cast<HTTPCacheTable::CacheEntry*>(0)),
00177 slot->end());
00178
00179 }
00180 }
00181 }
00182
00187 class DeleteBySize : public unary_function<HTTPCacheTable::CacheEntry *&, void> {
00188 HTTPCacheTable &d_table;
00189 unsigned int d_size;
00190
00191 public:
00192 DeleteBySize(HTTPCacheTable &table, unsigned int size) :
00193 d_table(table), d_size(size) {
00194 }
00195
00196 void operator()(HTTPCacheTable::CacheEntry *&e) {
00197 if (e && !e->readers && e->size > d_size) {
00198 DBG(cerr << "Deleting cache entry: " << e->url << endl);
00199 d_table.remove_cache_entry(e);
00200 delete e; e = 0;
00201 }
00202 }
00203 };
00204
00205 void HTTPCacheTable::delete_by_size(unsigned int size) {
00206 for (int cnt = 0; cnt < CACHE_TABLE_SIZE; cnt++) {
00207 if (get_cache_table()[cnt]) {
00208 HTTPCacheTable::CacheEntries *slot = get_cache_table()[cnt];
00209 for_each(slot->begin(), slot->end(), DeleteBySize(*this, size));
00210 slot->erase(remove(slot->begin(), slot->end(),
00211 static_cast<HTTPCacheTable::CacheEntry*>(0)),
00212 slot->end());
00213
00214 }
00215 }
00216 }
00217
00224
00231 bool
00232 HTTPCacheTable::cache_index_delete()
00233 {
00234 d_new_entries = 0;
00235
00236 return (REMOVE(d_cache_index.c_str()) == 0);
00237 }
00238
00247 bool
00248 HTTPCacheTable::cache_index_read()
00249 {
00250 FILE *fp = fopen(d_cache_index.c_str(), "r");
00251
00252
00253 if (!fp) {
00254 return false;
00255 }
00256
00257 char line[1024];
00258 while (!feof(fp) && fgets(line, 1024, fp)) {
00259 add_entry_to_cache_table(cache_index_parse_line(line));
00260 DBG2(cerr << line << endl);
00261 }
00262
00263 int res = fclose(fp) ;
00264 if (res) {
00265 DBG(cerr << "HTTPCache::cache_index_read - Failed to close " << (void *)fp << endl);
00266 }
00267
00268 d_new_entries = 0;
00269
00270 return true;
00271 }
00272
00280 HTTPCacheTable::CacheEntry *
00281 HTTPCacheTable::cache_index_parse_line(const char *line)
00282 {
00283
00284 HTTPCacheTable::CacheEntry *entry = new HTTPCacheTable::CacheEntry;
00285 istringstream iss(line);
00286 iss >> entry->url;
00287 iss >> entry->cachename;
00288
00289 iss >> entry->etag;
00290 if (entry->etag == CACHE_EMPTY_ETAG)
00291 entry->etag = "";
00292
00293 iss >> entry->lm;
00294 iss >> entry->expires;
00295 iss >> entry->size;
00296 iss >> entry->range;
00297
00298 iss >> entry->hash;
00299 iss >> entry->hits;
00300 iss >> entry->freshness_lifetime;
00301 iss >> entry->response_time;
00302 iss >> entry->corrected_initial_age;
00303
00304 iss >> entry->must_revalidate;
00305
00306 return entry;
00307 }
00308
00311 class WriteOneCacheEntry :
00312 public unary_function<HTTPCacheTable::CacheEntry *, void>
00313 {
00314
00315 FILE *d_fp;
00316
00317 public:
00318 WriteOneCacheEntry(FILE *fp) : d_fp(fp)
00319 {}
00320
00321 void operator()(HTTPCacheTable::CacheEntry *e)
00322 {
00323 if (e && fprintf(d_fp,
00324 "%s %s %s %ld %ld %ld %c %d %d %ld %ld %ld %c\r\n",
00325 e->url.c_str(),
00326 e->cachename.c_str(),
00327 e->etag == "" ? CACHE_EMPTY_ETAG : e->etag.c_str(),
00328 (long)(e->lm),
00329 (long)(e->expires),
00330 e->size,
00331 e->range ? '1' : '0',
00332 e->hash,
00333 e->hits,
00334 (long)(e->freshness_lifetime),
00335 (long)(e->response_time),
00336 (long)(e->corrected_initial_age),
00337 e->must_revalidate ? '1' : '0') < 0)
00338 throw Error("Cache Index. Error writing cache index\n");
00339 }
00340 };
00341
00351 void
00352 HTTPCacheTable::cache_index_write()
00353 {
00354 DBG(cerr << "Cache Index. Writing index " << d_cache_index << endl);
00355
00356
00357 FILE * fp = NULL;
00358 if ((fp = fopen(d_cache_index.c_str(), "wb")) == NULL) {
00359 throw Error(string("Cache Index. Can't open `") + d_cache_index
00360 + string("' for writing"));
00361 }
00362
00363
00364
00365
00366 for (int cnt = 0; cnt < CACHE_TABLE_SIZE; cnt++) {
00367 HTTPCacheTable::CacheEntries *cp = get_cache_table()[cnt];
00368 if (cp)
00369 for_each(cp->begin(), cp->end(), WriteOneCacheEntry(fp));
00370 }
00371
00372
00373 int res = fclose(fp);
00374 if (res) {
00375 DBG(cerr << "HTTPCache::cache_index_write - Failed to close "
00376 << (void *)fp << endl);
00377 }
00378
00379 d_new_entries = 0;
00380 }
00381
00383
00396 string
00397 HTTPCacheTable::create_hash_directory(int hash)
00398 {
00399 struct stat stat_info;
00400 ostringstream path;
00401
00402 path << d_cache_root << hash;
00403 string p = path.str();
00404
00405 if (stat(p.c_str(), &stat_info) == -1) {
00406 DBG2(cerr << "Cache....... Create dir " << p << endl);
00407 if (MKDIR(p.c_str(), 0777) < 0) {
00408 DBG2(cerr << "Cache....... Can't create..." << endl);
00409 throw Error("Could not create cache slot to hold response! Check the write permissions on your disk cache directory. Cache root: " + d_cache_root + ".");
00410 }
00411 }
00412 else {
00413 DBG2(cerr << "Cache....... Directory " << p << " already exists"
00414 << endl);
00415 }
00416
00417 return p;
00418 }
00419
00434 void
00435 HTTPCacheTable::create_location(HTTPCacheTable::CacheEntry *entry)
00436 {
00437 string hash_dir = create_hash_directory(entry->hash);
00438 #ifdef WIN32
00439 hash_dir += "\\dodsXXXXXX";
00440 #else
00441 hash_dir += "/dodsXXXXXX";
00442 #endif
00443
00444
00445 char *templat = new char[hash_dir.size() + 1];
00446 strcpy(templat, hash_dir.c_str());
00447
00448
00449
00450
00451
00452 int fd = MKSTEMP(templat);
00453 if (fd < 0) {
00454 delete[] templat; templat = 0;
00455 close(fd);
00456 throw Error("The HTTP Cache could not create a file to hold the response; it will not be cached.");
00457 }
00458
00459 entry->cachename = templat;
00460 delete[] templat; templat = 0;
00461 close(fd);
00462 }
00463
00464
00466 static inline int
00467 entry_disk_space(int size, unsigned int block_size)
00468 {
00469 unsigned int num_of_blocks = (size + block_size) / block_size;
00470
00471 DBG(cerr << "size: " << size << ", block_size: " << block_size
00472 << ", num_of_blocks: " << num_of_blocks << endl);
00473
00474 return num_of_blocks * block_size;
00475 }
00476
00480
00486 void
00487 HTTPCacheTable::add_entry_to_cache_table(CacheEntry *entry)
00488 {
00489 int hash = entry->hash;
00490
00491 if (!d_cache_table[hash])
00492 d_cache_table[hash] = new CacheEntries;
00493
00494 d_cache_table[hash]->push_back(entry);
00495
00496 DBG(cerr << "add_entry_to_cache_table, current_size: " << d_current_size
00497 << ", entry->size: " << entry->size << ", block size: " << d_block_size
00498 << endl);
00499
00500 d_current_size += entry_disk_space(entry->size, d_block_size);
00501
00502 DBG(cerr << "add_entry_to_cache_table, current_size: " << d_current_size << endl);
00503
00504 increment_new_entries();
00505 }
00506
00510 HTTPCacheTable::CacheEntry *
00511 HTTPCacheTable::get_locked_entry_from_cache_table(const string &url)
00512 {
00513 return get_locked_entry_from_cache_table(get_hash(url), url);
00514 }
00515
00523 HTTPCacheTable::CacheEntry *
00524 HTTPCacheTable::get_locked_entry_from_cache_table(int hash, const string &url)
00525 {
00526 DBG(cerr << "url: " << url << "; hash: " << hash << endl);
00527 DBG(cerr << "d_cache_table: " << hex << d_cache_table << dec << endl);
00528 if (d_cache_table[hash]) {
00529 CacheEntries *cp = d_cache_table[hash];
00530 for (CacheEntriesIter i = cp->begin(); i != cp->end(); ++i) {
00531
00532
00533 if ((*i) && (*i)->url == url) {
00534 (*i)->lock_read_response();
00535 return *i;
00536 }
00537 }
00538 }
00539
00540 return 0;
00541 }
00542
00549 HTTPCacheTable::CacheEntry *
00550 HTTPCacheTable::get_write_locked_entry_from_cache_table(const string &url)
00551 {
00552 int hash = get_hash(url);
00553 if (d_cache_table[hash]) {
00554 CacheEntries *cp = d_cache_table[hash];
00555 for (CacheEntriesIter i = cp->begin(); i != cp->end(); ++i) {
00556
00557
00558 if ((*i) && (*i)->url == url) {
00559 (*i)->lock_write_response();
00560 return *i;
00561 }
00562 }
00563 }
00564
00565 return 0;
00566 }
00567
00575 void
00576 HTTPCacheTable::remove_cache_entry(HTTPCacheTable::CacheEntry *entry)
00577 {
00578
00579
00580 if (entry->readers)
00581 throw InternalErr(__FILE__, __LINE__, "Tried to delete a cache entry that is in use.");
00582
00583 REMOVE(entry->cachename.c_str());
00584 REMOVE(string(entry->cachename + CACHE_META).c_str());
00585
00586 DBG(cerr << "remove_cache_entry, current_size: " << get_current_size() << endl);
00587
00588 unsigned int eds = entry_disk_space(entry->size, get_block_size());
00589 set_current_size((eds > get_current_size()) ? 0 : get_current_size() - eds);
00590
00591 DBG(cerr << "remove_cache_entry, current_size: " << get_current_size() << endl);
00592 }
00593
00596 class DeleteCacheEntry: public unary_function<HTTPCacheTable::CacheEntry *&, void>
00597 {
00598 string d_url;
00599 HTTPCacheTable *d_cache_table;
00600
00601 public:
00602 DeleteCacheEntry(HTTPCacheTable *c, const string &url)
00603 : d_url(url), d_cache_table(c)
00604 {}
00605
00606 void operator()(HTTPCacheTable::CacheEntry *&e)
00607 {
00608 if (e && e->url == d_url) {
00609 e->lock_write_response();
00610 d_cache_table->remove_cache_entry(e);
00611 e->unlock_write_response();
00612 delete e; e = 0;
00613 }
00614 }
00615 };
00616
00623 void
00624 HTTPCacheTable::remove_entry_from_cache_table(const string &url)
00625 {
00626 int hash = get_hash(url);
00627 if (d_cache_table[hash]) {
00628 CacheEntries *cp = d_cache_table[hash];
00629 for_each(cp->begin(), cp->end(), DeleteCacheEntry(this, url));
00630 cp->erase(remove(cp->begin(), cp->end(), static_cast<HTTPCacheTable::CacheEntry*>(0)),
00631 cp->end());
00632 }
00633 }
00634
00637 class DeleteUnlockedCacheEntry :
00638 public unary_function<HTTPCacheTable::CacheEntry *&, void> {
00639 HTTPCacheTable &d_table;
00640
00641 public:
00642 DeleteUnlockedCacheEntry(HTTPCacheTable &t) :
00643 d_table(t) {
00644 }
00645 void operator()(HTTPCacheTable::CacheEntry *&e) {
00646 if (e) {
00647 d_table.remove_cache_entry(e);
00648 delete e; e = 0;
00649 }
00650 }
00651 };
00652
00653 void HTTPCacheTable::delete_all_entries() {
00654
00655
00656 for (int cnt = 0; cnt < CACHE_TABLE_SIZE; cnt++) {
00657 HTTPCacheTable::CacheEntries *slot = get_cache_table()[cnt];
00658 if (slot) {
00659 for_each(slot->begin(), slot->end(), DeleteUnlockedCacheEntry(*this));
00660 slot->erase(remove(slot->begin(), slot->end(), static_cast<HTTPCacheTable::CacheEntry *>(0)),
00661 slot->end());
00662 }
00663 }
00664
00665 cache_index_delete();
00666 }
00667
00681 void
00682 HTTPCacheTable::calculate_time(HTTPCacheTable::CacheEntry *entry, int default_expiration, time_t request_time)
00683 {
00684 entry->response_time = time(NULL);
00685 time_t apparent_age = max(0, static_cast<int>(entry->response_time - entry->date));
00686 time_t corrected_received_age = max(apparent_age, entry->age);
00687 time_t response_delay = entry->response_time - request_time;
00688 entry->corrected_initial_age = corrected_received_age + response_delay;
00689
00690
00691
00692
00693 time_t freshness_lifetime = entry->max_age;
00694 if (freshness_lifetime < 0) {
00695 if (entry->expires < 0) {
00696 if (entry->lm < 0) {
00697 freshness_lifetime = default_expiration;
00698 }
00699 else {
00700 freshness_lifetime = LM_EXPIRATION(entry->date - entry->lm);
00701 }
00702 }
00703 else
00704 freshness_lifetime = entry->expires - entry->date;
00705 }
00706
00707 entry->freshness_lifetime = max(0, static_cast<int>(freshness_lifetime));
00708
00709 DBG2(cerr << "Cache....... Received Age " << entry->age
00710 << ", corrected " << entry->corrected_initial_age
00711 << ", freshness lifetime " << entry->freshness_lifetime << endl);
00712 }
00713
00725 void HTTPCacheTable::parse_headers(HTTPCacheTable::CacheEntry *entry,
00726 unsigned long max_entry_size, const vector<string> &headers) {
00727 vector<string>::const_iterator i;
00728 for (i = headers.begin(); i != headers.end(); ++i) {
00729
00730 if ((*i).empty())
00731 continue;
00732
00733 string::size_type colon = (*i).find(':');
00734
00735
00736 if (colon == string::npos)
00737 continue;
00738
00739 string header = (*i).substr(0, (*i).find(':'));
00740 string value = (*i).substr((*i).find(": ") + 2);
00741 DBG2(cerr << "Header: " << header << endl);DBG2(cerr << "Value: " << value << endl);
00742
00743 if (header == "ETag") {
00744 entry->etag = value;
00745 } else if (header == "Last-Modified") {
00746 entry->lm = parse_time(value.c_str());
00747 } else if (header == "Expires") {
00748 entry->expires = parse_time(value.c_str());
00749 } else if (header == "Date") {
00750 entry->date = parse_time(value.c_str());
00751 } else if (header == "Age") {
00752 entry->age = parse_time(value.c_str());
00753 } else if (header == "Content-Length") {
00754 unsigned long clength = strtoul(value.c_str(), 0, 0);
00755 if (clength > max_entry_size)
00756 entry->set_no_cache(true);
00757 } else if (header == "Cache-Control") {
00758
00759
00760
00761 if (value == "no-cache" || value == "no-store")
00762
00763
00764
00765 entry->set_no_cache(true);
00766 else if (value == "must-revalidate")
00767 entry->must_revalidate = true;
00768 else if (value.find("max-age") != string::npos) {
00769 string max_age = value.substr(value.find("=" + 1));
00770 entry->max_age = parse_time(max_age.c_str());
00771 }
00772 }
00773 }
00774 }
00775
00777
00778
00779 void HTTPCacheTable::bind_entry_to_data(HTTPCacheTable::CacheEntry *entry, FILE *body) {
00780 entry->hits++;
00781 d_locked_entries[body] = entry;
00782 }
00783
00784 void HTTPCacheTable::uncouple_entry_from_data(FILE *body) {
00785 HTTPCacheTable::CacheEntry *entry = d_locked_entries[body];
00786 if (!entry)
00787 throw InternalErr("There is no cache entry for the response given.");
00788
00789 d_locked_entries.erase(body);
00790 entry->unlock_read_response();
00791
00792 if (entry->readers < 0)
00793 throw InternalErr("An unlocked entry was released");
00794 }
00795
00796 bool HTTPCacheTable::is_locked_read_responses() {
00797 return !d_locked_entries.empty();
00798 }
00799
00800 }
00801