From 149f4058fb4ad8579a4ceebf370dbb603bfbe0bd Mon Sep 17 00:00:00 2001 From: Leon Date: Tue, 21 Mar 2017 17:47:30 +1300 Subject: [PATCH] dont save empty responses Former-commit-id: 7a659647155428c57d6346bf3fc2feed5300f2bc --- library/StaticHtmlOutput.php | 22 ++++++++++++---------- library/StaticHtmlOutput/UrlRequest.php | 4 +++- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/library/StaticHtmlOutput.php b/library/StaticHtmlOutput.php index eef9a189..a96019b5 100644 --- a/library/StaticHtmlOutput.php +++ b/library/StaticHtmlOutput.php @@ -219,17 +219,11 @@ class StaticHtmlOutput { } else { // Add current url to the list of processed urls $this->_exportLog[$currentUrl] = true; - - // TODO: this shouldnt be part of urlrequest, just general settings - // add conditional logic here whether to do cleanup, vs in each request? - $urlResponse->cleanup(); - - // rewrite url and save to file - $urlResponse->replaceBaseUlr($baseUrl, $newBaseUrl); - - $this->_saveUrlData($urlResponse, $archiveDir); } + // TODO: this shouldnt be part of urlrequest, just general settings + // add conditional logic here whether to do cleanup, vs in each request? + $urlResponse->cleanup(); // get all other urls from within this one and add to queue if not there foreach ($urlResponse->extractAllUrls($baseUrl) as $newUrl) { @@ -239,6 +233,8 @@ class StaticHtmlOutput { } } + $urlResponse->replaceBaseUlr($baseUrl, $newBaseUrl); + $this->_saveUrlData($urlResponse, $archiveDir); } $tempZip = $archiveName . '.tmp'; @@ -423,6 +419,12 @@ class StaticHtmlOutput { $fileExtension = ($url->isHtml() || !isset($pathInfo['extension']) ? 'html' : $pathInfo['extension']); $fileName = $fileDir . '/' . $pathInfo['filename'] . '.' . 
$fileExtension; - file_put_contents($fileName, $url->getResponseBody()); + $fileContents = $url->getResponseBody(); + error_log($fileName); + if ($fileContents != '') { + file_put_contents($fileName, $fileContents); + } else { + error_log('response body was empty'); + } } } diff --git a/library/StaticHtmlOutput/UrlRequest.php b/library/StaticHtmlOutput/UrlRequest.php index 6eb291a7..5158dba1 100644 --- a/library/StaticHtmlOutput/UrlRequest.php +++ b/library/StaticHtmlOutput/UrlRequest.php @@ -124,7 +124,9 @@ class StaticHtmlOutput_UrlRequest public function extractAllUrls($baseUrl) { $allUrls = array(); - + + + // TODO: will this follow urls for JS/CSS easily by adjusting? if ($this->isHtml() && preg_match_all('/' . str_replace('/', '\/', $baseUrl) . '[^"\'#\? ]+/i', $this->_response['body'], $matches)) { $allUrls = array_unique($matches[0]);