diff --git a/app/src/main/java/org/fdroid/fdroid/net/HttpDownloader.java b/app/src/main/java/org/fdroid/fdroid/net/HttpDownloader.java index 9cde774ef..ef86f696a 100644 --- a/app/src/main/java/org/fdroid/fdroid/net/HttpDownloader.java +++ b/app/src/main/java/org/fdroid/fdroid/net/HttpDownloader.java @@ -95,12 +95,35 @@ public class HttpDownloader extends Downloader { } /** - * Get a remote file, checking the HTTP response code and the {@code etag}. - * In order to prevent the {@code etag} from being used as a form of tracking - * cookie, this code never sends the {@code etag} to the server. Instead, it - * uses a {@code HEAD} request to get the {@code etag} from the server, then - * only issues a {@code GET} if the {@code etag} has changed. + * Get a remote file, checking the HTTP response code, if it has changed since + * the last time a download was tried. + *

+ * If the {@code ETag} does not match, it could be caused by the previous + * download of the same file coming from a mirror running on a different + * webserver, e.g. Apache vs Nginx. {@code Content-Length} and + * {@code Last-Modified} are used to check whether the file has changed since + * those are more standardized than {@code ETag}. Plus, Nginx and Apache 2.4 + * defaults use only those two values to generate the {@code ETag} anyway. + * Unfortunately, other webservers and CDNs have totally different methods + * for generating the {@code ETag}. And mirrors that are syncing using a + * method other than {@code rsync} could easily have different {@code Last-Modified} + * times on the exact same file. On top of that, some services like GitHub's + * raw file support {@code raw.githubusercontent.com} and GitLab's raw file + * support do not set the {@code Last-Modified} header at all. So ultimately, + * the {@code ETag} needs to be used first and foremost, then this calculated + * {@code ETag} can serve as a common fallback. + *

+ * In order to prevent the {@code ETag} from being used as a form of tracking + * cookie, this code never sends the {@code ETag} to the server. Instead, it + * uses a {@code HEAD} request to get the {@code ETag} from the server, then + * only issues a {@code GET} if the {@code ETag} has changed. + *

+ * This uses an integer value for {@code Last-Modified} to avoid enabling the + * use of that value as some kind of "cookieless cookie". One second time + * resolution should be plenty since these files change more on the time + * scale of minutes or hours. * + * @see update index from any available mirror * @see Cookieless cookies */ @Override @@ -108,7 +131,6 @@ public class HttpDownloader extends Downloader { // get the file size from the server HttpURLConnection tmpConn = getConnection(); tmpConn.setRequestMethod("HEAD"); - String etag = tmpConn.getHeaderField(HEADER_FIELD_ETAG); int contentLength = -1; int statusCode = tmpConn.getResponseCode(); @@ -116,10 +138,21 @@ public class HttpDownloader extends Downloader { newFileAvailableOnServer = false; switch (statusCode) { case HttpURLConnection.HTTP_OK: + String headETag = tmpConn.getHeaderField(HEADER_FIELD_ETAG); contentLength = tmpConn.getContentLength(); - if (!TextUtils.isEmpty(etag) && etag.equals(cacheTag)) { - Utils.debugLog(TAG, urlString + " is cached, not downloading"); - return; + if (!TextUtils.isEmpty(cacheTag)) { + if (cacheTag.equals(headETag)) { + Utils.debugLog(TAG, urlString + " cached, not downloading: " + headETag); + return; + } else { + String calcedETag = String.format("\"%x-%x\"", + tmpConn.getLastModified() / 1000, contentLength); + if (calcedETag.equals(headETag)) { + Utils.debugLog(TAG, urlString + " cached based on calced ETag, not downloading: " + + headETag); + return; + } + } } newFileAvailableOnServer = true; break;