(svn r11119) -Codechange: analyze .tar files upon loading, remembering their files and offsets, which speeds up .tar handling by a big factor
authortruelight
Sun, 16 Sep 2007 18:10:52 +0000
changeset 8088 7c1a4dd586ae
parent 8087 064305edb998
child 8089 a172d415e963
(svn r11119) -Codechange: analyze .tar files upon loading, remembering their files and offsets, which speeds up .tar handling by a big factor
-Fix: several win32 .tar support problems
-Fix: better checking of .tar versions and other minor things
-Codechange: don't call fclose() but FioFClose to close a file
src/fileio.cpp
src/fileio.h
src/gfxinit.cpp
src/newgrf_config.cpp
--- a/src/fileio.cpp	Sat Sep 15 16:01:34 2007 +0000
+++ b/src/fileio.cpp	Sun Sep 16 18:10:52 2007 +0000
@@ -204,7 +204,8 @@
 };
 
 const char *_searchpaths[NUM_SEARCHPATHS];
-std::vector<const char *> _tar_list;
+TarList _tar_list;
+TarFileList _tar_filelist;
 
 /**
  * Check whether the given file exists
@@ -217,8 +218,16 @@
 	FILE *f = FioFOpenFile(filename, "rb", subdir);
 	if (f == NULL) return false;
 
+	FioFCloseFile(f);
+	return true;
+}
+
+/**
+ * Close a file in a safe way.
+ */
+void FioFCloseFile(FILE *f)
+{
 	fclose(f);
-	return true;
 }
 
 char *FioGetFullPath(char *buf, size_t buflen, Searchpath sp, Subdirectory subdir, const char *filename)
@@ -303,101 +312,14 @@
 	return f;
 }
 
-FILE *FioTarFileList(const char *tar, const char *mode, size_t *filesize, FioTarFileListCallback *callback, void *userdata)
+FILE *FioFOpenFileTar(TarFileListEntry *entry, size_t *filesize)
 {
-	/* The TAR-header, repeated for every file */
-	typedef struct TarHeader {
-		char name[100];      ///< Name of the file
-		char mode[8];
-		char uid[8];
-		char gid[8];
-		char size[12];       ///< Size of the file, in ASCII
-		char mtime[12];
-		char chksum[8];
-		char typeflag;
-		char linkname[100];
-		char magic[6];
-		char version[2];
-		char uname[32];
-		char gname[32];
-		char devmajor[8];
-		char devminor[8];
-		char prefix[155];    ///< Path of the file
-
-		char unused[12];
-	} TarHeader;
-
-	assert(mode[0] == 'r'); // Only reading is supported
-	assert(callback != NULL); // We need a callback, else this function doens't do much
-
-#if defined(WIN32) && defined(UNICODE)
-	/* fopen is implemented as a define with ellipses for
-	 * Unicode support (prepend an L). As we are not sending
-	 * a string, but a variable, it 'renames' the variable,
-	 * so make that variable to makes it compile happily */
-	wchar_t Lmode[5];
-	MultiByteToWideChar(CP_ACP, 0, mode, -1, Lmode, lengthof(Lmode));
-#endif
-
-	FILE *f = fopen(tar, mode);
+	FILE *f = fopen(entry->tar->filename, "rb");
 	assert(f != NULL);
 
-	TarHeader th;
-	char buf[sizeof(th.name) + 1], *end;
-	char name[sizeof(th.prefix) + 1 + sizeof(th.name) + 1];
-
-	while (!feof(f)) {
-		/* Read the header and make sure it is a valid one */
-		fread(&th, 1, 512, f);
-		/* 'ustar' is the new format, '\0' is the old format */
-		if (th.magic[0] != '\0' && strncmp(th.magic, "ustar", 5) != 0) return NULL;
-
-		name[0] = '\0';
-		int len = 0;
-
-		/* The prefix contains the directory-name */
-		if (th.prefix[0] != '\0') {
-			memcpy(name, th.prefix, sizeof(th.prefix));
-			name[sizeof(th.prefix)] = '\0';
-			len = strlen(name);
-			name[len] = PATHSEPCHAR;
-			len++;
-		}
-
-		/* Copy the name of the file in a safe way at the end of 'name' */
-		memcpy(&name[len], th.name, sizeof(th.name));
-		name[len + sizeof(th.name)] = '\0';
-
-		/* Calculate the size of the file.. for some strange reason this is stored as a string */
-		memcpy(buf, th.size, sizeof(th.size));
-		buf[sizeof(th.size)] = '\0';
-		int skip = strtol(buf, &end, 8);
-
-		/* 0 byte sized files can be skipped (dirs, symlinks, ..) */
-		if (skip == 0) continue;
-
-		/* Check in the callback if this is the file we want */
-		if (callback(name, skip, userdata)) {
-			if (filesize != NULL) *filesize = skip;
-			return f;
-		}
-
-		/* Skip to the next block.. */
-		fseek(f, ALIGN(skip, 512), SEEK_CUR);
-	}
-
-	fclose(f);
-	return NULL;
-}
-
-bool FioFOpenFileTarFileListCallback(const char *filename, int size, void *search_filename)
-{
-	return strcasecmp(filename, (const char *)search_filename) == 0;
-}
-
-FILE *FioFOpenFileTar(const char *filename, const char *tar_filename, size_t *filesize)
-{
-	return FioTarFileList(tar_filename, "rb", filesize, FioFOpenFileTarFileListCallback, (void *)filename);
+	fseek(f, entry->position, SEEK_SET);
+	if (filesize != NULL) *filesize = entry->size;
+	return f;
 }
 
 /** Opens OpenTTD files somewhere in a personal or global directory */
@@ -412,12 +334,16 @@
 		f = FioFOpenFileSp(filename, mode, sp, subdir, filesize);
 		if (f != NULL || subdir == NO_DIRECTORY) break;
 	}
+
 	/* We can only use .tar in case of data-dir, and read-mode */
 	if (f == NULL && subdir == DATA_DIR && mode[0] == 'r') {
-		const char *tar;
-		FOR_ALL_TARS(tar) {
-			f = FioFOpenFileTar(filename, tar, filesize);
-			if (f != NULL) break;
+		/* Filenames in tars are always forced to be lowercase */
+		char *lcfilename = strdup(filename);
+		strtolower(lcfilename);
+		TarFileList::iterator it = _tar_filelist.find(lcfilename);
+		free(lcfilename);
+		if (it != _tar_filelist.end()) {
+			f = FioFOpenFileTar(&((*it).second), filesize);
 		}
 	}
 
@@ -483,14 +409,109 @@
 
 static bool TarListAddFile(const char *filename)
 {
-	/* See if we already have a tar by that name; useless to have double entries in our list */
-	const char *tar;
-	FOR_ALL_TARS(tar) {
-		if (strcmp(tar, filename) == 0) return false;
+	/* The TAR-header, repeated for every file */
+	typedef struct TarHeader {
+		char name[100];      ///< Name of the file
+		char mode[8];
+		char uid[8];
+		char gid[8];
+		char size[12];       ///< Size of the file, in ASCII
+		char mtime[12];
+		char chksum[8];
+		char typeflag;
+		char linkname[100];
+		char magic[6];
+		char version[2];
+		char uname[32];
+		char gname[32];
+		char devmajor[8];
+		char devminor[8];
+		char prefix[155];    ///< Path of the file
+
+		char unused[12];
+	} TarHeader;
+
+	/* Check if we have already seen this file */
+	TarList::iterator it = _tar_list.find(filename);
+	if (it != _tar_list.end()) return false;
+
+	FILE *f = fopen(filename, "rb");
+	assert(f != NULL);
+
+	TarListEntry *tar_entry = MallocT<TarListEntry>(1);
+	tar_entry->filename = strdup(filename);
+	_tar_list.insert(TarList::value_type(filename, tar_entry));
+
+	TarHeader th;
+	char buf[sizeof(th.name) + 1], *end;
+	char name[sizeof(th.prefix) + 1 + sizeof(th.name) + 1];
+	int num = 0, pos = 0;
+
+	/* Make a buffer of 512 zero bytes */
+	char empty[512];
+	memset(&empty[0], 0, sizeof(empty));
+
+	while (!feof(f)) {
+		fread(&th, 1, 512, f);
+		pos += 512;
+
+		/* Check if we have the new tar-format (ustar) or the old one (a lot of zeros after 'link' field) */
+		if (strncmp(th.magic, "ustar", 5) != 0 && memcmp(&th.magic, &empty[0], 512 - offsetof(TarHeader, magic)) != 0) {
+			/* If we have only zeros in the block, it can be an end-of-file indicator */
+			if (memcmp(&th, &empty[0], 512) == 0) continue;
+
+			DEBUG(misc, 0, "The file '%s' isn't a valid tar-file", filename);
+			return false;
+		}
+
+		name[0] = '\0';
+		int len = 0;
+
+		/* The prefix contains the directory-name */
+		if (th.prefix[0] != '\0') {
+			memcpy(name, th.prefix, sizeof(th.prefix));
+			name[sizeof(th.prefix)] = '\0';
+			len = strlen(name);
+			name[len] = PATHSEPCHAR;
+			len++;
+		}
+
+		/* Copy the name of the file in a safe way at the end of 'name' */
+		memcpy(&name[len], th.name, sizeof(th.name));
+		name[len + sizeof(th.name)] = '\0';
+
+		/* Calculate the size of the file.. for some strange reason this is stored as a string */
+		memcpy(buf, th.size, sizeof(th.size));
+		buf[sizeof(th.size)] = '\0';
+		int skip = strtol(buf, &end, 8);
+
+		/* 0 byte sized files can be skipped (dirs, symlinks, ..) */
+		if (skip == 0) continue;
+
+		/* Store this entry in the list */
+		TarFileListEntry entry;
+		entry.tar      = tar_entry;
+		entry.size     = skip;
+		entry.position = pos;
+		/* Force lowercase */
+		strtolower(name);
+
+		/* Tar-files always have '/' path-separator, but we want our PATHSEPCHAR */
+#if (PATHSEPCHAR != '/')
+		for (char *n = name; *n != '\0'; n++) if (*n == '/') *n = PATHSEPCHAR;
+#endif
+
+		DEBUG(misc, 6, "Found file in tar: %s (%d bytes, %d offset)", name, skip, pos);
+		if (_tar_filelist.insert(TarFileList::value_type(name, entry)).second) num++;
+
+		/* Skip to the next block.. */
+		skip = ALIGN(skip, 512);
+		fseek(f, skip, SEEK_CUR);
+		pos += skip;
 	}
 
-	DEBUG(misc, 1, "Found tar: %s", filename);
-	_tar_list.push_back(strdup(filename));
+	DEBUG(misc, 1, "Found tar '%s' with %d new files", filename, num);
+	fclose(f);
 
 	return true;
 }
--- a/src/fileio.h	Sat Sep 15 16:01:34 2007 +0000
+++ b/src/fileio.h	Sun Sep 16 18:10:52 2007 +0000
@@ -6,7 +6,8 @@
 #define FILEIO_H
 
 #include "helpers.hpp"
-#include <vector>
+#include <map>
+#include <string>
 
 void FioSeekTo(uint32 pos, int mode);
 void FioSeekToFile(uint8 slot, uint32 pos);
@@ -62,9 +63,20 @@
 extern const char *_searchpaths[NUM_SEARCHPATHS];
 
 /**
- * All the tar-files OpenTTD could search through.
+ * The definition of a TarList.
  */
-extern std::vector<const char *>_tar_list;
+struct TarListEntry {
+	const char *filename;
+};
+struct TarFileListEntry {
+	TarListEntry *tar;
+	int size;
+	int position;
+};
+typedef std::map<std::string, TarListEntry *> TarList;
+typedef std::map<std::string, TarFileListEntry> TarFileList;
+extern TarList _tar_list;
+extern TarFileList _tar_filelist;
 
 /**
  * Checks whether the given search path is a valid search path
@@ -78,11 +90,12 @@
 
 /** Iterator for all the search paths */
 #define FOR_ALL_SEARCHPATHS(sp) for (sp = SP_FIRST_DIR; sp < NUM_SEARCHPATHS; sp++) if (IsValidSearchPath(sp))
-#define FOR_ALL_TARS(tar) for (std::vector<const char *>::iterator it = _tar_list.begin(); it != _tar_list.end(); it++) if (tar = *it, true)
+#define FOR_ALL_TARS(tar) for (tar = _tar_filelist.begin(); tar != _tar_filelist.end(); tar++)
 
 typedef bool FioTarFileListCallback(const char *filename, int size, void *userdata);
 FILE *FioTarFileList(const char *tar, const char *mode, size_t *filesize, FioTarFileListCallback *callback, void *userdata);
 
+void FioFCloseFile(FILE *f);
 FILE *FioFOpenFile(const char *filename, const char *mode = "rb", Subdirectory subdir = DATA_DIR, size_t *filesize = NULL);
 bool FioCheckFileExists(const char *filename, Subdirectory subdir = DATA_DIR);
 char *FioGetFullPath(char *buf, size_t buflen, Searchpath sp, Subdirectory subdir, const char *filename);
--- a/src/gfxinit.cpp	Sat Sep 15 16:01:34 2007 +0000
+++ b/src/gfxinit.cpp	Sun Sep 16 18:10:52 2007 +0000
@@ -130,7 +130,7 @@
 		}
 
 		if (ferror(f) && warn) ShowInfoF("Error Reading from %s \n", file.filename);
-		fclose(f);
+		FioFCloseFile(f);
 
 		md5_finish(&filemd5state, digest);
 		return CheckMD5Digest(file, digest, warn);
--- a/src/newgrf_config.cpp	Sat Sep 15 16:01:34 2007 +0000
+++ b/src/newgrf_config.cpp	Sun Sep 16 18:10:52 2007 +0000
@@ -51,7 +51,7 @@
 	}
 	md5_finish(&md5state, config->md5sum);
 
-	fclose(f);
+	FioFCloseFile(f);
 
 	return true;
 }
@@ -356,26 +356,18 @@
 	return num;
 }
 
-bool FioTarFileListScanNewGRFCallback(const char *filename, int size, void *userdata)
+static uint ScanTar(TarFileList::iterator tar)
 {
-	uint *num = (uint *)userdata;
+	uint num = 0;
+	const char *filename = (*tar).first.c_str();
 	const char *ext = strrchr(filename, '.');
 
 	/* If no extension or extension isn't .grf, skip the file */
 	if (ext == NULL) return false;
 	if (strcasecmp(ext, ".grf") != 0) return false;
 
-	if (ScanPathAddGrf(filename)) (*num)++;
+	if (ScanPathAddGrf(filename)) num++;
 
-	/* Always return false, as we don't want to stop with listing all the files */
-	return false;
-}
-
-static uint ScanTar(const char *filename)
-{
-	uint num = 0;
-
-	FioTarFileList(filename, "rb", NULL, FioTarFileListScanNewGRFCallback, &num);
 	return num;
 }
 
@@ -384,7 +376,7 @@
 {
 	Searchpath sp;
 	char path[MAX_PATH];
-	const char *tar;
+	TarFileList::iterator tar;
 	uint num = 0;
 
 	ClearGRFConfigList(&_all_grfs);