Staging
v0.5.1
swh:1:snp:c5feb7ee9221a3820c8879e85e8a18470c0b3afa
Raw File
Tip revision: 32e49d5d1ccbc4833b0c6f30d6dfc0f0936cc3f3 authored by Junio C Hamano on 10 October 2005, 02:19:36 UTC
GIT 0.99.8c
Tip revision: 32e49d5
mailsplit.c
/*
 * Totally braindamaged mbox splitter program.
 *
 * It just splits an mbox into a list of files: "0001", "0002", ...
 * so you can process them further from there.
 */
#include <unistd.h>
#include <stdlib.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <string.h>
#include <stdio.h>
#include <ctype.h>
#include <assert.h>

/*
 * Print the command-line synopsis on stderr and terminate the process
 * with exit status 1.  Control never returns to the caller.
 */
static int usage(void)
{
	fputs("mailsplit <mbox> <directory>\n", stderr);
	exit(1);
}

/*
 * Return the length of the first line in the `size`-byte buffer `map`,
 * counting the terminating '\n' when there is one.
 *
 * If no newline occurs within `size` bytes, the whole buffer is treated
 * as a single line and `size` is returned.  An empty buffer (size == 0)
 * yields 0 and `map` is not dereferenced.
 */
static int linelen(const char *map, unsigned long size)
{
	const char *newline;

	/* Nothing to scan: do not touch the buffer at all. */
	if (!size)
		return 0;

	newline = memchr(map, '\n', size);
	if (!newline)
		return (int)size;

	/* +1 so that the newline itself is part of the line. */
	return (int)(newline - map + 1);
}

/*
 * Decide whether the `len`-byte line starting at `line` looks like an
 * mbox separator such as
 *
 *	From user@example.org Sat Apr 16 09:42:00 2005
 *
 * `len` is the full length of the line, including its trailing '\n' if
 * it has one.  The line qualifies when it
 *   - is at least 20 bytes long and starts with "From ",
 *   - contains a ':' that has digits at offsets -4, -2, -1, +1 and +2
 *     (the shape of the last colon in "h:mm:ss"), and
 *   - carries, from three bytes after that colon, a decimal number
 *     greater than 90 (the year).
 *
 * Only bytes inside [line, line + len) are examined; the buffer does not
 * need to be NUL-terminated.
 *
 * Returns 1 if the line looks like a separator, 0 otherwise.
 */
static int is_from_line(const char *line, int len)
{
	const char *end, *colon, *p;
	long year = 0;

	if (len < 20 || memcmp("From ", line, 5))
		return 0;

	end = line + len;
	colon = end - 2;
	line += 5;

	/* Walk backwards to the last ':' located after the "From " prefix. */
	for (;;) {
		if (colon < line)
			return 0;
		if (*--colon == ':')
			break;
	}

	/* <ctype.h> functions need a value representable as unsigned char. */
	if (!isdigit((unsigned char)colon[-4]) ||
	    !isdigit((unsigned char)colon[-2]) ||
	    !isdigit((unsigned char)colon[-1]) ||
	    !isdigit((unsigned char)colon[ 1]) ||
	    !isdigit((unsigned char)colon[ 2]))
		return 0;

	/*
	 * year: parsed by hand instead of strtol() so that the scan stops
	 * at the end of this line rather than running into the next line
	 * or past the end of the buffer.
	 */
	p = colon + 3;
	while (p < end && isspace((unsigned char)*p))
		p++;
	if (p < end && *p == '-')
		return 0;	/* negative or unparsable: never above 90 */
	if (p < end && *p == '+')
		p++;
	for (; p < end && isdigit((unsigned char)*p); p++) {
		/* Stop accumulating once large enough; avoids overflow. */
		if (year < 100000)
			year = year * 10 + (*p - '0');
	}
	if (year <= 90)
		return 0;

	/* Ok, close enough */
	return 1;
}

/*
 * Find where the first message in the `size`-byte mbox buffer `map` ends.
 *
 * The buffer must be at least 6 bytes long and begin with "From ";
 * otherwise "corrupt mailbox" is printed on stderr and the process exits
 * with status 1.
 *
 * Returns the offset, in bytes from `map`, of the next line accepted by
 * is_from_line(), or the offset of the end of the buffer when no such
 * line exists.
 *
 * NOTE(review): the offset is accumulated as unsigned long but returned
 * as int, so a single message larger than INT_MAX would not be reported
 * correctly -- confirm whether callers need to support that.
 */
static int parse_email(const void *map, unsigned long size)
{
	/* Byte cursor: ISO C does not define arithmetic on void pointers. */
	const char *cur = map;
	unsigned long offset;

	if (size < 6 || memcmp("From ", cur, 5)) {
		fprintf(stderr, "corrupt mailbox\n");
		exit(1);
	}

	/*
	 * Step one byte into the leading "From " line so that it cannot be
	 * mistaken for the separator that ends this message.
	 */
	cur++;
	size--;
	offset = 1;

	/*
	 * Search for a line beginning with "From ", and
	 * having something that looks like a date format.
	 */
	do {
		int len = linelen(cur, size);

		if (is_from_line(cur, len))
			return (int)offset;
		cur += len;
		size -= len;
		offset += len;
	} while (size);

	return (int)offset;
}

/*
 * Entry point: mailsplit <mbox> <directory>
 *
 * Maps <mbox> read-only, changes into <directory>, and writes each
 * message found by parse_email() to a newly created file named "0001",
 * "0002", ...  Files are created with O_EXCL, so an existing file of the
 * same name is an error rather than being overwritten.
 *
 * Exits with status 1 on any failure; returns 0 once the whole mailbox
 * has been written out.
 */
int main(int argc, char **argv)
{
	int fd, nr;
	struct stat st;
	unsigned long size;
	void *base;
	const char *map;

	if (argc != 3)
		usage();

	/* Open the mbox before chdir() so a relative path still resolves. */
	fd = open(argv[1], O_RDONLY);
	if (fd < 0) {
		perror(argv[1]);
		exit(1);
	}
	if (chdir(argv[2]) < 0)
		usage();
	if (fstat(fd, &st) < 0) {
		perror("stat");
		exit(1);
	}
	size = st.st_size;
	base = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0);
	if (base == MAP_FAILED) {
		perror("mmap");
		close(fd);
		exit(1);
	}
	/* The mapping stays valid after the descriptor is closed. */
	close(fd);

	/* Byte cursor: ISO C does not define arithmetic on void pointers. */
	map = base;
	nr = 0;
	do {
		/* Large enough for any int printed with "%04d". */
		char name[16];
		unsigned long len = parse_email(map, size);
		unsigned long left;
		const char *out;

		assert(len <= size);
		snprintf(name, sizeof(name), "%04d", ++nr);
		fd = open(name, O_WRONLY | O_CREAT | O_EXCL, 0666);
		if (fd < 0) {
			perror(name);
			exit(1);
		}

		/* write() may accept fewer bytes than asked; keep going. */
		for (out = map, left = len; left; ) {
			ssize_t n = write(fd, out, left);

			if (n < 0) {
				perror("write");
				exit(1);
			}
			if (n == 0) {
				/* No progress and no errno: avoid spinning. */
				fprintf(stderr, "write: short write to %s\n", name);
				exit(1);
			}
			out += n;
			left -= n;
		}

		/* Deferred write errors can surface at close time. */
		if (close(fd) < 0) {
			perror(name);
			exit(1);
		}
		map += len;
		size -= len;
	} while (size > 0);
	return 0;
}
back to top