-
Notifications
You must be signed in to change notification settings - Fork 16
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
1a58cf0
commit 566e763
Showing
7 changed files
with
39,959 additions
and
0 deletions.
There are no files selected for viewing
Binary file not shown.
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
README | ||
|
||
This data directory contains the following files and scripts: | ||
|
||
PROJECT_GUTENBERG_LICENSE.txt - license (excerpted from the end of the pg1404.txt file) | ||
|
||
pg1404.txt - text of the Federalist Papers, with minor irregularities cleaned up | ||
pg1404.txt.as.downloaded - text of the Federalist Papers as downloaded from: | ||
http://www.gutenberg.org/cache/epub/1404/pg1404.txt | ||
|
||
> diff pg1404.txt pg1404.txt.as.downloaded | ||
3569,3571c3569,3570 | ||
< Confederation to Preserve the Union) | ||
< | ||
< For the New York Packet. Friday, December 7, 1787 | ||
--- | ||
> Confederation to Preserve the Union) For the New York Packet. Friday, | ||
> December 7, 1787 | ||
8397,8399c8396 | ||
< Sustained | ||
< | ||
< For the New York Packet. Friday, January 18, 1788. | ||
--- | ||
> Sustained For the New York Packet. Friday, January 18, 1788. | ||
12679c12676 | ||
< For the Independent Journal. Wednesday, February 20, 1788. | ||
--- | ||
> Considered For the Independent Journal Wednesday, February 20, 1788. | ||
|
||
get_fedpapers.sh - shell script that creates a sub-directory called "texts" | ||
and populates it with the individual papers, one per file, named "paper_XX.txt" | ||
|
||
|
||
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
#!/bin/sh
# Splits pg1404.txt (read from the current directory) into one file per
# paper under ./texts, named paper_01.txt ... paper_85.txt.
#
# Each awk pass starts by writing to /dev/null, switches to a new output
# file whenever a "FEDERALIST No. N" heading line is seen, and keeps writing
# to that file until the next matching heading. Because a pass only matches
# one width of paper number, the last file it produces also collects
# everything that follows it in the source, so it is trimmed afterwards.

# -p: do not fail when texts/ already exists (re-running the script);
# stop if the directory cannot be created at all.
mkdir -p texts || exit 1

# Pass 1: single-digit papers (1-9) -> texts/paper_01.txt ... paper_09.txt.
# The trailing [^0-9] keeps "No. 1" from matching "No. 10", "No. 11", ...
awk 'BEGIN{file="/dev/null"; g=0}/FEDERALIST No. [1-9][^0-9]/{close(file); g++; file="texts/paper_0"g".txt"}{print $0 > file}' pg1404.txt

# clean up last paper - paper_09.txt
# (drop everything from the "FEDERALIST No. 10" heading onwards)
mv texts/paper_09.txt texts/tmp
awk 'BEGIN{file="texts/paper_09.txt"}/FEDERALIST No. 10/{close(file); file="/dev/null"}{print $0 > file}' texts/tmp
rm texts/tmp

# Pass 2: two-digit papers (10-85) -> texts/paper_10.txt ... paper_85.txt.
# g starts at 9 so the first match is numbered 10.
awk 'BEGIN{file="/dev/null"; g=9}/FEDERALIST No. [1-9][0-9]/{close(file); g++; file="texts/paper_"g".txt"}{print $0 > file}' pg1404.txt

# clean up last paper - paper_85.txt
# (drop the Project Gutenberg trailer that follows the final paper)
mv texts/paper_85.txt texts/tmp
awk 'BEGIN{file="texts/paper_85.txt"}/End of the Project Gutenberg EBook/{close(file); file="/dev/null"}{print $0 > file}' texts/tmp
rm texts/tmp
Large diffs are not rendered by default.
Oops, something went wrong.
19,757 changes: 19,757 additions & 0 deletions
19,757
data/federalist-papers/pg1404.txt.as.downloaded
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
package com.colloquial.io; | ||
|
||
import com.aliasi.util.Streams; | ||
|
||
import java.io.File; | ||
import java.io.FileInputStream; | ||
import java.io.FileOutputStream; | ||
import java.io.FileNotFoundException; | ||
import java.io.InputStream; | ||
import java.io.OutputStream; | ||
import java.io.IOException; | ||
|
||
/**
 * Command-line examples that copy the bytes of one file to another.
 *
 * <p>Both entry points take the source path as {@code args[0]} and the
 * destination path as {@code args[1]}. The destination is created if it
 * does not exist and overwritten if it does.
 *
 * <p>NOTE(review): the {@code x} block comments look like snippet-extraction
 * markers used by external tooling; they are kept in place -- confirm before
 * moving or removing them.
 */
public class CopyFile {

    /**
     * Copies {@code args[0]} to {@code args[1]}.
     *
     * <p>Both streams are closed on every exit path, including when a read
     * or write fails part-way through. A partially written destination file
     * is left in place on failure.
     *
     * @param args source path followed by destination path
     * @throws FileNotFoundException if the source cannot be opened for
     *     reading or the destination cannot be opened for writing
     * @throws IOException if reading, writing or closing fails
     */
    public static void main(String[] args)
        throws FileNotFoundException, IOException {

        File fileIn = new File(args[0]);
        File fileOut = new File(args[1]);
        /*x CopyFile.1 */
        try (InputStream in = new FileInputStream(fileIn);
             OutputStream out = new FileOutputStream(fileOut)) {
            byte[] buf = new byte[8192];
            int n;
            // read returns -1 at end of stream
            while ((n = in.read(buf)) >= 0) {
                out.write(buf, 0, n);
            }
        }
        /*x*/
    }

    /**
     * Copies {@code args[0]} to {@code args[1]}, removing the destination
     * file if the copy fails after the destination has been opened.
     *
     * <p>A failure to open the destination never triggers the delete, so an
     * existing file or directory at that path is not removed just because it
     * could not be opened for writing.
     *
     * @param args source path followed by destination path
     * @throws FileNotFoundException if the source cannot be opened for
     *     reading or the destination cannot be opened for writing
     * @throws IOException if reading, writing or closing fails
     */
    public static void main2(String[] args)
        throws FileNotFoundException, IOException {

        File fileIn = new File(args[0]);
        File fileOut = new File(args[1]);
        /*x CopyFile.2 */
        try (InputStream in = new FileInputStream(fileIn)) {
            // Opened outside the inner try so that an open failure is not
            // routed through the catch clause that deletes the destination.
            OutputStream opened = new FileOutputStream(fileOut);
            try (OutputStream out = opened) {
                byte[] buf = new byte[8192];
                int n;
                while ((n = in.read(buf)) >= 0) {
                    out.write(buf, 0, n);
                }
            } catch (IOException e) {
                // The output stream has already been closed when this
                // clause runs, so the delete is not blocked by an open
                // handle; a failure of close() itself also lands here.
                fileOut.delete();
                throw e;
            }
        }
        /*x*/
    }

}