Move scripts

2018-09-07 19:26:45 +02:00
parent cbd8626c6f
commit 985b54b3fa
10 changed files with 0 additions and 0 deletions
--- a/scripts/CharacterInfoExtraction/HigurashiTextExtractor/Language.swift
+++ b/scripts/CharacterInfoExtraction/HigurashiTextExtractor/Language.swift
@@ -0,0 +1,114 @@
+//
+//  Language.swift
+//  CParser_CS440
+//
+
+import Foundation
+
+/// The side on which an operator groups: left or right.
+enum Associativity {
+	case left
+	case right
+}
+
+/// Names of the built-in types known to this file.
+var typeNames: Set<String> = ["bool", "char", "short", "int", "long", "float", "double"]
+
+/// Groups of type names; two names are convertible when they share a group.
+var convertibleTypes: [Set<String>] = [["bool", "char", "short", "int", "long"], ["float", "double"]]
+
+/// Reports whether `left` and `right` both belong to the same group in `convertibleTypes`.
+/// - Parameters:
+///   - left: The first type name.
+///   - right: The second type name.
+/// - Returns: `true` if some group contains both names, otherwise `false`.
+func isConvertible(left: String, right: String) -> Bool {
+	return convertibleTypes.contains { group in
+		group.contains(left) && group.contains(right)
+	}
+}
+
+/// A type slot: any type, no type, or one specific named type.
+enum Type: CustomStringConvertible {
+	case any
+	case noType
+	case specific(String)
+
+	/// `"Any"`, `"None"`, or the wrapped name for `.specific`.
+	var description: String {
+		switch self {
+		case .any:
+			return "Any"
+		case .noType:
+			return "None"
+		case let .specific(name):
+			return name
+		}
+	}
+}
+
+/// Binary operators keyed by an integer level (presumably precedence — confirm against the parser).
+/// Each entry pairs an associativity with the operator spellings at that level.
+let binaryOperators: [Int: (Associativity, Set<String>)] = [
+	110: (.left, ["||"]),
+	120: (.left, ["&&"]),
+	130: (.left, ["<", "<=", ">", ">=", "==", "!="]),
+	140: (.left, ["+", "-", "|", "^"]),
+	150: (.left, ["*", "/", "%", "&"]),
+	160: (.left, ["<<", ">>"])
+]
+
+/// Signature list: int op int -> int.
+let allIntegersBinaryOperator: [(left: Type, right: Type, out: Type)] = [
+	(left: .specific("int"), right: .specific("int"), out: .specific("int"))
+]
+/// Signature list: both operands and the result share one of int, double or float (in that order).
+let allNumbersBinaryOperator: [(left: Type, right: Type, out: Type)] = ["int", "double", "float"].map { name in
+	(left: Type.specific(name), right: Type.specific(name), out: Type.specific(name))
+}
+/// Signature list: bool op bool -> bool.
+let allBooleansBinaryOperator: [(left: Type, right: Type, out: Type)] = [
+	(left: .specific("bool"), right: .specific("bool"), out: .specific("bool"))
+]
+/// Signature list: any op any -> bool.
+let comparisonBinaryOperator: [(left: Type, right: Type, out: Type)] = [
+	(left: .any, right: .any, out: .specific("bool"))
+]
+
+/// Accepted operand/result signatures for every binary operator spelling.
+let binaryOperatorTypes: [String: [(left: Type, right: Type, out: Type)]] = {
+	var table: [String: [(left: Type, right: Type, out: Type)]] = [:]
+	for symbol in ["<<", ">>", "%", "&", "|", "^"] {
+		table[symbol] = allIntegersBinaryOperator
+	}
+	for symbol in ["*", "/", "+", "-"] {
+		table[symbol] = allNumbersBinaryOperator
+	}
+	for symbol in ["<", "<=", ">", ">=", "==", "!="] {
+		table[symbol] = comparisonBinaryOperator
+	}
+	for symbol in ["&&", "||"] {
+		table[symbol] = allBooleansBinaryOperator
+	}
+	return table
+}()
+
+/// Every assignment operator spelling.
+let assignmentOperators: Set<String> = ["=", "*=", "/=", "%=", "+=", "-=", "<<=", ">>=", "&=", "^=", "|="]
+
+/// Types each assignment operator accepts: `=` takes anything, arithmetic forms take
+/// int/double/float, and the remaining forms take int only.
+let assignmentOperatorTypes: [String: [Type]] = {
+	let integerOnly: [Type] = [.specific("int")]
+	let anyNumber: [Type] = [.specific("int"), .specific("double"), .specific("float")]
+	var table: [String: [Type]] = ["=": [.any]]
+	for symbol in ["*=", "/=", "+=", "-="] {
+		table[symbol] = anyNumber
+	}
+	for symbol in ["%=", "<<=", ">>=", "&=", "^=", "|="] {
+		table[symbol] = integerOnly
+	}
+	return table
+}()
+
+/// Operator spellings allowed before an operand.
+let prefixOperators: Set<String> = ["!", "~", "++", "--", "+", "-"]
+/// Operator spellings allowed after an operand.
+let postfixOperators: Set<String> = ["++", "--"]
+
+
+/// Signature list: int -> int.
+let allIntegersUnaryOperator: [(in: Type, out: Type)] = [
+	(in: .specific("int"), out: .specific("int"))
+]
+/// Signature list: operand and result share one of int, double or float (in that order).
+let allNumbersUnaryOperator: [(in: Type, out: Type)] = ["int", "double", "float"].map { name in
+	(in: Type.specific(name), out: Type.specific(name))
+}
+/// Signature list: bool -> bool.
+let allBooleansUnaryOperator: [(in: Type, out: Type)] = [
+	(in: .specific("bool"), out: .specific("bool"))
+]
+
+/// Accepted operand/result signatures for every unary operator spelling.
+let unaryOperatorTypes: [String: [(in: Type, out: Type)]] = {
+	var table: [String: [(in: Type, out: Type)]] = [
+		"!": allBooleansUnaryOperator,
+		"~": allIntegersUnaryOperator
+	]
+	for symbol in ["++", "--", "+", "-"] {
+		table[symbol] = allNumbersUnaryOperator
+	}
+	return table
+}()
+
+/// Punctuation that is not an operator.
+let otherPunctuation: Set<String> = ["(", ")", "{", "}", "[", "]", ";", ",", "."]
+/// Spellings that open a comment.
+let commentPunctuation: Set<String> = ["//", "/*"]
+
+/// Union of every operator, punctuation and comment-opening spelling.
+let allPunctuation: Set<String> = {
+	var symbols = prefixOperators
+	symbols.formUnion(postfixOperators)
+	symbols.formUnion(assignmentOperators)
+	for (_, level) in binaryOperators {
+		symbols.formUnion(level.1)
+	}
+	symbols.formUnion(otherPunctuation)
+	symbols.formUnion(commentPunctuation)
+	return symbols
+}()
+/// Every unicode scalar that occurs in some punctuation spelling.
+let punctuationCharacters: Set<UnicodeScalar> = allPunctuation.reduce(into: []) { scalars, symbol in
+	scalars.formUnion(symbol.unicodeScalars)
+}
+/// Length of the longest punctuation spelling (0 if there are none).
+let longestPunctuation = allPunctuation.map { $0.count }.max() ?? 0
+/// Punctuation scalars plus the ASCII digits.
+let nonIdentifierCharacters = punctuationCharacters.union("0123456789".unicodeScalars)
+
--- a/scripts/CharacterInfoExtraction/HigurashiTextExtractor/README.md
+++ b/scripts/CharacterInfoExtraction/HigurashiTextExtractor/README.md
@@ -0,0 +1 @@
+Extracts text from Higurashi script files.  Compile with `swiftc -O -wmo *.swift -o HigurashiTextExtractor`.  Run with `./HigurashiTextExtractor scriptFile [e|j]`, where `e` outputs the English script and `j` outputs the Japanese script; with neither, both are output.
--- a/scripts/CharacterInfoExtraction/HigurashiTextExtractor/Scanner.swift
+++ b/scripts/CharacterInfoExtraction/HigurashiTextExtractor/Scanner.swift
@@ -0,0 +1,315 @@
+//
+//  Scanner.swift
+//  CParser_CS440
+//
+
+import Foundation
+
+/// Category of a scanned token.
+enum TokenType: String {
+	case stringLiteral
+	case characterLiteral
+	case punctuation
+	case identifier
+}
+
+/// Failures reported while scanning; each carries the row and column involved.
+enum TokenizationError: Error {
+	/// A quoted literal was not closed; `string` is the text captured so far.
+	case unclosedString(row: Int, column: Int, string: String)
+	/// `character` could not be matched to any known punctuation.
+	case badPunctuation(row: Int, column: Int, character: UnicodeScalar)
+}
+
+/// An ordered list of token types that can be compared and hashed.
+struct TokenListSignature: Hashable, Equatable {
+	let list: [TokenType]
+
+	/// Wraps an existing list of token types.
+	init(_ array: [TokenType]) {
+		list = array
+	}
+
+	/// Takes the `type` of each token, in order.
+	init(from tokens: [Token]) {
+		self.init(tokens.map { $0.type })
+	}
+
+	/// Two signatures are equal when they hold the same types in the same order.
+	static func ==(lhs: TokenListSignature, rhs: TokenListSignature) -> Bool {
+		return lhs.list == rhs.list
+	}
+
+	/// Starts at 5381 and, for each element, computes `hash * 33 + element.hashValue` with wrapping addition.
+	var hashValue: Int {
+		var hash = 5381
+		for type in list {
+			hash = (hash << 5) &+ hash &+ type.hashValue
+		}
+		return hash
+	}
+}
+
+/// A window (`back ..< front`) over a string's unicode scalars that also tracks the
+/// row and column of both ends.
+///
+/// Rows and columns are 1-based. A `"\n"` scalar is the last column of its row; the
+/// scalar after it is column 1 of the next row.
+///
+/// NOTE(review): when created with `atEnd: true` the row/column fields still start at
+/// 1/1 rather than the real position of the end of the string.
+struct MovingStringRange {
+	let string: String.UnicodeScalarView
+	private(set) var back: String.UnicodeScalarIndex
+	private(set) var front: String.UnicodeScalarIndex
+	/// Number of scalars between `back` and `front`.
+	private(set) var length: Int
+	private(set) var backRow: Int
+	private(set) var backColumn: Int
+	private(set) var frontRow: Int
+	private(set) var frontColumn: Int
+	/// Creates an empty range at the start of `string`, or at its end when `atEnd` is true.
+	init(_ string: String.UnicodeScalarView, atEnd: Bool = false) {
+		self.string = string
+		self.length = 0
+		self.back = atEnd ? string.endIndex : string.startIndex
+		self.front = back
+		self.backRow = 1
+		self.backColumn = 1
+		self.frontRow = 1
+		self.frontColumn = 1
+	}
+	init(_ string: String, atEnd: Bool = false) {
+		self.init(string.unicodeScalars, atEnd: atEnd)
+	}
+	/// 1-based column of the scalar at `index`, found by counting back to the previous
+	/// `"\n"` or to the start of the string, whichever comes first.
+	private func column(at index: String.UnicodeScalarIndex) -> Int {
+		var result = 1
+		var check = index
+		// Test against startIndex *before* stepping back so we never move past the start.
+		while check > string.startIndex {
+			check = string.index(before: check)
+			if string[check] == "\n" {
+				break
+			}
+			result += 1
+		}
+		return result
+	}
+	private mutating func advanceFront() {
+		if frontChar == "\n" {
+			frontColumn = 1
+			frontRow += 1
+		}
+		else {
+			frontColumn += 1
+		}
+		front = string.index(after: front)
+		length += 1
+	}
+	private mutating func retreatFront() {
+		front = string.index(before: front)
+		length -= 1
+		if frontChar == "\n" {
+			// Stepped back onto the end of the previous row; recompute its column.
+			frontColumn = column(at: front)
+			frontRow -= 1
+		}
+		else {
+			frontColumn -= 1
+		}
+	}
+	/// Moves `front` forward by `by` scalars, or backward when `by` is negative.
+	mutating func advanceFront(by: Int = 1) {
+		if by > 0 {
+			for _ in 0..<by {
+				advanceFront()
+			}
+		}
+		else {
+			for _ in 0..<(-by) {
+				retreatFront()
+			}
+		}
+	}
+	private mutating func advanceBack() {
+		if backChar == "\n" {
+			backColumn = 1
+			backRow += 1
+		}
+		else {
+			backColumn += 1
+		}
+		back = string.index(after: back)
+		// Moving the back edge forward shrinks the range.
+		length -= 1
+	}
+	private mutating func retreatBack() {
+		back = string.index(before: back)
+		// Moving the back edge backward widens the range.
+		length += 1
+		if backChar == "\n" {
+			backColumn = column(at: back)
+			backRow -= 1
+		}
+		else {
+			backColumn -= 1
+		}
+	}
+	/// Moves `back` forward by `by` scalars, or backward when `by` is negative.
+	mutating func advanceBack(by: Int = 1) {
+		if by > 0 {
+			for _ in 0..<by {
+				advanceBack()
+			}
+		}
+		else {
+			for _ in 0..<(-by) {
+				retreatBack()
+			}
+		}
+	}
+	/// Collapses the range onto `front`.
+	mutating func setBackToFront() {
+		back = front
+		backColumn = frontColumn
+		backRow = frontRow
+		length = 0
+	}
+	/// Collapses the range onto `back`.
+	mutating func setFrontToBack() {
+		front = back
+		frontColumn = backColumn
+		frontRow = backRow
+		length = 0
+	}
+	/// The text currently between `back` and `front`.
+	var currentRange: String {
+		return String(string[back..<front])
+	}
+	/// The scalar at `front`; must not be read when `frontIsEnd`.
+	var frontChar: UnicodeScalar {
+		return string[front]
+	}
+	/// The scalar at `back`; must not be read when `backIsEnd`.
+	var backChar: UnicodeScalar {
+		return string[back]
+	}
+	var backIsBeginning: Bool {
+		return back <= string.startIndex
+	}
+	var backIsEnd: Bool {
+		return back >= string.endIndex
+	}
+	var frontIsBeginning: Bool {
+		return front <= string.startIndex
+	}
+	var frontIsEnd: Bool {
+		return front >= string.endIndex
+	}
+}
+
+extension UnicodeScalar {
+	/// True for U+000A...U+000D, U+0085, U+2028 and U+2029.
+	var isNewline: Bool {
+		switch value {
+		case 0x0a...0x0d, 0x85, 0x2028, 0x2029:
+			return true
+		default:
+			return false
+		}
+	}
+	/// True for horizontal tab, space, no-break space and the other space scalars listed below.
+	///
+	/// U+0009 (tab) is included so that tab-indented input is split into tokens
+	/// instead of the tab becoming part of the following identifier.
+	var isWhitespace: Bool {
+		switch value {
+		case 0x09, 0x20, 0xa0, 0x1680, 0x2000...0x200a, 0x202f, 0x205f, 0x3000:
+			return true
+		default:
+			return false
+		}
+	}
+	/// True when the scalar is either a newline or whitespace as defined above.
+	var isNewlineOrWhitespace: Bool {
+		return isNewline || isWhitespace
+	}
+}
+
+/// A lexical token together with the row and column at which it starts.
+struct Token: CustomStringConvertible {
+	let type: TokenType
+	let value: String
+	let row: Int
+	let column: Int
+	init(type: TokenType, value: String, row: Int, column: Int) {
+		self.type = type
+		self.value = value
+		self.row = row
+		self.column = column
+	}
+	init(type: TokenType, value: String.UnicodeScalarView, row: Int, column: Int) {
+		self.init(type: type, value: String(value), row: row, column: column)
+	}
+	/// Identifiers and punctuation print as-is; literals are wrapped in their quote character again.
+	var description: String {
+		switch type {
+		case .identifier, .punctuation:
+			return value
+		case .stringLiteral:
+			return "\"\(value)\""
+		case .characterLiteral:
+			return "'\(value)'"
+		}
+	}
+	
+	/// Splits `input` into tokens.
+	///
+	/// Whitespace and newlines separate tokens. `//` and `/* */` comments are skipped.
+	/// Text inside `"` or `'` becomes a string or character literal token whose value
+	/// excludes the quotes and keeps escape sequences as written. Runs of punctuation
+	/// characters are split by trying the longest candidate first against
+	/// `allPunctuation`. Everything else accumulates into `.identifier` tokens.
+	///
+	/// - Parameter input: The source text to scan.
+	/// - Returns: The tokens in source order.
+	/// - Throws: `TokenizationError.badPunctuation` when a punctuation character does not
+	///   start any known punctuation, and `TokenizationError.unclosedString` when a quoted
+	///   literal reaches a newline or the end of input before its closing quote.
+	static func tokenize(input: String) throws -> [Token] {
+		var inputRange = MovingStringRange(input)
+		var tokens: [Token] = []
+		
+		while !inputRange.backIsEnd {
+			if inputRange.frontIsEnd { // If this is the end of the file
+				if (inputRange.length > 0) {
+					tokens.append(Token(type: .identifier, value: inputRange.currentRange, row: inputRange.backRow, column: inputRange.backColumn))
+				}
+				inputRange.setBackToFront()
+				continue
+			}
+			else if inputRange.frontChar.isNewlineOrWhitespace { // Whitespace, end of token
+				if inputRange.length > 0 { // If there's multiple whitespace chars in a row, don't add empty tokens
+					tokens.append(Token(type: .identifier, value: inputRange.currentRange, row: inputRange.backRow, column: inputRange.backColumn))
+				}
+				inputRange.advanceFront()
+				inputRange.setBackToFront()
+			}
+			else if punctuationCharacters.contains(inputRange.frontChar) {
+				if inputRange.length > 0 { // Add the previous identifier if it exists
+					tokens.append(Token(type: .identifier, value: inputRange.currentRange, row: inputRange.backRow, column: inputRange.backColumn))
+				}
+				inputRange.setBackToFront()
+				while !inputRange.frontIsEnd && (punctuationCharacters.contains(inputRange.frontChar) || inputRange.length > 0) {
+					// Keep going until we reach the end of the file and have parsed it all
+					inputRange.advanceFront()
+					if inputRange.length > longestPunctuation || inputRange.frontIsEnd || !punctuationCharacters.contains(inputRange.frontChar) {
+						// Shrink the candidate until it is a known punctuation token (longest match wins)
+						var punctuationToken = inputRange.currentRange
+						while inputRange.length > 1 && !allPunctuation.contains(punctuationToken) {
+							inputRange.advanceFront(by: -1)
+							punctuationToken = inputRange.currentRange
+						}
+						if commentPunctuation.contains(punctuationToken) {
+							if punctuationToken == "//" {
+								while !inputRange.frontIsEnd && !inputRange.frontChar.isNewline {
+									inputRange.advanceFront()
+								}
+							}
+							else {
+								while !inputRange.frontIsEnd && inputRange.frontChar != "/" {
+									while !inputRange.frontIsEnd && inputRange.frontChar != "*" {
+										inputRange.advanceFront()
+									}
+									// An unterminated comment can end here; never step past the end of input
+									if !inputRange.frontIsEnd {
+										inputRange.advanceFront()
+									}
+								}
+							}
+							// Skip the newline or closing "/", unless the comment ran to the end of input
+							if !inputRange.frontIsEnd {
+								inputRange.advanceFront()
+							}
+							inputRange.setBackToFront()
+							continue
+						}
+						if allPunctuation.contains(punctuationToken) {
+							tokens.append(Token(type: .punctuation, value: punctuationToken, row: inputRange.backRow, column: inputRange.backColumn))
+							inputRange.setBackToFront()
+						}
+						else {
+							throw TokenizationError.badPunctuation(row: inputRange.backRow, column: inputRange.backColumn, character: inputRange.backChar)
+						}
+					}
+				}
+			}
+			else if inputRange.frontChar == "\"" || inputRange.frontChar == "'" {
+				let quoteType = inputRange.frontChar
+				if inputRange.length > 0 { // Add the previous identifier if it exists
+					tokens.append(Token(type: .identifier, value: inputRange.currentRange, row: inputRange.backRow, column: inputRange.backColumn))
+				}
+				inputRange.advanceFront()
+				inputRange.setBackToFront()
+				while true {
+					if inputRange.frontIsEnd || inputRange.frontChar.isNewline {
+						// Step back onto the opening quote so the error points at it
+						inputRange.advanceBack(by: -1)
+						throw TokenizationError.unclosedString(row: inputRange.backRow, column: inputRange.backColumn, string: inputRange.currentRange)
+					}
+					else if inputRange.frontChar == "\\" {
+						// Skip the backslash and the escaped character; a trailing backslash at the
+						// end of input falls through to the unclosed-string error above
+						inputRange.advanceFront()
+						if !inputRange.frontIsEnd {
+							inputRange.advanceFront()
+						}
+					}
+					else if inputRange.frontChar == quoteType {
+						let type: TokenType
+						if quoteType == "'" {
+							type = .characterLiteral
+						}
+						else {
+							type = .stringLiteral
+						}
+						// backColumn is just after the opening quote, so report one column earlier
+						tokens.append(Token(type: type, value: inputRange.currentRange, row: inputRange.backRow, column: inputRange.backColumn - 1))
+						inputRange.advanceFront()
+						inputRange.setBackToFront()
+						break
+					}
+					else {
+						inputRange.advanceFront()
+					}
+				}
+			}
+			else {
+				inputRange.advanceFront()
+			}
+		}
+		return tokens
+	}
+}
--- a/scripts/CharacterInfoExtraction/HigurashiTextExtractor/main.swift
+++ b/scripts/CharacterInfoExtraction/HigurashiTextExtractor/main.swift
@@ -0,0 +1,93 @@
+import Foundation
+
+guard CommandLine.arguments.count > 1 else {
+	print("Usage: \(CommandLine.arguments[0]) file [(e|j)]\nExtracts text from Higurashi script files.  Use e or j to specify English or Japanese, otherwise you'll get both")
+	exit(EXIT_FAILURE)
+}
+
+// Output selection is a bit mask: 1 = Japanese, 2 = English, 3 = both.
+var verbose = false
+var mode = 3
+// Options come after the script path (argument 1). Only those are inspected, so a
+// program or script that happens to be named "e", "j" or "-v" is not taken for an option.
+let optionArguments = CommandLine.arguments.dropFirst(2).map { $0.lowercased() }
+if optionArguments.contains("e") { mode = 2 }
+if optionArguments.contains("j") { mode = 1 }
+if optionArguments.contains("-v") { verbose = true }
+
+var standardError = FileHandle.standardError
+
+extension FileHandle : TextOutputStream {
+	/// Encodes `string` as UTF-8 and writes the bytes to this handle; writes nothing if encoding fails.
+	public func write(_ string: String) {
+		if let encoded = string.data(using: .utf8) {
+			write(encoded)
+		}
+	}
+}
+
+/// A script statement of the form `name(argument, argument, ...)`.
+struct Command {
+	/// The identifier before the opening parenthesis.
+	let name: String
+	/// The tokens between the parentheses, with separator commas removed.
+	let arguments: [Token]
+	
+	/// Builds a command from one statement's tokens.
+	///
+	/// Returns nil unless the tokens are an identifier, a `(` punctuation token, any
+	/// number of further tokens, and a final `)` punctuation token.
+	init?(tokens: [Token]) {
+		guard tokens.count >= 3, let closing = tokens.last else { return nil }
+		guard tokens[0].type == .identifier else { return nil }
+		guard tokens[1].type == .punctuation && tokens[1].value == "(" else { return nil }
+		guard closing.type == .punctuation && closing.value == ")" else { return nil }
+		self.name = tokens[0].value
+		// Drop only punctuation commas: a string literal whose text is "," is a real
+		// argument, and removing it would shift the positions of the arguments after it.
+		self.arguments = tokens.dropFirst(2).dropLast().filter { !($0.type == .punctuation && $0.value == ",") }
+	}
+}
+
+/// Reads a script and splits its tokens into statements.
+///
+/// - Parameter path: Path of the script file, or `"-"` to read standard input.
+/// - Returns: One token list per statement; the `;`, `{` and `}` punctuation tokens
+///   that separate statements are not included, and empty statements are omitted.
+/// - Throws: Any error from reading the file or from `Token.tokenize(input:)`.
+func loadFile(path: String) throws -> [[Token]] {
+	let file: String
+	if path == "-" {
+		file = String(decoding: FileHandle.standardInput.readDataToEndOfFile(), as: UTF8.self)
+	} else {
+		file = try String(contentsOf: URL(fileURLWithPath: path))
+	}
+	let tokens = try Token.tokenize(input: file)
+	// Only punctuation tokens separate statements; a string literal such as ";" or "{"
+	// must stay inside its statement.
+	let separators: Set<String> = [";", "{", "}"]
+	let statements = tokens.split(whereSeparator: { $0.type == .punctuation && separators.contains($0.value) }).map(Array.init)
+	return statements
+}
+
+
+
+// Statements of the script named by the first command-line argument.
+let tokens = try loadFile(path: CommandLine.arguments[1])
+// Keep only the statements that parse as commands; in verbose mode the rest are reported on stderr.
+let commands = tokens.compactMap { statement -> Command? in
+	let command = Command(tokens: statement)
+	if command == nil && verbose {
+		print("\(statement) was not a command!", to: &standardError)
+	}
+	return command
+}
+
+// Command names that are skipped without any output (the duplicated "StopSE" entry was removed).
+let ignore: Set<String> = [
+	"FadeOutBGM", "DisableWindow", "DrawScene", "PlayBGM", "Wait", "SetValidityOfInput",
+	"DrawSceneWithMask", "SetSpeedOfMessage", "DrawBustshot", "FadeBustshot",
+	"DrawBustshotWithFiltering", "FadeBustshotWithFiltering", "PlaySE", "ShakeScreen",
+	"DrawFilm", "FadeFilm", "FadeAllBustshots", "DrawSpriteWithFiltering", "MoveSprite",
+	"DrawSprite", "FadeSprite", "TitleScreen", "SetLocalFlag", "ShowChapterPreview",
+	"SetCharSpacing", "SetLineSpacing", "SetScreenAspect", "SetWindowPos", "SetWindowSize",
+	"SetWindowMargins", "FadeBG", "SetValidityOfSkipping", "SetGUIPosition",
+	"SetStyleOfMessageSwinging", "EnableJumpingOfReturnIcon", "SetValidityOfTextFade",
+	"SetValidityOfInterface", "Negative", "CallScript", "SavePoint",
+	"SetValidityOfWindowDisablingWhenGraphicsControl", "SetFontSize", "SetNameFormat",
+	"SetFontId", "StopBGM", "SetGlobalFlag", "LanguagePrompt", "SetValidityOfSaving",
+	"ShowTips", "CheckTipsAchievements", "if", "StoreValueToLocalWork", "DrawBG",
+	"ChangeScene", "StopSE", "ShakeScreenSx", "GetAchievement", "CallSection",
+	"JumpSection", "SetDrawingPointOfMessage"
+]
+// Accumulated output text for each language.
+var japanese = ""
+var english = ""
+
+/// Converts a command argument into the text it represents.
+///
+/// A string literal has `\"` replaced by `"` and then `\n` replaced by a newline.
+/// The bare token `NULL` yields an empty string. Any other token stops the program.
+func stringFromLiteral(literal: Token) -> String {
+	if literal.type == .stringLiteral {
+		let withQuotes = literal.value.replacingOccurrences(of: "\\\"", with: "\"")
+		return withQuotes.replacingOccurrences(of: "\\n", with: "\n")
+	}
+	if literal.value == "NULL" {
+		return ""
+	}
+	fatalError("\(literal) wasn't a string literal!")
+}
+
+// Collect the text of every command that is not in the ignore list.
+for command in commands where !ignore.contains(command.name) {
+	let arguments = command.arguments
+	switch command.name {
+	case "ClearMessage":
+		japanese += "\n\n"
+		english += "\n\n"
+	case "OutputLineAll":
+		// One line shared by both languages
+		let line = stringFromLiteral(literal: arguments[1])
+		japanese += line
+		english += line
+	case "OutputLine":
+		// Argument 1 is the Japanese text, argument 3 the English text
+		japanese += stringFromLiteral(literal: arguments[1])
+		english += stringFromLiteral(literal: arguments[3])
+	default:
+		if verbose { print(command, to: &standardError) }
+	}
+}
+
+// Bit 1 of mode selects Japanese output, bit 2 selects English output.
+if (mode & 1) != 0 { print(japanese) }
+if (mode & 2) != 0 { print(english) }
--- a/scripts/CharacterInfoExtraction/KanjiFinder.swift
+++ b/scripts/CharacterInfoExtraction/KanjiFinder.swift
@@ -0,0 +1,90 @@
+import Foundation
+
+// Mutable so it can be passed as the `to:` target of `print`.
+var standardError = FileHandle.standardError
+
+extension FileHandle : TextOutputStream {
+	/// Encodes `string` as UTF-8 and writes the bytes to this handle; writes nothing if encoding fails.
+	public func write(_ string: String) {
+		if let encoded = string.data(using: .utf8) {
+			write(encoded)
+		}
+	}
+}
+
+// Without any argument, print the usage text to stderr and stop with a failure status.
+if CommandLine.arguments.count <= 1 {
+	let usage = """
+		Usage: \(CommandLine.arguments[0]) [-filter filterFile.txt] assetBundle1.assets [assetBundle2.assets ...]
+		Use - to read from stdin
+		Finds 3-byte unicode characters (like kanji) in files
+		If a filter is supplied, only characters also in the filter will be outputted
+		"""
+	print(usage, to: &standardError)
+	exit(EXIT_FAILURE)
+}
+
+// Compatibility shim, compiled only by compilers older than Swift 4.2:
+// provides `firstIndex(where:)` by forwarding to `index(where:)`.
+#if !swift(>=4.2)
+extension Collection {
+	/// Forwards to `index(where:)` and returns its result unchanged.
+	func firstIndex(where predicate: (Element) throws -> Bool) rethrows -> Index? {
+		return try self.index(where: predicate)
+	}
+}
+#endif
+
+// Text of the optional filter file; nil when no filter was given.
+var filter: String? = nil
+// Every argument after the program name; the filter option is removed below.
+var inFiles: [String] = Array(CommandLine.arguments.dropFirst())
+
+// "-filter <file>": load the file and drop both arguments from the input list.
+// A "-filter" with nothing after it is left in place.
+if let filterIndex = inFiles.firstIndex(where: { $0.lowercased() == "-filter" }), filterIndex + 1 < inFiles.endIndex {
+	let filterPath = inFiles[filterIndex + 1]
+	filter = try String(contentsOf: URL(fileURLWithPath: filterPath))
+	inFiles.removeSubrange(filterIndex...(filterIndex + 1))
+}
+
+// Raw contents of each input; a lone "-" means read standard input instead.
+let bundles: [Data]
+if inFiles.count == 1 && inFiles[0] == "-" {
+	bundles = [FileHandle.standardInput.readDataToEndOfFile()]
+} else {
+	bundles = try inFiles.map { path in
+		try Data(contentsOf: URL(fileURLWithPath: path))
+	}
+}
+
+extension UTF8.CodeUnit {
+	/// True when the byte has the form `1110xxxx` (lead byte of a 3-byte UTF-8 sequence).
+	var isStart3: Bool {
+		return self & 0b11110000 == 0b11100000
+	}
+	/// True when the byte has the form `10xxxxxx` (UTF-8 continuation byte).
+	var isContinuation: Bool {
+		return self & 0b11000000 == 0b10000000
+	}
+}
+
+/// Extracts runs of consecutive 3-byte UTF-8 sequences from `data`.
+///
+/// - Parameters:
+///   - data: The raw bytes to search.
+///   - minLength: Minimum number of consecutive 3-byte sequences a run needs to be kept.
+/// - Returns: The bytes of all kept runs, concatenated in order and decoded as UTF-8.
+func unicodeFinder(data: [UInt8], minLength: Int = 2) -> String {
+	var out = [UInt8]()
+	var left = data[...]
+	while let start = left.firstIndex(where: { $0.isStart3 }) {
+		left = left[start...]
+		guard left.count > 5 else { break }
+		// Walk forward one whole 3-byte sequence at a time. Requiring `i + 2` to be in
+		// bounds keeps a lead byte near the end of the data from indexing past it.
+		var good = 0
+		var i = start
+		while i + 2 < left.endIndex && left[i].isStart3 && left[i + 1].isContinuation && left[i + 2].isContinuation {
+			good += 1
+			i += 3
+		}
+		if good >= minLength {
+			out.append(contentsOf: left[..<i])
+		}
+		// Resume one byte past where the run stopped. Clamping to endIndex makes a run
+		// that reaches the end of the data finish the search instead of repeating forever.
+		left = left[min(i + 1, left.endIndex)...]
+	}
+	return String(decoding: out, as: UTF8.self)
+}
+
+// Text found in each input, then the set of distinct scalars across all inputs.
+let unicodeStrings = bundles.map { unicodeFinder(data: Array($0)) }
+var chars = Set<UnicodeScalar>()
+for text in unicodeStrings {
+	chars.formUnion(text.unicodeScalars)
+}
+// With a filter, keep only the scalars that also occur in the filter text.
+if let filter = filter {
+	chars.formIntersection(filter.unicodeScalars)
+}
+
+// Print the scalars in sorted order without a trailing newline.
+print(String(chars.sorted().lazy.map(Character.init)), terminator: "")
--- a/scripts/CharacterInfoExtraction/README.md
+++ b/scripts/CharacterInfoExtraction/README.md
@@ -0,0 +1,7 @@
+Some scripts for figuring out what characters are used in games to help with choosing what characters to put on font atlases
+
+I had originally made these for personal use and wasn't thinking about publishing them, so I wrote them in Swift, which doesn't currently support compiling on Windows.  Very sorry about that.  I guess you could try out WSL?
+
+Download Swift [here](https://swift.org/download/) for Ubuntu or macOS; it also appears to be [on the AUR](https://aur.archlinux.org/packages/swift/) for Arch users.  Compile a script with `swiftc -O scriptFile.swift` or run it directly with `swift -O scriptFile.swift arguments`, though that will be fairly slow if you plan to run the script multiple times.
+
+Documentation coming soon™
--- a/scripts/CharacterInfoExtraction/UniqueCharacters.swift
+++ b/scripts/CharacterInfoExtraction/UniqueCharacters.swift
@@ -0,0 +1,24 @@
+import Foundation
+
+// Mutable so it can be passed as the `to:` target of `print`.
+var standardError = FileHandle.standardError
+
+extension FileHandle : TextOutputStream {
+	/// Encodes `string` as UTF-8 and writes the bytes to this handle; writes nothing if encoding fails.
+	public func write(_ string: String) {
+		if let encoded = string.data(using: .utf8) {
+			write(encoded)
+		}
+	}
+}
+
+// Without an input argument, print the usage text to stderr and stop with a failure status.
+if CommandLine.arguments.count <= 1 {
+	print("Usage: \(CommandLine.arguments[0]) file\nUse - to read from stdin", to: &standardError)
+	exit(EXIT_FAILURE)
+}
+// Read the whole input, either from standard input ("-") or from the named file.
+let inputPath = CommandLine.arguments[1]
+let input: String
+if inputPath == "-" {
+	let rawInput = FileHandle.standardInput.readDataToEndOfFile()
+	input = String(decoding: rawInput, as: UTF8.self)
+} else {
+	input = try String(contentsOf: URL(fileURLWithPath: inputPath))
+}
+// Distinct scalars of the input, printed in sorted order without a trailing newline.
+let chars = Set(input.unicodeScalars)
+let out = chars.sorted().lazy.map(Character.init)
+print(String(out), terminator: "")
--- a/scripts/EMIPGenerator.py
+++ b/scripts/EMIPGenerator.py
@@ -0,0 +1,171 @@
+import sys
+import os
+import re
+from PIL import Image
+from PIL import ImageOps
+from unitypack.asset import Asset
+
+if len(sys.argv) < 4:
+	print("Usage: " + sys.argv[0] + " assetfile.assets inputFolder outputFile.emip\nInput folder should contain files whose names start with the object ID they want to replace.")
+	exit()
+
+if not os.path.isdir(sys.argv[2]):
+	print("Input folder " + sys.argv[2] + " must be a directory!")
+	exit()
+
+class AssetEdit:
+	def __init__(self, file, id, name, type):
+		self.file = file
+		self.id = id
+		self.name = name
+		self.type = type
+		self.shouldDecode = False
+
+	@property
+	def filePath(self):
+		return sys.argv[2] + "/" + self.file
+
+	def pngToTexture2D(self, pngData):
+		image = Image.open(self.filePath)
+		image = ImageOps.flip(image)
+		imageData = image.convert("RGBA").tobytes()
+		output = len(self.name).to_bytes(4, byteorder="little")
+		output += self.name.encode("utf-8")
+		output += b"\0" * ((4 - len(self.name)) % 4)
+		output += image.width.to_bytes(4, byteorder="little")
+		output += image.height.to_bytes(4, byteorder="little")
+		output += len(imageData).to_bytes(4, byteorder="little")
+		output += (4).to_bytes(4, byteorder="little") # m_TextureFormat
+		output += (1).to_bytes(4, byteorder="little") # m_MipCount
+		output += b"\0\x01\0\0" # Flags
+		output += (1).to_bytes(4, byteorder="little") # m_ImageCount
+		output += (2).to_bytes(4, byteorder="little") # m_TextureDimension
+		output += (2).to_bytes(4, byteorder="little") # m_FilterMode
+		output += (2).to_bytes(4, byteorder="little") # m_Aniso
+		output += (0).to_bytes(4, byteorder="little") # m_MipBias
+		output += (1).to_bytes(4, byteorder="little") # m_WrapMode
+		output += (0).to_bytes(4, byteorder="little") # m_LightmapFormat
+		output += (1).to_bytes(4, byteorder="little") # m_ColorSpace
+		output += len(imageData).to_bytes(4, byteorder="little")
+		output += imageData
+		if self.type > 0:
+			output += b"\0" * 12 # Empty Streaming Data
+		return output
+
+	def loadTexture2DInfo(self, assets, bundle):
+		self.shouldDecode = True
+		obj = assets.objects[self.id]
+		data = bundle[obj.data_offset:(obj.data_offset + obj.size)]
+		length = int.from_bytes(data[0:4], byteorder='little')
+		paddedLength = length + (4 - length) % 4
+		self.name = data[4:4+length].decode('utf-8')
+
+	def getAssetInfo(self, assets, bundle):
+		if self.id is None:
+			for id, obj in assets.objects.items():
+				try:
+					objType = obj.type
+					if objType != self.type: continue
+				except:
+					# Special case handling for newer files that fail to read type id
+					if self.type == "TextMeshProFont" and obj.type_id < 0:
+						objType = self.type
+						pass
+					else:
+						continue
+				
+				# UnityPack is broken and overreads its buffer if we try to use it to automatically decode things, so instead we use this sometimes-working thing to decode the name
+				data = bundle[obj.data_offset:(obj.data_offset + obj.size)]
+				
+				name = None
+				try:
+					name = obj.read()["m_Name"]
+				except:
+					length = int.from_bytes(data[0:4], byteorder='little')
+					if length + 4 <= len(data) and length < 40:
+						name = data[4:4+length].decode('utf-8')
+					elif len(data) > 32:
+						length = int.from_bytes(data[28:32], byteorder='little')
+						if length + 4 <= len(data) and length < 40:
+							name = data[4:4+length].decode('utf-8')
+				if name is not None:
+					if self.name == name:
+						self.id = id
+						if objType == "Texture2D" and self.file[-4:] == ".png":
+							print(f"Will replace object #{id} with contents of {self.file} converted to a Texture2D")
+							self.shouldDecode = True
+						else:
+							print(f"Will replace object #{id} with contents of {self.file}")
+						break
+		else:
+			if self.file[-4:] == ".png":
+				self.loadTexture2DInfo(assets, bundle)
+				print(f"Will replace object #{self.id} with contents of {self.file} converted to a Texture2D")
+			else:
+				print(f"Will replace object #{self.id} with contents of {self.file}")
+
+		if self.id == None:
+			print(f"Couldn't find object named {self.name} for {self.file}, skipping")
+			return
+		obj = assets.objects[self.id]
+		self.type = obj.type_id
+
+	@property
+	def bytes(self):
+		out = (2).to_bytes(4, byteorder='little') # Unknown
+		out += b"\0" * 3 # Unknown
+		out += self.id.to_bytes(4, byteorder='little') # Unknown
+		out += b"\0" * 4 # Unknown
+		out += self.type.to_bytes(4, byteorder='little', signed=True) # Type
+		out += b"\xff" * 2 # Unknown
+		with open(self.filePath, "rb") as file:
+			fileBytes = file.read()
+			if self.shouldDecode:
+				fileBytes = self.pngToTexture2D(fileBytes)
+			out += len(fileBytes).to_bytes(4, byteorder='little') # Payload Size
+			out += b"\0" * 4 # Unknown
+			out += fileBytes # Payload
+		return out
+
+def generateHeader(numEdits):
+	header = b"EMIP" # Magic
+	header += b"\0" * 4 # Unknown
+	header += (1).to_bytes(4, byteorder='big') # Number of files
+	header += b"\0" * 4 # Unknown
+	if os.path.abspath(sys.argv[1])[1] == ":": # Windows paths will be read properly, UNIX paths won't since UABE will be run with wine, so use a relative path
+		path = os.path.abspath(sys.argv[1]).encode('utf-8')
+	else:
+		path = sys.argv[1].encode('utf-8')
+	header += len(path).to_bytes(2, byteorder='little') # Path length
+	header += path # File path
+	header += numEdits.to_bytes(4, byteorder='little') # Number of file changes
+	return header
+
+edits = []
+
+for file in os.listdir(sys.argv[2]):
+	if file[0] == ".": continue
+	matches = re.match(r"^(\d+).*", file)
+	if matches:
+		edits.append(AssetEdit(file, int(matches.group(1)), None, None))
+	else:
+		name = os.path.splitext(file)[0]
+		parts = name.split("_")
+		if len(parts) < 2: continue
+		edits.append(AssetEdit(file, None, "_".join(parts[:-1]), parts[-1]))
+
+with open(sys.argv[1], "rb") as assetsFile:
+	bundle = assetsFile.read()
+	assetsFile.seek(0)
+	assets = Asset.from_file(assetsFile)
+	for edit in edits:
+		edit.getAssetInfo(assets, bundle)
+	edits = [x for x in edits if x.id != None]
+
+edits = sorted(edits, key=lambda x: x.id)
+
+with open(sys.argv[3], "wb") as outputFile:
+	outputFile.write(generateHeader(len(edits)))
+	for edit in edits:
+		outputFile.write(edit.bytes)
+
--- a/scripts/TMPAssetConverter.py
+++ b/scripts/TMPAssetConverter.py
@@ -0,0 +1,242 @@
+import sys
+import os
+import struct
+
+class DataScanner:
+	"""Sequential reader over a bytes-like object that tracks a current offset."""
+
+	def __init__(self, data):
+		self.data = data
+		self.offset = 0
+
+	def advance(self, length):
+		"""Move the current offset forward by `length` bytes without returning data."""
+		self.offset = self.offset + length
+
+	def read(self, length):
+		"""Return the next `length` bytes and move past them."""
+		chunk = self.peek(length)
+		self.advance(length)
+		return chunk
+
+	def peek(self, length):
+		"""Return the next `length` bytes without moving the offset."""
+		start = self.offset
+		return self.data[start:(start + length)]
+
+	def readString(self):
+		"""Read a length-prefixed string and return it as raw bytes.
+
+		The returned bytes include the 4-byte little-endian length prefix and
+		the payload rounded up to a multiple of four bytes.
+		"""
+		declared = int.from_bytes(self.peek(4), byteorder="little")
+		padding = (4 - declared) % 4
+		return self.read(4 + declared + padding)
+
+	def rest(self):
+		"""Return everything from the current offset to the end of the data."""
+		return self.data[self.offset:]
+
+def readString(str):
+	"""Decode a length-prefixed string: a 4-byte little-endian length followed by UTF-8 bytes.
+
+	Any bytes after the declared length (such as alignment padding) are ignored.
+	"""
+	prefixSize = 4
+	payloadSize = int.from_bytes(str[:prefixSize], byteorder="little")
+	payload = str[prefixSize:(prefixSize + payloadSize)]
+	return payload.decode("utf-8")
+
+class FontFile:
+	"""A font asset dump split into raw byte fields that are stored as attributes.
+
+	The constructor guesses which of the supported layouts `data` uses and
+	dispatches to the matching parser.  Every field is kept as the raw bytes
+	that were read, so it can be written back out unchanged.
+	"""
+	def __init__(self, data, filename, emptyAtlasPoint=None):
+		# In the layout parsed by _fromAssetCreator the header is 15 words long,
+		# so word 15 holds the length prefix of the filename string
+		stringLength = int.from_bytes(data[(4 * 15):(4 * 16)], byteorder="little")
+		if stringLength in range(4, 26): # Assumes filenames are between 4 and 25 chars
+			print(f"Detected {filename} as coming from TextMesh Pro")
+			self.type = "TMP"
+			self._fromAssetCreator(data, filename, emptyAtlasPoint)
+		else:
+			# NOTE(review): word 12 is used to tell the two game layouts apart;
+			# what that word represents is not visible here — confirm
+			stringLength = int.from_bytes(data[(4*12):(4*13)], byteorder="little")
+			if stringLength == 1:
+				print(f"Detected {filename} as coming from a newer Higurashi game")
+				self.type = "Hima"
+			else:
+				print(f"Detected {filename} as coming from an older Higurashi game")
+				self.type = "Oni"
+			self._fromGame(data, filename, emptyAtlasPoint)
+		# Decoded form of the raw, length-prefixed Filename field
+		self.filename = readString(self.Filename)
+
+	def _readArray(self, data, itemLength, emptyAtlasPoint):
+		"""Read the item array from the scanner `data` into self.Array.
+
+		self.Array ends up holding the 4-byte item count followed by eight
+		words per item.  `itemLength` is the number of words each item takes
+		up in the input; words beyond the eighth are skipped.  When
+		`emptyAtlasPoint` is given, items with zero width and height get their
+		x/y replaced by that point.  self.AtlasWidth and self.AtlasHeight must
+		already be set.
+		"""
+		self.Array = data.read(4)
+		length = int.from_bytes(self.Array, byteorder="little")
+		# The atlas dimensions are stored as little-endian 32-bit floats
+		atlasWidth = struct.unpack("<f", self.AtlasWidth)[0]
+		atlasHeight = struct.unpack("<f", self.AtlasHeight)[0]
+		for _ in range(length):
+			# bytearray so that individual fields can be patched in place
+			info = bytearray(data.read(4 * 8))
+			if itemLength > 8:
+				data.advance((itemLength - 8) * 4)
+			# Word 0 is the character id, words 1-4 are x, y, width and height
+			x, y, width, height = struct.unpack("<ffff", info[4:20])
+			if emptyAtlasPoint is not None:
+				if width == 0 and height == 0:
+					charid = int.from_bytes(info[:4], byteorder="little")
+					print(f"Relocating 0x0 character U+{charid:x} to ({emptyAtlasPoint[0]}, {emptyAtlasPoint[1]})")
+					info[4:12] = struct.pack("<ff", emptyAtlasPoint[0], emptyAtlasPoint[1])
+			# Clamp coordinates that lie beyond the atlas to the atlas size.
+			# x and y here are the values read before any relocation above.
+			if x > atlasWidth:
+				info[4:8] = self.AtlasWidth
+			if y > atlasHeight:
+				info[8:12] = self.AtlasHeight
+			self.Array += info
+
+	def _fromGame(self, data, filename, emptyAtlasPoint):
+		"""Parse the layout used for the "Hima" and "Oni" types (self.type must be set).
+
+		The fields are read strictly in file order; `filename` is not used.
+		"""
+		data = DataScanner(data)
+		self.Header = data.read(4 * 7)
+		self.Filename = data.readString()
+		# Only the "Hima" layout has an extra word before the font name
+		if self.type == "Hima":
+			self.BeforeFontName = data.read(4)
+		else:
+			self.BeforeFontName = bytes()
+		self.FontName = data.readString()
+		# Font face data
+		self.PointSize = data.read(4)
+		self.Padding = data.read(4)
+		self.LineHeight = data.read(4)
+		self.Baseline = data.read(4)
+		self.Ascender = data.read(4)
+		self.Descender = data.read(4)
+		self.CenterLine = data.read(4)
+		self.SuperscriptOffset = data.read(4)
+		self.SubscriptOffest = data.read(4)
+		self.SubSize = data.read(4)
+		self.Underline = data.read(4)
+		self.UnderlineThickness = data.read(4)
+		self.TabWidth = data.read(4)
+		self.CharacterCount = data.read(4)
+		self.AtlasWidth = data.read(4)
+		self.AtlasHeight = data.read(4)
+
+		self.AfterFontFace = data.read(4 * 8)
+		# Items are eight words each in this layout, so nothing is skipped
+		self._readArray(data, 8, emptyAtlasPoint)
+		# Everything after the array is kept verbatim
+		self.Footer = data.rest()
+
+	def _fromAssetCreator(self, data, filename, emptyAtlasPoint):
+		"""Parse the layout used for the "TMP" type.
+
+		The fields are read strictly in file order; `filename` is not used.
+		"""
+		data = DataScanner(data)
+		self.Header = data.read(4 * 15)
+		self.Filename = data.readString()
+		self.BeforeFontName = data.read(4 * 7)
+		self.FontName = data.readString()
+		# Font face data
+		self.PointSize = data.read(4)
+		self.Scale = data.read(4)
+		self.CharacterCount = data.read(4)
+		self.LineHeight = data.read(4)
+		self.Baseline = data.read(4)
+		self.Ascender = data.read(4)
+		self.CapHeight = data.read(4)
+		self.Descender = data.read(4)
+		self.CenterLine = data.read(4)
+		self.SuperscriptOffset = data.read(4)
+		self.SubscriptOffest = data.read(4)
+		self.SubSize = data.read(4)
+		self.Underline = data.read(4)
+		self.UnderlineThickness = data.read(4)
+		self.strikethrough = data.read(4)
+		self.strikethroughThickness = data.read(4)
+		self.TabWidth = data.read(4)
+		self.Padding = data.read(4)
+		self.AtlasWidth = data.read(4)
+		self.AtlasHeight = data.read(4)
+
+		self.AfterFontFace = data.read(4 * 3)
+		# Items are nine words each in this layout; the ninth word is skipped
+		self._readArray(data, 9, emptyAtlasPoint)
+		# Everything after the array is kept verbatim
+		self.Footer = data.rest()
+
+def combineFonts(original: FontFile, new: FontFile):
+	"""Assemble a font file that keeps the framing of `original` but uses the font data of `new`.
+
+	Header, Filename, BeforeFontName, AfterFontFace and Footer come from
+	`original`; the font name, the font face fields and the array come from
+	`new`.  Returns the combined file as bytes.
+	"""
+	# Font face fields, in the same order in which FontFile._fromGame reads them
+	fontFace = (
+		new.PointSize,
+		new.Padding,
+		new.LineHeight,
+		new.Baseline,
+		new.Ascender,
+		new.Descender,
+		new.CenterLine,
+		new.SuperscriptOffset,
+		new.SubscriptOffest,
+		new.SubSize,
+		new.Underline,
+		new.UnderlineThickness,
+		new.TabWidth,
+		new.CharacterCount,
+		new.AtlasWidth,
+		new.AtlasHeight,
+	)
+	pieces = [original.Header, original.Filename, original.BeforeFontName, new.FontName]
+	pieces.extend(fontFace)
+	pieces.extend([original.AfterFontFace, new.Array, original.Footer])
+	return b"".join(pieces)
+
+# Finds a size x size completely blank spot in the atlas
+# Can be used for whitespace character relocation
+def findEmptyAtlasPoint(atlas, size):
+	"""Return the (x, y) centre of a blank region of the atlas, or None.
+
+	`atlas` is the texture dump starting at its width field: the width and
+	height are read as little-endian integers from bytes 0-4 and 4-8, the
+	pixel count from bytes 56-60, and the pixel data starts at byte 60.
+	A region is blank when 2 * (size // 2) + 1 consecutive rows each contain
+	`size` zero bytes at the same columns.  The region must lie entirely
+	inside the atlas: runs that continue onto the next row, or whose rows
+	would wrap past the top or bottom, are rejected.
+
+	None is returned when no such region exists, when `size` is not positive,
+	or when the pixel count does not equal width * height.
+	"""
+	atlasWidth = int.from_bytes(atlas[0:4], byteorder="little")
+	atlasHeight = int.from_bytes(atlas[4:8], byteorder="little")
+	atlasSize = int.from_bytes(atlas[56:60], byteorder="little")
+	atlasData = atlas[60:]
+	if atlasWidth * atlasHeight != atlasSize:
+		print("Atlas doesn't match width and height!  This shouldn't happen")
+		return None
+	if size <= 0 or atlasSize == 0:
+		return None
+	distance = size // 2
+	stringToFind = b"\0" * size
+	pos = 0
+	while True:
+		# Only search the declared pixel area, not any trailing bytes
+		pos = atlasData.find(stringToFind, pos, atlasSize)
+		if pos == -1:
+			return None
+		row, column = divmod(pos, atlasWidth)
+		# The horizontal run must not spill over into the next row
+		fitsHorizontally = column + size <= atlasWidth
+		# The rows above and below the candidate row must exist in the atlas
+		fitsVertically = row - distance >= 0 and row + distance < atlasHeight
+		if fitsHorizontally and fitsVertically:
+			firstStart = pos - distance * atlasWidth
+			lastStart = pos + distance * atlasWidth
+			rowStarts = range(firstStart, lastStart + 1, atlasWidth)
+			if all(atlasData[start:start+size] == stringToFind for start in rowStarts):
+				y = atlasHeight - 1 - row # Texture2Ds are flipped
+				x = column + distance
+				return (x, y)
+		pos += 1
+
+
+# Command line: [newAtlas.dat] newMonoBehaviour.dat originalMonoBehaviour.dat outputFolder
+# The atlas is optional; it is only present when four arguments are given.
+hasAtlas = len(sys.argv) > 4
+if hasAtlas:
+	atlasFN = sys.argv[1]
+	behaviourFN = sys.argv[2]
+	originalFN = sys.argv[3]
+	outFN = sys.argv[4]
+else:
+	if len(sys.argv) < 4:
+		print("Usage: " + sys.argv[0] + " [newAtlas.dat] newMonoBehaviour.dat originalMonoBehaviour.dat outputFolder")
+		sys.exit(1)
+
+	behaviourFN = sys.argv[1]
+	originalFN = sys.argv[2]
+	outFN = sys.argv[3]
+
+if not os.path.isdir(outFN):
+	print("Output folder " + outFN + " must be a directory!")
+	sys.exit(1)
+
+emptyAtlasPoint = None
+if hasAtlas:
+	with open(atlasFN, "rb") as atlasFile:
+		atlas = DataScanner(atlasFile.read())
+	atlasOut = b""
+	# If the first word is not a plausible name length, skip the leading words
+	# before the name and the words after it; none of them are written back out
+	if int.from_bytes(atlas.peek(4), byteorder="little") not in range(8, 32):
+		atlas.advance(4 * 7)
+		atlasName = atlas.readString()
+		atlas.advance(4 * 4)
+	else:
+		atlasName = atlas.readString()
+	atlasOut += atlasName
+	atlasRest = atlas.rest()
+	emptyAtlasPoint = findEmptyAtlasPoint(atlasRest, 13)
+	atlasOut += atlasRest
+
+with open(originalFN, "rb") as originalFile:
+	original = FontFile(originalFile.read(), originalFN)
+with open(behaviourFN, "rb") as behaviourFile:
+	behaviour = FontFile(behaviourFile.read(), behaviourFN, emptyAtlasPoint)
+
+
+# The atlas output only exists when an atlas was passed in; without this guard
+# the three-argument form fails on the undefined atlasName
+if hasAtlas:
+	atlasName = readString(atlasName)
+	with open(os.path.join(outFN, atlasName + "_Texture2D.dat"), "wb") as outFile:
+		outFile.write(atlasOut)
+with open(os.path.join(outFN, original.filename + "_TextMeshProFont.dat"), "wb") as outFile:
+	outFile.write(combineFonts(original=original, new=behaviour))
+
--- a/scripts/UnityTextModifier.py
+++ b/scripts/UnityTextModifier.py
@@ -0,0 +1,121 @@
+import sys
+import os
+import json
+import unitypack
+from unitypack.asset import Asset
+
+# Three arguments are required: the assets file, the edits JSON and the output
+# folder.  Usage errors exit with a non-zero status.
+if len(sys.argv) < 4:
+	print("Usage: " + sys.argv[0] + " assetfile.assets edits.json outputfolder\nEdits.json should be an array of objects with the fields 'CurrentEnglish', 'CurrentJapanese', 'NewEnglish', and 'NewJapanese'.  An optional 'Discriminator' field can be added if multiple texts have the same English and Japanese values.")
+	sys.exit(1)
+
+if not os.path.isdir(sys.argv[3]):
+	print("Output folder " + sys.argv[3] + " must be a directory!")
+	sys.exit(1)
+
+class ScriptEdit:
+	"""One text replacement: an English/Japanese string pair and its new values.
+
+	The attributes `offset`, `id`, `currentData` and `newData` do not exist
+	until findInAssetBundle / checkObject have located the text.
+	"""
+	def __init__(self, currentEnglish, currentJapanese, newEnglish, newJapanese, discriminator=None):
+		self.currentEnglish = currentEnglish
+		self.currentJapanese = currentJapanese
+		self.newEnglish = newEnglish
+		self.newJapanese = newJapanese
+		# Index of the occurrence to use when the current text appears more than once
+		self.discriminator = discriminator
+
+	@staticmethod
+	def fromJSON(json):
+		"""Create a ScriptEdit from a decoded JSON object; 'Discriminator' is optional."""
+		return ScriptEdit(json["CurrentEnglish"], json["CurrentJapanese"], json["NewEnglish"], json["NewJapanese"], json.get("Discriminator"))
+
+	@staticmethod
+	def bytesFromString(string):
+		"""Serialize `string` as a 4-byte little-endian length, the UTF-8 bytes, and zero padding up to a multiple of four bytes."""
+		strBytes = string.encode('utf-8')
+		out = len(strBytes).to_bytes(4, byteorder='little')
+		out += strBytes
+		out += b"\0" * ((4 - len(strBytes)) % 4)
+		return out
+
+	@property
+	def expectedBytes(self):
+		"""Serialized form of the current English string followed by the current Japanese string."""
+		return self.bytesFromString(self.currentEnglish) + self.bytesFromString(self.currentJapanese)
+
+	@property
+	def newBytes(self):
+		"""Serialized form of the new English string followed by the new Japanese string."""
+		return self.bytesFromString(self.newEnglish) + self.bytesFromString(self.newJapanese)
+
+	def findInAssetBundle(self, bundle):
+		"""Locate the current text in `bundle` and store its position in self.offset.
+
+		Raises IndexError when the text is not found, when it is found more
+		than once and no discriminator was given, or when the discriminator is
+		beyond the number of occurrences.
+		"""
+		search = self.expectedBytes
+		offsets = []
+		offset = bundle.find(search)
+		while offset != -1:
+			offsets.append(offset)
+			# Continue one byte further so overlapping occurrences are found too
+			offset = bundle.find(search, offset + 1)
+		if not offsets:
+			raise IndexError(f"No asset found for {self.currentEnglish} / {self.currentJapanese}")
+		if self.discriminator is None:
+			if len(offsets) > 1:
+				raise IndexError(f"Multiple assets found for {self.currentEnglish} / {self.currentJapanese}, candidates are " + ", ".join(f"{index}: 0x{offset:x}" for index, offset in enumerate(offsets)) + ".  Please select one and add a Discriminator tag for it.")
+			self.offset = offsets[0]
+		else:
+			if len(offsets) <= self.discriminator:
+				raise IndexError(f"Not enough offsets found for {self.currentEnglish} / {self.currentJapanese} to meet request for #{self.discriminator}, there were only {len(offsets)}")
+			self.offset = offsets[self.discriminator]
+
+	def checkObject(self, id, object, bundle):
+		"""Prepare the replacement data if `object` contains the located text.
+
+		`object` must provide `data_offset` and `size`.  When self.offset lies
+		inside the object and the whole current text fits in it, self.id,
+		self.currentData and self.newData are set; otherwise nothing changes.
+		findInAssetBundle must have succeeded before this is called.
+		"""
+		start = object.data_offset
+		end = start + object.size
+		if not (start <= self.offset < end):
+			return
+		currentData = bundle[start:end]
+		expectedBytes = self.expectedBytes
+		# Use the occurrence chosen by findInAssetBundle instead of searching
+		# again, which would always pick the first occurrence in the object
+		smallOffset = self.offset - start
+		if currentData[smallOffset:(smallOffset + len(expectedBytes))] != expectedBytes:
+			# The text runs past the end of this object, so it cannot be patched here
+			return
+		self.id = id
+		self.currentData = currentData
+		self.newData = currentData[:smallOffset] + self.newBytes + currentData[(smallOffset + len(expectedBytes)):]
+		print(f"Found {self.currentEnglish} / {self.currentJapanese} in object #{id}")
+
+	def write(self, folder):
+		"""Write the patched object data to `<folder>/<id>.dat`, or report that no object was found."""
+		if not hasattr(self, "newData"):
+			print(f"Failed to find object id for {self.currentEnglish} / {self.currentJapanese}!")
+			return
+		filename = os.path.join(folder, str(self.id) + ".dat")
+		with open(filename, "wb") as outputFile:
+			outputFile.write(self.newData)
+
+	def __repr__(self):
+		string = f"ScriptEdit(currentEnglish: {self.currentEnglish}, currentJapanese: {self.currentJapanese}, newEnglish: {self.newEnglish}, newJapanese: {self.newJapanese}"
+		if self.discriminator is not None:
+			string += f", discriminator: {self.discriminator}"
+		# The offset is only known after findInAssetBundle has succeeded
+		offset = getattr(self, "offset", None)
+		if offset is not None:
+			string += f", offset: 0x{offset:x}"
+		return string + ")"
+
+	def __str__(self):
+		offset = getattr(self, "offset", None)
+		if offset is None:
+			return "<ScriptEdit for unknown position>"
+		return f"<ScriptEdit for position 0x{offset:x}>"
+
+
+# Load the requested edits from the JSON file given as the second argument
+with open(sys.argv[2], encoding="utf-8") as jsonFile:
+	edits = list(map(ScriptEdit.fromJSON, json.load(jsonFile)))
+
+with open(sys.argv[1], "rb") as assetsFile:
+	bundle = assetsFile.read()
+
+	# Keep only the edits whose current text could be located in the raw bytes;
+	# the reason for every dropped edit is printed
+	located = []
+	for edit in edits:
+		try:
+			edit.findInAssetBundle(bundle)
+		except IndexError as e:
+			print(e)
+		else:
+			located.append(edit)
+			print(f"Found {edit.currentEnglish} / {edit.currentJapanese} at offset 0x{edit.offset:x}")
+	edits = located
+
+	# Parse the same file again as an asset container and offer every object
+	# to every remaining edit
+	assetsFile.seek(0)
+	assets = Asset.from_file(assetsFile)
+	for id, obj in assets.objects.items():
+		for edit in edits:
+			edit.checkObject(id, obj, bundle)
+
+	# Write one output file per edit into the output folder
+	for edit in edits:
+		edit.write(sys.argv[3])
				`@@ -0,0 +1 @@`
				Extracts text from Higurashi script files. Compile with `swiftc -O -wmo *.swift -o HigurashiTextExtractor`. Run with `./HigurashiTextExtractor scriptFile [e\|j]`, where `e` outputs the English script and `j` outputs the Japanese script.