Add some scripts for getting character usage info
This commit is contained in:
114
CharacterInfoExtraction/HigurashiTextExtractor/Language.swift
Normal file
114
CharacterInfoExtraction/HigurashiTextExtractor/Language.swift
Normal file
@@ -0,0 +1,114 @@
|
|||||||
|
//
|
||||||
|
// Language.swift
|
||||||
|
// CParser_CS440
|
||||||
|
//
|
||||||
|
|
||||||
|
import Foundation
|
||||||
|
|
||||||
|
/// Grouping direction of a binary operator when operators of equal precedence are chained.
enum Associativity {
    /// Operators group from the left.
    case left
    /// Operators group from the right.
    case right
}
|
||||||
|
|
||||||
|
/// Names of the built-in types known to the language.
var typeNames: Set<String> = ["bool", "char", "short", "int", "long", "float", "double"]

/// Families of types; two types are convertible when they share a family.
var convertibleTypes: [Set<String>] = [
    ["bool", "char", "short", "int", "long"],
    ["float", "double"]
]

/// Returns `true` when `left` and `right` both belong to the same entry of `convertibleTypes`.
///
/// - Parameters:
///   - left: Name of the first type.
///   - right: Name of the second type.
/// - Returns: Whether a single family contains both names.
func isConvertible(left: String, right: String) -> Bool {
    return convertibleTypes.contains { family in
        family.contains(left) && family.contains(right)
    }
}
|
||||||
|
|
||||||
|
/// A type as used in the operator tables: a wildcard, the absence of a type, or a named type.
enum Type: CustomStringConvertible {
    case any
    case noType
    case specific(String)

    /// `"Any"` for `.any`, `"None"` for `.noType`, and the stored name for `.specific`.
    var description: String {
        switch self {
        case .any:
            return "Any"
        case .noType:
            return "None"
        case let .specific(name):
            return name
        }
    }
}
|
||||||
|
|
||||||
|
/// Binary operators grouped under an integer key, each group paired with its associativity.
/// NOTE(review): the keys look like precedence levels (larger binds tighter) - confirm against the parser.
let binaryOperators: [Int: (Associativity, Set<String>)] = [
    110: (.left, ["||"]),
    120: (.left, ["&&"]),
    130: (.left, ["<", "<=", ">", ">=", "==", "!="]),
    140: (.left, ["+", "-", "|", "^"]),
    150: (.left, ["*", "/", "%", "&"]),
    160: (.left, ["<<", ">>"])
]
|
||||||
|
|
||||||
|
/// Operand/result signatures for operators that only accept `int` operands.
let allIntegersBinaryOperator: [(left: Type, right: Type, out: Type)] = [
    (left: .specific("int"), right: .specific("int"), out: .specific("int"))
]

/// Operand/result signatures for arithmetic operators: `int`, `double` and `float`, each matched with itself.
let allNumbersBinaryOperator: [(left: Type, right: Type, out: Type)] = [
    (left: .specific("int"), right: .specific("int"), out: .specific("int")),
    (left: .specific("double"), right: .specific("double"), out: .specific("double")),
    (left: .specific("float"), right: .specific("float"), out: .specific("float"))
]

/// Operand/result signatures for logical operators on `bool`.
let allBooleansBinaryOperator: [(left: Type, right: Type, out: Type)] = [
    (left: .specific("bool"), right: .specific("bool"), out: .specific("bool"))
]

/// Comparison operators accept any operand types and produce `bool`.
let comparisonBinaryOperator: [(left: Type, right: Type, out: Type)] = [
    (left: .any, right: .any, out: .specific("bool"))
]

/// Accepted signatures for every binary operator, keyed by the operator's spelling.
let binaryOperatorTypes: [String: [(left: Type, right: Type, out: Type)]] = [
    // Integer-only operators
    "<<": allIntegersBinaryOperator,
    ">>": allIntegersBinaryOperator,
    "%": allIntegersBinaryOperator,
    "&": allIntegersBinaryOperator,
    "|": allIntegersBinaryOperator,
    "^": allIntegersBinaryOperator,
    // Arithmetic operators
    "*": allNumbersBinaryOperator,
    "/": allNumbersBinaryOperator,
    "+": allNumbersBinaryOperator,
    "-": allNumbersBinaryOperator,
    // Comparisons
    "<": comparisonBinaryOperator,
    "<=": comparisonBinaryOperator,
    ">": comparisonBinaryOperator,
    ">=": comparisonBinaryOperator,
    "==": comparisonBinaryOperator,
    "!=": comparisonBinaryOperator,
    // Logical operators
    "&&": allBooleansBinaryOperator,
    "||": allBooleansBinaryOperator
]
|
||||||
|
|
||||||
|
/// Spellings of every assignment operator.
let assignmentOperators: Set<String> = [
    "=",
    "*=", "/=", "%=",
    "+=", "-=",
    "<<=", ">>=",
    "&=", "^=", "|="
]

/// Types each assignment operator accepts, keyed by the operator's spelling.
let assignmentOperatorTypes: [String: [Type]] = [
    "=": [.any],
    // Arithmetic compound assignments
    "*=": [.specific("int"), .specific("double"), .specific("float")],
    "/=": [.specific("int"), .specific("double"), .specific("float")],
    "+=": [.specific("int"), .specific("double"), .specific("float")],
    "-=": [.specific("int"), .specific("double"), .specific("float")],
    // Integer-only compound assignments
    "%=": [.specific("int")],
    "<<=": [.specific("int")],
    ">>=": [.specific("int")],
    "&=": [.specific("int")],
    "^=": [.specific("int")],
    "|=": [.specific("int")]
]
|
||||||
|
|
||||||
|
/// Operators that may appear before their operand.
let prefixOperators: Set<String> = ["!", "~", "++", "--", "+", "-"]

/// Operators that may appear after their operand.
let postfixOperators: Set<String> = ["++", "--"]

/// Input/output signatures for unary operators that only accept `int`.
let allIntegersUnaryOperator: [(in: Type, out: Type)] = [
    (in: .specific("int"), out: .specific("int"))
]

/// Input/output signatures for unary operators on `int`, `double` and `float`.
let allNumbersUnaryOperator: [(in: Type, out: Type)] = [
    (in: .specific("int"), out: .specific("int")),
    (in: .specific("double"), out: .specific("double")),
    (in: .specific("float"), out: .specific("float"))
]

/// Input/output signatures for unary operators on `bool`.
let allBooleansUnaryOperator: [(in: Type, out: Type)] = [
    (in: .specific("bool"), out: .specific("bool"))
]

/// Accepted signatures for every unary operator, keyed by the operator's spelling.
let unaryOperatorTypes: [String: [(in: Type, out: Type)]] = [
    "!": allBooleansUnaryOperator,
    "~": allIntegersUnaryOperator,
    "++": allNumbersUnaryOperator,
    "--": allNumbersUnaryOperator,
    "+": allNumbersUnaryOperator,
    "-": allNumbersUnaryOperator
]
|
||||||
|
|
||||||
|
/// Punctuation that is not an operator.
let otherPunctuation: Set<String> = ["(", ")", "{", "}", "[", "]", ";", ",", "."]

/// Spellings that open a comment.
let commentPunctuation: Set<String> = ["//", "/*"]

/// Every punctuation spelling the tokenizer recognises.
let allPunctuation: Set<String> = prefixOperators
    .union(postfixOperators)
    .union(assignmentOperators)
    .union(binaryOperators.values.flatMap { $0.1 })
    .union(otherPunctuation)
    .union(commentPunctuation)

/// Every unicode scalar that occurs in some punctuation spelling.
let punctuationCharacters = Set(allPunctuation.flatMap { $0.unicodeScalars })

/// Length of the longest punctuation spelling (0 if there were none).
let longestPunctuation = allPunctuation.map { $0.count }.max() ?? 0

/// Punctuation scalars plus the ten ASCII digits.
let nonIdentifierCharacters = punctuationCharacters.union("0123456789".unicodeScalars)
|
||||||
|
|
||||||
1
CharacterInfoExtraction/HigurashiTextExtractor/README.md
Normal file
1
CharacterInfoExtraction/HigurashiTextExtractor/README.md
Normal file
@@ -0,0 +1 @@
|
|||||||
|
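Extractor that extracts text from Higurashi script files. Compile with `swiftc -O -wmo *.swift -o HigurashiTextExtractor`. Run with `./HigurashiTextExtractor scriptFile [e|j]`, where `e` outputs the English script and `j` outputs the Japanese script; with neither, both are printed (Japanese first). Pass `-` as the script file to read from stdin, and add `-v` to log unrecognized statements to stderr.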
|
||||||
315
CharacterInfoExtraction/HigurashiTextExtractor/Scanner.swift
Normal file
315
CharacterInfoExtraction/HigurashiTextExtractor/Scanner.swift
Normal file
@@ -0,0 +1,315 @@
|
|||||||
|
//
|
||||||
|
// Scanner.swift
|
||||||
|
// CParser_CS440
|
||||||
|
//
|
||||||
|
|
||||||
|
import Foundation
|
||||||
|
|
||||||
|
/// The lexical category of a token; the raw value is the case name.
enum TokenType: String {
    case stringLiteral
    case characterLiteral
    case punctuation
    case identifier
}
|
||||||
|
|
||||||
|
/// Failures the tokenizer can report, each carrying the 1-based position it was detected at.
enum TokenizationError: Error {
    /// A punctuation scalar that does not begin any known punctuation spelling.
    case badPunctuation(row: Int, column: Int, character: Unicode.Scalar)
    /// A string or character literal that reached a newline or the end of input before its closing quote.
    case unclosedString(row: Int, column: Int, string: String)
}
|
||||||
|
|
||||||
|
/// The sequence of token types of a token list, usable as a dictionary key or set element.
struct TokenListSignature: Hashable, Equatable {
    let list: [TokenType]

    /// Wraps an explicit list of token types.
    init(_ array: [TokenType]) {
        list = array
    }

    /// Builds the signature from the `type` of each token, in order.
    init(from tokens: [Token]) {
        self.init(tokens.map { $0.type })
    }

    /// Two signatures are equal when they hold the same types in the same order.
    static func ==(lhs: TokenListSignature, rhs: TokenListSignature) -> Bool {
        return lhs.list == rhs.list
    }

    /// Folds the element hashes as `hash * 33 + element`, seeded with 5381, using wrapping addition.
    var hashValue: Int {
        var hash = 5381
        for tokenType in list {
            hash = (hash << 5) &+ hash &+ tokenType.hashValue
        }
        return hash
    }
}
|
||||||
|
|
||||||
|
/// A window over a string's unicode scalars, delimited by `back` (inclusive start) and
/// `front` (exclusive end). Both ends can be moved independently; a 1-based row/column
/// position is tracked for each end, and `length` is the number of scalars between them.
///
/// Only `"\n"` starts a new row. Moving an end past the start or the end of the string is
/// not checked here and traps in the standard library; callers are expected to consult
/// `frontIsEnd` / `backIsBeginning` and friends first.
struct MovingStringRange {
    let string: String.UnicodeScalarView
    private(set) var back: String.UnicodeScalarIndex
    private(set) var front: String.UnicodeScalarIndex
    /// Number of scalars in `back..<front`.
    private(set) var length: Int
    private(set) var backRow: Int
    private(set) var backColumn: Int
    private(set) var frontRow: Int
    private(set) var frontColumn: Int

    /// Creates an empty range at the start of `string`, or at its end when `atEnd` is true.
    /// Rows and columns always start at 1, even when `atEnd` is true.
    init(_ string: String.UnicodeScalarView, atEnd: Bool = false) {
        let origin = atEnd ? string.endIndex : string.startIndex
        self.string = string
        self.length = 0
        self.back = origin
        self.front = origin
        self.backRow = 1
        self.backColumn = 1
        self.frontRow = 1
        self.frontColumn = 1
    }

    init(_ string: String, atEnd: Bool = false) {
        self.init(string.unicodeScalars, atEnd: atEnd)
    }

    /// 1-based column of the scalar at `index`: one more than the number of scalars between
    /// it and the preceding `"\n"` (or the start of the string). Never steps before `startIndex`.
    private func column(at index: String.UnicodeScalarIndex) -> Int {
        var column = 1
        var cursor = index
        while cursor > string.startIndex {
            cursor = string.index(before: cursor)
            if string[cursor] == "\n" { break }
            column += 1
        }
        return column
    }

    /// Moves `front` one scalar forward, growing the range.
    private mutating func advanceFront() {
        if frontChar == "\n" {
            frontColumn = 1
            frontRow += 1
        }
        else {
            frontColumn += 1
        }
        front = string.index(after: front)
        length += 1
    }

    /// Moves `front` one scalar backward, shrinking the range.
    private mutating func retreatFront() {
        front = string.index(before: front)
        length -= 1
        if frontChar == "\n" {
            // Stepped back onto the previous row; its column has to be recounted.
            frontColumn = column(at: front)
            frontRow -= 1
        }
        else {
            frontColumn -= 1
        }
    }

    /// Moves `front` by `by` scalars; negative values move it backward.
    mutating func advanceFront(by: Int = 1) {
        if by > 0 {
            for _ in 0..<by {
                advanceFront()
            }
        }
        else {
            for _ in 0..<(-by) {
                retreatFront()
            }
        }
    }

    /// Moves `back` one scalar forward, shrinking the range.
    private mutating func advanceBack() {
        if backChar == "\n" {
            backColumn = 1
            backRow += 1
        }
        else {
            backColumn += 1
        }
        back = string.index(after: back)
        // The start moved toward the end, so the range got shorter.
        length -= 1
    }

    /// Moves `back` one scalar backward, growing the range.
    private mutating func retreatBack() {
        back = string.index(before: back)
        // The start moved away from the end, so the range got longer.
        length += 1
        if backChar == "\n" {
            backColumn = column(at: back)
            backRow -= 1
        }
        else {
            backColumn -= 1
        }
    }

    /// Moves `back` by `by` scalars; negative values move it backward.
    mutating func advanceBack(by: Int = 1) {
        if by > 0 {
            for _ in 0..<by {
                advanceBack()
            }
        }
        else {
            for _ in 0..<(-by) {
                retreatBack()
            }
        }
    }

    /// Collapses the range onto `front`.
    mutating func setBackToFront() {
        back = front
        backColumn = frontColumn
        backRow = frontRow
        length = 0
    }

    /// Collapses the range onto `back`.
    mutating func setFrontToBack() {
        front = back
        frontColumn = backColumn
        frontRow = backRow
        length = 0
    }

    /// The text currently covered by the range.
    var currentRange: String {
        return String(string[back..<front])
    }

    /// The scalar at `front`; traps when `front` is at the end of the string.
    var frontChar: UnicodeScalar {
        return string[front]
    }

    /// The scalar at `back`; traps when `back` is at the end of the string.
    var backChar: UnicodeScalar {
        return string[back]
    }

    var backIsBeginning: Bool {
        return back <= string.startIndex
    }

    var backIsEnd: Bool {
        return back >= string.endIndex
    }

    var frontIsBeginning: Bool {
        return front <= string.startIndex
    }

    var frontIsEnd: Bool {
        return front >= string.endIndex
    }
}
|
||||||
|
|
||||||
|
extension UnicodeScalar {
    /// Whether the scalar is a line break: U+000A...U+000D, U+0085, U+2028 or U+2029.
    var isNewline: Bool {
        switch value {
        case 0x0a...0x0d, 0x85, 0x2028, 0x2029:
            return true
        default:
            return false
        }
    }

    /// Whether the scalar is horizontal whitespace. Together with `isNewline` this covers the
    /// Unicode White_Space property; U+0009 (tab) is included so that tab-indented input
    /// does not glue the tab onto the following identifier.
    var isWhitespace: Bool {
        switch value {
        case 0x09, 0x20, 0xa0, 0x1680, 0x2000...0x200a, 0x202f, 0x205f, 0x3000:
            return true
        default:
            return false
        }
    }

    /// Whether the scalar is either a line break or horizontal whitespace.
    var isNewlineOrWhitespace: Bool {
        return isNewline || isWhitespace
    }
}
|
||||||
|
|
||||||
|
/// A lexical token together with the 1-based position it starts at.
struct Token: CustomStringConvertible {
    let type: TokenType
    /// Text of the token; for string and character literals this excludes the surrounding quotes.
    let value: String
    let row: Int
    let column: Int

    init(type: TokenType, value: String, row: Int, column: Int) {
        self.type = type
        self.value = value
        self.row = row
        self.column = column
    }

    init(type: TokenType, value: String.UnicodeScalarView, row: Int, column: Int) {
        self.init(type: type, value: String(value), row: row, column: column)
    }

    /// The token as source text: literals are wrapped in their quotes again, everything else is printed as is.
    var description: String {
        switch type {
        case .identifier, .punctuation:
            return value
        case .stringLiteral:
            return "\"\(value)\""
        case .characterLiteral:
            return "'\(value)'"
        }
    }

    /// Splits `input` into tokens.
    ///
    /// Whitespace separates tokens and is dropped, `//` and `/* */` comments are skipped,
    /// punctuation is matched greedily against `allPunctuation`, quoted text becomes string or
    /// character literals, and every other run of scalars (numbers included) becomes an identifier.
    ///
    /// - Parameter input: The source text.
    /// - Returns: The tokens in source order.
    /// - Throws: `TokenizationError.unclosedString` when a literal reaches a newline or the end
    ///   of input before its closing quote, and `TokenizationError.badPunctuation` when a
    ///   punctuation scalar matches no known punctuation.
    static func tokenize(input: String) throws -> [Token] {
        var inputRange = MovingStringRange(input)
        var tokens: [Token] = []

        while !inputRange.backIsEnd {
            if inputRange.frontIsEnd {
                // End of input: whatever is pending is the final identifier.
                appendPendingIdentifier(from: inputRange, to: &tokens)
                inputRange.setBackToFront()
            }
            else if inputRange.frontChar.isNewlineOrWhitespace {
                // Whitespace ends the pending identifier and is itself discarded.
                appendPendingIdentifier(from: inputRange, to: &tokens)
                inputRange.advanceFront()
                inputRange.setBackToFront()
            }
            else if punctuationCharacters.contains(inputRange.frontChar) {
                appendPendingIdentifier(from: inputRange, to: &tokens)
                inputRange.setBackToFront()
                try scanPunctuation(in: &inputRange, appendingTo: &tokens)
            }
            else if inputRange.frontChar == "\"" || inputRange.frontChar == "'" {
                appendPendingIdentifier(from: inputRange, to: &tokens)
                try scanQuotedLiteral(in: &inputRange, appendingTo: &tokens)
            }
            else {
                // Part of an identifier; keep extending the range.
                inputRange.advanceFront()
            }
        }
        return tokens
    }

    /// Appends the text covered by `range` as an identifier, unless the range is empty.
    private static func appendPendingIdentifier(from range: MovingStringRange, to tokens: inout [Token]) {
        guard range.length > 0 else { return }
        tokens.append(Token(type: .identifier, value: range.currentRange, row: range.backRow, column: range.backColumn))
    }

    /// Consumes a run of punctuation scalars starting at `range.front` (the range must be empty),
    /// emitting the longest known punctuation at each step and skipping any comments it opens.
    private static func scanPunctuation(in range: inout MovingStringRange, appendingTo tokens: inout [Token]) throws {
        while !range.frontIsEnd && (punctuationCharacters.contains(range.frontChar) || range.length > 0) {
            range.advanceFront()
            let runEnded = range.frontIsEnd || !punctuationCharacters.contains(range.frontChar)
            // Keep collecting until the run ends or is longer than any known punctuation.
            guard range.length > longestPunctuation || runEnded else { continue }

            // Back off one scalar at a time until the collected text is known punctuation.
            var candidate = range.currentRange
            while range.length > 1 && !allPunctuation.contains(candidate) {
                range.advanceFront(by: -1)
                candidate = range.currentRange
            }

            if commentPunctuation.contains(candidate) {
                if candidate == "//" {
                    skipLineComment(in: &range)
                }
                else {
                    skipBlockComment(in: &range)
                }
                range.setBackToFront()
                continue
            }

            guard allPunctuation.contains(candidate) else {
                throw TokenizationError.badPunctuation(row: range.backRow, column: range.backColumn, character: range.backChar)
            }
            tokens.append(Token(type: .punctuation, value: candidate, row: range.backRow, column: range.backColumn))
            range.setBackToFront()
        }
    }

    /// Skips the rest of a `//` comment, including the line break that ends it when there is one.
    /// A comment that runs to the end of input simply stops there.
    private static func skipLineComment(in range: inout MovingStringRange) {
        while !range.frontIsEnd && !range.frontChar.isNewline {
            range.advanceFront()
        }
        if !range.frontIsEnd {
            range.advanceFront()
        }
    }

    /// Skips the body of a `/* */` comment through the closing `*/`; `range.front` must be just
    /// past the opening `/*`. An unterminated comment consumes the rest of the input.
    private static func skipBlockComment(in range: inout MovingStringRange) {
        var previousWasStar = false
        while !range.frontIsEnd {
            let scalar = range.frontChar
            range.advanceFront()
            if previousWasStar && scalar == "/" { return }
            previousWasStar = scalar == "*"
        }
    }

    /// Consumes a quoted literal whose opening quote is at `range.front` and appends it as a
    /// string literal (`"`) or character literal (`'`). A backslash makes the following scalar
    /// part of the literal without interpreting it.
    private static func scanQuotedLiteral(in range: inout MovingStringRange, appendingTo tokens: inout [Token]) throws {
        let quote = range.frontChar
        range.advanceFront()
        range.setBackToFront()

        while true {
            if range.frontIsEnd || range.frontChar.isNewline {
                // Include the opening quote in the reported text and position.
                range.advanceBack(by: -1)
                throw TokenizationError.unclosedString(row: range.backRow, column: range.backColumn, string: range.currentRange)
            }
            if range.frontChar == "\\" {
                range.advanceFront()
                // A backslash as the very last scalar has nothing to escape; the next
                // iteration then reports the literal as unclosed.
                if !range.frontIsEnd {
                    range.advanceFront()
                }
            }
            else if range.frontChar == quote {
                let literalType: TokenType = quote == "'" ? .characterLiteral : .stringLiteral
                // The token starts at the opening quote, one column before the literal's text.
                tokens.append(Token(type: literalType, value: range.currentRange, row: range.backRow, column: range.backColumn - 1))
                range.advanceFront()
                range.setBackToFront()
                return
            }
            else {
                range.advanceFront()
            }
        }
    }
}
|
||||||
93
CharacterInfoExtraction/HigurashiTextExtractor/main.swift
Normal file
93
CharacterInfoExtraction/HigurashiTextExtractor/main.swift
Normal file
@@ -0,0 +1,93 @@
|
|||||||
|
import Foundation
|
||||||
|
|
||||||
|
// Without at least a script path there is nothing to do: show the usage text and fail.
if CommandLine.arguments.count <= 1 {
    print("Usage: \(CommandLine.arguments[0]) file [(e|j)]\nExtracts text from Higurashi script files. Use e or j to specify English or Japanese, otherwise you'll get both")
    exit(EXIT_FAILURE)
}

/// Whether unrecognised statements and commands are reported on stderr.
var verbose = false
/// Output selection bit mask: bit 0 (value 1) prints Japanese, bit 1 (value 2) prints English.
var mode = 3
if CommandLine.arguments.count >= 3 {
    // Flags are matched case-insensitively against every argument.
    let loweredArguments = CommandLine.arguments.map { $0.lowercased() }
    if loweredArguments.contains("e") { mode = 2 }
    if loweredArguments.contains("j") { mode = 1 }
    if loweredArguments.contains("-v") { verbose = true }
}
|
||||||
|
|
||||||
|
/// Handle for stderr, usable as the `to:` target of `print`.
var standardError = FileHandle.standardError

extension FileHandle: TextOutputStream {
    /// Writes `string` to the handle as UTF-8; strings that cannot be encoded are dropped.
    public func write(_ string: String) {
        if let encoded = string.data(using: .utf8) {
            write(encoded)
        }
    }
}
|
||||||
|
|
||||||
|
/// A script statement of the form `name(argument, argument, ...)`.
struct Command {
    /// The identifier before the opening parenthesis.
    let name: String
    /// The tokens between the parentheses, with the separating commas removed.
    let arguments: [Token]

    /// Parses `tokens` as a command.
    ///
    /// - Parameter tokens: The tokens of a single statement.
    /// - Returns: `nil` unless the statement is an identifier followed by `(` and ends with `)`.
    init?(tokens: [Token]) {
        guard tokens.count >= 3,
              let nameToken = tokens.first,
              let closingToken = tokens.last
        else { return nil }
        let openingToken = tokens[1]
        guard nameToken.type == .identifier else { return nil }
        guard openingToken.type == .punctuation && openingToken.value == "(" else { return nil }
        guard closingToken.type == .punctuation && closingToken.value == ")" else { return nil }

        self.name = nameToken.value
        // Only punctuation commas separate arguments; a string literal whose text happens
        // to be "," is a real argument and must keep its position.
        self.arguments = tokens.dropFirst(2).dropLast().filter { token in
            !(token.type == .punctuation && token.value == ",")
        }
    }
}
|
||||||
|
|
||||||
|
/// Reads a script and splits it into statements.
///
/// - Parameter path: Path of the script file, or `"-"` to read the script from stdin.
/// - Returns: The tokens of each statement; the `;`, `{` and `}` punctuation that separates
///   statements is dropped, as are empty statements.
/// - Throws: Any error from reading the file or from `Token.tokenize(input:)`.
func loadFile(path: String) throws -> [[Token]] {
    let contents: String
    if path == "-" {
        contents = String(decoding: FileHandle.standardInput.readDataToEndOfFile(), as: UTF8.self)
    } else {
        contents = try String(contentsOf: URL(fileURLWithPath: path))
    }

    let statementSeparators: Set<String> = [";", "{", "}"]
    let tokens = try Token.tokenize(input: contents)
    // Only punctuation separates statements; a string literal whose text is ";" or a brace
    // belongs to the statement it appears in.
    let statements = tokens.split(whereSeparator: { token in
        token.type == .punctuation && statementSeparators.contains(token.value)
    }).map(Array.init)
    return statements
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/// The statements of the script named by the first command-line argument.
let tokens = try loadFile(path: CommandLine.arguments[1])

/// Every statement that parses as a command; the rest are reported on stderr in verbose mode.
let commands = tokens.compactMap { statement -> Command? in
    if let command = Command(tokens: statement) {
        return command
    }
    if verbose {
        print("\(statement) was not a command!", to: &standardError)
    }
    return nil
}
|
||||||
|
|
||||||
|
/// Names of commands that are skipped without being reported.
let ignore: Set<String> = [
    "FadeOutBGM", "DisableWindow", "DrawScene", "PlayBGM", "Wait",
    "SetValidityOfInput", "DrawSceneWithMask", "SetSpeedOfMessage",
    "DrawBustshot", "FadeBustshot", "DrawBustshotWithFiltering", "FadeBustshotWithFiltering",
    "PlaySE", "ShakeScreen", "DrawFilm", "FadeFilm", "FadeAllBustshots",
    "DrawSpriteWithFiltering", "MoveSprite", "DrawSprite", "FadeSprite",
    "TitleScreen", "SetLocalFlag", "ShowChapterPreview",
    "SetCharSpacing", "SetLineSpacing", "SetScreenAspect",
    "SetWindowPos", "SetWindowSize", "SetWindowMargins",
    "FadeBG", "SetValidityOfSkipping", "SetGUIPosition", "SetStyleOfMessageSwinging",
    "EnableJumpingOfReturnIcon", "SetValidityOfTextFade", "SetValidityOfInterface",
    "Negative", "CallScript", "SavePoint",
    "SetValidityOfWindowDisablingWhenGraphicsControl",
    "SetFontSize", "SetNameFormat", "SetFontId", "StopBGM", "SetGlobalFlag",
    "LanguagePrompt", "SetValidityOfSaving", "ShowTips", "CheckTipsAchievements",
    "if", "StoreValueToLocalWork", "DrawBG", "ChangeScene",
    "StopSE", "ShakeScreenSx", "GetAchievement",
    "CallSection", "JumpSection", "SetDrawingPointOfMessage"
]

/// Accumulated Japanese text.
var japanese = ""
/// Accumulated English text.
var english = ""
|
||||||
|
|
||||||
|
/// Converts a command argument into the text it stands for.
///
/// - Parameter literal: A string literal token, or the identifier `NULL`.
/// - Returns: The literal's text with `\"` replaced by `"` and `\n` replaced by a line break,
///   or the empty string for `NULL`.
/// - Note: Any other token stops the program with a fatal error.
func stringFromLiteral(literal: Token) -> String {
    if literal.type == .stringLiteral {
        let withQuotes = literal.value.replacingOccurrences(of: "\\\"", with: "\"")
        return withQuotes.replacingOccurrences(of: "\\n", with: "\n")
    }
    if literal.value == "NULL" {
        return ""
    }
    fatalError("\(literal) wasn't a string literal!")
}
|
||||||
|
|
||||||
|
// Collect the text of every output command; ignored commands are skipped and anything
// else is only reported in verbose mode.
for command in commands where !ignore.contains(command.name) {
    switch command.name {
    case "OutputLine":
        // Argument 1 is appended to the Japanese text, argument 3 to the English text.
        guard command.arguments.count >= 4 else {
            fatalError("\(command) has too few arguments for OutputLine!")
        }
        japanese += stringFromLiteral(literal: command.arguments[1])
        english += stringFromLiteral(literal: command.arguments[3])
    case "OutputLineAll":
        // A single line shared by both languages.
        guard command.arguments.count >= 2 else {
            fatalError("\(command) has too few arguments for OutputLineAll!")
        }
        let line = stringFromLiteral(literal: command.arguments[1])
        japanese += line
        english += line
    case "ClearMessage":
        japanese += "\n\n"
        english += "\n\n"
    default:
        if verbose { print(command, to: &standardError) }
    }
}

// `mode` is a bit mask: bit 0 selects Japanese, bit 1 selects English.
if mode & 1 > 0 { print(japanese) }
if mode & 2 > 0 { print(english) }
|
||||||
90
CharacterInfoExtraction/KanjiFinder.swift
Normal file
90
CharacterInfoExtraction/KanjiFinder.swift
Normal file
@@ -0,0 +1,90 @@
|
|||||||
|
import Foundation
|
||||||
|
|
||||||
|
/// Handle for stderr, usable as the `to:` target of `print`.
var standardError = FileHandle.standardError

extension FileHandle: TextOutputStream {
    /// Encodes `string` as UTF-8 and writes it to the handle; unencodable strings are skipped.
    public func write(_ string: String) {
        guard let bytes = string.data(using: .utf8) else {
            return
        }
        write(bytes)
    }
}
|
||||||
|
|
||||||
|
// At least one input is required; otherwise explain the usage on stderr and fail.
if CommandLine.arguments.count <= 1 {
    print("""
    Usage: \(CommandLine.arguments[0]) [-filter filterFile.txt] assetBundle1.assets [assetBundle2.assets ...]
    Use - to read from stdin
    Finds 3-byte unicode characters (like kanji) in files
    If a filter is supplied, only characters also in the filter will be outputted
    """, to: &standardError)
    exit(EXIT_FAILURE)
}
|
||||||
|
|
||||||
|
// Compilers older than Swift 4.2 get `firstIndex(where:)` as a forwarder to `index(where:)`.
#if !swift(>=4.2)
extension Collection {
    /// Returns the index of the first element satisfying `predicate`, or `nil` if there is none.
    func firstIndex(where predicate: (Element) throws -> Bool) rethrows -> Index? {
        return try index(where: predicate)
    }
}
#endif
|
||||||
|
|
||||||
|
/// Contents of the filter file, when `-filter` was given together with a path.
var filter: String? = nil
/// The input paths: every argument after the program name, minus the filter option.
var inFiles: [String] = Array(CommandLine.arguments.dropFirst())

// `-filter` (any letter case) followed by a path loads the filter and removes both arguments.
if let optionIndex = inFiles.firstIndex(where: { $0.lowercased() == "-filter" }),
   optionIndex + 1 < inFiles.endIndex {
    filter = try String(contentsOf: URL(fileURLWithPath: inFiles[optionIndex + 1]))
    inFiles.removeSubrange(optionIndex...(optionIndex + 1))
}

/// Raw bytes of every input; a lone `-` reads a single input from stdin.
let bundles: [Data]
if inFiles == ["-"] {
    bundles = [FileHandle.standardInput.readDataToEndOfFile()]
} else {
    bundles = try inFiles.map { path in
        try Data(contentsOf: URL(fileURLWithPath: path))
    }
}
|
||||||
|
|
||||||
|
extension UTF8.CodeUnit {
    /// Whether the byte has the bit pattern `1110xxxx`, the lead byte of a three-byte UTF-8 sequence.
    var isStart3: Bool {
        return self & 0b11110000 == 0b11100000
    }

    /// Whether the byte has the bit pattern `10xxxxxx`, a UTF-8 continuation byte.
    var isContinuation: Bool {
        return self & 0b11000000 == 0b10000000
    }
}

/// Extracts runs of consecutive three-byte UTF-8 sequences from arbitrary bytes.
///
/// A sequence is a lead byte (`1110xxxx`) followed by two continuation bytes (`10xxxxxx`);
/// only the bit patterns are checked. Runs reaching the end of `data` and truncated trailing
/// sequences are handled without reading out of bounds.
///
/// - Parameters:
///   - data: The bytes to search.
///   - minLength: Minimum number of consecutive sequences a run needs in order to be kept.
/// - Returns: The kept runs, concatenated in order and decoded as UTF-8.
func unicodeFinder(data: [UInt8], minLength: Int = 2) -> String {
    var out = [UInt8]()
    var position = data.startIndex

    while position < data.endIndex {
        // Measure the run of complete three-byte sequences starting at `position`.
        var runEnd = position
        var runLength = 0
        while data.endIndex - runEnd >= 3
            && data[runEnd].isStart3
            && data[runEnd + 1].isContinuation
            && data[runEnd + 2].isContinuation {
            runEnd += 3
            runLength += 1
        }

        if runLength == 0 {
            // No sequence starts here; try the next byte.
            position += 1
            continue
        }
        if runLength >= minLength {
            out.append(contentsOf: data[position..<runEnd])
        }
        // Always resume after the run, so the scan makes progress even when the run ends the data.
        position = runEnd
    }

    return String(decoding: out, as: UTF8.self)
}
|
||||||
|
|
||||||
|
/// The text extracted from each input.
let unicodeStrings = bundles.map { bundle in
    unicodeFinder(data: Array(bundle))
}

/// Every distinct unicode scalar found across all inputs, optionally narrowed by the filter.
var chars = Set<Unicode.Scalar>()
for text in unicodeStrings {
    chars.formUnion(text.unicodeScalars)
}
if let allowed = filter {
    chars.formIntersection(allowed.unicodeScalars)
}

// Print the scalars in code point order, without a trailing newline.
let sortedCharacters = chars.sorted().map(Character.init)
print(String(sortedCharacters), terminator: "")
|
||||||
5
CharacterInfoExtraction/README.md
Normal file
5
CharacterInfoExtraction/README.md
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
Some scripts for figuring out what characters are used in games to help with choosing what characters to put on font atlases
|
||||||
|
|
||||||
|
I had originally made these for personal use and wasn't thinking about publishing them, so I wrote them in Swift, which doesn't currently support compiling on Windows. Very sorry about that. I guess you could try out WSL?
|
||||||
|
|
||||||
|
Download Swift [here](https://swift.org/download/) for Ubuntu or macOS; it also appears to be [on the AUR](https://aur.archlinux.org/packages/swift/) for Arch users. Compile a script with `swiftc -O scriptFile.swift`, or run it directly with `swift -O scriptFile.swift arguments`, though running it directly will be fairly slow if you plan to run the script multiple times.
|
||||||
24
CharacterInfoExtraction/UniqueCharacters.swift
Normal file
24
CharacterInfoExtraction/UniqueCharacters.swift
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
import Foundation
|
||||||
|
|
||||||
|
/// Handle for stderr, usable as the `to:` target of `print`.
var standardError = FileHandle.standardError

extension FileHandle: TextOutputStream {
    /// Writes the UTF-8 encoding of `string` to the handle; does nothing if encoding fails.
    public func write(_ string: String) {
        let encoded = string.data(using: .utf8)
        if let encoded = encoded {
            write(encoded)
        }
    }
}
|
||||||
|
|
||||||
|
// An input path (or "-") is required; otherwise explain the usage on stderr and fail.
if CommandLine.arguments.count <= 1 {
    print("Usage: \(CommandLine.arguments[0]) file\nUse - to read from stdin", to: &standardError)
    exit(EXIT_FAILURE)
}

/// The text to analyse, read from stdin for "-" and from the named file otherwise.
let input: String
switch CommandLine.arguments[1] {
case "-":
    input = String(decoding: FileHandle.standardInput.readDataToEndOfFile(), as: UTF8.self)
case let path:
    input = try String(contentsOf: URL(fileURLWithPath: path))
}

/// Every distinct unicode scalar of the input.
let chars = Set(input.unicodeScalars)
/// The distinct scalars in code point order, as characters.
let out = chars.sorted().lazy.map(Character.init)

// Print without a trailing newline.
print(String(out), terminator: "")
|
||||||
Reference in New Issue
Block a user