Move scripts

This commit is contained in:
Jáchym Toušek
2018-09-07 19:26:45 +02:00
parent cbd8626c6f
commit 985b54b3fa
10 changed files with 0 additions and 0 deletions

View File

@@ -0,0 +1,114 @@
//
// Language.swift
// CParser_CS440
//
import Foundation
/// Direction in which a binary operator groups when chained with operators
/// from the same table entry.
enum Associativity {
    /// Groups from the left.
    case left
    /// Groups from the right.
    case right
}
/// Names of the built-in types known to the language tables.
var typeNames: Set<String> = [
    "bool", "char", "short", "int", "long",
    "float", "double",
]

/// Groups of type names; two names found in the same group are treated as
/// convertible to each other by `isConvertible(left:right:)`.
var convertibleTypes: [Set<String>] = [
    ["bool", "char", "short", "int", "long"],
    ["float", "double"],
]
/// Reports whether two type names belong to the same group of `convertibleTypes`.
///
/// - Parameters:
///   - left: Name of the first type.
///   - right: Name of the second type.
/// - Returns: `true` when at least one group contains both names, otherwise `false`.
func isConvertible(left: String, right: String) -> Bool {
    return convertibleTypes.contains { group in
        group.contains(left) && group.contains(right)
    }
}
/// A type as used by the operator tables: a wildcard, the absence of a type,
/// or a concrete type identified by name.
enum Type: CustomStringConvertible {
    case any, noType, specific(String)

    /// `"Any"` for `.any`, `"None"` for `.noType`, and the stored name for `.specific`.
    var description: String {
        switch self {
        case .any:
            return "Any"
        case .noType:
            return "None"
        case let .specific(name):
            return name
        }
    }
}
/// Binary operators grouped under an integer key, each group carrying its
/// associativity and the operator spellings it contains.
/// NOTE(review): the keys look like precedence levels (larger presumably binds tighter) — confirm against the parser.
let binaryOperators: [Int: (Associativity, Set<String>)] = [
    160: (.left, ["<<", ">>"]),
    150: (.left, ["*", "/", "%", "&"]),
    140: (.left, ["+", "-", "|", "^"]),
    130: (.left, ["<", "<=", ">", ">=", "==", "!="]),
    120: (.left, ["&&"]),
    110: (.left, ["||"]),
]
/// Signatures for binary operators that accept only `int` operands.
let allIntegersBinaryOperator: [(left: Type, right: Type, out: Type)] = [
    (left: .specific("int"), right: .specific("int"), out: .specific("int")),
]

/// Signatures for binary operators that accept matching `int`, `double` or `float` operands.
let allNumbersBinaryOperator: [(left: Type, right: Type, out: Type)] = [
    (left: .specific("int"), right: .specific("int"), out: .specific("int")),
    (left: .specific("double"), right: .specific("double"), out: .specific("double")),
    (left: .specific("float"), right: .specific("float"), out: .specific("float")),
]

/// Signatures for binary operators that accept only `bool` operands.
let allBooleansBinaryOperator: [(left: Type, right: Type, out: Type)] = [
    (left: .specific("bool"), right: .specific("bool"), out: .specific("bool")),
]

/// Signatures for comparison operators: any operands, `bool` result.
let comparisonBinaryOperator: [(left: Type, right: Type, out: Type)] = [
    (left: .any, right: .any, out: .specific("bool")),
]

/// Maps every binary operator spelling to the list of signatures it accepts.
let binaryOperatorTypes: [String: [(left: Type, right: Type, out: Type)]] = [
    // Shifts
    "<<": allIntegersBinaryOperator,
    ">>": allIntegersBinaryOperator,
    // Multiplicative group
    "*": allNumbersBinaryOperator,
    "/": allNumbersBinaryOperator,
    "%": allIntegersBinaryOperator,
    "&": allIntegersBinaryOperator,
    // Additive group
    "+": allNumbersBinaryOperator,
    "-": allNumbersBinaryOperator,
    "|": allIntegersBinaryOperator,
    "^": allIntegersBinaryOperator,
    // Comparisons
    "<": comparisonBinaryOperator,
    "<=": comparisonBinaryOperator,
    ">": comparisonBinaryOperator,
    ">=": comparisonBinaryOperator,
    "==": comparisonBinaryOperator,
    "!=": comparisonBinaryOperator,
    // Logical
    "&&": allBooleansBinaryOperator,
    "||": allBooleansBinaryOperator,
]
/// Spellings of all assignment operators.
let assignmentOperators: Set<String> = [
    "=",
    "*=", "/=", "%=",
    "+=", "-=",
    "<<=", ">>=",
    "&=", "^=", "|=",
]

/// Maps every assignment operator spelling to the types listed for it.
let assignmentOperatorTypes: [String: [Type]] = [
    "=": [.any],
    "*=": [.specific("int"), .specific("double"), .specific("float")],
    "/=": [.specific("int"), .specific("double"), .specific("float")],
    "%=": [.specific("int")],
    "+=": [.specific("int"), .specific("double"), .specific("float")],
    "-=": [.specific("int"), .specific("double"), .specific("float")],
    "<<=": [.specific("int")],
    ">>=": [.specific("int")],
    "&=": [.specific("int")],
    "^=": [.specific("int")],
    "|=": [.specific("int")],
]
/// Spellings of operators that may appear before their operand.
let prefixOperators: Set<String> = ["!", "~", "++", "--", "+", "-"]

/// Spellings of operators that may appear after their operand.
let postfixOperators: Set<String> = ["++", "--"]

/// Signatures for unary operators that accept only `int`.
let allIntegersUnaryOperator: [(in: Type, out: Type)] = [
    (in: .specific("int"), out: .specific("int")),
]

/// Signatures for unary operators that accept `int`, `double` or `float`.
let allNumbersUnaryOperator: [(in: Type, out: Type)] = [
    (in: .specific("int"), out: .specific("int")),
    (in: .specific("double"), out: .specific("double")),
    (in: .specific("float"), out: .specific("float")),
]

/// Signatures for unary operators that accept only `bool`.
let allBooleansUnaryOperator: [(in: Type, out: Type)] = [
    (in: .specific("bool"), out: .specific("bool")),
]

/// Maps every unary operator spelling to the list of signatures it accepts.
let unaryOperatorTypes: [String: [(in: Type, out: Type)]] = [
    "!": allBooleansUnaryOperator,
    "~": allIntegersUnaryOperator,
    "++": allNumbersUnaryOperator,
    "--": allNumbersUnaryOperator,
    "+": allNumbersUnaryOperator,
    "-": allNumbersUnaryOperator,
]
/// Punctuation that is not an operator.
let otherPunctuation: Set<String> = ["(", ")", "{", "}", "[", "]", ";", ",", "."]

/// Spellings that open a comment.
let commentPunctuation: Set<String> = ["//", "/*"]

/// Every punctuation spelling: all operator tables plus the two sets above.
let allPunctuation: Set<String> = prefixOperators
    .union(postfixOperators)
    .union(assignmentOperators)
    .union(binaryOperators.values.flatMap { $0.1 })
    .union(otherPunctuation)
    .union(commentPunctuation)

/// Every Unicode scalar that occurs in at least one punctuation spelling.
let punctuationCharacters = allPunctuation.reduce(into: Set<UnicodeScalar>()) { scalars, spelling in
    scalars.formUnion(spelling.unicodeScalars)
}

/// Character count of the longest punctuation spelling (0 if there were none).
let longestPunctuation = allPunctuation.map { $0.count }.max() ?? 0

/// Punctuation scalars plus the ASCII digits.
let nonIdentifierCharacters = punctuationCharacters.union("0123456789".unicodeScalars)

View File

@@ -0,0 +1 @@
Extracts text from Higurashi script files. Compile with `swiftc -O -wmo *.swift -o HigurashiTextExtractor`. Run with `./HigurashiTextExtractor scriptFile [e|j]`, where `e` prints the English script and `j` prints the Japanese script; omit the option to print both.

View File

@@ -0,0 +1,315 @@
//
// Scanner.swift
// CParser_CS440
//
import Foundation
/// Category of a scanned token; the raw value is the case name.
enum TokenType: String {
    /// Text between double quotes.
    case stringLiteral
    /// Text between single quotes.
    case characterLiteral
    /// An operator, bracket, separator or other punctuation spelling.
    case punctuation
    /// Any other run of characters.
    case identifier
}
/// Failures reported while splitting source text into tokens.
enum TokenizationError: Error {
    /// A punctuation character at the given position does not form any known punctuation.
    case badPunctuation(
        row: Int,
        column: Int,
        character: UnicodeScalar
    )

    /// A quoted literal ran into a newline or the end of input before its closing quote;
    /// `string` is the text collected for the literal up to that point.
    case unclosedString(
        row: Int,
        column: Int,
        string: String
    )
}
/// The sequence of token types of a token list, usable as a hashable key.
struct TokenListSignature: Hashable, Equatable {
    /// Token types in order.
    let list: [TokenType]

    /// Wraps an explicit list of token types.
    init(_ array: [TokenType]) {
        list = array
    }

    /// Builds the signature from the `type` of each token, in order.
    init(from tokens: [Token]) {
        self.init(tokens.map { $0.type })
    }

    /// Two signatures are equal when they hold the same types in the same order.
    static func ==(lhs: TokenListSignature, rhs: TokenListSignature) -> Bool {
        return lhs.list == rhs.list
    }

    /// Combines the element hashes as `hash * 33 + element`, seeded with 5381,
    /// using wrapping addition.
    var hashValue: Int {
        var combined = 5381
        for tokenType in list {
            combined = (combined << 5) &+ combined &+ tokenType.hashValue
        }
        return combined
    }
}
/// A window over the Unicode scalars of a string, delimited by two cursors.
///
/// `back` marks the start of the window and `front` its end (exclusive). Each
/// cursor carries a 1-based row/column position that is updated as it moves,
/// and `length` is the number of scalars between `back` and `front`.
struct MovingStringRange {
    /// The scalars being scanned.
    let string: String.UnicodeScalarView
    /// Start of the current window.
    private(set) var back: String.UnicodeScalarIndex
    /// End of the current window (exclusive).
    private(set) var front: String.UnicodeScalarIndex
    /// Number of scalars in `back..<front`.
    private(set) var length: Int
    private(set) var backRow: Int
    private(set) var backColumn: Int
    private(set) var frontRow: Int
    private(set) var frontColumn: Int

    /// Creates an empty window at the start of `string`, or at its end when `atEnd` is true.
    /// Rows and columns start at 1 in both cases.
    init(_ string: String.UnicodeScalarView, atEnd: Bool = false) {
        self.string = string
        self.length = 0
        self.back = atEnd ? string.endIndex : string.startIndex
        self.front = back
        self.backRow = 1
        self.backColumn = 1
        self.frontRow = 1
        self.frontColumn = 1
    }

    init(_ string: String, atEnd: Bool = false) {
        self.init(string.unicodeScalars, atEnd: atEnd)
    }

    /// 1-based column of `index`: one more than the number of scalars between
    /// `index` and the preceding "\n" (or the start of the string).
    /// Never steps before `startIndex`.
    private func column(at index: String.UnicodeScalarIndex) -> Int {
        var column = 1
        var cursor = index
        while cursor > string.startIndex {
            cursor = string.index(before: cursor)
            if string[cursor] == "\n" { break }
            column += 1
        }
        return column
    }

    /// Moves `front` one scalar forward. `front` must not be at the end of the string.
    private mutating func advanceFront() {
        if frontChar == "\n" {
            frontColumn = 1
            frontRow += 1
        }
        else {
            frontColumn += 1
        }
        front = string.index(after: front)
        length += 1
    }

    /// Moves `front` one scalar backward. `front` must not be at the start of the string.
    private mutating func retreatFront() {
        front = string.index(before: front)
        length -= 1
        if frontChar == "\n" {
            // Stepped back onto the previous line: its column has to be recounted.
            frontColumn = column(at: front)
            frontRow -= 1
        }
        else {
            frontColumn -= 1
        }
    }

    /// Moves `front` by `by` scalars; negative values move backward.
    mutating func advanceFront(by: Int = 1) {
        if by > 0 {
            for _ in 0..<by {
                advanceFront()
            }
        }
        else {
            for _ in 0..<(-by) {
                retreatFront()
            }
        }
    }

    /// Moves `back` one scalar forward, shrinking the window.
    /// `back` must not be at the end of the string.
    private mutating func advanceBack() {
        if backChar == "\n" {
            backColumn = 1
            backRow += 1
        }
        else {
            backColumn += 1
        }
        back = string.index(after: back)
        // The window starts later, so it holds one scalar fewer.
        length -= 1
    }

    /// Moves `back` one scalar backward, growing the window.
    /// `back` must not be at the start of the string.
    private mutating func retreatBack() {
        back = string.index(before: back)
        // The window starts earlier, so it holds one scalar more.
        length += 1
        if backChar == "\n" {
            backColumn = column(at: back)
            backRow -= 1
        }
        else {
            backColumn -= 1
        }
    }

    /// Moves `back` by `by` scalars; negative values move backward.
    mutating func advanceBack(by: Int = 1) {
        if by > 0 {
            for _ in 0..<by {
                advanceBack()
            }
        }
        else {
            for _ in 0..<(-by) {
                retreatBack()
            }
        }
    }

    /// Collapses the window onto `front`.
    mutating func setBackToFront() {
        back = front
        backColumn = frontColumn
        backRow = frontRow
        length = 0
    }

    /// Collapses the window onto `back`.
    mutating func setFrontToBack() {
        front = back
        frontColumn = backColumn
        frontRow = backRow
        length = 0
    }

    /// The text currently inside the window.
    var currentRange: String {
        return String(string[back..<front])
    }

    /// The scalar at `front`; `front` must not be at the end of the string.
    var frontChar: UnicodeScalar {
        return string[front]
    }

    /// The scalar at `back`; `back` must not be at the end of the string.
    var backChar: UnicodeScalar {
        return string[back]
    }

    var backIsBeginning: Bool {
        return back <= string.startIndex
    }

    var backIsEnd: Bool {
        return back >= string.endIndex
    }

    var frontIsBeginning: Bool {
        return front <= string.startIndex
    }

    var frontIsEnd: Bool {
        return front >= string.endIndex
    }
}
extension UnicodeScalar {
    /// True for U+000A...U+000D, U+0085, U+2028 and U+2029.
    var isNewline: Bool {
        switch value {
        case 0x0a...0x0d, 0x85, 0x2028, 0x2029:
            return true
        default:
            return false
        }
    }

    /// True for horizontal tab (U+0009) and the space characters U+0020, U+00A0,
    /// U+1680, U+2000...U+200A, U+202F, U+205F and U+3000.
    ///
    /// Tab is included so that tab-indented input separates tokens instead of
    /// becoming part of the identifier that follows it.
    var isWhitespace: Bool {
        switch value {
        case 0x09, 0x20, 0xa0, 0x1680, 0x2000...0x200a, 0x202f, 0x205f, 0x3000:
            return true
        default:
            return false
        }
    }

    /// True when the scalar is either a newline or a whitespace scalar.
    var isNewlineOrWhitespace: Bool {
        return isNewline || isWhitespace
    }
}
/// A single token produced by `Token.tokenize(input:)`, with its type, text and position.
struct Token: CustomStringConvertible {
    let type: TokenType
    /// Token text; for string and character literals this excludes the surrounding quotes.
    let value: String
    let row: Int
    let column: Int

    init(type: TokenType, value: String, row: Int, column: Int) {
        self.type = type
        self.value = value
        self.row = row
        self.column = column
    }

    init(type: TokenType, value: String.UnicodeScalarView, row: Int, column: Int) {
        self.init(type: type, value: String(value), row: row, column: column)
    }

    /// The token as it would be written in source: literals are re-quoted,
    /// identifiers and punctuation are printed as-is.
    var description: String {
        switch type {
        case .identifier, .punctuation:
            return value
        case .stringLiteral:
            return "\"\(value)\""
        case .characterLiteral:
            return "'\(value)'"
        }
    }

    /// Splits `input` into tokens.
    ///
    /// Whitespace and newlines separate tokens, `//` and `/* */` comments are
    /// dropped, quoted text becomes string/character literal tokens, runs of
    /// punctuation characters are split into the longest known punctuation
    /// spellings, and everything else becomes identifier tokens.
    ///
    /// A comment that runs to the end of the input (with or without a closing
    /// `*/` or trailing newline) simply ends the token stream.
    ///
    /// - Parameter input: The source text to scan.
    /// - Returns: The tokens in source order.
    /// - Throws: `TokenizationError.badPunctuation` for a punctuation character that
    ///   forms no known punctuation, and `TokenizationError.unclosedString` for a
    ///   literal that reaches a newline or the end of input before its closing quote.
    static func tokenize(input: String) throws -> [Token] {
        var inputRange = MovingStringRange(input)
        var tokens: [Token] = []

        // Emits the text collected so far as an identifier, unless nothing was collected.
        func flushIdentifier() {
            if inputRange.length > 0 {
                tokens.append(Token(type: .identifier, value: inputRange.currentRange, row: inputRange.backRow, column: inputRange.backColumn))
            }
        }

        while !inputRange.backIsEnd {
            if inputRange.frontIsEnd { // End of input: flush whatever is pending
                flushIdentifier()
                inputRange.setBackToFront()
                continue
            }
            else if inputRange.frontChar.isNewlineOrWhitespace { // Whitespace ends the current token
                flushIdentifier()
                inputRange.advanceFront()
                inputRange.setBackToFront()
            }
            else if punctuationCharacters.contains(inputRange.frontChar) {
                flushIdentifier()
                inputRange.setBackToFront()
                while !inputRange.frontIsEnd && (punctuationCharacters.contains(inputRange.frontChar) || inputRange.length > 0) {
                    inputRange.advanceFront()
                    // Only try to match once the run of punctuation characters cannot grow any further.
                    if inputRange.length > longestPunctuation || inputRange.frontIsEnd || !punctuationCharacters.contains(inputRange.frontChar) {
                        var punctuationToken = inputRange.currentRange
                        // Shrink from the right until the run is a known spelling (longest match wins).
                        while inputRange.length > 1 && !allPunctuation.contains(punctuationToken) {
                            inputRange.advanceFront(by: -1)
                            punctuationToken = inputRange.currentRange
                        }
                        if commentPunctuation.contains(punctuationToken) {
                            if punctuationToken == "//" {
                                while !inputRange.frontIsEnd && !inputRange.frontChar.isNewline {
                                    inputRange.advanceFront()
                                }
                            }
                            else {
                                // Skip to each "*" and stop once the scalar after one is "/".
                                // NOTE(review): a "/" directly after the opening "/*" ends the comment immediately.
                                while !inputRange.frontIsEnd && inputRange.frontChar != "/" {
                                    while !inputRange.frontIsEnd && inputRange.frontChar != "*" {
                                        inputRange.advanceFront()
                                    }
                                    // Step over the "*"; nothing to step over if the comment is unterminated.
                                    if !inputRange.frontIsEnd {
                                        inputRange.advanceFront()
                                    }
                                }
                            }
                            // Step over the terminator (newline or "/") unless the comment ran to the end of input.
                            if !inputRange.frontIsEnd {
                                inputRange.advanceFront()
                            }
                            inputRange.setBackToFront()
                            continue
                        }
                        if allPunctuation.contains(punctuationToken) {
                            tokens.append(Token(type: .punctuation, value: punctuationToken, row: inputRange.backRow, column: inputRange.backColumn))
                            inputRange.setBackToFront()
                        }
                        else {
                            throw TokenizationError.badPunctuation(row: inputRange.backRow, column: inputRange.backColumn, character: inputRange.backChar)
                        }
                    }
                }
            }
            else if inputRange.frontChar == "\"" || inputRange.frontChar == "'" {
                let quoteType = inputRange.frontChar
                flushIdentifier()
                // Skip the opening quote so the collected range holds only the literal's contents.
                inputRange.advanceFront()
                inputRange.setBackToFront()
                while true {
                    if inputRange.frontIsEnd || inputRange.frontChar.isNewline {
                        // Include the opening quote in the reported text and position.
                        inputRange.advanceBack(by: -1)
                        throw TokenizationError.unclosedString(row: inputRange.backRow, column: inputRange.backColumn, string: inputRange.currentRange)
                    }
                    else if inputRange.frontChar == "\\" {
                        // Skip the backslash and the escaped scalar; a backslash at the very end of
                        // the input has nothing after it and is reported as an unclosed literal above.
                        inputRange.advanceFront()
                        if !inputRange.frontIsEnd {
                            inputRange.advanceFront()
                        }
                    }
                    else if inputRange.frontChar == quoteType {
                        let type: TokenType = quoteType == "'" ? .characterLiteral : .stringLiteral
                        // Column is reported at the opening quote, one before the contents.
                        tokens.append(Token(type: type, value: inputRange.currentRange, row: inputRange.backRow, column: inputRange.backColumn - 1))
                        inputRange.advanceFront()
                        inputRange.setBackToFront()
                        break
                    }
                    else {
                        inputRange.advanceFront()
                    }
                }
            }
            else {
                // Ordinary character: keep extending the pending identifier.
                inputRange.advanceFront()
            }
        }
        return tokens
    }
}

View File

@@ -0,0 +1,93 @@
import Foundation
// A script file is required; without one, print the usage text and stop.
if CommandLine.arguments.count <= 1 {
    print("Usage: \(CommandLine.arguments[0]) file [(e|j)]\nExtracts text from Higurashi script files. Use e or j to specify English or Japanese, otherwise you'll get both")
    exit(EXIT_FAILURE)
}

/// When true, skipped statements and unhandled commands are reported on standard error.
var verbose = false
/// Bit mask of the scripts to print: bit 0 = Japanese, bit 1 = English.
var mode = 3
if CommandLine.arguments.count >= 3 {
    // Options are matched case-insensitively against every argument.
    let loweredArguments = CommandLine.arguments.map { $0.lowercased() }
    if loweredArguments.contains("e") { mode = 2 }
    if loweredArguments.contains("j") { mode = 1 }
    if loweredArguments.contains("-v") { verbose = true }
}
/// Standard error as a `print(_:to:)` target.
var standardError = FileHandle.standardError

/// Lets a file handle be used as the target of `print(_:to:)`.
extension FileHandle : TextOutputStream {
    /// Writes `string` to the handle as UTF-8; text that cannot be encoded is dropped.
    public func write(_ string: String) {
        if let encoded = string.data(using: .utf8) {
            self.write(encoded)
        }
    }
}
/// A call-shaped statement: `name ( argument , argument ... )`.
struct Command {
    /// The identifier before the opening parenthesis.
    let name: String
    /// The tokens between the parentheses, without the separating commas.
    let arguments: [Token]

    /// Parses one statement's tokens.
    ///
    /// - Parameter tokens: The tokens of a single statement.
    /// - Returns: `nil` unless the tokens start with an identifier followed by `(`
    ///   and end with `)`.
    init?(tokens: [Token]) {
        guard tokens.count >= 3 else { return nil }
        guard tokens[0].type == .identifier else { return nil }
        guard tokens[1].type == .punctuation && tokens[1].value == "(" else { return nil }
        guard let closing = tokens.last, closing.type == .punctuation, closing.value == ")" else { return nil }
        self.name = tokens[0].value
        // Only comma punctuation separates arguments; a string literal whose text is ","
        // is a real argument and must stay in place so argument positions are not shifted.
        self.arguments = tokens.dropFirst(2).dropLast().filter { !($0.type == .punctuation && $0.value == ",") }
    }
}
/// Reads a script, tokenizes it and splits the tokens into statements.
///
/// - Parameter path: Path of the script file, or `-` to read standard input.
/// - Returns: One token array per statement; statements are delimited by the
///   punctuation tokens `;`, `{` and `}`, which are not included.
/// - Throws: Errors from reading the file and from `Token.tokenize(input:)`.
func loadFile(path: String) throws -> [[Token]] {
    let file: String
    if path == "-" {
        file = String(decoding: FileHandle.standardInput.readDataToEndOfFile(), as: UTF8.self)
    } else {
        file = try String(contentsOf: URL(fileURLWithPath: path))
    }
    let tokens = try Token.tokenize(input: file)
    let statementDelimiters: Set<String> = [";", "{", "}"]
    // Only punctuation delimits statements; a string literal whose text happens to be
    // ";" or a brace belongs to the statement it appears in.
    let statements = tokens
        .split(whereSeparator: { $0.type == .punctuation && statementDelimiters.contains($0.value) })
        .map { Array($0) }
    return statements
}
/// The statements of the script named by the first argument.
let tokens = try loadFile(path: CommandLine.arguments[1])

/// Every statement that parses as a command; the rest are reported in verbose mode.
let commands = tokens.compactMap { statement -> Command? in
    guard let command = Command(tokens: statement) else {
        if verbose { print("\(statement) was not a command!", to: &standardError) }
        return nil
    }
    return command
}
/// Command names that are skipped by the extraction loop.
/// ("StopSE" was listed twice in the literal; the redundant entry is removed.)
let ignore: Set<String> = [
    "FadeOutBGM", "DisableWindow", "DrawScene", "PlayBGM", "Wait",
    "SetValidityOfInput", "DrawSceneWithMask", "SetSpeedOfMessage",
    "DrawBustshot", "FadeBustshot", "DrawBustshotWithFiltering", "FadeBustshotWithFiltering",
    "PlaySE", "ShakeScreen", "DrawFilm", "FadeFilm", "FadeAllBustshots",
    "DrawSpriteWithFiltering", "MoveSprite", "DrawSprite", "FadeSprite",
    "TitleScreen", "SetLocalFlag", "ShowChapterPreview",
    "SetCharSpacing", "SetLineSpacing", "SetScreenAspect",
    "SetWindowPos", "SetWindowSize", "SetWindowMargins",
    "FadeBG", "SetValidityOfSkipping", "SetGUIPosition", "SetStyleOfMessageSwinging",
    "EnableJumpingOfReturnIcon", "SetValidityOfTextFade", "SetValidityOfInterface",
    "Negative", "CallScript", "SavePoint",
    "SetValidityOfWindowDisablingWhenGraphicsControl",
    "SetFontSize", "SetNameFormat", "SetFontId", "StopBGM", "SetGlobalFlag",
    "LanguagePrompt", "SetValidityOfSaving", "ShowTips", "CheckTipsAchievements",
    "if", "StoreValueToLocalWork", "DrawBG", "ChangeScene",
    "StopSE", "ShakeScreenSx", "GetAchievement",
    "CallSection", "JumpSection", "SetDrawingPointOfMessage",
]

/// Accumulated Japanese script text.
var japanese = ""
/// Accumulated English script text.
var english = ""
/// Converts a command argument into the text it stands for.
///
/// - Parameter literal: A string literal token, or the identifier `NULL`.
/// - Returns: The literal's text with `\"` replaced by `"` and then `\n` replaced by a
///   newline; an empty string for `NULL`.
/// - Note: Stops the program with `fatalError` for any other token.
func stringFromLiteral(literal: Token) -> String {
    if literal.type == .stringLiteral {
        let unquoted = literal.value.replacingOccurrences(of: "\\\"", with: "\"")
        return unquoted.replacingOccurrences(of: "\\n", with: "\n")
    }
    if literal.value == "NULL" {
        return ""
    }
    fatalError("\(literal) wasn't a string literal!")
}
// Collect the text of every text-producing command, skipping the ignored ones.
for command in commands where !ignore.contains(command.name) {
    switch command.name {
    case "OutputLine":
        // Argument 1 is appended to the Japanese text, argument 3 to the English text.
        japanese += stringFromLiteral(literal: command.arguments[1])
        english += stringFromLiteral(literal: command.arguments[3])
    case "OutputLineAll":
        // The same text goes to both scripts.
        let sharedLine = stringFromLiteral(literal: command.arguments[1])
        japanese += sharedLine
        english += sharedLine
    case "ClearMessage":
        japanese += "\n\n"
        english += "\n\n"
    default:
        if verbose { print(command, to: &standardError) }
    }
}

// Bit 0 of `mode` selects Japanese, bit 1 selects English.
if mode & 1 != 0 { print(japanese) }
if mode & 2 != 0 { print(english) }

View File

@@ -0,0 +1,90 @@
import Foundation
/// Standard error, used as the target for diagnostics printed with `print(_:to:)`.
var standardError = FileHandle.standardError

/// Makes file handles usable as text output streams.
extension FileHandle : TextOutputStream {
    /// Encodes `string` as UTF-8 and writes it; does nothing if encoding fails.
    public func write(_ string: String) {
        let utf8Bytes = string.data(using: .utf8)
        if let utf8Bytes = utf8Bytes {
            self.write(utf8Bytes)
        }
    }
}
// At least one argument is required; otherwise print the usage text to standard error and stop.
if CommandLine.arguments.count <= 1 {
    print("""
        Usage: \(CommandLine.arguments[0]) [-filter filterFile.txt] assetBundle1.assets [assetBundle2.assets ...]
        Use - to read from stdin
        Finds 3-byte unicode characters (like kanji) in files
        If a filter is supplied, only characters also in the filter will be outputted
        """, to: &standardError)
    exit(EXIT_FAILURE)
}
// Compatibility shim: only compiled when the Swift version is older than 4.2.
// It provides `firstIndex(where:)` by forwarding to `index(where:)`, so the rest
// of this script can call `firstIndex(where:)` on either compiler version.
#if !swift(>=4.2)
extension Collection {
/// Returns the index of the first element satisfying `predicate`, or `nil` if there is none.
/// Rethrows any error thrown by `predicate`.
func firstIndex(where predicate: (Element) throws -> Bool) rethrows -> Index? {
return try self.index(where: predicate)
}
}
#endif
/// Contents of the filter file, when `-filter <file>` was given.
var filter: String? = nil
/// Input paths: every argument after the program name, minus the filter option and its file.
var inFiles: [String] = Array(CommandLine.arguments.dropFirst())
if let filterIndex = inFiles.firstIndex(where: { $0.lowercased() == "-filter" }) {
    let filterPathIndex = filterIndex + 1
    // The option is only honoured (and removed) when a file name follows it.
    if filterPathIndex < inFiles.endIndex {
        filter = try String(contentsOf: URL(fileURLWithPath: inFiles[filterPathIndex]))
        inFiles.removeSubrange(filterIndex...filterPathIndex)
    }
}

/// Raw contents of every input, or of standard input when the only input is `-`.
let bundles: [Data]
if inFiles != ["-"] {
    bundles = try inFiles.map { path in
        try Data(contentsOf: URL(fileURLWithPath: path))
    }
} else {
    bundles = [FileHandle.standardInput.readDataToEndOfFile()]
}
extension UTF8.CodeUnit {
    /// True when the top four bits are `1110`, the lead byte of a 3-byte UTF-8 sequence.
    var isStart3: Bool {
        return self >> 4 == 0b1110
    }

    /// True when the top two bits are `10`, a UTF-8 continuation byte.
    var isContinuation: Bool {
        return self >> 6 == 0b10
    }
}
/// Extracts runs of consecutive 3-byte UTF-8 sequences from arbitrary bytes.
///
/// The data is scanned for a lead byte of a 3-byte sequence; from there, sequences
/// of the shape lead + continuation + continuation are counted for as long as they
/// follow each other directly. Runs of at least `minLength` sequences are kept,
/// shorter runs are discarded, and scanning resumes one byte after the end of the run.
///
/// - Parameters:
///   - data: The bytes to search.
///   - minLength: Minimum number of consecutive 3-byte sequences for a run to be kept.
/// - Returns: All kept runs, concatenated in order and decoded as UTF-8.
func unicodeFinder(data: [UInt8], minLength: Int = 2) -> String {
    func isThreeByteLead(_ byte: UInt8) -> Bool {
        return byte & 0b11110000 == 0b11100000
    }
    func isContinuationByte(_ byte: UInt8) -> Bool {
        return byte & 0b11000000 == 0b10000000
    }

    // A run can only qualify if this many bytes are still available (6 for the default minLength).
    let requiredBytes = max(minLength, 1) * 3
    var out = [UInt8]()
    var position = data.startIndex

    while data.endIndex - position >= requiredBytes {
        guard isThreeByteLead(data[position]) else {
            position += 1
            continue
        }
        // Extend the run while a complete, well-shaped sequence is available;
        // the bounds check keeps a truncated trailing sequence from being read.
        var runEnd = position
        while data.endIndex - runEnd >= 3
            && isThreeByteLead(data[runEnd])
            && isContinuationByte(data[runEnd + 1])
            && isContinuationByte(data[runEnd + 2]) {
            runEnd += 3
        }
        if (runEnd - position) / 3 >= minLength {
            out.append(contentsOf: data[position..<runEnd])
        }
        // The byte at `runEnd` did not start a valid sequence; always move past it so
        // the scan makes progress even when the run reached the end of the data.
        position = runEnd + 1
    }
    return String(decoding: out, as: UTF8.self)
}
/// The 3-byte-character runs found in each input.
let unicodeStrings = bundles.map { bundle in
    unicodeFinder(data: Array(bundle))
}

/// Every distinct scalar found in any input, restricted to the filter's scalars when a filter was given.
var chars = unicodeStrings.reduce(into: Set<UnicodeScalar>()) { collected, text in
    collected.formUnion(text.unicodeScalars)
}
if let filterText = filter {
    chars.formIntersection(filterText.unicodeScalars)
}

// Print the scalars in ascending order, without a trailing newline.
let sortedCharacters = chars.sorted().lazy.map(Character.init)
print(String(sortedCharacters), terminator: "")

View File

@@ -0,0 +1,7 @@
Some scripts for figuring out which characters are used in games, to help with choosing which characters to put on font atlases.
I had originally made these for personal use and wasn't thinking about publishing them, so I wrote them in Swift, which doesn't currently support compiling on Windows. Very sorry about that. I guess you could try out WSL?
Download Swift [here](https://swift.org/download/) for Ubuntu or macOS, it also appears to be [on the AUR](https://aur.archlinux.org/packages/swift/) for Arch users. Compile a script with `swiftc -O scriptFile.swift` or run it directly with `swift -O scriptFile.swift arguments`, though that will be fairly slow if you plan to run the script multiple times.
Documentation coming soon™

View File

@@ -0,0 +1,24 @@
import Foundation
/// Standard error, passed to `print(_:to:)` for the usage message.
var standardError = FileHandle.standardError

/// Allows printing text directly to a file handle.
extension FileHandle : TextOutputStream {
    /// Writes the UTF-8 encoding of `string`; nothing is written if the string cannot be encoded.
    public func write(_ string: String) {
        guard let payload = string.data(using: .utf8) else {
            return
        }
        self.write(payload)
    }
}
// An input is required; otherwise print the usage text to standard error and stop.
if CommandLine.arguments.count <= 1 {
    print("Usage: \(CommandLine.arguments[0]) file\nUse - to read from stdin", to: &standardError)
    exit(EXIT_FAILURE)
}

/// The whole input: standard input for `-`, otherwise the contents of the named file.
let input: String
switch CommandLine.arguments[1] {
case "-":
    input = String(decoding: FileHandle.standardInput.readDataToEndOfFile(), as: UTF8.self)
default:
    input = try String(contentsOf: URL(fileURLWithPath: CommandLine.arguments[1]))
}

/// The distinct Unicode scalars of the input.
let chars = Set(input.unicodeScalars)
/// The same scalars in ascending order, as characters.
let out = chars.sorted().lazy.map(Character.init)
// Print them without a trailing newline.
print(String(out), terminator: "")