From a5d353e3027cf5c1d4edca6b9360693577bab1be Mon Sep 17 00:00:00 2001 From: Gabor Guzmics Date: Mon, 22 May 2017 18:27:30 +0200 Subject: [PATCH] further refactoring * dropping trash scripts * using settings now internally for encoding. * added requirements.txt * quick.py now serves as some quick basic test --- requirements.txt | 4 ++ src/scon/analyze.py | 21 +++---- src/scon/app.py | 17 ------ src/scon/battle.py | 33 ----------- src/scon/brainstorm.py | 85 ---------------------------- src/scon/config/settings.py | 107 +++++++++++++++++++++++++++++++++++- src/scon/logs/logfile.py | 4 +- src/scon/logs/session.py | 7 ++- src/scon/qlogviewer.py | 2 + src/scon/quick.py | 33 +++++++++++ 10 files changed, 158 insertions(+), 155 deletions(-) create mode 100644 requirements.txt delete mode 100644 src/scon/app.py delete mode 100644 src/scon/battle.py delete mode 100644 src/scon/brainstorm.py create mode 100644 src/scon/quick.py diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..a20934e --- /dev/null +++ b/requirements.txt @@ -0,0 +1,4 @@ +# Requirements for the full functionality. Note, core library is aimed to work without dependency. +chardet +pyqt5 +watchdog diff --git a/src/scon/analyze.py b/src/scon/analyze.py index bf74583..58fcdd1 100644 --- a/src/scon/analyze.py +++ b/src/scon/analyze.py @@ -19,21 +19,14 @@ from scon.logs.logfiles import LogFileResolver as LogFile from scon.logs import combat, game, chat from scon.logs.session import LogSessionCollector from scon.logs.game import ClientInfo +from scon.config.settings import settings -# only analyze_path is used in this script. the others are for example. -settings = {'analyze_path': os.path.join(os.path.expanduser('~'), - 'Documents', 'My Games', 'sc'), - - 'root_path': os.path.join(os.path.expanduser('~'), - 'Documents', - 'My Games', - 'StarConflict',), - 'logfiles': os.path.join(os.path.expanduser('~'), - 'Documents', - 'My Games', - 'StarConflict', - 'logs' - ),} + + +settings.autodetect() +# only analyze_path is used in this script. set it to settings.get_log_path() if you want to scan only your recent log directories. +settings['analyze_path'] = os.path.join(os.path.expanduser('~'), + 'Documents', 'My Games', 'sc') def select_parsing_sessions(alist): # for micro controlling, which sessions to parse. diff --git a/src/scon/app.py b/src/scon/app.py deleted file mode 100644 index db66202..0000000 --- a/src/scon/app.py +++ /dev/null @@ -1,17 +0,0 @@ -#!/usr/bin/python -# -*- coding: utf-8 -*- -""" - Main application functions. - - * backing up logs. - - from directory to directory - - compression as option - - * log-sessions: - - contains one session of log - - has a source (directory, file) - - determines user - - parses logs - - -""" \ No newline at end of file diff --git a/src/scon/battle.py b/src/scon/battle.py deleted file mode 100644 index 609f8f5..0000000 --- a/src/scon/battle.py +++ /dev/null @@ -1,33 +0,0 @@ -#!/usr/bin/python -# -*- coding: utf-8 -*- -""" - Tool to analyze Logs in general. -""" -import os, sys, logging -from logs.logfiles import LogFileResolver as LogFile -from logs import combat, game, chat -from logs.session import LogSessionCollector -from logs.game import ClientInfo - -# for windows its kinda this: -settings = {'root_path': os.path.join(os.path.expanduser('~'), - 'Documents', - 'My Games', - 'StarConflict',), - 'logfiles': os.path.join(os.path.expanduser('~'), - 'Documents', - 'My Games', - 'StarConflict', - 'logs' - )} -if __name__ == '__main__': - coll = LogSessionCollector(os.path.join(os.path.expanduser('~'), - 'Documents', 'My Games', 'sc')) - coll.collect_unique() - for logf in coll.sessions: - logf.parse_files(['game.log', 'combat.log']) - logf.clean() - if logf.combat_log: - print(('length combat log ', len(logf.combat_log.lines))) - if logf.game_log: - print(('length game log ', len(logf.game_log.lines))) \ No newline at end of file diff --git a/src/scon/brainstorm.py b/src/scon/brainstorm.py deleted file mode 100644 index db3be2f..0000000 --- a/src/scon/brainstorm.py +++ /dev/null @@ -1,85 +0,0 @@ -""" - Brainstorm File for Star Conflict Log Parsing - - Needed - - find steam/scon folder on windows - - find steam/scon folder on mac - - find steam/scon folder on linux - - what about steamless installs? - - Elaborate - - which GUI to use? wx? PyQt4? PySide? - - take over the database stuff from weltenfall.starconflict? - - Investigate - - language based log files? -""" -#from win32com.shell import shell, shellcon -import os, sys, logging -from logs.logfiles import LogFileResolver as LogFile -from logs import combat - -# for windows its kinda this: -settings = {'root_path': os.path.join(os.path.expanduser('~'), - 'Documents', - 'My Games', - 'StarConflict',), - 'logfiles': os.path.join(os.path.expanduser('~'), - 'Documents', - 'My Games', - 'StarConflict', - 'logs' - )} - -def find_log_files(logpath): - ''' returns a list of 4-tuples representing - (combat.log, game.log, chat.log, game.net.log) - for each directory in the logpath - ''' - ret = [] - for directory in os.listdir(logpath): - full_dir = os.path.join(logpath, directory) - if os.path.isdir(full_dir): - if os.path.exists(os.path.join(full_dir, 'combat.log'))\ - and os.path.exists(os.path.join(full_dir, 'game.log'))\ - and os.path.exists(os.path.join(full_dir, 'chat.log'))\ - and os.path.exists(os.path.join(full_dir, 'game.net.log')): - ret.append(( - os.path.join(full_dir, 'combat.log'), - os.path.join(full_dir, 'game.log'), - os.path.join(full_dir, 'chat.log'), - os.path.join(full_dir, 'game.net.log') - )) - return ret - -def parse_games(logfiles): - _logfiles = [] - for logpack in logfiles: - combatlog, gamelog, chatlog, gamenetlog = logpack - _logfiles.append(LogFile(combatlog)) - #_logfiles.append(LogFile(gamelog)) - #_logfiles.append(LogFile(chatlog)) - #_logfiles.append(LogFile(gamenetlog)) - return _logfiles - -if __name__ == '__main__': - logfiles = find_log_files(settings['logfiles']) - logfiles = parse_games(logfiles) - #f = open('output.txt', 'w') - rex = {} - for logf in logfiles: - logf.read() - logf.parse() - for l in logf.lines: - if isinstance(l, dict): - #print l - pass - else: - if not l.unpack(): - rex[l.__class__.__name__] = rex.get(l.__class__.__name__, 0) + 1 - if not isinstance(l, combat.UserEvent): - print((l.values['log'])) - #f.write(l.values['log'] + '\n') - #f.close() - #print type(l) - print(rex) \ No newline at end of file diff --git a/src/scon/config/settings.py b/src/scon/config/settings.py index 38b6820..b8b9677 100644 --- a/src/scon/config/settings.py +++ b/src/scon/config/settings.py @@ -1,12 +1,20 @@ -import os + +import os, sys, codecs, locale import platform +import logging +FALLBACK = 'iso8859-1' class Settings(dict): # note that settings is a dict. def autodetect(self, path=None): - """ autodetects config_path, returns True on success. + """ autodetects config_path and default_encoding, returns True on success. if a path is given, it is set to it, as far as it exists. """ + # default encoding for text files, however even at cp1252 used at some chats, iso8859-1 seems working best. + #self['default_encoding'] = locale.getpreferredencoding() + self['default_encoding'] = FALLBACK + + # this code is mostly in here to remember how to check Operation Systems, # if the project ever needs releasable binaries. # following code tries autodetecting star conflict user file folder @@ -28,6 +36,7 @@ class Settings(dict): if not d: raise NotImplementedError("Unknown System! %s" % platform.system()) if not os.path.exists(d) or not os.path.isdir(d): + logging.error('Autodetection: log path %s does not exist.' % d) return False self['config_path'] = os.path.abspath(d) return True @@ -42,5 +51,99 @@ class Settings(dict): if p: return os.path.join(p, 'logs') return None + + def detect_encoding(self, filename=None, ): + """ Detecting the encoding of a file, or generally """ + # given atm this is called by every file once, for speed reasons, this has to be refactored later. + if filename is None or not self.get('use_chardet', False): + # default if filename is none: return self['default_encoding'] or iso8859-1 + return self.get('default_encoding', FALLBACK) + else: + # if a filename is given, we could use chardet. + # atm we only use it to debug this process if use_chardet is true + if not self.get('use_chardet', False): + return self.get('default_encoding', FALLBACK) + try: + cde = self.detect_encoding_chardet(filename, quick=True) + default_encoding = self.get('default_encoding', FALLBACK) + if cde == default_encoding: + logging.info('Logfile %s has encoding %s' % (filename, cde)) + else: + logging.warning('Logfile %s has a different encoding than %s, being %s' % (filename, default_encoding, cde)) + return cde + except: + import traceback + traceback.print_exc() + return self.get('default_encoding', FALLBACK) + + def detect_encoding_chardet(self, filename, detector=None, quick=False): + """ Detect file encoding with chardet. + This is an experimental utility function. + Returns iso8859-1 if even chardet fails, as i assume its the standard encoding used in log engine. + Returns default_encoding entry if chardet is not installed, or else, yet again iso8859-1 + """ + try: + from chardet.universaldetector import UniversalDetector + except ImportError: + logging.error('Chardet is not installed.') + return self.get('default_encoding', FALLBACK) + detector = detector or UniversalDetector() + detector.reset() + det_tresh = 0 + with open(filename, 'rb') as file: + for line in file: + detector.feed(line) + if detector.done: + break + else: + det_tresh +=1 + if det_tresh > 100 and quick: + break + elif det_tresh > 10000: + logging.error("Detector is too hungry") + break + detector.close() + try: + return codecs.lookup( detector.result.get('encoding', FALLBACK) ).name + except LookupError: + return detector.result.get('encoding', FALLBACK) +# this acts as a singleton. modify it from here. settings = Settings() + +if __name__ == '__main__': + # test this. + if settings.autodetect(): + print("Settings was autodetected.") + print("Config Path is %s" % settings.get_config_path()) + else: + print("Settings was not autodetected :(") + sys.exit(1) + + try: + import chardet + settings['use_chardet'] = True + except ImportError: + pass + + # http://www.i18nqa.com/debug/table-iso8859-1-vs-windows-1252.html + # This makes it sound easy. + + # it may be that its by default cp1252, which is the locale preferred encoding given by locale on my system. + # on the other hand, python uses charmap for that, which fails on some bytes, iso8859-1 can handle. + # no wonder, the detector detects iso8859-1 for most log files i scanned, except a few chats in cp1252 + # it may be, that this is also related to windows updates and my habit of scanning old data aswell, it may also be that sc uses a hardcoded encoding + + # now some tests for encoding detection + print('Locale Get Preferred Encoding is ', locale.getpreferredencoding()) + print('File System Encoding is ', sys.getfilesystemencoding()) + + print('The UserConfig xml is saved as ', settings.detect_encoding(os.path.join(settings.get_config_path(), + 'user_config.xml'))) + + + print('Found Log file saved as ', settings.detect_encoding(os.path.join(settings.get_logs_path(), + # note: you need to add a logfile here. + '2017.03.17 12.02.13', + 'combat.log'))) + \ No newline at end of file diff --git a/src/scon/logs/logfile.py b/src/scon/logs/logfile.py index e32098f..f1b3fbb 100644 --- a/src/scon/logs/logfile.py +++ b/src/scon/logs/logfile.py @@ -10,6 +10,7 @@ """ from .logstream import LogStream import io, logging +from scon.config.settings import settings class LogFile(LogStream): def __init__(self, fname=None, @@ -22,9 +23,10 @@ class LogFile(LogStream): def read(self, fname=None): fname = fname or self.fname try: - f = io.open(fname, 'r', encoding="iso8859-1") + f = io.open(fname, 'r', encoding=settings.detect_encoding(fname)) self.set_data(f.read()) except Exception as e: + logging.info("settings.detect_encoding(fname): %s" % settings.detect_encoding(fname) ) logging.error("Error %s reading file %s " % (e, fname, )) finally: f.close() diff --git a/src/scon/logs/session.py b/src/scon/logs/session.py index 0321ee4..4c1af37 100644 --- a/src/scon/logs/session.py +++ b/src/scon/logs/session.py @@ -3,6 +3,7 @@ """ import zipfile, logging, os, io from .logfiles import CombatLogFile, GameLogFile, ChatLogFile +from scon.config.settings import settings class LogSession(object): """ @@ -112,17 +113,17 @@ class LogFileSession(LogSession): if fn: if fn == 'combat.log' and (not files or fn in files) and not 'combat.log' in self.files_parsed: self.combat_log = CombatLogFile(fn) - self.combat_log.set_data(io.TextIOWrapper(io.BytesIO(z.read(filename)), encoding='iso8859-1').read()) + self.combat_log.set_data(io.TextIOWrapper(io.BytesIO(z.read(filename)), encoding=settings.detect_encoding()).read()) self.combat_log.parse() self.files_parsed.append('combat.log') elif fn == 'game.log' and (not files or fn in files) and not 'game.log' in self.files_parsed: self.game_log = GameLogFile(fn) - self.game_log.set_data(io.TextIOWrapper(io.BytesIO(z.read(filename)), encoding='iso8859-1').read()) + self.game_log.set_data(io.TextIOWrapper(io.BytesIO(z.read(filename)), encoding=settings.detect_encoding()).read()) self.game_log.parse() self.files_parsed.append('game.log') elif fn == 'chat.log' and (not files or fn in files) and not 'chat.log' in self.files_parsed: self.chat_log = ChatLogFile(fn) - self.chat_log.set_data(io.TextIOWrapper(io.BytesIO(z.read(filename)), encoding='iso8859-1').read()) + self.chat_log.set_data(io.TextIOWrapper(io.BytesIO(z.read(filename)), encoding=settings.detect_encoding()).read()) self.chat_log.parse() self.files_parsed.append('chat.log') except Exception as e: diff --git a/src/scon/qlogviewer.py b/src/scon/qlogviewer.py index b8159d1..7755ee3 100644 --- a/src/scon/qlogviewer.py +++ b/src/scon/qlogviewer.py @@ -131,4 +131,6 @@ def main(): sys.exit(r) if __name__ == "__main__": + import logging + logging.basicConfig(level=logging.DEBUG) main() \ No newline at end of file diff --git a/src/scon/quick.py b/src/scon/quick.py new file mode 100644 index 0000000..309c579 --- /dev/null +++ b/src/scon/quick.py @@ -0,0 +1,33 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +""" + Script to quickly test recent logs + +""" +import os, sys, logging +from scon.logs.logfiles import LogFileResolver as LogFile +from scon.logs import combat, game, chat +from scon.logs.session import LogSessionCollector +from scon.logs.game import ClientInfo +from scon.config.settings import settings + +settings.autodetect() + +if __name__ == '__main__': + coll = LogSessionCollector(settings.get_logs_path()) + coll.collect_unique() + for logf in coll.sessions: + print (logf.idstr) + logf.parse_files(['game.log', 'combat.log']) + + if logf.combat_log: + print(('length combat log ', len(logf.combat_log.lines))) + if logf.game_log: + print(('length game log ', len(logf.game_log.lines))) + print ("Cleaning.") + logf.clean() + if logf.combat_log: + print(('length combat log ', len(logf.combat_log.lines))) + if logf.game_log: + print(('length game log ', len(logf.game_log.lines))) + \ No newline at end of file