further refactoring

* dropping trash scripts
 * using settings now internally for encoding.
 * added requirements.txt
 * quick.py now serves as some quick basic test
This commit is contained in:
Gabor Körber 2017-05-22 18:27:30 +02:00
parent b25095001b
commit a5d353e302
10 changed files with 158 additions and 155 deletions

4
requirements.txt Normal file
View File

@ -0,0 +1,4 @@
# Requirements for the full functionality. Note: the core library is intended to work without any dependencies.
chardet
pyqt5
watchdog

View File

@ -19,21 +19,14 @@ from scon.logs.logfiles import LogFileResolver as LogFile
from scon.logs import combat, game, chat
from scon.logs.session import LogSessionCollector
from scon.logs.game import ClientInfo
from scon.config.settings import settings
# only analyze_path is used in this script. the others are for example.
settings = {'analyze_path': os.path.join(os.path.expanduser('~'),
'Documents', 'My Games', 'sc'),
'root_path': os.path.join(os.path.expanduser('~'),
'Documents',
'My Games',
'StarConflict',),
'logfiles': os.path.join(os.path.expanduser('~'),
'Documents',
'My Games',
'StarConflict',
'logs'
),}
settings.autodetect()
# only analyze_path is used in this script. set it to settings.get_log_path() if you want to scan only your recent log directories.
settings['analyze_path'] = os.path.join(os.path.expanduser('~'),
'Documents', 'My Games', 'sc')
def select_parsing_sessions(alist):
# for micro controlling, which sessions to parse.

View File

@ -1,17 +0,0 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
Main application functions.
* backing up logs.
- from directory to directory
- compression as option
* log-sessions:
- contains one session of log
- has a source (directory, file)
- determines user
- parses logs
"""

View File

@ -1,33 +0,0 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
Tool to analyze Logs in general.
"""
import os, sys, logging
from logs.logfiles import LogFileResolver as LogFile
from logs import combat, game, chat
from logs.session import LogSessionCollector
from logs.game import ClientInfo
# Star Conflict user folders, laid out the way they are on Windows.
settings = dict(
    root_path=os.path.join(os.path.expanduser('~'),
                           'Documents', 'My Games', 'StarConflict'),
    logfiles=os.path.join(os.path.expanduser('~'),
                          'Documents', 'My Games', 'StarConflict', 'logs'),
)

if __name__ == '__main__':
    # Collect the unique sessions found in the 'sc' folder, parse and clean
    # each one, then print how many lines each parsed log holds.
    collector = LogSessionCollector(os.path.join(os.path.expanduser('~'),
                                                 'Documents', 'My Games', 'sc'))
    collector.collect_unique()
    for session in collector.sessions:
        session.parse_files(['game.log', 'combat.log'])
        session.clean()
        for label, attr in (('length combat log ', 'combat_log'),
                            ('length game log ', 'game_log')):
            log = getattr(session, attr)
            if log:
                # the (label, count) tuple itself is printed, as before.
                print((label, len(log.lines)))

View File

@ -1,85 +0,0 @@
"""
Brainstorm File for Star Conflict Log Parsing
Needed
- find steam/scon folder on windows
- find steam/scon folder on mac
- find steam/scon folder on linux
- what about steamless installs?
Elaborate
- which GUI to use? wx? PyQt4? PySide?
- take over the database stuff from weltenfall.starconflict?
Investigate
- language based log files?
"""
#from win32com.shell import shell, shellcon
import os, sys, logging
from logs.logfiles import LogFileResolver as LogFile
from logs import combat
# Star Conflict user folders, laid out the way they are on Windows.
settings = dict(
    root_path=os.path.join(os.path.expanduser('~'),
                           'Documents', 'My Games', 'StarConflict'),
    logfiles=os.path.join(os.path.expanduser('~'),
                          'Documents', 'My Games', 'StarConflict', 'logs'),
)
def find_log_files(logpath):
    ''' returns a list of 4-tuples representing
    (combat.log, game.log, chat.log, game.net.log)
    for each directory directly below logpath that contains all four files.

    Directories missing any of the four files, and plain files in logpath,
    are skipped. The tuples hold full paths and are ordered by directory
    name, so the result is the same on every run.
    Raises OSError if logpath cannot be listed.
    '''
    log_names = ('combat.log', 'game.log', 'chat.log', 'game.net.log')
    ret = []
    # os.listdir() yields entries in arbitrary order; sort to be deterministic.
    for directory in sorted(os.listdir(logpath)):
        full_dir = os.path.join(logpath, directory)
        if not os.path.isdir(full_dir):
            continue
        pack = tuple(os.path.join(full_dir, name) for name in log_names)
        # only complete packs are reported.
        if all(os.path.exists(path) for path in pack):
            ret.append(pack)
    return ret
def parse_games(logfiles):
    """ Wrap the combat log of every log pack in a LogFile.

    Each item of logfiles is unpacked into exactly four values
    (combat, game, chat, game.net log); a pack of any other length raises.
    Only the combat log is wrapped, the other three paths are not used.
    Returns the list of created LogFile objects, in input order.
    """
    return [LogFile(combat_path)
            for combat_path, _game_path, _chat_path, _net_path in logfiles]
# Script entry point: counts, per line class, the parsed log lines whose unpack() returns a falsy value.
# NOTE(review): indentation is not visible in this view; confirm the nesting of the statements below against the original file.
if __name__ == '__main__':
# find the log packs below settings['logfiles'], then wrap them with parse_games().
logfiles = find_log_files(settings['logfiles'])
logfiles = parse_games(logfiles)
#f = open('output.txt', 'w')
# rex maps a class name to the number of lines of that class counted below.
rex = {}
for logf in logfiles:
logf.read()
logf.parse()
for l in logf.lines:
# entries that are plain dicts are ignored.
if isinstance(l, dict):
#print l
pass
else:
if not l.unpack():
rex[l.__class__.__name__] = rex.get(l.__class__.__name__, 0) + 1
# presumably prints the raw 'log' text of lines that did not unpack, except combat.UserEvent ones -- TODO confirm nesting.
if not isinstance(l, combat.UserEvent):
print((l.values['log']))
#f.write(l.values['log'] + '\n')
#f.close()
#print type(l)
print(rex)

View File

@ -1,12 +1,20 @@
import os
import os, sys, codecs, locale
import platform
import logging
FALLBACK = 'iso8859-1'
class Settings(dict):
# note that settings is a dict.
def autodetect(self, path=None):
""" autodetects config_path, returns True on success.
""" autodetects config_path and default_encoding, returns True on success.
if a path is given, it is set to it, as far as it exists.
"""
# default encoding for text files, however even at cp1252 used at some chats, iso8859-1 seems working best.
#self['default_encoding'] = locale.getpreferredencoding()
self['default_encoding'] = FALLBACK
# this code is mostly in here to remember how to check Operation Systems,
# if the project ever needs releasable binaries.
# following code tries autodetecting star conflict user file folder
@ -28,6 +36,7 @@ class Settings(dict):
if not d:
raise NotImplementedError("Unknown System! %s" % platform.system())
if not os.path.exists(d) or not os.path.isdir(d):
logging.error('Autodetection: log path %s does not exist.' % d)
return False
self['config_path'] = os.path.abspath(d)
return True
@ -43,4 +52,98 @@ class Settings(dict):
return os.path.join(p, 'logs')
return None
def detect_encoding(self, filename=None):
    """ Return the encoding to use for a file, or the general default.

    Without a filename, or while the 'use_chardet' setting is not truthy,
    this returns the 'default_encoding' setting (FALLBACK if it is unset).
    Otherwise detect_encoding_chardet() is run in quick mode on filename,
    the result is logged (info if it equals the default, warning if it
    differs) and returned. If detection raises, the error is logged with
    its traceback and the default encoding is returned instead.
    """
    # given atm this is called by every file once, for speed reasons, this has to be refactored later.
    default_encoding = self.get('default_encoding', FALLBACK)
    if filename is None or not self.get('use_chardet', False):
        return default_encoding
    try:
        cde = self.detect_encoding_chardet(filename, quick=True)
    except Exception:
        # best effort: a failed detection must not stop log reading, but
        # KeyboardInterrupt / SystemExit are deliberately not swallowed.
        logging.exception('Encoding detection failed for %s' % filename)
        return default_encoding
    if cde == default_encoding:
        logging.info('Logfile %s has encoding %s' % (filename, cde))
    else:
        logging.warning('Logfile %s has a different encoding than %s, being %s' % (filename, default_encoding, cde))
    return cde
def detect_encoding_chardet(self, filename, detector=None, quick=False):
    """ Detect file encoding with chardet.
    This is an experimental utility function.

    filename is read in binary mode and fed line by line to the detector
    until it is done. With quick=True feeding stops after 100 lines; in any
    case it stops, logging an error, after 10000 lines.
    detector may be an existing UniversalDetector to reuse; it is reset first.

    Returns the codec name as normalized by codecs.lookup().
    Returns iso8859-1 (FALLBACK) if chardet reports no encoding, or one
    Python has no codec for.
    Returns the default_encoding entry (or FALLBACK) if chardet is not installed.
    """
    try:
        from chardet.universaldetector import UniversalDetector
    except ImportError:
        logging.error('Chardet is not installed.')
        return self.get('default_encoding', FALLBACK)
    detector = detector or UniversalDetector()
    detector.reset()
    lines_fed = 0
    with open(filename, 'rb') as logfile:
        for line in logfile:
            detector.feed(line)
            if detector.done:
                break
            lines_fed += 1
            if quick and lines_fed > 100:
                break
            if lines_fed > 10000:
                logging.error("Detector is too hungry")
                break
    detector.close()
    # the 'encoding' key is present but None when chardet could not decide,
    # so a .get() default would never apply here.
    encoding = detector.result.get('encoding') or FALLBACK
    try:
        return codecs.lookup(encoding).name
    except LookupError:
        # chardet can name encodings Python has no codec for; such a name
        # could not be used to open the file.
        logging.warning('Detected encoding %s of %s is unknown to Python, using %s' % (encoding, filename, FALLBACK))
        return FALLBACK
# Module-wide singleton: import `settings` from here and modify it in place.
settings = Settings()

if __name__ == '__main__':
    # Manual self-test of autodetection and encoding detection.
    if not settings.autodetect():
        print("Settings was not autodetected :(")
        sys.exit(1)
    print("Settings was autodetected.")
    print("Config Path is %s" % settings.get_config_path())

    # chardet is optional; switch it on for this test only if it imports.
    try:
        import chardet
    except ImportError:
        pass
    else:
        settings['use_chardet'] = True

    # Background on the two candidate encodings:
    # http://www.i18nqa.com/debug/table-iso8859-1-vs-windows-1252.html
    # The logs may be cp1252 by default, which is the preferred encoding the
    # locale module reports on the author's system. Python decodes cp1252 via
    # a charmap that fails on some bytes which iso8859-1 can handle, so it is
    # no surprise that the detector reports iso8859-1 for most scanned logs,
    # except for a few chats in cp1252.
    # This may also be related to windows updates and to scanning old data as
    # well, or sc may use a hardcoded encoding.

    # now some tests for encoding detection
    print('Locale Get Preferred Encoding is ', locale.getpreferredencoding())
    print('File System Encoding is ', sys.getfilesystemencoding())
    user_config = os.path.join(settings.get_config_path(), 'user_config.xml')
    print('The UserConfig xml is saved as ', settings.detect_encoding(user_config))
    # note: you need to add a logfile here.
    sample_log = os.path.join(settings.get_logs_path(),
                              '2017.03.17 12.02.13',
                              'combat.log')
    print('Found Log file saved as ', settings.detect_encoding(sample_log))

View File

@ -10,6 +10,7 @@
"""
from .logstream import LogStream
import io, logging
from scon.config.settings import settings
class LogFile(LogStream):
def __init__(self, fname=None,
@ -22,9 +23,10 @@ class LogFile(LogStream):
def read(self, fname=None):
    """ Read a log file into this stream via set_data().

    fname defaults to self.fname. The file is opened once, in text mode,
    with the encoding reported by settings.detect_encoding(fname).
    Errors are logged together with the encoding that was tried; they are
    not raised.
    """
    fname = fname or self.fname
    encoding = None
    try:
        encoding = settings.detect_encoding(fname)
        # 'with' closes the handle on every path; a failing open() no longer
        # hits a close() on a name that was never bound.
        with io.open(fname, 'r', encoding=encoding) as f:
            self.set_data(f.read())
    except Exception as e:
        logging.info("settings.detect_encoding(fname): %s" % encoding)
        logging.error("Error %s reading file %s " % (e, fname, ))

View File

@ -3,6 +3,7 @@
"""
import zipfile, logging, os, io
from .logfiles import CombatLogFile, GameLogFile, ChatLogFile
from scon.config.settings import settings
class LogSession(object):
"""
@ -112,17 +113,17 @@ class LogFileSession(LogSession):
if fn:
if fn == 'combat.log' and (not files or fn in files) and not 'combat.log' in self.files_parsed:
self.combat_log = CombatLogFile(fn)
self.combat_log.set_data(io.TextIOWrapper(io.BytesIO(z.read(filename)), encoding='iso8859-1').read())
self.combat_log.set_data(io.TextIOWrapper(io.BytesIO(z.read(filename)), encoding=settings.detect_encoding()).read())
self.combat_log.parse()
self.files_parsed.append('combat.log')
elif fn == 'game.log' and (not files or fn in files) and not 'game.log' in self.files_parsed:
self.game_log = GameLogFile(fn)
self.game_log.set_data(io.TextIOWrapper(io.BytesIO(z.read(filename)), encoding='iso8859-1').read())
self.game_log.set_data(io.TextIOWrapper(io.BytesIO(z.read(filename)), encoding=settings.detect_encoding()).read())
self.game_log.parse()
self.files_parsed.append('game.log')
elif fn == 'chat.log' and (not files or fn in files) and not 'chat.log' in self.files_parsed:
self.chat_log = ChatLogFile(fn)
self.chat_log.set_data(io.TextIOWrapper(io.BytesIO(z.read(filename)), encoding='iso8859-1').read())
self.chat_log.set_data(io.TextIOWrapper(io.BytesIO(z.read(filename)), encoding=settings.detect_encoding()).read())
self.chat_log.parse()
self.files_parsed.append('chat.log')
except Exception as e:

View File

@ -131,4 +131,6 @@ def main():
sys.exit(r)
if __name__ == "__main__":
import logging
logging.basicConfig(level=logging.DEBUG)
main()

33
src/scon/quick.py Normal file
View File

@ -0,0 +1,33 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
Script to quickly test recent logs
"""
import os, sys, logging
from scon.logs.logfiles import LogFileResolver as LogFile
from scon.logs import combat, game, chat
from scon.logs.session import LogSessionCollector
from scon.logs.game import ClientInfo
from scon.config.settings import settings
# Detect the settings once, when the module is loaded.
settings.autodetect()

if __name__ == '__main__':
    def _print_lengths(session):
        # print a (label, line count) tuple for each log the session holds.
        for label, attr in (('length combat log ', 'combat_log'),
                            ('length game log ', 'game_log')):
            log = getattr(session, attr)
            if log:
                print((label, len(log.lines)))

    # Parse game and combat logs of every unique session in the logs folder
    # and show the line counts before and after cleaning.
    collector = LogSessionCollector(settings.get_logs_path())
    collector.collect_unique()
    for session in collector.sessions:
        print (session.idstr)
        session.parse_files(['game.log', 'combat.log'])
        _print_lengths(session)
        print ("Cleaning.")
        session.clean()
        _print_lengths(session)