diff --git a/readme.rst b/readme.rst index f253a06..71639e2 100644 --- a/readme.rst +++ b/readme.rst @@ -1,8 +1,21 @@ -library for reading SC logs +Library for reading SC logs +=========================== Github Release of my logparsing attempt and other stuffs, from internal git. -This software is in a pre-alpha stage, and is considered mostly useful for scientific purposes. +This software is in a *pre-alpha* stage, and is considered mostly useful for *scientific* purposes. -License: LGPL +*License: LGPL* + +Current Goals +------------- + - make library usable with an example + - more documentation + - update to handle all current combat events in pvp + +Changelog +--------- + - converted 99% to python3. switched to py3 development. + - moved to github with parser library only + \ No newline at end of file diff --git a/src/scon/analyze.py b/src/scon/analyze.py index df07476..491a617 100644 --- a/src/scon/analyze.py +++ b/src/scon/analyze.py @@ -2,6 +2,17 @@ # -*- coding: utf-8 -*- """ Tool to analyze Logs in general. + + This tool is built to discover unidentified packets. + It is mainly used in development + (for the whole library, this is actually the most important script atm) + + It outputs a trash.log.bak and a scon.log.bak, and works its way through gigabytes of my backed-up test data. + + This script therefore has the following purposes: + - a) find bugs, find unknown packets (i.e. new types of log entries in combat.log) + - b) see speed of parsing + - c) test parsing for memory efficiency, because parsing lots of big files needs that. """ import os, sys, logging from scon.logs.logfiles import LogFileResolver as LogFile @@ -9,10 +20,11 @@ from scon.logs import combat, game, chat from scon.logs.session import LogSessionCollector from scon.logs.game import ClientInfo -# for windows its kinda this: -# note, this isnt used in this script. yeah i know right, defined up here, but not used down there. 
-# it's because i want to unify this to be on one configurable place ;) -settings = {'root_path': os.path.join(os.path.expanduser('~'), +# only analyze_path is used in this script. the others are for example. +settings = {'analyze_path': os.path.join(os.path.expanduser('~'), + 'Documents', 'My Games', 'sc'), + + 'root_path': os.path.join(os.path.expanduser('~'), 'Documents', 'My Games', 'StarConflict',), @@ -21,10 +33,22 @@ settings = {'root_path': os.path.join(os.path.expanduser('~'), 'My Games', 'StarConflict', 'logs' - )} + ),} +def select_parsing_sessions(alist): + # for micro controlling, which sessions to parse. + # the default would be to return alist unchanged; currently limited to the last 50 sessions. + return alist[-50:] if __name__ == '__main__': + # set this to your liking: + COUNT_GOOD = True # count via rex good packets as well. useful to see total encountered packets in summary. + LOG_GOOD_ONLY = True # Log good packets only. if set to false, will log unknown packets to trash_log. + LOG_BAD_CMBT = True # by default, the main logs of interest for unknown entries are the combat logs. here you can fine-tune which logs to catch. + LOG_BAD_CHAT = False + LOG_BAD_GAME = False + + # set up our logging to do our task: import logging logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(message)s', @@ -32,17 +56,36 @@ if __name__ == '__main__': logfile = logging.FileHandler('scon.log.bak') logfile.setLevel(logging.DEBUG) logging.getLogger().addHandler(logfile) - coll = LogSessionCollector(os.path.join(os.path.expanduser('~'), - 'Documents', 'My Games', 'sc')) + + trashfile = logging.FileHandler('trash.log.bak') + trashfile.setLevel(logging.INFO) + trash_log = logging.getLogger('trash_log') + + trash_log.addHandler(trashfile) + trash_log.propagate = False # only log to file. 
+ + ################################### + + # collect all sessions, as in, get all log directories: + coll = LogSessionCollector(settings.get('analyze_path')) logging.info('Collecting Sessions...') coll.collect_unique() logging.info('collected %s sessions.' % (len(coll.sessions))) + + # now do in depth parsing per session: + #f = open('output.txt', 'w') rex_combat = {} rex_game = {} rex_chat = {} - LOG_GOOD = True # Log good packets. - for logf in coll.sessions: + + selected = select_parsing_sessions(coll.sessions) + logging.info("Start In Depth parsing for %s sessions. %s" % (len(selected),'Counting good packets' if COUNT_GOOD else 'Counting only bad packets.')) + if LOG_GOOD_ONLY: + logging.info("Only logging unknown variants of known packet types") + else: + logging.info("Logging unknown packets aswell, CMBT: %s, GAME: %s, CHAT %s" % (LOG_BAD_CMBT, LOG_BAD_GAME, LOG_BAD_CHAT)) + for logf in selected: logf.parse_files(['game.log', 'combat.log', 'chat.log']) logging.info(("## Processing Log %s" % logf.idstr)) @@ -52,11 +95,11 @@ if __name__ == '__main__': #print l rex_combat['dict'] = rex_combat.get('dict', 0) + 1 else: - if not l.unpack() or LOG_GOOD: + if not l.unpack() or COUNT_GOOD: rex_combat[l.__class__.__name__] = rex_combat.get(l.__class__.__name__, 0) + 1 - if not isinstance(l, combat.UserEvent): - if not LOG_GOOD: - logging.debug((l.values['log'])) + if not isinstance(l, combat.UserEvent): + if not LOG_GOOD_ONLY and LOG_BAD_CMBT: + trash_log.info((l.values['log'])) else: logging.warning('No combat log in %s' % logf.idstr) if logf.game_log: @@ -66,12 +109,10 @@ if __name__ == '__main__': elif isinstance(l, str): print(l) else: - if l.unpack() and not LOG_GOOD: - pass - else: + if not l.unpack() or COUNT_GOOD: rex_game[l.__class__.__name__] = rex_game.get(l.__class__.__name__, 0) + 1 - if not LOG_GOOD: - logging.debug((l.values['log'])) + if not LOG_GOOD_ONLY and LOG_BAD_GAME and not isinstance(l, game.GameLog): + trash_log.info((l.values['log'])) else: 
logging.warning('No game log in %s' % logf.idstr) if logf.chat_log: @@ -81,22 +122,27 @@ if __name__ == '__main__': elif isinstance(l, str): print(l) else: - if l.unpack() and not LOG_GOOD: - pass - else: + if not l.unpack() or COUNT_GOOD: rex_chat[l.__class__.__name__] = rex_chat.get(l.__class__.__name__, 0) + 1 - if not LOG_GOOD: - logging.debug((l.values['log'])) + if not LOG_GOOD_ONLY and LOG_BAD_CHAT and not isinstance(l, chat.ChatLog): + trash_log.info((l.values['log'])) else: logging.warning('No chat log in %s' % logf.idstr) + + # Okay, parsing done. + # default cleanup: will remove all dictionaries, trash logs, etc. logf.clean(True) - # additional cleanup: + # additional cleanup + # we remove actually ALL log lines, as we are not interested in the data anymore. + # this allows us to parse a lot more files if logf.chat_log: logf.chat_log.lines = [] if logf.game_log: logf.game_log.lines = [] if logf.combat_log: logf.combat_log.lines = [] + + # Summary: logging.info('Analysis complete:') logging.info(('#'*20+' RexCombat ' + '#' *20)) logging.info(rex_combat) diff --git a/src/scon/backup.py b/src/scon/backup.py index 5ee21be..a23ee46 100644 --- a/src/scon/backup.py +++ b/src/scon/backup.py @@ -1,7 +1,10 @@ #!/usr/bin/python # -*- coding: utf-8 -*- """ - Backup Directories, Handle Files... + This script backs up all recent star conflict log directories into a common repository, by zipping each directory. + It omits directories already backed up, and omits files in the directories. + + Analyze.py can then scan these directories / zipfiles in development. """ import os, logging, zipfile diff --git a/src/scon/logs/base.py b/src/scon/logs/base.py index f74cda2..b5302c2 100644 --- a/src/scon/logs/base.py +++ b/src/scon/logs/base.py @@ -33,12 +33,14 @@ L_NET = 'NET' # Not supported in near future. 
L_CHAT = 'CHAT' class Log(object): - __slots__ = ['trash', 'reviewed'] + __slots__ = ['trash', 'reviewed', '_match_id', 'values'] matcher = None def __init__(self): self.trash = False self.reviewed = False + self.values = None + self._match_id = None @classmethod def is_handler(cls, log): @@ -61,8 +63,11 @@ class Log(object): return False class Stacktrace(Log): - ''' Special Log to catch error reports ''' - __slots__ = ['trash', 'reviewed', 'message'] + ''' Special Log to catch error reports + -> holds data in message not in values. + -> makes use of append + ''' + __slots__ = ['trash', 'reviewed', 'message', '_match_id', 'values'] def __init__(self, values=None): super(Stacktrace, self).__init__() diff --git a/src/scon/logs/chat.py b/src/scon/logs/chat.py index 7c58b16..b9a64c8 100644 --- a/src/scon/logs/chat.py +++ b/src/scon/logs/chat.py @@ -14,7 +14,7 @@ between 33-33-33 and FF-33 FF-33 FF-33 """ class ChatLog(Log): - __slots__ = Log.__slots__ + ['_match_id', 'values'] + __slots__ = Log.__slots__ @classmethod def is_handler(cls, log): diff --git a/src/scon/logs/combat.py b/src/scon/logs/combat.py index 1756d4d..30a0385 100644 --- a/src/scon/logs/combat.py +++ b/src/scon/logs/combat.py @@ -22,9 +22,10 @@ import re from .base import Log, L_CMBT, Stacktrace import logging +trash_log = logging.getLogger('trash_log') class CombatLog(Log): - __slots__ = Log.__slots__ + [ '_match_id', 'values'] + __slots__ = Log.__slots__ @classmethod def _log_handler(cls, log): if log.startswith(cls.__name__): @@ -60,8 +61,7 @@ class CombatLog(Log): return True # unknown? if not isinstance(self, UserEvent): - logging.warning('Unknown Packet for %s:\n%s' % (self.__class__.__name__, - self.values.get('log', ''))) + trash_log.info('%s\t\t%s' % (self.__class__.__name__, self.values.get('log', ''))) # trash if unknown or no matcher. 
self.trash = True @@ -98,7 +98,7 @@ class Damage(CombatLog): class Spawn(CombatLog): __slots__ = CombatLog.__slots__ - matcher = re.compile(r"^Spawn\sSpaceShip\sfor\splayer(?P\d+)\s\((?P[^,]+),\s+(?P#\w+)\)\.\s+'(?P\w+)'") + matcher = re.compile(r"^Spawn\sSpaceShip\sfor\splayer(?P-*\d+)\s\((?P[^,]*),\s+(?P#\w+)\)\.\s+'(?P\w+)'") class Spell(CombatLog): __slots__ = CombatLog.__slots__ @@ -115,28 +115,25 @@ class Reward(CombatLog): class Participant(CombatLog): __slots__ = CombatLog.__slots__ - matcher = re.compile(r"^\s+Participant\s+(?P[^\s]+)(?:\s+(?P\w+)|\s{30,})\s+(?:totalDamage\s(?P(?:\d+|\d+\.\d+));\s+|\s+)(?:mostDamageWith\s'(?P[^']+)';\s*(?P.*)|<(?P\w+)>)") - -""" -2017-03-29 13:25:49 - Unknown Packet for Rocket: -Rocket launch 18912, owner 'LOSNAR', def 'SpaceMissile_Barrage_T5_Mk3', target 'white213mouse' (17894) -2017-03-29 13:25:49 - Unknown Packet for Rocket: -Rocket detonation 18912, owner 'LOSNAR', def 'SpaceMissile_Barrage_T5_Mk3', reason 'auto_detonate', directHit 'white213mouse' -2017-03-29 13:25:49 - Unknown Packet for Rocket: -Rocket launch 18966, owner 'LOSNAR', def 'SpaceMissile_Barrage_T5_Mk3', target 'white213mouse' (17894) -2017-03-29 13:25:49 - Unknown Packet for Rocket: -Rocket detonation 18966, owner 'LOSNAR', def 'SpaceMissile_Barrage_T5_Mk3', reason 'auto_detonate', directHit 'white213mouse' -2017-03-29 13:25:49 - Unknown Packet for Rocket: -Rocket detonation 18892, owner 'LOSNAR', def 'SpaceMissile_Barrage_T5_Mk3', reason 'ttl' -2017-03-29 13:25:49 - Unknown Packet for Rocket: -Rocket detonation 18931, owner 'optimistik', def 'Weapon_Railgun_Heavy_T5_Epic', reason 'hit' -2017-03-29 13:25:49 - Unknown Packet for Participant: - Participant white213mouse Ship_Race5_M_ATTACK_Rank15 -""" + matcher = [ + # more complex version: + re.compile(r"^\s+Participant\s+(?P[^\s]+)(?:\s+(?P\w+)|\s{30,})\s+(?:totalDamage\s(?P(?:\d+|\d+\.\d+));\s+|\s+)(?:mostDamageWith\s'(?P[^']+)';\s*(?P.*)|<(?P\w+)>)"), + # simple version (new): + 
re.compile(r"^\s+Participant\s+(?P[^\s]+)\s+(?P\w+)"), + re.compile(r"^\s+Participant\s+(?P[^\s]+)"), + ] class Rocket(CombatLog): __slots__ = CombatLog.__slots__ - matcher = re.compile(r"^Rocket\s(?Plaunch|detonation)\.\sowner\s'(?P[^']+)'(?:,\s(?:def\s'(?P\w+)'|target\s'(?P[^']+)'|reason\s'(?P\w+)'|directHit\s'(?P[^']+)'))+") + # keys = [ 'event', 'name', 'def', 'target', 'reason', 'direct_hit', 'rocket_id' ] + # changed 'missile_type' to 'def' + + matcher = [ + # old version: Rocket detonation. owner... + re.compile(r"^Rocket\s(?Plaunch|detonation)\.\sowner\s'(?P[^']+)'(?:,\s(?:def\s'(?P[^']+)'|target\s'(?P[^']+)'|reason\s'(?P[^']+)'|directHit\s'(?P[^']+)'))+"), + # new version: Rocket detonation rocket ID (is that range? it can be -1), owner ... + re.compile(r"^Rocket\s(?Plaunch|detonation)\s+(?P-*\d+),\sowner\s'(?P[^']+)'(?:,\s(?:def\s'(?P[^']+)'|target\s'(?P[^']+)'|reason\s'(?P[^']+)'|directHit\s'(?P[^']+)'))+"), + ] class Heal(CombatLog): __slots__ = CombatLog.__slots__ @@ -144,7 +141,9 @@ class Heal(CombatLog): # heal by module re.compile(r"^Heal\s+(?P[^\s]+)\s\->\s+(?P[^\s]+)\s+(?P(?:\d+|\d+\.\d+))\s(?P[^\s]+)"), # direct heal by source or n/a (global buff) - re.compile(r"^Heal\s+(?:n/a|(?P\w+))\s+\->\s+(?P[^\s]+)\s+(?P(?:\d+|\d+\.\d+))"), + re.compile(r"^Heal\s+(?:n/a|(?P\w+))\s+\->\s+(?P[^\s]+)\s+(?P(?:\d+\.\d+|\d+))"), + # new heal with microtid + re.compile(r"^Heal\s+(?:n/a|(?P[^\|]+)\|(?P\d+))\s+\->\s+(?P[^\|]+)\|(?P\d+)\s+(?P(?:\d+\.\d+|\d+))") ] class Killed(CombatLog): @@ -253,11 +252,19 @@ class Set(CombatLog): called on setting "relationship" / OpenSpace Variables in values: - what (relationship) + + Optionals: - name (who do i set?) - value (to what value?) + - def: spell usually in combination with level and deftype. """ __slots__ = CombatLog.__slots__ - matcher = re.compile("^Set\s(?P\w+)\s(?P[^\s]+)\sto\s(?P\w+)") + matcher = [ + # what: usually reputation. 
+ re.compile("^Set\s(?P\w+)\s(?P[^\s]+)\sto\s(?P\w+)"), + # what: 'stage', +level +deftype (aura), def (aura spell name), index is weird array lookup always 0, id is the id of the aura. + re.compile("^Set\s(?P\w+)\s(?P\d+)\s+for\s+(?P\w+)\s+'(?P[^']+)'\[(?P\d+)\]\s+id\s(?P-*\d+)"), + ] class SqIdChange(CombatLog): """ - number: player number diff --git a/src/scon/logs/game.py b/src/scon/logs/game.py index eb86f93..35a061f 100644 --- a/src/scon/logs/game.py +++ b/src/scon/logs/game.py @@ -52,7 +52,7 @@ Interesting Lines: """ class GameLog(Log): - __slots__ = Log.__slots__ + [ '_match_id', 'values'] + __slots__ = Log.__slots__ @classmethod def is_handler(cls, log): if log.get('logtype', None) == '': # we handle only logs with empty logtype. diff --git a/src/scon/logs/logstream.py b/src/scon/logs/logstream.py index 44641a4..2f99d8c 100644 --- a/src/scon/logs/logstream.py +++ b/src/scon/logs/logstream.py @@ -1,4 +1,11 @@ """ + Why? + - initial implementation only followed to read whole files, but the logparser itself would work also on streamed data. + - now the initial implementation builds on top of logstream, which should keep the stream functionality intact, aka allow later to parse + files as they get written. + - much of the parsing therefore may be designed to be repeatable, if information is partial. Unfortunately this makes the whole process a bit mind-crunching. + + A LogStream is supposed to: - parse data feeded into it. - yield new objects @@ -22,6 +29,8 @@ combine it with the lookup for "watching files being changed", to create a program which listens to the logs live @see: monitor.py @see: watchdog https://pypi.python.org/pypi/watchdog + + """ from .base import Log import re @@ -61,6 +70,8 @@ class LogStream(object): def clean(self, remove_log=True): # cleans the logs by removing all non parsed packets. + # in essence, every line which is a dict, is removed. every log class is called for clean. + # every log that flags itself as trash, is removed. 
# remove_log: should i remove the raw log entry? lines = [] for l in self.lines: @@ -71,8 +82,7 @@ class LogStream(object): l.clean() lines.append(l) else: - print((type(l))) - print(l) + logging.warning('The Packet of Type %s has no trash attribute. Is it a valid Log Class? %s' % (type(l), l)) self.lines = lines self._unset_data() @@ -82,7 +92,11 @@ class LogStream(object): self._data = None def pre_parse_line(self, line): + # pre parse line expects a raw line from the log. + # it will basically return None if that line is not important for logs. + # otherwise it will return a dictionary, containing logtype, hh, dd, mm, ss, ns, and log as logline. if not isinstance(line, str): + # if this has already been parsed: return line elif line.startswith('---'): return None @@ -103,7 +117,7 @@ class LogStream(object): return None def _parse_line(self, line): - # add the line to my lines. + # add the line to the current packet's lines. if line is not None: o = line if isinstance(line, str): @@ -121,16 +135,18 @@ class LogStream(object): if self._last_object is not None and isinstance(self._last_object, Log): self._last_object.unpack() if self._last_object.append(line): + # last object accepted this line, return. return + # at this point, either the last object did not accept this string, + # or last object wasn't a stacktrace. + # either way, this is a weird one. logging.debug('#: %s' % line) - o = None + o = None # will return later. elif isinstance(line, dict): # Unresolved Log. o = self.resolve(line) - # this is where the whole thing gets polluted with weird dicts. - # what exactly should resolve do!? - # by default it returns what its given, if unknown. - # #@TODO @XXX @CRITICAL + # after resolving the log, it hopefully is not a dict anymore. + # if it still is, it's just the same dict. self._last_object = o else: self._last_object = o @@ -145,4 +161,5 @@ class LogStream(object): def resolve(self, gd): # gd is a dict. # try to find a class that is responsible for this log. 
+ # this is done in subclasses of logstream. return gd