bugfixes, updates

* updated all known packets to be parsed correctly
 * made analyze script more useful
 * updated readme and some doc
This commit is contained in:
Gabor Körber 2017-05-19 16:21:51 +02:00
parent a3f185683c
commit e003f8e7c5
8 changed files with 157 additions and 66 deletions

View File

@ -1,8 +1,21 @@
library for reading SC logs Library for reading SC logs
===========================
Github Release of my logparsing attempt Github Release of my logparsing attempt
and other stuffs, from internal git. and other stuffs, from internal git.
This software is in a pre-alpha stage, and is considered mostly useful for scientific purposes. This software is in a _pre-alpha_ stage, and is considered mostly useful for _scientific_ purposes.
License: LGPL *License: LGPL*
Current Goals
-------------
- make library usable with an example
- more documentation
- update to handle all current combat events in pvp
Changelog
---------
- converted 99% to python3. switched to py3 development.
- moved to github with parser library only

View File

@ -2,6 +2,17 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
""" """
Tool to analyze Logs in general. Tool to analyze Logs in general.
This tool is built to discover unidentified packets.
It is mainly used in development
(for the whole library, this is actually the most important script atm)
It outputs a trash.log.bak and a scon.log.bak, and works itself through gigabytes of my backed-up test data.
This script therefore has following purposes:
- a) find bugs, find unknown packets (so new type of log entries in combat.log)
- b) see speed of parsing
- c) test parsing for memory efficiency, because parsing lots of big files needs that.
""" """
import os, sys, logging import os, sys, logging
from scon.logs.logfiles import LogFileResolver as LogFile from scon.logs.logfiles import LogFileResolver as LogFile
@ -9,10 +20,11 @@ from scon.logs import combat, game, chat
from scon.logs.session import LogSessionCollector from scon.logs.session import LogSessionCollector
from scon.logs.game import ClientInfo from scon.logs.game import ClientInfo
# for windows its kinda this: # only analyze_path is used in this script. the others are for example.
# note, this isnt used in this script. yeah i know right, defined up here, but not used down there. settings = {'analyze_path': os.path.join(os.path.expanduser('~'),
# it's because i want to unify this to be on one configurable place ;) 'Documents', 'My Games', 'sc'),
settings = {'root_path': os.path.join(os.path.expanduser('~'),
'root_path': os.path.join(os.path.expanduser('~'),
'Documents', 'Documents',
'My Games', 'My Games',
'StarConflict',), 'StarConflict',),
@ -21,10 +33,22 @@ settings = {'root_path': os.path.join(os.path.expanduser('~'),
'My Games', 'My Games',
'StarConflict', 'StarConflict',
'logs' 'logs'
)} ),}
def select_parsing_sessions(alist):
# for micro controlling, which sessions to parse.
# default: return alist
return alist[-50:]
if __name__ == '__main__': if __name__ == '__main__':
# set this to your liking:
COUNT_GOOD = True # count via rex good packets as well. Useful to see total encountered packets in summary.
LOG_GOOD_ONLY = True # Log good packets only. if set to false, will log unknown packets to trash_log.
LOG_BAD_CMBT = True # by default, the main logs of interest for unknown entries is combat logs. here you can fine-tune which logs to catch.
LOG_BAD_CHAT = False
LOG_BAD_GAME = False
# set up our logging to do our task:
import logging import logging
logging.basicConfig(level=logging.INFO, logging.basicConfig(level=logging.INFO,
format='%(asctime)s - %(message)s', format='%(asctime)s - %(message)s',
@ -32,17 +56,36 @@ if __name__ == '__main__':
logfile = logging.FileHandler('scon.log.bak') logfile = logging.FileHandler('scon.log.bak')
logfile.setLevel(logging.DEBUG) logfile.setLevel(logging.DEBUG)
logging.getLogger().addHandler(logfile) logging.getLogger().addHandler(logfile)
coll = LogSessionCollector(os.path.join(os.path.expanduser('~'),
'Documents', 'My Games', 'sc')) trashfile = logging.FileHandler('trash.log.bak')
trashfile.setLevel(logging.INFO)
trash_log = logging.getLogger('trash_log')
trash_log.addHandler(trashfile)
trash_log.propagate = False # only log to file.
###################################
# collect all sessions, as in, get all log directories:
coll = LogSessionCollector(settings.get('analyze_path'))
logging.info('Collecting Sessions...') logging.info('Collecting Sessions...')
coll.collect_unique() coll.collect_unique()
logging.info('collected %s sessions.' % (len(coll.sessions))) logging.info('collected %s sessions.' % (len(coll.sessions)))
# now do in depth parsing per session:
#f = open('output.txt', 'w') #f = open('output.txt', 'w')
rex_combat = {} rex_combat = {}
rex_game = {} rex_game = {}
rex_chat = {} rex_chat = {}
LOG_GOOD = True # Log good packets.
for logf in coll.sessions: selected = select_parsing_sessions(coll.sessions)
logging.info("Start In Depth parsing for %s sessions. %s" % (len(selected),'Counting good packets' if COUNT_GOOD else 'Counting only bad packets.'))
if LOG_GOOD_ONLY:
logging.info("Only logging unknown variants of known packet types")
else:
logging.info("Logging unknown packets as well, CMBT: %s, GAME: %s, CHAT %s" % (LOG_BAD_CMBT, LOG_BAD_GAME, LOG_BAD_CHAT))
for logf in selected:
logf.parse_files(['game.log', 'combat.log', 'chat.log']) logf.parse_files(['game.log', 'combat.log', 'chat.log'])
logging.info(("## Processing Log %s" % logf.idstr)) logging.info(("## Processing Log %s" % logf.idstr))
@ -52,11 +95,11 @@ if __name__ == '__main__':
#print l #print l
rex_combat['dict'] = rex_combat.get('dict', 0) + 1 rex_combat['dict'] = rex_combat.get('dict', 0) + 1
else: else:
if not l.unpack() or LOG_GOOD: if not l.unpack() or COUNT_GOOD:
rex_combat[l.__class__.__name__] = rex_combat.get(l.__class__.__name__, 0) + 1 rex_combat[l.__class__.__name__] = rex_combat.get(l.__class__.__name__, 0) + 1
if not isinstance(l, combat.UserEvent): if not isinstance(l, combat.UserEvent):
if not LOG_GOOD: if not LOG_GOOD_ONLY and LOG_BAD_CMBT:
logging.debug((l.values['log'])) trash_log.info((l.values['log']))
else: else:
logging.warning('No combat log in %s' % logf.idstr) logging.warning('No combat log in %s' % logf.idstr)
if logf.game_log: if logf.game_log:
@ -66,12 +109,10 @@ if __name__ == '__main__':
elif isinstance(l, str): elif isinstance(l, str):
print(l) print(l)
else: else:
if l.unpack() and not LOG_GOOD: if not l.unpack() or COUNT_GOOD:
pass
else:
rex_game[l.__class__.__name__] = rex_game.get(l.__class__.__name__, 0) + 1 rex_game[l.__class__.__name__] = rex_game.get(l.__class__.__name__, 0) + 1
if not LOG_GOOD: if not LOG_GOOD_ONLY and LOG_BAD_GAME and not isinstance(l, game.GameLog):
logging.debug((l.values['log'])) trash_log.info((l.values['log']))
else: else:
logging.warning('No game log in %s' % logf.idstr) logging.warning('No game log in %s' % logf.idstr)
if logf.chat_log: if logf.chat_log:
@ -81,22 +122,27 @@ if __name__ == '__main__':
elif isinstance(l, str): elif isinstance(l, str):
print(l) print(l)
else: else:
if l.unpack() and not LOG_GOOD: if not l.unpack() or COUNT_GOOD:
pass
else:
rex_chat[l.__class__.__name__] = rex_chat.get(l.__class__.__name__, 0) + 1 rex_chat[l.__class__.__name__] = rex_chat.get(l.__class__.__name__, 0) + 1
if not LOG_GOOD: if not LOG_GOOD_ONLY and LOG_BAD_CHAT and not isinstance(l, chat.ChatLog):
logging.debug((l.values['log'])) trash_log.info((l.values['log']))
else: else:
logging.warning('No chat log in %s' % logf.idstr) logging.warning('No chat log in %s' % logf.idstr)
# Okay, parsing done.
# default cleanup: will remove all dictionaries, trash logs, etc.
logf.clean(True) logf.clean(True)
# additional cleanup: # additional cleanup
# we remove actually ALL log lines, as we are not interested in the data anymore.
# this allows us to parse a lot more files
if logf.chat_log: if logf.chat_log:
logf.chat_log.lines = [] logf.chat_log.lines = []
if logf.game_log: if logf.game_log:
logf.game_log.lines = [] logf.game_log.lines = []
if logf.combat_log: if logf.combat_log:
logf.combat_log.lines = [] logf.combat_log.lines = []
# Summary:
logging.info('Analysis complete:') logging.info('Analysis complete:')
logging.info(('#'*20+' RexCombat ' + '#' *20)) logging.info(('#'*20+' RexCombat ' + '#' *20))
logging.info(rex_combat) logging.info(rex_combat)

View File

@ -1,7 +1,10 @@
#!/usr/bin/python #!/usr/bin/python
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
""" """
Backup Directories, Handle Files... This script backs up all recent star conflict log directories into a common repository, by zipping each directory.
It omits directories already backed up, and omits files in the directories.
Analyze.py can then scan these directories / zipfiles in development.
""" """
import os, logging, zipfile import os, logging, zipfile

View File

@ -33,12 +33,14 @@ L_NET = 'NET' # Not supported in near future.
L_CHAT = 'CHAT' L_CHAT = 'CHAT'
class Log(object): class Log(object):
__slots__ = ['trash', 'reviewed'] __slots__ = ['trash', 'reviewed', '_match_id', 'values']
matcher = None matcher = None
def __init__(self): def __init__(self):
self.trash = False self.trash = False
self.reviewed = False self.reviewed = False
self.values = None
self._match_id = None
@classmethod @classmethod
def is_handler(cls, log): def is_handler(cls, log):
@ -61,8 +63,11 @@ class Log(object):
return False return False
class Stacktrace(Log): class Stacktrace(Log):
''' Special Log to catch error reports ''' ''' Special Log to catch error reports
__slots__ = ['trash', 'reviewed', 'message'] -> holds data in message not in values.
-> makes use of append
'''
__slots__ = ['trash', 'reviewed', 'message', '_match_id', 'values']
def __init__(self, values=None): def __init__(self, values=None):
super(Stacktrace, self).__init__() super(Stacktrace, self).__init__()

View File

@ -14,7 +14,7 @@ between 33-33-33 and FF-33 FF-33 FF-33
""" """
class ChatLog(Log): class ChatLog(Log):
__slots__ = Log.__slots__ + ['_match_id', 'values'] __slots__ = Log.__slots__
@classmethod @classmethod
def is_handler(cls, log): def is_handler(cls, log):

View File

@ -22,9 +22,10 @@
import re import re
from .base import Log, L_CMBT, Stacktrace from .base import Log, L_CMBT, Stacktrace
import logging import logging
trash_log = logging.getLogger('trash_log')
class CombatLog(Log): class CombatLog(Log):
__slots__ = Log.__slots__ + [ '_match_id', 'values'] __slots__ = Log.__slots__
@classmethod @classmethod
def _log_handler(cls, log): def _log_handler(cls, log):
if log.startswith(cls.__name__): if log.startswith(cls.__name__):
@ -60,8 +61,7 @@ class CombatLog(Log):
return True return True
# unknown? # unknown?
if not isinstance(self, UserEvent): if not isinstance(self, UserEvent):
logging.warning('Unknown Packet for %s:\n%s' % (self.__class__.__name__, trash_log.info('%s\t\t%s' % (self.__class__.__name__, self.values.get('log', '')))
self.values.get('log', '')))
# trash if unknown or no matcher. # trash if unknown or no matcher.
self.trash = True self.trash = True
@ -98,7 +98,7 @@ class Damage(CombatLog):
class Spawn(CombatLog): class Spawn(CombatLog):
__slots__ = CombatLog.__slots__ __slots__ = CombatLog.__slots__
matcher = re.compile(r"^Spawn\sSpaceShip\sfor\splayer(?P<player>\d+)\s\((?P<name>[^,]+),\s+(?P<hash>#\w+)\)\.\s+'(?P<ship_class>\w+)'") matcher = re.compile(r"^Spawn\sSpaceShip\sfor\splayer(?P<player>-*\d+)\s\((?P<name>[^,]*),\s+(?P<hash>#\w+)\)\.\s+'(?P<ship_class>\w+)'")
class Spell(CombatLog): class Spell(CombatLog):
__slots__ = CombatLog.__slots__ __slots__ = CombatLog.__slots__
@ -115,28 +115,25 @@ class Reward(CombatLog):
class Participant(CombatLog): class Participant(CombatLog):
__slots__ = CombatLog.__slots__ __slots__ = CombatLog.__slots__
matcher = re.compile(r"^\s+Participant\s+(?P<source_name>[^\s]+)(?:\s+(?P<ship_class>\w+)|\s{30,})\s+(?:totalDamage\s(?P<total_damage>(?:\d+|\d+\.\d+));\s+|\s+)(?:mostDamageWith\s'(?P<module_class>[^']+)';\s*(?P<additional>.*)|<(?P<other>\w+)>)") matcher = [
# more complex version:
""" re.compile(r"^\s+Participant\s+(?P<source_name>[^\s]+)(?:\s+(?P<ship_class>\w+)|\s{30,})\s+(?:totalDamage\s(?P<total_damage>(?:\d+|\d+\.\d+));\s+|\s+)(?:mostDamageWith\s'(?P<module_class>[^']+)';\s*(?P<additional>.*)|<(?P<other>\w+)>)"),
2017-03-29 13:25:49 - Unknown Packet for Rocket: # simple version (new):
Rocket launch 18912, owner 'LOSNAR', def 'SpaceMissile_Barrage_T5_Mk3', target 'white213mouse' (17894) re.compile(r"^\s+Participant\s+(?P<source_name>[^\s]+)\s+(?P<ship_class>\w+)"),
2017-03-29 13:25:49 - Unknown Packet for Rocket: re.compile(r"^\s+Participant\s+(?P<source_name>[^\s]+)"),
Rocket detonation 18912, owner 'LOSNAR', def 'SpaceMissile_Barrage_T5_Mk3', reason 'auto_detonate', directHit 'white213mouse' ]
2017-03-29 13:25:49 - Unknown Packet for Rocket:
Rocket launch 18966, owner 'LOSNAR', def 'SpaceMissile_Barrage_T5_Mk3', target 'white213mouse' (17894)
2017-03-29 13:25:49 - Unknown Packet for Rocket:
Rocket detonation 18966, owner 'LOSNAR', def 'SpaceMissile_Barrage_T5_Mk3', reason 'auto_detonate', directHit 'white213mouse'
2017-03-29 13:25:49 - Unknown Packet for Rocket:
Rocket detonation 18892, owner 'LOSNAR', def 'SpaceMissile_Barrage_T5_Mk3', reason 'ttl'
2017-03-29 13:25:49 - Unknown Packet for Rocket:
Rocket detonation 18931, owner 'optimistik', def 'Weapon_Railgun_Heavy_T5_Epic', reason 'hit'
2017-03-29 13:25:49 - Unknown Packet for Participant:
Participant white213mouse Ship_Race5_M_ATTACK_Rank15
"""
class Rocket(CombatLog): class Rocket(CombatLog):
__slots__ = CombatLog.__slots__ __slots__ = CombatLog.__slots__
matcher = re.compile(r"^Rocket\s(?P<event>launch|detonation)\.\sowner\s'(?P<name>[^']+)'(?:,\s(?:def\s'(?P<missile_type>\w+)'|target\s'(?P<target>[^']+)'|reason\s'(?P<reason>\w+)'|directHit\s'(?P<direct_hit>[^']+)'))+") # keys = [ 'event', 'name', 'def', 'target', 'reason', 'direct_hit', 'rocket_id' ]
# changed 'missile_type' to 'def'
matcher = [
# old version: Rocket detonation. owner...
re.compile(r"^Rocket\s(?P<event>launch|detonation)\.\sowner\s'(?P<name>[^']+)'(?:,\s(?:def\s'(?P<def>[^']+)'|target\s'(?P<target>[^']+)'|reason\s'(?P<reason>[^']+)'|directHit\s'(?P<direct_hit>[^']+)'))+"),
# new version: Rocket detonation rocket ID (is that range? it can be -1), owner ...
re.compile(r"^Rocket\s(?P<event>launch|detonation)\s+(?P<rocket_id>-*\d+),\sowner\s'(?P<name>[^']+)'(?:,\s(?:def\s'(?P<def>[^']+)'|target\s'(?P<target>[^']+)'|reason\s'(?P<reason>[^']+)'|directHit\s'(?P<direct_hit>[^']+)'))+"),
]
class Heal(CombatLog): class Heal(CombatLog):
__slots__ = CombatLog.__slots__ __slots__ = CombatLog.__slots__
@ -144,7 +141,9 @@ class Heal(CombatLog):
# heal by module # heal by module
re.compile(r"^Heal\s+(?P<source_name>[^\s]+)\s\->\s+(?P<target_name>[^\s]+)\s+(?P<amount>(?:\d+|\d+\.\d+))\s(?P<module_class>[^\s]+)"), re.compile(r"^Heal\s+(?P<source_name>[^\s]+)\s\->\s+(?P<target_name>[^\s]+)\s+(?P<amount>(?:\d+|\d+\.\d+))\s(?P<module_class>[^\s]+)"),
# direct heal by source or n/a (global buff) # direct heal by source or n/a (global buff)
re.compile(r"^Heal\s+(?:n/a|(?P<source_name>\w+))\s+\->\s+(?P<target_name>[^\s]+)\s+(?P<amount>(?:\d+|\d+\.\d+))"), re.compile(r"^Heal\s+(?:n/a|(?P<source_name>\w+))\s+\->\s+(?P<target_name>[^\s]+)\s+(?P<amount>(?:\d+\.\d+|\d+))"),
# new heal with microtid
re.compile(r"^Heal\s+(?:n/a|(?P<source_name>[^\|]+)\|(?P<source_tid>\d+))\s+\->\s+(?P<target_name>[^\|]+)\|(?P<target_tid>\d+)\s+(?P<amount>(?:\d+\.\d+|\d+))")
] ]
class Killed(CombatLog): class Killed(CombatLog):
@ -253,11 +252,19 @@ class Set(CombatLog):
called on setting "relationship" / OpenSpace called on setting "relationship" / OpenSpace
Variables in values: Variables in values:
- what (relationship) - what (relationship)
Optionals:
- name (who do i set?) - name (who do i set?)
- value (to what value?) - value (to what value?)
- def: spell usually in combination with level and deftype.
""" """
__slots__ = CombatLog.__slots__ __slots__ = CombatLog.__slots__
matcher = re.compile("^Set\s(?P<what>\w+)\s(?P<name>[^\s]+)\sto\s(?P<value>\w+)") matcher = [
# what: usually reputation.
re.compile("^Set\s(?P<what>\w+)\s(?P<name>[^\s]+)\sto\s(?P<value>\w+)"),
# what: 'stage', +level +deftype (aura), def (aura spell name), index is weird array lookup always 0, id is the id of the aura.
re.compile("^Set\s(?P<what>\w+)\s(?P<level>\d+)\s+for\s+(?P<deftype>\w+)\s+'(?P<def>[^']+)'\[(?P<index>\d+)\]\s+id\s(?P<id>-*\d+)"),
]
class SqIdChange(CombatLog): class SqIdChange(CombatLog):
""" - number: player number """ - number: player number

View File

@ -52,7 +52,7 @@ Interesting Lines:
""" """
class GameLog(Log): class GameLog(Log):
__slots__ = Log.__slots__ + [ '_match_id', 'values'] __slots__ = Log.__slots__
@classmethod @classmethod
def is_handler(cls, log): def is_handler(cls, log):
if log.get('logtype', None) == '': # we handle only logs with empty logtype. if log.get('logtype', None) == '': # we handle only logs with empty logtype.

View File

@ -1,4 +1,11 @@
""" """
Why?
- initial implementation only followed to read whole files, but the logparser itself would work also on streamed data.
- now the initial implementation builds on top of logstream, which should keep the stream functionality intact, aka allow later to parse
files as they get written.
- much of the parsing therefore may be designed to be repeatable, if information is partial. Unfortunately this makes the whole process a bit mind-crunching.
A LogStream is supposed to: A LogStream is supposed to:
- parse data feeded into it. - parse data feeded into it.
- yield new objects - yield new objects
@ -22,6 +29,8 @@
combine it with the lookup for "watching files being changed", to create a program which listens to the logs live combine it with the lookup for "watching files being changed", to create a program which listens to the logs live
@see: monitor.py @see: monitor.py
@see: watchdog https://pypi.python.org/pypi/watchdog @see: watchdog https://pypi.python.org/pypi/watchdog
""" """
from .base import Log from .base import Log
import re import re
@ -61,6 +70,8 @@ class LogStream(object):
def clean(self, remove_log=True): def clean(self, remove_log=True):
# cleans the logs by removing all non parsed packets. # cleans the logs by removing all non parsed packets.
# in essence, every line which is a dict, is removed. every log class is called for clean.
# every log that flags itself as trash, is removed.
# remove_log: should i remove the raw log entry? # remove_log: should i remove the raw log entry?
lines = [] lines = []
for l in self.lines: for l in self.lines:
@ -71,8 +82,7 @@ class LogStream(object):
l.clean() l.clean()
lines.append(l) lines.append(l)
else: else:
print((type(l))) logging.warning('The Packet of Type %s has no trash attribute. Is it a valid Log Class? %s' % (type(l), l))
print(l)
self.lines = lines self.lines = lines
self._unset_data() self._unset_data()
@ -82,7 +92,11 @@ class LogStream(object):
self._data = None self._data = None
def pre_parse_line(self, line): def pre_parse_line(self, line):
# pre parse line expects a raw line from the log.
# it will basically return None if that line is not important for logs.
# otherwise it will return a dictionary, containing logtype, hh, dd, mm, ss, ns, and log as logline.
if not isinstance(line, str): if not isinstance(line, str):
# if this has already been parsed:
return line return line
elif line.startswith('---'): elif line.startswith('---'):
return None return None
@ -103,7 +117,7 @@ class LogStream(object):
return None return None
def _parse_line(self, line): def _parse_line(self, line):
# add the line to my lines. # add the line to the current packets lines.
if line is not None: if line is not None:
o = line o = line
if isinstance(line, str): if isinstance(line, str):
@ -121,16 +135,18 @@ class LogStream(object):
if self._last_object is not None and isinstance(self._last_object, Log): if self._last_object is not None and isinstance(self._last_object, Log):
self._last_object.unpack() self._last_object.unpack()
if self._last_object.append(line): if self._last_object.append(line):
# last object accepted this line, return.
return return
# at this point, either the last object did not accept this string,
# or last object wasn't a stacktrace.
# either way, this is a weird one.
logging.debug('#: %s' % line) logging.debug('#: %s' % line)
o = None o = None # will return later.
elif isinstance(line, dict): elif isinstance(line, dict):
# Unresolved Log. # Unresolved Log.
o = self.resolve(line) o = self.resolve(line)
# this is where the whole thing gets polluted with weird dicts. # after resolving the log, it hopefully is not a dict anymore.
# what exactly should resolve do!? # if it still is, its just the same dict.
# by default it returns what its given, if unknown.
# #@TODO @XXX @CRITICAL
self._last_object = o self._last_object = o
else: else:
self._last_object = o self._last_object = o
@ -145,4 +161,5 @@ class LogStream(object):
def resolve(self, gd): def resolve(self, gd):
# gd is a dict. # gd is a dict.
# try to find a class that is responsible for this log. # try to find a class that is responsible for this log.
# this is done in subclasses of logstream.
return gd return gd