bugfixes, updates

* updated all known packets to be parsed correctly
 * made analyze script more useful
 * updated readme and some doc
This commit is contained in:
Gabor Körber 2017-05-19 16:21:51 +02:00
parent a3f185683c
commit e003f8e7c5
8 changed files with 157 additions and 66 deletions

View File

@ -1,8 +1,21 @@
library for reading SC logs Library for reading SC logs
===========================
Github Release of my logparsing attempt Github Release of my logparsing attempt
and other stuffs, from internal git. and other stuffs, from internal git.
This software is in a pre-alpha stage, and is considered mostly useful for scientific purposes. This software is in a _pre-alpha_ stage, and is considered mostly useful for _scientific_ purposes.
License: LGPL *License: LGPL*
Current Goals
-------------
- make library usable with an example
- more documentation
- update to handle all current combat events in pvp
Changelog
---------
- converted 99% to python3. switched to py3 development.
- moved to github with parser library only

View File

@ -2,6 +2,17 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
""" """
Tool to analyze Logs in general. Tool to analyze Logs in general.
This tool is built to discover unidentified packets.
It is mainly used in development
(for the whole library, this is actually the most important script atm)
It outputs a trash.log.bak and a scon.log.bak, and works itself through gigabytes of my backed-up test data.
This script therefore has following purposes:
- a) find bugs, find unknown packets (so new type of log entries in combat.log)
- b) see speed of parsing
- c) test parsing for memory efficiency, because parsing lots of big files needs that.
""" """
import os, sys, logging import os, sys, logging
from scon.logs.logfiles import LogFileResolver as LogFile from scon.logs.logfiles import LogFileResolver as LogFile
@ -9,10 +20,11 @@ from scon.logs import combat, game, chat
from scon.logs.session import LogSessionCollector from scon.logs.session import LogSessionCollector
from scon.logs.game import ClientInfo from scon.logs.game import ClientInfo
# for windows its kinda this: # only analyze_path is used in this script. the others are for example.
# note, this isnt used in this script. yeah i know right, defined up here, but not used down there. settings = {'analyze_path': os.path.join(os.path.expanduser('~'),
# it's because i want to unify this to be on one configurable place ;) 'Documents', 'My Games', 'sc'),
settings = {'root_path': os.path.join(os.path.expanduser('~'),
'root_path': os.path.join(os.path.expanduser('~'),
'Documents', 'Documents',
'My Games', 'My Games',
'StarConflict',), 'StarConflict',),
@ -21,10 +33,22 @@ settings = {'root_path': os.path.join(os.path.expanduser('~'),
'My Games', 'My Games',
'StarConflict', 'StarConflict',
'logs' 'logs'
)} ),}
def select_parsing_sessions(alist):
# for micro controlling, which sessions to parse.
# default: return alist
return alist[-50:]
if __name__ == '__main__': if __name__ == '__main__':
# set this to your liking:
COUNT_GOOD = True # count via rex good packets as well. Useful to see total encountered packets in summary.
LOG_GOOD_ONLY = True # Log good packets only. if set to false, will log unknown packets to trash_log.
LOG_BAD_CMBT = True # by default, the main logs of interest for unknown entries is combat logs. here you can fine-tune which logs to catch.
LOG_BAD_CHAT = False
LOG_BAD_GAME = False
# set up our logging to do our task:
import logging import logging
logging.basicConfig(level=logging.INFO, logging.basicConfig(level=logging.INFO,
format='%(asctime)s - %(message)s', format='%(asctime)s - %(message)s',
@ -32,17 +56,36 @@ if __name__ == '__main__':
logfile = logging.FileHandler('scon.log.bak') logfile = logging.FileHandler('scon.log.bak')
logfile.setLevel(logging.DEBUG) logfile.setLevel(logging.DEBUG)
logging.getLogger().addHandler(logfile) logging.getLogger().addHandler(logfile)
coll = LogSessionCollector(os.path.join(os.path.expanduser('~'),
'Documents', 'My Games', 'sc')) trashfile = logging.FileHandler('trash.log.bak')
trashfile.setLevel(logging.INFO)
trash_log = logging.getLogger('trash_log')
trash_log.addHandler(trashfile)
trash_log.propagate = False # only log to file.
###################################
# collect all sessions, as in, get all log directories:
coll = LogSessionCollector(settings.get('analyze_path'))
logging.info('Collecting Sessions...') logging.info('Collecting Sessions...')
coll.collect_unique() coll.collect_unique()
logging.info('collected %s sessions.' % (len(coll.sessions))) logging.info('collected %s sessions.' % (len(coll.sessions)))
# now do in depth parsing per session:
#f = open('output.txt', 'w') #f = open('output.txt', 'w')
rex_combat = {} rex_combat = {}
rex_game = {} rex_game = {}
rex_chat = {} rex_chat = {}
LOG_GOOD = True # Log good packets.
for logf in coll.sessions: selected = select_parsing_sessions(coll.sessions)
logging.info("Start In Depth parsing for %s sessions. %s" % (len(selected),'Counting good packets' if COUNT_GOOD else 'Counting only bad packets.'))
if LOG_GOOD_ONLY:
logging.info("Only logging unknown variants of known packet types")
else:
logging.info("Logging unknown packets as well, CMBT: %s, GAME: %s, CHAT %s" % (LOG_BAD_CMBT, LOG_BAD_GAME, LOG_BAD_CHAT))
for logf in selected:
logf.parse_files(['game.log', 'combat.log', 'chat.log']) logf.parse_files(['game.log', 'combat.log', 'chat.log'])
logging.info(("## Processing Log %s" % logf.idstr)) logging.info(("## Processing Log %s" % logf.idstr))
@ -52,11 +95,11 @@ if __name__ == '__main__':
#print l #print l
rex_combat['dict'] = rex_combat.get('dict', 0) + 1 rex_combat['dict'] = rex_combat.get('dict', 0) + 1
else: else:
if not l.unpack() or LOG_GOOD: if not l.unpack() or COUNT_GOOD:
rex_combat[l.__class__.__name__] = rex_combat.get(l.__class__.__name__, 0) + 1 rex_combat[l.__class__.__name__] = rex_combat.get(l.__class__.__name__, 0) + 1
if not isinstance(l, combat.UserEvent): if not isinstance(l, combat.UserEvent):
if not LOG_GOOD: if not LOG_GOOD_ONLY and LOG_BAD_CMBT:
logging.debug((l.values['log'])) trash_log.info((l.values['log']))
else: else:
logging.warning('No combat log in %s' % logf.idstr) logging.warning('No combat log in %s' % logf.idstr)
if logf.game_log: if logf.game_log:
@ -66,12 +109,10 @@ if __name__ == '__main__':
elif isinstance(l, str): elif isinstance(l, str):
print(l) print(l)
else: else:
if l.unpack() and not LOG_GOOD: if not l.unpack() or COUNT_GOOD:
pass
else:
rex_game[l.__class__.__name__] = rex_game.get(l.__class__.__name__, 0) + 1 rex_game[l.__class__.__name__] = rex_game.get(l.__class__.__name__, 0) + 1
if not LOG_GOOD: if not LOG_GOOD_ONLY and LOG_BAD_GAME and not isinstance(l, game.GameLog):
logging.debug((l.values['log'])) trash_log.info((l.values['log']))
else: else:
logging.warning('No game log in %s' % logf.idstr) logging.warning('No game log in %s' % logf.idstr)
if logf.chat_log: if logf.chat_log:
@ -81,22 +122,27 @@ if __name__ == '__main__':
elif isinstance(l, str): elif isinstance(l, str):
print(l) print(l)
else: else:
if l.unpack() and not LOG_GOOD: if not l.unpack() or COUNT_GOOD:
pass
else:
rex_chat[l.__class__.__name__] = rex_chat.get(l.__class__.__name__, 0) + 1 rex_chat[l.__class__.__name__] = rex_chat.get(l.__class__.__name__, 0) + 1
if not LOG_GOOD: if not LOG_GOOD_ONLY and LOG_BAD_CHAT and not isinstance(l, chat.ChatLog):
logging.debug((l.values['log'])) trash_log.info((l.values['log']))
else: else:
logging.warning('No chat log in %s' % logf.idstr) logging.warning('No chat log in %s' % logf.idstr)
# Okay, parsing done.
# default cleanup: will remove all dictionaries, trash logs, etc.
logf.clean(True) logf.clean(True)
# additional cleanup: # additional cleanup
# we remove actually ALL log lines, as we are not interested in the data anymore.
# this allows us to parse a lot more files
if logf.chat_log: if logf.chat_log:
logf.chat_log.lines = [] logf.chat_log.lines = []
if logf.game_log: if logf.game_log:
logf.game_log.lines = [] logf.game_log.lines = []
if logf.combat_log: if logf.combat_log:
logf.combat_log.lines = [] logf.combat_log.lines = []
# Summary:
logging.info('Analysis complete:') logging.info('Analysis complete:')
logging.info(('#'*20+' RexCombat ' + '#' *20)) logging.info(('#'*20+' RexCombat ' + '#' *20))
logging.info(rex_combat) logging.info(rex_combat)

View File

@ -1,7 +1,10 @@
#!/usr/bin/python #!/usr/bin/python
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
""" """
Backup Directories, Handle Files... This script backs up all recent star conflict log directories into a common repository, by zipping each directory.
It omits directories already backed up, and omits files in the directories.
Analyze.py can then scan these directories / zipfiles in development.
""" """
import os, logging, zipfile import os, logging, zipfile

View File

@ -33,12 +33,14 @@ L_NET = 'NET' # Not supported in near future.
L_CHAT = 'CHAT' L_CHAT = 'CHAT'
class Log(object): class Log(object):
__slots__ = ['trash', 'reviewed'] __slots__ = ['trash', 'reviewed', '_match_id', 'values']
matcher = None matcher = None
def __init__(self): def __init__(self):
self.trash = False self.trash = False
self.reviewed = False self.reviewed = False
self.values = None
self._match_id = None
@classmethod @classmethod
def is_handler(cls, log): def is_handler(cls, log):
@ -61,8 +63,11 @@ class Log(object):
return False return False
class Stacktrace(Log): class Stacktrace(Log):
''' Special Log to catch error reports ''' ''' Special Log to catch error reports
__slots__ = ['trash', 'reviewed', 'message'] -> holds data in message not in values.
-> makes use of append
'''
__slots__ = ['trash', 'reviewed', 'message', '_match_id', 'values']
def __init__(self, values=None): def __init__(self, values=None):
super(Stacktrace, self).__init__() super(Stacktrace, self).__init__()

View File

@ -14,7 +14,7 @@ between 33-33-33 and FF-33 FF-33 FF-33
""" """
class ChatLog(Log): class ChatLog(Log):
__slots__ = Log.__slots__ + ['_match_id', 'values'] __slots__ = Log.__slots__
@classmethod @classmethod
def is_handler(cls, log): def is_handler(cls, log):

View File

@ -22,9 +22,10 @@
import re import re
from .base import Log, L_CMBT, Stacktrace from .base import Log, L_CMBT, Stacktrace
import logging import logging
trash_log = logging.getLogger('trash_log')
class CombatLog(Log): class CombatLog(Log):
__slots__ = Log.__slots__ + [ '_match_id', 'values'] __slots__ = Log.__slots__
@classmethod @classmethod
def _log_handler(cls, log): def _log_handler(cls, log):
if log.startswith(cls.__name__): if log.startswith(cls.__name__):
@ -60,8 +61,7 @@ class CombatLog(Log):
return True return True
# unknown? # unknown?
if not isinstance(self, UserEvent): if not isinstance(self, UserEvent):
logging.warning('Unknown Packet for %s:\n%s' % (self.__class__.__name__, trash_log.info('%s\t\t%s' % (self.__class__.__name__, self.values.get('log', '')))
self.values.get('log', '')))
# trash if unknown or no matcher. # trash if unknown or no matcher.
self.trash = True self.trash = True
@ -98,7 +98,7 @@ class Damage(CombatLog):
class Spawn(CombatLog): class Spawn(CombatLog):
__slots__ = CombatLog.__slots__ __slots__ = CombatLog.__slots__
matcher = re.compile(r"^Spawn\sSpaceShip\sfor\splayer(?P<player>\d+)\s\((?P<name>[^,]+),\s+(?P<hash>#\w+)\)\.\s+'(?P<ship_class>\w+)'") matcher = re.compile(r"^Spawn\sSpaceShip\sfor\splayer(?P<player>-*\d+)\s\((?P<name>[^,]*),\s+(?P<hash>#\w+)\)\.\s+'(?P<ship_class>\w+)'")
class Spell(CombatLog): class Spell(CombatLog):
__slots__ = CombatLog.__slots__ __slots__ = CombatLog.__slots__
@ -115,28 +115,25 @@ class Reward(CombatLog):
class Participant(CombatLog): class Participant(CombatLog):
__slots__ = CombatLog.__slots__ __slots__ = CombatLog.__slots__
matcher = re.compile(r"^\s+Participant\s+(?P<source_name>[^\s]+)(?:\s+(?P<ship_class>\w+)|\s{30,})\s+(?:totalDamage\s(?P<total_damage>(?:\d+|\d+\.\d+));\s+|\s+)(?:mostDamageWith\s'(?P<module_class>[^']+)';\s*(?P<additional>.*)|<(?P<other>\w+)>)") matcher = [
# more complex version:
""" re.compile(r"^\s+Participant\s+(?P<source_name>[^\s]+)(?:\s+(?P<ship_class>\w+)|\s{30,})\s+(?:totalDamage\s(?P<total_damage>(?:\d+|\d+\.\d+));\s+|\s+)(?:mostDamageWith\s'(?P<module_class>[^']+)';\s*(?P<additional>.*)|<(?P<other>\w+)>)"),
2017-03-29 13:25:49 - Unknown Packet for Rocket: # simple version (new):
Rocket launch 18912, owner 'LOSNAR', def 'SpaceMissile_Barrage_T5_Mk3', target 'white213mouse' (17894) re.compile(r"^\s+Participant\s+(?P<source_name>[^\s]+)\s+(?P<ship_class>\w+)"),
2017-03-29 13:25:49 - Unknown Packet for Rocket: re.compile(r"^\s+Participant\s+(?P<source_name>[^\s]+)"),
Rocket detonation 18912, owner 'LOSNAR', def 'SpaceMissile_Barrage_T5_Mk3', reason 'auto_detonate', directHit 'white213mouse' ]
2017-03-29 13:25:49 - Unknown Packet for Rocket:
Rocket launch 18966, owner 'LOSNAR', def 'SpaceMissile_Barrage_T5_Mk3', target 'white213mouse' (17894)
2017-03-29 13:25:49 - Unknown Packet for Rocket:
Rocket detonation 18966, owner 'LOSNAR', def 'SpaceMissile_Barrage_T5_Mk3', reason 'auto_detonate', directHit 'white213mouse'
2017-03-29 13:25:49 - Unknown Packet for Rocket:
Rocket detonation 18892, owner 'LOSNAR', def 'SpaceMissile_Barrage_T5_Mk3', reason 'ttl'
2017-03-29 13:25:49 - Unknown Packet for Rocket:
Rocket detonation 18931, owner 'optimistik', def 'Weapon_Railgun_Heavy_T5_Epic', reason 'hit'
2017-03-29 13:25:49 - Unknown Packet for Participant:
Participant white213mouse Ship_Race5_M_ATTACK_Rank15
"""
class Rocket(CombatLog): class Rocket(CombatLog):
__slots__ = CombatLog.__slots__ __slots__ = CombatLog.__slots__
matcher = re.compile(r"^Rocket\s(?P<event>launch|detonation)\.\sowner\s'(?P<name>[^']+)'(?:,\s(?:def\s'(?P<missile_type>\w+)'|target\s'(?P<target>[^']+)'|reason\s'(?P<reason>\w+)'|directHit\s'(?P<direct_hit>[^']+)'))+") # keys = [ 'event', 'name', 'def', 'target', 'reason', 'direct_hit', 'rocket_id' ]
# changed 'missile_type' to 'def'
matcher = [
# old version: Rocket detonation. owner...
re.compile(r"^Rocket\s(?P<event>launch|detonation)\.\sowner\s'(?P<name>[^']+)'(?:,\s(?:def\s'(?P<def>[^']+)'|target\s'(?P<target>[^']+)'|reason\s'(?P<reason>[^']+)'|directHit\s'(?P<direct_hit>[^']+)'))+"),
# new version: Rocket detonation rocket ID (is that range? it can be -1), owner ...
re.compile(r"^Rocket\s(?P<event>launch|detonation)\s+(?P<rocket_id>-*\d+),\sowner\s'(?P<name>[^']+)'(?:,\s(?:def\s'(?P<def>[^']+)'|target\s'(?P<target>[^']+)'|reason\s'(?P<reason>[^']+)'|directHit\s'(?P<direct_hit>[^']+)'))+"),
]
class Heal(CombatLog): class Heal(CombatLog):
__slots__ = CombatLog.__slots__ __slots__ = CombatLog.__slots__
@ -144,7 +141,9 @@ class Heal(CombatLog):
# heal by module # heal by module
re.compile(r"^Heal\s+(?P<source_name>[^\s]+)\s\->\s+(?P<target_name>[^\s]+)\s+(?P<amount>(?:\d+|\d+\.\d+))\s(?P<module_class>[^\s]+)"), re.compile(r"^Heal\s+(?P<source_name>[^\s]+)\s\->\s+(?P<target_name>[^\s]+)\s+(?P<amount>(?:\d+|\d+\.\d+))\s(?P<module_class>[^\s]+)"),
# direct heal by source or n/a (global buff) # direct heal by source or n/a (global buff)
re.compile(r"^Heal\s+(?:n/a|(?P<source_name>\w+))\s+\->\s+(?P<target_name>[^\s]+)\s+(?P<amount>(?:\d+|\d+\.\d+))"), re.compile(r"^Heal\s+(?:n/a|(?P<source_name>\w+))\s+\->\s+(?P<target_name>[^\s]+)\s+(?P<amount>(?:\d+\.\d+|\d+))"),
# new heal with microtid
re.compile(r"^Heal\s+(?:n/a|(?P<source_name>[^\|]+)\|(?P<source_tid>\d+))\s+\->\s+(?P<target_name>[^\|]+)\|(?P<target_tid>\d+)\s+(?P<amount>(?:\d+\.\d+|\d+))")
] ]
class Killed(CombatLog): class Killed(CombatLog):
@ -253,11 +252,19 @@ class Set(CombatLog):
called on setting "relationship" / OpenSpace called on setting "relationship" / OpenSpace
Variables in values: Variables in values:
- what (relationship) - what (relationship)
Optionals:
- name (who do i set?) - name (who do i set?)
- value (to what value?) - value (to what value?)
- def: spell usually in combination with level and deftype.
""" """
__slots__ = CombatLog.__slots__ __slots__ = CombatLog.__slots__
matcher = re.compile("^Set\s(?P<what>\w+)\s(?P<name>[^\s]+)\sto\s(?P<value>\w+)") matcher = [
# what: usually reputation.
re.compile("^Set\s(?P<what>\w+)\s(?P<name>[^\s]+)\sto\s(?P<value>\w+)"),
# what: 'stage', +level +deftype (aura), def (aura spell name), index is weird array lookup always 0, id is the id of the aura.
re.compile("^Set\s(?P<what>\w+)\s(?P<level>\d+)\s+for\s+(?P<deftype>\w+)\s+'(?P<def>[^']+)'\[(?P<index>\d+)\]\s+id\s(?P<id>-*\d+)"),
]
class SqIdChange(CombatLog): class SqIdChange(CombatLog):
""" - number: player number """ - number: player number

View File

@ -52,7 +52,7 @@ Interesting Lines:
""" """
class GameLog(Log): class GameLog(Log):
__slots__ = Log.__slots__ + [ '_match_id', 'values'] __slots__ = Log.__slots__
@classmethod @classmethod
def is_handler(cls, log): def is_handler(cls, log):
if log.get('logtype', None) == '': # we handle only logs with empty logtype. if log.get('logtype', None) == '': # we handle only logs with empty logtype.

View File

@ -1,4 +1,11 @@
""" """
Why?
- initial implementation only followed to read whole files, but the logparser itself would work also on streamed data.
- now the initial implementation builds on top of logstream, which should keep the stream functionality intact, aka allow later to parse
files as they get written.
- much of the parsing therefore may be designed to be repeatable, if information is partial. Unfortunately this makes the whole process a bit mind-crunching.
A LogStream is supposed to: A LogStream is supposed to:
- parse data feeded into it. - parse data feeded into it.
- yield new objects - yield new objects
@ -22,6 +29,8 @@
combine it with the lookup for "watching files being changed", to create a program which listens to the logs live combine it with the lookup for "watching files being changed", to create a program which listens to the logs live
@see: monitor.py @see: monitor.py
@see: watchdog https://pypi.python.org/pypi/watchdog @see: watchdog https://pypi.python.org/pypi/watchdog
""" """
from .base import Log from .base import Log
import re import re
@ -61,6 +70,8 @@ class LogStream(object):
def clean(self, remove_log=True): def clean(self, remove_log=True):
# cleans the logs by removing all non parsed packets. # cleans the logs by removing all non parsed packets.
# in essence, every line which is a dict, is removed. every log class is called for clean.
# every log that flags itself as trash, is removed.
# remove_log: should i remove the raw log entry? # remove_log: should i remove the raw log entry?
lines = [] lines = []
for l in self.lines: for l in self.lines:
@ -71,8 +82,7 @@ class LogStream(object):
l.clean() l.clean()
lines.append(l) lines.append(l)
else: else:
print((type(l))) logging.warning('The Packet of Type %s has no trash attribute. Is it a valid Log Class? %s' % (type(l), l))
print(l)
self.lines = lines self.lines = lines
self._unset_data() self._unset_data()
@ -82,7 +92,11 @@ class LogStream(object):
self._data = None self._data = None
def pre_parse_line(self, line): def pre_parse_line(self, line):
# pre parse line expects a raw line from the log.
# it will basically return None if that line is not important for logs.
# otherwise it will return a dictionary, containing logtype, hh, dd, mm, ss, ns, and log as logline.
if not isinstance(line, str): if not isinstance(line, str):
# if this has already been parsed:
return line return line
elif line.startswith('---'): elif line.startswith('---'):
return None return None
@ -103,7 +117,7 @@ class LogStream(object):
return None return None
def _parse_line(self, line): def _parse_line(self, line):
# add the line to my lines. # add the line to the current packets lines.
if line is not None: if line is not None:
o = line o = line
if isinstance(line, str): if isinstance(line, str):
@ -121,16 +135,18 @@ class LogStream(object):
if self._last_object is not None and isinstance(self._last_object, Log): if self._last_object is not None and isinstance(self._last_object, Log):
self._last_object.unpack() self._last_object.unpack()
if self._last_object.append(line): if self._last_object.append(line):
# last object accepted this line, return.
return return
# at this point, either the last object did not accept this string,
# or last object wasn't a stacktrace.
# either way, this is a weird one.
logging.debug('#: %s' % line) logging.debug('#: %s' % line)
o = None o = None # will return later.
elif isinstance(line, dict): elif isinstance(line, dict):
# Unresolved Log. # Unresolved Log.
o = self.resolve(line) o = self.resolve(line)
# this is where the whole thing gets polluted with weird dicts. # after resolving the log, it hopefully is not a dict anymore.
# what exactly should resolve do!? # if it still is, its just the same dict.
# by default it returns what its given, if unknown.
# #@TODO @XXX @CRITICAL
self._last_object = o self._last_object = o
else: else:
self._last_object = o self._last_object = o
@ -145,4 +161,5 @@ class LogStream(object):
def resolve(self, gd): def resolve(self, gd):
# gd is a dict. # gd is a dict.
# try to find a class that is responsible for this log. # try to find a class that is responsible for this log.
# this is done in subclasses of logstream.
return gd return gd