bugfixes, updates

* updated all known packets to be parsed correctly
 * made analyze script more useful
 * updated readme and some doc
This commit is contained in:
Gabor Körber 2017-05-19 16:21:51 +02:00
parent a3f185683c
commit e003f8e7c5
8 changed files with 157 additions and 66 deletions

View File

@ -1,8 +1,21 @@
library for reading SC logs
Library for reading SC logs
===========================
Github Release of my logparsing attempt
and other stuff, from internal git.
This software is in a pre-alpha stage, and is considered mostly useful for scientific purposes.
This software is in a _pre-alpha_ stage, and is considered mostly useful for _scientific_ purposes.
*License: LGPL*
Current Goals
-------------
- make library usable with an example
- more documentation
- update to handle all current combat events in pvp
Changelog
---------
- converted 99% to python3. switched to py3 development.
- moved to github with parser library only
License: LGPL

View File

@ -2,6 +2,17 @@
# -*- coding: utf-8 -*-
"""
Tool to analyze Logs in general.
This tool is built to discover unidentified packets.
It is mainly used in development
(for the whole library, this is actually the most important script atm)
It outputs a trash.log.bak and a scon.log.bak, and works itself through gigabytes of my backed-up test data.
This script therefore has following purposes:
- a) find bugs, find unknown packets (so new type of log entries in combat.log)
- b) see speed of parsing
- c) test parsing for memory efficiency, because parsing lots of big files needs that.
"""
import os, sys, logging
from scon.logs.logfiles import LogFileResolver as LogFile
@ -9,10 +20,11 @@ from scon.logs import combat, game, chat
from scon.logs.session import LogSessionCollector
from scon.logs.game import ClientInfo
# for windows its kinda this:
# note, this isn't used in this script. yeah, I know, right: defined up here, but not used down there.
# it's because I want to unify this to be in one configurable place ;)
settings = {'root_path': os.path.join(os.path.expanduser('~'),
# only analyze_path is used in this script. the others are for example.
settings = {'analyze_path': os.path.join(os.path.expanduser('~'),
'Documents', 'My Games', 'sc'),
'root_path': os.path.join(os.path.expanduser('~'),
'Documents',
'My Games',
'StarConflict',),
@ -21,10 +33,22 @@ settings = {'root_path': os.path.join(os.path.expanduser('~'),
'My Games',
'StarConflict',
'logs'
)}
),}
def select_parsing_sessions(alist):
# for micro controlling, which sessions to parse.
# default: return alist
return alist[-50:]
if __name__ == '__main__':
# set this to your liking:
COUNT_GOOD = True # count good packets via rex as well. useful to see total encountered packets in summary.
LOG_GOOD_ONLY = True # Log good packets only. if set to false, will log unknown packets to trash_log.
LOG_BAD_CMBT = True # by default, the main logs of interest for unknown entries is combat logs. here you can finetune which logs to catch.
LOG_BAD_CHAT = False
LOG_BAD_GAME = False
# set up our logging to do our task:
import logging
logging.basicConfig(level=logging.INFO,
format='%(asctime)s - %(message)s',
@ -32,17 +56,36 @@ if __name__ == '__main__':
logfile = logging.FileHandler('scon.log.bak')
logfile.setLevel(logging.DEBUG)
logging.getLogger().addHandler(logfile)
coll = LogSessionCollector(os.path.join(os.path.expanduser('~'),
'Documents', 'My Games', 'sc'))
trashfile = logging.FileHandler('trash.log.bak')
trashfile.setLevel(logging.INFO)
trash_log = logging.getLogger('trash_log')
trash_log.addHandler(trashfile)
trash_log.propagate = False # only log to file.
###################################
# collect all sessions, as in, get all log directories:
coll = LogSessionCollector(settings.get('analyze_path'))
logging.info('Collecting Sessions...')
coll.collect_unique()
logging.info('collected %s sessions.' % (len(coll.sessions)))
# now do in depth parsing per session:
#f = open('output.txt', 'w')
rex_combat = {}
rex_game = {}
rex_chat = {}
LOG_GOOD = True # Log good packets.
for logf in coll.sessions:
selected = select_parsing_sessions(coll.sessions)
logging.info("Start In Depth parsing for %s sessions. %s" % (len(selected),'Counting good packets' if COUNT_GOOD else 'Counting only bad packets.'))
if LOG_GOOD_ONLY:
logging.info("Only logging unknown variants of known packet types")
else:
logging.info("Logging unknown packets as well, CMBT: %s, GAME: %s, CHAT %s" % (LOG_BAD_CMBT, LOG_BAD_GAME, LOG_BAD_CHAT))
for logf in selected:
logf.parse_files(['game.log', 'combat.log', 'chat.log'])
logging.info(("## Processing Log %s" % logf.idstr))
@ -52,11 +95,11 @@ if __name__ == '__main__':
#print l
rex_combat['dict'] = rex_combat.get('dict', 0) + 1
else:
if not l.unpack() or LOG_GOOD:
if not l.unpack() or COUNT_GOOD:
rex_combat[l.__class__.__name__] = rex_combat.get(l.__class__.__name__, 0) + 1
if not isinstance(l, combat.UserEvent):
if not LOG_GOOD:
logging.debug((l.values['log']))
if not LOG_GOOD_ONLY and LOG_BAD_CMBT:
trash_log.info((l.values['log']))
else:
logging.warning('No combat log in %s' % logf.idstr)
if logf.game_log:
@ -66,12 +109,10 @@ if __name__ == '__main__':
elif isinstance(l, str):
print(l)
else:
if l.unpack() and not LOG_GOOD:
pass
else:
if not l.unpack() or COUNT_GOOD:
rex_game[l.__class__.__name__] = rex_game.get(l.__class__.__name__, 0) + 1
if not LOG_GOOD:
logging.debug((l.values['log']))
if not LOG_GOOD_ONLY and LOG_BAD_GAME and not isinstance(l, game.GameLog):
trash_log.info((l.values['log']))
else:
logging.warning('No game log in %s' % logf.idstr)
if logf.chat_log:
@ -81,22 +122,27 @@ if __name__ == '__main__':
elif isinstance(l, str):
print(l)
else:
if l.unpack() and not LOG_GOOD:
pass
else:
if not l.unpack() or COUNT_GOOD:
rex_chat[l.__class__.__name__] = rex_chat.get(l.__class__.__name__, 0) + 1
if not LOG_GOOD:
logging.debug((l.values['log']))
if not LOG_GOOD_ONLY and LOG_BAD_CHAT and not isinstance(l, chat.ChatLog):
trash_log.info((l.values['log']))
else:
logging.warning('No chat log in %s' % logf.idstr)
# Okay, parsing done.
# default cleanup: will remove all dictionaries, trash logs, etc.
logf.clean(True)
# additional cleanup:
# additional cleanup
# we remove actually ALL log lines, as we are not interested in the data anymore.
# this allows us to parse a lot more files
if logf.chat_log:
logf.chat_log.lines = []
if logf.game_log:
logf.game_log.lines = []
if logf.combat_log:
logf.combat_log.lines = []
# Summary:
logging.info('Analysis complete:')
logging.info(('#'*20+' RexCombat ' + '#' *20))
logging.info(rex_combat)

View File

@ -1,7 +1,10 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
Backup Directories, Handle Files...
This script backs up all recent star conflict log directories into a common repository, by zipping each directory.
It omits directories already backed up, and omits files in the directories.
Analyze.py can then scan these directories / zipfiles in development.
"""
import os, logging, zipfile

View File

@ -33,12 +33,14 @@ L_NET = 'NET' # Not supported in near future.
L_CHAT = 'CHAT'
class Log(object):
__slots__ = ['trash', 'reviewed']
__slots__ = ['trash', 'reviewed', '_match_id', 'values']
matcher = None
def __init__(self):
self.trash = False
self.reviewed = False
self.values = None
self._match_id = None
@classmethod
def is_handler(cls, log):
@ -61,8 +63,11 @@ class Log(object):
return False
class Stacktrace(Log):
''' Special Log to catch error reports '''
__slots__ = ['trash', 'reviewed', 'message']
''' Special Log to catch error reports
-> holds data in message not in values.
-> makes use of append
'''
__slots__ = ['trash', 'reviewed', 'message', '_match_id', 'values']
def __init__(self, values=None):
super(Stacktrace, self).__init__()

View File

@ -14,7 +14,7 @@ between 33-33-33 and FF-33 FF-33 FF-33
"""
class ChatLog(Log):
__slots__ = Log.__slots__ + ['_match_id', 'values']
__slots__ = Log.__slots__
@classmethod
def is_handler(cls, log):

View File

@ -22,9 +22,10 @@
import re
from .base import Log, L_CMBT, Stacktrace
import logging
trash_log = logging.getLogger('trash_log')
class CombatLog(Log):
__slots__ = Log.__slots__ + [ '_match_id', 'values']
__slots__ = Log.__slots__
@classmethod
def _log_handler(cls, log):
if log.startswith(cls.__name__):
@ -60,8 +61,7 @@ class CombatLog(Log):
return True
# unknown?
if not isinstance(self, UserEvent):
logging.warning('Unknown Packet for %s:\n%s' % (self.__class__.__name__,
self.values.get('log', '')))
trash_log.info('%s\t\t%s' % (self.__class__.__name__, self.values.get('log', '')))
# trash if unknown or no matcher.
self.trash = True
@ -98,7 +98,7 @@ class Damage(CombatLog):
class Spawn(CombatLog):
__slots__ = CombatLog.__slots__
matcher = re.compile(r"^Spawn\sSpaceShip\sfor\splayer(?P<player>\d+)\s\((?P<name>[^,]+),\s+(?P<hash>#\w+)\)\.\s+'(?P<ship_class>\w+)'")
matcher = re.compile(r"^Spawn\sSpaceShip\sfor\splayer(?P<player>-*\d+)\s\((?P<name>[^,]*),\s+(?P<hash>#\w+)\)\.\s+'(?P<ship_class>\w+)'")
class Spell(CombatLog):
__slots__ = CombatLog.__slots__
@ -115,28 +115,25 @@ class Reward(CombatLog):
class Participant(CombatLog):
__slots__ = CombatLog.__slots__
matcher = re.compile(r"^\s+Participant\s+(?P<source_name>[^\s]+)(?:\s+(?P<ship_class>\w+)|\s{30,})\s+(?:totalDamage\s(?P<total_damage>(?:\d+|\d+\.\d+));\s+|\s+)(?:mostDamageWith\s'(?P<module_class>[^']+)';\s*(?P<additional>.*)|<(?P<other>\w+)>)")
"""
2017-03-29 13:25:49 - Unknown Packet for Rocket:
Rocket launch 18912, owner 'LOSNAR', def 'SpaceMissile_Barrage_T5_Mk3', target 'white213mouse' (17894)
2017-03-29 13:25:49 - Unknown Packet for Rocket:
Rocket detonation 18912, owner 'LOSNAR', def 'SpaceMissile_Barrage_T5_Mk3', reason 'auto_detonate', directHit 'white213mouse'
2017-03-29 13:25:49 - Unknown Packet for Rocket:
Rocket launch 18966, owner 'LOSNAR', def 'SpaceMissile_Barrage_T5_Mk3', target 'white213mouse' (17894)
2017-03-29 13:25:49 - Unknown Packet for Rocket:
Rocket detonation 18966, owner 'LOSNAR', def 'SpaceMissile_Barrage_T5_Mk3', reason 'auto_detonate', directHit 'white213mouse'
2017-03-29 13:25:49 - Unknown Packet for Rocket:
Rocket detonation 18892, owner 'LOSNAR', def 'SpaceMissile_Barrage_T5_Mk3', reason 'ttl'
2017-03-29 13:25:49 - Unknown Packet for Rocket:
Rocket detonation 18931, owner 'optimistik', def 'Weapon_Railgun_Heavy_T5_Epic', reason 'hit'
2017-03-29 13:25:49 - Unknown Packet for Participant:
Participant white213mouse Ship_Race5_M_ATTACK_Rank15
"""
matcher = [
# more complex version:
re.compile(r"^\s+Participant\s+(?P<source_name>[^\s]+)(?:\s+(?P<ship_class>\w+)|\s{30,})\s+(?:totalDamage\s(?P<total_damage>(?:\d+|\d+\.\d+));\s+|\s+)(?:mostDamageWith\s'(?P<module_class>[^']+)';\s*(?P<additional>.*)|<(?P<other>\w+)>)"),
# simple version (new):
re.compile(r"^\s+Participant\s+(?P<source_name>[^\s]+)\s+(?P<ship_class>\w+)"),
re.compile(r"^\s+Participant\s+(?P<source_name>[^\s]+)"),
]
class Rocket(CombatLog):
__slots__ = CombatLog.__slots__
matcher = re.compile(r"^Rocket\s(?P<event>launch|detonation)\.\sowner\s'(?P<name>[^']+)'(?:,\s(?:def\s'(?P<missile_type>\w+)'|target\s'(?P<target>[^']+)'|reason\s'(?P<reason>\w+)'|directHit\s'(?P<direct_hit>[^']+)'))+")
# keys = [ 'event', 'name', 'def', 'target', 'reason', 'direct_hit', 'rocket_id' ]
# changed 'missile_type' to 'def'
matcher = [
# old version: Rocket detonation. owner...
re.compile(r"^Rocket\s(?P<event>launch|detonation)\.\sowner\s'(?P<name>[^']+)'(?:,\s(?:def\s'(?P<def>[^']+)'|target\s'(?P<target>[^']+)'|reason\s'(?P<reason>[^']+)'|directHit\s'(?P<direct_hit>[^']+)'))+"),
# new version: Rocket detonation rocket ID (is that range? it can be -1), owner ...
re.compile(r"^Rocket\s(?P<event>launch|detonation)\s+(?P<rocket_id>-*\d+),\sowner\s'(?P<name>[^']+)'(?:,\s(?:def\s'(?P<def>[^']+)'|target\s'(?P<target>[^']+)'|reason\s'(?P<reason>[^']+)'|directHit\s'(?P<direct_hit>[^']+)'))+"),
]
class Heal(CombatLog):
__slots__ = CombatLog.__slots__
@ -144,7 +141,9 @@ class Heal(CombatLog):
# heal by module
re.compile(r"^Heal\s+(?P<source_name>[^\s]+)\s\->\s+(?P<target_name>[^\s]+)\s+(?P<amount>(?:\d+|\d+\.\d+))\s(?P<module_class>[^\s]+)"),
# direct heal by source or n/a (global buff)
re.compile(r"^Heal\s+(?:n/a|(?P<source_name>\w+))\s+\->\s+(?P<target_name>[^\s]+)\s+(?P<amount>(?:\d+|\d+\.\d+))"),
re.compile(r"^Heal\s+(?:n/a|(?P<source_name>\w+))\s+\->\s+(?P<target_name>[^\s]+)\s+(?P<amount>(?:\d+\.\d+|\d+))"),
# new heal with microtid
re.compile(r"^Heal\s+(?:n/a|(?P<source_name>[^\|]+)\|(?P<source_tid>\d+))\s+\->\s+(?P<target_name>[^\|]+)\|(?P<target_tid>\d+)\s+(?P<amount>(?:\d+\.\d+|\d+))")
]
class Killed(CombatLog):
@ -253,11 +252,19 @@ class Set(CombatLog):
called on setting "relationship" / OpenSpace
Variables in values:
- what (relationship)
Optionals:
- name (who do i set?)
- value (to what value?)
- def: spell usually in combination with level and deftype.
"""
__slots__ = CombatLog.__slots__
matcher = re.compile("^Set\s(?P<what>\w+)\s(?P<name>[^\s]+)\sto\s(?P<value>\w+)")
matcher = [
# what: usually reputation.
re.compile("^Set\s(?P<what>\w+)\s(?P<name>[^\s]+)\sto\s(?P<value>\w+)"),
# what: 'stage', +level +deftype (aura), def (aura spell name), index is weird array lookup always 0, id is the id of the aura.
re.compile("^Set\s(?P<what>\w+)\s(?P<level>\d+)\s+for\s+(?P<deftype>\w+)\s+'(?P<def>[^']+)'\[(?P<index>\d+)\]\s+id\s(?P<id>-*\d+)"),
]
class SqIdChange(CombatLog):
""" - number: player number

View File

@ -52,7 +52,7 @@ Interesting Lines:
"""
class GameLog(Log):
__slots__ = Log.__slots__ + [ '_match_id', 'values']
__slots__ = Log.__slots__
@classmethod
def is_handler(cls, log):
if log.get('logtype', None) == '': # we handle only logs with empty logtype.

View File

@ -1,4 +1,11 @@
"""
Why?
- initial implementation only followed to read whole files, but the logparser itself would work also on streamed data.
- now the initial implementation builds on top of logstream, which should keep the stream functionality intact, aka allow later to parse
files as they get written.
- much of the parsing therefore may be designed to be repeatable, if information is partial. Unfortunately this makes the whole process a bit mind-crunching.
A LogStream is supposed to:
- parse data feeded into it.
- yield new objects
@ -22,6 +29,8 @@
combine it with the lookup for "watching files being changed", to create a program which listens to the logs live
@see: monitor.py
@see: watchdog https://pypi.python.org/pypi/watchdog
"""
from .base import Log
import re
@ -61,6 +70,8 @@ class LogStream(object):
def clean(self, remove_log=True):
# cleans the logs by removing all non parsed packets.
# in essence, every line which is a dict, is removed. every log class is called for clean.
# every log that flags itself as trash, is removed.
# remove_log: should i remove the raw log entry?
lines = []
for l in self.lines:
@ -71,8 +82,7 @@ class LogStream(object):
l.clean()
lines.append(l)
else:
print((type(l)))
print(l)
logging.warning('The Packet of Type %s has no trash attribute. Is it a valid Log Class? %s' % (type(l), l))
self.lines = lines
self._unset_data()
@ -82,7 +92,11 @@ class LogStream(object):
self._data = None
def pre_parse_line(self, line):
# pre parse line expects a raw line from the log.
# it will basicly return None if that line is not important for logs.
# otherwise it will return a dictionary, containing logtype, hh, dd, mm, ss, ns, and log as logline.
if not isinstance(line, str):
# if this has already been parsed:
return line
elif line.startswith('---'):
return None
@ -103,7 +117,7 @@ class LogStream(object):
return None
def _parse_line(self, line):
# add the line to my lines.
# add the line to the current packets lines.
if line is not None:
o = line
if isinstance(line, str):
@ -121,16 +135,18 @@ class LogStream(object):
if self._last_object is not None and isinstance(self._last_object, Log):
self._last_object.unpack()
if self._last_object.append(line):
# last object accepted this line, return.
return
# at this point, either the last object did not accept this string,
# or the last object wasn't a stacktrace.
# either way, this is a weird one.
logging.debug('#: %s' % line)
o = None
o = None # will return later.
elif isinstance(line, dict):
# Unresolved Log.
o = self.resolve(line)
# this is where the whole thing gets polluted with weird dicts.
# what exactly should resolve do!?
# by default it returns what its given, if unknown.
# #@TODO @XXX @CRITICAL
# after resolving the log, it hopefully is not a dict anymore.
# if it still is, its just the same dict.
self._last_object = o
else:
self._last_object = o
@ -145,4 +161,5 @@ class LogStream(object):
def resolve(self, gd):
# gd is a dict.
# try to find a class that is responsible for this log.
# this is done in subclasses of logstream.
return gd