GP-4287: Format. Revise. Certify.

This commit is contained in:
Dan 2024-02-05 10:15:25 -05:00
parent 43131199cf
commit e645d74a5f
10 changed files with 160 additions and 101 deletions

View file

@ -2,5 +2,5 @@
##MODULE IP: FAMFAMFAM Icons - CC 2.5
##MODULE IP: Oxygen Icons - LGPL 3.0
Module.manifest||GHIDRA||||END|
src/main/help/help/TOC_Source.xml||GHIDRA||reviewed||END|
src/main/help/help/TOC_Source.xml||GHIDRA||||END|
src/main/help/help/topics/WildcardAssemblerModule/Wildcard_Assembler.html||GHIDRA||||END|

View file

@ -29,35 +29,28 @@
// See the "WildSleighAssemblerInfo" script for a simpler use of the WildSleighAssembler.
// @category Examples
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.*;
import ghidra.app.plugin.assembler.AssemblySelector;
import ghidra.app.plugin.assembler.sleigh.parse.AssemblyParseResult;
import ghidra.app.plugin.assembler.sleigh.sem.AssemblyPatternBlock;
import ghidra.app.plugin.assembler.sleigh.sem.AssemblyResolution;
import ghidra.app.plugin.assembler.sleigh.sem.AssemblyResolutionResults;
import ghidra.app.plugin.assembler.sleigh.sem.*;
import ghidra.app.plugin.processors.sleigh.SleighLanguage;
import ghidra.app.script.GhidraScript;
import ghidra.asm.wild.WildOperandInfo;
import ghidra.asm.wild.WildSleighAssembler;
import ghidra.asm.wild.WildSleighAssemblerBuilder;
import ghidra.asm.wild.*;
import ghidra.asm.wild.sem.WildAssemblyResolvedPatterns;
import ghidra.program.model.mem.*;
import ghidra.program.model.address.*;
import ghidra.program.model.address.Address;
import ghidra.program.model.mem.Memory;
import ghidra.program.model.mem.MemoryAccessException;
public class FindInstructionWithWildcard extends GhidraScript {
@Override
public void run() throws Exception {
var instruction = askString("Instruction to search",
"Instruction to search for with wildcard (example is for x86_64, adjust if you are using a different architecture):",
"XOR R13D,`Q1/R1(2|3)D`");
// Pass the example as the dialog's default value (third argument) instead of folding it into
// the prompt text, so the input field is pre-populated with a working x86_64 pattern.
var instruction = askString("Instruction to search", """
	Instruction to search for with wildcard (example is for x86_64, adjust if you are \
	using a different architecture):""",
	"XOR R13D,`Q1/R1(2|3)D`");
var allValidResults = getAllResolvedPatterns(instruction);
var encodings = getMapOfUniqueInstructionEncodings(allValidResults);
@ -175,8 +168,7 @@ public class FindInstructionWithWildcard extends GhidraScript {
* Returns true if the given value shares the same {@code maskedInstruction} and wildcard(s)
* as this instance.
*
* @param other
* Value to compare against
* @param other Value to compare against
* @return True if both values share the same maskedInstruction and wildcard(s)
*/
boolean sameBaseEncoding(ReducedWildcardAssemblyResolvedPattern other) {
@ -194,7 +186,7 @@ public class FindInstructionWithWildcard extends GhidraScript {
// Check all of other's WildOperandInfo
for (WildOperandInfo otherInfo : other.parent.getOperandInfo()) {
// Check if we have matching wildcards (names), expressions, and locations.
// Notice that we're *NOT* checking choice here, as we expect those to be different.
// We're *NOT* checking choice here, as we expect those to be different.
if (info.wildcard().equals(otherInfo.wildcard()) &&
info.expression().equals(otherInfo.expression()) &&
info.location().equals(otherInfo.location())) {
@ -222,10 +214,8 @@ public class FindInstructionWithWildcard extends GhidraScript {
* Does not currently print wildcard information about the search results, but this could be
* added.
*
* @param encodings
* HashMap of encodings to that encoding's possible WildOperandInfo values.
* @throws MemoryAccessException
* If we find bytes but can't read them
* @param encodings Map of encodings to that encoding's possible WildOperandInfo values.
* @throws MemoryAccessException If we find bytes but can't read them
*/
private void searchMemoryForEncodings(
Map<AssemblyPatternBlock, Set<WildOperandInfo>> encodings,
@ -274,12 +264,11 @@ public class FindInstructionWithWildcard extends GhidraScript {
* NOTE: This is certainly not the highest performance way to do this, but it is reasonably
* simple and shows what is possible.
*
* @param matchAddress
* The address where our search hit occurred
* @param matchData
* The bytes found at matchAddress. Must include the entire matching instruction!
* @param allValidResolvedPatterns
* All resolved patterns which were searched from (used to find wildcard information)
* @param matchAddress The address where our search hit occurred
* @param matchData The bytes found at matchAddress. Must include the entire matching
* instruction!
* @param allValidResolvedPatterns All resolved patterns which were searched from (used to find
* wildcard information)
*/
private void printSearchHitInfo(Address matchAddress, byte[] matchData,
List<WildAssemblyResolvedPatterns> allValidResolvedPatterns) {
@ -321,8 +310,7 @@ public class FindInstructionWithWildcard extends GhidraScript {
* Return all items from {@code results} which are instances of
* {@link WildAssemblyResolvedPatterns}
*
* @param results
* The results to return {@link WildAssemblyResolvePatterns} from
* @param results The results to return {@link WildAssemblyResolvedPatterns} from
* @return All {@link WildAssemblyResolvedPatterns} which were found in the input
*/
private List<WildAssemblyResolvedPatterns> getValidResults(AssemblyResolutionResults results) {

View file

@ -24,7 +24,6 @@
// See the "FindInstructionWithWildcard" script for another example of using the WildSleighAssembler
// @category Examples
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import java.util.stream.Collectors;
@ -34,16 +33,17 @@ import ghidra.app.plugin.assembler.sleigh.parse.AssemblyParseResult;
import ghidra.app.plugin.assembler.sleigh.sem.AssemblyResolution;
import ghidra.app.plugin.processors.sleigh.SleighLanguage;
import ghidra.app.script.GhidraScript;
import ghidra.asm.wild.WildOperandInfo;
import ghidra.asm.wild.WildSleighAssembler;
import ghidra.asm.wild.WildSleighAssemblerBuilder;
import ghidra.asm.wild.*;
import ghidra.asm.wild.sem.WildAssemblyResolvedPatterns;
public class WildSleighAssemblerInfo extends GhidraScript {
List<String> sampleInstructions =
Arrays.asList("MOV EAX,`Q1`", "MOV RDI,qword ptr [`Q1` + -0x30]", "Custom");
List<String> sampleInstructions = List.of(
"MOV EAX,`Q1`",
"MOV RDI,qword ptr [`Q1` + -0x30]",
"Custom");
@Override
public void run() throws Exception {
String instruction = askChoice("Instruction to assemble", "Assemble this instruction:",
@ -61,8 +61,8 @@ public class WildSleighAssemblerInfo extends GhidraScript {
/**
* Use a {@link WildSleighAssembler} to assemble the given {@code wildcardedInstruction}
*
* @param wildcardedInstruction
* String of the instruction to assemble, possibly including a wildcard
* @param wildcardedInstruction String of the instruction to assemble, possibly including a
* wildcard
* @return All AssemblyResolution produced from the given input
*/
private List<AssemblyResolution> getAllAssemblyResolutions(
@ -74,10 +74,10 @@ public class WildSleighAssemblerInfo extends GhidraScript {
// correct architecture.
if (sampleInstructions.contains(wildcardedInstruction) &&
!language.getLanguageID().toString().equals("x86:LE:64:default")) {
popup(
"The current program is not a \"x86:LE:64:default\" binary that the example was " +
"designed for. This script will continue and try anyway, but the results might " +
"not be as expected. Retry with a custom instruction in your architecture!");
popup("""
The current program is not a \"x86:LE:64:default\" binary that the example was \
designed for. This script will continue and try anyway, but the results might \
not be as expected. Retry with a custom instruction in your architecture!""");
}
// Create a WildSleighAssembler that we'll use to assemble our wildcard-included instruction
@ -123,9 +123,8 @@ public class WildSleighAssemblerInfo extends GhidraScript {
}
if (errorCount > 0) {
println(
"Additionally " + errorCount +
" non-WildAssemblyResolvedPatterns were not printed");
println("Additionally, " + errorCount +
" non-WildAssemblyResolvedPatterns were not printed");
}
}
@ -134,8 +133,7 @@ public class WildSleighAssemblerInfo extends GhidraScript {
* Print information about a single {@link WildAssemblyResolvedPatterns}, including information
* about each of its wildcards.
*
* @param x
* The value to print information about.
* @param x The value to print information about.
*/
private void printWildAssemblyResolvedPatterns(WildAssemblyResolvedPatterns x) {
println("Instruction bits (including wildcard values): " + x.getInstruction());

View file

@ -2,6 +2,8 @@
<HTML>
<HEAD>
<META name="generator" content=
"HTML Tidy for Java (vers. 2009-12-01), see jtidy.sourceforge.net">
<META http-equiv="Content-Language" content="en-us">
<META http-equiv="Content-Type" content="text/html; charset=windows-1252">
@ -13,84 +15,93 @@
<H1><A name="Wildcard_Assembler_Module"></A>Wildcard Assembler Module</H1>
<BLOCKQUOTE>
<P><B>This feature is currently only available as an API for Ghidra scripts and plugins. For
an example of how to use the API, see the FindInstructionWithWildcard and
WildSleighAssemblerInfo scripts in the Script Manager.</B></P>
<P><B>This feature is currently only available as an API for Ghidra
scripts and plugins. For an example of how to use the API, see the
FindInstructionWithWildcard and WildSleighAssemblerInfo scripts in the
Script Manager.</B></P>
<P>The <I>Wildcard Assembler</I> extends Ghidra's assembler to enable
assembling instructions with specific tokens replaced with wildcards.</P>
<P>The <I>Wildcard Assembler</I> extends Ghidra's assembler to enable assembling instructions
with specific tokens replaced with wildcards.</P>
<p>This assembler will return metadata for each wildcard in an assembled
instruction. This metadata includes details of which specific bits of an
assembled instruction are used to derive the value of the wildcarded token
and the expression used to derive the value.</p>
<P>This assembler will return metadata for each wildcard in an assembled instruction. This
metadata includes details of which specific bits of an assembled instruction are used to
derive the value of the wildcarded token and the expression used to derive the value.</P>
<H2>Wildcard Syntax</H2>
<P>Wildcards in instructions are specified by replacing the
to-be-wildcarded token with a wildcard name surrounded by backticks (e.g.
<CODE>`Q1`</CODE> where Q1 is an arbitrary wildcard name) and passing the
entire instruction to the Wildcard Assembler.</P>
<P>Wildcards in instructions are specified by replacing the to-be-wildcarded token with a
wildcard name surrounded by backticks (e.g. <CODE>`Q1`</CODE> where Q1 is an arbitrary
wildcard name) and passing the entire instruction to the Wildcard Assembler.</P>
<P>By default, the Wildcard Assembler will return metadata about all
possible values that a wildcarded token could take and all the encodings
of all these values. This behavior can be limited by filtering the
wildcard by appending specific syntax after the wildcard name:</P>
<P>By default, the Wildcard Assembler will return metadata about all possible values that a
wildcarded token could take and all the encodings of all these values. This behavior can be
limited by filtering the wildcard by appending specific syntax after the wildcard name:</P>
<UL>
<LI><B>Numeric Filter:</B>
<LI>
<B>Numeric Filter:</B>
<UL>
<LI>Appending <CODE>[..]</CODE> will constrain the wildcarded token
to only numeric values (and not registers or other strings).</LI>
<LI>Appending <CODE>[0x0..0x100]</CODE> (where 0x0 and 0x100 are
arbitrary hexadecimal values with the smaller number first) will
constrain the wildcarded token to only numeric values between the
two given values. This can be used to ensure that the returned
encodings can hold values of a desired size. Multiple non-contiguous
ranges can be specified by separating them with commas (e.g.
<CODE>[0x0..0x5,0x1000-0x4000]</CODE>)</LI>
<LI>Appending <CODE>[..]</CODE>, e.g., <CODE>MOV RAX, `Q1[..]`</CODE>, will constrain
the wildcarded token to only numeric values (and not registers or other strings).</LI>
<LI>Appending <CODE>[0x0..0x100]</CODE> (where 0x0 and 0x100 are arbitrary hexadecimal
values with the smaller number first) will constrain the wildcarded token to only
numeric values between the two given values. This can be used to ensure that the
returned encodings can hold values of a desired size. Multiple non-contiguous ranges
can be specified by separating them with commas (e.g.
<CODE>[0x0..0x5,0x1000..0x4000]</CODE>)</LI>
</UL>
</LI>
<LI><B>Regex Filter:</B>
<UL>
<LI>Appending <CODE>/ABCD</CODE> where ABCD is an arbitrary
regular expression will constrain the wildcarded token to only be
string tokens matching the given regular expression. This is most
likely used for filtering register names; for example appending
<CODE>/(sp)|(lr)</CODE> to a wildcard in a register position in
ARM assembly will limit the wildcard results to only encodings
using the <CODE>sp</CODE> or <CODE>lr</CODE> registers in that
position.</LI>
</UL>
</LI>
</LI>
<LI>
<B>Regex Filter:</B>
<UL>
<LI>Appending <CODE>/ABCD</CODE> where ABCD is an arbitrary regular expression will
constrain the wildcarded token to only be string tokens matching the given regular
expression. This is most likely used for filtering register names; for example
appending <CODE>/(sp)|(lr)</CODE> to a wildcard in a register position in ARM assembly
will limit the wildcard results to only encodings using the <CODE>sp</CODE> or
<CODE>lr</CODE> registers in that position.</LI>
</UL>
</LI>
</UL>
<P>Normally a wildcard will only match a single token. To allow a single
wildcard to match multiple related tokens: precede the wildcard name with a
<CODE>!</CODE> character. For example, in a x86:LE:32:default binary:</P>
<P>Normally a wildcard will only match a single token. For example, in an x86:LE:32:default
binary:</P>
<BLOCKQUOTE>
<DL>
<DT>No wildcard:</DT>
<DD><CODE>MOVSD.REP ES:EDI,ESI</CODE></DD>
<DT>Single token:</DT>
<DD><CODE>MOVSD.REP `Q1`:EDI,ESI</CODE></DD>
<DT>Single token:</DT>
<DD><CODE>MOVSD.REP ES:`Q2`,ESI</CODE></DD>
</DL>
</BLOCKQUOTE>
<P>To allow a single wildcard to match multiple related tokens: precede the wildcard name
with a <CODE>!</CODE> character:</P>
<BLOCKQUOTE>
<DL>
<DT>Multi-token:</DT>
<DD><CODE>MOVSD.REP `!Q4`,ESI</CODE></DD>
<DT>Single token (Does <I>NOT</I> assemble):</DT>
<DD><CODE>MOVSD.REP `Q3`,ESI</CODE></DD>
<DT>Multi-token:</DT>
<DD><CODE>MOVSD.REP `!Q4`,ESI</CODE></DD>
<DD><CODE>MOVSD.REP `Q3`,ESI</CODE></DD>
</DL>
</BLOCKQUOTE>
<P class="providedbyplugin">Provided by: <I>Wildcard Assembler Module</I></P>
</BLOCKQUOTE>
</BODY>
</HTML>

View file

@ -21,9 +21,25 @@ import ghidra.app.plugin.assembler.sleigh.sem.AssemblyConstructorSemantic;
import ghidra.app.plugin.assembler.sleigh.sem.AssemblyPatternBlock;
import ghidra.app.plugin.processors.sleigh.expression.PatternExpression;
/**
* Information about an operand that was matched to a wildcard
*
* @param wildcard the name of the wildcard that matched the operand
* @param path the hierarchy of Sleigh constructors leading to the operand
* @param location the bit pattern giving the location of the operand's field(s) in the machine
* instruction
* @param expression the expression describing how to encode the operand in the field(s)
* @param choice if applicable, the value encoded in the result containing this information
*/
public record WildOperandInfo(String wildcard, List<AssemblyConstructorSemantic> path,
AssemblyPatternBlock location, PatternExpression expression, Object choice) {
/**
 * Produce a copy of this wildcard info whose location has been shifted further
 *
 * @param amt the number of bits to shift (right)
 * @return the copy, identical to this info except for the shifted location
 */
public WildOperandInfo shift(int amt) {
	AssemblyPatternBlock shiftedLocation = location.shift(amt);
	return new WildOperandInfo(wildcard, path, shiftedLocation, expression, choice);
}

View file

@ -26,6 +26,12 @@ import ghidra.asm.wild.sem.WildAssemblyTreeResolver;
import ghidra.program.model.address.Address;
import ghidra.program.model.listing.Program;
/**
* An assembler implementation that allows for wildcard operands
*
* <p>
* Construct these using {@link WildSleighAssemblerBuilder}.
*/
public class WildSleighAssembler extends AbstractSleighAssembler<WildAssemblyResolvedPatterns> {
protected WildSleighAssembler(

View file

@ -19,6 +19,7 @@ import java.util.*;
import ghidra.app.plugin.assembler.AssemblySelector;
import ghidra.app.plugin.assembler.sleigh.AbstractSleighAssemblerBuilder;
import ghidra.app.plugin.assembler.sleigh.SleighAssemblerBuilder;
import ghidra.app.plugin.assembler.sleigh.grammars.AssemblyGrammar;
import ghidra.app.plugin.assembler.sleigh.grammars.AssemblySentential;
import ghidra.app.plugin.assembler.sleigh.sem.AbstractAssemblyResolutionFactory;
@ -33,11 +34,32 @@ import ghidra.asm.wild.sem.WildAssemblyResolvedPatterns;
import ghidra.asm.wild.symbol.*;
import ghidra.program.model.listing.Program;
/**
* The builder for wildcard-enabled assemblers.
*
* <p>
* Ideally, only one of these is created and cached per language, to save on the cost of building
* the assembler. However, if heap space needs to be freed up, then the builder must be disposed.
*
* <p>
* This is based on the same abstract class as {@link SleighAssemblerBuilder}. See its documentation
* for more information.
*/
public class WildSleighAssemblerBuilder
extends AbstractSleighAssemblerBuilder<WildAssemblyResolvedPatterns, WildSleighAssembler> {
protected final Map<AssemblySymbol, AssemblyNonTerminal> wildNTs = new HashMap<>();
/**
* Construct a builder for the given language
*
* <p>
* Once a builder is prepared for the given language, it can be used to build an assembler for
* any number of programs using that same language. Clients should take advantage of this to
* avoid re-incurring the steep cost of constructing an assembler for the same language.
*
* @param lang the Sleigh language to build assemblers for; handed to the superclass constructor
*/
public WildSleighAssemblerBuilder(SleighLanguage lang) {
super(lang);
}

View file

@ -20,7 +20,6 @@ import java.util.stream.Stream;
import ghidra.app.plugin.assembler.sleigh.sem.*;
import ghidra.app.plugin.processors.sleigh.symbol.OperandSymbol;
import ghidra.asm.wild.tree.WildAssemblyParseToken;
import ghidra.asm.wild.tree.WildAssemblyParseToken.RegexWildcard;
public class WildAssemblyFixedNumericStateGenerator
extends AbstractAssemblyStateGenerator<WildAssemblyParseToken> {

View file

@ -22,10 +22,28 @@ import ghidra.app.plugin.assembler.sleigh.sem.*;
import ghidra.app.plugin.processors.sleigh.expression.PatternExpression;
import ghidra.asm.wild.WildOperandInfo;
/**
* The result of assembling an instruction with the wildcard assembler
*/
public interface WildAssemblyResolvedPatterns extends AssemblyResolvedPatterns {
/**
* The information for wildcarded operands in this instruction
*
* @return the set of information
*/
Set<WildOperandInfo> getOperandInfo();
/**
* Create a copy of this result with added wildcard information
*
* @param wildcard see {@link WildOperandInfo}
* @param path see {@link WildOperandInfo}
* @param location see {@link WildOperandInfo}
* @param expression see {@link WildOperandInfo}
* @param choice see {@link WildOperandInfo}
* @return the copy
*/
WildAssemblyResolvedPatterns withWildInfo(String wildcard,
List<AssemblyConstructorSemantic> path, AssemblyPatternBlock location,
PatternExpression expression, Object choice);

View file

@ -157,6 +157,7 @@ public class AssemblyPatternBlock implements Comparable<AssemblyPatternBlock> {
* Convert a block from a disjoint pattern into an assembly pattern block
*
* @param pat the pattern to convert
* @param minLen the minimum byte length of the block
* @param context true to select the context block, false to select the instruction block
* @return the converted pattern block
*/