commit 78033ee07397cb53bb3807a8facf9b899c228be5 Author: Dustella Date: Sat Dec 13 21:49:17 2025 +0800 init: first version diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..a136337 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +*.pdf diff --git a/img/examples.png b/img/examples.png new file mode 100644 index 0000000..6de0cc4 Binary files /dev/null and b/img/examples.png differ diff --git a/img/fail1.png b/img/fail1.png new file mode 100644 index 0000000..52e69b3 Binary files /dev/null and b/img/fail1.png differ diff --git a/img/fail2.png b/img/fail2.png new file mode 100644 index 0000000..e8d3f8c Binary files /dev/null and b/img/fail2.png differ diff --git a/img/metrics.png b/img/metrics.png new file mode 100644 index 0000000..29b8101 Binary files /dev/null and b/img/metrics.png differ diff --git a/img/sam_f1.png b/img/sam_f1.png new file mode 100644 index 0000000..4c15c84 Binary files /dev/null and b/img/sam_f1.png differ diff --git a/img/sam_iou.png b/img/sam_iou.png new file mode 100644 index 0000000..0b423a6 Binary files /dev/null and b/img/sam_iou.png differ diff --git a/img/xjtlu-o.png b/img/xjtlu-o.png new file mode 100644 index 0000000..cc2f833 Binary files /dev/null and b/img/xjtlu-o.png differ diff --git a/poster.typ b/poster.typ new file mode 100644 index 0000000..da810cb --- /dev/null +++ b/poster.typ @@ -0,0 +1,155 @@ +#import "@preview/postercise:0.2.0": * +#import themes.boxes: * +#import "@preview/fletcher:0.5.8" as fletcher: diagram, edge, node + +#set page(width: 16in, height: 22in) +#set text(size: 16pt) + +#show: theme.with( + primary-color: rgb(28, 55, 103), // Dark blue + background-color: white, + accent-color: rgb(243, 163, 30), // Yellow + titletext-color: white, + titletext-size: 1.8em, +) + +#poster-header( + title: [Can SAM "Segment Anything"? 
#linebreak() ], + subtitle: [Evaluating Zero-Shot Performance on Crack Detection], + authors: [Hanwen Yu], + affiliation: [School of Advanced Technology, Supervisor: SiYue Yu + ], + logo-2: image("./img/xjtlu-o.png", width: 15em), +) + + +// #image("examples.png", width: 100%) + + +#poster-content(col: 3)[ + // Content goes here + #normal-box(color: none)[ + == Introduction + The Segment Anything Model (SAM) has demonstrated remarkable + zero-shot segmentation capabilities on natural images. However, its zero-shot performance on domain-specific tasks remains underexplored. + // WHY CRACK SEGMENTATION? + // • Critical for infrastructure safety monitoring + // • Challenging characteristics: + // - Thin, elongated structures (often 1-5 pixels wide) + // - Low contrast against background + // - Complex branching topology + + // RESEARCH QUESTION + + *Can SAM2 achieve competitive crack segmentation + performance without domain-specific training?* + // CONTRIBUTIONS + // • First systematic evaluation of SAM2 zero-shot capability + // on crack segmentation + // • Comprehensive comparison of prompt strategies + // (bounding box vs. 
point-based prompts) + // • Analysis of failure modes and practical limitations + + ] + #normal-box(color: none)[ + == Methodology + + + *Dataset* + + - Crack500: 500 images with pixel-wise annotations + + - Test set: 100 images for evaluation + + *Prompt Strategies* + + We evaluate four prompt generation approaches: + + #table( + columns: 2, + + [Prompt Type], [Description], + [Bounding Box], [Tight box around ground truth mask], + [1-Point Prompt], [Single point sampled from GT skeleton (morphological center)], + [3-Point Prompt], [Three uniformly distributed points along GT skeleton], + [5-Point Prompt], [Five uniformly distributed points along GT skeleton], + ) + + *Evaluation* + + $ + "IoU" = "TP" / ("TP" + "FP" + "FN") + $ + + $ + "F1" = 2 * ("Precision" * "Recall") / ("Precision" + "Recall") + $ + + *Baselines* + + Supervised models: UNet, DeepCrack, TransUNet, + CT-CrackSeg, VM-UNet, CrackSegMamba + + + #import fletcher.shapes: brace, diamond, hexagon, parallelogram, pill + + #set text(size: 16pt) + #diagram( + node-fill: gradient.radial(white, blue, radius: 200%), + node-stroke: blue, + spacing: 25pt, + ( + node((0, 0), [Crack Image], shape: rect), + node((0, 1), [SAM Image Encoder], shape: rect), + node((0, 2), [Prompt Generation #linebreak() BBox, 1/3/5 points], shape: rect), + node((1, 2), [SAM Mask Decoder], shape: rect), + node((1, 1), [Predicted Mask], shape: rect), + node((1, 0), [Metrics (IoU, F1)], shape: rect), + ) + .intersperse(edge("-|>")) + .join(), + ) + + ] + #normal-box(color: none)[ + == Experiments and Results + #image("img/examples.png") + #image("img/metrics.png") + #image("img/sam_iou.png") + #image("img/sam_f1.png") + + ] + + #normal-box(color: none)[ + + == Qualitative Analysis + #image("img/fail1.png") + #image("img/fail2.png") + + ] + + #normal-box(color: none)[ + == Key Findings and Discussion + // *Prompt Effectiveness* + + Bounding box prompts yield the best performance among zero-shot methods. 
There is a 4.7x performance gap between bbox (39.6% IoU) and 1-point prompts (8.4% IoU). + + + SAM2 with bbox prompts (39.6% IoU) lags behind supervised models, even UNet from 2015, which highlights the limitations of the zero-shot approach without fine-tuning. + + // *Single Point Prompt Limitations* + 1-point prompts perform poorly (8.4% IoU), indicating insufficient guidance for complex crack structures. 5-point prompts approach bbox performance for highly irregular cracks, suggesting multiple points help capture shape. + ] + + #normal-box(color: none)[ + + == Conclusion and Future Work + + SAM2 shows limited zero-shot capability for crack segmentation. Bounding box prompts significantly outperform point-based prompts. Performance still lags behind supervised methods, indicating a need for domain adaptation. + ] + #poster-footer[ + // Content + Hanwen Yu | Email: Hanwen.Yu24\@student.xjtlu.edu.cn + ] +] +